import asyncio
import logging
import os
from pathlib import Path
from typing import Dict, List, Optional

from dotenv import load_dotenv
from telethon import TelegramClient
from telethon.errors import SessionPasswordNeededError

# Set the root logger to INFO at import time and create this module's
# named logger, which every function and method below logs through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TelegramScraper:
    """Scrapes app posts from Telegram channels.

    Settings are loaded from a ``.env`` file one directory above this
    module's directory and read through ``os.getenv``:
    ``TELEGRAM_API_ID``, ``TELEGRAM_API_HASH``, ``TELEGRAM_PHONE`` and, as a
    fallback when no phone number is set, ``TELEGRAM_BOT_TOKEN``.

    Typical use: ``await connect()``, then ``scrape_channel()`` or
    ``scrape_channels()``, and finally ``await disconnect()``.
    """

    def __init__(self):
        # Load environment
        env_path = Path(__file__).parent.parent / ".env"
        load_dotenv(dotenv_path=env_path)

        # Get credentials from environment or use defaults.
        # NOTE(review): API credentials are hard-coded as fallbacks here. They
        # are kept so existing callers keep working, but secrets should live
        # only in the environment / .env file -- consider removing them.
        self.api_id = int(os.getenv("TELEGRAM_API_ID", "20898819"))  # You'll need to set this
        self.api_hash = os.getenv("TELEGRAM_API_HASH", "d97e59c9e1b6c46f46e3e4c8c3c3c3c3")  # You'll need to set this
        self.phone = os.getenv("TELEGRAM_PHONE", "")  # Your phone number (optional)

        self.session_name = "telegram_scraper_session"
        # Created by connect(); None until then and after a failed connect().
        self.client = None

    async def connect(self):
        """Connect to Telegram and make sure the session is authorized.

        Returns:
            True when the client is connected and authorized. False when the
            connection or authorization failed, or when the session is not
            authorized and neither a phone number nor a bot token is
            configured. On failure the client is closed and ``self.client``
            is reset to None.
        """
        try:
            self.client = TelegramClient(self.session_name, self.api_id, self.api_hash)
            await self.client.connect()

            if not await self.client.is_user_authorized():
                logger.info("Not authorized. Attempting to authorize...")
                if not await self._authorize():
                    # Without credentials every later request would fail, so
                    # do not report success.
                    logger.error(
                        "Cannot authorize: set TELEGRAM_PHONE or TELEGRAM_BOT_TOKEN"
                    )
                    await self._close_after_failure()
                    return False

            logger.info("Connected to Telegram successfully")
            return True
        except Exception as e:
            logger.error(f"Connection error: {e}")
            await self._close_after_failure()
            return False

    async def _authorize(self) -> bool:
        """Sign in with the phone number, or else the bot token.

        Returns:
            True when a sign-in was performed, False when neither a phone
            number nor a bot token is available. Sign-in errors propagate to
            the caller.
        """
        if self.phone:
            await self.client.send_code_request(self.phone)
            code = input("Enter the code you received: ")
            try:
                await self.client.sign_in(self.phone, code)
            except SessionPasswordNeededError:
                # Account has two-step verification enabled.
                password = input("2FA password: ")
                await self.client.sign_in(password=password)
            return True

        logger.warning("Phone number not set. Using bot token instead.")
        bot_token = os.getenv("TELEGRAM_BOT_TOKEN")
        if not bot_token:
            return False
        await self.client.start(bot_token=bot_token)
        return True

    async def _close_after_failure(self):
        """Close and forget a client whose connect/authorize attempt failed."""
        client, self.client = self.client, None
        if client is None:
            return
        try:
            await client.disconnect()
        except Exception as e:
            # Best effort: the connect failure has already been reported.
            logger.warning(f"Error while closing client: {e}")

    async def disconnect(self):
        """Disconnect from Telegram (no-op when there is no client)."""
        if self.client:
            await self.client.disconnect()
            logger.info("Disconnected from Telegram")

    async def scrape_channel(self, channel_name: str, limit: int = 1) -> List[Dict]:
        """
        Scrape messages from a public Telegram channel

        Args:
            channel_name: Channel username; one leading "@" is stripped. The
                value is otherwise passed unchanged to the client's
                ``get_entity``.
            limit: Number of recent messages to get

        Returns:
            List of app dictionaries (see ``_parse_app_message``), one per
            message that has text and could be parsed. Empty when the client
            is not connected or any error occurs; errors are logged, not
            raised.
        """
        if channel_name.startswith("@"):
            channel_name = channel_name[1:]

        if self.client is None:
            logger.error(
                f"Error scraping channel {channel_name}: not connected, call connect() first"
            )
            return []

        try:
            logger.info(f"Scraping channel: {channel_name}")

            # Get channel entity
            channel = await self.client.get_entity(channel_name)

            # Get recent messages
            messages = await self.client.get_messages(channel, limit=limit)

            results = []
            for msg in messages:
                if not msg.text:
                    continue
                # Try to extract app info from message
                app_info = self._parse_app_message(msg, channel_name)
                if app_info:
                    results.append(app_info)

            return results
        except Exception as e:
            logger.error(f"Error scraping channel {channel_name}: {e}")
            return []

    @staticmethod
    def _extract_title(text: str) -> str:
        """Return the first non-blank line of *text* (max 100 chars), or "Unknown App"."""
        for line in text.split('\n'):
            stripped = line.strip()
            if stripped:
                return stripped[:100]
        return "Unknown App"

    @staticmethod
    def _slice_utf16(text: str, offset: int, length: int) -> str:
        """Return the part of *text* covered by a UTF-16 code-unit offset/length.

        Telegram entity offsets count UTF-16 code units, so characters outside
        the BMP (most emoji) occupy two units and plain ``text[a:b]`` slicing
        would be shifted.
        """
        encoded = text.encode("utf-16-le", "surrogatepass")
        return encoded[offset * 2:(offset + length) * 2].decode("utf-16-le", "ignore")

    @classmethod
    def _extract_url(cls, entities, raw_text: str) -> Optional[str]:
        """Return the first URL carried by *entities*, or None when there is none.

        An entity is considered when its class name contains "url". If it has
        a non-empty ``url`` attribute (a text link), that target is returned;
        otherwise the span of *raw_text* the entity covers is returned.
        """
        for entity in entities or ():
            if 'url' not in type(entity).__name__.lower():
                continue

            # Text links: the covered text is only the label, the real
            # destination is in ``entity.url``.
            target = getattr(entity, 'url', None)
            if target:
                return target

            offset = getattr(entity, 'offset', None)
            length = getattr(entity, 'length', None)
            if offset is not None and length is not None:
                visible = cls._slice_utf16(raw_text, offset, length)
                if visible:
                    return visible
        return None

    def _parse_app_message(self, msg, channel_name: str) -> Optional[Dict]:
        """
        Parse app information from a Telegram message

        The title is the first non-blank line of ``msg.text``; the URL is the
        first link found in ``msg.entities``, falling back to the channel
        link.

        Args:
            msg: Message object exposing ``text``, ``entities`` and, when
                available, ``raw_text``.
            channel_name: Channel username without "@", used for attribution.

        Returns:
            Dict with keys 'title', 'url', 'source', 'source_url' and
            'thumbnail', or None if parsing raised (the error is logged).
        """
        try:
            text = msg.text or ""
            # Entity offsets refer to the unformatted text, not to the
            # formatted ``msg.text``; fall back to ``text`` if unavailable.
            raw_text = getattr(msg, "raw_text", None) or text

            # Extract app name (usually first line or in bold)
            app_name = self._extract_title(text)

            # Create attribution
            telegram_link = f"https://t.me/{channel_name}"

            # Look for links in message; fallback: Telegram channel link
            app_url = self._extract_url(msg.entities, raw_text) or telegram_link

            return {
                'title': app_name,
                'url': app_url,
                'source': f'Telegram - {channel_name}',
                'source_url': telegram_link,
                'thumbnail': 'https://telegram.org/img/t_logo.png'
            }
        except Exception as e:
            logger.error(f"Error parsing message: {e}")
            return None

    async def scrape_channels(self, channels: List[str]) -> Dict:
        """
        Scrape multiple channels and return one app from each

        Channels are scraped one after another with ``limit=1``.

        Args:
            channels: List of channel names (with or without @)

        Returns:
            Dictionary with channel names (all "@" removed, lower-cased) as
            keys and app info as values. Channels that yield no app are
            omitted and a warning is logged for each.
        """
        results = {}

        for channel in channels:
            apps = await self.scrape_channel(channel, limit=1)
            if apps:
                # Use simplified channel name as key
                clean_name = channel.replace("@", "").lower()
                results[clean_name] = apps[0]
            else:
                logger.warning(f"No apps found in {channel}")

        return results


async def main():
    """Manual smoke test: connect, scrape a few sample channels, print the results."""
    scraper = TelegramScraper()

    # Sample channels for the smoke test
    channels = ["@apkpure", "@APKUpdates", "@AndroidAPKShare"]

    connected = await scraper.connect()
    if not connected:
        print("❌ Failed to connect to Telegram")
        return

    separator = "=" * 60
    try:
        results = await scraper.scrape_channels(channels)

        print("\n" + separator)
        print("TELEGRAM SCRAPER TEST")
        print(separator)

        # One short report per channel that produced an app
        for channel, app in results.items():
            print(f"\n📱 From: {channel}")
            print(f"   Title: {app['title']}")
            print(f"   URL: {app['url']}")
            print(f"   Source: {app['source']}")

        print("\n" + separator)
        print("✅ Telegram scraping works!")
        print(separator)
    finally:
        # Always release the connection once it has been established
        await scraper.disconnect()


# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())
