#!/usr/bin/env python3
"""
Download Wikimedia Beach Photos Script
Downloads authentic, location-specific photos for the beaches listed in the greek_beaches_export.json file.
Uses Wikimedia Commons, which hosts freely licensed (not only public domain) photos - no API key required!
"""

import os
import requests
import json
import time
import uuid
from urllib.parse import quote

def search_wikimedia_photos(beach_name, city):
    """Search Wikimedia Commons for usable photos of a beach.

    Several search phrases are tried in order, from most to least specific.
    The results of the first phrase that yields at least one acceptable file
    are returned; later phrases are not tried.

    Args:
        beach_name: Name of the beach, used in the search phrases.
        city: City/area of the beach, used in the search phrases.

    Returns:
        A non-empty list of file-info dicts (as returned by ``get_file_info``),
        or ``None`` when no phrase produced an acceptable file. Request or
        parsing errors for a phrase are printed and that phrase is skipped.
    """
    # Create multiple search queries with different approaches
    search_queries = [
        f"{beach_name} {city} Greece beach",
        f"{beach_name} beach Greece",
        f"{city} Greece beach",
        f"{beach_name} {city} Greece",
        f"{beach_name} {city}"
    ]

    # Set proper headers to avoid being blocked
    headers = {
        'User-Agent': 'BeachPhotoCollector/1.0 (https://example.com/contact) Python/requests'
    }

    wikimedia_api = "https://commons.wikimedia.org/w/api.php"

    # Titles containing any of these substrings are treated as non-photos.
    skip_words = ('map', 'flag', 'coat', 'logo', 'diagram', 'chart', 'plan')

    for search_query in search_queries:
        try:
            # Search for files on Wikimedia Commons
            params = {
                'action': 'query',
                'format': 'json',
                'list': 'search',
                'srsearch': f'filetype:bitmap {search_query}',
                'srnamespace': 6,  # File namespace
                'srlimit': 10,
                'srprop': 'title|snippet'
            }

            response = requests.get(wikimedia_api, params=params, headers=headers, timeout=10)
            response.raise_for_status()

            data = response.json()

            if 'query' in data and 'search' in data['query'] and data['query']['search']:
                files = []
                for result in data['query']['search'][:5]:  # Top 5 results
                    file_title = result['title']

                    # Reject unsuitable titles BEFORE asking the API for file
                    # details, so no request is spent on a file that would be
                    # discarded anyway.
                    title = file_title.lower()
                    if any(skip_word in title for skip_word in skip_words):
                        continue

                    file_info = get_file_info(file_title, wikimedia_api, headers)
                    if file_info:
                        files.append(file_info)

                if files:
                    return files

        except Exception as e:
            # Best effort: report the problem and fall through to the next phrase.
            print(f"❌ Wikimedia search failed for query '{search_query}': {str(e)}")
            continue

    return None

def get_file_info(file_title, wikimedia_api, headers=None):
    """Fetch image metadata for one Wikimedia Commons file.

    Asks the API for the ``imageinfo`` of *file_title* and returns a summary
    of the first page whose image is at least 400x300 pixels and whose MIME
    type starts with ``image/``.

    Args:
        file_title: Page title of the file (e.g. as returned by a search).
        wikimedia_api: URL of the API endpoint to query.
        headers: Optional HTTP headers; a default User-Agent is used when
            omitted.

    Returns:
        A dict with the keys ``title``, ``url``, ``full_url``, ``width``,
        ``height``, ``mime``, ``page_url`` and ``extmetadata``, or ``None``
        when no page qualifies. Any exception is printed and also results
        in ``None``.
    """
    query_params = {
        'action': 'query',
        'format': 'json',
        'titles': file_title,
        'prop': 'imageinfo',
        'iiprop': 'url|size|mime|extmetadata',
        'iiurlwidth': 800  # Get medium-sized image
    }

    try:
        request_headers = headers
        if request_headers is None:
            request_headers = {
                'User-Agent': 'BeachPhotoCollector/1.0 (https://example.com/contact) Python/requests'
            }

        reply = requests.get(wikimedia_api, params=query_params, headers=request_headers, timeout=10)
        reply.raise_for_status()
        payload = reply.json()

        # Nothing to inspect unless the reply carries a page listing.
        if 'query' not in payload or 'pages' not in payload['query']:
            return None

        for _page_id, page in payload['query']['pages'].items():
            if 'imageinfo' not in page or not page['imageinfo']:
                continue

            info = page['imageinfo'][0]

            # Size and type gates, checked in order: width, height, MIME.
            if not info.get('width', 0) >= 400:
                continue
            if not info.get('height', 0) >= 300:
                continue
            if not info.get('mime', '').startswith('image/'):
                continue

            original_url = info.get('url')
            return {
                'title': file_title,
                # Prefer the scaled thumbnail, fall back to the original file.
                'url': info.get('thumburl', original_url),
                'full_url': original_url,
                'width': info.get('width'),
                'height': info.get('height'),
                'mime': info.get('mime'),
                'page_url': f"https://commons.wikimedia.org/wiki/{quote(file_title)}",
                'extmetadata': info.get('extmetadata', {})
            }

        return None

    except Exception as exc:
        print(f"❌ Failed to get file info for {file_title}: {str(exc)}")
        return None

def _filename_slug(text):
    """Reduce *text* to a lowercase, underscore-separated, filename-safe token."""
    kept = "".join(c for c in text if c.isalnum() or c in (' ', '-', '_'))
    return kept.strip().replace(' ', '_').lower()


def download_photo(photo_data, beach_name, city, upload_dir):
    """Download a photo from Wikimedia Commons into *upload_dir*.

    Args:
        photo_data: File-info dict; must contain ``full_url`` and may contain
            ``mime`` (defaults to ``image/jpeg``).
        beach_name: Beach name, used (sanitised) in the output filename.
        city: City name, used (sanitised) in the output filename.
        upload_dir: Existing directory the file is written to.

    Returns:
        The generated filename (not the full path) on success, or ``None`` on
        any failure. Failures are printed, never raised, and a partially
        written file is removed so no truncated image is left behind.
    """
    # Set only while a file may be incomplete on disk.
    partial_path = None

    try:
        # Use the full URL instead of the thumbnail URL to avoid 403 errors
        photo_url = photo_data['full_url']

        # Both name parts go through the same sanitiser so that characters
        # such as '/' can never end up in the path.
        safe_beach_name = _filename_slug(beach_name)
        safe_city = _filename_slug(city)

        # Get file extension from mime type
        mime_type = photo_data.get('mime', 'image/jpeg')
        ext = '.jpg' if 'jpeg' in mime_type else '.png' if 'png' in mime_type else '.jpg'

        # Random suffix keeps repeated downloads for one beach from colliding.
        filename = f"{safe_beach_name}_{safe_city}_wikimedia_{uuid.uuid4().hex[:8]}{ext}"
        file_path = os.path.join(upload_dir, filename)

        # Set headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Download the photo. The context manager closes the streamed
        # response (releasing the connection) even when an error occurs.
        with requests.get(photo_url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()

            partial_path = file_path
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # File is complete; it must no longer be treated as partial.
        partial_path = None

        print(f"✅ Downloaded: {filename}")
        return filename

    except Exception as e:
        if partial_path is not None:
            try:
                os.remove(partial_path)
            except OSError:
                # File was never created or is already gone.
                pass
        print(f"❌ Failed to download photo: {str(e)}")
        return None

def main():
    """Download one Wikimedia photo for every beach in the JSON export.

    Reads ``greek_beaches_export.json`` from the current directory, creates
    ``backend/uploads/beach_photos`` if needed, then for each beach searches
    for photos and saves the first candidate that downloads successfully.
    Progress and a final summary are printed.

    Entries lacking ``name`` or ``city`` are skipped and counted as failed
    instead of aborting the run. Returns ``None``; a failure to load the JSON
    file is printed and ends the run early.
    """
    # Load beaches from JSON file
    json_file_path = "greek_beaches_export.json"

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            beaches = json.load(f)
    except Exception as e:
        print(f"❌ Failed to load JSON file: {str(e)}")
        return

    total = len(beaches)
    print(f"🏖️ Found {total} beaches in the database")

    # Create upload directory if it doesn't exist
    upload_dir = "backend/uploads/beach_photos"
    os.makedirs(upload_dir, exist_ok=True)

    success_count = 0
    failed_count = 0

    print("📸 Starting Wikimedia photo download process...")
    print("=" * 60)

    # True once a beach has triggered API traffic; the next beach then waits.
    needs_pause = False

    for i, beach in enumerate(beaches, 1):
        try:
            beach_name = beach['name']
            city = beach['city']
        except (KeyError, TypeError):
            print(f"\n[{i}/{total}] ❌ Skipping malformed entry (missing 'name' or 'city')")
            failed_count += 1
            continue

        # Rate limiting - be respectful to Wikimedia API. Pausing at the top
        # of the iteration applies the delay after failed searches as well,
        # and avoids a pointless wait after the last beach.
        if needs_pause:
            time.sleep(2)  # 2 second delay between beaches
        needs_pause = True

        print(f"\n[{i}/{total}] Processing: {beach_name}, {city}")

        # Search for photos
        photos = search_wikimedia_photos(beach_name, city)

        if not photos:
            print(f"❌ No authentic photos found for {beach_name}")
            failed_count += 1
            continue

        # Try candidates in order until one downloads
        photo_downloaded = False
        for photo_data in photos:
            filename = download_photo(photo_data, beach_name, city, upload_dir)

            if filename:
                photo_downloaded = True
                success_count += 1
                break

        if not photo_downloaded:
            print(f"❌ Failed to download authentic photo for {beach_name}")
            failed_count += 1

    # Guard against division by zero when the export contains no beaches.
    success_rate = success_count / total * 100 if total else 0.0

    print("\n📊 DOWNLOAD COMPLETE:")
    print(f"✅ Successful: {success_count} beaches")
    print(f"❌ Failed: {failed_count} beaches")
    print(f"📈 Success rate: {success_rate:.1f}%")

    if success_count > 0:
        print(f"\n🎉 {success_count} authentic beach photos successfully downloaded!")
        print(f"📁 Photos saved to: {upload_dir}")

# Run the download workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()