cosmo_backend/scripts/fetch_and_cache.py

201 lines
6.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Fetch celestial body positions from NASA Horizons API and cache them
This script:
1. Fetches position data for all celestial bodies
2. Caches data in Redis (L2 cache)
3. Saves data to PostgreSQL (L3 cache/persistent storage)
Usage:
python scripts/fetch_and_cache.py [--days DAYS]
Options:
--days DAYS Number of days to fetch (default: 7)
"""
import asyncio
import sys
from pathlib import Path
from datetime import datetime, timedelta
import argparse
import logging
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.services.horizons import horizons_service
from app.services.db_service import (
celestial_body_service,
position_service,
nasa_cache_service
)
from app.services.redis_cache import redis_cache, cache_nasa_response
from app.config import settings
# Script-wide logging: timestamped INFO-level records (stderr by default).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Module-level logger, named after this script module.
logger = logging.getLogger(__name__)
async def fetch_and_cache_body(body_id: str, body_name: str, days: int = 7) -> bool:
    """Fetch position data for one celestial body and cache it at every tier.

    Pulls ``days`` worth of daily positions from the NASA Horizons API, then
    writes the result to Redis (L2), the raw-response ``nasa_cache`` table
    (L3), and the queryable ``positions`` table.

    Args:
        body_id: Horizons identifier of the body.
        body_name: Human-readable name, used only for log messages.
        days: Size of the time window to fetch, starting now (default 7).

    Returns:
        True when positions were fetched and persisted, False otherwise.
        Never raises: every failure is logged and reported via the return
        value so the caller's batch loop can continue.
    """
    logger.info(f"Fetching data for {body_name} ({body_id})...")
    try:
        # NOTE(review): naive UTC timestamps. datetime.utcnow() is deprecated
        # since 3.12, but switching to timezone-aware values could change the
        # serialized cache keys/payloads downstream — confirm before changing.
        now = datetime.utcnow()
        start_time = now
        end_time = now + timedelta(days=days)
        step = "1d"

        # The Horizons client is synchronous; run it in the default executor
        # so the event loop is not blocked. get_running_loop() is the correct
        # call inside a coroutine — get_event_loop() is deprecated here and
        # unreliable on Python 3.12+.
        loop = asyncio.get_running_loop()
        positions = await loop.run_in_executor(
            None,
            horizons_service.get_body_positions,
            body_id,
            start_time,
            end_time,
            step,
        )
        if not positions:
            logger.warning(f"No positions returned for {body_name}")
            return False
        logger.info(f"Fetched {len(positions)} positions for {body_name}")

        # Normalize to plain dicts so the same payload can be JSON-cached in
        # Redis and bulk-inserted into PostgreSQL.
        position_data = [
            {
                "time": pos.time,
                "x": pos.x,
                "y": pos.y,
                "z": pos.z,
            }
            for pos in positions
        ]

        # L2 cache: Redis. A failure here is non-fatal — PostgreSQL below
        # still receives the data.
        redis_cached = await cache_nasa_response(
            body_id=body_id,
            start_time=start_time,
            end_time=end_time,
            step=step,
            data=position_data,
        )
        if redis_cached:
            logger.info(f"✓ Cached {body_name} data in Redis")
        else:
            logger.warning(f"⚠ Failed to cache {body_name} data in Redis")

        # L3 cache: persist the raw NASA response for future cache hits.
        await nasa_cache_service.save_response(
            body_id=body_id,
            start_time=start_time,
            end_time=end_time,
            step=step,
            response_data={"positions": position_data},
            ttl_days=settings.cache_ttl_days,
        )
        logger.info(f"✓ Cached {body_name} data in PostgreSQL (nasa_cache)")

        # Also store individual rows in the positions table for querying.
        saved_count = await position_service.save_positions(
            body_id=body_id,
            positions=position_data,
            source="nasa_horizons",
        )
        logger.info(f"✓ Saved {saved_count} positions for {body_name} in PostgreSQL")
        return True
    except Exception as e:
        # Broad catch is deliberate: one failing body must not abort the
        # whole batch. logger.exception records the traceback through the
        # logging config instead of writing raw output to stderr.
        logger.exception(f"✗ Failed to fetch/cache {body_name}: {e}")
        return False
async def main() -> None:
    """Fetch and cache position data for every eligible celestial body.

    Parses ``--days`` from the command line, connects to Redis, loads all
    bodies from the database, fetches/caches each probe and planet (stars
    are skipped), then logs a summary and Redis cache statistics.

    Exits the process with status 1 on any unrecoverable failure; Redis is
    always disconnected on the way out.
    """
    parser = argparse.ArgumentParser(description='Fetch and cache celestial body positions')
    parser.add_argument('--days', type=int, default=7, help='Number of days to fetch (default: 7)')
    args = parser.parse_args()

    logger.info("=" * 60)
    logger.info("Fetch and Cache NASA Horizons Data")
    logger.info("=" * 60)
    logger.info(f"Time range: {args.days} days from now")
    logger.info("=" * 60)

    # Connect to Redis (L2 cache) before any per-body work.
    await redis_cache.connect()
    try:
        # All known bodies come from the database; stars have no Horizons
        # trajectory to fetch, so only probes and planets are processed.
        bodies = await celestial_body_service.get_all_bodies()
        logger.info(f"\nFound {len(bodies)} celestial bodies in database")
        bodies_to_fetch = [
            body for body in bodies
            if body.type in ['probe', 'planet']
        ]
        logger.info(f"Will fetch data for {len(bodies_to_fetch)} bodies (probes + planets)")

        success_count = 0
        fail_count = 0
        for i, body in enumerate(bodies_to_fetch, 1):
            logger.info(f"\n[{i}/{len(bodies_to_fetch)}] Processing {body.name}...")
            success = await fetch_and_cache_body(
                body_id=body.id,
                body_name=body.name,
                days=args.days
            )
            if success:
                success_count += 1
            else:
                fail_count += 1
            # Small delay between requests to avoid overwhelming the NASA
            # API; skipped after the final body.
            if i < len(bodies_to_fetch):
                await asyncio.sleep(0.5)

        # Summary of the batch run.
        logger.info("\n" + "=" * 60)
        logger.info("Summary")
        logger.info("=" * 60)
        logger.info(f"✓ Successfully cached: {success_count} bodies")
        if fail_count > 0:
            logger.warning(f"✗ Failed: {fail_count} bodies")
        logger.info("=" * 60)

        # Report Redis cache health if the connection is up.
        redis_stats = await redis_cache.get_stats()
        if redis_stats.get("connected"):
            logger.info("\nRedis Cache Status:")
            logger.info(f"  Memory: {redis_stats.get('used_memory_human')}")
            logger.info(f"  Clients: {redis_stats.get('connected_clients')}")
            logger.info(f"  Hits: {redis_stats.get('keyspace_hits')}")
            logger.info(f"  Misses: {redis_stats.get('keyspace_misses')}")
    except Exception as e:
        # Top-level boundary: log with traceback via the logging config
        # (instead of raw traceback.print_exc) and exit non-zero.
        logger.exception(f"\n✗ Failed: {e}")
        sys.exit(1)
    finally:
        # Always release the Redis connection, even on failure/exit.
        await redis_cache.disconnect()
# Script entry point: run the async main() on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())