cosmo_backend/scripts/fetch_and_cache.py

201 lines
6.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Fetch celestial body positions from NASA Horizons API and cache them
This script:
1. Fetches position data for all celestial bodies
2. Caches data in Redis (L2 cache)
3. Saves data to PostgreSQL (L3 cache/persistent storage)
Usage:
python scripts/fetch_and_cache.py [--days DAYS]
Options:
--days DAYS Number of days to fetch (default: 7)
"""
import asyncio
import sys
from pathlib import Path
from datetime import datetime, timedelta
import argparse
import logging
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.services.horizons import horizons_service
from app.services.db_service import (
celestial_body_service,
position_service,
nasa_cache_service
)
from app.services.redis_cache import redis_cache, cache_nasa_response
from app.config import settings
# Script-wide logging: timestamped INFO-level records (stderr by default).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Module-level logger, named after this script module.
logger = logging.getLogger(__name__)
async def fetch_and_cache_body(body_id: str, body_name: str, days: int = 7) -> bool:
    """Fetch position data for one celestial body and cache it at every tier.

    Pulls ``days`` worth of daily positions from the NASA Horizons API, then
    writes the result to Redis (L2), the raw-response ``nasa_cache`` table
    (L3), and the queryable ``positions`` table.

    Args:
        body_id: Horizons identifier of the body.
        body_name: Human-readable name, used only for log messages.
        days: Size of the time window to fetch, starting now (default 7).

    Returns:
        True when positions were fetched and persisted, False otherwise.
        Never raises: every failure is logged and reported via the return
        value so the caller's batch loop can continue.
    """
    logger.info(f"Fetching data for {body_name} ({body_id})...")
    try:
        # NOTE(review): naive UTC timestamps. datetime.utcnow() is deprecated
        # since 3.12, but switching to timezone-aware values could change the
        # serialized cache keys/payloads downstream — confirm before changing.
        now = datetime.utcnow()
        start_time = now
        end_time = now + timedelta(days=days)
        step = "1d"

        # The Horizons client is synchronous; run it in the default executor
        # so the event loop is not blocked. get_running_loop() is the correct
        # call inside a coroutine — get_event_loop() is deprecated here and
        # unreliable on Python 3.12+.
        loop = asyncio.get_running_loop()
        positions = await loop.run_in_executor(
            None,
            horizons_service.get_body_positions,
            body_id,
            start_time,
            end_time,
            step,
        )
        if not positions:
            logger.warning(f"No positions returned for {body_name}")
            return False
        logger.info(f"Fetched {len(positions)} positions for {body_name}")

        # Normalize to plain dicts so the same payload can be JSON-cached in
        # Redis and bulk-inserted into PostgreSQL.
        position_data = [
            {
                "time": pos.time,
                "x": pos.x,
                "y": pos.y,
                "z": pos.z,
            }
            for pos in positions
        ]

        # L2 cache: Redis. A failure here is non-fatal — PostgreSQL below
        # still receives the data.
        redis_cached = await cache_nasa_response(
            body_id=body_id,
            start_time=start_time,
            end_time=end_time,
            step=step,
            data=position_data,
        )
        if redis_cached:
            logger.info(f"✓ Cached {body_name} data in Redis")
        else:
            logger.warning(f"⚠ Failed to cache {body_name} data in Redis")

        # L3 cache: persist the raw NASA response for future cache hits.
        await nasa_cache_service.save_response(
            body_id=body_id,
            start_time=start_time,
            end_time=end_time,
            step=step,
            response_data={"positions": position_data},
            ttl_days=settings.cache_ttl_days,
        )
        logger.info(f"✓ Cached {body_name} data in PostgreSQL (nasa_cache)")

        # Also store individual rows in the positions table for querying.
        saved_count = await position_service.save_positions(
            body_id=body_id,
            positions=position_data,
            source="nasa_horizons",
        )
        logger.info(f"✓ Saved {saved_count} positions for {body_name} in PostgreSQL")
        return True
    except Exception as e:
        # Broad catch is deliberate: one failing body must not abort the
        # whole batch. logger.exception records the traceback through the
        # logging config instead of writing raw output to stderr.
        logger.exception(f"✗ Failed to fetch/cache {body_name}: {e}")
        return False
async def main() -> None:
    """Fetch and cache position data for every eligible celestial body.

    Parses ``--days`` from the command line, connects to Redis, loads all
    bodies from the database, fetches/caches each probe and planet (stars
    are skipped), then logs a summary and Redis cache statistics.

    Exits the process with status 1 on any unrecoverable failure; Redis is
    always disconnected on the way out.
    """
    parser = argparse.ArgumentParser(description='Fetch and cache celestial body positions')
    parser.add_argument('--days', type=int, default=7, help='Number of days to fetch (default: 7)')
    args = parser.parse_args()

    logger.info("=" * 60)
    logger.info("Fetch and Cache NASA Horizons Data")
    logger.info("=" * 60)
    logger.info(f"Time range: {args.days} days from now")
    logger.info("=" * 60)

    # Connect to Redis (L2 cache) before any per-body work.
    await redis_cache.connect()
    try:
        # All known bodies come from the database; stars have no Horizons
        # trajectory to fetch, so only probes and planets are processed.
        bodies = await celestial_body_service.get_all_bodies()
        logger.info(f"\nFound {len(bodies)} celestial bodies in database")
        bodies_to_fetch = [
            body for body in bodies
            if body.type in ['probe', 'planet']
        ]
        logger.info(f"Will fetch data for {len(bodies_to_fetch)} bodies (probes + planets)")

        success_count = 0
        fail_count = 0
        for i, body in enumerate(bodies_to_fetch, 1):
            logger.info(f"\n[{i}/{len(bodies_to_fetch)}] Processing {body.name}...")
            success = await fetch_and_cache_body(
                body_id=body.id,
                body_name=body.name,
                days=args.days
            )
            if success:
                success_count += 1
            else:
                fail_count += 1
            # Small delay between requests to avoid overwhelming the NASA
            # API; skipped after the final body.
            if i < len(bodies_to_fetch):
                await asyncio.sleep(0.5)

        # Summary of the batch run.
        logger.info("\n" + "=" * 60)
        logger.info("Summary")
        logger.info("=" * 60)
        logger.info(f"✓ Successfully cached: {success_count} bodies")
        if fail_count > 0:
            logger.warning(f"✗ Failed: {fail_count} bodies")
        logger.info("=" * 60)

        # Report Redis cache health if the connection is up.
        redis_stats = await redis_cache.get_stats()
        if redis_stats.get("connected"):
            logger.info("\nRedis Cache Status:")
            logger.info(f"  Memory: {redis_stats.get('used_memory_human')}")
            logger.info(f"  Clients: {redis_stats.get('connected_clients')}")
            logger.info(f"  Hits: {redis_stats.get('keyspace_hits')}")
            logger.info(f"  Misses: {redis_stats.get('keyspace_misses')}")
    except Exception as e:
        # Top-level boundary: log with traceback via the logging config
        # (instead of raw traceback.print_exc) and exit non-zero.
        logger.exception(f"\n✗ Failed: {e}")
        sys.exit(1)
    finally:
        # Always release the Redis connection, even on failure/exit.
        await redis_cache.disconnect()
# Script entry point: run the async main() on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())