#!/usr/bin/env python3
"""
Historical Data Prefetch Script

This script prefetches historical position data for all celestial bodies
and stores them in the database for fast retrieval.

Usage:
    # Prefetch last 12 months
    python scripts/prefetch_historical_data.py --months 12

    # Prefetch specific year-month
    python scripts/prefetch_historical_data.py --year 2024 --month 1

    # Prefetch a range
    python scripts/prefetch_historical_data.py --start-year 2023 --start-month 1 --end-year 2023 --end-month 12
"""

import sys
import os
import asyncio
import argparse
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

# Add backend to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from app.database import get_db
from app.services.horizons import horizons_service
from app.services.db_service import position_service, celestial_body_service

# Body identifiers that receive special handling.
SUN_ID = "10"
CASSINI_ID = "-82"

# Cassini mission ended 2017-09-15; this is the single timestamp queried for it.
CASSINI_END = datetime(2017, 9, 15, 11, 58, 0)

# Sample every 7 days to reduce data volume.
SAMPLE_STEP = "7d"

# Small delay between bodies to avoid overwhelming the NASA API.
REQUEST_DELAY_SECONDS = 0.5

SEPARATOR = "=" * 60


def _as_position_dicts(positions_data):
    """Convert position objects (with time/x/y/z attributes) to plain dicts."""
    return [
        {"time": p.time, "x": p.x, "y": p.y, "z": p.z}
        for p in positions_data
    ]


def _fetch_positions(body_id, year, month, start_date, end_date):
    """
    Produce the positions to store for one body and one month.

    Args:
        body_id: Identifier of the celestial body
        year: Year being prefetched
        month: Month being prefetched (1-12)
        start_date: First instant of the month
        end_date: First instant of the following month

    Returns:
        A list of position dicts, or None when the body is Cassini and the
        requested month is after the end of its mission.
    """
    if body_id == SUN_ID:
        # Sun is always at origin, so no API call is made for it.
        return [
            {"time": start_date, "x": 0.0, "y": 0.0, "z": 0.0},
            {"time": end_date, "x": 0.0, "y": 0.0, "z": 0.0},
        ]

    if body_id == CASSINI_ID:
        if (year, month) > (CASSINI_END.year, CASSINI_END.month):
            return None
        # For every month up to the mission end, a single sample at
        # CASSINI_END is requested instead of the month's own range.
        # NOTE(review): that sample's timestamp lies outside most requested
        # months, so the "already exists" check presumably never matches it
        # and re-runs will store it again - confirm this is intended.
        return _as_position_dicts(
            horizons_service.get_body_positions(
                body_id, CASSINI_END, CASSINI_END, SAMPLE_STEP
            )
        )

    # NOTE(review): get_body_positions is called without await; if it is a
    # blocking call it will stall the event loop while it runs - confirm.
    return _as_position_dicts(
        horizons_service.get_body_positions(
            body_id, start_date, end_date, SAMPLE_STEP
        )
    )


async def prefetch_month(year: int, month: int, session):
    """
    Prefetch data for a specific month

    Bodies that already have positions stored for the month are skipped.
    A failure on one body is reported and counted, and processing continues
    with the next body.

    Args:
        year: Year (e.g., 2023)
        month: Month (1-12)
        session: Database session

    Returns:
        Tuple of (success_count, skip_count, error_count) for the month.
    """
    # Calculate start and end of month (end is the first day of next month)
    start_date = datetime(year, month, 1, 0, 0, 0)
    end_date = start_date + relativedelta(months=1)

    print(f"\n{SEPARATOR}")
    print(f"📅 Prefetching data for {year}-{month:02d}")
    print(f" Period: {start_date.date()} to {end_date.date()}")
    print(SEPARATOR)

    # Get all celestial bodies from database
    all_bodies = await celestial_body_service.get_all_bodies(session)
    total_bodies = len(all_bodies)

    success_count = 0
    skip_count = 0
    error_count = 0

    for idx, body in enumerate(all_bodies, 1):
        body_id = body.id
        body_name = body.name

        try:
            # Check if we already have data for this month
            existing_positions = await position_service.get_positions_in_range(
                body_id, start_date, end_date, session
            )
            if existing_positions:
                print(
                    f" [{idx}/{total_bodies}] ⏭️ {body_name:20s} - Already exists "
                    f"({len(existing_positions)} positions)"
                )
                skip_count += 1
                continue

            # No newline: the outcome is appended to this same output line.
            print(
                f" [{idx}/{total_bodies}] 🔄 {body_name:20s} - Fetching...",
                end='',
                flush=True,
            )

            positions = _fetch_positions(
                body_id, year, month, start_date, end_date
            )
            if positions is None:
                print(" ⏭️ Mission ended", flush=True)
                skip_count += 1
                continue

            # Store in database
            for pos_data in positions:
                await position_service.save_position(
                    body_id=body_id,
                    time=pos_data["time"],
                    x=pos_data["x"],
                    y=pos_data["y"],
                    z=pos_data["z"],
                    source="nasa_horizons",
                    session=session,
                )

            print(f" ✅ Saved {len(positions)} positions", flush=True)
            success_count += 1

            # Small delay to avoid overwhelming NASA API
            await asyncio.sleep(REQUEST_DELAY_SECONDS)

        except Exception as e:
            # Per-body boundary: report the failure and move on so that one
            # bad body does not abort the whole month.
            print(f" ❌ Error: {e}", flush=True)
            error_count += 1
            continue

    print(f"\n{SEPARATOR}")
    print(f"📊 Summary for {year}-{month:02d}:")
    print(f" ✅ Success: {success_count}")
    print(f" ⏭️ Skipped: {skip_count}")
    print(f" ❌ Errors: {error_count}")
    print(f"{SEPARATOR}\n")

    return success_count, skip_count, error_count


def _month_start(parser, year, month, label):
    """Return datetime(year, month, 1), or exit via parser.error if invalid."""
    try:
        return datetime(year, month, 1)
    except ValueError as exc:
        parser.error(f"{label}: invalid year/month {year}-{month} ({exc})")


def _resolve_months(parser, args):
    """
    Turn parsed CLI arguments into an ordered list of (year, month) tuples.

    Precedence is: single month (--year/--month), then explicit range
    (--start-*/--end-*), then the last N months (--months, default 12).
    Incomplete or invalid combinations are rejected through parser.error
    instead of silently falling back to the default.

    Args:
        parser: The ArgumentParser, used to report usage errors
        args: Namespace returned by parser.parse_args()

    Returns:
        List of (year, month) tuples, oldest first. May be empty.
    """
    single = (args.year, args.month)
    span = (args.start_year, args.start_month, args.end_year, args.end_month)

    if any(value is not None for value in single):
        if None in single:
            parser.error("--year and --month must be given together")
        _month_start(parser, args.year, args.month, "--year/--month")
        return [(args.year, args.month)]

    if any(value is not None for value in span):
        if None in span:
            parser.error(
                "--start-year, --start-month, --end-year and --end-month "
                "must all be given together"
            )
        current = _month_start(
            parser, args.start_year, args.start_month,
            "--start-year/--start-month",
        )
        end = _month_start(
            parser, args.end_year, args.end_month, "--end-year/--end-month"
        )
        months_to_fetch = []
        while current <= end:
            months_to_fetch.append((current.year, current.month))
            current += relativedelta(months=1)
        return months_to_fetch

    # Default: last N months, counting the current month, oldest first
    months = 12 if args.months is None else args.months
    now = datetime.now()
    months_to_fetch = []
    for offset in reversed(range(months)):
        past_date = now - relativedelta(months=offset)
        months_to_fetch.append((past_date.year, past_date.month))
    return months_to_fetch


async def main():
    """Parse CLI arguments, prefetch every requested month, print a summary."""
    parser = argparse.ArgumentParser(description="Prefetch historical celestial data")
    parser.add_argument("--months", type=int, help="Number of months to prefetch from now (default: 12)")
    parser.add_argument("--year", type=int, help="Specific year to prefetch")
    parser.add_argument("--month", type=int, help="Specific month to prefetch (1-12)")
    parser.add_argument("--start-year", type=int, help="Start year for range")
    parser.add_argument("--start-month", type=int, help="Start month for range (1-12)")
    parser.add_argument("--end-year", type=int, help="End year for range")
    parser.add_argument("--end-month", type=int, help="End month for range (1-12)")

    args = parser.parse_args()

    # Determine date range
    months_to_fetch = _resolve_months(parser, args)

    if not months_to_fetch:
        print("❌ No months to fetch. Please specify a valid date range.")
        return

    first_year, first_month = months_to_fetch[0]
    last_year, last_month = months_to_fetch[-1]

    print("\n🚀 Historical Data Prefetch Script")
    print(SEPARATOR)
    print(f"📅 Total months to fetch: {len(months_to_fetch)}")
    print(f" From: {first_year}-{first_month:02d}")
    print(f" To: {last_year}-{last_month:02d}")
    print(f"{SEPARATOR}\n")

    total_success = 0
    total_skip = 0
    total_error = 0

    # get_db is consumed as an async generator; only the first session it
    # yields is used, hence the break at the end of the loop body.
    async for session in get_db():
        start_time = datetime.now()

        for year, month in months_to_fetch:
            success, skip, error = await prefetch_month(year, month, session)
            total_success += success
            total_skip += skip
            total_error += error

        duration = datetime.now() - start_time

        print(f"\n{SEPARATOR}")
        print("🎉 Prefetch Complete!")
        print(SEPARATOR)
        print("📊 Overall Summary:")
        print(f" Total months processed: {len(months_to_fetch)}")
        print(f" ✅ Total success: {total_success}")
        print(f" ⏭️ Total skipped: {total_skip}")
        print(f" ❌ Total errors: {total_error}")
        print(f" ⏱️ Duration: {duration}")
        print(f"{SEPARATOR}\n")

        # NOTE(review): breaking here leaves the generator suspended at its
        # yield, so any code get_db runs after the yield is not executed by
        # this loop - confirm that is acceptable for get_db's implementation.
        break


if __name__ == "__main__":
    asyncio.run(main())