cosmo/backend/scripts/prefetch_historical_data.py

224 lines
7.8 KiB
Python

#!/usr/bin/env python3
"""
Historical Data Prefetch Script
This script prefetches historical position data for all celestial bodies
and stores them in the database for fast retrieval.
Usage:
# Prefetch last 12 months
python scripts/prefetch_historical_data.py --months 12
# Prefetch specific year-month
python scripts/prefetch_historical_data.py --year 2024 --month 1
# Prefetch a range
python scripts/prefetch_historical_data.py --start-year 2023 --start-month 1 --end-year 2023 --end-month 12
"""
import sys
import os
import asyncio
import argparse
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
# Add backend to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from app.database import get_db
from app.services.horizons import horizons_service
from app.services.db_service import position_service, celestial_body_service
async def prefetch_month(year: int, month: int, session):
    """
    Fetch and store one calendar month of positions for every known body.

    Bodies that already have stored positions inside the month window are
    skipped. A failure while handling one body is printed and counted, and
    the loop then moves on to the next body.

    Args:
        year: Year (e.g., 2023)
        month: Month (1-12)
        session: Database session

    Returns:
        Tuple ``(success_count, skip_count, error_count)``, counted per body.
    """
    # The window runs from the first of the month to the first of the
    # following month; December rolls over into January of the next year.
    start_date = datetime(year, month, 1)
    next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
    end_date = datetime(next_year, next_month, 1)

    print(f"\n{'='*60}")
    print(f"📅 Prefetching data for {year}-{month:02d}")
    print(f" Period: {start_date.date()} to {end_date.date()}")
    print(f"{'='*60}")

    all_bodies = await celestial_body_service.get_all_bodies(session)
    total_bodies = len(all_bodies)
    success_count = skip_count = error_count = 0

    for idx, body in enumerate(all_bodies, 1):
        body_id, body_name = body.id, body.name
        try:
            # Anything already stored inside the window means this body is done.
            existing_positions = await position_service.get_positions_in_range(
                body_id, start_date, end_date, session
            )
            already_stored = bool(existing_positions) and len(existing_positions) > 0
            if already_stored:
                print(f" [{idx}/{total_bodies}] ⏭️ {body_name:20s} - Already exists ({len(existing_positions)} positions)")
                skip_count += 1
                continue

            print(f" [{idx}/{total_bodies}] 🔄 {body_name:20s} - Fetching...", end='', flush=True)

            positions = _positions_for_body(body_id, year, month, start_date, end_date)
            if positions is None:
                print(f" ⏭️ Mission ended", flush=True)
                skip_count += 1
                continue

            # Each row carries exactly the time/x/y/z keywords save_position takes.
            for pos_data in positions:
                await position_service.save_position(
                    body_id=body_id,
                    source="nasa_horizons",
                    session=session,
                    **pos_data,
                )
            print(f" ✅ Saved {len(positions)} positions", flush=True)
            success_count += 1

            # Small pause between bodies to avoid overwhelming the NASA API.
            await asyncio.sleep(0.5)
        except Exception as e:
            print(f" ❌ Error: {str(e)}", flush=True)
            error_count += 1

    print(f"\n{'='*60}")
    print(f"📊 Summary for {year}-{month:02d}:")
    print(f" ✅ Success: {success_count}")
    print(f" ⏭️ Skipped: {skip_count}")
    print(f" ❌ Errors: {error_count}")
    print(f"{'='*60}\n")
    return success_count, skip_count, error_count


def _positions_for_body(body_id, year, month, start_date, end_date):
    """
    Build the position rows to store for one body in one month window.

    Returns a list of ``{"time", "x", "y", "z"}`` dicts, or ``None`` when the
    body is Cassini ("-82") and the month lies after September 2017.
    """
    # Horizons is sampled every 7 days to reduce data volume.
    step = "7d"

    if body_id == "10":
        # The Sun is always at the origin, so no query is made; the rows are
        # synthesized for both ends of the window.
        # NOTE(review): the caller stores these with source="nasa_horizons".
        return [
            {"time": moment, "x": 0.0, "y": 0.0, "z": 0.0}
            for moment in (start_date, end_date)
        ]

    if body_id == "-82":
        # Cassini mission ended 2017-09-15.
        if (year, month) > (2017, 9):
            return None
        # NOTE(review): every month up to 2017-09 queries this single instant
        # rather than the month window, so the stored timestamp can fall
        # outside the window the caller checks for existing data. Confirm
        # that this is intended.
        window_start = window_end = datetime(2017, 9, 15, 11, 58, 0)
    else:
        window_start, window_end = start_date, end_date

    # NOTE(review): called without await, so presumably a synchronous
    # (blocking) call. Verify against the horizons service.
    samples = horizons_service.get_body_positions(
        body_id, window_start, window_end, step
    )
    return [
        {"time": sample.time, "x": sample.x, "y": sample.y, "z": sample.z}
        for sample in samples
    ]
async def main() -> None:
    """
    Parse the command line, work out which months to prefetch, and run them.

    Three selection modes are supported, checked in this order:
    a single month (``--year`` with ``--month``), an inclusive range
    (``--start-year/--start-month`` to ``--end-year/--end-month``), and the
    default of the last ``--months`` months counting back from today (12 when
    the option is omitted).

    Invalid or incomplete selections are rejected through ``parser.error``,
    which prints a usage message and exits with status 2, instead of silently
    falling back to the default mode.
    """
    parser = argparse.ArgumentParser(description="Prefetch historical celestial data")
    parser.add_argument("--months", type=int, help="Number of months to prefetch from now (default: 12)")
    parser.add_argument("--year", type=int, help="Specific year to prefetch")
    parser.add_argument("--month", type=int, help="Specific month to prefetch (1-12)")
    parser.add_argument("--start-year", type=int, help="Start year for range")
    parser.add_argument("--start-month", type=int, help="Start month for range (1-12)")
    parser.add_argument("--end-year", type=int, help="End year for range")
    parser.add_argument("--end-month", type=int, help="End month for range (1-12)")
    args = parser.parse_args()

    months_to_fetch = _months_from_args(parser, args)
    if not months_to_fetch:
        # Reached when a range is given whose start lies after its end.
        print("❌ No months to fetch. Please specify a valid date range.")
        return

    print(f"\n🚀 Historical Data Prefetch Script")
    print(f"{'='*60}")
    print(f"📅 Total months to fetch: {len(months_to_fetch)}")
    print(f" From: {months_to_fetch[0][0]}-{months_to_fetch[0][1]:02d}")
    print(f" To: {months_to_fetch[-1][0]}-{months_to_fetch[-1][1]:02d}")
    print(f"{'='*60}\n")

    total_success = 0
    total_skip = 0
    total_error = 0

    # get_db is consumed as an async generator; only the first session it
    # yields is used, hence the break at the end of the loop body.
    async for session in get_db():
        start_time = datetime.now()
        for year, month in months_to_fetch:
            success, skip, error = await prefetch_month(year, month, session)
            total_success += success
            total_skip += skip
            total_error += error
        duration = datetime.now() - start_time

        print(f"\n{'='*60}")
        print(f"🎉 Prefetch Complete!")
        print(f"{'='*60}")
        print(f"📊 Overall Summary:")
        print(f" Total months processed: {len(months_to_fetch)}")
        print(f" ✅ Total success: {total_success}")
        print(f" ⏭️ Total skipped: {total_skip}")
        print(f" ❌ Total errors: {total_error}")
        print(f" ⏱️ Duration: {duration}")
        print(f"{'='*60}\n")
        break


def _months_from_args(parser, args):
    """
    Translate parsed CLI options into an ordered list of ``(year, month)``.

    Calls ``parser.error`` (which exits) when a value is out of range or when
    a selection mode is only partially specified. Returns an empty list when
    a complete range has its start after its end.
    """
    # Validate individual values first so a bad month or year is reported
    # here rather than as a ValueError from datetime() later on.
    month_options = (
        ("--month", args.month),
        ("--start-month", args.start_month),
        ("--end-month", args.end_month),
    )
    for flag, value in month_options:
        if value is not None and not 1 <= value <= 12:
            parser.error(f"{flag} must be between 1 and 12 (got {value})")

    # The upper bound leaves room for the "first day of next month" that
    # December of the final year would otherwise need.
    lowest_year, highest_year = datetime.min.year, datetime.max.year - 1
    year_options = (
        ("--year", args.year),
        ("--start-year", args.start_year),
        ("--end-year", args.end_year),
    )
    for flag, value in year_options:
        if value is not None and not lowest_year <= value <= highest_year:
            parser.error(
                f"{flag} must be between {lowest_year} and {highest_year} (got {value})"
            )

    if args.months is not None and args.months < 1:
        parser.error(f"--months must be a positive integer (got {args.months})")

    single = (args.year, args.month)
    span = (args.start_year, args.start_month, args.end_year, args.end_month)

    # Mode 1: one specific month.
    if None not in single:
        return [single]

    # Mode 2: inclusive range, walked one month at a time.
    if None not in span:
        months = []
        current = datetime(args.start_year, args.start_month, 1)
        end = datetime(args.end_year, args.end_month, 1)
        while current <= end:
            months.append((current.year, current.month))
            current += relativedelta(months=1)
        return months

    # A partially specified mode used to be ignored silently; reject it.
    if any(value is not None for value in single):
        parser.error("--year and --month must be given together")
    if any(value is not None for value in span):
        parser.error(
            "--start-year, --start-month, --end-year and --end-month "
            "must all be given together"
        )

    # Mode 3 (default): the last N months including the current one,
    # ordered oldest first.
    count = 12 if args.months is None else args.months
    today = datetime.now()
    months = []
    for offset in reversed(range(count)):
        past_date = today - relativedelta(months=offset)
        months.append((past_date.year, past_date.month))
    return months
# Run the async entry point only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())