cosmo/backend/scripts/fetch_interstellar_data.py

178 lines
7.0 KiB
Python

"""
Fetch Interstellar Data (Nearby Stars & Exoplanets)
Phase 3: Interstellar Expansion
This script fetches data from the NASA Exoplanet Archive using astroquery.
It retrieves the nearest stars (within 50pc) and their planetary system details.
The data is stored in the `static_data` table with category 'interstellar'.
"""
import asyncio
import os
import sys
import math
from sqlalchemy import select, text, func
from sqlalchemy.dialects.postgresql import insert
# Add backend directory to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from app.database import get_db
from app.models.db.static_data import StaticData
# Try to import astroquery/astropy, handle if missing
try:
from astroquery.ipac.nexsci.nasa_exoplanet_archive import NasaExoplanetArchive
from astropy.coordinates import SkyCoord
from astropy import units as u
except ImportError:
print("❌ Error: astroquery or astropy not installed.")
print(" Please run: pip install astroquery astropy")
sys.exit(1)
# Star colour (hex) per spectral-class letter. The letters are tested in this
# order with a substring match and the first hit wins.
_SPECTRAL_COLORS = (
    ("O", "#9db4ff"),
    ("B", "#aabfff"),
    ("A", "#cad8ff"),
    ("F", "#fbf8ff"),
    ("G", "#fff4e8"),
    ("K", "#ffddb4"),
    ("M", "#ffbd6f"),
)
_DEFAULT_STAR_COLOR = "#FFFFFF"
_DEFAULT_SPECTRAL_TYPE = "G"

# Fallback values used when the archive has no measurement for a field in any
# of the rows returned for that star / planet.
_STAR_DEFAULTS = {
    "radius_solar": 1.0,
    "mass_solar": 1.0,
    "temperature_k": 5700,
}
_PLANET_DEFAULTS = {
    "semi_major_axis_au": 0.0,
    "period_days": 0.0,
    "eccentricity": 0.0,
    "radius_earth": 1.0,
    "temperature_k": None,
}


def _unwrap(cell):
    """Return ``cell.value`` for Quantity-like objects, else *cell* unchanged."""
    return cell.value if hasattr(cell, "value") else cell


def _is_masked(cell):
    """Return True when *cell* exposes a truthy ``mask`` attribute."""
    try:
        return bool(getattr(cell, "mask", False))
    except (TypeError, ValueError):
        # A mask that cannot be reduced to one boolean is not a scalar cell.
        return False


def _to_float(cell, default=None):
    """Convert a table cell to ``float``; return *default* when it is missing.

    A cell counts as missing when it is ``None``, masked, not convertible to
    a float, or NaN. Table cells are not expected to be Python ``None``, so
    a bare ``is not None`` test would never detect a missing measurement.
    """
    if cell is None or _is_masked(cell):
        return default
    raw = _unwrap(cell)
    if raw is None or _is_masked(raw):
        return default
    try:
        number = float(raw)
    except (TypeError, ValueError):
        return default
    return default if math.isnan(number) else number


def _to_text(cell):
    """Return ``str(cell)``, or ``None`` when the cell is masked or empty."""
    if cell is None or _is_masked(cell):
        return None
    text_value = str(cell)
    return text_value if text_value else None


def _spectral_color(spectype):
    """Map a spectral-type string to a display colour (simplified)."""
    for letter, color in _SPECTRAL_COLORS:
        if letter in spectype:
            return color
    return _DEFAULT_STAR_COLOR


def _fill_missing(record, candidate):
    """Copy values from *candidate* into *record* where *record* has ``None``."""
    for key, value in candidate.items():
        if record.get(key) is None and value is not None:
            record[key] = value


def _star_measurements(row):
    """Extract the stellar fields of one row; missing values become ``None``."""
    return {
        "spectral_type": _to_text(row["st_spectype"]),
        "radius_solar": _to_float(row["st_rad"]),
        "mass_solar": _to_float(row["st_mass"]),
        "temperature_k": _to_float(row["st_teff"]),
    }


def _planet_measurements(row):
    """Extract the planetary fields of one row; missing values become ``None``."""
    return {
        "semi_major_axis_au": _to_float(row["pl_orbsmax"]),
        "period_days": _to_float(row["pl_orbper"]),
        "eccentricity": _to_float(row["pl_orbeccen"]),
        "radius_earth": _to_float(row["pl_rade"]),
        "temperature_k": _to_float(row["pl_eqt"]),
    }


def _new_system(hostname, row, star):
    """Build the ``static_data`` record for a host star seen for the first time."""
    dist_pc = float(_unwrap(row["sy_dist"]))
    ra_deg = float(_unwrap(row["ra"]))
    dec_deg = float(_unwrap(row["dec"]))

    # Spherical (RA/Dec/distance) -> Cartesian (X/Y/Z) in parsecs, using
    # astropy's standard conversion (+Z toward the north celestial pole).
    coord = SkyCoord(ra=ra_deg * u.deg, dec=dec_deg * u.deg, distance=dist_pc * u.pc)
    cartesian = coord.cartesian

    return {
        "category": "interstellar",
        "name": hostname,
        "name_zh": hostname,  # Placeholder, maybe need translation map later
        "data": {
            "distance_pc": dist_pc,
            "ra": ra_deg,
            "dec": dec_deg,
            "position": {
                "x": cartesian.x.value,
                "y": cartesian.y.value,
                "z": cartesian.z.value,
            },
            "spectral_type": star["spectral_type"],
            "radius_solar": star["radius_solar"],
            "mass_solar": star["mass_solar"],
            "temperature_k": star["temperature_k"],
            "planet_count": int(_unwrap(row["sy_pnum"])),
            "color": None,  # Derived from the spectral type in _finalize_system
            "planets": [],
        },
    }


def _finalize_system(data):
    """Apply fallback values and derive the colour once all rows are merged."""
    if data["spectral_type"] is None:
        data["spectral_type"] = _DEFAULT_SPECTRAL_TYPE
    data["color"] = _spectral_color(data["spectral_type"])
    for key, default in _STAR_DEFAULTS.items():
        if data[key] is None:
            data[key] = default
    for planet in data["planets"]:
        for key, default in _PLANET_DEFAULTS.items():
            if planet[key] is None:
                planet[key] = default


def _group_systems(table):
    """Group archive rows into one record per host star.

    The Planetary Systems (``ps``) table can return several rows for the same
    planet (one per literature reference). Rows are therefore merged: each
    planet appears once per system, and a field takes the first non-missing
    value found across that planet's (or star's) rows.

    Returns a dict mapping hostname to a record with ``category``, ``name``,
    ``name_zh`` and ``data`` keys.
    """
    systems = {}
    # (hostname, planet name) -> the planet dict already stored in the system
    planets_seen = {}

    for row in table:
        hostname = str(row["hostname"])
        star = _star_measurements(row)
        if hostname in systems:
            _fill_missing(systems[hostname]["data"], star)
        else:
            systems[hostname] = _new_system(hostname, row, star)

        planet_name = str(row["pl_name"])
        measurements = _planet_measurements(row)
        key = (hostname, planet_name)
        if key in planets_seen:
            _fill_missing(planets_seen[key], measurements)
        else:
            planet = {"name": planet_name, **measurements}
            planets_seen[key] = planet
            systems[hostname]["data"]["planets"].append(planet)

    for system in systems.values():
        _finalize_system(system["data"])
    return systems


def _clean_nan(obj):
    """Recursively replace non-finite floats (NaN, +/-inf) with ``None``.

    Neither NaN nor infinity is valid JSON, so they must not reach the JSON
    ``data`` column.
    """
    if isinstance(obj, float):
        return obj if math.isfinite(obj) else None
    if isinstance(obj, dict):
        return {key: _clean_nan(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [_clean_nan(value) for value in obj]
    return obj


async def _store_systems(systems):
    """Upsert every system into ``static_data`` within a single transaction.

    On any database error the transaction is rolled back and the error is
    printed; nothing is re-raised.
    """
    # NOTE(review): get_db is assumed to be an async generator yielding one
    # session per iteration - confirm against app.database.
    async for session in get_db():
        try:
            count = 0
            for info in systems.values():
                cleaned_data = _clean_nan(info["data"])
                # UPSERT: refresh the payload when (category, name) already exists.
                stmt = insert(StaticData).values(
                    category=info["category"],
                    name=info["name"],
                    name_zh=info["name_zh"],
                    data=cleaned_data,
                ).on_conflict_do_update(
                    constraint="uq_category_name",
                    set_={"data": cleaned_data, "updated_at": func.now()},
                )
                await session.execute(stmt)
                count += 1
            await session.commit()
            print(f" ✅ Successfully stored {count} interstellar systems.")
        except Exception as e:
            await session.rollback()
            print(f" ❌ Database error: {e}")
        # Only the first session is used. The break lives outside a `finally`
        # so that KeyboardInterrupt / task cancellation are not swallowed.
        break


async def fetch_and_store_interstellar_data():
    """Fetch nearby planetary systems and upsert them into ``static_data``.

    Steps:
      1. Query the NASA Exoplanet Archive Planetary Systems (``ps``) table
         for every row with ``sy_dist < 50`` (parsecs), ordered by distance.
      2. Group the rows by host star, merging repeated rows for a planet.
      3. Upsert one ``static_data`` row per system under the category
         ``'interstellar'``.

    Query and database failures are printed, not raised. Returns ``None``.
    """
    print("🌌 Fetching Interstellar Data (Phase 3)...")

    # 1. Query NASA Exoplanet Archive
    # Columns requested from the Planetary Systems (PS) table:
    #   sy_dist: System Distance [pc]
    #   ra, dec: Coordinates [deg]
    #   sy_pnum: Number of Planets
    #   st_spectype: Spectral Type
    #   st_rad: Stellar Radius [Solar Radii]
    #   st_mass: Stellar Mass [Solar Mass]
    #   st_teff: Effective Temperature [K]
    #   pl_name: Planet Name
    #   pl_orbsmax: Semi-Major Axis [AU]
    #   pl_orbper: Orbital Period [days]
    #   pl_orbeccen: Eccentricity
    #   pl_rade: Planet Radius [Earth Radii]
    #   pl_eqt: Equilibrium Temperature [K]
    print(" Querying NASA Exoplanet Archive (this may take a while)...")
    try:
        # Limit to 50pc for initial Phase 3 to keep it fast and relevant.
        # NOTE: this is a synchronous network call made from a coroutine.
        table = NasaExoplanetArchive.query_criteria(
            table="ps",
            select=(
                "hostname, sy_dist, ra, dec, sy_pnum, "
                "st_spectype, st_rad, st_mass, st_teff, "
                "pl_name, pl_orbsmax, pl_orbper, pl_orbeccen, pl_rade, pl_eqt"
            ),
            where="sy_dist < 50",
            order="sy_dist",
        )
        print(f" ✅ Fetched {len(table)} records.")
    except Exception as e:
        print(f" ❌ Query failed: {e}")
        return

    # 2. Process Data: group planets by host star
    print(" Processing data...")
    systems = _group_systems(table)
    print(f" Processed {len(systems)} unique star systems.")

    # 3. Store in Database
    print(" Storing in database...")
    await _store_systems(systems)
if __name__ == "__main__":
    # Script entry point: build the fetch-and-store coroutine and run it to
    # completion on a fresh event loop.
    entry_coroutine = fetch_and_store_interstellar_data()
    asyncio.run(entry_coroutine)