""" Fetch Interstellar Data (Nearby Stars & Exoplanets) Phase 3: Interstellar Expansion This script fetches data from the NASA Exoplanet Archive using astroquery. It retrieves the nearest stars (within 100pc) and their planetary system details. The data is stored in the `static_data` table with category 'interstellar'. """ import asyncio import os import sys import math from sqlalchemy import select, text, func from sqlalchemy.dialects.postgresql import insert # Add backend directory to path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from app.database import get_db from app.models.db.static_data import StaticData # Try to import astroquery/astropy, handle if missing try: from astroquery.ipac.nexsci.nasa_exoplanet_archive import NasaExoplanetArchive from astropy.coordinates import SkyCoord from astropy import units as u except ImportError: print("❌ Error: astroquery or astropy not installed.") print(" Please run: pip install astroquery astropy") sys.exit(1) async def fetch_and_store_interstellar_data(): print("🌌 Fetching Interstellar Data (Phase 3)...") # 1. Query NASA Exoplanet Archive # We query the Planetary Systems (PS) table # sy_dist: System Distance [pc] # ra, dec: Coordinates [deg] # sy_pnum: Number of Planets # st_spectype: Spectral Type # st_rad: Stellar Radius [Solar Radii] # st_mass: Stellar Mass [Solar Mass] # st_teff: Effective Temperature [K] # pl_name: Planet Name # pl_orbsmax: Semi-Major Axis [AU] # pl_orbper: Orbital Period [days] # pl_orbeccen: Eccentricity # pl_rade: Planet Radius [Earth Radii] print(" Querying NASA Exoplanet Archive (this may take a while)...") try: # We fetch systems within 100 parsecs table = NasaExoplanetArchive.query_criteria( table="ps", select="hostname, sy_dist, ra, dec, sy_pnum, st_spectype, st_rad, st_mass, st_teff, pl_name, pl_orbsmax, pl_orbper, pl_orbeccen, pl_rade, pl_eqt", where="sy_dist < 50", # Limit to 50pc for initial Phase 3 to keep it fast and relevant order="sy_dist" ) print(f" ✅ Fetched {len(table)} records.") except Exception as e: print(f" ❌ Query failed: {e}") return # 2. Process Data # We need to group planets by host star systems = {} print(" Processing data...") for row in table: hostname = str(row['hostname']) # Helper function to safely get value from potential Quantity object def get_val(obj): if hasattr(obj, 'value'): return obj.value return obj if hostname not in systems: # Coordinate conversion: Spherical (RA/Dec/Dist) -> Cartesian (X/Y/Z) dist_pc = float(get_val(row['sy_dist'])) ra_deg = float(get_val(row['ra'])) dec_deg = float(get_val(row['dec'])) # Convert to Cartesian (X, Y, Z) in Parsecs # Z is up (towards North Celestial Pole?) - Standard Astropy conversion c = SkyCoord(ra=ra_deg*u.deg, dec=dec_deg*u.deg, distance=dist_pc*u.pc) x = c.cartesian.x.value y = c.cartesian.y.value z = c.cartesian.z.value # Determine color based on Spectral Type (simplified) spectype = str(row['st_spectype']) if row['st_spectype'] else 'G' color = '#FFFFFF' # Default if 'O' in spectype: color = '#9db4ff' elif 'B' in spectype: color = '#aabfff' elif 'A' in spectype: color = '#cad8ff' elif 'F' in spectype: color = '#fbf8ff' elif 'G' in spectype: color = '#fff4e8' elif 'K' in spectype: color = '#ffddb4' elif 'M' in spectype: color = '#ffbd6f' systems[hostname] = { "category": "interstellar", "name": hostname, "name_zh": hostname, # Placeholder, maybe need translation map later "data": { "distance_pc": dist_pc, "ra": ra_deg, "dec": dec_deg, "position": {"x": x, "y": y, "z": z}, "spectral_type": spectype, "radius_solar": float(get_val(row['st_rad'])) if get_val(row['st_rad']) is not None else 1.0, "mass_solar": float(get_val(row['st_mass'])) if get_val(row['st_mass']) is not None else 1.0, "temperature_k": float(get_val(row['st_teff'])) if get_val(row['st_teff']) is not None else 5700, "planet_count": int(get_val(row['sy_pnum'])), "color": color, "planets": [] } } # Add planet info planet = { "name": str(row['pl_name']), "semi_major_axis_au": float(get_val(row['pl_orbsmax'])) if get_val(row['pl_orbsmax']) is not None else 0.0, "period_days": float(get_val(row['pl_orbper'])) if get_val(row['pl_orbper']) is not None else 0.0, "eccentricity": float(get_val(row['pl_orbeccen'])) if get_val(row['pl_orbeccen']) is not None else 0.0, "radius_earth": float(get_val(row['pl_rade'])) if get_val(row['pl_rade']) is not None else 1.0, "temperature_k": float(get_val(row['pl_eqt'])) if get_val(row['pl_eqt']) is not None else None } systems[hostname]["data"]["planets"].append(planet) print(f" Processed {len(systems)} unique star systems.") # 3. Store in Database print(" Storing in database...") # Helper to clean NaN values for JSON compatibility def clean_nan(obj): if isinstance(obj, float): return None if math.isnan(obj) else obj elif isinstance(obj, dict): return {k: clean_nan(v) for k, v in obj.items()} elif isinstance(obj, list): return [clean_nan(v) for v in obj] return obj async for session in get_db(): try: count = 0 for hostname, info in systems.items(): # Clean data cleaned_data = clean_nan(info["data"]) # Use UPSERT stmt = insert(StaticData).values( category=info["category"], name=info["name"], name_zh=info["name_zh"], data=cleaned_data ).on_conflict_do_update( constraint="uq_category_name", set_={"data": cleaned_data, "updated_at": func.now()} ) await session.execute(stmt) count += 1 await session.commit() print(f" ✅ Successfully stored {count} interstellar systems.") except Exception as e: await session.rollback() print(f" ❌ Database error: {e}") finally: break if __name__ == "__main__": asyncio.run(fetch_and_store_interstellar_data())