-- Clean up duplicate celestial events -- This script removes duplicate events and adds a unique index to prevent future duplicates BEGIN; -- Step 1: Show current duplicate count SELECT 'Duplicate events before cleanup' as status, COUNT(*) as total_duplicates FROM ( SELECT body_id, event_type, DATE_TRUNC('minute', event_time) as event_time_minute, COUNT(*) as cnt FROM celestial_events GROUP BY body_id, event_type, DATE_TRUNC('minute', event_time) HAVING COUNT(*) > 1 ) duplicates; -- Step 2: Remove duplicate events (keep the earliest created_at) WITH duplicates AS ( SELECT id, ROW_NUMBER() OVER ( PARTITION BY body_id, event_type, DATE_TRUNC('minute', event_time) ORDER BY created_at ASC ) AS rn FROM celestial_events ) DELETE FROM celestial_events WHERE id IN ( SELECT id FROM duplicates WHERE rn > 1 ) RETURNING id; -- Step 3: Add unique index to prevent future duplicates CREATE UNIQUE INDEX IF NOT EXISTS idx_celestial_events_unique ON celestial_events ( body_id, event_type, DATE_TRUNC('minute', event_time) ); -- Step 4: Verify no duplicates remain SELECT 'Duplicate events after cleanup' as status, COUNT(*) as total_duplicates FROM ( SELECT body_id, event_type, DATE_TRUNC('minute', event_time) as event_time_minute, COUNT(*) as cnt FROM celestial_events GROUP BY body_id, event_type, DATE_TRUNC('minute', event_time) HAVING COUNT(*) > 1 ) duplicates; -- Step 5: Show summary statistics SELECT source, COUNT(*) as total_events, COUNT(DISTINCT body_id) as unique_bodies, MIN(event_time) as earliest_event, MAX(event_time) as latest_event FROM celestial_events GROUP BY source ORDER BY source; COMMIT; -- Verify the index was created SELECT indexname, indexdef FROM pg_indexes WHERE tablename = 'celestial_events' AND indexname = 'idx_celestial_events_unique';