cosmo/backend/scripts/cleanup_duplicate_events.sql

79 lines
1.9 KiB
PL/PgSQL

-- Clean up duplicate celestial events
-- This script removes duplicate events and adds a unique index to prevent future duplicates
BEGIN;
-- Step 1: Show current duplicate count
SELECT
'Duplicate events before cleanup' as status,
COUNT(*) as total_duplicates
FROM (
SELECT
body_id,
event_type,
DATE_TRUNC('minute', event_time) as event_time_minute,
COUNT(*) as cnt
FROM celestial_events
GROUP BY body_id, event_type, DATE_TRUNC('minute', event_time)
HAVING COUNT(*) > 1
) duplicates;
-- Step 2: Remove duplicate events (keep the earliest created_at)
WITH duplicates AS (
SELECT
id,
ROW_NUMBER() OVER (
PARTITION BY body_id, event_type, DATE_TRUNC('minute', event_time)
ORDER BY created_at ASC
) AS rn
FROM celestial_events
)
DELETE FROM celestial_events
WHERE id IN (
SELECT id FROM duplicates WHERE rn > 1
)
RETURNING id;
-- Step 3: Add unique index to prevent future duplicates
CREATE UNIQUE INDEX IF NOT EXISTS idx_celestial_events_unique
ON celestial_events (
body_id,
event_type,
DATE_TRUNC('minute', event_time)
);
-- Step 4: Verify no duplicates remain
SELECT
'Duplicate events after cleanup' as status,
COUNT(*) as total_duplicates
FROM (
SELECT
body_id,
event_type,
DATE_TRUNC('minute', event_time) as event_time_minute,
COUNT(*) as cnt
FROM celestial_events
GROUP BY body_id, event_type, DATE_TRUNC('minute', event_time)
HAVING COUNT(*) > 1
) duplicates;
-- Step 5: Show summary statistics
SELECT
source,
COUNT(*) as total_events,
COUNT(DISTINCT body_id) as unique_bodies,
MIN(event_time) as earliest_event,
MAX(event_time) as latest_event
FROM celestial_events
GROUP BY source
ORDER BY source;
COMMIT;
-- Verify the index was created
SELECT
indexname,
indexdef
FROM pg_indexes
WHERE tablename = 'celestial_events' AND indexname = 'idx_celestial_events_unique';