79 lines
1.9 KiB
PL/PgSQL
79 lines
1.9 KiB
PL/PgSQL
-- Clean up duplicate celestial events
|
|
-- This script removes duplicate events and adds a unique index to prevent future duplicates
|
|
|
|
BEGIN;
|
|
|
|
-- Step 1: Show current duplicate count
|
|
SELECT
|
|
'Duplicate events before cleanup' as status,
|
|
COUNT(*) as total_duplicates
|
|
FROM (
|
|
SELECT
|
|
body_id,
|
|
event_type,
|
|
DATE_TRUNC('minute', event_time) as event_time_minute,
|
|
COUNT(*) as cnt
|
|
FROM celestial_events
|
|
GROUP BY body_id, event_type, DATE_TRUNC('minute', event_time)
|
|
HAVING COUNT(*) > 1
|
|
) duplicates;
|
|
|
|
-- Step 2: Remove duplicate events (keep the earliest created_at)
|
|
WITH duplicates AS (
|
|
SELECT
|
|
id,
|
|
ROW_NUMBER() OVER (
|
|
PARTITION BY body_id, event_type, DATE_TRUNC('minute', event_time)
|
|
ORDER BY created_at ASC
|
|
) AS rn
|
|
FROM celestial_events
|
|
)
|
|
DELETE FROM celestial_events
|
|
WHERE id IN (
|
|
SELECT id FROM duplicates WHERE rn > 1
|
|
)
|
|
RETURNING id;
|
|
|
|
-- Step 3: Add unique index to prevent future duplicates
|
|
CREATE UNIQUE INDEX IF NOT EXISTS idx_celestial_events_unique
|
|
ON celestial_events (
|
|
body_id,
|
|
event_type,
|
|
DATE_TRUNC('minute', event_time)
|
|
);
|
|
|
|
-- Step 4: Verify no duplicates remain
|
|
SELECT
|
|
'Duplicate events after cleanup' as status,
|
|
COUNT(*) as total_duplicates
|
|
FROM (
|
|
SELECT
|
|
body_id,
|
|
event_type,
|
|
DATE_TRUNC('minute', event_time) as event_time_minute,
|
|
COUNT(*) as cnt
|
|
FROM celestial_events
|
|
GROUP BY body_id, event_type, DATE_TRUNC('minute', event_time)
|
|
HAVING COUNT(*) > 1
|
|
) duplicates;
|
|
|
|
-- Step 5: Show summary statistics
|
|
SELECT
|
|
source,
|
|
COUNT(*) as total_events,
|
|
COUNT(DISTINCT body_id) as unique_bodies,
|
|
MIN(event_time) as earliest_event,
|
|
MAX(event_time) as latest_event
|
|
FROM celestial_events
|
|
GROUP BY source
|
|
ORDER BY source;
|
|
|
|
COMMIT;
|
|
|
|
-- Verify the index was created
|
|
SELECT
|
|
indexname,
|
|
indexdef
|
|
FROM pg_indexes
|
|
WHERE tablename = 'celestial_events' AND indexname = 'idx_celestial_events_unique';
|