/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */
-- Churn report: for the top_n volumes with the most changed objects inside the
-- look-back window, return the number of changed objects per volume and per
-- date bucket. Output columns: volume, date, changed_objects.
WITH churn_raw AS (
    -- One row per (volume, date bucket) holding the total churn recorded by the
    -- scans whose heartbeat falls inside the look-back window.
    SELECT
        -- prefer the display name; fall back to the volume's name
        COALESCE(volume.display_name, volume.name) AS volume,
        -- convert scan.heartbeat to epoch (seconds since 1970), round to a multiple
        -- of group_by_days days, convert back to a date
        -- NOTE(review): ROUND snaps to the NEAREST bucket boundary, so a heartbeat
        -- past the midpoint of a bucket is reported under the following boundary;
        -- confirm this is intended rather than FLOOR.
        to_timestamp(ROUND(EXTRACT(EPOCH FROM scan.heartbeat) / (3600 * 24 * {{group_by_days}})) * (3600 * 24 * {{group_by_days}}) )::date AS date,
        -- collect sum of all churn as an absolute number (a proxy indicator for history size);
        -- each counter is read from the processing_stats object of the stats JSON,
        -- and a counter that is missing for the whole group counts as 0
        COALESCE(SUM((stats#>>'{processing_stats,ADDED_DIR_COUNT}')::BIGINT), 0) +
        COALESCE(SUM((stats#>>'{processing_stats,ADDED_FILE_COUNT}')::BIGINT), 0) +
        COALESCE(SUM((stats#>>'{processing_stats,REMOVED_DIR_COUNT}')::BIGINT), 0) +
        COALESCE(SUM((stats#>>'{processing_stats,REMOVED_FILE_COUNT}')::BIGINT), 0) +
        COALESCE(SUM((stats#>>'{processing_stats,CHANGED_DIR_COUNT}')::BIGINT), 0) +
        COALESCE(SUM((stats#>>'{processing_stats,CHANGED_FILE_COUNT}')::BIGINT), 0) AS changed_objects
    FROM sf_scans.scan
    LEFT JOIN sf_scans.loader_info ON loader_info.scan_id = scan.id
    LEFT JOIN sf_volumes.volume ON scan.volume_id = volume.id
    -- NOTE(review): there is no filter on scan.state_name, so scans in any state
    -- contribute to the totals; confirm whether only 'done' scans should count.
    WHERE scan.heartbeat >= now() - interval '{{number_of_days_to_look_back}} days'
    GROUP BY volume, date

), rankings AS (
    -- The top_n volumes by total churn over the whole look-back window.
    -- (this could probably be done as a partition and rank command, but this is very readable)
    SELECT
        churn_raw.volume,
        SUM(churn_raw.changed_objects)::BIGINT AS allchanged
    FROM churn_raw
    -- A NULL volume (scan without a matching volume row, or a volume without any
    -- name) can never satisfy the equality join below, so it must not be allowed
    -- to occupy one of the top_n slots.
    WHERE churn_raw.volume IS NOT NULL
    GROUP BY churn_raw.volume
    -- volume is the tie-breaker: without it, volumes with equal totals would be
    -- cut off by LIMIT in an arbitrary, run-to-run unstable way
    ORDER BY allchanged DESC, churn_raw.volume
    LIMIT {{top_n}}
)
-- Keep only the per-bucket rows that belong to a ranked volume.
SELECT
    churn_raw.volume,
    churn_raw.date,
    churn_raw.changed_objects
FROM churn_raw
INNER JOIN rankings ON churn_raw.volume = rankings.volume
ORDER BY churn_raw.volume, churn_raw.date
