/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */
-- Most of this query is common with top_directories_by_churn_redash_query.sql - make sure that they stay synced
-- Size and file-count history of a single directory: one output row per recorded
-- version of the directory, plus a zero-size row wherever the directory was missing
-- between two recorded versions. Each row also carries the delta against the
-- previous row.
--
-- Template placeholders (substituted into the query text before it is executed):
--   {{{volume}}}                     volume name, resolved with sf.volume_id_from_name()
--   {{{dir_path}}}                   directory path, compared for equality with sf.dir.path
--   {{number_of_days_to_look_back}}  only versions whose validity started less than
--                                    this many days ago are reported
--
-- Output columns: path, "dir size (GiB)", "delta (GiB)", "count delta",
-- "number of files", "date of scan"; rows are sorted by "date of scan".
WITH first_record AS (
    -- Start of the earliest validity range recorded for this directory.
    -- Deliberately NOT limited by the look-back period: it is used below to recognise
    -- the very first data point of the directory's whole history.
    -- Always yields exactly one row (MIN without GROUP BY); the value is NULL when
    -- the directory has no rows at all.
    SELECT
           MIN(LOWER(valid)) AS "min_scan_time"
    FROM sf.dir
    WHERE path = '{{{dir_path}}}'
        AND volume_id = sf.volume_id_from_name('{{{volume}}}')
), paths AS (
    -- Every version of the directory whose validity range starts inside the
    -- look-back period, with totals extracted from the local_aggrs JSON document.
    SELECT volume_id,
           path,
           (local_aggrs->'total'->>'size')::BIGINT AS total_size,
           (local_aggrs->'total'->>'files')::BIGINT AS total_files,
           valid
    FROM sf.dir
    WHERE path = '{{{dir_path}}}'
        AND volume_id = sf.volume_id_from_name('{{{volume}}}')
        AND LOWER(valid) > CURRENT_TIMESTAMP - INTERVAL '{{number_of_days_to_look_back}} days'
), artificial_rows_for_removed_dirs AS (
    -- We cannot simply compare one dir row with another dir row to calculate churn.
    -- Assume that a user has a directory with total size 30 GB, removes it and after
    -- some time creates a directory with the same name, but this time with size 50 GB.
    -- sf.dir will contain two rows:
    -- 1. path='foo/bar', valid=[N, N + 1), size=30 GB
    -- 2. path='foo/bar', valid=[N + 2, inf), size=50 GB
    -- We need to convert that into 3 rows to express the fact that the dir didn't exist
    -- from N + 1 to N + 2:
    -- 1. path='foo/bar', scan_time=N, size=30 GB
    -- 2. path='foo/bar', scan_time=N + 1, size=0 GB <- artificial row for removed dir
    -- 3. path='foo/bar', scan_time=N + 2, size=50 GB
    -- The SQL below adds the artificial row with size 0 to make that possible.
    --
    -- How: for each version, compare the end of the previous version's range with the
    -- start of this one. If they differ there was a gap, and a zero-size row is emitted
    -- at the moment the previous version ended. For the first version LAG() is NULL, so
    -- the comparison is NULL and the WHERE below drops it.
    --
    -- NOTE(review): only a gap between two consecutive versions produces an artificial
    -- row. A version whose validity ended with no later version in `paths` (directory
    -- removed and never re-created) gets none -- confirm this is intended.
    SELECT * FROM (
        SELECT path,
               0::BIGINT AS total_size,
               0::BIGINT AS total_files,
               LAG(UPPER(valid), 1) OVER w AS scan_time,
               LAG(UPPER(valid), 1) OVER w != LOWER(valid) as is_new_row_after_remove
        FROM paths
        WINDOW w AS (ORDER BY LOWER(valid))  -- no need to partition by path as there is only 1 path possible
    ) t
    WHERE is_new_row_after_remove
), paths_with_artificial_rows_for_removed AS (
    -- Real versions (time-stamped with the start of their validity range) combined
    -- with the artificial "directory removed" rows.
    SELECT path,
           total_size,
           total_files,
           LOWER(valid) AS scan_time
    FROM paths

    -- UNION (rather than UNION ALL) also collapses exact duplicate rows, should any exist.
    UNION

    SELECT path,
           total_size,
           total_files,
           scan_time
    FROM artificial_rows_for_removed_dirs
) SELECT
    path,
    -- bytes -> GiB (1024^3); the numeric literal forces non-integer division
    ROUND(total_size / (1024.0 * 1024 * 1024), 2) AS "dir size (GiB)",
    COALESCE(
        -- change in size against the previous row in time order
        ROUND((total_size - lag(total_size, 1) OVER w) / (1024.0 * 1024 * 1024), 2),
        -- Reports are used in two ways: for recent changes, and for the
        -- whole volume history. In the second case, churn includes
        -- the initial scan. For recent changes, it's a minor glitch that
        -- first data point is not included in churn/delta.
        CASE WHEN scan_time = min_scan_time
            THEN ROUND(total_size / (1024.0 * 1024 * 1024), 2)
            ELSE NULL
        END) AS "delta (GiB)",
    COALESCE(
        -- same rule as "delta (GiB)", applied to the file count
        total_files - lag(total_files, 1) OVER w,
        CASE WHEN scan_time = min_scan_time
            THEN total_files
            ELSE NULL
        END)::BIGINT AS "count delta",
    total_files AS "number of files",
    scan_time AS "date of scan"
    FROM paths_with_artificial_rows_for_removed
        -- first_record is a single row, so this only attaches min_scan_time to every row
        CROSS JOIN first_record
    WINDOW w AS (ORDER BY scan_time)  -- no need to partition by path as there is only 1 path possible
    -- The window ordering above only governs LAG(); without an explicit ORDER BY the
    -- order of the returned rows is not guaranteed.
    ORDER BY scan_time
