/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */
-- Zone tags attached directly to filesystem entries: one row per
-- (volume, entry) holding the ids of all its tags from the '__zone' namespace.
WITH fs_entries_within_zones AS (
    SELECT
        tvc.volume_id,
        tvc.fs_entry_id,
        array_agg(tvc.name_id) AS tag_ids
    FROM sf.tag_value_current AS tvc
    WHERE EXISTS (
        -- keep only tag values whose tag name lives in the '__zone' namespace
        SELECT 1
        FROM sf.tag_name AS zone_tag
            INNER JOIN sf.tag_namespace AS zone_ns
                ON zone_ns.id = zone_tag.namespace_id
        WHERE zone_tag.id = tvc.name_id
            AND zone_ns.name = '__zone'
    )
    GROUP BY tvc.volume_id, tvc.fs_entry_id
),
-- Directories that have a '__zone' tag set on them, together with their path;
-- one row per (directory, zone tag).
zone_with_dir_path AS (
    SELECT
        tagged_dir.volume_id,
        tagged_dir.path,
        zone_value.name_id
    FROM sf.tag_value_current AS zone_value
        INNER JOIN sf.dir_current AS tagged_dir
            ON tagged_dir.id = zone_value.fs_entry_id
                AND tagged_dir.volume_id = zone_value.volume_id
    WHERE zone_value.name_id IN (
        SELECT tn.id
        FROM sf.tag_namespace AS tns
            INNER JOIN sf.tag_name AS tn
                ON tn.namespace_id = tns.id
        WHERE tns.name = '__zone'
    )
),
-- For each directory, the zone tags that apply to it: tags set on the
-- directory itself plus tags set on any directory of the same volume whose
-- path is a prefix of it (i.e. the directory lies in that subtree).
dirs_within_zones AS (
    SELECT
        subdir.volume_id,
        subdir.id,
        array_agg(zone_root.name_id) AS tag_ids
    FROM zone_with_dir_path AS zone_root
        INNER JOIN sf.dir_current AS subdir
            ON subdir.volume_id = zone_root.volume_id
    WHERE subdir.path = zone_root.path
        -- regexp_replace is inlined from sf_internal.subtree_pattern: it escapes the
        -- LIKE metacharacters (%, _ and \) in the tagged path before the wildcard
        -- suffix is appended. An empty tagged path matches every non-empty path.
        OR subdir.path LIKE regexp_replace(zone_root.path, '(%|_|\\)', '\\\1', 'g')
            || (CASE WHEN zone_root.path = '' THEN '_%' ELSE '/%' END)
    GROUP BY subdir.volume_id, subdir.id
),
-- Files paired with every zone they belong to: one row per (file, zone tag).
-- A file is in a zone when the zone tag is set on the file itself or applies
-- to its parent directory.
--   physical_size: blocks * 512 divided by the link count (treated as 1 when not positive)
--   atime_days / mtime_days: age relative to current_timestamp, in days
files_within_zones AS (
    -- zone tags assigned directly to the file
    SELECT
        f.id,
        f.volume_id,
        f.size AS logical_size,
        (f.blocks * 512 / CASE WHEN f.nlinks > 0 THEN f.nlinks ELSE 1 END) AS physical_size,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.atime)) / 86400.0 AS atime_days,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.mtime)) / 86400.0 AS mtime_days,
        unnest(own_zones.tag_ids) AS zone_id
    FROM fs_entries_within_zones AS own_zones
        INNER JOIN sf.file_current AS f
            ON f.id = own_zones.fs_entry_id
                AND f.volume_id = own_zones.volume_id
    UNION  -- UNION is required to remove duplicates (file got the same tag inherited and directly assigned)
    -- zone tags inherited from the parent directory
    SELECT
        f.id,
        f.volume_id,
        f.size AS logical_size,
        (f.blocks * 512 / CASE WHEN f.nlinks > 0 THEN f.nlinks ELSE 1 END) AS physical_size,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.atime)) / 86400.0 AS atime_days,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.mtime)) / 86400.0 AS mtime_days,
        unnest(parent_zones.tag_ids) AS zone_id
    FROM dirs_within_zones AS parent_zones
        INNER JOIN sf.file_current AS f
            ON f.parent_id = parent_zones.id
                AND f.volume_id = parent_zones.volume_id
),
-- Tags attached directly to filesystem entries, limited to tags from
-- non-empty and non-private namespaces (a private namespace name starts with '__').
-- One row per (volume, entry) holding the ids of all such tags.
fs_entries_with_zone_tags AS (
    SELECT
        tvc.volume_id,
        tvc.fs_entry_id,
        array_agg(tvc.name_id) AS tag_ids
    FROM sf.tag_value_current AS tvc
    WHERE EXISTS (
        SELECT 1
        FROM sf.tag_name AS public_tag
            INNER JOIN sf.tag_namespace AS public_ns
                ON public_ns.id = public_tag.namespace_id
        WHERE public_tag.id = tvc.name_id
            AND public_ns.name <> ''
            AND SUBSTRING(public_ns.name FROM 1 FOR length('__')) <> '__'
    )
    GROUP BY tvc.volume_id, tvc.fs_entry_id
),
-- Directories that have a tag from a non-empty, non-private namespace set on
-- them, together with their path; one row per (directory, tag).
tag_with_dir_path AS (
    SELECT
        tagged_dir.volume_id,
        tagged_dir.path,
        tag_value.name_id
    FROM sf.tag_value_current AS tag_value
        INNER JOIN sf.dir_current AS tagged_dir
            ON tagged_dir.id = tag_value.fs_entry_id
                AND tagged_dir.volume_id = tag_value.volume_id
    WHERE tag_value.name_id IN (
        -- non-empty and non-private namespaces
        SELECT tn.id
        FROM sf.tag_namespace AS tns
            INNER JOIN sf.tag_name AS tn
                ON tn.namespace_id = tns.id
        WHERE tns.name <> ''
            AND SUBSTRING(tns.name FROM 1 FOR length('__')) <> '__'
    )
),
-- For each directory, the tags that apply to it: tags set on the directory
-- itself plus tags set on any directory of the same volume whose path is a
-- prefix of it (i.e. the directory lies in that subtree).
dirs_with_zone_tags AS (
    SELECT
        subdir.volume_id,
        subdir.id,
        array_agg(tag_root.name_id) AS tag_ids
    FROM tag_with_dir_path AS tag_root
        INNER JOIN sf.dir_current AS subdir
            ON subdir.volume_id = tag_root.volume_id
    WHERE subdir.path = tag_root.path
        -- regexp_replace is inlined from sf_internal.subtree_pattern: it escapes the
        -- LIKE metacharacters (%, _ and \) in the tagged path before the wildcard
        -- suffix is appended. An empty tagged path matches every non-empty path.
        OR subdir.path LIKE regexp_replace(tag_root.path, '(%|_|\\)', '\\\1', 'g')
            || (CASE WHEN tag_root.path = '' THEN '_%' ELSE '/%' END)
    GROUP BY subdir.volume_id, subdir.id
),
-- Files paired with every tag that applies to them: one row per (file, tag).
-- A tag applies when it is set on the file itself or applies to the file's
-- parent directory.
--   physical_size: blocks * 512 divided by the link count (treated as 1 when not positive)
--   atime_days / mtime_days: age relative to current_timestamp, in days
files_with_tag AS (
    -- tags assigned directly to the file
    SELECT
        f.id,
        f.volume_id,
        f.size AS logical_size,
        (f.blocks * 512 / CASE WHEN f.nlinks > 0 THEN f.nlinks ELSE 1 END) AS physical_size,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.atime)) / 86400.0 AS atime_days,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.mtime)) / 86400.0 AS mtime_days,
        unnest(own_tags.tag_ids) AS tag_id
    FROM fs_entries_with_zone_tags AS own_tags
        INNER JOIN sf.file_current AS f
            ON f.id = own_tags.fs_entry_id
                AND f.volume_id = own_tags.volume_id
    UNION  -- UNION is required to remove duplicates (file got the same tag inherited and directly assigned)
    -- tags inherited from the parent directory
    SELECT
        f.id,
        f.volume_id,
        f.size AS logical_size,
        (f.blocks * 512 / CASE WHEN f.nlinks > 0 THEN f.nlinks ELSE 1 END) AS physical_size,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.atime)) / 86400.0 AS atime_days,
        (EXTRACT(EPOCH FROM current_timestamp) - EXTRACT(EPOCH FROM f.mtime)) / 86400.0 AS mtime_days,
        unnest(parent_tags.tag_ids) AS tag_id
    FROM dirs_with_zone_tags AS parent_tags
        INNER JOIN sf.file_current AS f
            ON f.parent_id = parent_tags.id
                AND f.volume_id = parent_tags.volume_id
),
-- Totals per (zone, tag, volume, atime bucket, mtime bucket) over the files
-- that are both inside a zone and carry a tag. Sizes and ages are taken from
-- the zone side of the join.
tag_per_vol_agg AS (
    SELECT
        bucketed.zone_id,
        bucketed.tag_id,
        bucketed.volume_id,
        SUM(bucketed.logical_size) AS logical_size,
        SUM(bucketed.physical_size) AS physical_size,
        COUNT(*) AS count,
        bucketed.atime_age,
        bucketed.mtime_age
    FROM (
        -- classify each (file, zone, tag) row by access and modification age;
        -- thresholds are in days, anything that is not >= 0 falls into 'future'
        SELECT
            zoned.zone_id,
            tagged.tag_id,
            zoned.volume_id,
            zoned.logical_size,
            zoned.physical_size,
            CASE
                WHEN zoned.atime_days >= 1095 THEN 'Previous Years: > 3'
                WHEN zoned.atime_days >= 730  THEN 'Previous Years: 2-3'
                WHEN zoned.atime_days >= 365  THEN 'Previous Years: 1-2'
                WHEN zoned.atime_days >= 180  THEN 'Previous Months: 6-12'
                WHEN zoned.atime_days >= 90   THEN 'Previous Months: 3-6'
                WHEN zoned.atime_days >= 30   THEN 'Previous Months: 1-3'
                WHEN zoned.atime_days >= 0    THEN 'Previous Months: 0-1'
                ELSE 'future'
            END AS atime_age,
            CASE
                WHEN zoned.mtime_days >= 1095 THEN 'Previous Years: > 3'
                WHEN zoned.mtime_days >= 730  THEN 'Previous Years: 2-3'
                WHEN zoned.mtime_days >= 365  THEN 'Previous Years: 1-2'
                WHEN zoned.mtime_days >= 180  THEN 'Previous Months: 6-12'
                WHEN zoned.mtime_days >= 90   THEN 'Previous Months: 3-6'
                WHEN zoned.mtime_days >= 30   THEN 'Previous Months: 1-3'
                WHEN zoned.mtime_days >= 0    THEN 'Previous Months: 0-1'
                ELSE 'future'
            END AS mtime_age
        FROM files_within_zones AS zoned
            INNER JOIN files_with_tag AS tagged
                ON tagged.id = zoned.id
                    AND tagged.volume_id = zoned.volume_id
    ) AS bucketed
    GROUP BY
        bucketed.zone_id,
        bucketed.tag_id,
        bucketed.volume_id,
        bucketed.atime_age,
        bucketed.mtime_age
)
-- Final report: one row per (volume, zone, namespace, tag, atime bucket, mtime bucket).
-- Resolves the zone and tag ids from tag_per_vol_agg to display names and adds
-- the share of the volume's capacity and the storage cost.
SELECT v.name AS volume_name,
       z.name AS zone,
       tns.name AS namespace,
       tn.name AS tag,
       tpv.atime_age,
       tpv.mtime_age,
       tpv.logical_size AS size,  -- name size is backward compatibility
       tpv.physical_size,
       -- NULL when the volume is unknown or its total capacity is missing or zero
       CASE
            WHEN COALESCE(v.total_capacity, 0) > 0 THEN ROUND(tpv.physical_size * 100 / v.total_capacity, 1)
            ELSE NULL
       END AS "%_of_whole_volume",
       tpv.count,
       -- physical size in units of 10^9 multiplied by the volume's 'cost_per_gb'
       -- user parameter; the cost is 0 when the parameter is not set
       tpv.physical_size / (1000 * 1000 * 1000) * CAST(COALESCE(vupc.value, '0') AS FLOAT) AS cost
FROM tag_per_vol_agg AS tpv
    -- the zone tag's name holds the zone id as text, hence the cast on z.id
    INNER JOIN sf.tag_name zn ON tpv.zone_id = zn.id
    JOIN sf_auth.zone AS z ON zn.name = z.id::VARCHAR
    INNER JOIN sf.tag_name tn ON tpv.tag_id = tn.id
    JOIN sf.tag_namespace AS tns ON tn.namespace_id = tns.id
    -- LEFT JOINs: rows are kept even when the volume or its cost parameter is missing
    LEFT JOIN sf_volumes.volume AS v ON tpv.volume_id = v.id
    LEFT JOIN sf_volumes.user_param AS vupc ON vupc.volume_id = tpv.volume_id AND vupc.name = 'cost_per_gb'
-- namespace is a trailing tie-breaker: identically named tags from different
-- namespaces would otherwise compare equal and come back in arbitrary order
ORDER BY volume_name, zone, tag, atime_age, mtime_age, namespace;
