/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */
WITH fs_entries_with_flat_tags_or_within_zones AS (
    -- Tag ids attached directly to a filesystem entry, collapsed to one row
    -- per (volume_id, fs_entry_id) with the ids gathered into an array.
    -- Only the internal __archive namespace is discarded, so global tags,
    -- custom namespace tags and internal __zone tags are all kept.
    SELECT
        tvc.volume_id,
        tvc.fs_entry_id,
        array_agg(tvc.name_id) AS tag_ids
    FROM sf.tag_value_current AS tvc
    WHERE EXISTS (
        SELECT 1
        FROM sf.tag_name AS tn
            INNER JOIN sf.tag_namespace AS tns ON tns.id = tn.namespace_id
        WHERE tn.id = tvc.name_id
            AND tns.name <> '__archive'
    )
    GROUP BY tvc.volume_id, tvc.fs_entry_id
),
tag_with_path AS (
    -- One row per tag set directly on a directory, carrying the directory's
    -- path so the tag can later be propagated to everything below it.
    -- Tags from the internal __archive namespace are left out; global tags,
    -- custom namespace tags and internal __zone tags remain.
    SELECT
        d.volume_id,
        d.path,
        tv.name_id
    FROM sf.tag_value_current AS tv
        INNER JOIN sf.dir_current AS d
            ON d.id = tv.fs_entry_id
                AND d.volume_id = tv.volume_id
    WHERE EXISTS (
        SELECT 1
        FROM sf.tag_name AS tn
            INNER JOIN sf.tag_namespace AS tns ON tns.id = tn.namespace_id
        WHERE tn.id = tv.name_id
            AND tns.name <> '__archive'
    )
),
dirs_with_flat_tags AS (
    -- For every directory, gather the tags set on the directory itself or on
    -- any directory above it within the same volume (tag inheritance).
    -- The resulting array may repeat a tag id when it is reachable through
    -- more than one tagged ancestor.
    SELECT
        d.volume_id,
        d.id,
        array_agg(twp.name_id) AS tag_ids
    FROM tag_with_path AS twp
        INNER JOIN sf.dir_current AS d
            ON d.volume_id = twp.volume_id
    WHERE d.path = twp.path
        -- Subtree match: the LIKE wildcards (% and _) and the backslash in the
        -- tagged path are escaped, then '/%' is appended to match descendants.
        -- An empty tagged path (presumably the volume root - confirm) uses
        -- '_%' instead, i.e. any non-empty path.
        -- regexp_replace is inlined from sf_internal.subtree_pattern
        OR d.path LIKE regexp_replace(twp.path, '(%|_|\\)', '\\\1', 'g')
            || (CASE WHEN twp.path = '' THEN '_%' ELSE '/%' END)
    GROUP BY d.volume_id, d.id
),
files_with_tag AS (
    -- One row per (file, tag): each file paired with every tag it carries,
    -- either inherited through its parent directory (first branch) or set
    -- directly on the file itself (second branch).
    --
    -- Columns:
    --   logical_size  - file.size
    --   physical_size - blocks * 512 divided by the link count; a link count
    --                   of zero or less is treated as 1 to avoid dividing by
    --                   zero (presumably this spreads the on-disk size of
    --                   hard-linked data across its links - confirm)
    --   atime_days / mtime_days - age in fractional days, measured from
    --                   current_timestamp
    --
    -- Entry ids are always matched together with volume_id: both joined CTEs
    -- are keyed by (volume_id, id), so matching on the id alone could attach
    -- tags that belong to an entry with the same id on a different volume.
    SELECT          file.id,
                    file.volume_id,
                    file.size AS logical_size,
                    (file.blocks * 512 / CASE WHEN file.nlinks > 0 THEN file.nlinks ELSE 1 END) AS physical_size,
                    (EXTRACT(EPOCH FROM(current_timestamp)) - EXTRACT(EPOCH FROM(atime))) / 86400.0 AS atime_days,
                    (EXTRACT(EPOCH FROM(current_timestamp)) - EXTRACT(EPOCH FROM(mtime))) / 86400.0 AS mtime_days,
                    unnest((dwt.tag_ids)::BIGINT[]) AS tag_id
    FROM sf.file_current AS file
        JOIN dirs_with_flat_tags AS dwt
            ON file.parent_id = dwt.id
                AND file.volume_id = dwt.volume_id
    UNION  -- UNION is required to remove duplicates (file got the same tag inherited and directly assigned)
    SELECT          file.id,
                    file.volume_id,
                    file.size AS logical_size,
                    (file.blocks * 512 / CASE WHEN file.nlinks > 0 THEN file.nlinks ELSE 1 END) AS physical_size,
                    (EXTRACT(EPOCH FROM(current_timestamp)) - EXTRACT(EPOCH FROM(atime))) / 86400.0 AS atime_days,
                    (EXTRACT(EPOCH FROM(current_timestamp)) - EXTRACT(EPOCH FROM(mtime))) / 86400.0 AS mtime_days,
                    unnest((tag.tag_ids)::BIGINT[]) AS tag_id
    FROM sf.file_current AS file
        JOIN fs_entries_with_flat_tags_or_within_zones AS tag
            ON file.id = tag.fs_entry_id
                AND file.volume_id = tag.volume_id
),
tag_per_vol_agg AS (
    -- Totals (logical bytes, physical bytes, file count) per tag, volume and
    -- pair of age buckets (last access, last modification).
    SELECT
        bucketed.tag_id,
        bucketed.volume_id,
        SUM(bucketed.logical_size) AS logical_size,
        SUM(bucketed.physical_size) AS physical_size,
        COUNT(*) AS count,
        bucketed.atime_age,
        bucketed.mtime_age
    FROM (
        -- Label each (file, tag) row with its age buckets first, so the outer
        -- query can group on plain columns. Thresholds are in days; an age
        -- that is negative or NULL matches no WHEN and falls through to
        -- 'future'.
        SELECT
            fwt.tag_id,
            fwt.volume_id,
            fwt.logical_size,
            fwt.physical_size,
            CASE
                WHEN fwt.atime_days >= 1095 THEN 'Previous Years: > 3'
                WHEN fwt.atime_days >= 730  THEN 'Previous Years: 2-3'
                WHEN fwt.atime_days >= 365  THEN 'Previous Years: 1-2'
                WHEN fwt.atime_days >= 180  THEN 'Previous Months: 6-12'
                WHEN fwt.atime_days >= 90   THEN 'Previous Months: 3-6'
                WHEN fwt.atime_days >= 30   THEN 'Previous Months: 1-3'
                WHEN fwt.atime_days >= 0    THEN 'Previous Months: 0-1'
                ELSE 'future'
            END AS atime_age,
            CASE
                WHEN fwt.mtime_days >= 1095 THEN 'Previous Years: > 3'
                WHEN fwt.mtime_days >= 730  THEN 'Previous Years: 2-3'
                WHEN fwt.mtime_days >= 365  THEN 'Previous Years: 1-2'
                WHEN fwt.mtime_days >= 180  THEN 'Previous Months: 6-12'
                WHEN fwt.mtime_days >= 90   THEN 'Previous Months: 3-6'
                WHEN fwt.mtime_days >= 30   THEN 'Previous Months: 1-3'
                WHEN fwt.mtime_days >= 0    THEN 'Previous Months: 0-1'
                ELSE 'future'
            END AS mtime_age
        FROM files_with_tag AS fwt
    ) AS bucketed
    GROUP BY
        bucketed.tag_id,
        bucketed.volume_id,
        bucketed.atime_age,
        bucketed.mtime_age
)
-- Final report: one row per volume, tag and (atime, mtime) age bucket pair.
SELECT
    vol.name AS volume_name,
    -- Tags are shown as "namespace:name"; the prefix is dropped when the
    -- namespace name is the empty string.
    (CASE WHEN ns.name = '' THEN '' ELSE ns.name || ':' END) || tn.name AS tag,
    agg.atime_age,
    agg.mtime_age,
    agg.logical_size AS size,  -- the column name "size" is kept for backward compatibility
    agg.physical_size,
    -- Share of the volume's total capacity, rounded to one decimal place;
    -- NULL when the capacity is missing, zero or negative.
    CASE
        WHEN vol.total_capacity > 0
            THEN ROUND(agg.physical_size * 100 / vol.total_capacity, 1)
    END AS "%_of_whole_volume",
    agg.count,
    -- physical_size / 10^9 multiplied by the volume's 'cost_per_gb' user
    -- parameter; a volume without that parameter gets a cost of 0.
    agg.physical_size / (1000 * 1000 * 1000) * COALESCE(cost_param.value, '0')::FLOAT AS cost
FROM tag_per_vol_agg AS agg
    INNER JOIN sf.tag_name AS tn
        ON tn.id = agg.tag_id
    INNER JOIN sf.tag_namespace AS ns
        ON ns.id = tn.namespace_id
    LEFT JOIN sf_volumes.volume AS vol
        ON vol.id = agg.volume_id
    LEFT JOIN sf_volumes.user_param AS cost_param
        ON cost_param.volume_id = agg.volume_id
            AND cost_param.name = 'cost_per_gb'
ORDER BY volume_name, tag, atime_age, mtime_age;
