/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */

-- Archive upload throughput (MB/s) per volume and archive target, bucketed by
-- calendar month and ISO week, for archive jobs created inside the configured
-- look-back window.
--
-- Output columns: volume, target, "avg MB/s", "max MB/s", "median MB/s",
-- archive_month (YYYY-MM), archive_week (ISO year/ISO week, e.g. 2020/53).
-- MIN() is intentionally not reported because there are too many zeros in
-- customer databases.
WITH ajobs AS (
    -- One row per (archive job, low-level job id listed under UPLOADING_FILES).
    -- Jobs without an UPLOADING_FILES array produce no rows here.
    SELECT
        volume_id,
        target_id,
        archive_target_name,
        creation_time,
        end_time,
        -- Duration of the whole archive job in seconds (NULL while end_time is NULL).
        EXTRACT(EPOCH FROM (end_time - creation_time)) AS duration_s,
        JSONB_ARRAY_ELEMENTS_TEXT(low_level_jobs::JSONB->'UPLOADING_FILES')::BIGINT AS llj
    FROM sf_archive.archive_job
    WHERE creation_time >= NOW() - INTERVAL '{{number_of_days_to_look_back}} day'
),
jobs AS (
    -- Attach the bytes transferred by each low-level job and derive its throughput.
    SELECT
        vol.name,
        ajobs.archive_target_name,
        ajobs.creation_time,
        ajobs.end_time,
        -- Decimal megabytes (10^6 bytes) per second. A zero or NULL duration
        -- falls through to the ELSE branch and is reported as 0.
        CASE
            WHEN ajobs.duration_s != 0
                THEN (sji.fs_stats->>'fs_bytes_done')::BIGINT / (1000 * 1000.0) / ajobs.duration_s
            ELSE 0.00
        END AS "MB/s"
    FROM ajobs
    INNER JOIN sf_dispatcher.incarnation AS sji ON ajobs.llj = sji.job_id
    -- NOTE(review): no column of archive_target is used by this query; the join
    -- is kept only so the row count cannot change. It can be dropped if
    -- archive_target.id is unique - confirm against the schema.
    LEFT JOIN sf_archive.archive_target AS tgt ON ajobs.target_id = tgt.id
    LEFT JOIN sf_volumes.volume AS vol ON vol.id = ajobs.volume_id
    WHERE sji.fs_stats->>'fs_bytes_done' IS NOT NULL
),
jobs_with_dates AS (
    -- Expand every job into one row per calendar day between its creation and
    -- end dates (inclusive). Rows with a NULL end_time yield no days and drop out.
    -- NOTE(review): a job spanning several days of the same week contributes one
    -- row per day to that week's aggregates - confirm this weighting is intended.
    SELECT
        name AS volume,
        archive_target_name AS target,
        "MB/s",
        GENERATE_SERIES(creation_time::DATE, end_time::DATE, '1 day') AS archive_date
    FROM jobs
),
jobs_with_months_and_weeks AS (
    SELECT
        volume,
        target,
        "MB/s"::NUMERIC AS "MB/s",
        archive_date,
        TO_CHAR(archive_date, 'YYYY-MM') AS archive_month,
        -- ISO week must be paired with the ISO year, not the calendar year:
        -- 2021-01-01 belongs to ISO week 53 of 2020 and must read 2020/53.
        TO_CHAR(archive_date, 'IYYY/IW') AS archive_week
    FROM jobs_with_dates
),
throughput_stats AS (
    -- Compute each aggregate once; the final SELECT only formats them.
    SELECT
        volume,
        target,
        archive_month,
        archive_week,
        AVG("MB/s") AS avg_mbps,
        MAX("MB/s") AS max_mbps,
        (PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY "MB/s"))::NUMERIC AS median_mbps
    FROM jobs_with_months_and_weeks
    GROUP BY volume, target, archive_month, archive_week
)

-- Formatting rule shared by the three metrics: a value whose text form is a
-- plain integer, or whose fractional part starts with ".00", is shown as a
-- single rounded number; anything else is shown as a "floor-ceil" range.
SELECT
    volume,
    target,
    CASE
        WHEN avg_mbps::TEXT ~ '^[0-9]+$|\.00' THEN ROUND(avg_mbps)::TEXT
        ELSE CONCAT(FLOOR(avg_mbps), '-', CEIL(avg_mbps))
    END AS "avg MB/s",
    CASE
        WHEN max_mbps::TEXT ~ '^[0-9]+$|\.00' THEN ROUND(max_mbps)::TEXT
        ELSE CONCAT(FLOOR(max_mbps), '-', CEIL(max_mbps))
    END AS "max MB/s",
    CASE
        WHEN median_mbps::TEXT ~ '^[0-9]+$|\.00' THEN ROUND(median_mbps)::TEXT
        ELSE CONCAT(FLOOR(median_mbps), '-', CEIL(median_mbps))
    END AS "median MB/s",
    archive_month,
    archive_week
FROM throughput_stats
ORDER BY volume, target, archive_month, archive_week
