/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */

-- This query only shows the results from the monitor scans

-- Per-volume report of entries added, removed and changed (counts and sizes),
-- built from monitor-scan statistics and summed into date buckets.
--
-- Template parameters (substituted into the text before execution):
--   number_of_days_to_look_back   how many days back scans are considered
--   group_by_days                 width of one date bucket, in days
WITH scans AS (
    -- Scans whose type starts with "monitor" (case-insensitive), with their
    -- loader stats when a loader_info row exists (LEFT JOIN keeps scans without one).
    SELECT
        scan.id AS scan_id,
        scan.volume_id AS volume_id,
        scan.heartbeat::DATE AS date,
        loader_info.stats AS stats
    FROM sf_scans.scan
    LEFT JOIN sf_scans.loader_info loader_info ON loader_info.scan_id = scan.id
    WHERE scan.type ILIKE 'monitor%'
      AND scan.heartbeat >= NOW() - INTERVAL '{{number_of_days_to_look_back}} days'
    -- Plain UNION (not UNION ALL): rows identical in all four columns are merged.
    UNION SELECT
        msh.scan_id,
        msh.volume_id,
        -- History rows are dated one day before their run_time.
        (msh.run_time - INTERVAL '1 DAY')::DATE,
        msh.stats
    FROM sf_reports.monitor_scans_history msh
    -- NOTE(review): the filter uses heartbeat while the date comes from run_time - confirm this is intended.
    WHERE msh.heartbeat >= NOW() - INTERVAL '{{number_of_days_to_look_back}} days'
), extracted_stats AS (
    -- Pull the twelve processing_stats counters out of the stats JSON.
    -- A missing key (or missing stats) yields NULL, which COALESCE turns into 0.
    SELECT
      volume_id,
      scan_id,
      date,
      COALESCE((stats#>>'{processing_stats,ADDED_DIR_COUNT}')::BIGINT, 0) AS added_dir_count,
      COALESCE((stats#>>'{processing_stats,ADDED_FILE_COUNT}')::BIGINT, 0) AS added_file_count,
      COALESCE((stats#>>'{processing_stats,REMOVED_DIR_COUNT}')::BIGINT, 0) AS removed_dir_count,
      COALESCE((stats#>>'{processing_stats,REMOVED_FILE_COUNT}')::BIGINT, 0) AS removed_file_count,
      COALESCE((stats#>>'{processing_stats,CHANGED_DIR_COUNT}')::BIGINT, 0) AS changed_dir_count,
      COALESCE((stats#>>'{processing_stats,CHANGED_FILE_COUNT}')::BIGINT, 0) AS changed_file_count,
      COALESCE((stats#>>'{processing_stats,ADDED_DIR_SIZE}')::BIGINT, 0) AS added_dir_size,
      COALESCE((stats#>>'{processing_stats,ADDED_FILE_SIZE}')::BIGINT, 0) AS added_file_size,
      COALESCE((stats#>>'{processing_stats,REMOVED_DIR_SIZE}')::BIGINT, 0) AS removed_dir_size,
      COALESCE((stats#>>'{processing_stats,REMOVED_FILE_SIZE}')::BIGINT, 0) AS removed_file_size,
      COALESCE((stats#>>'{processing_stats,CHANGED_DIR_SIZE}')::BIGINT, 0) AS changed_dir_size_delta,
      COALESCE((stats#>>'{processing_stats,CHANGED_FILE_SIZE}')::BIGINT, 0) AS changed_file_size_delta
    FROM scans
), day_differences AS (
    -- For each (volume_id, scan_id), subtract the counters of the next-older row
    -- from the current row. The window is ordered newest first, so LEAD(..., 1)
    -- is the row with the preceding date. The oldest row subtracts the default 0.
    -- NOTE(review): presumably the counters are cumulative per scan - confirm.
    SELECT
        volume_id,
        date,
        added_dir_count - LEAD(added_dir_count, 1, 0::BIGINT) OVER volume_scan_window AS added_dir_count,
        added_file_count - LEAD(added_file_count, 1, 0::BIGINT) OVER volume_scan_window AS added_file_count,
        removed_dir_count - LEAD(removed_dir_count, 1, 0::BIGINT) OVER volume_scan_window AS removed_dir_count,
        removed_file_count - LEAD(removed_file_count, 1, 0::BIGINT) OVER volume_scan_window AS removed_file_count,
        changed_dir_count - LEAD(changed_dir_count, 1, 0::BIGINT) OVER volume_scan_window AS changed_dir_count,
        changed_file_count - LEAD(changed_file_count, 1, 0::BIGINT) OVER volume_scan_window AS changed_file_count,
        added_dir_size - LEAD(added_dir_size, 1, 0::BIGINT) OVER volume_scan_window AS added_dir_size,
        added_file_size - LEAD(added_file_size, 1, 0::BIGINT) OVER volume_scan_window AS added_file_size,
        removed_dir_size - LEAD(removed_dir_size, 1, 0::BIGINT) OVER volume_scan_window AS removed_dir_size,
        removed_file_size - LEAD(removed_file_size, 1, 0::BIGINT) OVER volume_scan_window AS removed_file_size,
        changed_dir_size_delta - LEAD(changed_dir_size_delta, 1, 0::BIGINT) OVER volume_scan_window AS changed_dir_size_delta,
        changed_file_size_delta - LEAD(changed_file_size_delta, 1, 0::BIGINT) OVER volume_scan_window AS changed_file_size_delta
    FROM extracted_stats
    WINDOW volume_scan_window AS (PARTITION BY volume_id, scan_id ORDER BY date DESC)
), day_results AS (
    -- Sum the per-day differences per volume and date bucket.
    SELECT
      volume_id,
      -- Round the day to the nearest multiple of the bucket width (in seconds since
      -- the epoch). The date is read as a plain TIMESTAMP and converted back
      -- AT TIME ZONE 'UTC', so the bucket does not depend on the session TimeZone.
      -- Going through TIMESTAMP WITH TIME ZONE instead shifts every date one day
      -- back in sessions west of UTC.
      (
          to_timestamp(
              ROUND(EXTRACT(EPOCH FROM stats.date::TIMESTAMP) / (3600 * 24 * {{group_by_days}}))
              * (3600 * 24 * {{group_by_days}})
          ) AT TIME ZONE 'UTC'
      )::DATE AS group_date,
      SUM(stats.added_dir_count)::BIGINT AS added_dir_count,
      SUM(stats.added_file_count)::BIGINT AS added_file_count,
      SUM(stats.removed_dir_count)::BIGINT AS removed_dir_count,
      SUM(stats.removed_file_count)::BIGINT AS removed_file_count,
      SUM(stats.changed_dir_count)::BIGINT AS changed_dir_count,
      SUM(stats.changed_file_count)::BIGINT AS changed_file_count,
      SUM(stats.added_dir_size)::BIGINT AS added_dir_size,
      SUM(stats.added_file_size)::BIGINT AS added_file_size,
      SUM(stats.removed_dir_size)::BIGINT AS removed_dir_size,
      SUM(stats.removed_file_size)::BIGINT AS removed_file_size,
      SUM(stats.changed_dir_size_delta)::BIGINT AS changed_dir_size_delta,
      SUM(stats.changed_file_size_delta)::BIGINT AS changed_file_size_delta
    FROM day_differences stats
    GROUP BY volume_id, group_date
)
-- Final shape: directory and file figures are combined, sizes are divided by
-- 1024^3 and rounded to two decimals. The "negative" columns carry the same
-- removed figures with the sign flipped.
SELECT
    volume.name AS "VOLUME NAME::filter", -- ::filter is magic word for redash
    group_date AS date,
    added_dir_count + added_file_count AS "added files",
    removed_dir_count + removed_file_count AS "removed files",
    (- removed_dir_count - removed_file_count) AS "removed files negative",
    changed_dir_count + changed_file_count AS "changed files",
    ROUND((added_dir_size + added_file_size) / (1024 * 1024 * 1024.0), 2) AS "added files size (GiB)",
    ROUND((removed_dir_size + removed_file_size) / (1024 * 1024 * 1024.0), 2) AS "removed files size (GiB)",
    ROUND((- removed_dir_size - removed_file_size) / (1024 * 1024 * 1024.0), 2) AS "removed files size negative (GiB)",
    ROUND((changed_dir_size_delta + changed_file_size_delta) / (1024 * 1024 * 1024.0), 2) AS "changed files size delta (GiB)"
FROM day_results
-- LEFT JOIN keeps result rows even when no volume row matches (name is then NULL).
LEFT JOIN sf_volumes.volume volume ON volume.id = day_results.volume_id
ORDER BY volume.name, date DESC;
