/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */
-- This query looks at the current utilization and capacity of all volumes and adds
-- a projection line showing the expected future growth trend, based upon past growth.
-- Note: for AWS/object volumes (sfs3), the reported available capacity may be exabytes,
-- which makes the graph less useful. To get around this, mark these volumes as "object volumes"
-- using an sf volume command so that they are not counted towards on-prem capacity.
-- Window functions (AVG(...) OVER ()) are used to attach the overall averages to every row.
-- Volumes whose user parameter "storage" is set to 'object'. They are excluded
-- from every total below so that they do not count towards capacity.
WITH s3_mounts AS (
    SELECT stats."volume name"
    FROM sf_reports.stats_current AS stats
    INNER JOIN sf_volumes.volume AS v
        ON stats."volume name" = v.name
    INNER JOIN sf_volumes.user_param AS up
        ON v.id = up.volume_id
    WHERE up.name = 'storage'
      AND up.value = 'object'
),
-- One row per historical run inside the look-back window: occupied space summed
-- across all non-object volumes.
sums AS (
    SELECT
        hist.run_time,
        SUM(hist."volume occupied space") AS "total occupied space"
    FROM sf_reports.stats_history AS hist
    WHERE hist.run_time > now() - INTERVAL '{{number_of_days_to_look_back}} days'
      -- NOT EXISTS rather than NOT IN: rows are treated the same way whether or
      -- not a volume name is NULL and whether or not any object volume exists.
      AND NOT EXISTS (
          SELECT 1
          FROM s3_mounts AS obj
          WHERE obj."volume name" = hist."volume name"
      )
    GROUP BY hist.run_time
),
-- Regression inputs: x = run time as epoch seconds, y = total occupied space.
-- The empty OVER () window attaches the overall mean of x and y to every row.
averages AS (
    SELECT
        "total occupied space" AS y,
        AVG("total occupied space") OVER () AS y_bar,
        EXTRACT(epoch FROM run_time) AS x,
        AVG(EXTRACT(epoch FROM run_time)) OVER () AS x_bar
    FROM sums
),
-- Least-squares slope: sum((x - x_bar) * (y - y_bar)) / sum((x - x_bar)^2).
-- Falls back to 0 when the denominator is zero or NULL (e.g. a single sample or
-- no samples at all), which avoids a division-by-zero error.
slope AS (
    SELECT
        CASE
            WHEN SUM((x - x_bar) * (x - x_bar)) != 0
                THEN SUM((x - x_bar) * (y - y_bar)) / SUM((x - x_bar) * (x - x_bar))
            ELSE 0
        END AS slope,
        -- x_bar and y_bar are constant across rows; MAX just collapses them to one value
        MAX(x_bar) AS max_xbar,
        MAX(y_bar) AS max_ybar
    FROM averages
),
-- Current snapshot (single row): latest run time, occupied space and capacity
-- summed over non-object volumes. This is the anchor for the future projection.
most_recent AS (
    SELECT
        MAX(cur.run_time) AS base_time,
        SUM(cur."volume occupied space") AS base_point,
        SUM(cur."total capacity") AS "total capacity"
    FROM sf_reports.stats_current AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM s3_mounts AS obj
        WHERE obj."volume name" = cur."volume name"
    )
),
-- y-intercept of the fitted line, from the fact that it passes through (x_bar, y_bar).
intercept AS (
    SELECT
        s.slope,
        s.max_ybar - s.max_xbar * s.slope AS intercept
    FROM slope AS s
)
-- Historical part: one row per past run. "Estimated size" simply repeats the
-- observed value here. Values are divided by 1024^4 (bytes -> TiB, assuming the
-- source columns are in bytes -- TODO confirm).
-- NOTE(review): "Consumed" is rounded to BIGINT here but not in the projected
-- part below; confirm whether that difference is intended.
SELECT
    TO_TIMESTAMP(averages.x) AS run_time,
    (averages.y / (1024.0*1024*1024*1024))::BIGINT AS "Consumed",
    (averages.y / (1024.0*1024*1024*1024))::BIGINT AS "Estimated size",
    most_recent."total capacity" / (1024.0*1024*1024*1024) AS "Total Capacity"
FROM averages
CROSS JOIN intercept
CROSS JOIN most_recent
-- UNION ALL: each branch is already unique per run_time, so no dedup pass is needed.
UNION ALL
-- Projected part: one row per day from today up to the requested horizon.
-- "Consumed" stays flat at the current value; "Estimated size" comes from
-- predict_point, which is defined outside this file (presumably it extrapolates
-- from the base point along the fitted line -- confirm its units match).
SELECT
    run_time,
    most_recent.base_point / (1024.0*1024*1024*1024) AS "Consumed",
    predict_point(run_time, intercept.slope, intercept.intercept, most_recent.base_time, most_recent.base_point) AS "Estimated size",
    most_recent."total capacity" / (1024.0*1024*1024*1024) AS "Total Capacity"
FROM generate_series(CURRENT_DATE, CURRENT_DATE + INTERVAL '{{number_of_days_to_predict_ahead}} days', INTERVAL '1 day') AS run_time
CROSS JOIN most_recent
CROSS JOIN intercept

ORDER BY run_time
