/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */


-- Drains sf.refresh_aggregates_part_{{ partition_id }} into the session temp table
-- `_temp_refresh_aggregates` (distinct, non-NULL ids with their depth), creates the empty
-- helper temp table `_processed_ids`, and returns the greatest depth that was queued
-- (NULL when nothing was queued).
CREATE OR REPLACE FUNCTION sf_internal.prepare_calculate_aggrs_part_{{ partition_id }}()
RETURNS BIGINT AS $$
DECLARE
    deepest_level BIGINT;
BEGIN

    -- Every event enqueues a parent directory, so the refresh queue is expected to hold many duplicates;
    -- DISTINCT collapses them.
    -- Rows with id = NULL must be dropped, otherwise processing would loop forever. Such a row can appear
    -- as the "parent" of a root (for example on a REMOVED event for the root).
    CREATE TEMP TABLE _temp_refresh_aggregates AS
        WITH drained AS (
            DELETE FROM sf.refresh_aggregates_part_{{ partition_id }}
            RETURNING id, depth
        )
        SELECT DISTINCT drained.id, drained.depth
            FROM drained
            WHERE drained.id IS NOT NULL;

    CREATE INDEX depth_idx ON _temp_refresh_aggregates (depth);
    ANALYZE _temp_refresh_aggregates;

    CREATE TEMP TABLE _processed_ids (id BIGINT);

    -- MAX over an empty table yields NULL, which is returned as-is.
    deepest_level := (SELECT MAX(depth) FROM _temp_refresh_aggregates);

    RETURN deepest_level;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER VOLATILE PARALLEL UNSAFE;


-- Recalculates local and recursive aggregates for every queued directory at one depth level.
--
-- Steps:
--   1. Move the ids queued for `current_depth` from `_temp_refresh_aggregates` into `_processed_ids`.
--   2. Load the matching directories, their direct files and their direct subdirectories into
--      session temp tables.
--   3. Aggregate everything per directory into `calculate_aggrs_dir_agg_values`, then render the
--      result as JSON into `calculate_aggrs_dir_agg_json`.
--   4. Upsert the JSON into sf.calculated_aggrs and queue the parents of the processed directories
--      at depth - 1.
--
-- Parameters:
--   current_depth - depth level to process.
--
-- Returns a record (written_dirs, read_dirs, read_files):
--   written_dirs - number of directories whose aggregates were computed,
--   read_dirs    - sum of local 'dirs' totals of those directories,
--   read_files   - sum of local 'files' totals of those directories.
-- All three are 0 when nothing is queued for `current_depth`.
--
-- Requires the temp tables created by prepare_calculate_aggrs_part_{{ partition_id }} and the
-- `calculate_aggrs_*` temp tables created in 'temp_tables_for_event_processor.j2.sql'.
CREATE OR REPLACE FUNCTION sf_internal.calculate_aggrs_one_level_part_{{ partition_id }}(
    current_depth INT
) RETURNS record AS $$
DECLARE
    -- maximum number of entries kept in each of the by_ext / by_uid / by_gid local breakdowns
    local_aggrs_limit INT = 20;
    written_dirs BIGINT = 0;
    read_dirs BIGINT = 0;
    read_files BIGINT = 0;
    ret record;
BEGIN
    TRUNCATE _processed_ids;
    WITH current_depth_processed_ids AS (
        DELETE FROM _temp_refresh_aggregates
            WHERE depth = current_depth
        RETURNING id
    )
        INSERT INTO _processed_ids(id)
            SELECT DISTINCT id FROM current_depth_processed_ids;

    IF EXISTS (SELECT 1 FROM _processed_ids) THEN
        ANALYZE _processed_ids;
        -- Temp table 'calculate_aggrs_dir_process' is created in 'temp_tables_for_event_processor.j2.sql'
        -- when new Postgres session is created

        -- choose dirs that correspond to _processed_ids (and eliminate possible duplicates)
        INSERT INTO calculate_aggrs_dir_process
            SELECT d.id,
                   d.volume_id,
                   d.parent_id,
                   d.depth,
                   d.size,
                   d.blocks,
                   d.atime,
                   d.ctime,
                   d.mtime,
                   d.sync_time,
                   d.local_aggrs
                FROM sf.dir_current_part_{{ partition_id }} AS d JOIN _processed_ids p ON p.id = d.id
                -- This condition is important when a dir is moved and changes its depth:
                -- we ignore dirs that have been moved and no longer exist at current_depth.
                -- Their aggrs will be calculated when we encounter the entry in _processed_ids for the correct depth.
                WHERE d.depth = current_depth;

        ANALYZE calculate_aggrs_dir_process (id);
        -- Temp table 'calculate_aggrs_files_for_processed_dirs' is created in 'temp_tables_for_event_processor.j2.sql'
        -- when new Postgres session is created

        INSERT INTO calculate_aggrs_files_for_processed_dirs
            SELECT file.parent_id,
                   file.volume_id,
                   file.size,
                   file.blocks,
                   -- clamp nlinks to at least 1 so the divisions by nlinks below can never divide by zero
                   CASE WHEN file.nlinks > 0 THEN file.nlinks ELSE 1 END AS nlinks,
                   file.uid,
                   file.gid,
                   file.atime,
                   file.mtime,
                   file.ctime,
                   sf.get_extension(file.name) AS ext,
                   file.sync_time
                FROM sf.file_current_part_{{ partition_id }} AS file
                INNER JOIN calculate_aggrs_dir_process
                    ON file.parent_id = calculate_aggrs_dir_process.id;
        ANALYZE calculate_aggrs_files_for_processed_dirs (parent_id, ext, uid, gid);

        -- Temp table 'calculate_aggrs_subdirs_in_processed_dirs' is created in 'temp_tables_for_event_processor.j2.sql'
        -- when new Postgres session is created
        -- If you have a perf problem with calculate_aggrs_subdirs_in_processed_dirs, check STAR-7966.
        -- Pg sometimes chooses an insanely dumb plan even when stats seem to be up to date.
        -- Fortunately it looks like changing the primary key on dir_current from (volume_id, id) to (id, volume_id) helped here.
        -- See:
        --        Merge Join  (cost=0.17..433045.57 rows=1 width=37) (actual rows=133642 loops=1)
        --          Merge Cond: (dir.volume_id = calculate_aggrs_dir_process.volume_id)
        --          Join Filter: (dir.parent_id = calculate_aggrs_dir_process.id)
        --          Rows Removed by Join Filter: 35585356166
        --          ->  Index Scan using dir_current_part_1_pkey on dir_current_part_1 dir  (cost=0.08..429296.32 rows=407010 width=37) (actual rows=400521 loops=1)
        --                Index Cond: (volume_id = ANY ('{1}'::bigint[]))
        --          ->  Index Only Scan using dir_process_indx on calculate_aggrs_dir_process  (cost=0.08..2568.41 rows=88848 width=16) (actual rows=35585089288 loops=1)
        --                Heap Fetches: 35585089288
        -- Update: we decided to aggregate one volume per transaction, which allows us to remove the condition on volume_id from calculate_aggrs_dir_process.
        -- We hope that Postgres will no longer be able to choose a stupid plan for those queries.

        INSERT INTO calculate_aggrs_subdirs_in_processed_dirs
            SELECT dir.id,
                   dir.volume_id,
                   dir.parent_id,
                   dir.rec_aggrs,
                   dir.blocks,
                   dir.size
                FROM sf.dir_current_part_{{ partition_id }} AS dir
                INNER JOIN calculate_aggrs_dir_process ON dir.parent_id = calculate_aggrs_dir_process.id;
        ANALYZE calculate_aggrs_subdirs_in_processed_dirs (id);

        -- Temp table 'calculate_aggrs_dir_agg_values' is created in 'temp_tables_for_event_processor.j2.sql'
        -- when new Postgres session is created
        INSERT INTO calculate_aggrs_dir_agg_values
            -- Here local and recursive aggregates are calculated (at the same time because of performance reasons).
            -- For all currently processed dirs we will find subtables:
            -- 1. Dir that is currently processed (let's call it a root dir) - 1 row
            -- 2. Files directly inside root dir
            -- 3. Dirs directly inside root dir (with aggregates already calculated)
            -- Then we complicate it a little bit more because of local_aggrs so we add subtables:
            -- 4. Calculated files by extensions limited to N rows (where N=local_aggrs_limit)
            -- 5. Calculated files by uid limited to N rows (where N=local_aggrs_limit)
            -- 6. Calculated files by gid limited to N rows (where N=local_aggrs_limit)
            -- All of those tables are merged with UNION ALL and then grouped by root_id
            -- with proper aggregate on each column.
            -- REMEMBER - all of subtables must have exactly the same columns!

            SELECT id,
                   volume_id,
                   COALESCE(SUM(rec_size), 0) AS rec_size,
                   COALESCE(SUM(rec_blocks), 0) AS rec_blocks,
                   COALESCE(SUM(rec_size_div_nlinks), 0) AS rec_size_div_nlinks,
                   COALESCE(SUM(rec_blocks_div_nlinks), 0) AS rec_blocks_div_nlinks,
                   COALESCE(SUM(rec_dir_count), 0) AS rec_dir_count,
                   COALESCE(SUM(rec_file_count), 0) AS rec_file_count,
                   MIN(rec_min_dir_file_atime) AS rec_min_dir_file_atime,
                   MAX(rec_max_file_atime) AS rec_max_file_atime,
                   CASE WHEN every((source <> 'dirs_inside_root_dir') OR (rec_file_count = 0) OR (rec_avg_dir_file_atime IS NOT NULL)) THEN
                       -- compute recursive average only if it has been computed for all subdirectories which have files
                       -- (weighted by the number of entries each row represents; rows without an average are left out of both sums)
                       ROUND(SUM(rec_avg_dir_file_atime * (rec_file_count + rec_dir_count))::FLOAT8
                                / SUM(CASE WHEN rec_avg_dir_file_atime IS NULL THEN NULL ELSE rec_file_count + rec_dir_count END))::FLOAT8
                   ELSE
                       NULL
                   END AS rec_avg_dir_file_atime,
                   MIN(rec_min_dir_file_ctime) AS rec_min_dir_file_ctime,
                   MAX(rec_max_dir_file_ctime) AS rec_max_dir_file_ctime,
                   MIN(rec_min_dir_file_mtime) AS rec_min_dir_file_mtime,
                   MAX(rec_max_dir_file_mtime) AS rec_max_dir_file_mtime,
                   CASE WHEN every((source <> 'dirs_inside_root_dir') OR (rec_file_count = 0) OR (rec_avg_dir_file_mtime IS NOT NULL)) THEN
                       -- compute recursive average only if it has been computed for all subdirectories which have files
                       ROUND(SUM(rec_avg_dir_file_mtime * (rec_file_count + rec_dir_count))::FLOAT8
                                / SUM(CASE WHEN rec_avg_dir_file_mtime IS NULL THEN NULL ELSE rec_file_count + rec_dir_count END))::FLOAT8
                   ELSE
                       NULL
                   END AS rec_avg_dir_file_mtime,
                   MAX(rec_max_sync_time) AS rec_max_sync_time,
                   COALESCE(SUM(local_file_count), 0) AS local_file_count,
                   COALESCE(SUM(local_dir_count), 0) AS local_dir_count,
                   COALESCE(SUM(local_size), 0) AS local_size,
                   COALESCE(SUM(local_blocks), 0) AS local_blocks,
                   COALESCE(SUM(local_blocks_div_nlinks), 0) AS local_blocks_div_nlinks,
                   COALESCE(SUM(local_size_div_nlinks), 0) AS local_size_div_nlinks,
                   EXTRACT(EPOCH FROM MIN(file_atime)) AS local_min_file_atime,
                   EXTRACT(EPOCH FROM MIN(file_ctime)) AS local_min_file_ctime,
                   EXTRACT(EPOCH FROM MIN(file_mtime)) AS local_min_file_mtime,
                   EXTRACT(EPOCH FROM MAX(file_atime)) AS local_max_file_atime,
                   EXTRACT(EPOCH FROM MAX(file_ctime)) AS local_max_file_ctime,
                   EXTRACT(EPOCH FROM MAX(file_mtime)) AS local_max_file_mtime,
                   ROUND(AVG(EXTRACT(EPOCH FROM file_atime)))::FLOAT8 AS local_avg_file_atime,
                   ROUND(AVG(EXTRACT(EPOCH FROM file_mtime)))::FLOAT8 AS local_avg_file_mtime,
                   MAX(file_by_ext::varchar)::jsonb AS local_file_by_ext,  -- this is a hack! works because we have only 1 not null value in file_by_ext column!
                   MAX(file_by_uid::varchar)::jsonb AS local_file_by_uid,  -- this is a hack! works because we have only 1 not null value in file_by_uid column!
                   MAX(file_by_gid::varchar)::jsonb AS local_file_by_gid   -- this is a hack! works because we have only 1 not null value in file_by_gid column!
              FROM (
                SELECT 'root_dir_for_which_calculation_is_done' AS source,
                       calculate_aggrs_dir_process.id,
                       calculate_aggrs_dir_process.volume_id,
                       calculate_aggrs_dir_process.size AS rec_size,
                       calculate_aggrs_dir_process.blocks AS rec_blocks,
                       calculate_aggrs_dir_process.size AS rec_size_div_nlinks,
                       calculate_aggrs_dir_process.blocks AS rec_blocks_div_nlinks,
                       1 AS rec_dir_count,
                       0 AS rec_file_count,
                       EXTRACT(EPOCH FROM atime) AS rec_min_dir_file_atime,
                       NULL AS rec_max_file_atime,  -- do not count root for recursive aggregates max atime
                       EXTRACT(EPOCH FROM atime) AS rec_avg_dir_file_atime,
                       EXTRACT(EPOCH FROM ctime) AS rec_min_dir_file_ctime,
                       EXTRACT(EPOCH FROM ctime) AS rec_max_dir_file_ctime,
                       EXTRACT(EPOCH FROM mtime) AS rec_min_dir_file_mtime,
                       EXTRACT(EPOCH FROM mtime) AS rec_max_dir_file_mtime,
                       EXTRACT(EPOCH FROM mtime) AS rec_avg_dir_file_mtime,
                       EXTRACT(EPOCH FROM sync_time) AS rec_max_sync_time,
                       NULL AS local_file_count,  -- do not count root for local aggregates
                       NULL AS local_dir_count,   -- do not count root for local aggregates
                       NULL AS local_size,        -- do not count root for local aggregates
                       NULL AS local_blocks,      -- do not count root for local aggregates
                       NULL AS local_blocks_div_nlinks,
                       NULL AS local_size_div_nlinks,
                       NULL AS file_atime,
                       NULL AS file_ctime,
                       NULL AS file_mtime,
                       NULL::jsonb AS file_by_ext,
                       NULL::jsonb AS file_by_uid,
                       NULL::jsonb AS file_by_gid
                  FROM calculate_aggrs_dir_process

                UNION ALL

                SELECT 'files_inside_root_dir' AS source,
                       parent_id AS id,
                       volume_id,
                       size AS rec_size,
                       blocks AS rec_blocks,
                       size::NUMERIC / nlinks AS rec_size_div_nlinks,
                       blocks::BIGINT / nlinks AS rec_blocks_div_nlinks,
                       0 AS rec_dir_count,
                       1 AS rec_file_count,
                       EXTRACT(EPOCH FROM atime) AS rec_min_dir_file_atime,
                       EXTRACT(EPOCH FROM atime) AS rec_max_file_atime,
                       EXTRACT(EPOCH FROM atime) AS rec_avg_dir_file_atime,
                       EXTRACT(EPOCH FROM ctime) AS rec_min_dir_file_ctime,
                       EXTRACT(EPOCH FROM ctime) AS rec_max_dir_file_ctime,
                       EXTRACT(EPOCH FROM mtime) AS rec_min_dir_file_mtime,
                       EXTRACT(EPOCH FROM mtime) AS rec_max_dir_file_mtime,
                       EXTRACT(EPOCH FROM mtime) AS rec_avg_dir_file_mtime,
                       EXTRACT(EPOCH FROM sync_time) AS rec_max_sync_time,
                       1 AS local_file_count,
                       0 AS local_dir_count,
                       size AS local_size,
                       blocks AS local_blocks,
                       blocks::BIGINT / nlinks AS local_blocks_div_nlinks,
                       size::NUMERIC / nlinks AS local_size_div_nlinks,
                       atime AS file_atime,
                       ctime AS file_ctime,
                       mtime AS file_mtime,
                       NULL::jsonb AS file_by_ext,
                       NULL::jsonb AS file_by_uid,
                       NULL::jsonb AS file_by_gid
                  FROM calculate_aggrs_files_for_processed_dirs

                UNION ALL

                SELECT 'dirs_inside_root_dir' AS source,
                       subdir.parent_id AS id,
                       subdir.volume_id,
                       -- we use COALESCE to get value from `calculated_aggrs` if present. It is always newer than in `dir_current`.
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'size')::NUMERIC AS rec_size,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'blocks')::BIGINT AS rec_blocks,
                       -- fall back to the plain size/blocks when the stored aggregates have no *_div_nlinks key
                       COALESCE((COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'size_div_nlinks')::NUMERIC,
                                (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'size')::NUMERIC) AS rec_size_div_nlinks,
                       COALESCE((COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'blocks_div_nlinks')::BIGINT,
                                (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'blocks')::BIGINT) AS rec_blocks_div_nlinks,
                       COALESCE((COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'dirs')::BIGINT, 0) AS rec_dir_count,
                       COALESCE((COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'files')::BIGINT, 0) AS rec_file_count,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'min'->>'atime')::FLOAT8 AS rec_min_dir_file_atime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'max'->>'atime')::FLOAT8 AS rec_max_file_atime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'avg'->>'atime')::FLOAT8 AS rec_avg_dir_file_atime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'min'->>'ctime')::FLOAT8 AS rec_min_dir_file_ctime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'max'->>'ctime')::FLOAT8 AS rec_max_dir_file_ctime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'min'->>'mtime')::FLOAT8 AS rec_min_dir_file_mtime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'max'->>'mtime')::FLOAT8 AS rec_max_dir_file_mtime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->'avg'->>'mtime')::FLOAT8 AS rec_avg_dir_file_mtime,
                       (COALESCE(calculated_aggrs.rec_aggrs, subdir.rec_aggrs)->>'max_sync_time')::FLOAT8 AS rec_max_sync_time,
                       0 AS local_file_count,
                       1 AS local_dir_count,
                       0 AS local_size,
                       subdir.blocks AS local_blocks,
                       subdir.blocks AS local_blocks_div_nlinks,
                       0 AS local_size_div_nlinks,
                       NULL AS file_atime,
                       NULL AS file_ctime,
                       NULL AS file_mtime,
                       NULL::jsonb AS file_by_ext,
                       NULL::jsonb AS file_by_uid,
                       NULL::jsonb AS file_by_gid
                  FROM calculate_aggrs_subdirs_in_processed_dirs AS subdir
                       LEFT JOIN sf.calculated_aggrs_part_{{ partition_id }} AS calculated_aggrs
                              ON calculated_aggrs.id = subdir.id

                UNION ALL

                SELECT 'artificial_table_to_calculate_by_ext_local_aggrs' AS source,
                       id,
                       volume_id,
                       NULL AS rec_size,
                       NULL AS rec_blocks,
                       NULL AS rec_size_div_nlinks,
                       NULL AS rec_blocks_div_nlinks,
                       NULL AS rec_dir_count,
                       NULL AS rec_file_count,
                       NULL AS rec_min_dir_file_atime,
                       NULL AS rec_max_file_atime,
                       NULL AS rec_avg_dir_file_atime,
                       NULL AS rec_min_dir_file_ctime,
                       NULL AS rec_max_dir_file_ctime,
                       NULL AS rec_min_dir_file_mtime,
                       NULL AS rec_max_dir_file_mtime,
                       NULL AS rec_avg_dir_file_mtime,
                       NULL AS rec_max_sync_time,
                       NULL AS local_file_count,
                       NULL AS local_dir_count,
                       NULL AS local_size,
                       NULL AS local_blocks,
                       NULL AS local_blocks_div_nlinks,
                       NULL AS local_size_div_nlinks,
                       NULL AS file_atime,
                       NULL AS file_ctime,
                       NULL AS file_mtime,
                       jsonb_object_agg(ext, ext_stat) AS file_by_ext,
                       NULL::jsonb AS file_by_uid,
                       NULL::jsonb AS file_by_gid
                FROM ( SELECT *
                           FROM (
                               -- Rank extensions inside each dir by file count, then total size.
                               -- `ext` is the final tiebreaker so the top-N cut is deterministic between runs.
                               SELECT row_number() OVER (PARTITION BY parent_id ORDER BY count(*) DESC, SUM(size) DESC, ext) AS ext_number,
                                      parent_id AS id,
                                      volume_id,
                                      ext,
                                      -- NOTE(review): 'size_div_nlinks' here uses `size / nlinks` without the NUMERIC cast
                                      -- used for the totals above - confirm the difference is intended.
                                      json_build_object('files', count(*),
                                                        'size', SUM(size),
                                                        'size_div_nlinks', SUM(size / nlinks),
                                                        'blocks', SUM(blocks),
                                                        'blocks_div_nlinks', SUM(blocks::BIGINT / nlinks)
                                                        ) AS ext_stat
                               FROM calculate_aggrs_files_for_processed_dirs
                               GROUP BY volume_id, parent_id, ext
                           ) extensions
                           WHERE ext_number <= local_aggrs_limit
                      ) extensions_limited
                GROUP BY volume_id, id

                UNION ALL

                SELECT 'artificial_table_to_calculate_by_uid_local_aggrs' AS source,
                       id,
                       volume_id,
                       NULL AS rec_size,
                       NULL AS rec_blocks,
                       NULL AS rec_size_div_nlinks,
                       NULL AS rec_blocks_div_nlinks,
                       NULL AS rec_dir_count,
                       NULL AS rec_file_count,
                       NULL AS rec_min_dir_file_atime,
                       NULL AS rec_max_file_atime,
                       NULL AS rec_avg_dir_file_atime,
                       NULL AS rec_min_dir_file_ctime,
                       NULL AS rec_max_dir_file_ctime,
                       NULL AS rec_min_dir_file_mtime,
                       NULL AS rec_max_dir_file_mtime,
                       NULL AS rec_avg_dir_file_mtime,
                       NULL AS rec_max_sync_time,
                       NULL AS local_file_count,
                       NULL AS local_dir_count,
                       NULL AS local_size,
                       NULL AS local_blocks,
                       NULL AS local_blocks_div_nlinks,
                       NULL AS local_size_div_nlinks,
                       NULL AS file_atime,
                       NULL AS file_ctime,
                       NULL AS file_mtime,
                       NULL::jsonb AS file_by_ext,
                       jsonb_object_agg(uid, uid_stat) AS file_by_uid,
                       NULL::jsonb AS file_by_gid
                FROM ( SELECT *
                           FROM (
                               -- Rank uids inside each dir; `uid` is the final tiebreaker for a deterministic top-N cut.
                               SELECT row_number() OVER (PARTITION BY parent_id ORDER BY count(*) DESC, SUM(size) DESC, uid) AS uid_number,
                                      parent_id AS id,
                                      volume_id,
                                      uid AS uid,
                                      json_build_object('files', count(*),
                                                        'size', SUM(size),
                                                        'size_div_nlinks', SUM(size / nlinks),
                                                        'blocks', SUM(blocks),
                                                        'blocks_div_nlinks', SUM(blocks::BIGINT / nlinks)
                                                        ) AS uid_stat
                               FROM calculate_aggrs_files_for_processed_dirs
                               GROUP BY volume_id, parent_id, uid
                           ) uids
                           WHERE uid_number <= local_aggrs_limit
                      ) uids_limited
                GROUP BY volume_id, id

                UNION ALL

                SELECT 'artificial_table_to_calculate_by_gid_local_aggrs' AS source,
                       id,
                       volume_id,
                       NULL AS rec_size,
                       NULL AS rec_blocks,
                       NULL AS rec_size_div_nlinks,
                       NULL AS rec_blocks_div_nlinks,
                       NULL AS rec_dir_count,
                       NULL AS rec_file_count,
                       NULL AS rec_min_dir_file_atime,
                       NULL AS rec_max_file_atime,
                       NULL AS rec_avg_dir_file_atime,
                       NULL AS rec_min_dir_file_ctime,
                       NULL AS rec_max_dir_file_ctime,
                       NULL AS rec_min_dir_file_mtime,
                       NULL AS rec_max_dir_file_mtime,
                       NULL AS rec_avg_dir_file_mtime,
                       NULL AS rec_max_sync_time,
                       NULL AS local_file_count,
                       NULL AS local_dir_count,
                       NULL AS local_size,
                       NULL AS local_blocks,
                       NULL AS local_blocks_div_nlinks,
                       NULL AS local_size_div_nlinks,
                       NULL AS file_atime,
                       NULL AS file_ctime,
                       NULL AS file_mtime,
                       NULL::jsonb AS file_by_ext,
                       NULL::jsonb AS file_by_uid,
                       jsonb_object_agg(gid, gid_stat) AS file_by_gid
                FROM ( SELECT *
                           FROM (
                               -- Rank gids inside each dir; `gid` is the final tiebreaker for a deterministic top-N cut.
                               SELECT row_number() OVER (PARTITION BY parent_id ORDER BY count(*) DESC, SUM(size) DESC, gid) AS gid_number,
                                      parent_id AS id,
                                      volume_id,
                                      gid AS gid,
                                      json_build_object('files', count(*),
                                                        'size', SUM(size),
                                                        'size_div_nlinks', SUM(size / nlinks),
                                                        'blocks', SUM(blocks),
                                                        'blocks_div_nlinks', SUM(blocks::BIGINT / nlinks)
                                                        ) AS gid_stat
                               FROM calculate_aggrs_files_for_processed_dirs
                               GROUP BY volume_id, parent_id, gid
                           ) gids
                           WHERE gid_number <= local_aggrs_limit
                      ) gids_limited
                GROUP BY volume_id, id

            ) dir_agg_sub
             GROUP BY volume_id, id;

        -- Temp table 'calculate_aggrs_dir_agg_json' is created in 'temp_tables_for_event_processor.j2.sql'
        -- when new Postgres session is created
        INSERT INTO calculate_aggrs_dir_agg_json
            SELECT id,
                volume_id,
                -- min/max/avg are only emitted when the subtree contains at least one file
                CASE WHEN rec_file_count > 0 THEN
                    json_build_object(
                        'size', rec_size,
                        'blocks', rec_blocks,
                        'size_div_nlinks', floor(rec_size_div_nlinks),
                        'blocks_div_nlinks', rec_blocks_div_nlinks,
                        'dirs', rec_dir_count,
                        'files', rec_file_count,
                        'min', json_build_object('atime', rec_min_dir_file_atime,
                                                 'ctime', rec_min_dir_file_ctime,
                                                 'mtime', rec_min_dir_file_mtime),
                        'max', json_build_object('atime', rec_max_file_atime,
                                                 'ctime', rec_max_dir_file_ctime,
                                                 'mtime', rec_max_dir_file_mtime),
                        'avg', json_build_object('atime', rec_avg_dir_file_atime,
                                                 'mtime', rec_avg_dir_file_mtime),
                        'max_sync_time', rec_max_sync_time
                    )::jsonb
                ELSE
                    json_build_object(
                        'size', rec_size,
                        'blocks', rec_blocks,
                        'size_div_nlinks', floor(rec_size_div_nlinks),
                        'blocks_div_nlinks', rec_blocks_div_nlinks,
                        'dirs', rec_dir_count,
                        'files', rec_file_count,
                        'max_sync_time', rec_max_sync_time
                    )::jsonb
                END AS rec_aggrs,
                -- breakdowns and min/max/avg are only emitted when the dir directly contains at least one file
                CASE WHEN local_file_count > 0 THEN
                    json_build_object(
                        'total', json_build_object('size', local_size,
                                                   'size_div_nlinks', local_size_div_nlinks,
                                                   'blocks', local_blocks,
                                                   'blocks_div_nlinks', local_blocks_div_nlinks,
                                                   'files', local_file_count,
                                                   'dirs', local_dir_count
                                                   ),
                        'by_ext', local_file_by_ext,
                        'by_uid', local_file_by_uid,
                        'by_gid', local_file_by_gid,
                        'min', json_build_object('atime', local_min_file_atime,
                                                 'ctime', local_min_file_ctime,
                                                 'mtime', local_min_file_mtime),
                        'max', json_build_object('atime', local_max_file_atime,
                                                 'ctime', local_max_file_ctime,
                                                 'mtime', local_max_file_mtime),
                        'avg', json_build_object('atime', local_avg_file_atime,
                                                 'mtime', local_avg_file_mtime)
                    )::jsonb
                ELSE
                    json_build_object(
                        'total', json_build_object('size', local_size,
                                                   'size_div_nlinks', local_size_div_nlinks,
                                                   'blocks', local_blocks,
                                                   'blocks_div_nlinks', local_blocks_div_nlinks,
                                                   'files', local_file_count,
                                                   'dirs', local_dir_count
                                                   )
                    )::jsonb
                END AS local_aggrs
              FROM calculate_aggrs_dir_agg_values;
        ANALYZE calculate_aggrs_dir_agg_json;

        TRUNCATE calculate_aggrs_dir_agg_values;
        TRUNCATE calculate_aggrs_files_for_processed_dirs;
        TRUNCATE calculate_aggrs_subdirs_in_processed_dirs;

        -- statistics returned to the caller
        SELECT count(*)::BIGINT,
                COALESCE(sum((dir.local_aggrs->'total'->>'dirs')::BIGINT), 0),
                COALESCE(sum((dir.local_aggrs->'total'->>'files')::BIGINT), 0)
            INTO written_dirs, read_dirs, read_files
            FROM calculate_aggrs_dir_agg_json AS dir;

        -- Upsert calculated aggrs
        INSERT INTO sf.calculated_aggrs (id, volume_id, rec_aggrs, local_aggrs)
            SELECT calculate_aggrs_dir_agg_json.id, calculate_aggrs_dir_agg_json.volume_id, calculate_aggrs_dir_agg_json.rec_aggrs, calculate_aggrs_dir_agg_json.local_aggrs
            FROM calculate_aggrs_dir_agg_json
            ON CONFLICT (volume_id, id) DO UPDATE
                SET rec_aggrs=EXCLUDED.rec_aggrs, local_aggrs=EXCLUDED.local_aggrs;
        ANALYZE sf.calculated_aggrs_part_{{ partition_id }} (volume_id, id);

        -- Insert parents of processed rows to continue processing
        -- We use DISTINCT to ensure each parent_id is inserted only once
        -- We don't care if the inserted entry is already there, as we eliminate duplicates when calculating calculate_aggrs_dir_process
        INSERT INTO _temp_refresh_aggregates(id, depth)
            SELECT DISTINCT parent_id, depth - 1
                FROM calculate_aggrs_dir_process
                WHERE parent_id IS NOT NULL;

        TRUNCATE calculate_aggrs_dir_agg_json;
        TRUNCATE calculate_aggrs_dir_process;
    END IF;

    SELECT written_dirs, read_dirs, read_files INTO ret;
    RETURN ret;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER VOLATILE PARALLEL UNSAFE;


-- This function does not use {{ partition_id }} anywhere in its body, but it is still appended to its name.
-- Otherwise parallel transactions defining this function would fail with `tuple concurrently updated`
-- See: https://stackoverflow.com/questions/40525684/tuple-concurrently-updated-when-creating-functions-in-postgresql-pl-pgsql
-- Checks that the refresh queue `_temp_refresh_aggregates` is empty and then drops the
-- session temp tables `_temp_refresh_aggregates` and `_processed_ids`.
-- Fails with an assertion error listing the leftover `id:depth` pairs when the queue is not empty.
CREATE OR REPLACE FUNCTION sf_internal.finish_calculate_aggrs_part_{{ partition_id }}()
RETURNS void AS $$
BEGIN
    -- The message expression is only evaluated when the assertion fails.
    ASSERT NOT EXISTS (SELECT 1 FROM _temp_refresh_aggregates),
        '_temp_refresh_aggregates is not empty: '
            || array_to_string(ARRAY(SELECT id || ':' || depth FROM _temp_refresh_aggregates), ',');

    -- Drop temp tables
    DROP TABLE _temp_refresh_aggregates;
    DROP TABLE _processed_ids;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER VOLATILE PARALLEL UNSAFE;


-- Copies freshly calculated aggregates from sf.calculated_aggrs_part_{{ partition_id }} into
-- sf.dir_current_part_{{ partition_id }} for the share of directories owned by one worker.
--
-- Parameters:
--   worker_num    - index of the calling worker; it handles rows where id % workers_count = worker_num.
--   workers_count - total number of workers the ids are split between.
--
-- Returns a record (total_processed_dirs, total_changed_dirs):
--   total_processed_dirs - calculated rows picked up by this worker,
--   total_changed_dirs   - rows among them whose directory still exists in dir_current
--                          (these are the rows written back).
CREATE OR REPLACE FUNCTION sf_internal.sync_aggrs_part_{{ partition_id }}(
    worker_num BIGINT,
    workers_count BIGINT
) RETURNS record AS $$
DECLARE
    total_processed_dirs BIGINT = 0;
    total_changed_dirs BIGINT = 0;
    ret record;
BEGIN

    -- This table is truncated just after this transaction in `_sync_aggrs_if_needed`
    -- NOTE(review): "this table" presumably refers to sf.calculated_aggrs_part_{{ partition_id }},
    -- since aggrs_for_update is dropped below - confirm against the caller.
    CREATE TEMP TABLE aggrs_for_update AS
        SELECT id, volume_id, rec_aggrs, local_aggrs
            FROM sf.calculated_aggrs_part_{{ partition_id }}
            WHERE id % workers_count = worker_num;

    CREATE INDEX aggrs_for_update_id ON aggrs_for_update(id);
    ANALYZE aggrs_for_update;
    SELECT COUNT(*) INTO total_processed_dirs FROM aggrs_for_update;

    -- remove aggrs for entries which no longer exist
    DELETE FROM aggrs_for_update
        WHERE NOT EXISTS (
            SELECT 1
            FROM sf.dir_current_part_{{ partition_id }} AS dir_current
            WHERE dir_current.id = aggrs_for_update.id);

    SELECT COUNT(*) INTO total_changed_dirs FROM aggrs_for_update;

    -- store updated ids, so that they can be moved to the history in the future
    INSERT INTO sf.dirs_with_recently_changed_recaggrs (fs_entry_id, volume_id)
        SELECT id, volume_id FROM aggrs_for_update;

    -- update dir_current
    UPDATE sf.dir_current_part_{{ partition_id }} SET
        rec_aggrs = aggrs_for_update.rec_aggrs,
        local_aggrs = aggrs_for_update.local_aggrs
        FROM aggrs_for_update
            WHERE aggrs_for_update.id = sf.dir_current_part_{{ partition_id }}.id;

    DROP TABLE aggrs_for_update;

    SELECT total_processed_dirs, total_changed_dirs INTO ret;
    RETURN ret;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER VOLATILE PARALLEL UNSAFE;
