/* legal disclaimer in /opt/starfish/data/starfish/sql-copyright-and-license.md */

/*
 * consider the following scenario:
 * - sf_cron.cron has one row with fs_entry_id=1
 * - statistics and histograms of sf_cron.cron are accurate at this point
 * - a new big volume is added and a great many directories are added to sf.dir_current
 * - one new row is added to sf_cron.cron with sf_cron.cron.fs_entry_id = 10000000
 * - autovacuum does not analyze the table after this insert (autovacuum_analyze_threshold is 50 by default)
 * - now we want to list all crons, together with path of each directory (simple use case)
 * - we perform:
 *   > SELECT c.fs_entry_id, d.path FROM sf_cron.cron AS c JOIN sf.dir_current AS d ON c.fs_entry_id = d.id;
 * - postgres thinks that sf_cron.cron has one row and is convinced that this only row has a small fs_entry_id
 * - thus, it chooses a merge join for joining sf_cron.cron and sf.dir_current (and it expects that
 *   iterating over sf.dir_current.id will end immediately, just after reaching sf.dir_current.id=1)
 * - however, such a plan may be very bad, because the second sf_cron.cron row (which postgres is not aware of)
 *   has fs_entry_id = 10000000, so the merge join will iterate through all rows in sf.dir_current that have id < 10000000
 *
 * To avoid such problems we configure autovacuum to re-analyze this table on every change.
 * This is not a performance problem, because this table is very small and rarely changes.
 */

/*
 * Make autovacuum re-analyze sf_cron.cron after any single row change.
 *
 * Autovacuum analyzes a table once the number of rows inserted, updated or
 * deleted since the last ANALYZE is strictly greater than
 *     autovacuum_analyze_threshold + autovacuum_analyze_scale_factor * reltuples
 * so both parameters must be 0 for one changed row to be enough; with a
 * threshold of 1 the first change would not trigger an analyze, only the second.
 *
 * The analyze is performed by the autovacuum daemon on its next pass over the
 * table, not synchronously with the change.
 */
ALTER TABLE sf_cron.cron SET (
    autovacuum_analyze_threshold = 0,
    autovacuum_analyze_scale_factor = 0.00
);
