#!/opt/starfish/examples/venv/bin/python3
"""
***********************************************************************************************************

 Starfish Storage Corporation ("Starfish") CONFIDENTIAL
 Unpublished Copyright (c) 2011 - present Starfish Storage Corporation, All Rights Reserved.

 NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
 Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
 intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
 U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
 Dissemination of this information or reproduction of this material is strictly forbidden unless prior
 written permission is obtained from Starfish. Access to the source code contained herein is hereby
 forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
 confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
 Starfish's software.

 ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
 THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
 AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
 FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
 DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
 WHOLE OR IN PART.

 FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
   These notices shall be marked on any reproduction of this data, in whole or in part.
   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
   Software clause at DFARS 52.227-7013.

***********************************************************************************************************
"""

###############################################################################
#  Author Doug Hughes
#  Created as oldprefix.py 2019-07-29
# grab shortest (topmost) prefix paths for old directory trees
#
# WARNING: This script runs queries directly against the Starfish database
# and could be used to cause tremendous harm and render Starfish inoperable if
# used incorrectly. It is recommended to run only SELECT queries, never UPDATE queries.
#
# version 1.3.1 - doug 2023-09-22
#  use ancestor_ids for sorting
#
# version 1.3    - doug 2023-07-13
# includes separation of mtime and atime so they can be used as
# separate thresholds, but breaks backward compatibility because of this
# new test cases added

import csv
import os
import signal
import sys
import unittest
from datetime import datetime

import dateutil.relativedelta

try:
    import argparse
    import configparser
    from argparse import RawTextHelpFormatter

    import psycopg2
except Exception:
    print(
        "In order to use this, you must have configparser, argparse, "
        "and psycopg2 modules installed (available via pip among "
        "other means.)"
    )
    sys.exit(1)


class TestQ(unittest.TestCase):
    """Integration tests for the oldprefix2 query logic.

    These tests are not self-contained: they build a directory tree under
    /tmp/oldprefix2, rescan it with the ``sf`` command line tool and query the
    Starfish database through runquery().  The volume has to be registered
    once beforehand (``sf volume add testoldprefix /tmp/oldprefix2``).
    """

    def setUp(self):
        """Rebuild and rescan the test volume before every test."""
        TestQ.make_src_vol()

    @staticmethod
    def make_src_vol():
        """Create the test tree, age its entries and rescan the volume."""
        testvol = "/tmp/oldprefix2"
        ssdir2 = f"{testvol}/subdir1/ssdir2" + "\u01fc"  # non-ASCII name on purpose

        trymkdir(testvol)
        os.system(f"rm -rf {testvol}/*")
        for directory in (
            f"{testvol}/subdir1",
            f"{testvol}/subdir1/ssdir1",
            ssdir2,
            f"{testvol}/subdir2",
        ):
            trymkdir(directory)

        for filename in (
            f"{testvol}/subdir1/file1",
            f"{testvol}/subdir1/ssdir1/file1a",
            f"{ssdir2}/file2",
            f"{testvol}/subdir1/ssdir1/file3",
            f"{testvol}/subdir2/file4",
            f"{testvol}/subdir2/file5",
        ):
            with open(filename, "w") as f:
                print("hello", file=f)

        def ago(**delta):
            """Return the epoch timestamp lying *delta* (negative values) in the past."""
            return (datetime.now() + dateutil.relativedelta.relativedelta(**delta)).timestamp()

        ago_3m = ago(months=-3)
        ago_4m = ago(months=-4)
        ago_6m = ago(months=-6)
        ago_2y = ago(years=-2)
        ago_3y = ago(years=-3)

        # (path, atime, mtime); files are aged first, then the directories
        # that contain them, in the same order the assertions were written for.
        aged_entries = (
            (f"{testvol}/subdir1/file1", ago_3m, ago_4m),
            (f"{ssdir2}/file2", ago_6m, ago_3y),
            (f"{testvol}/subdir1/ssdir1/file1a", ago_6m, ago_2y),
            (f"{testvol}/subdir1/ssdir1/file3", ago_6m, ago_2y),
            (f"{testvol}/subdir2/file4", ago_4m, ago_2y),
            (f"{testvol}/subdir2/file5", ago_6m, ago_2y),
            (f"{testvol}/subdir2", ago_4m, ago_2y),
            (f"{testvol}/subdir1", ago_3m, ago_2y),
            (f"{testvol}/subdir1/ssdir1", ago_6m, ago_2y),
            (ssdir2, ago_6m, ago_3y),
        )
        for entry, atime, mtime in aged_entries:
            os.utime(path=entry, times=(atime, mtime))

        # need to do this once for testing
        # os.system(f"sf volume add testoldprefix {testvol}")
        os.system("sf scan start --wait testoldprefix >/dev/null 2>&1")

    @staticmethod
    def _make_args(**overrides):
        """Return a fresh argparse.Namespace with the options runquery() reads.

        A new instance is built for every test so that no state is written to
        the argparse.Namespace class itself or shared between tests.
        """
        options = {
            "volume": "testoldprefix",
            "column_header": False,
            "username": None,
            "with_tag": None,
            "without_tag": None,
            "zone": None,
            "dir_access_mode": None,
            "atime": None,
            "mtime": None,
            "debug": False,
            "showowner": False,
            "showgroup": False,
            "showfiles": True,
            "showsize": True,
            "showvolume": False,
            "prefix": "%",
        }
        options.update(overrides)
        return argparse.Namespace(**options)

    def test_auth(self):
        """test connection"""
        conn = psycopg2.connect(getpgauth())
        self.addCleanup(conn.close)
        self.assertIsNotNone(conn)

    def test_query(self):
        """test sql return"""
        conn = psycopg2.connect(getpgauth())
        self.addCleanup(conn.close)
        query = """select count(*) from sf_volumes.volume"""
        cur = conn.cursor()
        cur.execute(query)
        self.assertIsNotNone(cur)
        rows = cur.fetchall()
        self.assertEqual(len(rows), 1)

    def test_age_result_2m(self):
        """test query permutations for accuracy at 2m"""
        args = self._make_args(atime="2 months")
        delimiter = ","
        rows, dircount, totalfiles, totalsize = runquery(args, delimiter)
        # the volume root (empty path) is the single topmost match
        self.assertEqual(dircount, 1)
        self.assertEqual(totalfiles, 6)
        self.assertEqual(totalsize, 0.0)
        self.assertEqual(rows[0][0], "")

        args.showowner = True
        args.showgroup = True
        args.atime = "4 months"
        rows, dircount, totalfiles, totalsize = runquery(args, delimiter)
        self.assertEqual(dircount, 2)
        self.assertEqual(totalfiles, 3)
        self.assertEqual(totalsize, 0.0)
        self.assertEqual(rows[0][0], "subdir1/ssdir1")
        # owner/group depend on the account that created the test tree
        self.assertEqual(rows[0][3], "doug")
        self.assertEqual(rows[0][4], "doug")
        self.assertEqual(rows[1][5], "1")

        args.showowner = False
        args.showgroup = False
        args.showvolume = True
        args.atime = "9 months"
        # nothing is that old: runquery() reports it and exits
        with self.assertRaises(SystemExit):
            rows, dircount, totalfiles, totalsize = runquery(args, delimiter)

    def test_age_result_3m(self):
        """test query permutations for accuracy at 3m+"""
        args = self._make_args(
            column_header=True,
            atime="3 months",
            mtime="1 year",
            showgroup=True,
        )
        delimiter = ","
        rows, dircount, totalfiles, totalsize = runquery(args, delimiter)
        self.assertEqual(dircount, 3)
        self.assertEqual(totalfiles, 5)
        self.assertEqual(totalsize, 0.0)
        self.assertEqual(rows[0][0], "subdir1/ssdir1")
        self.assertEqual(rows[1][0], "subdir1/ssdir2Ǽ")
        self.assertEqual(rows[2][0], "subdir2")
        self.assertEqual(rows[2][3], "doug")
        # run again with 4 months, but similar setup
        args.atime = "4 months"
        args.mtime = "2 years"
        rows, dircount, totalfiles, totalsize = runquery(args, delimiter)
        self.assertEqual(dircount, 1)
        self.assertEqual(totalfiles, 1)
        self.assertEqual(totalsize, 0.0)
        self.assertEqual(rows[0][0], "subdir1/ssdir2Ǽ")


def getpgauth(cfgfile="/opt/starfish/etc/99-local.ini"):
    """Return the PostgreSQL connection URI stored in the Starfish config file.

    Args:
        cfgfile: path of the INI file to read.  Defaults to the Starfish
            server's local configuration file.

    Returns:
        The value of the ``pg_uri`` option in the ``[pg]`` section.

    Exits with status 1 (after printing a message) when the file cannot be
    read, cannot be parsed, or does not contain the ``[pg]``/``pg_uri`` entry.
    """
    if not os.access(cfgfile, os.R_OK):
        print(cfgfile + " is not readable. Try with sudo.")
        sys.exit(1)

    config = configparser.ConfigParser()
    try:
        # ConfigParser.read() silently skips files it cannot open, so open the
        # file explicitly to make I/O failures visible.
        with open(cfgfile) as cfg:
            config.read_file(cfg)
        return config.get("pg", "pg_uri")
    except OSError:
        print("can't read config file to get connection uri. check permissions.")
        sys.exit(1)
    except configparser.Error as err:
        # malformed file, or missing [pg] section / pg_uri option
        print(f"{cfgfile} does not provide a usable [pg] pg_uri setting: {err}")
        sys.exit(1)


# ##################### main #####################


def signal_handler(sig, frame):
    """Report the interruption on stdout and end the process with status 0.

    Both arguments are supplied by the signal machinery and are not used.
    """
    notice = "interrupted by keyboard"
    print(notice)
    raise SystemExit(0)


def humanize(num, suffix="B"):
    """Format a quantity given in gibi-units with the largest fitting binary prefix.

    The value is divided by 1024 until its magnitude drops below 1024, moving
    from "Gi" up through "Zi"; anything still larger is reported as "Yi".
    """
    value = num
    prefixes = ("Gi", "Ti", "Pi", "Ei", "Zi")
    position = 0
    while position < len(prefixes):
        if -1024.0 < value < 1024.0:
            return "{:3.1f}{}{}".format(value, prefixes[position], suffix)
        value = value / 1024.0
        position += 1
    return "{:.1f}Yi{}".format(value, suffix)


def trymkdir(path):
    """Create directory *path* (including missing parents) on a best-effort basis.

    An already existing directory is fine, and other filesystem failures
    (permissions, a file in the way, ...) are deliberately ignored so test
    setup can proceed.  Only OSError is suppressed: programming errors such
    as passing a non-path object still surface.
    """
    try:
        os.makedirs(path, exist_ok=True)
    except OSError:
        # best effort by design; a later open()/utime() will report real problems
        pass


def _build_query(args):
    """Assemble the SQL text and the bind parameters for runquery().

    Every user-supplied value is passed as a named psycopg2 parameter
    (``%(name)s``) rather than pasted into the SQL text, so quotes in a
    volume name, prefix, tag or zone cannot break the statement.

    Returns:
        (sql, params): the statement and the dict of values to bind.
    """
    params = {}
    ctes = []  # optional common table expressions, emitted under one WITH
    joins = ""  # extra JOIN clauses
    filters = ""  # extra WHERE terms, each starting with AND

    # restriction to a volume and/or a directory prefix inside it
    scope = ""
    if args.volume:
        scope = "AND d.volume_id = sf.volume_id_from_name(%(volume)s)"
        params["volume"] = args.volume
    if args.prefix != "%":
        scope += " AND d.path LIKE %(path_pattern)s"
        params["path_pattern"] = f"{args.prefix}%"

    if args.username:
        filters += " AND u.name = %(username)s "
        params["username"] = args.username

    if args.dir_access_mode:
        # the mode is given in octal on the command line
        filters += " AND d.perms = %(perms)s "
        params["perms"] = int(args.dir_access_mode, 8)

    if args.with_tag:
        filters += " AND tn.name ~ %(with_tag)s"
        params["with_tag"] = args.with_tag
        joins += """ INNER JOIN sf.tag_name tn on tn.namespace_id = 1
                         INNER JOIN sf.tag_value tv ON tv.volume_id = v.id
                             AND tv.fs_entry_id = d.id
                             AND tv.name_id = tn.id"""

    if args.without_tag:
        params["without_tag"] = args.without_tag
        ctes.append(
            f"""taggeddirs AS (
                       SELECT d.volume_id, d.id
                       FROM sf.dir_current d
                       INNER JOIN sf_volumes.volume v ON v.id = d.volume_id
                       INNER JOIN sf.tag_value tv ON tv.volume_id = v.id
                         AND tv.fs_entry_id = d.id
                       INNER JOIN sf.tag_name tn on tn.namespace_id = 1
                         AND tv.name_id = tn.id
                       WHERE tn.name ~ %(without_tag)s
                             {scope}
                       )"""
        )
        joins += """ LEFT JOIN taggeddirs tagged on v.id = tagged.volume_id
                             AND d.id = tagged.id"""
        filters += """ AND tagged.id is NULL"""

    if args.zone:
        params["zone"] = args.zone
        ctes.append(
            """zonedirs AS (
                       SELECT d.volume_id, d.path as zonepath
                       FROM sf.dir_current d
                       JOIN sf_volumes.volume ON (d.volume_id = volume.id)
                       JOIN sf.tag_value_current tv ON (tv.fs_entry_id=d.id AND tv.volume_id=d.volume_id)
                       JOIN sf.tag_name tn ON (tn.id=tv.name_id)
                       JOIN sf.tag_namespace tns ON (tns.id=tn.namespace_id AND tns.name='__zone')
                       WHERE tn.name = (select id::varchar from sf_auth.zone where name = %(zone)s)
                       )"""
        )
        # '%%' is a literal percent sign: the statement is executed with bind
        # parameters (at least %(zone)s here), so '%' has to be doubled.
        joins += """ JOIN zonedirs ON v.id = zonedirs.volume_id
                         AND d.path LIKE zonedirs.zonepath || '%%'"""

    age_terms = []
    if args.atime:
        params["atime"] = args.atime
        age_terms.append(
            "to_timestamp((d.rec_aggrs->'max'->>'atime')::BIGINT) < current_date - %(atime)s::interval"
        )
    if args.mtime:
        params["mtime"] = args.mtime
        age_terms.append(
            "to_timestamp((d.rec_aggrs->'max'->>'mtime')::BIGINT) < current_date - %(mtime)s::interval"
        )
    agespec = " AND ".join(age_terms)

    # several CTEs must share one WITH keyword, separated by commas
    prequel = "WITH " + ",\n".join(ctes) if ctes else ""

    sql = f"""{prequel}
        SELECT d.path,
        to_timestamp((d.rec_aggrs->'max'->>'atime')::BIGINT) as "max atime",
        to_timestamp((d.rec_aggrs->'max'->>'mtime')::BIGINT) as "max mtime",
        CASE WHEN u.name is NULL THEN d.uid::text ELSE u.name END as user,
        CASE WHEN g.name is NULL THEN d.gid::text ELSE g.name END as group,
        d.rec_aggrs->>'files' as files,
        ROUND((d.rec_aggrs->>'size')::BIGINT / (1024.0*1024*1024),3)::text as sizeGiB,
        v.name as volume
        FROM sf.dir_current d
        INNER JOIN sf_volumes.volume v on v.id = d.volume_id
        LEFT JOIN sf.uid_mapping u on d.uid = u.uid AND u.volume_id = v.id
        LEFT JOIN sf.gid_mapping g on d.gid = g.gid AND g.volume_id = v.id
            {joins}
        WHERE
            {agespec}
            {scope}
            {filters}
        ORDER BY d.ancestor_ids """  # noqa E501
    return sql, params


def _format_row(args, row):
    """Turn one result row into the list of output strings selected by *args*."""
    out = [row[0], str(row[1]), str(row[2])]
    optional_columns = (
        (args.showowner, 3),
        (args.showgroup, 4),
        (args.showfiles, 5),
        (args.showsize, 6),
        (args.showvolume, 7),
    )
    out.extend(str(row[index]) for wanted, index in optional_columns if wanted)
    return out


def runquery(args, delimiter):
    """Query the Starfish database and return the topmost matching directories.

    The query selects every directory whose newest recursive atime and/or
    mtime is older than ``args.atime`` / ``args.mtime``, optionally limited by
    volume, path prefix, owner, permissions, tags or zone.  Rows come back
    ordered by ``ancestor_ids``; a row is kept only when the previously kept
    path is not one of its ancestors, so only the top of each old subtree is
    reported.

    Args:
        args: namespace providing volume, prefix, username, dir_access_mode,
            with_tag, without_tag, zone, atime, mtime, debug, column_header,
            showowner, showgroup, showfiles, showsize and showvolume.
        delimiter: separator used for the header line and debug output that
            this function prints itself.

    Returns:
        (rows, dircount, totalfiles, totalsize): the formatted output rows
        (lists of strings), the number of rows, and the summed file count and
        size (GiB) of the reported subtrees.

    Exits with status 1 when the database connection fails and with status 0
    (after printing a notice) when no directory matches.
    """
    q, params = _build_query(args)

    try:
        conn = psycopg2.connect(getpgauth())
    except psycopg2.DatabaseError as e:
        print(f"Unable to connect to the database: {e}")
        sys.exit(1)

    try:
        cur = conn.cursor()

        if args.debug:
            # show the statement exactly as it is sent, values included
            print("executing query: " + cur.mogrify(q, params or None).decode(errors="replace"))

        cur.execute(q, params or None)

        if args.column_header:
            colnames = [desc[0] for desc in cur.description]
            print(delimiter.join(str(el) for el in colnames))

        if cur.rowcount == 0:
            print("no matching directories")
            cur.close()
            conn.rollback()
            sys.exit(0)

        dircount = 0
        totalsize = 0.0
        totalfiles = 0
        refpath = None  # path of the most recently reported subtree root
        rows = []
        # NOTE(review): paths are compared without looking at the volume, so a
        # multi-volume result (--zone) relies on the ordering keeping volumes
        # apart -- confirm against the ancestor_ids semantics.
        for row in cur:
            curpath = row[0]
            if args.debug:
                print(delimiter.join(str(el) for el in row))

            # skip anything located inside the subtree reported last
            if refpath is not None and os.path.commonpath([refpath, curpath]) == refpath:
                continue

            refpath = curpath
            if args.debug:
                print(f"--> {refpath}   {row[1]}\t{row[2]}")
            rows.append(_format_row(args, row))
            # record for summaries
            totalfiles += int(row[5])
            totalsize += float(row[6])
            dircount += 1

        cur.close()
    finally:
        # release the connection on every path, including the sys.exit above
        conn.close()

    return rows, dircount, totalfiles, totalsize


def main():
    """Command-line entry point: parse options, run the query, print the result.

    Installs handlers for SIGINT and SIGTSTP, parses the command line, runs the
    embedded unit tests when the hidden --test flag is given, and otherwise
    prints the rows returned by runquery() (tab, custom or CSV delimited)
    followed by an optional summary line.

    Exits with status 1 when neither --atime nor --mtime is supplied.
    """
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTSTP, signal_handler)

    # Parse Arguments
    examples_text = (
        "Examples: \n"
        "Find subtree where all files were last accessed more than 2 years ago on volume data01 \n"
        " # ./oldprefix.py --atime '2 years' --volume data01 \n\n"
        "Find subtree where the most recent modification is older than 9 months in the home subdirectory.\n"
        " # ./oldprefix.py --mtime '9 months' --volume data01 --prefix home \n\n"
        "Find subtree where most recent files were accessed more than 6 months ago and the directory\n"
        "permissions are 700\n"
        " # ./oldprefix.py --atime '6 months' --volume data01 --dir-access-mode 700\n\n"
        "Find subtree where most recent files were written more than 5 years ago\n"
        "AND accessed more than 1 year ago\n"
        " # ./oldprefix.py --atime '1 year' --mtime '5 years' --volume data01\n\n"
    )

    parser = argparse.ArgumentParser(
        description="\x1B[1moldprefix2\x1B[0m finds top level of directory tree that matches query.\n"
        "The primary difference between oldprefix.py and oldprefix2.py is that the later\n"
        "allows combination of mtime and atime in one query, and only does older than.\n"
        "This is useful, for instance, when looking to archive parts of the tree\n"
        "last accessed more than 2 years ago.\n"
        "It must be run on the Starfish server.\n"
        "This command is many times faster than issuing an sf query command with\n"
        "--atime +365d, for instance, because it can pull information directly\n"
        "from directory aggregates instead of doing a massive scan on the file table.\n",
        epilog=examples_text,
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("--csv", action="store_true", help="use comma separated values output")
    parser.add_argument(
        "--atime",
        required=False,
        help="supply a time value like '3 months' or '5 years' for any file accessed in tree. "
        "No directories accessed more recently will be shown",
    )
    parser.add_argument(
        "--mtime",
        required=False,
        help="supply a time value like '3 months' or '5 years' for any file modified in tree. "
        "No directories modified more recently will be shown",
    )
    parser.add_argument("--column-header", "-H", required=False, action="store_true", help="show a column header")
    parser.add_argument(
        "--dir-access-mode", required=False, help="show only directories where permissions are DIR-ACCESS-MODE"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--volume", required=False, help="name of the volume to check")
    group.add_argument("--zone", required=False, help="limit to a specific zone")
    parser.add_argument("--prefix", required=False, default="%", help="optional directory prefix to start at")
    parser.add_argument(
        "--delimiter",
        default="\t",
        help="use a delimiter of your choice in output (default: tab; ignored with --csv)",
    )
    parser.add_argument("--debug", action="store_true", required=False, help="add some debugging to output")
    parser.add_argument("--showowner", action="store_true", required=False, help="show owner of directory")
    parser.add_argument("--showgroup", action="store_true", required=False, help="show primary group of directory")
    parser.add_argument("--showfiles", action="store_true", required=False, help="show count of files in tree")
    parser.add_argument(
        "--showsize",
        action="store_true",
        required=False,
        help="show size of directory tree in fractional GiB rounded to nearest MiB",
    )
    parser.add_argument("--showvolume", action="store_true", required=False, help="show the volume name")
    parser.add_argument(
        "--no-summary", action="store_true", default=False, required=False, help="don't show summary line"
    )
    parser.add_argument("--username", required=False, help="limit to specific directories owned by specific user")
    parser.add_argument("--with-tag", required=False, help="all chosen directories include tag regex")
    parser.add_argument("--without-tag", required=False, help="all chosen directories exclude tag regex")
    parser.add_argument("--test", action="store_true", required=False, help=argparse.SUPPRESS)

    args = parser.parse_args()

    if args.test:
        # hidden self-test mode; unittest.main() exits when the run is finished
        unittest.main(argv=["first-arg-is-ignored"], exit=True)

    if not (args.mtime or args.atime):
        parser.print_help(sys.stderr)
        print(
            "\nUsage error: one (or both) of --atime or --mtime must be use to specify a time range\n", file=sys.stderr
        )
        sys.exit(1)

    # --csv takes precedence over --delimiter
    csvout = None
    delimiter = "\t"
    if args.csv:
        csvout = csv.writer(sys.stdout, delimiter=",")
        delimiter = ","
    elif args.delimiter:
        delimiter = args.delimiter

    rows, dircount, totalfiles, totalsize = runquery(args, delimiter)

    for row in rows:
        try:
            if args.csv:
                csvout.writerow(row)
            else:
                print(delimiter.join(row))
        except UnicodeError as e:
            # Writing a path that stdout's encoding cannot represent raises
            # UnicodeEncodeError; UnicodeError covers that and decode errors.
            print("Unicode error detected: " + str(e))
            print("export PYTHONIOENCODING=utf8")
            print("and run oldprefix again")

    if not args.no_summary:
        # totalsize is already expressed in GiB, which is what humanize() expects
        print(
            f"Total dir trees: {dircount}; Total files: {totalfiles}; "
            f"Total Size: {totalsize:.1f}GiB, {humanize(totalsize)}"
        )


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
