#!/opt/starfish/examples/venv/bin/python3
"""
***********************************************************************************************************

 Starfish Storage Corporation ("Starfish") CONFIDENTIAL
 Unpublished Copyright (c) 2011 - present Starfish Storage Corporation, All Rights Reserved.

 NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
 Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
 intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
 U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
 Dissemination of this information or reproduction of this material is strictly forbidden unless prior
 written permission is obtained from Starfish. Access to the source code contained herein is hereby
 forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
 confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
 Starfish's software.

 ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
 THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
 AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
 FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
 DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
 WHOLE OR IN PART.

 FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
   These notices shall be marked on any reproduction of this data, in whole or in part.
   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
   Software clause at DFARS 52.227-7013.

***********************************************************************************************************
"""

###############################################################################
#  Author Doug Hughes
#  Created 2019-07-29
# grab shortest (topmost) prefix paths for old directory trees
#
# WARNING: This script runs queries directly against the Starfish database
# and could be used to cause tremendous harm and render Starfish inoperable if
# used incorrectly. It is recommended to only run select and not update queries.
#
# version 1.2.3 - doug 2023-09-22
#  use ancestor_ids for sorting
# version 1.2.2 - doug 2023-08-30
#  fix minor bug in sum calculation (missed in inner loop)
# version 1.2.1 - doug 2023-06-29
#  loop over cursor to conserve memory; don't use fetchall
# version 1.2   - doug 2023-04-14
#  force sorting in C order so that idiosyncrasies of utf8 relating to / are avoided
#  fix starting string to be "!-!-!-" so that if the entire chosen starting point matches
#  the age criteria, it will work (having "" as the starting initialization doesn't allow ""
#  as the top of tree in sql output)
#  fix csv and delimiter choosing logic
# version 1.1.13 - doug 2023-04-07
#  handle null uid/gid mapping
# version 1.1.12 - doug 2023-01-11
#  use csv writer
# version 1.1.11 - pdybowski 2022-08-18
#  Refactor and fix pylint warning: 'redefined-outer-name'
# version 1.1.10 - Doug 2022-08-15
#  fix bugs in column ordering and group display
# version 1.1.9 - Doug 2020-07-29
#  add ability to --showgroup
# version 1.1.8 - Doug 2020-07-29
#  add ability to limit to a specific username
# version 1.1.7 - Doug 2020-02-13
#  handle UnicodeDecodeError and tell customer what to do to fix it
# version 1.1.6 - Doug 2020-02-11
#  give 3 digits precision to sizeGIB
# version 1.1.5 - Doug 2020-02-04
#  add --column-header option
# version 1.1.4 - doug 2020-01-26
#  add --without-tag option as regex
# version 1.1.3 - doug 2020-01-25
#  add --with-tag option as regex to select only directories containing an explicit tag
# version 1.1.2 - doug 2020-01-02
#  add option to only check directories with permission 700 (client req) --dir-access-mode
#  change default atime to max (most recent access)
# version 1.1.1 - doug 2019-12-04
#  improved help statement
# version 1.1 - doug 2019-11-25
#  add owner, size, and files options for cust

import csv
import os
import signal
import sys
import unittest

try:
    import argparse
    import configparser
    from argparse import RawTextHelpFormatter

    import psycopg2
except Exception:
    print(
        "In order to use this, you must have configparser, argparse, "
        "and psycopg2 modules installed (available via pip among "
        "other means.)"
    )
    sys.exit(1)


class TestQ(unittest.TestCase):
    """Connectivity smoke tests, run by main() when the hidden --test flag is given.

    Each test opens its own connection using the URI returned by getpgauth()
    and closes it again, so running the suite leaves no sessions behind.
    """

    def setUp(self):
        """No shared fixtures are needed; every test manages its own connection."""

    def test_auth(self):
        """A connection can be opened with the configured credentials."""
        conn = psycopg2.connect(getpgauth())
        try:
            self.assertIsNotNone(conn)
        finally:
            conn.close()

    def test_query(self):
        """A trivial count(*) query returns exactly one row."""
        query = """select count(*) from sf_volumes.volume"""
        conn = psycopg2.connect(getpgauth())
        try:
            cur = conn.cursor()
            try:
                cur.execute(query)
                self.assertIsNotNone(cur)
                rows = cur.fetchall()
                self.assertEqual(len(rows), 1)
            finally:
                cur.close()
        finally:
            # Closing without commit discards the (read-only) transaction.
            conn.close()


def getpgauth(cfgfile="/opt/starfish/etc/99-local.ini"):
    """Return the PostgreSQL connection URI stored in the Starfish config file.

    Args:
        cfgfile: path of the ini file to read; the ``pg_uri`` option of its
            ``[pg]`` section is returned. Defaults to the Starfish local
            configuration file.

    Returns:
        The value of ``pg_uri`` as a string.

    Exits the process with status 1 (after printing a message) when the file
    is not readable, cannot be parsed, or lacks the ``[pg]`` section or the
    ``pg_uri`` option.
    """
    if not os.access(cfgfile, os.R_OK):
        print(cfgfile + " is not readable. Try with sudo.")
        sys.exit(1)

    config = configparser.ConfigParser()
    try:
        config.read(cfgfile)
        return config.get("pg", "pg_uri")
    except OSError:
        print("can't read config file to get connection uri. check permissions.")
        sys.exit(1)
    except configparser.Error as e:
        # Missing section/option and parse errors are configparser.Error
        # subclasses, not OSError; report them instead of dumping a traceback.
        print(f"can't get pg_uri from the [pg] section of {cfgfile}: {e}")
        sys.exit(1)


# ##################### main #####################


def signal_handler(sig, frame):
    """Signal callback: announce the interruption and end the process with status 0.

    Both arguments are supplied by the signal machinery and are not used.
    """
    print("interrupted by keyboard")
    raise SystemExit(0)


def humanize(num, suffix="B"):
    """Format a number with a binary prefix, starting the scale at "Gi".

    The value is divided by 1024 for every step through Gi, Ti, Pi, Ei and Zi
    until its magnitude drops below 1024; whatever is left after Zi is
    reported with the Yi prefix. One decimal place is always shown.
    """
    prefixes = ("Gi", "Ti", "Pi", "Ei", "Zi")
    value = num
    step = 0
    while step < len(prefixes):
        if abs(value) < 1024.0:
            return "{:3.1f}{}{}".format(value, prefixes[step], suffix)
        value = value / 1024.0
        step += 1
    return "{:.1f}Yi{}".format(value, suffix)


def _build_parser():
    """Create the argparse parser describing every oldprefix command-line option."""
    examples_text = (
        "Examples: \n"
        "Find subtree where all files were last accessed more than 2 years ago on volume data01 \n"
        ' # ./oldprefix.py --whence max --type atime --volume data01 --interval "2 years" \n\n'
        "Find subtree where average modification age is older than 9 months in the home subdirectory.\n"
        ' # ./oldprefix.py --whence avg --type mtime --volume data01 --prefix home --interval "9 months" \n\n'
        "Find subtree where most recent files were accessed more than 6 months ago and the directory\n"
        "permissions are 700\n"
        ' # ./oldprefix.py --whence max --type atime --volume data01 --interval "6 months" --dir-access-mode 700\n'
    )

    parser = argparse.ArgumentParser(
        description="\x1B[1moldprefix\x1B[0m finds top level of directory tree that matches query.\n"
        "This is useful, for instance, when looking to archive parts of the tree\n "
        "last accessed more than 2 years ago.\n"
        "It must be run on the Starfish server.\n"
        "This command is many times faster than issuing an sf query command with\n"
        "--atime +365d, for instance, because it can pull information directly\n"
        "from directory aggregates instead of doing a massive scan on the file table.\n"
        "Note that 'max atime' find requires Starfish version 5.0.5649 or newer.\n",
        epilog=examples_text,
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("--csv", action="store_true", help="use comma separated values output")
    parser.add_argument(
        "--whence",
        required=False,
        choices=["avg", "min", "max"],
        default="max",
        help="atime/mtime algorithm to use (Average, Minimum or Maximum for a tree). "
        "max atime will often show nothing at all. Default = max (most recent)",
    )
    parser.add_argument(
        "--type",
        required=False,
        choices=["atime", "mtime"],
        default="atime",
        help="choose between atime and mtime for recursive calculations (default atime)",
    )
    parser.add_argument("--column-header", "-H", required=False, action="store_true", help="show a column header")
    parser.add_argument(
        "--dir-access-mode", required=False, help="show only directories where permissions are DIR-ACCESS-MODE"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--volume", required=False, help="name of the volume to check")
    group.add_argument("--zone", required=False, help="limit to a specific zone")
    parser.add_argument("--prefix", required=False, default="%", help="optional directory prefix to start at")
    parser.add_argument(
        "--interval",
        default="2 years",
        required=True,
        help="how old should the tree be (e.g. '2 years', '10 weeks', '3 months'); "
        "quotes around the interval are mandatory",
    )
    parser.add_argument("--delimiter", help="use a delimiter of your choice in csv output (default ,)")
    parser.add_argument("--debug", action="store_true", required=False, help="add some debugging to output")
    parser.add_argument("--showowner", action="store_true", required=False, help="show owner of directory")
    parser.add_argument("--showgroup", action="store_true", required=False, help="show primary group of directory")
    parser.add_argument("--showfiles", action="store_true", required=False, help="show count of files in tree")
    parser.add_argument(
        "--showsize",
        action="store_true",
        required=False,
        help="show size of directory tree in fractional GiB rounded to nearest MiB",
    )
    parser.add_argument("--showvolume", action="store_true", required=False, help="show the volume name")
    parser.add_argument(
        "--no-summary", action="store_true", default=False, required=False, help="don't show summary line"
    )
    parser.add_argument("--username", required=False, help="limit to specific directories owned by specific user")
    parser.add_argument("--with-tag", required=False, help="all chosen directories include tag regex")
    parser.add_argument("--without-tag", required=False, help="all chosen directories exclude tag regex")
    parser.add_argument("--test", action="store_true", required=False, help=argparse.SUPPRESS)
    return parser


def _build_query(args, perms=None):
    """Assemble the directory query and its bound parameters from parsed options.

    Args:
        args: the argparse namespace produced by the parser of this script.
        perms: integer permission value to match against d.perms, or None
            when no --dir-access-mode filter was requested.

    Returns:
        A ``(sql, params)`` tuple. ``sql`` uses psycopg2 named placeholders
        (``%(name)s``) for every user-supplied string, and ``params`` is the
        matching dict. Literal percent signs in ``sql`` are doubled because
        the statement is always executed with parameters.
    """
    params = {"interval": args.interval}
    ctes = []  # optional common table expressions, joined under a single WITH
    joins = []  # extra joins for the main query
    filters = []  # extra WHERE conditions for the main query

    # Volume / path-prefix restriction, shared by the main query and the
    # taggeddirs CTE.
    scope = ""
    if args.volume:
        scope += " AND d.volume_id = sf.volume_id_from_name(%(volume)s)"
        params["volume"] = args.volume
    if args.prefix != "%":
        scope += " AND d.path LIKE %(prefix)s"
        params["prefix"] = args.prefix + "%"

    if args.username:
        filters.append("AND u.name = %(username)s")
        params["username"] = args.username

    if perms is not None:
        filters.append("AND d.perms = %(perms)s")
        params["perms"] = perms

    if args.with_tag:
        filters.append("AND tn.name ~ %(with_tag)s")
        params["with_tag"] = args.with_tag
        joins.append(
            """INNER JOIN sf.tag_name tn on tn.namespace_id = 1
                         INNER JOIN sf.tag_value tv ON tv.volume_id = v.id
                             AND tv.fs_entry_id = d.id
                             AND tv.name_id = tn.id"""
        )

    if args.without_tag:
        params["without_tag"] = args.without_tag
        ctes.append(
            f"""taggeddirs AS (
                       SELECT d.volume_id, d.id
                       FROM sf.dir_current d
                       INNER JOIN sf_volumes.volume v ON v.id = d.volume_id
                       INNER JOIN sf.tag_value tv ON tv.volume_id = v.id
                         AND tv.fs_entry_id = d.id
                       INNER JOIN sf.tag_name tn on tn.namespace_id = 1
                         AND tv.name_id = tn.id
                       WHERE tn.name ~ %(without_tag)s
                             {scope}
                       )"""
        )
        joins.append(
            """LEFT JOIN taggeddirs tagged on v.id = tagged.volume_id
                             AND d.id = tagged.id"""
        )
        filters.append("AND tagged.id is NULL")

    if args.zone:
        params["zone"] = args.zone
        ctes.append(
            """zonedirs AS (
                       SELECT d.volume_id, d.path as zonepath
                       FROM sf.dir_current d
                       JOIN sf_volumes.volume ON (d.volume_id = volume.id)
                       JOIN sf.tag_value_current tv ON (tv.fs_entry_id=d.id AND tv.volume_id=d.volume_id)
                       JOIN sf.tag_name tn ON (tn.id=tv.name_id)
                       JOIN sf.tag_namespace tns ON (tns.id=tn.namespace_id AND tns.name='__zone')
                       WHERE tn.name = (select id::varchar from sf_auth.zone where name = %(zone)s)
                       )"""
        )
        # '%%' is a literal '%' once psycopg2 has substituted the parameters.
        joins.append(
            """JOIN zonedirs ON v.id = zonedirs.volume_id
                         AND d.path LIKE zonedirs.zonepath || '%%'"""
        )

    # Several CTEs must share one WITH keyword, separated by commas.
    with_clause = ("WITH " + ",\n".join(ctes)) if ctes else ""
    extra_joins = "\n            ".join(joins)
    extra_filters = "\n            ".join(filters)

    # whence/type are restricted by argparse `choices`, so they are safe to
    # interpolate; they are also needed inside a quoted column alias, where
    # a bound parameter cannot be used.
    whence = args.whence
    kind = args.type
    sql = f"""{with_clause}
        SELECT d.path,
        to_timestamp((d.rec_aggrs->'{whence}'->>'{kind}')::BIGINT) as "{whence} {kind}",
        CASE WHEN u.name is NULL THEN d.uid::text ELSE u.name END as user,
        CASE WHEN g.name is NULL THEN d.gid::text ELSE g.name END as group,
        d.rec_aggrs->>'files' as files,
        ROUND((d.rec_aggrs->>'size')::BIGINT / (1024.0*1024*1024),3)::text as sizeGiB,
        v.name as volume
        FROM sf.dir_current d
        INNER JOIN sf_volumes.volume v on v.id = d.volume_id
        LEFT JOIN sf.uid_mapping u on d.uid = u.uid AND u.volume_id = v.id
        LEFT JOIN sf.gid_mapping g on d.gid = g.gid AND g.volume_id = v.id
            {extra_joins}
        WHERE d.rec_aggrs ? '{whence}'
            and to_timestamp((d.rec_aggrs->'{whence}'->>'{kind}')::BIGINT) < current_date - %(interval)s::interval
            {scope}
            {extra_filters}
        ORDER BY d.ancestor_ids """  # noqa E501
    return sql, params


def main():
    """Command-line entry point: list the topmost directories matching the age criteria.

    Parses the options, optionally runs the built-in connectivity tests
    (hidden --test flag), queries the Starfish database for directories whose
    recursive atime/mtime aggregate is older than --interval, and prints only
    those rows that are not located below the previously printed directory.
    A summary line with tree count, file count and total size follows unless
    --no-summary is given.

    Exits with status 1 when the database cannot be reached or the query
    fails, and with status 0 when nothing matches.
    """
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTSTP, signal_handler)

    # Parse options before touching the database so that --help and usage
    # errors work without a readable config file or a running server.
    parser = _build_parser()
    args = parser.parse_args()

    if args.test:
        unittest.main(argv=["first-arg-is-ignored"], exit=True)

    perms = None
    if args.dir_access_mode:
        try:
            perms = int(args.dir_access_mode, 8)
        except ValueError:
            parser.error("--dir-access-mode must be an octal mode such as 700")

    # NOTE(review): --delimiter is ignored when --csv is given although its
    # help text mentions csv output; kept as is, confirm the intended behavior.
    delimiter = "\t"
    csvout = None
    if args.csv:
        csvout = csv.writer(sys.stdout, delimiter=",")
        delimiter = ","
    elif args.delimiter:
        delimiter = args.delimiter

    def emit(fields):
        """Write one output record, through the csv writer when --csv is active."""
        try:
            if csvout is not None:
                csvout.writerow(fields)
            else:
                print(delimiter.join(fields))
        except UnicodeError as e:
            # Writing undecodable path bytes raises UnicodeEncodeError on
            # Python 3; UnicodeError covers both encode and decode failures.
            print("Unicode decoding error detected: " + str(e))
            print("export PYTHONIOENCODING=utf8")
            print("and run oldprefix again")

    # Result columns: 0 path, 1 timestamp, 2 user, 3 group, 4 files,
    # 5 sizeGiB, 6 volume. Path and timestamp are always shown.
    optional_columns = []
    for wanted, index in (
        (args.showowner, 2),
        (args.showgroup, 3),
        (args.showfiles, 4),
        (args.showsize, 5),
        (args.showvolume, 6),
    ):
        if wanted:
            optional_columns.append(index)

    try:
        conn = psycopg2.connect(getpgauth())
    except psycopg2.DatabaseError as e:
        print(f"Unable to connect to the database: {e}")
        sys.exit(1)

    query, params = _build_query(args, perms)

    dircount = 0
    totalsize = 0.0
    totalfiles = 0
    cur = conn.cursor()
    try:
        if args.debug:
            # mogrify shows the statement exactly as it will be sent.
            print("executing query: " + cur.mogrify(query, params).decode("utf-8", "replace"))

        try:
            cur.execute(query, params)
        except psycopg2.Error as e:
            print(f"Query failed: {e}")
            sys.exit(1)

        if args.column_header:
            # Only name the columns that are actually printed for each row.
            emit([str(cur.description[index][0]) for index in [0, 1] + optional_columns])

        if cur.rowcount == 0:
            print("no matching directories")
            sys.exit(0)

        # Path of the most recently printed directory; None until the first
        # row, because "" is itself a valid path (top of the tree).
        refpath = None
        for row in cur:
            curpath = row[0]
            lastaccess = row[1]
            if args.debug:
                print(delimiter.join(str(el) for el in row))

            # Skip rows that live at or below the directory already reported.
            if refpath is not None and os.path.commonpath([refpath, curpath]) == refpath:
                continue

            refpath = curpath
            if args.debug:
                print(f"--> {refpath}   {str(lastaccess)}")

            # record for summaries; a missing aggregate counts as zero
            totalfiles += int(row[4] or 0)
            totalsize += float(row[5] or 0)
            dircount += 1

            emit([curpath, str(lastaccess)] + [str(row[index]) for index in optional_columns])
    finally:
        cur.close()
        # Nothing is written by this script; closing without commit discards
        # the transaction.
        conn.close()

    if not args.no_summary:
        print(
            f"Total dir trees: {dircount}; Total files: {totalfiles}; "
            f"Total Size: {totalsize:.1f}GiB, {humanize(totalsize)}"
        )


# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
