#!/opt/starfish/examples/venv/bin/python3
"""
***********************************************************************************************************

 Starfish Storage Corporation ("Starfish") CONFIDENTIAL
 Unpublished Copyright (c) 2011 - present Starfish Storage Corporation, All Rights Reserved.

 NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
 Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
 intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
 U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
 Dissemination of this information or reproduction of this material is strictly forbidden unless prior
 written permission is obtained from Starfish. Access to the source code contained herein is hereby
 forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
 confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
 Starfish's software.

 ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
 THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
 AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
 FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
 DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
 WHOLE OR IN PART.

 FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
   These notices shall be marked on any reproduction of this data, in whole or in part.
   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
   Software clause at DFARS 52.227-7013.

***********************************************************************************************************
"""

###############################################################################
#  Author Doug Hughes
#  Last modified 2022-08-18
#
# Version 1.1.1
# 2022-08-18: Refactor and fix pylint warning: 'redefined-outer-name'
#
# Version 1.1
# take snapdup.py and make it work at Lustre sites
# This should be refactored into snapdup with optional arguments
#
# Run simple sql queries while removing the need to find the auth key or
# format the query
# This query outputs the query results, whatever they are, in a CSV output
# format. It does not currently take care of quoting.
#
# WARNING: This script runs queries directly against the Starfish database
# and could be used to cause tremendous harm and render Starfish inoperable if
# used incorrectly. It is recommended to only run select and not update queries.

import os
import sys
import unittest
from subprocess import PIPE, STDOUT, Popen

try:
    import argparse
    import configparser

    import psycopg2
except Exception:
    print(
        "In order to use this, you must have configparser, argparse, "
        "and psycopg2 modules installed (available via pip among "
        "other means.)"
    )
    sys.exit(1)


class TestQ(unittest.TestCase):
    """Database smoke tests, run through the hidden ``--test`` flag.

    Both tests need a reachable database and a readable config file, since
    they connect with the URI returned by ``getpgauth()``.
    """

    def _connect(self):
        """Open a connection and make sure it is closed when the test ends."""
        conn = psycopg2.connect(getpgauth())
        # Without this the connection stays open until interpreter exit.
        self.addCleanup(conn.close)
        return conn

    def test_auth(self):
        """test connection"""
        conn = self._connect()
        self.assertIsNotNone(conn)

    def test_query(self):
        """test sql return"""
        conn = self._connect()
        query = """select count(*) from sf_volumes.volume"""
        cur = conn.cursor()
        self.addCleanup(cur.close)
        cur.execute(query)
        self.assertIsNotNone(cur)
        rows = cur.fetchall()
        # count(*) without GROUP BY always yields exactly one row.
        self.assertEqual(len(rows), 1)


def getpgauth():
    """Return the database connection URI from the local Starfish config.

    Reads option ``pg_uri`` from section ``[pg]`` of
    ``/opt/starfish/etc/99-local.ini``.

    Returns:
        The configured connection URI string.

    Exits:
        Prints a diagnostic and calls ``sys.exit(1)`` when the value cannot
        be obtained (file missing/unreadable/malformed, or the section or
        option is absent).
    """
    config = configparser.ConfigParser()
    try:
        # ConfigParser.read() silently skips files it cannot open, so an
        # unreadable or missing file does NOT raise OSError here; it shows up
        # as a NoSectionError from get(). Catch configparser.Error as well so
        # the user gets the intended message instead of a traceback.
        config.read("/opt/starfish/etc/99-local.ini")
        return config.get("pg", "pg_uri")
    except (OSError, configparser.Error) as exc:
        print("can't read config file to get connection uri. check permissions.")
        print("reason: {}".format(exc))
        sys.exit(1)


# Start main


def main():
    """Find same-sized files on a volume and run the Starfish hasher on them.

    Steps:
      1. Parse command-line arguments.
      2. Query the database for every file on ``--volume`` that is larger
         than ``--cutoffMIB`` MiB and shares its size with at least one other
         such file.
      3. Write the matching paths to ``/tmp/stage1a.out``.
      4. Run ``sf job start hasher`` on that list and echo its output.

    Exits with status 1 when the config file is unreadable, the database
    connection fails, no candidate files are found, or the hasher job cannot
    be started or fails. Argument errors exit through argparse (status 2).
    """
    # Parse arguments first so that --help and usage errors do not require
    # database access.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--cutoffMIB", required=False, default=1, type=float, help="minimum MiB to consider when comparing"
    )
    parser.add_argument("--volume", required=False, help="volume which to check")
    parser.add_argument("--debug", action="store_true", required=False, help="add some debugging to output")
    parser.add_argument("--test", action="store_true", required=False, help=argparse.SUPPRESS)
    args = parser.parse_args()

    if not os.access("/opt/starfish/etc/99-local.ini", os.R_OK):
        print("no permissions to get the sql auth token from /opt/starfish/etc/99-local.ini. Try as root?")
        sys.exit(1)

    if args.test:
        # unittest.main(exit=True) terminates the process with the test result.
        unittest.main(argv=["first-arg-is-ignored"], exit=True)

    # --volume stays optional at the argparse level so that --test works
    # without it, but a real run cannot do anything useful without a volume.
    if not args.volume:
        parser.error("--volume is required")

    try:
        conn = psycopg2.connect(getpgauth())
    except psycopg2.DatabaseError as e:
        print("unable to connect to the database: {}".format(str(e)))
        sys.exit(1)

    # The original formatted the cutoff with %d, i.e. truncated to an integer
    # number of bytes; keep that.
    cutoff_bytes = int(args.cutoffMIB * 1024 * 1024)

    # Values are passed as bound parameters (psycopg2 %s placeholders) rather
    # than interpolated into the SQL text, so a volume name containing quotes
    # can neither break nor alter the statement.
    query = """
    WITH sizes AS (SELECT f.size
    FROM sf.file_current f INNER JOIN sf.dir_current d ON f.parent_id = d.id AND d.volume_id = f.volume_id
    INNER JOIN sf_volumes.volume v on v.id = f.volume_id
    WHERE v.name = %s
      AND f.size > %s
    GROUP BY f.size
    HAVING count(f.size) > 1)
    SELECT d.path || '/' || f.name
    FROM sf.dir_current d INNER JOIN sf.file_current f ON d.id = f.parent_id and d.volume_id = f.volume_id
    INNER JOIN sizes s ON s.size = f.size
    INNER JOIN sf_volumes.volume v on v.id = f.volume_id
    WHERE v.name = %s
      AND f.size > %s
    """
    print("finding pairs of similarly sized files")
    try:
        cur = conn.cursor()
        cur.execute(query, (args.volume, cutoff_bytes, args.volume, cutoff_bytes))
        rows = cur.fetchall()
    finally:
        # The database is not needed once the rows are fetched.
        conn.close()

    # The list file is (re)written even when empty, as before.
    with open("/tmp/stage1a.out", "w") as of1:
        of1.writelines(row[0] + "\n" for row in rows)
    linecount = len(rows)
    if linecount == 0:
        print("no pairs of files with same size match cutoff criteria. Consider lowering cutoff")
        sys.exit(1)
    print("found %d files to scan" % linecount)  # noqa: S001

    print("running hash on duplicate set")
    # Argument list with no shell: the volume name is passed as a single
    # argument and cannot be interpreted as shell syntax.
    cmd = [
        "sf",
        "job",
        "start",
        "hasher",
        "{}:".format(args.volume),
        "--wait",
        "--no-prescan",
        "--from-file",
        "/tmp/stage1a.out",
        "--batch-size-entries",
        "100",
        "--workers-per-agent",
        "8",
    ]
    try:
        # The context manager waits for the child and closes its pipe.
        with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:
            for line in p.stdout:
                print(line.decode("utf-8", errors="replace").replace("\n", ""))
    except OSError as e:
        # Without a shell, a missing/unrunnable `sf` raises instead of the
        # shell printing "command not found".
        print("unable to run sf: {}".format(str(e)))
        sys.exit(1)
    if p.returncode != 0:
        print("hasher job exited with status {}".format(p.returncode))
        sys.exit(1)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
