#!/opt/starfish/examples/venv/bin/python3
# ***********************************************************************************************************
#
# Starfish Storage Corporation ("Starfish") CONFIDENTIAL
# Unpublished Copyright (c) 2011 - present Starfish Storage Corporation, All Rights Reserved.
#
# NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
# Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
# intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
# U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
# Dissemination of this information or reproduction of this material is strictly forbidden unless prior
# written permission is obtained from Starfish. Access to the source code contained herein is hereby
# forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
# confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
# Starfish's software.
#
# ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
# THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
# AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
# FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
# DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
# WHOLE OR IN PART.
#
# FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
#   These notices shall be marked on any reproduction of this data, in whole or in part.
#   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
#   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
#   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
#   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
#   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
#   Software clause at DFARS 52.227-7013.
#
# ***********************************************************************************************************
import argparse
import json
import logging
import os
import sys
import unittest
from argparse import RawTextHelpFormatter

from sfexamples.job.modules import hasher
from sfexamples.job.modules.snapshot import SnapShot

# Starfish installation root; taken from the SFHOME environment variable when it is set.
SFHOME = os.environ.get("SFHOME", "/opt/starfish")
# Program name: names the log file and is passed to SnapShot as its `logging` argument.
Progname = "hashsnap"
# Version string printed by --version.
version = "1.0.4 2023-03-27"
# Changelog:
# 1.0.2 fix import for snapshot module
# 1.0.3 fix incorrect quoting in examples 2020-04-14
# 1.0.4 add test case for dotfile and dotdirs at root
#
# Starfish Storage
# Doug Hughes
# Use snapshots to get the hash of a file, similar to Starfish output.


class TestHashSnap(unittest.TestCase):
    """Unit tests for hashsnap, run through the hidden --test option.

    The snapshot tests are environment specific: they expect a volume mounted
    at /mnt/dg-isi whose .snapshot directory holds snapshots matching
    *duration*.
    """

    # Default arguments used when the tests are not started from main(), which
    # replaces this attribute with the parsed command line. This must be a
    # Namespace *instance*: assigning attributes on the argparse.Namespace
    # class itself would leak `debug` into every Namespace in the process.
    cmdargs = argparse.Namespace(debug=0)

    def setUp(self):
        """Create three roughly 1MB source files and record their digests."""
        fixtures = (
            ("/tmp/srcfile1", "abcdefghij", 100000),
            ("/tmp/srcfile2", "abcdefghik", 100001),
            ("/tmp/srcfile3", "abcdefghil", 100002),
        )
        for path, pattern, repeat in fixtures:
            with open(path, "w") as tfile:
                # print() appends a trailing newline to the repeated pattern
                print(pattern * repeat, file=tfile)
        # Digests keyed by the files created above.
        # NOTE(review): presumably the md5/sha1 of those files; no test visible
        # here compares against them, so the values are unverified.
        self.srchashes = {
            "/tmp/srcfile1": {
                "md5": "63e61467a43b1fbf9da62c240025840f",
                "sha1": "900d103dd4cba1a1f33bc98c0a440e308d1493b7",
            },
            "/tmp/srcfile2": {
                "md5": "a5e2ac330f8312e623b59b7ebcf8d98a",
                "sha1": "f6be05c86074ab7001c4257e71c6b649b7b108dd",
            },
            "/tmp/srcfile3": {
                "md5": "990be982dda846e1ee920cb0bed992c7",
                "sha1": "d2ad93c50ed28cfc10aa0ff3258cf09db12a13b0",
            },
        }

    def tearDown(self):
        """Remove the source files created by setUp."""
        for path in self.srchashes:
            try:
                os.remove(path)
            except FileNotFoundError:
                # already gone; nothing to clean up
                pass

    def test_snap_fail(self):
        """A snapshot name that cannot be resolved must raise."""
        with self.assertRaises(Exception) as context:
            SnapShot("top", "Karma", debug=self.cmdargs.debug, logging=Progname)
        # Checked after the `with` block: a statement placed after the raising
        # call inside the block is never executed. The message is compared as
        # text because exception objects do not support `in`.
        # NOTE(review): confirm "bad path" is the wording SnapShot really uses.
        self.assertIn("bad path", str(context.exception))

    def test_simple_snap_dg_dh(self):
        """Successful snapshot lookup on the /mnt/dg-isi test mount.

        Original note: snapshot mount on 196.168.10.159 and 10.157.
        """
        snaph = SnapShot("top", "/mnt/dg-isi/.snapshot/*duration*", debug=self.cmdargs.debug, logging=Progname)
        self.assertTrue(snaph)

    def test_copy_dotfiles(self):
        """Dotfiles at the root of the volume must not corrupt the snap prefix.

        Edge case: the common prefix of /home/doug/.snapshot and another
        dot-entry in /home/doug would be "/home/doug/.", whereas the prefix
        should be "/home/doug/".
        """
        snaph = SnapShot("top", "/mnt/dg-isi/.snapshot/*duration*", debug=self.cmdargs.debug, logging=Progname)
        for dot_entry in ("/mnt/dg-isi/.conda", "/mnt/dg-isi/.ssh"):
            self.assertEqual(snaph.get_snap_prefix(dot_entry), "/mnt/dg-isi/")


def setup_logging():
    """Configure the root logger to write to <SFHOME>/log/<Progname>.log.

    Only records at WARNING level or above are written; each line carries a
    timestamp, the level name and the message.
    """
    log_path = os.path.join(SFHOME, "log", Progname + ".log")
    settings = {
        "filename": log_path,
        "level": logging.WARNING,
        "datefmt": "%Y/%m/%d %H:%M:%S",
        "format": "%(asctime)s %(levelname)-8s %(message)s",
    }
    logging.basicConfig(**settings)


def parse_cmdline(argv=None):
    """Build the hashsnap argument parser and parse the command line.

    Args:
        argv: optional list of argument strings to parse. When None (the
            default) argparse reads sys.argv[1:], as before.

    Returns:
        argparse.Namespace with the attributes debug, pretty, paths_via_stdin,
        test, snapshot_name, snapshot_location, version and file_list.
    """
    # ANSI SGR sequences that switch bold on and off in the help epilog. They
    # are spelled as explicit escapes so that the ESC byte cannot be lost or
    # hidden when the source passes through editors and transfer tools.
    bold = "\033[1m"
    reset = "\033[0m"
    examples_text = f"""
{bold}Examples:{reset}

Note: this is best run from snapdup.py which will eliminate having
to run the job start by hand, and will also only run hashing on things
that have the same size, eliminating excess short-term CPU usage

Run hashsnap on command line with debugging to see json outputs
{bold}echo -ne "/mnt/dg-isi/home/doug/POFax.tif\\0/mnt/dg-isi/home/doug/small.mts" |
{SFHOME}/bin/examples/job/hashsnap.py --paths-via-stdin
--snapshot-location=top --snapshot-name='/mnt/dg-isi/.snapshot/*isi_2week*' --debug 2{reset}

Run hashsnap as a job on an isilon with snapshots at the top level of the volume and look for
files ending in .jpg or .jpeg. The job name is hash.
{bold}sf job start --ext jpg --ext jpeg --job-name hash --paths-via-stdin
--cmd-output-format json --no-prescan "{SFHOME}/bin/examples/job/hashsnap.py --paths-via-stdin
--snapshot-location=top --snapshot-name='/mnt/myisi/.snapshot/*Hourly*'" MyVol: {reset}

To run on all files in a subtree, specify that:
{bold}sf job start --job-name hash --paths-via-stdin --cmd-output-format json
--no-prescan "{SFHOME}/bin/examples/job/hashsnap.py --paths-via-stdin --snapshot-location=top
--snapshot-name='/mnt/myisi/.snapshot/*Hourly*'" MyVol:/path/to/directory --not --type d{reset}
"""

    # RawTextHelpFormatter keeps the line breaks of the epilog and of the
    # multi-line help strings below exactly as written.
    parser = argparse.ArgumentParser(
        description="generate hashes from filesystem snapshots of files",
        epilog=examples_text,
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("--debug", required=False, default=0, type=int, help="enable debugging")
    parser.add_argument(
        "--pretty", required=False, action="store_true", help="show human readable json on output (vs compact)"
    )
    parser.add_argument(
        "--paths-via-stdin",
        required=False,
        default=False,
        action="store_true",
        help="output using conventions for calling from a starfish \n "
        'job using argument of same name, i.e. "null separated"',
    )
    # hidden option: run the built-in unit tests instead of hashing
    parser.add_argument("--test", required=False, action="store_true", help=argparse.SUPPRESS)
    parser.add_argument(
        "--snapshot-name",
        required=False,
        help="The snapshot name is either a specific name (e.g. Daily), \n "
        "or a glob match (e.g. .snapshot|.snapshots|.zfs/snapshot, or Daily* \n "
        "Note that the glob pattern match is strictly alphanumeric, and is not \n "
        "based on date stamp. So /mnt/ifs/.snapshot/Daily* will match best when \n "
        "snapshots use the format Daily-YYYY-MM-DD",
    )
    parser.add_argument(
        "--snapshot-location",
        required=False,
        choices=["top", "local"],
        help="Selects whether snapshot location is to be found at share level \n " "or, like zfs, in every directory",
    )
    parser.add_argument("--version", required=False, action="store_true", help="show version and exit")
    parser.add_argument("file_list", nargs="*", help="One or more files")

    return parser.parse_args(argv)


def main():
    """Hash the snapshot copy of every requested file and print the results.

    Paths come from the positional arguments, or from stdin as a NUL-separated
    list when --paths-via-stdin is given. For each path that has a snapshot
    copy, the hashes of that copy are printed as JSON: "<path> <json>" per
    line normally, or "<path>\\0<json>\\0" with --paths-via-stdin. Paths
    without a snapshot copy produce no output.

    Returns:
        None on success (exit status 0), or 1 when the snapshot options are
        missing or the snapshot location cannot be set up.
    """
    args = parse_cmdline()

    if args.version:
        print(version)
        return

    # run unit tests?
    if args.test:
        TestHashSnap.cmdargs = args
        unittest.main(argv=["first-arg-is-ignored"], exit=True)
        return

    setup_logging()

    # Diagnostics go to stderr so they never mix with the hash output that a
    # calling job reads from stdout.
    if not args.snapshot_name or not args.snapshot_location:
        print("Error: --snapshot-name and --snapshot-location must both be defined.", file=sys.stderr)
        return 1

    try:
        snap_instance = SnapShot(args.snapshot_location, args.snapshot_name, debug=args.debug, logging=Progname)
    except NameError as e:
        # NOTE(review): SnapShot is imported at module load, so a missing
        # module would presumably fail there with ImportError rather than
        # reach this handler; kept for backward compatibility.
        print(
            f"Starfish snapshot module not found. It should be in {SFHOME}/bin/examples/job/modules: {e}",
            file=sys.stderr,
        )
        return 1
    except Exception as e:
        print(f"Other Error finding snapshot location: {e}", file=sys.stderr)
        return 1

    h = hasher.Hasher()

    # --pretty selects indented JSON; without it the output stays compact
    indent = 4 if args.pretty else None

    # for use with sf job --start efficiency
    if args.paths_via_stdin:
        flist = sys.stdin.read().split("\0")
    else:
        flist = args.file_list
    for fname in flist:
        if not fname:
            # A NUL-terminated stdin list leaves an empty final element after
            # the split; an empty name is not a path, so skip it.
            continue
        if args.debug > 1:
            print("checking path " + fname, file=sys.stderr)
        spath = snap_instance.get_snap_path(fname)
        # only emit output if the thing exists in a snapshot, else skip it and converge later
        if not spath:
            continue
        hashes_json = json.dumps(h.gethashes(spath), indent=indent)
        if args.paths_via_stdin:
            print(fname + "\0" + hashes_json, end="\0")
        else:
            print(fname + " " + hashes_json)


# Script entry point: the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
