#!/opt/starfish/examples/venv/bin/python3
"""
***********************************************************************************************************

 Starfish Storage Corporation ("Starfish") CONFIDENTIAL
 Unpublished Copyright (c) 2011-present Starfish Storage Corporation, All Rights Reserved.

 NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
 Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
 intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
 U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
 Dissemination of this information or reproduction of this material is strictly forbidden unless prior
 written permission is obtained from Starfish. Access to the source code contained herein is hereby
 forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
 confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
 Starfish's software.

 ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
 THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
 AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
 FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
 DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
 WHOLE OR IN PART.

 FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
   These notices shall be marked on any reproduction of this data, in whole or in part.
   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
   Software clause at DFARS 52.227-7013.

***********************************************************************************************************
"""

###############################################################################
#  Author Eric Bressler
#
#  Last modified 8-2-2023 v.1.96
#  1.96: Added exclude tag example in help
#  1.95: add GLACIER_IR capability
#  1.94: Only generate manifest if there is one and only if not suppressed
#  1.93: Updated to support sfcommon locally or in modules folder, submitted to examples. - 1-24-2022 EB
#  1.92: Changed email attachments to zip format. Fixed a flake8 issue. - 12-21-2021 EB
#  1.91: --sf-archive-args flag help examples updated. - 12-07-2021 EB
#  1.90: Added --sf-archive-args flag. - 11-29-2021 EB
#  1.80: Added --preserve-empty-dirs flag. - 11-11-2021 EB
#  1.70: Removed an extra `-` from the last example in help. - 9-22-2021 EB
#  1.60: Updated to use 1.03 version of sfcommon so that archive to volumes will work. - 9-30-2021 EB
#        Changed the command line to have --send-empty-email instead of --no-empty-email
#        so that by default we will not send an email if the manifest is empty.
#  1.50: Added in proper --migrate functionality - 9-28-2021 EB
#  1.40: Updated to require sf-common 1.01 to fix an issue with scans not scanning at the directory - 9-12-2021 EB
#        level for pre/post scan. Fixed an issue with a poor error message when a malformed namespace:tag was
#        entered. Also changed it so that it no longer does an mtime scan when issuing the archive command,
#        since we handle pre/post scanning outside of the archive command itself.
#  1.30: Updated to handle an issue where the log directory doesn't exist. It will now create it. - 7-29-2021 EB
#        It will now check to make sure it has a valid version of sfcommon.py and error if it doesn't.
#  1.20: Corrected/clarified examples. - DG 7-27-2021
#  1.19: Changed python to Starfish supplied venv, cleanup of help
#  1.18: This is a new version of zone-tag-archive.sh v1.18, migrated to python - 2020-10-20 EB
import argparse
import datetime
import getpass
import io
import json
import logging
import os
import socket
import sys
import time
from zipfile import ZipFile

from sfexamples.sfcommon import (
    VERSION as SFCOMMON_VERSION,
    SFCommon,
    SFMail,
)

VERSION = "1.96"

# Oldest sfcommon release this script accepts. sfcommon publishes its release number as VERSION
# (imported above as SFCOMMON_VERSION); stop with an explanatory message when it is older.
required_version = 1.04
if SFCOMMON_VERSION < required_version:
    upgrade_notice = (
        f"zonetag-archive.py requires modules/sfcommon.py {required_version} or later.\n"
        f"Your version is {SFCOMMON_VERSION}.\n"
        "Please update the version you are using."
    )
    print(upgrade_notice)
    sys.exit(1)

logger = logging.getLogger(__name__)


class ZoneTagArchiveUtils:
    """Archive workflow helper for a single ``namespace:tag`` zone tag.

    Bundles the Starfish client (``SFCommon``), the per-run log, manifest and lock
    file locations, and e-mail reporting. Used as a context manager it holds a lock
    file in the log directory for the duration of the ``with`` block so that only
    one run per tag is active at a time.
    """

    # Value of an entry's "type" field that identifies a directory
    # (16384 == 0o040000, the POSIX S_IFDIR file-type bits).
    _DIRECTORY_TYPE = 16384
    # Seconds to wait between two status polls of a running job, query or scan.
    _POLL_INTERVAL_SECONDS = 1
    # Scan states after which polling stops.
    _FINISHED_SCAN_STATES = ("done", "fail", "cancelled")

    def __init__(self, logdir, namespacetag, error_emails, from_address, **kwargs):
        """Set up the Starfish client, file names and log handlers for one run.

        Adds two file handlers (full log and errors only) to the root logger and
        sets the root logger level to INFO.

        Args:
            logdir: Directory for log, manifest and lock files; created if missing.
            namespacetag: Zone tag to process, expected as ``namespace:tag``.
            error_emails: Recipient(s) for failure e-mails; a falsy value disables them.
            from_address: Sender address used for all e-mails.
            **kwargs: Ignored.
        """
        self.sfc = SFCommon()
        self.start_time = datetime.datetime.now()
        self.error_emails = error_emails
        self.from_address = from_address
        self.log_dir = logdir

        self.namespacetag = namespacetag
        # ":" is replaced so the tag can be embedded in file names.
        self.file_friendly_namespacetag = namespacetag.replace(":", "-")
        self.formatted_start_time = self.start_time.strftime("%Y%m%d-%H%M%S")
        file_prefix = f"{self.formatted_start_time}-{self.file_friendly_namespacetag}"
        self.manifest_file_name = f"{file_prefix}-archive-filelist"
        self.log_location = os.path.join(self.log_dir, f"{file_prefix}.log")
        self.error_log_location = os.path.join(self.log_dir, f"{file_prefix}.err")
        self.report_errors = False
        self.successful_archive_jobs = []
        self.lock_file = ""

        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)

        # Create the log directory itself, not its parent: os.path.dirname() only
        # yields the directory when the path ends with a separator, so a value such
        # as "/var/log/zta" would otherwise never be created. An empty value means
        # the current directory, which needs no creation.
        if self.log_dir:
            os.makedirs(self.log_dir, exist_ok=True)
        fh = logging.FileHandler(self.log_location)
        efh = logging.FileHandler(self.error_log_location)
        efh.setLevel(logging.ERROR)
        formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        fh.setFormatter(formatter)
        efh.setFormatter(formatter)
        root_logger.addHandler(fh)
        root_logger.addHandler(efh)

    def __enter__(self):
        """Acquire the per-tag lock and return this instance."""
        self._lock(self.namespacetag)
        return self

    def __exit__(self, _exc_type, _exc_val, _exc_tb):
        """Release the lock; exceptions raised inside the block are not suppressed."""
        self._unlock()

    def _log_and_print(self, message, end="\n", flush=False):
        """Print ``message`` to stdout and log it at INFO level."""
        print(message, end=end, flush=flush)
        logger.info(message)

    def _err_and_print(self, message):
        """Print ``message`` to stderr, log it at ERROR level and flag the run as having errors."""
        self.report_errors = True
        print(message, file=sys.stderr)
        logger.error(message)

    def _lock(self, namespacetag):
        """Create the lock file for this tag or terminate the script.

        The lock is ``<log_dir>/<namespace>-<tag>.lock`` and contains this process's
        pid. It is created atomically; if it already exists or cannot be created,
        the failure is reported through ``_fail_and_email`` and the script exits
        with status 1. A lock left behind by an aborted run has to be removed
        manually, as the message explains.

        Args:
            namespacetag: Tag name shown in the "already running" message.

        Raises:
            SystemExit: If the lock is already held or cannot be written.
        """
        lock_path = os.path.join(self.log_dir, f"{self.file_friendly_namespacetag}.lock")
        try:
            # Mode "x" fails if the file exists, so checking and taking the lock is a
            # single step and two runs started together cannot both acquire it.
            with open(lock_path, "x") as lock:
                lock.write(str(os.getpid()))
        except FileExistsError:
            old_pid = ""
            try:
                with open(lock_path, "r") as lock:
                    old_pid = lock.readline()
            except OSError as err:
                logger.warning(f"Could not read lock pid at {lock_path}. Error: {err}")
            message = (
                f"Another version of zonetag-archive is already running for {namespacetag} with pid "
                f"{old_pid}.\nIf this is not the case, please remove {lock_path} and rerun command."
            )
            self._fail_and_email(message)
            sys.exit(1)
        except OSError as err:
            self._fail_and_email(f"Cannot write lock file: {err}")
            sys.exit(1)
        # Remember the path only once the lock is really ours, so _unlock() can never
        # delete a lock that belongs to another run.
        self.lock_file = lock_path

    def _unlock(self):
        """Remove the lock file if this instance holds one; removal errors are only printed."""
        if not self.lock_file:
            return
        try:
            os.remove(self.lock_file)
        except OSError as err:
            print(f"Error cleaning up lock file: {err}")
        self.lock_file = ""

    def _write_manifest(self, stream):
        """Write the zipped manifest bytes to ``<log_dir>/<manifest_file_name>.log.zip``.

        Args:
            stream: Bytes of the ZIP archive produced by ``_zip_str``.
        """
        # The payload is a ZIP archive, so the file carries the same ".log.zip"
        # suffix as the e-mail attachment.
        filename = os.path.join(self.log_dir, f"{self.manifest_file_name}.log.zip")
        try:
            with open(filename, "wb") as manifest_file:
                manifest_file.write(stream)
        except OSError as err:
            self._err_and_print(f"Failed to write manifest file to {filename}: {str(err)}")
        else:
            logger.info(f"Manifest written to {filename}")

    def _fail_and_email(self, message):
        """Report ``message`` as an error and e-mail it to ``error_emails`` when configured.

        Args:
            message: Human-readable description of the failure.
        """
        self._err_and_print(message)
        if self.error_emails:  # only send the email if we have a To.
            self._log_and_print("Sending failure email")

            subject = f"Starfish zonetag script error report: {self.namespacetag}"
            logger.debug("Subject: %s", subject)
            body = (
                "Script Name: zonetag-archive.py\n"
                f'Run Date: {self.start_time.strftime("%a %d %b %Y %H:%M:%S %p %Z")}\n'
                f"Namespace tag: {self.namespacetag}\n\n"
                f"{message}\n\n"
                f"Further details can be found in the log at {self.error_log_location}\n"
            )

            logger.debug("Body: %s", body)

            sfmail = SFMail(To=self.error_emails, Subject=subject, From=self.from_address)
            sfmail.set_body(body)
            sfmail.send_smtp()
        else:
            msg = "Not sending failure email because addressed not supplied. Set --email-on-error for email errors."
            self._err_and_print(msg)

    def _zip_str(self, filename, payload):
        """Return the bytes of an in-memory ZIP archive holding one member.

        Args:
            filename: Name of the member inside the archive.
            payload: Text (or bytes) stored as the member's content.

        Returns:
            bytes: The complete ZIP archive.
        """
        from zipfile import ZIP_DEFLATED

        out = io.BytesIO()
        # ZipFile stores members uncompressed unless a compression method is given.
        with ZipFile(out, "w", compression=ZIP_DEFLATED) as new_archive:
            new_archive.writestr(filename, payload)
        return out.getvalue()

    def error_report(self):
        """Send the failure report if any error was recorded during the run."""
        if self.report_errors:
            self._fail_and_email(f"Non-fatal errors were detected. Please check logs in {self.log_dir}.")

    def validate_tag(self):
        """Exit with status 1 unless the tag is ``namespace:tag`` and known to Starfish.

        Raises:
            SystemExit: If the format is wrong or the tag is not listed in its namespace.
        """
        parts = self.namespacetag.split(":")
        if len(parts) != 2:
            self._fail_and_email(f"Invalid format for 'namespace:tag': {self.namespacetag}")
            sys.exit(1)
        namespace, tag = parts
        result = self.sfc.get_tags(with_namespace=False, in_namespace=namespace)
        if tag not in result["tags"]:
            self._fail_and_email(f"Invalid 'namespace:tag': {self.namespacetag}")
            sys.exit(1)

    def get_tagged_objects(self):
        """Query Starfish for the entries explicitly tagged with this zone tag."""
        tag_explicit = f"tag-explicit={self.namespacetag}"
        # "%2A" is the URL-encoded "*" wildcard.
        return self.sfc.query("%2A", tag_explicit)

    def create_manifest_from_archive_job(self, archive_job_id):
        """Collect the ``vol_path`` of every entry in an archive job's manifest.

        Starts an asynchronous manifest query, polls it until done and deletes the
        stored query result after reading it.

        Args:
            archive_job_id: Id of the archive job to build the manifest for.

        Returns:
            set: The ``vol_path`` values; empty if the query reported an error.
        """
        self._log_and_print(f"Creating Manifest for {archive_job_id}", end="", flush=True)
        result = self.sfc.create_manifest(archive_job_id, True)
        query_id = result["query_id"]
        while True:
            status = self.sfc.get_async_query_status(query_id)
            if status["is_done"]:
                break
            print(".", end="", flush=True)
            time.sleep(self._POLL_INTERVAL_SECONDS)
        print("")

        error = status["error"]
        if error:
            message = f"Could not create manifest for {archive_job_id} with query_id {query_id}. Error: {error}"
            self._err_and_print(message)
            return set()

        manifest = self.sfc.get_async_query_result(query_id)
        file_list = {item["vol_path"] for item in json.loads(manifest)}
        self.sfc.delete_async_query_result(query_id)
        return file_list

    def create_manifest_file_and_send_success_email(
        self, mail_to, no_empty_email, manifest, email_num_lines=0, create_manifest=True
    ):
        """Write the manifest file and/or send the success e-mail.

        Args:
            mail_to: Recipient(s) of the success e-mail; a falsy value sends nothing.
            no_empty_email: If true, do nothing at all when the manifest is empty
                (only applies while ``create_manifest`` is true).
            manifest: Collection of archived paths.
            email_num_lines: The file list is also put into the mail body when it
                has at most this many entries.
            create_manifest: If false, no manifest file is written or attached.
        """
        manifest_size = len(manifest)
        if no_empty_email and manifest_size == 0 and create_manifest:
            logger.info(f"Archive of {self.namespacetag} matched no files, no manifest created.")
            return

        subject = f"Starfish Migration Report: {self.namespacetag}"
        logger.debug("Subject: %s", subject)
        body = (
            "Script Name: zonetag-archive.py\n"
            f'Run Date: {self.start_time.strftime("%a %d %b %Y %H:%M:%S %p %Z")}\n'
            f"Namespace tag: {self.namespacetag}\n"
            f"Inventory line count: {manifest_size}\n"
        )

        logger.debug("Body: %s", body)

        if create_manifest:
            self._log_and_print("Creating Manifest File")
            sorted_manifest = "\n".join(sorted(manifest))
            stream = self._zip_str(f"{self.manifest_file_name}.log", body + sorted_manifest)
            self._write_manifest(stream)

        if mail_to:  # only send the email if we have a To.
            self._log_and_print("Sending success email")
            sfmail = SFMail(To=mail_to, Subject=subject, From=self.from_address)
            if create_manifest:
                mbody = (
                    f"The list of Starfish migrated files generated at {self.formatted_start_time} is attached\n{body}"
                )
                if manifest_size <= email_num_lines:
                    mbody += sorted_manifest
                sfmail.set_body(mbody)
                sfmail.attach(f"{self.manifest_file_name}.log.zip", stream=stream)
            else:
                sfmail.set_body(body)

            sfmail.send_smtp()

    def _wait_for_archive_job(self, archive_job):
        """Poll an archive job until it stops running and return its last status.

        Prints the phase name whenever it changes and a dot per poll.
        """
        current_phase = archive_job["phase"]
        while True:
            status = self.sfc.get_archive_job(archive_job["id"])
            logger.debug("Archive Job Status: %s", status)
            if current_phase != status["phase"]:
                current_phase = status["phase"]
                print("\n" + current_phase, end="")
            if not status["state"]["is_running"]:
                print("")
                return status
            print(".", end="", flush=True)
            time.sleep(self._POLL_INTERVAL_SECONDS)

    def run_archive_job(
        self,
        directories,
        separate_vols_by_dir,
        archive_target,
        compression_type,
        compression_level,
        from_scratch,
        storage_class,
        migrate,
        preserve_empty_dirs,
        archive_filters,
        create_manifest,
    ):
        """Run one archive job per tagged directory, one after another.

        Args:
            directories: Entries as returned by ``get_tagged_objects``; each needs
                ``type``, ``volume``, ``full_path`` and ``vol_path``.
            separate_vols_by_dir: Prefix the destination path with the volume name.
            archive_target: Name of the archive target.
            compression_type: Passed through to ``SFCommon.run_archive``.
            compression_level: Passed through to ``SFCommon.run_archive``.
            from_scratch: Passed through to ``SFCommon.run_archive``.
            storage_class: Passed through to ``SFCommon.run_archive``.
            migrate: Passed through; also enables removal of empty directories.
            preserve_empty_dirs: Keep empty directories even when migrating.
            archive_filters: Passed to ``SFCommon.run_archive`` as ``query``.
            create_manifest: Build the file manifest of the successful jobs.

        Returns:
            set: ``vol_path`` of the archived files; empty when ``create_manifest``
            is false or nothing succeeded.
        """
        self.successful_archive_jobs = []
        # Empty directories are removed only when migrating and not asked to keep them.
        remove_empty_dirs = bool(migrate) and not preserve_empty_dirs

        for d in directories:
            # Archiving should only be done on directories, so if they somehow tagged a file or symlink, log an error
            # about it and continue on.
            if d["type"] != self._DIRECTORY_TYPE:
                self._err_and_print(f'Non-directory tagged for archive, skipping {d["vol_path"]}')
                continue

            dest_path = d["full_path"]
            if separate_vols_by_dir:
                dest_path = d["volume"] + "/" + dest_path
            self._log_and_print(f"Performing archive of {d['vol_path']}", end="")

            archive_job = self.sfc.run_archive(
                d["vol_path"],
                archive_target,
                dest_path,
                compression_type,
                compression_level,
                from_scratch,
                False,
                storage_class,
                migrate,
                remove_empty_dirs=remove_empty_dirs,
                query=archive_filters,
            )
            if not archive_job:
                self._err_and_print(f"Failed to start archive job for: {d['vol_path']}")
                continue

            status = self._wait_for_archive_job(archive_job)
            self._log_and_print(f"Archiving of {d['vol_path']} complete.")
            if status["status"] == "SUCCESS":
                self.successful_archive_jobs.append(status)
            else:
                # print errors to logs which will also flag it for error email later
                self._err_and_print(f"archive job for {d['vol_path']} failed: {status['reason']}")

        manifest = set()
        # only create manifest if flag is set, otherwise pass back empty manifest
        # saves many minutes of time on large jobs where a custom manifest is desired
        if create_manifest:
            for archive_job in self.successful_archive_jobs:
                manifest.update(self.create_manifest_from_archive_job(archive_job["id"]))

        return manifest

    def scan_directories(self, directories):
        """Run a "diff" scan on every tagged directory and wait for each to finish.

        Non-directory entries are skipped. A scan that cannot be started, or that
        finishes in a state other than "done", is recorded as an error.

        Args:
            directories: Entries as returned by ``get_tagged_objects``.
        """
        for d in directories:
            if d["type"] != self._DIRECTORY_TYPE:
                continue
            self._log_and_print(f"Scanning volume {d['volume']} path {d['vol_path']}.", end="")
            result = None
            try:
                result = self.sfc.create_new_scan(d["volume"], "diff", d["full_path"], overlapping_check_disabled=True)
            except Exception as err:
                # Best effort: one directory failing to scan must not stop the others.
                self._err_and_print(f"\nError doing scan of volume {d['volume']} path {d['vol_path']}: {err}")

            if not result:
                continue

            scanid = result["id"]
            while True:
                final_status = self.sfc.get_scan(scanid)["status"]
                if final_status in self._FINISHED_SCAN_STATES:
                    break
                print(".", end="", flush=True)
                time.sleep(self._POLL_INTERVAL_SECONDS)
            print("")
            if final_status != "done":
                self._err_and_print(
                    f"Scan of volume {d['volume']} path {d['vol_path']} ended with status '{final_status}'."
                )


def get_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the process
            command line. ``None`` (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: The parsed options. ``--no-manifest`` is stored inverted
        as ``create_manifest``.
    """
    description_text = (
        f"%(prog)s version: {VERSION}\n"
        "Starfish script to archive data for a single zonetag on one or more volumes.\n"
        "This script supports tagged directories only; files will be logged.\n"
    )

    examples_text = (
        "Examples:\n\n"
        "Run the script on zonetag Zone1_Action:archive with volume name myVol, archiving the data\n"
        "to the archive target called archiveTarget01:\n"
        "\t%(prog)s archiveTarget01 Zone1_action:archive\n"
        "\nSame as above example, but archive files even if they have been previously archived:\n"
        "\t%(prog)s --from-scratch archiveTarget01 Zone1_action:archive\n"
        "\nSame as above example, but scan relevant directories before and after archive, create\n"
        "separate directories at the destination named after the volumes at source:\n"
        "\t%(prog)s --migrate --scan-before-archive --scan-after-archive --separate-vols-by-dir\n"
        " --email-on-error abc@company.com archiveTarget01 Zone1_action:archive\n"
    )

    # RawTextHelpFormatter keeps the hand-made line breaks and tabs of the help texts.
    parser = argparse.ArgumentParser(
        epilog=examples_text, description=description_text, formatter_class=argparse.RawTextHelpFormatter
    )
    # Required
    parser.add_argument("archive_target", help="Name of the archive target to archive to.")
    parser.add_argument("zonetag", help="The tag used to identify the items to archive in the format namespace:tagname")

    # Optional
    parser.add_argument(
        "--from-scratch",
        action="store_true",
        default=False,
        help="Run the job on files that have been previously archived.",
    )
    parser.add_argument(
        "--migrate",
        action="store_true",
        default=False,
        help="Archive files and delete from source; remove empty directories afterwards.",
    )
    parser.add_argument(
        "--keep-empty-dirs",
        action="store_true",
        default=False,
        help="When --migrate is used, if directories are empty, do not remove them.",
    )

    help_text = (
        "Additional filter parameters to use for the archive.\n"
        "All filters supported by `sf archive start` command are supported, but the dashes (--)\n"
        "are removed from the query argument and = are added for values.\n"
        "For example if the query command is:\n"
        '\t--type f --size 0-1024, then use: --sf-archive-filters "type=f size=0-1024"\n'
        "\t--atime 'inf-3 years ago', should be --sf-archive-filters 'atime=\"inf-3 years ago\"'\n"
        '\t--tag ~tagset:exclude, should be --sf-archive-filters "tag=~tagset:exclude"\n'
        "\t\tNOTE: In this example, the tagset is 'tagset' and the tag used to exclude is 'exclude'\n"
        "\t--type d --aggrs.max.atime:lte 1630705951, should be --sf-archive-filters "
        '"type=d aggrs.max.atime=1630705951"\n'
        "\t\tNOTE: the number for atime is in epoch format\n"
    )
    parser.add_argument("--sf-archive-filters", help=help_text)

    help_text = (
        "If the archive job had any failures, send an email to the designated email addresses.\n"
        "Separate multiple email addresses by commas (no spaces between email addresses)."
    )
    parser.add_argument("--email-on-error", metavar="email-addresses", help=help_text)

    help_text = (
        "Scan the relevant directories before starting the archive process. Helps ensure that all files\n"
        "currently in location to archive are known to Starfish before archive starts. (default: %(default)s)"
    )
    parser.add_argument("--scan-before-archive", default=False, action="store_true", help=help_text)

    help_text = (
        "Scan the relevant directories after the archive process finishes. Ensures that Starfish\n"
        "GUI reflects removed directories/files, when --migrate option is used.  (default: %(default)s)"
    )
    parser.add_argument("--scan-after-archive", default=False, action="store_true", help=help_text)

    # The default is the integer 0 (debugging off) so the value is always an int,
    # matching type=int; callers only test it for truthiness and for == 2.
    parser.add_argument(
        "--debug",
        default=0,
        type=int,
        choices=[1, 2],
        help="Turn on debug mode to log additional output to the log file. ",
    )

    help_text = (
        "Send an email to the designated email addresses. Separate multiple email addresses by\n"
        "commas (no spaces between email addresses). This also creates the report file in the\n"
        "zonetag-archive log-dir."
    )
    parser.add_argument("--email-file-list", metavar="email-addresses", help=help_text)
    parser.add_argument(
        "--storage-class",
        choices=[
            "STANDARD",
            "REDUCED_REDUNDANCY",
            "STANDARD_IA",
            "ONEZONE_IA",
            "INTELLIGENT_TIERING",
            "GLACIER",
            "GLACIER_IR",
            "DEEP_ARCHIVE",
            "HOT",
            "COOL",
            "ARCHIVE",
        ],
        help="Specify the storage class in the case of a cloud-based archive target",
        default="",
    )
    parser.add_argument(
        "--compression-type",
        choices=["xz", "gzip"],
        default="",
        help="Compress file contents. Valid for object storage targets only. Options are %(choices)s.",
    )

    help_text = (
        "Compression level from 1(fastest and least compression) to 9 (slowest and best compression);\n"
        "ignored if --compression-type is not specified. (default: %(default)s)"
    )
    parser.add_argument("--compression-level", choices=list(range(1, 10)), default=6, type=int, help=help_text)

    # Stored inverted: args.create_manifest is True unless --no-manifest is given.
    parser.add_argument(
        "--no-manifest",
        action="store_false",
        default=True,
        dest="create_manifest",
        help="skip manifest generation phase",
    )
    parser.add_argument(
        "--separate-vols-by-dir",
        action="store_true",
        help="On the target, create individual top level directories for each volume.",
    )

    help_text = (
        "If list of migrated files is less than <n>, include the migrated file list in the body of the email\n"
        "in addition to attachment. (default: %(default)s)"
    )
    parser.add_argument("--email-n-lines", type=int, default=0, metavar="n", help=help_text)
    parser.add_argument(
        "--send-empty-email",
        action="store_true",
        default=False,
        help="Send file list email even if no files were archived.",
    )

    help_text = (
        "The email address to use when sending emails so that they show in From:. (default is\n"
        "<currentuser>@<host>, current user is the user the script is running as. If you are\n"
        "using cron, it would be the system account used by cron (e.g. root), by default)"
    )
    user = getpass.getuser()
    parser.add_argument("--from-address", default=f"{user}@{socket.gethostname()}", help=help_text)

    parser.add_argument(
        "--log-dir",
        default="/opt/starfish/log/zonetag-archive/",
        help="Directory to put logs, manifest files, and lock files. (default=%(default)s)",
    )
    return parser.parse_args(argv)


def main():
    """Parse the command line and run the zonetag archive workflow under the tag lock.

    Steps: validate the tag, look up the tagged entries, optionally scan them,
    archive them, write/e-mail the manifest, optionally scan again and finally
    send the error report if anything was flagged.
    """
    invocation = " ".join(sys.argv)

    args = get_args()
    zta = ZoneTagArchiveUtils(args.log_dir, args.zonetag, args.email_on_error, args.from_address)
    with zta:
        logger.info(f"Script invoked: {invocation}")

        debug_level = args.debug
        if debug_level:
            logger.setLevel(logging.DEBUG)
            sfc_logging = logging.getLogger("sfcommon")
            sfc_logging.setLevel(logging.DEBUG)

            # also send any errors from sfcommon to stderr
            stderr_handler = logging.StreamHandler(sys.stderr)
            stderr_handler.setLevel(logging.ERROR)
            sfc_logging.addHandler(stderr_handler)

            # level 2 additionally turns on debug output of the HTTP layer
            if debug_level == 2:
                requests_log = logging.getLogger("requests.packages.urllib3")
                requests_log.setLevel(logging.DEBUG)
                requests_log.propagate = True

        zta.validate_tag()
        tagged_entries = zta.get_tagged_objects()

        if args.scan_before_archive:
            zta.scan_directories(tagged_entries)

        manifest = zta.run_archive_job(
            directories=tagged_entries,
            separate_vols_by_dir=args.separate_vols_by_dir,
            archive_target=args.archive_target,
            compression_type=args.compression_type,
            compression_level=args.compression_level,
            from_scratch=args.from_scratch,
            storage_class=args.storage_class,
            migrate=args.migrate,
            preserve_empty_dirs=args.keep_empty_dirs,
            archive_filters=args.sf_archive_filters,
            create_manifest=args.create_manifest,
        )

        # --send-empty-email is the opt-in; the helper expects the inverse flag.
        suppress_empty_email = not args.send_empty_email
        zta.create_manifest_file_and_send_success_email(
            mail_to=args.email_file_list,
            no_empty_email=suppress_empty_email,
            manifest=manifest,
            email_num_lines=args.email_n_lines,
            create_manifest=args.create_manifest,
        )

        if args.scan_after_archive:
            zta.scan_directories(tagged_entries)

        zta.error_report()


if __name__ == "__main__":
    main()
