#!/opt/starfish/examples/venv/bin/python3
"""
***********************************************************************************************************

 Starfish Storage Corporation ("Starfish") CONFIDENTIAL
 Unpublished Copyright (c) 2011 - present Starfish Storage Corporation, All Rights Reserved.

 NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
 Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
 intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
 U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
 Dissemination of this information or reproduction of this material is strictly forbidden unless prior
 written permission is obtained from Starfish. Access to the source code contained herein is hereby
 forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
 confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
 Starfish's software.

 ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
 THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
 AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
 FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
 DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
 WHOLE OR IN PART.

 FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
   These notices shall be marked on any reproduction of this data, in whole or in part.
   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
   Software clause at DFARS 52.227-7013.

***********************************************************************************************************
"""

###############################################################################
#  Author Doug Hughes
#  Last modified 2023-06-01
#
# Run simple sql queries while removing the need to find the auth key or
# format the query
# This script outputs the query results, whatever they are, in a CSV output
# format. It does not currently take care of quoting.
#
# 2018-08-27 - add html table support
# 2022-08-18: Refactor and fix pylint warning: 'redefined-outer-name'
# 2023-06-01: connect to different default url for broader compatibility
#
# WARNING: This script runs queries directly against the Starfish database
# and could be used to cause tremendous harm and render Starfish inoperable if
# used incorrectly. It is recommended to only run select and not update queries.

import datetime
import os
import re
import sys
from pprint import pprint

import requests
import urllib3

try:
    # not needed at deployment
    import unittest
except Exception:
    pass

from sfexamples.sfcommon import SFMail

try:
    import argparse
    import configparser
    from argparse import RawTextHelpFormatter

    import matplotlib.pyplot as plt
    import psycopg2
except ImportError as e:
    # Fail early with an installation hint instead of a NameError later on.
    # The adjacent string literals are concatenated, so each piece must carry
    # its own separating space.
    print(
        "In order to use this, you must have configparser, argparse, matplotlib "
        "and psycopg2 modules installed (available via pip among "
        "other means.)"
    )
    print(str(e))
    sys.exit(1)

# redash has a made up cert, so it is queried with certificate verification
# switched off (requests.get(..., verify=False)); silence the urllib3 warnings
# about that.
urllib3.disable_warnings()


# Configuration helper and custom exceptions


def getpgauth(config_path="/opt/starfish/etc/99-local.ini"):
    """pull auth info from config file to use implicitly

    :param config_path: ini file with a ``[pg]`` section holding ``pg_uri``
    :returns: the value of the ``pg_uri`` option
    Prints a message to stderr and exits with status 1 when the file cannot be
    read, cannot be parsed, or does not contain the option.
    """
    config = configparser.ConfigParser()
    try:
        # ConfigParser.read() does not raise for files it cannot open; it skips
        # them and returns the list of files it parsed. An empty list therefore
        # means the file is missing or unreadable.
        if not config.read(config_path):
            print("can't read config file to get connection uri. check permissions.", file=sys.stderr)
            sys.exit(1)
        return config.get("pg", "pg_uri")
    except OSError:
        print("can't read config file to get connection uri. check permissions.", file=sys.stderr)
        sys.exit(1)
    except configparser.Error as err:
        # malformed file, missing [pg] section or missing pg_uri option
        print(f"can't get connection uri from {config_path}: {err}", file=sys.stderr)
        sys.exit(1)


class NoQueryByName(Exception):
    """error used when a query name cannot be matched to a query"""

    def __init__(self, message="No query matches this query name"):
        # hand the (default) text to Exception so str(exc) returns it
        Exception.__init__(self, message)


class BadArgs(Exception):
    """error used when the supplied arguments do not form a usable combination"""

    def __init__(self, message="Bad argument combination"):
        # hand the (default) text to Exception so str(exc) returns it
        Exception.__init__(self, message)


class TestReport(unittest.TestCase):
    """extension for unittest framework.

    These are integration tests: they connect to the database named by
    getpgauth() and to the Redash server given by the class attributes below,
    which cmd_args() fills in from --redash-api-key / --redash-url when the
    hidden --test option is used.
    """

    redash_api_key = ""
    redash_url = ""

    def test_auth(self):
        """test connection"""
        conn_ = psycopg2.connect(getpgauth())
        # close the connection even when an assertion below fails
        self.addCleanup(conn_.close)
        self.assertIsNotNone(conn_)

    def test_query(self):
        """test sql return"""
        conn_ = psycopg2.connect(getpgauth())
        self.addCleanup(conn_.close)
        query_ = """select count(*) from sf_volumes.volume"""
        cur_ = conn_.cursor()
        cur_.execute(query_)
        self.assertIsNotNone(cur_)
        rows_ = cur_.fetchall()
        self.assertEqual(len(rows_), 1)

    def test_object(self):
        """initialize a redash object to local test server"""
        urllib3.disable_warnings()
        r = Redash(url=TestReport.redash_url, api_key=TestReport.redash_api_key)
        ql = r.get_query_list()
        self.assertIn("Volume Name List", ql)
        qstr = r.get_query("Volume Name List")
        # test query result
        self.maxDiff = None
        self.assertEqual(
            qstr,
            "select name from sf_volumes.volume",
        )
        # test assertion for bad query
        with self.assertRaises(NoQueryByName):
            r.get_query("Bad Robot Query")

    def test_args(self):
        """test argument processing"""

        # A Namespace *instance*: assigning attributes on the argparse.Namespace
        # class itself would leak state into every other test.
        args_ = argparse.Namespace(
            redash_url="https://localhost/foo",
            list_redash_queries=True,
            redash_api_key=False,
            redash_query="Volume Change Report",
            # bad argument
            query="Select 1",
        )
        with self.assertRaises(BadArgs):
            process_args(args_)

        # good argument, but insufficient
        args_.list_redash_queries = False
        args_.redash_api_key = TestReport.redash_api_key
        args_.output = False
        # no output chosen
        with self.assertRaises(BadArgs):
            process_args(args_)

    def test_query_params(self):
        """test that query params are identified correctly"""

        r = Redash(url=TestReport.redash_url, api_key=TestReport.redash_api_key)
        self.assertIsNotNone(r)
        beforeq = r.get_query("Volume Change Report")
        self.assertIsNotNone(beforeq)
        # make sure it says missing parameters properly
        self.assertCountEqual(r.get_query_params(), {"volume", "group_by_days", "number_of_days_to_look_back"})
        with self.assertRaises(SystemExit):
            r.fill_params("volume=home,group_by_days=2")

        afterq = r.fill_params("volume=home,group_by_days=2,number_of_days_to_look_back=1")
        # Good enough to ensure the queries are not the same (parameters replaced)
        self.assertNotEqual(beforeq, afterq)

    def test_normalize_date(self):
        """test date normalization for x axis labels"""
        self.assertEqual(reduce_date("2020-08-08 21:04:20.062389-04:00"), "2020-08-08 21:04:20")
        self.assertEqual(reduce_date("2020-08-08 21:04:20"), "2020-08-08 21:04:20")
        self.assertEqual(reduce_date("2020-08-08"), "2020-08-08")


class Redash:
    """a class to handle redash related queries

    On construction all queries are downloaded from the Redash API. One of them
    can then be selected by name (get_query) and have its ``{{name}}`` /
    ``{{{name}}}`` placeholders replaced (fill_params).
    """

    def __init__(self, api_key, url="http://localhost:5003/"):
        """download the query list

        :param api_key: Redash API key sent in the Authorization header
        :param url: base url of the Redash server (a trailing slash is accepted)
        """
        self._queries_ = Redash.get_queries(url, api_key)
        # a selected query
        self.query = ""

    @staticmethod
    def _debug_level():
        """:returns: the --debug level of the running script, 0 when there is none"""
        cli_args = globals().get("args")
        return getattr(cli_args, "debug", 0) or 0

    @staticmethod
    def get_queries(url, api_key):
        """fetch all redash queries, following the API's paging

        :returns: dict mapping query name to ``{"id": ..., "query": ...}``
        :raises requests.RequestException: on connection or HTTP errors
        """
        qdict = {}
        headers = {"Authorization": f"Key {api_key}"}
        # avoid "//api/queries" when the base url ends with a slash
        path = f"{url.rstrip('/')}/api/queries"
        page = 1
        while True:
            response = requests.get(path, headers=headers, verify=False, timeout=60, params={"page": page})
            # surface bad keys/urls as an HTTP error instead of a KeyError below
            response.raise_for_status()
            payload = response.json()
            for jstruct in payload["results"]:
                qdict[jstruct["name"]] = {"id": jstruct["id"], "query": jstruct["query"]}

            # stop once the pages fetched so far cover the reported total
            if page * payload["page_size"] >= payload["count"]:
                break
            page += 1

        return qdict

    def get_query_params(self):
        """pull out the placeholder names of the selected query

        Names are stripped of surrounding whitespace, so ``{{ volume }}`` and
        ``{{volume}}`` both yield ``volume``.
        :return: set of parameter names (empty when there are none)"""

        found = re.findall(r"{{{?([A-Za-z_ ]+)}}}?", self.query)
        qp_set = {name.strip() for name in found} - {""}
        if Redash._debug_level() > 1:
            print(f"redash query params: {qp_set}")
        return qp_set

    def get_query(self, queryname):
        """select a query by name and return its body
        :returns: query_body (only one)
        :raises NoQueryByName: when no fetched query has that name"""
        if queryname not in self._queries_:
            raise NoQueryByName

        self.query = self._queries_[queryname]["query"]
        return self.query

    def get_query_list(self):
        """get sorted list of all of the redash query names"""
        return sorted(self._queries_)

    @staticmethod
    def filter_query(filterkv, column_names, result_rows):
        """filter query results by column name and value pair

        :param filterkv: ``column=value``; only the first ``=`` separates, so the
            value itself may contain ``=``
        :param column_names: column names in row order
        :param result_rows: rows to filter
        :returns: list of the rows whose column, rendered with str(), equals value
        :raises ValueError: when filterkv has no ``=`` or the column is unknown
        """
        namefilt, sep, namevalue = filterkv.partition("=")
        if not sep:
            raise ValueError(f"filter must look like column=value, got {filterkv!r}")
        try:
            colindex = column_names.index(namefilt)
        except ValueError:
            raise ValueError(
                f"unknown filter column {namefilt!r}; available columns: {list(column_names)}"
            ) from None
        # compare as text: the filter value always arrives as a string while the
        # database may deliver numbers, dates, ...
        return [row for row in result_rows if str(row[colindex]) == namevalue]

    def fill_params(self, arg_params):
        """replace the parameterized variables in the redash query with the substitutions

        :param arg_params: comma separated ``name=value`` items. Values may contain
            ``=`` but not ``,``. Items with an empty name (such as the ``=``
            default of --params) are ignored.
        :returns: the query with placeholders replaced (also stored in self.query)
        :raises ValueError: when an item has no ``=``
        Exits with status 1 when the query has placeholders that were not supplied.
        Values are inserted into the SQL text verbatim.
        """
        redash_params = self.get_query_params()
        arg_param_dict = {}
        for item in arg_params.split(","):
            if not item.strip():
                continue
            key, sep, value = item.partition("=")
            if not sep:
                raise ValueError(f"parameter must look like name=value, got {item!r}")
            key = key.strip()
            if key:
                arg_param_dict[key] = value

        if Redash._debug_level() > 1:
            print("arg_params: ")
            pprint(arg_param_dict)

        seta_diff = redash_params - set(arg_param_dict)
        if seta_diff:
            print(
                f"Error: query parameter(s) {seta_diff} are missing. Add them using --params key1=v1,key2=v2,...",
                file=sys.stderr,
            )
            sys.exit(1)

        for key, value in arg_param_dict.items():
            # escape the name so regex metacharacters in it are taken literally
            pattern = re.compile(r"{{{?\s*" + re.escape(key) + r"\s*}}}?")
            # a callable replacement keeps backslashes in the value untouched
            self.query = pattern.sub(lambda _match, text=value: text, self.query)
        if Redash._debug_level() > 1:
            print("replaced query:")
            print(self.query)
        return self.query


# --------------------------- global functions -----------------------------

# global callbacks to generate output


def csv(outfile, result_rows, column_names):
    """write query data to outfile as delimiter separated text

    Uses the script-level ``args``: ``args.delimiter`` separates the fields and
    ``args.no_header`` suppresses the column name line. Fields are not quoted.
    Exits with status 1 when the file cannot be written.
    """

    try:
        with open(outfile, "w") as handle:
            if args.no_header is False:
                handle.write(args.delimiter.join(column_names) + "\n")
            for record in result_rows:
                handle.write(args.delimiter.join(map(str, record)) + "\n")
    except OSError:
        print(f"couldn't create output file {outfile}")
        sys.exit(1)
    else:
        print(f"created csv outfile {outfile}")


def html(outfile, result_rows, column_names):
    """generate html table from query data

    Writes a complete html document with one header row (column_names) and one
    table row per entry of result_rows. The page title and heading come from
    the script-level ``args.title``. Title, column names and cell values are
    html-escaped so that characters such as ``<`` or ``&`` in the data cannot
    break or inject markup.
    Exits with status 1 when the file cannot be written.
    """
    # this function hides the stdlib module of the same name at module level,
    # so the helper is imported locally
    from html import escape

    title = escape(str(args.title))
    parts = [
        f"""<html>
               <head>
               <title>{title}</title>
               <style>
               table {{
                border-collapse: collapse;
                width: 100%;
               }}

               th, td {{
                  text-align: left;
                  padding: 8px;
               }}

               tr:nth-child(even) {{background-color: #ffdc91;}}
               </style>
               </head>
               <body>
               <p style="font-family: 'Verdana', sans-serif; font-size: 16px; font-weight: bold">{title}</p>
               <table style="border: 1px solid black;">
               <tr><th style="border: 1px solid black;">
            """
    ]
    header_sep = '</th><th style="border: 1px solid black;">'
    parts.append(header_sep.join(escape(str(el)) for el in column_names))
    parts.append("</th></tr>\n")

    cell_sep = '</td>\n<td style="border: 1px solid black;">'
    for row in result_rows:
        parts.append('<tr><td style="border: 1px solid black;">\n')
        parts.append(cell_sep.join(escape(str(el)) for el in row))
        parts.append("</td></tr>\n")

    parts.append("</table> </body> </html>")
    # join once instead of growing one string piece by piece
    html_body = "".join(parts)

    try:
        with open(outfile, "w") as f_:
            print(html_body, file=f_)
        print(f"created html outfile {outfile}")
    except OSError:
        print(f"couldn't create output file {outfile}")
        sys.exit(1)


def reduce_date(datestr):
    """shorten a date string by dropping everything from the first "." onward
    (microseconds and whatever follows them); strings without "." are unchanged
    :returns:  the shortened date string"""
    head, _dot, _rest = datestr.partition(".")
    return head


def png(outfile, result_rows, column_names):
    """generate a graph file from query data

    Plots column ``args.yaxis`` (and optionally ``args.yaxis2`` on a second y
    axis) against column ``args.xaxis`` and saves the figure to outfile. Reads
    the script-level ``args`` for xaxis, yaxis, yaxis2, title, zero and debug.
    Exits with status 1 when a requested axis is not one of column_names or
    the file cannot be written.
    """

    # column name -> list of that column's values, in row order
    axesdata = {name: [] for name in column_names}
    for row in result_rows:
        for name, value in zip(column_names, row):
            axesdata[name].append(value)

    if args.debug:
        print("columns: ", end="")
        print(" ".join(str(el) for el in column_names))

    # validate every requested axis before plotting, so a bad --yaxis2 gets the
    # same friendly message as a bad --xaxis/--yaxis
    wanted = [args.xaxis, args.yaxis]
    if args.yaxis2:
        wanted.append(args.yaxis2)
    if any(name not in axesdata for name in wanted):
        print("either the xaxis or y axis name is bad for graphing")
        print("available columns: ")
        print(list(axesdata.keys()))
        if args.debug:
            # the query text only exists as a global when run as a script
            print(f"query: {globals().get('query')}")
        sys.exit(1)

    fig, ax1 = plt.subplots()
    try:
        color = "tab:red"

        if args.title == "graph":
            # "graph" is the --title default: derive a title from the axis names
            plt.title(f"{args.yaxis} vs {args.xaxis}")
        else:
            plt.title(args.title)

        ax1.set_ylabel(args.yaxis, color=color)
        ax1.plot(axesdata[args.xaxis], axesdata[args.yaxis], color=color)

        x = axesdata[args.xaxis]
        xlabels = [reduce_date(str(value)) for value in x]
        ax1.set_xlabel(args.xaxis)
        # label every fifth point only
        ax1.set_xticks(x[::5])
        ax1.set_xticklabels(xlabels[::5], rotation=60)

        if args.zero:
            ax1.set_ylim(ymin=0)

        if args.yaxis2:
            color = "tab:blue"
            ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
            ax2.set_ylabel(args.yaxis2, color=color)  # we already handled the x-label with ax1
            ax2.plot(axesdata[args.xaxis], axesdata[args.yaxis2], color=color)
            ax2.tick_params(axis="y", labelcolor=color)
            if args.zero:
                ax2.set_ylim(ymin=0)

        plt.xticks(x[::5], xlabels[::5], rotation=60)
        fig.tight_layout()  # otherwise the right y-label is slightly clipped

        try:
            plt.savefig(outfile, bbox_inches="tight")
            print(f"created output file {outfile}")
        except OSError:
            print(f"couldn't create output file {outfile}")
            sys.exit(1)
    finally:
        # release the figure; pyplot keeps figures alive until they are closed
        plt.close(fig)


def email(mailto, mailfrom, attach_list, mailhost="localhost", attach_type="png", subject=None):  # pylint: disable=R0913
    """mail the generated report file(s) using SFMail

    :param mailto: value for the To header (comma separated addresses without spaces)
    :param mailfrom: value for the From header
    :param attach_list: names of the files to send
    :param mailhost: passed to SFMail as mailhost
    :param attach_type: "html" files are read and passed to set_body, "csv" files
        are attached with mimetype text/csv, anything else is attached as is
    :param subject: Subject header; when None a default with the current time is used
    :returns: Nothing"""

    timestamp = str(datetime.datetime.now().strftime("%Y-%m-%d %T"))
    subject_line = f"mailed report attached from {timestamp}" if subject is None else subject
    if args.debug > 0:
        print(f"sending email to: {mailto}")

    message = SFMail(mailhost=mailhost)
    message.set_headers(From=f"{mailfrom}", Subject=subject_line)
    message.set_headers(To=f"{mailto}")

    # introduction block: creation time, tool name, logo and link
    intro = "".join(
        (
            f"Report created at: {timestamp}",
            "<br>by sfpng<p/>",
            '<img width="97" height="21" ',
            'src="http://starfishstorage.com/wp-content/uploads/2018/07/StarFishLogo.png">',
            " &nbsp; &nbsp; &nbsp; ",
            '<a href="http://www.starfishstorage.com">http://www.starfishstorage.com</a> <p/>',
        )
    )
    message.set_body(intro, msgtype="html")

    for attachment in attach_list:
        if attach_type == "html":
            with open(attachment, "r") as handle:
                content = handle.read()
            message.set_body(content, msgtype="html")
            continue
        if attach_type == "csv":
            message.attach(attachment, mimetype="text/csv")
            continue
        message.attach(attachment)

    sent = message.send_smtp()
    if not sent:
        print(f"mail send failed to {mailto}", file=sys.stderr)


def cmd_args():
    """do argument parsing

    Parses sys.argv. With the hidden --test option the unit tests of TestReport
    are run (using --redash-api-key / --redash-url) and the process exits.
    :returns: arg structure (argparse.Namespace)"""

    epilog = """
        --params  are substituted into query {{params}} in redash queries. This argument is only
        valid when --redash-query is used. If there are no parameters in the query, this does
        nothing. If there are arguments supplied here that are not in the query, those arguments do
        nothing. If there are missing arguments from the query, an error will be generated indicating
        the missing parameters that must be supplied.

        Examples:

        # show all currently stored redash queries

        sfpng.py --redash-api-key ${redash_api_key} --list-redash-queries | head

        # email an html table from a parameterized query, substituting parameters

        sfpng.py --redash-api-key ${redash_api_key} --redash-query "Volume Churn Report Single Volume" \\
        --params group_by_days=3,volume=home,number_of_days_to_look_back=365 --output html  \\
        --mail-to sven@sprockets.com,omar@sprockets.com --mail-from=support@starfishstorage.com \\
        --title "query report Volume Churn"

        # run a regular query and graph it as png; select xaxis and yaxis from column results
        # count on left axis(1) and size on right axis(2)
        # save it to local file. Start all y value graphs at 0. (--zero)

        sfpng.py --redash-api-key ${redash_api_key} --query "SELECT group_name, TO_CHAR(run_time, 'YYYY-mm-dd') \\
        as run_time, count(*) AS count, ROUND(sum(size) / (1000*1000*1000),2) as size_GB FROM \\
        sf_reports.last_time_generic_history WHERE run_time > now() - INTERVAL '90 days' \\
        AND group_name = 'doug' GROUP BY group_name, run_time ORDER BY run_time;" \\
        --xaxis=run_time --yaxis=count --title "Group count" --yaxis2="size_gb" --zero --output png

        # substitute parameters and filter the results by the VOLUME NAME column where the volume name
        # value is home. Save it to a specific output file rather than the default

        sfpng.py --redash-url https://192.168.10.139/redash/ --redash-api-key ${redash_api_key} \\
        --redash-query "Volume Churn Report" --xaxis date --yaxis "added files"  \\
        --yaxis2 "added files size (GiB)" --output png --title "query report" \\
        --params number_of_days_to_look_back=90,group_by_days=3 --filter-results "VOLUME NAME::filter=home" \\
        --output-file /tmp/home-churn.png
        """

    # Parse Arguments
    parser = argparse.ArgumentParser(epilog=epilog, formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        "--no-header", "-H", required=False, action="store_true", default=False, help="suppress header in csv"
    )
    parser.add_argument("--debug", required=False, type=int, default=0, help="add some debugging to output")
    parser.add_argument("--delimiter", required=False, default=",", help="choose a different delimiter for csv")
    parser.add_argument(
        "--query",
        required=False,
        help=("an SQL query or a file containing an SQL query " "(mutually exclusive with --redash-query)"),
    )
    parser.add_argument(
        "--redash-query",
        required=False,
        help=("a redash query name to pull that query for execution " "(mutually exclusive with --query)"),
    )
    parser.add_argument(
        "--list-redash-queries", action="store_true", required=False, help="show all Redash queries by name"
    )
    parser.add_argument("--xaxis", required=False, help="column from query to use as x axis")
    parser.add_argument("--yaxis", required=False, help="column from query to use as y axis")
    parser.add_argument("--yaxis2", required=False, help="optional second y axis")
    parser.add_argument("--zero", required=False, action="store_true", help="set yaxis start point to 0")
    parser.add_argument("--title", required=False, default="graph", help="graph title")
    parser.add_argument(
        "--skip-empty", required=False, action="store_true", help="don't send email if the query result is empty"
    )
    parser.add_argument("--test", action="store_true", required=False, help=argparse.SUPPRESS)
    parser.add_argument("--output", required=False, choices=["html", "png", "csv"], help="choose output mode")
    parser.add_argument("--output-file", required=False, default="/tmp/output", help="choose output file location")
    parser.add_argument("--redash-api-key", required=False, help="to pull query from redash need the api key")
    parser.add_argument(
        "--redash-url",
        required=False,
        default="https://127.0.0.1/redash",
        help="url to redash api (default: %(default)s)",
    )
    parser.add_argument(
        "--params",
        required=False,
        # "=" is the "no parameters given" marker the script body checks for
        default="=",
        help="for parameterized redash queries, do name-based substitution.\n"
        "Takes a comma separated list of name=value items",
    )
    parser.add_argument(
        "--filter-results",
        required=False,
        help="pick a column and a value like --filter-results volume=home to filter results",
    )
    parser.add_argument("--mail-to", required=False, help="a comma separated list of email addresses (no spaces!)")
    parser.add_argument(
        "--mail-from", required=False, default="starfishreports@localhost", help="a source address to use for emails"
    )
    parser.add_argument(
        "--mail-subject",
        required=False,
        default=None,
        help="use a fixed and carefully chosen Subject for the email instead of the default report",
    )

    # parse once; a second parse_args() call only repeats the same work
    args_ = parser.parse_args()

    # early check for unit testing
    if args_.test:
        TestReport.redash_api_key = args_.redash_api_key
        TestReport.redash_url = args_.redash_url
        # the fixed argv keeps unittest from interpreting this script's own options
        unittest.main(argv=["first-arg-is-ignored"], exit=True)
        sys.exit(0)

    return args_


def process_args(arguments):
    """check arguments for sanity

    :param arguments: parsed arguments as returned by cmd_args()
    :raises BadArgs: when a redash option is used without an api key or when no
        --output mode was chosen
    With --list-redash-queries the query names are printed and the process
    exits: status 0 after listing, status 1 when the list cannot be fetched.
    """

    # Check the various combinations of valid options
    needs_redash = arguments.list_redash_queries or arguments.redash_query
    if needs_redash and not arguments.redash_api_key:
        print("To connect to redash, an api key must be supplied", file=sys.stderr)
        print("run sfpng --help.", file=sys.stderr)
        raise BadArgs

    if arguments.list_redash_queries:
        try:
            redash_ = Redash(url=arguments.redash_url, api_key=arguments.redash_api_key)
        except Exception as err:  # pylint: disable=broad-except
            print(f"failed to fetch list; check api key and redash-url ({err})", file=sys.stderr)
            sys.exit(1)
        print("\n".join(redash_.get_query_list()))
        # Listing is a complete and successful run; raising BadArgs here would
        # make the caller exit with an error status.
        sys.exit(0)

    if not arguments.output:
        print("output selection is required. Run sfpng --help.", file=sys.stderr)
        raise BadArgs


# ------------------------ main --------------------


if __name__ == "__main__":
    # NOTE: args, query and delimiter are deliberately module-level names; the
    # output functions (csv, html, png) and email() read them as globals.
    try:
        args = cmd_args()
        process_args(args)
    except BadArgs:
        sys.exit(1)

    redash = None
    query = args.query or None

    if args.redash_query:
        try:
            redash = Redash(url=args.redash_url, api_key=args.redash_api_key)
        except Exception:  # pylint: disable=broad-except
            print("failed to fetch query; check api key and redash-url")
            sys.exit(1)
        try:
            redash.get_query(args.redash_query)
        except NoQueryByName as e:
            print(f"{str(e)}: {args.redash_query}")
            sys.exit(1)

    # query gets set from redash or passed in as argument, any other condition is failure
    if query is None and redash is None:
        print("must supply either a query to run or a file name that holds a query or a Redash query name")
        sys.exit(1)

    if args.params != "=" and not args.redash_query:
        print("Warning: --params are ignored without a redash query that has parameters", file=sys.stderr)

    # no redash object and no fill if plain query is used; when both --query and
    # --redash-query are given the redash query is the one that runs
    if redash is not None:
        query = redash.fill_params(args.params)

    # An existing file always means "read the query from it". Checking the text
    # for select/with first would run paths such as /tmp/select_volumes.sql as SQL.
    if os.path.isfile(query):
        try:
            with open(query, "r") as f:
                query = f.read()
        except OSError as e:
            print(f"can't read query file: {e}")
            sys.exit(1)
    elif not re.search(r"select|with", query, re.IGNORECASE):
        print("invalid query; must be select statement or name of file with select statement")
        sys.exit(1)

    try:
        conn = psycopg2.connect(getpgauth())
    except psycopg2.DatabaseError as e:
        print("unable to connect to the redash database: {}".format(str(e)))
        sys.exit(1)

    # read by png() when it prints the column list in debug mode
    delimiter = " "

    if args.debug:
        print("executing query " + query)

    cur = None
    try:
        cur = conn.cursor()
        cur.execute(query)

        # statements that return no rows leave description as None
        if cur.description is None:
            print("query did not return a result set; nothing to report")
            sys.exit(1)

        # get column names
        colnames = [desc[0] for desc in cur.description]

        # get all the rows
        rows = cur.fetchall()
    except psycopg2.Error as e:
        print(f"query failed: {e}")
        sys.exit(1)
    finally:
        # everything needed is in memory now; release the database resources
        if cur is not None:
            cur.close()
        conn.close()

    # in a big table that delivers lots of volumes you can filter using something like
    # --filter-results volume=homevol
    if args.filter_results:
        rows = Redash.filter_query(args.filter_results, colnames, rows)

    if args.skip_empty and len(rows) == 0:
        if args.debug:
            print("dbg: empty result set. exiting")
        sys.exit(0)

    # explicit table instead of a globals() lookup: only these callbacks can run
    writers = {"csv": csv, "html": html, "png": png}
    report_file = f"{args.output_file}.{args.output}"
    writers[args.output](report_file, rows, colnames)

    if args.mail_to:
        # why a list? thinking for the future; leaves in an assumption that all
        # items are the same type, though
        email(
            mailto=args.mail_to,
            mailfrom=args.mail_from,
            attach_list=[report_file],
            attach_type=args.output,
            subject=args.mail_subject,
        )

    sys.exit(0)
