#!/opt/starfish/examples/venv/bin/python3
# ***********************************************************************************************************
#
# Starfish Storage Corporation ("Starfish") CONFIDENTIAL
# Unpublished Copyright (c) 2011 - present Starfish Storage Corporation, All Rights Reserved.
#
# NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
# Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
# intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
# U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
# Dissemination of this information or reproduction of this material is strictly forbidden unless prior
# written permission is obtained from Starfish. Access to the source code contained herein is hereby
# forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
# confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
# Starfish's software.
#
# ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
# THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
# AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
# FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
# DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
# WHOLE OR IN PART.
#
# FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
#   These notices shall be marked on any reproduction of this data, in whole or in part.
#   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
#   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
#   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
#   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
#   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
#   Software clause at DFARS 52.227-7013.
#
# ***********************************************************************************************************

"""Example of a Starfish job that receives the path names to operate on via standard input (stdin) and returns
a result in JSON format to store back into the Starfish database."""
#
# Example job start command
# sf job start --paths-via-stdin --cmd-output-format json --ext txt \
# /opt/starfish/scripts/file_parse_example.py volume:[/path/on/volume]
# --paths-via-stdin   - tells Starfish to pass paths to the program via standard in,
#                       improves performance by only starting the script once per block of work
# --cmd-output-format - tells Starfish to expect the result as JSON
# --ext txt           - example query filter to only run this job on *.txt files
#
# /opt/starfish/scripts/file_parse_example.py  - path to the script for the job on the agent
# volume:[/path/on/volume] - Starfish volume for the job to operate on with optional path
import json
import os
import re
import sys


# Crude HTML tag detector: "<", at least one character, then ">", anywhere in a line.
_HTML_TAG_RE = re.compile(r"<.+?>")


def _scan_file(path):
    """Count the lines in *path* and do a crude check for HTML tags.

    The file is read in binary mode and each line is decoded as UTF-8 with
    undecodable bytes dropped, so arbitrary file content cannot raise a decode error.

    Returns a dict with the keys "line_count" (int), "processed" (always True)
    and "has_html" (bool), ready to be serialized as the job result.

    Raises OSError if the file cannot be opened or read.
    """
    line_count = 0
    has_html = False

    with open(path, "rb") as f:
        for line in f:
            line_count += 1

            # search (not match) so a tag is found anywhere in the line, not only at its start;
            # once a tag has been seen the answer cannot change, so skip the regex afterwards
            if not has_html and _HTML_TAG_RE.search(line.decode("utf-8", "ignore")):
                has_html = True

    return {"line_count": line_count, "processed": True, "has_html": has_html}


def main():
    """Read null-separated paths from stdin and print one null-terminated JSON result per file to stdout.

    For every regular file, the output is `<path>\\0<json>\\0`. Paths that are not regular
    files, or that cannot be read, produce a message on stderr and no output on stdout.
    """
    # paths are passed to standard in with a null terminator separating files
    # the for loop splits on the null terminator and runs the code once for each file
    # each path will be a full path to the file or directory based on where the volume is mounted on this agent
    for path in sys.stdin.read().split("\0"):
        # Using find to simulate a Starfish job can make testing quick,
        # example:  `find <dir> -print0 | ./file_parse_example.py`
        # the input ends with a null terminator, so the split yields a trailing empty string; skip it
        if not path:
            continue

        # sanity check that the path exists and is a file, Starfish jobs could include files,
        # links, directories based on the parameters of the job
        if not os.path.isfile(path):
            # any information from the script should be printed to stderr to not mix with the
            # job results, Starfish will log all of stderr to a file as part of the job logs on the agent
            print("File not found: {}".format(path), file=sys.stderr)
            # Starfish will consider a file with no output a failure and report it, it can be retried later
            continue

        # For this example we'll count the number of lines in the file and do a crude check for html tags
        try:
            output_dict = _scan_file(path)
        except OSError as err:
            # an unreadable file must not abort the rest of the batch: log it and print nothing to
            # stdout so that only this file is reported as failed
            print("Could not read {}: {}".format(path, err), file=sys.stderr)
            continue

        # at the end, print the file's full path and JSON result back out to stdout for Starfish to process
        # the only data that should be printed to stdout should be the results
        # everything should be null terminated
        # Anything from the input file list that is NOT printed is an automatic error and retry
        # for Starfish; this is the state keeping mechanism at work. If the file cannot be
        # parsed, don't print anything to output.
        print(path + "\0" + json.dumps(output_dict), end="\0")


# Run the job only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
