#!/bin/bash
#***********************************************************************************************************
#
# Starfish Storage Corporation ("Starfish") CONFIDENTIAL
# Unpublished Copyright (c) 2011 - present Starfish Storage Corporation, All Rights Reserved.
#
# NOTICE: This file and its contents (1) constitute Starfish's "External Code" under Starfish's most-recent
# Limited Software End-User License Agreement, and (2) is and remains the property of Starfish. The
# intellectual and technical concepts contained herein are proprietary to Starfish and may be covered by
# U.S. and/or foreign patents or patents in process, and are protected by trade secret or copyright law.
# Dissemination of this information or reproduction of this material is strictly forbidden unless prior
# written permission is obtained from Starfish. Access to the source code contained herein is hereby
# forbidden to anyone except (A) current Starfish employees, managers, or contractors who have executed
# confidentiality or nondisclosure agreements explicitly covering such access, and (B) licensees of
# Starfish's software.
#
# ANY REPRODUCTION, COPYING, MODIFICATION, DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR
# THROUGH USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF STARFISH IS STRICTLY PROHIBITED
# AND IS IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS
# FILE OR ITS CONTENTS AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE,
# DISCLOSE, OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN
# WHOLE OR IN PART.
#
# FOR U.S. GOVERNMENT CUSTOMERS REGARDING THIS DOCUMENTATION/SOFTWARE
#   These notices shall be marked on any reproduction of this data, in whole or in part.
#   NOTICE: Notwithstanding any other lease or license that may pertain to, or accompany the delivery of,
#   this computer software, the rights of the Government regarding its use, reproduction and disclosure are
#   as set forth in Section 52.227-19 of the FARS Computer Software-Restricted Rights clause.
#   RESTRICTED RIGHTS NOTICE: Use, duplication, or disclosure by the Government is subject to the
#   restrictions as set forth in subparagraph (c)(1)(ii) of the Rights in Technical Data and Computer
#   Software clause at DFARS 52.227-7013.
#
#***********************************************************************************************************
# Last update 2020-10-30
# Given a username, find all directories in any or all volumes that have at least
# one file owned by that user. Further, collapse common directory trees into the
# topmost prefix and sum the counts and sizes.
# Example: if a user has files with the following counts and sizes
# home/doug 2 1G
# home/doug/git 2 1G
# home/doug/git/charlemagne 1 1G
# proj/scales/enterprise 2 1G
# proj/scales/enterprise/yammer 3 1G
#
# the results would be:
# home/doug 5 3.00 G
# proj/scales/enterprise 5 2.00 G

# Volpaths to search (set with -v); left empty, the search covers all volumes.
volume=''

# Print the command-line help text to stdout.
# The here-document delimiter is unquoted on purpose so that $0 expands to
# the name the script was invoked with.
usage() {
cat <<_EOF
usage:  $0 -u <user> [ -v (volpath|"volpath1 volpath2 ..") ]
-u <user>                         - a single username to search is required
-v (volpath|"volpath1 volpath2")  - a list of 0 or more volpaths, quoted if >1
                                    if none are given, search over all volumes
-h                                - this help message

Examples:
Summarize doug over all volumes
user_files_by_dir.sh -u doug

Summarize for bill over home:users
user_files_by_dir.sh -u bill -v home:users

Summarize for john over 2 volumes
user_files_by_dir.sh -u john -v "home: proj:"

_EOF
}

# Strict mode: abort on command failure, on use of an unset variable, and on
# failure of any stage of a pipeline.
set -euo pipefail

# Give user a defined (empty) value before parsing. Without it, running the
# script with no -u option trips `set -u` ("unbound variable") in the check
# below and the intended error message and usage text are never printed.
user=""

# Parse options: -h prints help, -v sets the volpath list, -u sets the user.
while getopts 'hv:u:' OPTION; do
    case "$OPTION" in
    h)
        usage
        exit 0
        ;;
    v)
        volume="$OPTARG"
        ;;
    u)
        user="$OPTARG"
        ;;
    *)
        # Unknown option or missing option argument: getopts has already
        # reported it; show the help on stderr so stdout carries only data.
        usage >&2
        exit 1
        ;;
    esac
done
shift $((OPTIND - 1))

# -u is mandatory.
if [[ -z "$user" ]]; then
    echo "Error: -u <username> is a required argument" >&2
    usage >&2
    exit 1
fi

# Ask sf for the files of type f owned by $user, grouped and sorted by
# volume and parent_path, with "|" as the field delimiter, then collapse the
# per-directory rows into their topmost directories with awk.
# NOTE(review): the awk program assumes the columns are
# volume|parent_path|count|size and that -H suppresses the header row;
# confirm against the sf query documentation.
#
# Output: one tab-separated row per topmost directory, in the order the
# directories first appear in the query output:
#   volume <TAB> path <TAB> file count <TAB> size in units of 10^9 bytes, then " G"
# Nothing is printed when the query returns no rows.
#
# $volume is purposefully not quoted so that a quoted list of volpaths is
# split into separate arguments.
# shellcheck disable=SC2086
sf query -H --username "$user" --type f --group-by volume,parent_path --sort-by volume,parent_path $volume -d '|' |
awk -F'|' '
# Return the index of the already-registered topmost directory that is
# path itself or its nearest ancestor on volume volname, or 0 if there is none.
# Paths are compared literally and component by component, so regex
# metacharacters in directory names are harmless and "home/douglas" is not
# treated as being inside "home/doug".
function find_root(volname, path,    key) {
    while (1) {
        key = volname SUBSEP path
        if (key in root_index) {
            return root_index[key]
        }
        if (path == "") {
            return 0
        }
        # Step up to the parent directory; a path without "/" has the
        # volume root ("") as its parent.
        if (!sub("/[^/]*$", "", path)) {
            path = ""
        }
    }
}
{
    idx = find_root($1, $2)
    if (!idx) {
        # No ancestor seen so far on this volume: this directory starts a
        # new topmost group. This relies on a parent directory being listed
        # before its descendants, which the sort by parent_path provides.
        idx = ++nroots
        root_index[$1 SUBSEP $2] = idx
        root_vol[idx] = $1
        root_dir[idx] = $2
    }
    files[idx] += $3
    bytes[idx] += $4
}
END {
    # Every row uses the same format, including the " G" unit suffix.
    for (i = 1; i <= nroots; i++) {
        printf("%s\t%s\t%d\t%.2f G\n", root_vol[i], root_dir[i], files[i], bytes[i] / (1000 * 1000 * 1000))
    }
}'

exit 0
