#!/bin/bash
#
# Subsets gridded files to a spatial domain using a cdo operator.
# When the input consists of several time slices, each slice is subset
# and the subsets are then merged into a single file.
#
# by Piotr
# Mar 2025
#
#
#
################################################################

# Validate the number of arguments before doing any work.
# Four positional arguments are read further down (variable, input directory,
# output directory, cdo command); anything less is a usage error.
# NOTE(review): the help text mentions a jobsfile created by
# makejobs_subset.py, but this script does not read a jobsfile - presumably
# it is what drives the batch submission; confirm.
if (( $# < 4 )); then
    # the help text goes to stderr and the exit status is non-zero, so that a
    # scheduler or a calling script can tell that nothing was processed
    {
        echo
        echo "script to subset gridded files to a spatial subset"
        echo "when files are time slices - they are first subset and the subsets merged into one file"
        echo "it is intended to be run as a slurm batch job"
        echo "it requires jobsfile that is created using makejobs_subset.py"
        echo
        echo "usage: subset.sh <variable> <indir> <outdir> <cdo_command>"
        echo
        echo "this script requires four arguments. Got ${#}. Exiting..."
        echo
    } >&2
    exit 1
fi


# NOTE(review): this flag is only assigned here; nothing in the visible part
# of the script reads it.
overwrite=false

var=$1 #variable to be processed
indir=$2 #directory from which files will be read
outdir=$3 #directory to which subset files will be written
cdocmd=$4
#cdocmd will come as comma separated list

#domain=${domain//,/ }

#might implement this at some stage, for the time being - all files are merged, and no time slice is cut
#firstyear=${params[3]}
#lastyear=${params[4]}

#######################################
# Collects the paths matching <dir>/<prefix>_* into the global array `files`.
# The shell glob is used instead of parsing `ls` output, so paths containing
# whitespace stay intact and matching directories are not expanded into
# their contents.
# Globals:   files (written)
# Arguments: $1 - directory to search
#            $2 - file name prefix (the part before the first "_")
# Outputs:   nothing
# Returns:   0; `files` is empty when nothing matches
#######################################
list_input_files() {
    local dir=$1 prefix=$2
    local had_nullglob=false

    # nullglob makes an unmatched pattern expand to nothing instead of to the
    # pattern itself; the previous setting is restored afterwards
    if shopt -q nullglob; then
        had_nullglob=true
    fi
    shopt -s nullglob
    files=("$dir/${prefix}"_*)
    if [[ "$had_nullglob" == false ]]; then
        shopt -u nullglob
    fi
    return 0
}

#listing files in input directory
list_input_files "$indir" "$var"

echo "found ${#files[@]} files"


# Decide where the subset files are written and what the final output file is.
#  - more than one input file: subsets go to $outdir/temp and the final file is
#    $outdir/<name>_merged.nc, where <name> is the name of the first input file
#    without its last "_"-separated element, i.e. the date range
#  - exactly one input file: the subset goes straight to $outdir under the
#    name of the input file
#  - no input file: there is nothing to do
nfiles=${#files[@]}
if (( nfiles > 1 )); then
    echo "processing to $outdir/temp"
    tempdir=$outdir/temp

    #name of outputfile
    filename=$(basename -- "${files[0]}")
    # ${filename%_*} strips the shortest trailing "_..." part (the date range),
    # which is then replaced with "merged.nc"
    newfilename=${filename%_*}_merged.nc
    echo "$newfilename"
    outfile=$outdir/$newfilename

elif (( nfiles == 1 )); then
    echo "processing to $outdir"
    tempdir=$outdir
    filename=$(basename -- "${files[0]}")
    outfile=$tempdir/$filename
else
    # an empty input set is treated as "nothing to do", not as an error
    echo "no files found. skipping"
    exit 0
fi


# Stop early when the final output file is already present, so that an existing
# result is neither recomputed nor replaced.
# The path is quoted: unquoted, an empty value made the test true and a path
# with whitespace made it fail with a syntax error.
# NOTE(review): the message refers to an overwrite switch, but this check does
# not consult one - an existing file always ends the run.
if [[ -f "$outfile" ]]; then
    echo "file exists and overwrite is off. exiting..."
    exit 0
fi



#######################################
# Creates a directory (with parents) and opens it up to the afriverse group.
# Arguments: $1 - directory to create
# Outputs:   error messages of mkdir/chgrp/chmod on stderr
# Returns:   1 when the directory cannot be created, 0 otherwise; the group
#            and mode changes are best effort and do not affect the status
#######################################
make_shared_dir() {
    local dir=$1

    mkdir -p -- "$dir" || return 1
    chgrp -R afriverse -- "$dir"
    chmod -R 775 -- "$dir"
    return 0
}

# if tempdir does not exist
# (an empty value is skipped rather than handed to mkdir)
if [[ -n "$tempdir" && ! -d "$tempdir" ]]; then
    if ! make_shared_dir "$tempdir"; then
        echo "cannot create $tempdir. exiting..." >&2
        exit 1
    fi
fi



#######################################
# Runs cdo on one input file.
# Arguments: $1 - cdo operator string; it is split on whitespace into separate
#                 cdo arguments, without glob expansion
#            $2 - input file
#            $3 - output file
# Outputs:   the executed command on stdout
# Returns:   the exit status of cdo; when cdo fails the (possibly partial)
#            output file is removed so that a later run does not skip it
#######################################
subset_file() {
    local operators=$1 src=$2 dst=$3
    local -a op_words cmd

    read -r -a op_words <<< "$operators"
    # the command is kept as an array so that paths with whitespace survive
    cmd=(cdo "${op_words[@]}" "$src" "$dst")
    echo "${cmd[*]}"

    "${cmd[@]}"
    local status=$?
    if (( status != 0 )); then
        rm -f -- "$dst"
    fi
    return "$status"
}

#iterating through files and subsetting each
# NOTE(review): no separate domain argument is passed to cdo; the variable
# that used to hold it is never set in this script, so the spatial selection
# presumably comes with the cdo operator string - confirm.
for infile in "${files[@]}"; do
    tempfile=$tempdir/$(basename -- "$infile")
    echo "$tempfile"
    if [[ -e "$tempfile" ]]; then
        # subsets left by an earlier run are reused
        echo skipping
        continue
    fi
    if ! subset_file "$cdocmd" "$infile" "$tempfile"; then
        # stopping here keeps an incomplete set of subsets from being merged
        echo "cdo failed for $infile. exiting..." >&2
        exit 1
    fi
done

#######################################
# Merges the subset files of one variable into a single file with
# cdo -mergetime, selecting only that variable from every input.
# Arguments: $1 - variable name; also the file name prefix before "_"
#            $2 - directory holding the subset files
#            $3 - output file
# Outputs:   the executed command on stdout
# Returns:   the exit status of cdo
#######################################
merge_subsets() {
    local name=$1 dir=$2 dst=$3
    local -a cmd

    # "[" and "]" are literal arguments handed to cdo around the input files;
    # the glob is expanded by the shell into one argument per file
    cmd=(cdo -mergetime "-apply,-selname,$name" '[' "$dir/${name}"_* ']' "$dst")
    echo "${cmd[*]}"
    "${cmd[@]}"
}

# if multiple files were subset - merging them
if (( ${#files[@]} > 1 )); then
    echo merging files
    if ! merge_subsets "$var" "$tempdir" "$outfile"; then
        echo "merging failed. exiting..." >&2
        exit 1
    fi
    #removing tempdir
    #rm $tempdir/${var}_*
fi

#setting permissions
# the whole output directory is made mode 775 and handed to group afriverse;
# a failure of either step is reported and reflected in the exit status
status=0
if ! chmod -R 775 -- "$outdir"; then
    echo "warning: could not set permissions on $outdir" >&2
    status=1
fi
if ! chgrp -R afriverse -- "$outdir"; then
    echo "warning: could not change group of $outdir" >&2
    status=1
fi
exit "$status"



