From 867f8af22a5c0d2947eb48b57aa16a336b1e94d7 Mon Sep 17 00:00:00 2001
From: Emma Ai
Date: Thu, 27 May 2021 14:14:40 +1000
Subject: [PATCH] change folder name to cater windows (#13)

Co-authored-by: Emma Ai
---
 auxfiles/README.md     | 85 +++++++++++++++++++++++++++++++++++++
 auxfiles/fc_pd.yaml    | 96 ++++++++++++++++++++++++++++++++++++++++++
 auxfiles/job_normal.sh | 14 ++++++
 auxfiles/job_sub_in.sh | 76 +++++++++++++++++++++++++++++++++
 auxfiles/wetland.sh    | 16 +++++++
 5 files changed, 287 insertions(+)
 create mode 100644 auxfiles/README.md
 create mode 100644 auxfiles/fc_pd.yaml
 create mode 100644 auxfiles/job_normal.sh
 create mode 100755 auxfiles/job_sub_in.sh
 create mode 100644 auxfiles/wetland.sh

diff --git a/auxfiles/README.md b/auxfiles/README.md
new file mode 100644
index 0000000..505e8b0
--- /dev/null
+++ b/auxfiles/README.md
@@ -0,0 +1,85 @@
+`job_normal.sh` is used to submit an individual job to the PBS queue.
+
+`job_sub_in.sh` is used to submit jobs in bulk to the PBS queue.
+
+`wetland.sh` is used to set up the environment to run the wit tooling.
+
+`job_normal.sh`
+======
+
+usage:
+---
+
+`qsub -l ncpus=$num_cpus,mem=${mem}GB -v threads=$((num_cpus * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh`
+
+For `$num_cpus` and `$mem`, refer to the `qsub` manual.
+
+`threads`, `feature`, `datasets`, `aggregate`, `pdyaml` and `shapefile` are the parameters required by `job_normal.sh`:
+
+- `threads` is the number of `OpenMP` threads. We oversubscribe it to 4 times the number of CPUs to a) employ hyper-threading and b) increase CPU utilisation, since the job is I/O bound.
+
+- `$feature` is passed to `--feature-list $feature` in `wetland_brutal.py wit-cal`.
+
+- `$datasets` is passed to `--datasets $datasets` in `wetland_brutal.py wit-cal`.
+
+- `$aggregate` is passed to `--aggregate $aggregate` in `wetland_brutal.py wit-cal`.
+
+- `$PDYAML` is passed to `--product-yaml $PDYAML` in `wetland_brutal.py wit-cal`.
+
+- `$shapefile` is the positional argument of `wetland_brutal.py wit-cal`.
+
+Example
+------
+
+`qsub -N anae_1005 -l ncpus=48,mem=192GB -v threads=192,feature=anae//new/contain_1005.txt,datasets=anae//query/1005.pkl,aggregate=0,pdyaml=/g/data/u46/users/ea6141/wlinsight/fc_pd.yaml,shapefile=/g/data/r78/DEA_Wetlands/shapefiles/MDB_ANAE_Aug2017_modified_2019_SB_3577.shp job_normal.sh`
+
+`job_sub_in.sh`
+=============
+
+usage:
+-----
+
+`./job_sub_in.sh $input $shapefile $aggregate`
+
+`$input` is the folder where the feature lists and query results are stored.
+`$shapefile` is the shapefile with all the polygons for the job.
+`$aggregate` is the number of days per aggregation window, if aggregation is required.
+
+Example:
+
+`./job_sub_in.sh sadew/ shapefiles/waterfowlandwetlands_3577.shp 15`
+
+Note: prepend your work folder to these paths if needed.
+
+In the file:
+-----------
+
+`PDYAML` is the virtual product recipe; modify it to the correct path, e.g., `$yourworkingfolder/wit_tooling/aux/fc_pd.yaml`.
+
+`num_thread` is calculated from the number of polygons to be processed in parallel; the minimum is `9`, matching the `mpirun -np 9` in `job_normal.sh`. DO NOT change it.
+
+`mem` determines how the job is charged on NCI. The multiplier `UMEM=4` CAN be dialled up until the total memory hits the `192GB` limit; as in the script, `UMEM=8` is used when aggregation over time slices is required.
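+
+For reference, here is a minimal sketch of that sizing arithmetic, mirroring what `job_sub_in.sh` computes per tile (the script itself remains the authoritative version; `$feature` is assumed to point at a feature list):
+
+```bash
+# Sketch of the per-tile sizing logic in job_sub_in.sh (assumes $feature is set).
+NCPUS=48; MEM=$((48 * 4)); UMEM=4          # UMEM becomes 8 when aggregating over time
+num_thread=$(wc -l < "$feature")           # one thread per polygon in the feature list
+[ "$num_thread" -lt 9 ] && num_thread=9    # never below 9, the MPI worker count
+[ "$num_thread" -gt "$NCPUS" ] && num_thread=$NCPUS
+mem=$((num_thread * UMEM))                 # memory request grows with thread count
+[ "$mem" -gt "$MEM" ] && mem=$MEM          # capped at the 192GB queue limit
+```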
+
+`wetland.sh`
+============
+
+usage:
+-----
+
+`source wetland.sh`
+
+Note: you need to source `wetland.sh` to load the correct modules before running `wit_tooling`.
+
+In the file:
+-----
+
+`module load dea/20200316` loads the 16-03-2020 version of the datacube module. This may change when the datacube gets updated.
+
+`module load openmpi/4.0.1` satisfies the `openmpi` requirement.
+
+`PYTHONUSERBASE` is where your customised packages are installed. `wit_tooling` will be installed in this folder if you installed it with `--user`.
+`PYTHONPATH` adds the datacube-stats refactor branch to the front of your path so that it is picked up first.
+
+Example:
+
+`export PYTHONUSERBASE=/g/data/r78/rjd547/python_setup/`
+
diff --git a/auxfiles/fc_pd.yaml b/auxfiles/fc_pd.yaml
new file mode 100644
index 0000000..7c429ce
--- /dev/null
+++ b/auxfiles/fc_pd.yaml
@@ -0,0 +1,96 @@
+juxtapose:
+  - collate:
+      - product: ls8_fc_albers
+        measurements: [BS, PV, NPV]
+        source_filter:
+          product: ls8_level1_scene
+          gqa_iterative_mean_xy: [0, 1]
+        dataset_predicate: wit_tooling.ls8_on
+      - product: ls7_fc_albers
+        measurements: [BS, PV, NPV]
+        source_filter:
+          product: ls7_level1_scene
+          gqa_iterative_mean_xy: [0, 1]
+        dataset_predicate: wit_tooling.ls7_on
+      - product: ls5_fc_albers
+        measurements: [BS, PV, NPV]
+        source_filter:
+          product: ls5_level1_scene
+          gqa_iterative_mean_xy: [0, 1]
+        dataset_predicate: wit_tooling.ls5_on_1ym
+  - collate:
+      - transform: make_mask
+        input:
+          product: ls8_pq_albers
+          fuse_func: datacube.helpers.ga_pq_fuser
+        flags:
+          contiguous: True
+          cloud_acca: no_cloud
+          cloud_fmask: no_cloud
+          cloud_shadow_acca: no_cloud_shadow
+          cloud_shadow_fmask: no_cloud_shadow
+          blue_saturated: False
+          green_saturated: False
+          red_saturated: False
+          nir_saturated: False
+          swir1_saturated: False
+          swir2_saturated: False
+        mask_measurement_name: pixelquality
+      - transform: make_mask
+        input:
+          product: ls7_pq_albers
+          fuse_func: datacube.helpers.ga_pq_fuser
+        flags:
+          contiguous: True
+          cloud_acca: no_cloud
+          cloud_fmask: no_cloud
+          cloud_shadow_acca: no_cloud_shadow
+          cloud_shadow_fmask: no_cloud_shadow
+          blue_saturated: False
+          green_saturated: False
+          red_saturated: False
+          nir_saturated: False
+          swir1_saturated: False
+          swir2_saturated: False
+        mask_measurement_name: pixelquality
+      - transform: make_mask
+        input:
+          product: ls5_pq_albers
+          fuse_func: datacube.helpers.ga_pq_fuser
+        flags:
+          contiguous: True
+          cloud_acca: no_cloud
+          cloud_fmask: no_cloud
+          cloud_shadow_acca: no_cloud_shadow
+          cloud_shadow_fmask: no_cloud_shadow
+          blue_saturated: False
+          green_saturated: False
+          red_saturated: False
+          nir_saturated: False
+          swir1_saturated: False
+          swir2_saturated: False
+        mask_measurement_name: pixelquality
+  - transform: wit_tooling.external_stats.TCIndex
+    input:
+      collate:
+        - product: ls8_nbart_albers
+          measurements: [blue, green, red, nir, swir1, swir2]
+          source_filter:
+            product: ls8_level1_scene
+            gqa_iterative_mean_xy: [0, 1]
+          dataset_predicate: wit_tooling.ls8_on
+        - product: ls7_nbart_albers
+          measurements: [blue, green, red, nir, swir1, swir2]
+          source_filter:
+            product: ls7_level1_scene
+            gqa_iterative_mean_xy: [0, 1]
+          dataset_predicate: wit_tooling.ls7_on
+        - product: ls5_nbart_albers
+          measurements: [blue, green, red, nir, swir1, swir2]
+          source_filter:
+            product: ls5_level1_scene
+            gqa_iterative_mean_xy: [0, 1]
+          dataset_predicate: wit_tooling.ls5_on_1ym
+  - product: wofs_albers
+    measurements: [water]
+    fuse_func: digitalearthau.utils.wofs_fuser
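The recipe above juxtaposes fractional cover, pixel-quality masks, a Tasseled Cap index transform and WOfS into one virtual product. An indentation slip in the YAML only surfaces once `wit-cal` starts, so a quick parse check before submitting a batch can save a queue round-trip (a minimal sketch; it assumes `python3` with PyYAML is available, as in the `dea` module environment):

```bash
# Hypothetical pre-flight check: confirm the recipe is well-formed YAML before qsub.
python3 -c "import sys, yaml; yaml.safe_load(open(sys.argv[1]))" fc_pd.yaml \
    && echo "fc_pd.yaml parses OK" \
    || echo "fc_pd.yaml is malformed"
```
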
diff --git a/auxfiles/job_normal.sh b/auxfiles/job_normal.sh
new file mode 100644
index 0000000..ac21c13
--- /dev/null
+++ b/auxfiles/job_normal.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#PBS -P u46
+#PBS -q normal
+#PBS -l storage=gdata/rs0+gdata/fk4+gdata/v10+gdata/r78+gdata/u46+scratch/r78
+#PBS -l walltime=2:00:00
+#PBS -l jobfs=1GB
+#PBS -l wd
+
+source $HOME/setup-datacube-up2date.sh
+
+echo $threads $feature $datasets $aggregate $pdyaml $shapefile
+export OMP_NUM_THREADS=$threads
+export NUMEXPR_MAX_THREADS=$threads
+mpirun -np 9 -bind-to none python3 -m mpi4py.futures wetland_brutal.py wit-cal --feature-list $feature --datasets $datasets --aggregate $aggregate --product-yaml $pdyaml $shapefile
diff --git a/auxfiles/job_sub_in.sh b/auxfiles/job_sub_in.sh
new file mode 100755
index 0000000..0a62c8a
--- /dev/null
+++ b/auxfiles/job_sub_in.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+NCPUS=48
+MEM=$((48*4))
+# how much memory is charged per cpu
+UMEM=4
+
+# change this accordingly
+PDYAML=fc_pd.yaml
+
+# $1: folder with polygon list and pickled datasets
+# $2: shape file
+# $3: days interval if aggregating
+
+echo start to process $1 $2 $3
+
+if [ ! -s $2 ]; then
+    echo shape file $2 does not exist
+    exit
+fi
+
+if [ ! -d $1/query ]; then
+    echo query results should be in $1/query
+    exit
+fi
+
+if [ ! -d $1/new ]; then
+    echo feature lists should be in $1/new
+    exit
+fi
+
+PDYAML=$(readlink -f $PDYAML)
+shapefile=$(readlink -f $2)
+AGGREGATE=$3
+
+for file in $1/query/*.pkl; do
+    # extract the tile id from the pickled query file name
+    tile_id=$(echo $file | sed 's/.*\/\([_0-9]\+\).*/\1/g')
+    feature=$1/new/contain_$tile_id.txt
+    aggregate=0
+    if [ ! -s $feature ]; then
+        feature=$1/new/intersect_$tile_id.txt
+        if [ ! -s $feature ]; then
+            echo feature list for $tile_id does not exist
+            continue
+        else
+            # note: some big polygons might need dialling up a bit;
+            # we dial up to double for aggregation over time slices
+            aggregate=$AGGREGATE
+            UMEM=8
+        fi
+    else
+        aggregate=0
+        UMEM=4
+    fi
+    num_thread=$(cat $feature | wc -l)
+    if [ $num_thread -lt 9 ]; then
+        num_thread=9
+    else
+        if [ $num_thread -gt $NCPUS ]; then
+            num_thread=$NCPUS
+        fi
+    fi
+
+    mem=$((num_thread * UMEM))
+    if [ $mem -gt $MEM ]; then
+        mem=$MEM
+    fi
+    echo qsub -N ${1//\/}_$tile_id -l ncpus=$num_thread,mem=${mem}GB -v threads=$((num_thread * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh
+    # if a job with this name is already queued, chain the new submissions behind it
+    jobid=$(qselect -N ${1//\/}_$tile_id)
+    if [ "$jobid" == "" ]; then
+        qsub -N ${1//\/}_$tile_id -l ncpus=$num_thread,mem=${mem}GB -v threads=$((num_thread * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh
+    else
+        for i in $(seq 1 2); do
+            jobid=$(qsub -W depend=afterany:$jobid -N ${1//\/}_$tile_id -l ncpus=$num_thread,mem=${mem}GB -v threads=$((num_thread * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh)
+        done
+    fi
+done
diff --git a/auxfiles/wetland.sh b/auxfiles/wetland.sh
new file mode 100644
index 0000000..7673dd9
--- /dev/null
+++ b/auxfiles/wetland.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# code written by Emma A and Bex D on 25.03.2020
+
+module use /g/data/v10/public/modules/modulefiles/
+
+module load dea/20200316
+
+module load openmpi/4.0.1
+
+export PYTHONUSERBASE=/g/data/r78/rjd547/python_setup/
+export PYTHONPATH=/g/data1a/r78/rjd547/jupyter_notebooks/datacube-stats:$PYTHONPATH
+
+
+# to run this script, type: source wetland.sh
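
Putting it all together, a typical session using the files in this patch might look like the following sketch (paths are taken from the README examples; substitute your own work folder and shapefile):

```bash
# Hypothetical end-to-end run on the PBS cluster.
source wetland.sh                    # load the dea and openmpi modules, set python paths
./job_sub_in.sh sadew/ shapefiles/waterfowlandwetlands_3577.shp 15
qstat -u $USER                       # monitor the submitted wit jobs
```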