From 867f8af22a5c0d2947eb48b57aa16a336b1e94d7 Mon Sep 17 00:00:00 2001
From: Emma Ai
Date: Thu, 27 May 2021 14:14:40 +1000
Subject: [PATCH] change folder name to cater windows (#13)

Co-authored-by: Emma Ai
---
 auxfiles/README.md     | 85 +++++++++++++++++++++++++++++++++++++
 auxfiles/fc_pd.yaml    | 96 ++++++++++++++++++++++++++++++++++++++++++
 auxfiles/job_normal.sh | 14 ++++++
 auxfiles/job_sub_in.sh | 76 +++++++++++++++++++++++++++++++++
 auxfiles/wetland.sh    | 16 +++++++
 5 files changed, 287 insertions(+)
 create mode 100644 auxfiles/README.md
 create mode 100644 auxfiles/fc_pd.yaml
 create mode 100644 auxfiles/job_normal.sh
 create mode 100755 auxfiles/job_sub_in.sh
 create mode 100644 auxfiles/wetland.sh

diff --git a/auxfiles/README.md b/auxfiles/README.md
new file mode 100644
index 0000000..505e8b0
--- /dev/null
+++ b/auxfiles/README.md
@@ -0,0 +1,85 @@
+`job_normal.sh` is used to submit an individual job to the PBS queue.
+
+`job_sub_in.sh` is used to submit jobs in bulk to the PBS queue.
+
+`wetland.sh` is used to set up the environment to run the wit tooling.
+
+`job_normal.sh`
+======
+
+usage:
+---
+
+`qsub -l ncpus=$num_cpus,mem=${mem}GB -v threads=$((num_cpus * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh`
+
+For `$num_cpus` and `$mem`, refer to the `qsub` manual.
+
+`threads`, `feature`, `datasets`, `aggregate`, `pdyaml` and `shapefile` are the parameters required by `job_normal.sh`:
+
+- `threads` is the number of `OpenMP` threads. We oversubscribe it to 4 times the number of CPUs to a) employ hyper-threading and b) increase CPU utilisation, since the job is I/O bound.
+
+- `$feature` is passed to `--feature-list $feature` in `wetland_brutal.py wit-cal`.
+
+- `$datasets` is passed to `--datasets $datasets` in `wetland_brutal.py wit-cal`.
+
+- `$aggregate` is passed to `--aggregate $aggregate` in `wetland_brutal.py wit-cal`.
+
+- `$PDYAML` is passed to `--product-yaml $PDYAML` in `wetland_brutal.py wit-cal`.
+
+- `$shapefile` is the positional argument of `wetland_brutal.py wit-cal`.
+
+Example
+------
+
+`qsub -N anae_1005 -l ncpus=48,mem=192GB -v threads=192,feature=anae//new/contain_1005.txt,datasets=anae//query/1005.pkl,aggregate=0,pdyaml=/g/data/u46/users/ea6141/wlinsight/fc_pd.yaml,shapefile=/g/data/r78/DEA_Wetlands/shapefiles/MDB_ANAE_Aug2017_modified_2019_SB_3577.shp job_normal.sh`
+
+`job_sub_in.sh`
+=============
+
+usage:
+-----
+
+`./job_sub_in.sh $input $shapefile $aggregate`
+
+`$input` is the folder where the feature lists and query results are stored.
+`$shapefile` is the shapefile with all the polygons for the job.
+`$aggregate` is the number of days per aggregation window, if aggregation is required.
+
+Example:
+
+`./job_sub_in.sh sadew/ shapefiles/waterfowlandwetlands_3577.shp 15`
+
+Note: prepend your work folder to these paths if needed.
+
+In the file:
+-----------
+
+`PDYAML` is the virtual product recipe; modify it to the correct path, e.g., `$yourworkingfolder/wit_tooling/aux/fc_pd.yaml`.
+
+`num_thread` is calculated from the number of polygons to be processed in parallel; the minimum is `9`, matching the `mpirun -np 9` in `job_normal.sh`. DO NOT change it.
+
+`mem` determines how the job is charged on NCI. The multiplier `UMEM=4` CAN be dialled up until the total memory hits the `192GB` limit; as in the script, `UMEM=8` is used when aggregation over time slices is required.
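+
+For reference, here is a minimal sketch of that sizing arithmetic, mirroring what `job_sub_in.sh` computes per tile (the script itself remains the authoritative version; `$feature` is assumed to point at a feature list):
+
+```bash
+# Sketch of the per-tile sizing logic in job_sub_in.sh (assumes $feature is set).
+NCPUS=48; MEM=$((48 * 4)); UMEM=4          # UMEM becomes 8 when aggregating over time
+num_thread=$(wc -l < "$feature")           # one thread per polygon in the feature list
+[ "$num_thread" -lt 9 ] && num_thread=9    # never below 9, the MPI worker count
+[ "$num_thread" -gt "$NCPUS" ] && num_thread=$NCPUS
+mem=$((num_thread * UMEM))                 # memory request grows with thread count
+[ "$mem" -gt "$MEM" ] && mem=$MEM          # capped at the 192GB queue limit
+```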
+
+`wetland.sh`
+============
+
+usage:
+-----
+
+`source wetland.sh`
+
+Note: you need to source `wetland.sh` to load the correct modules before running `wit_tooling`.
+
+In the file:
+-----
+
+`module load dea/20200316` loads the 16-03-2020 version of the datacube module. This may change when the datacube gets updated.
+
+`module load openmpi/4.0.1` satisfies the `openmpi` requirement.
+
+`PYTHONUSERBASE` is where your customised packages are installed. `wit_tooling` will be installed in this folder if you installed it with `--user`.
+`PYTHONPATH` adds the datacube-stats refactor branch to the front of your path so that it is picked up first.
+
+Example:
+
+`export PYTHONUSERBASE=/g/data/r78/rjd547/python_setup/`
+
diff --git a/auxfiles/fc_pd.yaml b/auxfiles/fc_pd.yaml
new file mode 100644
index 0000000..7c429ce
--- /dev/null
+++ b/auxfiles/fc_pd.yaml
@@ -0,0 +1,96 @@
+juxtapose:
+  - collate:
+      - product: ls8_fc_albers
+        measurements: [BS, PV, NPV]
+        source_filter:
+          product: ls8_level1_scene
+          gqa_iterative_mean_xy: [0, 1]
+        dataset_predicate: wit_tooling.ls8_on
+      - product: ls7_fc_albers
+        measurements: [BS, PV, NPV]
+        source_filter:
+          product: ls7_level1_scene
+          gqa_iterative_mean_xy: [0, 1]
+        dataset_predicate: wit_tooling.ls7_on
+      - product: ls5_fc_albers
+        measurements: [BS, PV, NPV]
+        source_filter:
+          product: ls5_level1_scene
+          gqa_iterative_mean_xy: [0, 1]
+        dataset_predicate: wit_tooling.ls5_on_1ym
+  - collate:
+      - transform: make_mask
+        input:
+          product: ls8_pq_albers
+          fuse_func: datacube.helpers.ga_pq_fuser
+        flags:
+          contiguous: True
+          cloud_acca: no_cloud
+          cloud_fmask: no_cloud
+          cloud_shadow_acca: no_cloud_shadow
+          cloud_shadow_fmask: no_cloud_shadow
+          blue_saturated: False
+          green_saturated: False
+          red_saturated: False
+          nir_saturated: False
+          swir1_saturated: False
+          swir2_saturated: False
+        mask_measurement_name: pixelquality
+      - transform: make_mask
+        input:
+          product: ls7_pq_albers
+          fuse_func: datacube.helpers.ga_pq_fuser
+        flags:
+          contiguous: True
+          cloud_acca: no_cloud
+          cloud_fmask: no_cloud
+          cloud_shadow_acca: no_cloud_shadow
+          cloud_shadow_fmask: no_cloud_shadow
+          blue_saturated: False
+          green_saturated: False
+          red_saturated: False
+          nir_saturated: False
+          swir1_saturated: False
+          swir2_saturated: False
+        mask_measurement_name: pixelquality
+      - transform: make_mask
+        input:
+          product: ls5_pq_albers
+          fuse_func: datacube.helpers.ga_pq_fuser
+        flags:
+          contiguous: True
+          cloud_acca: no_cloud
+          cloud_fmask: no_cloud
+          cloud_shadow_acca: no_cloud_shadow
+          cloud_shadow_fmask: no_cloud_shadow
+          blue_saturated: False
+          green_saturated: False
+          red_saturated: False
+          nir_saturated: False
+          swir1_saturated: False
+          swir2_saturated: False
+        mask_measurement_name: pixelquality
+  - transform: wit_tooling.external_stats.TCIndex
+    input:
+      collate:
+        - product: ls8_nbart_albers
+          measurements: [blue, green, red, nir, swir1, swir2]
+          source_filter:
+            product: ls8_level1_scene
+            gqa_iterative_mean_xy: [0, 1]
+          dataset_predicate: wit_tooling.ls8_on
+        - product: ls7_nbart_albers
+          measurements: [blue, green, red, nir, swir1, swir2]
+          source_filter:
+            product: ls7_level1_scene
+            gqa_iterative_mean_xy: [0, 1]
+          dataset_predicate: wit_tooling.ls7_on
+        - product: ls5_nbart_albers
+          measurements: [blue, green, red, nir, swir1, swir2]
+          source_filter:
+            product: ls5_level1_scene
+            gqa_iterative_mean_xy: [0, 1]
+          dataset_predicate: wit_tooling.ls5_on_1ym
+  - product: wofs_albers
+    measurements: [water]
+    fuse_func: digitalearthau.utils.wofs_fuser
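The recipe above juxtaposes fractional cover, pixel-quality masks, a Tasseled Cap index transform and WOfS into one virtual product. An indentation slip in the YAML only surfaces once `wit-cal` starts, so a quick parse check before submitting a batch can save a queue round-trip (a minimal sketch; it assumes `python3` with PyYAML is available, as in the `dea` module environment):

```bash
# Hypothetical pre-flight check: confirm the recipe is well-formed YAML before qsub.
python3 -c "import sys, yaml; yaml.safe_load(open(sys.argv[1]))" fc_pd.yaml \
    && echo "fc_pd.yaml parses OK" \
    || echo "fc_pd.yaml is malformed"
```
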
diff --git a/auxfiles/job_normal.sh b/auxfiles/job_normal.sh
new file mode 100644
index 0000000..ac21c13
--- /dev/null
+++ b/auxfiles/job_normal.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#PBS -P u46
+#PBS -q normal
+#PBS -l storage=gdata/rs0+gdata/fk4+gdata/v10+gdata/r78+gdata/u46+scratch/r78
+#PBS -l walltime=2:00:00
+#PBS -l jobfs=1GB
+#PBS -l wd
+
+source $HOME/setup-datacube-up2date.sh
+
+echo $threads $feature $datasets $aggregate $pdyaml $shapefile
+export OMP_NUM_THREADS=$threads
+export NUMEXPR_MAX_THREADS=$threads
+mpirun -np 9 -bind-to none python3 -m mpi4py.futures wetland_brutal.py wit-cal --feature-list $feature --datasets $datasets --aggregate $aggregate --product-yaml $pdyaml $shapefile
diff --git a/auxfiles/job_sub_in.sh b/auxfiles/job_sub_in.sh
new file mode 100755
index 0000000..0a62c8a
--- /dev/null
+++ b/auxfiles/job_sub_in.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+NCPUS=48
+MEM=$((48*4))
+# how much memory is charged per cpu
+UMEM=4
+
+# change this accordingly
+PDYAML=fc_pd.yaml
+
+# $1: folder with polygon list and pickled datasets
+# $2: shape file
+# $3: days interval if aggregating
+
+echo start to process $1 $2 $3
+
+if [ ! -s $2 ]; then
+    echo shape file $2 does not exist
+    exit
+fi
+
+if [ ! -d $1/query ]; then
+    echo query results should be in $1/query
+    exit
+fi
+
+if [ ! -d $1/new ]; then
+    echo feature lists should be in $1/new
+    exit
+fi
+
+PDYAML=$(readlink -f $PDYAML)
+shapefile=$(readlink -f $2)
+AGGREGATE=$3
+
+for file in $1/query/*.pkl; do
+    # extract the tile id from the pickled query file name
+    tile_id=$(echo $file | sed 's/.*\/\([_0-9]\+\).*/\1/g')
+    feature=$1/new/contain_$tile_id.txt
+    aggregate=0
+    if [ ! -s $feature ]; then
+        feature=$1/new/intersect_$tile_id.txt
+        if [ ! -s $feature ]; then
+            echo feature list for $tile_id does not exist
+            continue
+        else
+            # note: some big polygons might need dialling up a bit;
+            # we dial up to double for aggregation over time slices
+            aggregate=$AGGREGATE
+            UMEM=8
+        fi
+    else
+        aggregate=0
+        UMEM=4
+    fi
+    num_thread=$(cat $feature | wc -l)
+    if [ $num_thread -lt 9 ]; then
+        num_thread=9
+    else
+        if [ $num_thread -gt $NCPUS ]; then
+            num_thread=$NCPUS
+        fi
+    fi
+
+    mem=$((num_thread * UMEM))
+    if [ $mem -gt $MEM ]; then
+        mem=$MEM
+    fi
+    echo qsub -N ${1//\/}_$tile_id -l ncpus=$num_thread,mem=${mem}GB -v threads=$((num_thread * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh
+    # if a job with this name is already queued, chain the new submissions behind it
+    jobid=$(qselect -N ${1//\/}_$tile_id)
+    if [ "$jobid" == "" ]; then
+        qsub -N ${1//\/}_$tile_id -l ncpus=$num_thread,mem=${mem}GB -v threads=$((num_thread * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh
+    else
+        for i in $(seq 1 2); do
+            jobid=$(qsub -W depend=afterany:$jobid -N ${1//\/}_$tile_id -l ncpus=$num_thread,mem=${mem}GB -v threads=$((num_thread * 4)),feature=$feature,datasets=$file,aggregate=$aggregate,pdyaml=$PDYAML,shapefile=$shapefile job_normal.sh)
+        done
+    fi
+done
diff --git a/auxfiles/wetland.sh b/auxfiles/wetland.sh
new file mode 100644
index 0000000..7673dd9
--- /dev/null
+++ b/auxfiles/wetland.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# code written by Emma A and Bex D on 25.03.2020
+
+module use /g/data/v10/public/modules/modulefiles/
+
+module load dea/20200316
+
+module load openmpi/4.0.1
+
+export PYTHONUSERBASE=/g/data/r78/rjd547/python_setup/
+export PYTHONPATH=/g/data1a/r78/rjd547/jupyter_notebooks/datacube-stats:$PYTHONPATH
+
+
+# to run this script, type: source wetland.sh
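
Putting it all together, a typical session using the files in this patch might look like the following sketch (paths are taken from the README examples; substitute your own work folder and shapefile):

```bash
# Hypothetical end-to-end run on the PBS cluster.
source wetland.sh                    # load the dea and openmpi modules, set python paths
./job_sub_in.sh sadew/ shapefiles/waterfowlandwetlands_3577.shp 15
qstat -u $USER                       # monitor the submitted wit jobs
```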