diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 8f164c6..cdcd85e 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,9 +1,9 @@ -# sage/dcqc: Contributing Guidelines +# sage-bionetworks-workflows/nf-dcqc: Contributing Guidelines Hi there! -Many thanks for taking an interest in improving sage/dcqc. +Many thanks for taking an interest in improving sage-bionetworks-workflows/nf-dcqc. -We try to manage the required tasks for sage/dcqc using GitHub issues, you probably came to this page when creating one. +We try to manage the required tasks for sage-bionetworks-workflows/nf-dcqc using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! @@ -11,10 +11,10 @@ Contributions to the code are even more welcome ;) ## Contribution workflow -If you'd like to write some code for sage/dcqc, the standard workflow is as follows: +If you'd like to write some code for sage-bionetworks-workflows/nf-dcqc, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [sage/dcqc issues](https://github.com/sage/dcqc/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this -2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [sage/dcqc repository](https://github.com/sage/dcqc) to your GitHub account +1. Check that there isn't already an issue about your idea in the [sage-bionetworks-workflows/nf-dcqc issues](https://github.com/sage-bionetworks-workflows/nf-dcqc/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [sage-bionetworks-workflows/nf-dcqc repository](https://github.com/sage-bionetworks-workflows/nf-dcqc) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) 4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged @@ -52,7 +52,7 @@ These tests are run both with the latest available version of `Nextflow` and als ## Pipeline contribution conventions -To make the sage/dcqc code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. +To make the sage-bionetworks-workflows/nf-dcqc code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. 
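In practice, steps 3-5 of the contribution workflow above and the PR checklist amount to a few local commands. A sketch, assuming nf-core/tools (>= 1.10) is installed; the output directory is a placeholder:

```bash
# Sketch of the usual pre-PR checks (outdir is a throwaway placeholder)
nf-core schema build                                   # fold any new parameters into nextflow_schema.json
nf-core lint                                           # check the pipeline against nf-core conventions
nextflow run . -profile test,docker --outdir results   # run the bundled test profile
```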
### Adding a new step @@ -102,7 +102,7 @@ This repo includes a devcontainer configuration which will create a GitHub Codes To get started: -- Open the repo in [Codespaces](https://github.com/sage/dcqc/codespaces) +- Open the repo in [Codespaces](https://github.com/sage-bionetworks-workflows/nf-dcqc/codespaces) - Tools installed - nf-core - Nextflow diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 6ecc1b3..0ac1971 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -2,52 +2,52 @@ name: Bug report description: Report something that is broken or incorrect labels: bug body: -- type: textarea - id: description - attributes: - label: Description of the bug - description: A clear and concise description of what the bug is. - validations: - required: true -- type: textarea - id: command_used - attributes: - label: Command used and terminal output - description: Steps to reproduce the behaviour. Please paste the command you used - to launch the pipeline and the output from your terminal. - render: console - placeholder: '$ nextflow run ... - - - Some output where something broke - - ' -- type: textarea - id: files - attributes: - label: Relevant files - description: 'Please drag and drop the relevant files here. Create a `.zip` archive - if the extension is not allowed. - - Your verbose log file `.nextflow.log` is often useful _(this is a hidden file - in the directory where you launched the pipeline)_ as well as custom Nextflow - configuration files. - - ' -- type: textarea - id: system - attributes: - label: System information - description: '* Nextflow version _(eg. 22.10.1)_ - - * Hardware _(eg. HPC, Desktop, Cloud)_ - - * Executor _(eg. slurm, local, awsbatch)_ - - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ - - * OS _(eg. CentOS Linux, macOS, Linux Mint)_ - - * Version of sage/dcqc _(eg. 1.1, 1.5, 1.8.2)_ - - ' + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used + to launch the pipeline and the output from your terminal. + render: console + placeholder: "$ nextflow run ... + + + Some output where something broke + + " + - type: textarea + id: files + attributes: + label: Relevant files + description: "Please drag and drop the relevant files here. Create a `.zip` archive + if the extension is not allowed. + + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file + in the directory where you launched the pipeline)_ as well as custom Nextflow + configuration files. + + " + - type: textarea + id: system + attributes: + label: System information + description: "* Nextflow version _(eg. 22.10.1)_ + + * Hardware _(eg. HPC, Desktop, Cloud)_ + + * Executor _(eg. slurm, local, awsbatch)_ + + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + + * Version of sage-bionetworks-workflows/nf-dcqc _(eg. 
1.1, 1.5, 1.8.2)_
+
+          "
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index 64503c3..9768210 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -1,5 +1,5 @@
 name: Feature request
-description: Suggest an idea for the sage/dcqc pipeline
+description: Suggest an idea for the sage-bionetworks-workflows/nf-dcqc pipeline
 labels: enhancement
 body:
   - type: textarea
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 162785d..f702fca 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,21 +1,21 @@
 ## PR checklist

 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/sage/dcqc/tree/master/.github/CONTRIBUTING.md)
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/sage-bionetworks-workflows/nf-dcqc/tree/master/.github/CONTRIBUTING.md)
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index b7a0f1d..9db3192 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -11,9 +11,9 @@ jobs:
     steps:
       # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
       - name: Check PRs
-        if: github.repository == 'sage/dcqc'
+        if: github.repository == 'sage-bionetworks-workflows/nf-dcqc'
         run: |
-          { [[ ${{github.event.pull_request.head.repo.full_name }} == sage/dcqc ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
+          { [[ ${{github.event.pull_request.head.repo.full_name }} == sage-bionetworks-workflows/nf-dcqc ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]

       # If the above check failed, post a comment on the PR explaining the failure
       # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2386c98..45d0f47 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,7 +24,7 @@ jobs:
     strategy:
       matrix:
         NXF_VER:
-          - "22.10.1"
+          - "22.10.4"
           - "latest-everything"
     steps:
       - name: Check out pipeline code
@@ -36,8 +36,8 @@
           version: "${{ matrix.NXF_VER }}"

       - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
+        env:
+          SYNAPSE_AUTH_TOKEN: ${{ secrets.SYNAPSE_AUTH_TOKEN }}
         run: |
+          nextflow secrets set SYNAPSE_AUTH_TOKEN "${SYNAPSE_AUTH_TOKEN}"
           nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
diff --git a/.nf-core.yml b/.nf-core.yml
index 2201a7d..1e1d92b 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,15 +1,15 @@
 repository_type: pipeline
 lint:
   files_exist:
-    - CODE_OF_CONDUCT.md
-    - assets/nf-core-dcqc_logo_light.png
-    - docs/images/nf-core-dcqc_logo_light.png
-    - docs/images/nf-core-dcqc_logo_dark.png
-    - .github/ISSUE_TEMPLATE/config.yml
-    - .github/workflows/awstest.yml
-    - .github/workflows/awsfulltest.yml
+    - CODE_OF_CONDUCT.md
+    - assets/nf-core-dcqc_logo_light.png
+    - docs/images/nf-core-dcqc_logo_light.png
+    - docs/images/nf-core-dcqc_logo_dark.png
+    - .github/ISSUE_TEMPLATE/config.yml
+    - .github/workflows/awstest.yml
+    - .github/workflows/awsfulltest.yml
   nextflow_config:
-    - manifest.name
-    - manifest.homePage
+    - manifest.name
+    - manifest.homePage
   multiqc_config:
-    - report_comment
+    - report_comment
diff --git a/README.md b/README.md
index 6817727..eb5a67b 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)

-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.4-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
@@ -27,7 +27,7 @@ On release, automated continuous integration tests run the pipeline on a full-si

 ## Quick Start

-1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`)
+1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.4`)

 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
@@ -52,6 +52,16 @@ On release, automated continuous integration tests run the pipeline on a full-si
    nextflow run sage/dcqc --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
    ```

+## Special Considerations for Running `nf-dcqc` on Nextflow Tower
+
+`nf-dcqc` leverages the reports feature when executed on Tower. This is done by pointing Tower to the generated `output.csv` file, which is saved to `params.outdir` after a successful run. By default, the `outdir` for the workflow is set to a local directory called `results`. This does not work on Nextflow Tower runs, as you will not have access to the `results` directory once the job has completed. Thus, the `outdir` should be set to an S3 bucket location that the Tower workspace you are using has access to. For example, in the `pipeline parameters` for a Tower run, you can provide YAML such as:
+
+```yaml
+outdir: s3://example-project-tower-bucket/dcqc_output
+```
+
+From the reports tab within your workflow run, you can view and download the generated `output.csv` file.
+
 ## Credits

 sage/dcqc was originally written by Bruno Grande.
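The same Tower-friendly `outdir` can also be supplied as a command-line flag when launching outside of the Tower parameter form. A sketch, reusing the placeholder bucket from the README section above:

```bash
# Command-line equivalent of the Tower YAML above (bucket path is a placeholder)
nextflow run sage-bionetworks-workflows/nf-dcqc \
    -profile docker \
    --input samplesheet.csv \
    --outdir s3://example-project-tower-bucket/dcqc_output
```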
diff --git a/assets/email_template.html b/assets/email_template.html
index 37a7bf1..bdfd31d 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -4,21 +4,21 @@
-    <meta name="description" content="sage/dcqc: Nextflow Workflow for Data Coordination Quality Control" />
-    <title>sage/dcqc Pipeline Report</title>
+    <meta name="description" content="sage-bionetworks-workflows/nf-dcqc: Nextflow Workflow for Data Coordination Quality Control" />
+    <title>sage-bionetworks-workflows/nf-dcqc Pipeline Report</title>

-    <h1>sage/dcqc v${version}</h1>
+    <h1>sage-bionetworks-workflows/nf-dcqc v${version}</h1>
     <h2>Run Name: $runName</h2>

 <% if (!success){ out << """
-    <h4>sage/dcqc execution completed unsuccessfully!</h4>
+    <h4>sage-bionetworks-workflows/nf-dcqc execution completed unsuccessfully!</h4>
     <p>The exit status of the task that caused the workflow execution to fail was: <code>$exitStatus</code>.</p>
     <p>The full error message was:</p>
     <pre>${errorReport}</pre>
@@ -27,7 +27,7 @@ sage/dcqc execution completed unsucces
 } else { out << """
-    sage/dcqc execution completed successfully!
+    sage-bionetworks-workflows/nf-dcqc execution completed successfully!
 """ }
@@ -44,8 +44,8 @@ Pipeline Configuration:
-    <p>sage/dcqc</p>
-    <p><a href="https://github.com/sage/dcqc">https://github.com/sage/dcqc</a></p>
+    <p>sage-bionetworks-workflows/nf-dcqc</p>
+    <p><a href="https://github.com/sage-bionetworks-workflows/nf-dcqc">https://github.com/sage-bionetworks-workflows/nf-dcqc</a></p>
diff --git a/assets/email_template.txt b/assets/email_template.txt
index f5d99a5..b4b8ac2 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -1,10 +1,10 @@
 Run Name: $runName

 <% if (success){
-    out << "## sage/dcqc execution completed successfully! ##"
+    out << "## sage-bionetworks-workflows/nf-dcqc execution completed successfully! ##"
 } else {
     out << """####################################################
-## sage/dcqc execution completed unsuccessfully! ##
+## sage-bionetworks-workflows/nf-dcqc execution completed unsuccessfully! ##
 ####################################################
 The exit status of the task that caused the workflow execution to fail was: $exitStatus.
 The full error message was:
@@ -27,5 +27,5 @@ Pipeline Configuration:
 <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %>

 --
-sage/dcqc
-https://github.com/sage/dcqc
+sage-bionetworks-workflows/nf-dcqc
+https://github.com/sage-bionetworks-workflows/nf-dcqc
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
deleted file mode 100644
index e950942..0000000
--- a/assets/multiqc_config.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-report_comment: >
-  This report has been generated by the sage/dcqc
-  analysis pipeline.
-report_section_order:
-  "sage-dcqc-methods-description":
-    order: -1000
-  software_versions:
-    order: -1001
-  "sage-dcqc-summary":
-    order: -1002
-
-export_plots: true
diff --git a/assets/nf-core-nf-dcqc_logo_light.png b/assets/nf-core-nf-dcqc_logo_light.png
new file mode 100644
index 0000000..e458b86
Binary files /dev/null and b/assets/nf-core-nf-dcqc_logo_light.png differ
diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt
index 9051585..a9b6716 100644
--- a/assets/sendmail_template.txt
+++ b/assets/sendmail_template.txt
@@ -9,12 +9,12 @@ Content-Type: text/html; charset=utf-8
 $email_html

 --nfcoremimeboundary
-Content-Type: image/png;name="sage-dcqc_logo.png"
+Content-Type: image/png;name="sage-bionetworks-workflows-nf-dcqc_logo.png"
 Content-Transfer-Encoding: base64
 Content-ID: <nfcorepipelinelogo>
-Content-Disposition: inline; filename="sage-dcqc_logo_light.png"
+Content-Disposition: inline; filename="sage-bionetworks-workflows-nf-dcqc_logo_light.png"

-<% out << new File("$projectDir/assets/sage-dcqc_logo_light.png").
+<% out << new File("$projectDir/assets/sage-bionetworks-workflows-nf-dcqc_logo_light.png").
     bytes.
     encodeBase64().
     toString().
diff --git a/conf/base.config b/conf/base.config
index 02e1737..84a0183 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,7 +10,6 @@ process {
-    // TODO nf-core: Check the defaults for all processes
     cpus   = { check_max( 1 * task.attempt, 'cpus' ) }
     memory = { check_max( 6.GB * task.attempt, 'memory' ) }
     time   = { check_max( 4.h * task.attempt, 'time' ) }
@@ -24,7 +23,6 @@ process {
     // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
     // If possible, it would be nice to keep the same label naming convention when
     // adding in your local modules too.
-    // TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } @@ -59,6 +57,12 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withLabel:dcqc { + container = 'ghcr.io/sage-bionetworks-workflows/py-dcqc:latest' + secret = [ + 'SYNAPSE_AUTH_TOKEN' + ] + } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/conf/local.config b/conf/local.config new file mode 100644 index 0000000..5720f1c --- /dev/null +++ b/conf/local.config @@ -0,0 +1,6 @@ +process { + withLabel:dcqc { + // Default container name when running `src/docker/build.sh` in py-dcqc + container = 'dcqc' + } +} diff --git a/conf/modules.config b/conf/modules.config index da58a5d..08564c2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -12,29 +12,38 @@ process { - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + // publishDir = [ + // path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + // mode: params.publish_dir_mode, + // saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + // ] - withName: SAMPLESHEET_CHECK { + withName: '.*:(CREATE_TESTS|CREATE_SUITE)' { + ext.args = [ + params.required_tests ? params.required_tests.split(",").collect {"--required-tests $it"}.join(" ") : "", + params.skipped_tests ? params.skipped_tests.split(",").collect {"--skipped-tests $it"}.join(" ") : "" + ].join(" ").trim() + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + pattern: '*_versions.yml' ] } - withName: FASTQC { - ext.args = '--quiet' + withName: COMBINE_SUITES { + publishDir = [ + path: { "${params.outdir}/" }, + mode: params.publish_dir_mode + ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: UPDATE_INPUT { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' + path: { "${params.outdir}/" }, + mode: params.publish_dir_mode ] } diff --git a/conf/test.config b/conf/test.config index 3271d55..02a4067 100644 --- a/conf/test.config +++ b/conf/test.config @@ -11,8 +11,8 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test TXT profile' + config_profile_description = 'Small test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions max_cpus = 2 @@ -20,10 +20,7 @@ params { max_time = '6.h' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = "${projectDir}/testdata/input_txt.csv" + required_tests = "LibTiffInfoTest,BioFormatsInfoTest,OmeXmlSchemaTest" - // Genome references - genome = 'R64-1-1' } diff --git a/conf/test_full.config b/conf/test_full.config index f08f7bf..a98f35f 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,10 +15,7 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' + input = "${projectDir}/testdata/input_full.csv" + required_tests = "LibTiffInfoTest,BioFormatsInfoTest,OmeXmlSchemaTest" + skipped_tests = "FileExtensionTest,Md5ChecksumTest" } diff --git a/conf/test_tiff.config b/conf/test_tiff.config new file mode 100644 index 0000000..bb11091 --- /dev/null +++ b/conf/test_tiff.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+
+    Use as follows:
+        nextflow run sage-bionetworks-workflows/nf-dcqc -profile test_tiff,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test TIFF profile'
+    config_profile_description = 'Small test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input = "${projectDir}/testdata/input_tiff.csv"
+
+}
diff --git a/docs/README.md b/docs/README.md
index 0cf1f1b..1f9816c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,6 @@
-# sage/dcqc: Documentation
+# sage-bionetworks-workflows/nf-dcqc: Documentation

-The sage/dcqc documentation is split into the following pages:
+The sage-bionetworks-workflows/nf-dcqc documentation is split into the following pages:

 - [Usage](usage.md)
   - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
diff --git a/docs/images/nf-core-nf-dcqc_logo_dark.png b/docs/images/nf-core-nf-dcqc_logo_dark.png
new file mode 100644
index 0000000..7a12c67
Binary files /dev/null and b/docs/images/nf-core-nf-dcqc_logo_dark.png differ
diff --git a/docs/images/nf-core-nf-dcqc_logo_light.png b/docs/images/nf-core-nf-dcqc_logo_light.png
new file mode 100644
index 0000000..e458b86
Binary files /dev/null and b/docs/images/nf-core-nf-dcqc_logo_light.png differ
diff --git a/docs/output.md b/docs/output.md
index 3aed334..9fa2c66 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -15,6 +15,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [FastQC](#fastqc) - Raw read QC
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+- [QC Results Files](#qc-results-files) - QC results generated by the pipeline

 ### FastQC
@@ -66,3 +67,11 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
 [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
+
+### QC Results Files
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `output.csv`: The original input CSV file updated to include the QC results for each file.
+- `suites.json`: A JSON file containing summary information for all QC tests performed.
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
index 8d030f4..279d536 100644
--- a/lib/Utils.groovy
+++ b/lib/Utils.groovy
@@ -6,6 +6,14 @@ import org.yaml.snakeyaml.Yaml

 class Utils {

+    //
+    // Useful for pulling information from a JSON file into a Nextflow channel
+    //
+    public static def parseJson(file) {
+        def parser = new groovy.json.JsonSlurper()
+        return parser.parseText(file.text)
+    }
+
     //
     // When running with -profile conda, warn if channels have not been set-up appropriately
     //
diff --git a/lib/WorkflowDcqc.groovy b/lib/WorkflowDcqc.groovy
index c3eee33..a7e625d 100755
--- a/lib/WorkflowDcqc.groovy
+++ b/lib/WorkflowDcqc.groovy
@@ -10,13 +10,13 @@ class WorkflowDcqc {
     // Check and validate parameters
     //
     public static void initialise(params, log) {
-        genomeExistsError(params, log)
+        // genomeExistsError(params, log)

-        if (!params.fasta) {
-            log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
-            System.exit(1)
-        }
+        // if (!params.fasta) {
+        //     log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
+        //     System.exit(1)
+        // }
     }

     //
diff --git a/main.nf b/main.nf
index 91188cb..9acc7e3 100644
--- a/main.nf
+++ b/main.nf
@@ -15,7 +15,7 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
+// params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/modules/local/combine_suites.nf b/modules/local/combine_suites.nf
new file mode 100644
index 0000000..c05e13e
--- /dev/null
+++ b/modules/local/combine_suites.nf
@@ -0,0 +1,18 @@
+process COMBINE_SUITES {
+    label 'process_low'
+    label 'dcqc'
+
+    input:
+    path report_jsons
+
+    output:
+    path "suites.json"
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    dcqc combine-suites "suites.json" *.json
+    """
+}
diff --git a/modules/local/compute_test.nf b/modules/local/compute_test.nf
new file mode 100644
index 0000000..bd04751
--- /dev/null
+++ b/modules/local/compute_test.nf
@@ -0,0 +1,20 @@
+process COMPUTE_TEST {
+    tag "$target_id"
+    label 'process_single'
+    label 'dcqc'
+
+    input:
+    tuple val(target_id), path(test_json), path("std_out.txt"), path("std_err.txt"), path("exit_code.txt")
+
+    output:
+    tuple val(target_id), path("${test_json.baseName}.computed.json")
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    export TMPDIR="./"
+    dcqc compute-test "${test_json}" "${test_json.baseName}.computed.json"
+    """
+}
diff --git a/modules/local/create_process.nf b/modules/local/create_process.nf
new file mode 100644
index 0000000..b02a2c9
--- /dev/null
+++ b/modules/local/create_process.nf
@@ -0,0 +1,20 @@
+process CREATE_PROCESS {
+    tag "$target_id"
+    label 'process_single'
+    label 'dcqc'
+
+    input:
+    tuple val(target_id), path(test_json)
+
+    output:
+    tuple val(target_id), path(test_json), path("dcqc-staged-*/*"), path("${test_json.baseName}.process.json")
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    export TMPDIR="./"
+    dcqc create-process "${test_json}" "${test_json.baseName}.process.json"
+    """
+}
diff --git a/modules/local/create_suite.nf
b/modules/local/create_suite.nf new file mode 100644 index 0000000..be5a123 --- /dev/null +++ b/modules/local/create_suite.nf @@ -0,0 +1,20 @@ +process CREATE_SUITE { + tag "$target_id" + label 'process_single' + label 'dcqc' + + input: + tuple val(target_id), path(test_results) + + output: + path("${target_id}.suite.json") + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + dcqc create-suite ${args} "${target_id}.suite.json" *.json + """ +} diff --git a/modules/local/create_targets.nf b/modules/local/create_targets.nf new file mode 100644 index 0000000..7b4fb42 --- /dev/null +++ b/modules/local/create_targets.nf @@ -0,0 +1,18 @@ +process CREATE_TARGETS { + label 'process_single' + label 'dcqc' + + input: + path input_csv + + output: + path 'targets/*.json' + + when: + task.ext.when == null || task.ext.when + + script: + """ + dcqc create-targets "${input_csv}" targets/ + """ +} diff --git a/modules/local/create_tests.nf b/modules/local/create_tests.nf new file mode 100644 index 0000000..03e9b3b --- /dev/null +++ b/modules/local/create_tests.nf @@ -0,0 +1,20 @@ +process CREATE_TESTS { + tag "$target_id" + label 'process_single' + label 'dcqc' + + input: + tuple val(target_id), path(target_json) + + output: + tuple val(target_id), path("tests/*") + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + dcqc create-tests ${args} "${target_json}" tests/ + """ +} diff --git a/modules/local/dcqc_version.nf b/modules/local/dcqc_version.nf new file mode 100644 index 0000000..e7df5c8 --- /dev/null +++ b/modules/local/dcqc_version.nf @@ -0,0 +1,18 @@ +process DCQC_VERSION { + label 'process_single' + label 'dcqc' + + output: + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dcqc: \$(dcqc --version | sed 's/.*: //g') + END_VERSIONS + """ +} diff --git a/modules/local/run_process.nf b/modules/local/run_process.nf new file mode 100644 index 0000000..7fa24ba --- /dev/null +++ b/modules/local/run_process.nf @@ -0,0 +1,20 @@ +process RUN_PROCESS { + tag "$target_id" + cpus "${cpus}" + memory "${memory}" + container "${container}" + + input: + tuple val(target_id), path(test_json), path(staged_file), val(container), val(cpus), val(memory), val(command) + + output: + tuple val(target_id), path(test_json), path("std_out.txt"), path("std_err.txt"), path("exit_code.txt") + + when: + task.ext.when == null || task.ext.when + + script: + """ + ( (${command}) > "std_out.txt" 2> "std_err.txt"; echo \$? > "exit_code.txt" ) || true + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index cab1b4c..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in sage/dcqc/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/update_input.nf b/modules/local/update_input.nf new file mode 100644 index 0000000..6a18b7b --- /dev/null +++ b/modules/local/update_input.nf @@ -0,0 +1,19 @@ +process UPDATE_INPUT { + label 'process_single' + label 'dcqc' + + input: + path suites_file + path input_csv + + output: + path "output.csv" + + when: + task.ext.when == null || task.ext.when + + script: + """ + dcqc update-csv ${suites_file} ${input_csv} "output.csv" + """ +} diff --git a/nextflow.config b/nextflow.config index c8480d1..027a5b2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,24 +9,13 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null - - - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + required_tests = null + skipped_tests = null // Boilerplate options - outdir = null + outdir = "results" tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null @@ -40,7 +29,6 @@ params { show_hidden_params = false schema_ignore_params = 'genomes' - // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -49,7 +37,6 @@ params { config_profile_url = null config_profile_name = null - // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -141,19 +128,11 @@ profiles { executor.cpus = 16 executor.memory = 60.GB } + local { includeConfig 'conf/local.config' } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } - -// Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] -} - - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
@@ -187,12 +166,12 @@ dag { } manifest { - name = 'sage/dcqc' + name = 'sage-bionetworks-workflows/nf-dcqc' author = """Bruno Grande """ - homePage = 'https://github.com/sage/dcqc' + homePage = 'https://github.com/sage-bionetworks-workflows/nf-dcqc' description = """Nextflow Workflow for Data Coordination Quality Control""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' + nextflowVersion = '!>=22.10.4' version = '0.1' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 13563b1..87efd1f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -22,6 +22,18 @@ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", "fa_icon": "fas fa-file-csv" }, + "required_tests": { + "type": "string", + "fa_icon": "fas fa-plus-circle", + "description": "Comma-separated list of tests that are required for a file to pass quality control (QC). Consult the dcqc Python package documentation for information on the default behavior since it is subject to change.", + "pattern": "^([A-z0-9]+,)*[A-z0-9]+$" + }, + "skipped_tests": { + "type": "string", + "fa_icon": "fas fa-minus-circle", + "description": "Comma-separated list of tests that should be skipped for all files. Consult the dcqc Python package documentation for information on the default behavior since it is subject to change.", + "pattern": "^([A-z0-9]+,)*[A-z0-9]+$" + }, "outdir": { "type": "string", "format": "directory-path", @@ -34,49 +46,6 @@ "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - }, - "multiqc_title": { - "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" - } - } - }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", - "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." - }, - "fasta": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" - }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." } } }, @@ -205,14 +174,6 @@ "fa_icon": "fas fa-remove-format", "hidden": true }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true - }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", @@ -226,23 +187,6 @@ "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, - "multiqc_config": { - "type": "string", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", - "hidden": true - }, - "multiqc_logo": { - "type": "string", - "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", - "fa_icon": "fas fa-image", - "hidden": true - }, - "multiqc_methods_description": { - "type": "string", - "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" - }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", @@ -271,9 +215,6 @@ { "$ref": "#/definitions/input_output_options" }, - { - "$ref": "#/definitions/reference_genome_options" - }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/subworkflows/local/external_tests.nf b/subworkflows/local/external_tests.nf new file mode 100644 index 0000000..d175290 --- /dev/null +++ b/subworkflows/local/external_tests.nf @@ -0,0 +1,25 @@ +include { CREATE_PROCESS } from '../../modules/local/create_process' +include { RUN_PROCESS } from '../../modules/local/run_process' +include { COMPUTE_TEST } from '../../modules/local/compute_test' + +workflow EXTERNAL_TESTS { + take: + ch_tests // channel: [ val(target_id), path(test_json) ] + + main: + ch_processes_raw = CREATE_PROCESS(ch_tests) + + ch_processes = + ch_processes_raw + | map { target_id, test, staged, cmd -> + parsed = Utils.parseJson(cmd) + [ target_id, test, staged, parsed.container, parsed.cpus, parsed.memory, parsed.command ] + } + + ch_process_outputs = RUN_PROCESS(ch_processes) + + ch_tests_computed = COMPUTE_TEST(ch_process_outputs) + + emit: + results = ch_tests_computed // channel: [ val(target_id), path(result_json) ] +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv 
( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/internal_tests.nf b/subworkflows/local/internal_tests.nf new file mode 100644 index 0000000..94daea5 --- /dev/null +++ b/subworkflows/local/internal_tests.nf @@ -0,0 +1,19 @@ +include { COMPUTE_TEST } from '../../modules/local/compute_test' + +workflow INTERNAL_TESTS { + take: + ch_tests // channel: [ val(target_id), path(test_json) ] + + main: + ch_tests_extra = + ch_tests + | map { target_id, test_json -> + dummy_file = file("${projectDir}/testdata/dummy.txt") + [ target_id, test_json, dummy_file, dummy_file, dummy_file ] + } + + ch_tests_computed = COMPUTE_TEST(ch_tests_extra) + + emit: + results = ch_tests_computed // channel: [ val(target_id), path(result_json) ] +} diff --git a/subworkflows/local/prepare_reports.nf b/subworkflows/local/prepare_reports.nf new file mode 100644 index 0000000..b7b864e --- /dev/null +++ b/subworkflows/local/prepare_reports.nf @@ -0,0 +1,23 @@ +include { CREATE_SUITE } from '../../modules/local/create_suite' +include { COMBINE_SUITES } from '../../modules/local/combine_suites' +include { UPDATE_INPUT } from '../../modules/local/update_input' + +workflow PREPARE_REPORTS { + take: + ch_tests_computed // channel: [ val(target_id), path(result_json) ] + ch_input_csv // channel: path(input_csv) + + main: + ch_tests_by_target = ch_tests_computed.groupTuple() + + ch_suites = CREATE_SUITE(ch_tests_by_target) + + ch_suites_collected = ch_suites.collect() + + ch_summary_report = COMBINE_SUITES(ch_suites_collected) + + ch_output_csv = UPDATE_INPUT(ch_summary_report, ch_input_csv) + + emit: + summary = ch_summary_report // channel: path(summary_json) +} diff --git a/subworkflows/local/prepare_tests.nf b/subworkflows/local/prepare_tests.nf new file mode 100644 index 0000000..ff69045 --- /dev/null +++ b/subworkflows/local/prepare_tests.nf @@ -0,0 +1,41 @@ +include { CREATE_TARGETS } from '../../modules/local/create_targets' +include { CREATE_TESTS } from '../../modules/local/create_tests' + +workflow PREPARE_TESTS { + take: + ch_input // file: CSV list of QC targets + + main: + ch_targets_raw = CREATE_TARGETS(ch_input) + + ch_targets = + ch_targets_raw + | flatten + | map { + parsed = Utils.parseJson(it) + [ parsed.id, it ] + } + + ch_tests_raw = CREATE_TESTS(ch_targets) + + ch_tests = + ch_tests_raw + | transpose + | map { target_id, test -> + parsed = Utils.parseJson(test) + [ parsed.is_external_test, [ target_id, test ] ] + } + + ch_tests_split = + ch_tests + | branch { is_external_test, it -> + internal: !is_external_test + return it + external: is_external_test + return it + } + + 
emit: + internal = ch_tests_split.internal // channel: [ val(target_id), path(test_json) ] + external = ch_tests_split.external // channel: [ val(target_id), path(test_json) ] +} diff --git a/testdata/dummy.txt b/testdata/dummy.txt new file mode 100644 index 0000000..e69de29 diff --git a/testdata/input_full.csv b/testdata/input_full.csv new file mode 100644 index 0000000..8ca65a7 --- /dev/null +++ b/testdata/input_full.csv @@ -0,0 +1,12 @@ +url,file_type,md5_checksum +syn://syn41864974,TXT,38b86a456d1f441008986c6f798d5ef9 +syn://syn41864977,TXT,a542e9b744bedcfd874129ab0f98c4ff +syn://syn43716055,TIFF,38b86a456d1f441008986c6f798d5ef9 +syn://syn43716711,TIFF,a542e9b744bedcfd874129ab0f98c4ff +syn://syn26644414,TIFF,0a3d8f1d2d69f15aeccedea0d54efa6c +syn://syn26644414,OME-TIFF,0a3d8f1d2d69f15aeccedea0d54efa6c +syn://syn26644421,OME-TIFF,313257a6822ff5b52e7e35a626b6c33f +syn://syn50944309,TIFF,7408a1bb12e39228ad096a95c2a839ac +syn://syn50944267,OME-TIFF,a2550a887091d51351d547c8beae8f0c +syn://syn50944248,OME-TIFF,64bbfe1d1c9c171cad17b11e666b31b4 +syn://syn50944306,TIFF,dcca48d86b2e1d97677e8de533b4d74b diff --git a/testdata/input_txt.csv b/testdata/input_txt.csv new file mode 100644 index 0000000..e2cd0eb --- /dev/null +++ b/testdata/input_txt.csv @@ -0,0 +1,5 @@ +url,file_type,md5_checksum +syn://syn41864974,TXT,38b86a456d1f441008986c6f798d5ef9 +syn://syn41864977,TXT,make-status-amber +syn://syn43716055,TIFF,38b86a456d1f441008986c6f798d5ef9 +syn://syn43716711,TIFF,a542e9b744bedcfd874129ab0f98c4ff diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000..fcadc5b --- /dev/null +++ b/tower.yml @@ -0,0 +1,3 @@ +reports: + "**/output.csv": + display: "DCQC Output CSV file" diff --git a/workflows/dcqc.nf b/workflows/dcqc.nf index affb687..67f431a 100644 --- a/workflows/dcqc.nf +++ b/workflows/dcqc.nf @@ -9,9 +9,8 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) // Validate input parameters WorkflowDcqc.initialise(params, log) -// TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] +def checkPathParamList = [ params.input ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters @@ -23,10 +22,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) +// ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +// ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +// ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +// ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -34,10 +33,13 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' +// Local Modules + +// Local Subworkflows +include { PREPARE_TESTS } from '../subworkflows/local/prepare_tests' +include { INTERNAL_TESTS } from '../subworkflows/local/internal_tests' +include { EXTERNAL_TESTS } from '../subworkflows/local/external_tests' +include { PREPARE_REPORTS } from '../subworkflows/local/prepare_reports' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -45,13 +47,11 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +// nf-core Modules include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +// nf-core Subworkflows + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -65,48 +65,16 @@ workflow DCQC { ch_versions = Channel.empty() - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowDcqc.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowDcqc.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() + PREPARE_TESTS(ch_input) + + INTERNAL_TESTS(PREPARE_TESTS.out.internal) + + EXTERNAL_TESTS(PREPARE_TESTS.out.external) + + ch_tests_computed = INTERNAL_TESTS.out.mix(EXTERNAL_TESTS.out) + + ch_summary = PREPARE_REPORTS(ch_tests_computed, ch_input) + } /*
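Putting the pieces together, the CI configuration earlier in this diff implies the minimal local invocation: the `dcqc`-labelled processes read a `SYNAPSE_AUTH_TOKEN` Nextflow secret (see the `withLabel:dcqc` block in `conf/base.config`), so the secret must be set before launching the test profile. A sketch, with a placeholder token value:

```bash
# Mirror of the CI job; replace the placeholder with a real Synapse personal access token
nextflow secrets set SYNAPSE_AUTH_TOKEN "<SYNAPSE_AUTH_TOKEN>"
nextflow run . -profile test,docker --outdir ./results
```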