Skip to content

Pangenome analysis based on MAGs

Francisco Zorrilla edited this page Mar 22, 2021 · 2 revisions

Prokka is used to annotate each MAG; it is implemented as the following Snakemake rule:

rule prokka:
    # Annotate a single bin (MAG) with prokka.
    #
    # input:     the bin FASTA, <root>/<dnaBins>/{binIDs}.fa
    # output:    one annotation folder per bin,
    #            <root>/<pangenome>/prokka/unorganized/{binIDs}
    # benchmark: <root>/benchmarks/{binIDs}.prokka.benchmark.txt
    #
    # The FASTA is copied to $SCRATCHDIR and prokka runs there; the finished
    # prokka/<id> folder is then moved next to the declared output path.
    # $SCRATCHDIR is not set by this rule; it must be provided by the
    # environment the job runs in.
    input:
        bins = f'{config["path"]["root"]}/{config["folder"]["dnaBins"]}/{{binIDs}}.fa'
    output:
        directory(f'{config["path"]["root"]}/{config["folder"]["pangenome"]}/prokka/unorganized/{{binIDs}}')
    benchmark:
        f'{config["path"]["root"]}/benchmarks/{{binIDs}}.prokka.benchmark.txt'
    # Shell notes:
    #  * `mkdir -p` on the parent of the output is enough; -p also creates
    #    every missing ancestor directory.
    #  * The bin ID is the input file name minus its trailing ".fa" suffix.
    #    `basename FILE .fa` strips exactly that suffix, so the ID always
    #    equals the binIDs wildcard and the folder moved at the end lands on
    #    the declared output. (A sed pattern such as "s/.fa//g" would treat
    #    "." as "any character" and also strip matches inside the name.)
    #  * prokka options use the double-dash spellings throughout.
    shell:
        """
        set +u;source activate {config[envs][prokkaroary]};set -u
        mkdir -p "$(dirname {output})"
        cp {input} "$SCRATCHDIR"
        cd "$SCRATCHDIR"
        id=$(basename {input} .fa)
        prokka --locustag "$id" --cpus {config[cores][prokka]} --centre MAG --compliant --outdir "prokka/$id" --prefix "$id" "$(basename {input})"
        mv "prokka/$id" "$(dirname {output})"
        """

Roary is used for the pangenome analysis of each species; it is implemented as the following Snakemake rule:

rule roary:
    # Run roary on the .gff files of one species and collect its results.
    #
    # input:     a folder, <root>/<pangenome>/prokka/organized/{speciesIDs}/ ;
    #            the shell command passes every *.gff inside it to roary.
    # output:    the folder <root>/<pangenome>/roary/{speciesIDs}/
    # benchmark: <root>/benchmarks/{speciesIDs}.roary.benchmark.txt
    #
    # NOTE(review): the step that fills prokka/organized/{speciesIDs}/ is not
    # part of this snippet -- presumably prokka results are grouped by species
    # beforehand; confirm against the rest of the workflow.
    input:
        f'{config["path"]["root"]}/{config["folder"]["pangenome"]}/prokka/organized/{{speciesIDs}}/'
    output:
        directory(f'{config["path"]["root"]}/{config["folder"]["pangenome"]}/roary/{{speciesIDs}}/')
    benchmark:
        f'{config["path"]["root"]}/benchmarks/{{speciesIDs}}.roary.benchmark.txt'
    # Shell walkthrough:
    #  1. Activate the environment named in config[envs][prokkaroary]
    #     (nounset is switched off only around the activation).
    #  2. Create the parent of the output folder, then work in $SCRATCHDIR on
    #     a copy of the species folder. $SCRATCHDIR is not set by this rule.
    #  3. Run roary with its thread count (-p), -i and -cd values taken from
    #     config[cores][roary], config[params][roaryI] and
    #     config[params][roaryCD]; results go to the folder "yes_al" (-f).
    #     Per the Roary usage text, -i is the minimum blastp identity and -cd
    #     the percentage of isolates defining a core gene -- TODO confirm
    #     against the installed version.
    #  4. Run create_pan_genome_plots.R from inside yes_al, then move the
    #     contents of yes_al into the output folder.
    # NOTE(review): "yes_al" is a fixed name inside $SCRATCHDIR. If the scratch
    # directory is reused or shared between jobs, a leftover yes_al could be
    # picked up by steps 4 -- verify that $SCRATCHDIR is unique per job.
    shell:
        """
        set +u;source activate {config[envs][prokkaroary]};set -u
        mkdir -p $(dirname {output})
        cd $SCRATCHDIR
        cp -r {input} .
                
        roary -s -p {config[cores][roary]} -i {config[params][roaryI]} -cd {config[params][roaryCD]} -f yes_al -e -v $(basename {input})/*.gff
        cd yes_al
        create_pan_genome_plots.R 
        cd ..
        mkdir -p {output}
        mv yes_al/* {output}
        """