Skip to content

Commit

Permalink
added support for many fasta files
Browse files Browse the repository at this point in the history
  • Loading branch information
olgatsiouri1996 committed Dec 16, 2021
1 parent 889b8c6 commit cbc222e
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 11 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# biomisc [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5762316.svg)](https://doi.org/10.5281/zenodo.5762316)
# biomisc
collection of miscellaneous command line bioinformatic scripts (see the wiki page for documentation and dependencies)
35 changes: 25 additions & 10 deletions fasta_manipulation/fasta_formatter.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,34 @@
# python3
import os
import argparse
from Bio import SeqIO
import sys
# input parameters
ap = argparse.ArgumentParser(description="changes the width of sequences line in a FASTA file")
ap.add_argument("-in", "--input", required=True, help="input fasta file")
ap.add_argument("-out", "--output", required=True, help="output fasta file")
# Build the command line interface. --input/--output stay optional at the
# parser level because program 2 does not read them; they are checked below
# for program 1.
ap = argparse.ArgumentParser(description="changes the width of sequences line in 1 or many FASTA files")
ap.add_argument("-in", "--input", required=False, help="input fasta file")
ap.add_argument("-out", "--output", required=False, help="output fasta file")
ap.add_argument("-width", "--width", required=False, type=int, default=80, help="number of characters per line. Default 80")
ap.add_argument("-pro", "--program", required=False, type=int, default=1, help="program to choose. 1) one input/output fasta file, 2) many input/output fasta files. Default is 1")
args = vars(ap.parse_args())
# A width below 1 cannot be used as a slicing step: 0 makes range() raise and
# a negative value yields no chunks at all, so reject it up front.
if args['width'] < 1:
    ap.error("-width/--width must be a positive integer")
# Program 1 opens both paths directly, so fail with a usage message instead
# of a TypeError from open(None).
if args['program'] == 1 and (args['input'] is None or args['output'] is None):
    ap.error("-in/--input and -out/--output are required when -pro/--program is 1")
# main
# create function to split the input sequence based on a specific number of characters
def split_every_width(s): return [s[i:i+args['width']] for i in range(0,len(s),args['width'])]
# export to a new fasta file
sys.stdout = open(args['output'], 'a')
for record in SeqIO.parse(args['input'],'fasta'):
print(">"+record.id)
print('\n'.join(split_every_width(str(record.seq)))) # add characters in new line after the number of characters surpasses the input width
sys.stdout.close()
def split_every_width(s, w):
    """Split ``s`` into consecutive slices of at most ``w`` characters.

    Args:
        s: the sequence to split (anything that supports ``len`` and slicing).
        w: maximum number of characters per slice; must be at least 1.

    Returns:
        A list of slices in their original order. The last slice may be
        shorter than ``w``; an empty ``s`` gives an empty list.

    Raises:
        ValueError: if ``w`` is smaller than 1.
    """
    # range() rejects a step of 0 with an unrelated-looking message, and a
    # negative step produces no slices, which would silently drop the sequence.
    if w < 1:
        raise ValueError("width must be a positive integer, got {}".format(w))
    return [s[i:i + w] for i in range(0, len(s), w)]
# choose program
# Both branches write through an explicit file handle managed by ``with``
# rather than rebinding sys.stdout, so the interpreter's stdout is left
# intact and the output file is closed even if parsing raises.
# NOTE: files are opened in append mode, so running the script again adds
# the records to an already existing output file.
if args['program'] == 1:
    # one input fasta file -> one output fasta file
    with open(args['output'], 'a') as out_handle:
        for record in SeqIO.parse(args['input'], 'fasta'):
            print(">" + record.id, file=out_handle)
            # start a new line every time the line reaches the requested width
            print('\n'.join(split_every_width(str(record.seq), args['width'])), file=out_handle)
else:
    # every fasta file in the working directory -> one output file each
    # NOTE: outputs are written to the working directory with a ".fasta"
    # suffix, so a later run will list them as inputs as well.
    for filename in sorted(os.listdir(str(os.getcwd()))):
        if filename.endswith((".fa", ".fasta")):
            # output name: text before the first dot + "_w<width>.fasta"
            out_name = ''.join([filename.split(".")[0], "_", "w", str(args['width']), ".fasta"])
            with open(out_name, 'a') as out_handle:
                for record in SeqIO.parse(filename, 'fasta'):
                    print(">" + record.id, file=out_handle)
                    # start a new line every time the line reaches the requested width
                    print('\n'.join(split_every_width(str(record.seq), args['width'])), file=out_handle)

24 changes: 24 additions & 0 deletions fasta_manipulation/fasta_formatter_many_fasta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# python3
import os
import argparse
from Bio import SeqIO
import sys
# input parameters
# Build the command line interface; the only option is the line width.
ap = argparse.ArgumentParser(description="changes the width of sequence lines in multiple FASTA files")
ap.add_argument("-w", "--width", required=False, type=int, default=80, help="number of characters per line. Default 80")
args = vars(ap.parse_args())
# A width below 1 cannot be used as a slicing step: 0 makes range() raise and
# a negative value yields no chunks at all, so reject it up front.
if args['width'] < 1:
    ap.error("-w/--width must be a positive integer")
# main
# create function to split the input sequence based on a specific number of characters
def split_every_width(s, w):
    """Cut ``s`` into pieces that are each at most ``w`` characters long.

    Args:
        s: the sequence to cut (anything that supports ``len`` and slicing).
        w: maximum piece length; must be at least 1.

    Returns:
        The pieces as a list, in order. Only the final piece can be shorter
        than ``w``. An empty ``s`` returns an empty list.

    Raises:
        ValueError: if ``w`` is smaller than 1.
    """
    # A step of 0 makes range() fail and a negative step gives no pieces,
    # which would lose the whole sequence without any error.
    if w < 1:
        raise ValueError("width must be a positive integer, got {}".format(w))
    return [s[start:start + w] for start in range(0, len(s), w)]
# Re-wrap every fasta file found in the working directory.
# The output is written through an explicit file handle managed by ``with``
# rather than by rebinding sys.stdout, so the interpreter's stdout is left
# intact and the file is closed even if parsing raises.
# NOTE: files are opened in append mode, so running the script again adds
# the records to an already existing output file. Outputs also end in
# ".fasta" and land in the working directory, so a later run will list them
# as inputs as well.
for filename in sorted(os.listdir(str(os.getcwd()))):
    if filename.endswith((".fa", ".fasta")):
        # output name: text before the first dot + "_w<width>.fasta"
        out_name = ''.join([filename.split(".")[0], "_", "w", str(args['width']), ".fasta"])
        with open(out_name, 'a') as out_handle:
            for record in SeqIO.parse(filename, 'fasta'):
                print(">" + record.id, file=out_handle)
                # start a new line every time the line reaches the requested width
                print('\n'.join(split_every_width(str(record.seq), args['width'])), file=out_handle)

0 comments on commit cbc222e

Please sign in to comment.