Skip to content

Commit

Permalink
Added option swap to goalign shuffle recomb
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Dec 20, 2023
1 parent 3eda576 commit c13a469
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 12 deletions.
23 changes: 16 additions & 7 deletions align/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ type Alignment interface {
Pssm(log bool, pseudocount float64, normalization int) (pssm map[uint8][]float64, err error) // Normalization: PSSM_NORM_NONE, PSSM_NORM_UNIF, PSSM_NORM_DATA
Rarefy(nb int, counts map[string]int) (Alignment, error) // Take a new rarefied sample taking into accounts weights
RandSubAlign(length int, consecutive bool) (Alignment, error) // Extract a random subalignment with given length from this alignment
Recombine(rate float64, lenprop float64)
Recombine(rate float64, lenprop float64, swap bool) error
// converts coordinates on the given sequence to coordinates on the alignment
RefCoordinates(name string, refstart, refend int) (alistart, aliend int, err error)
// converts sites on the given sequence to coordinates on the alignment
Expand Down Expand Up @@ -959,19 +959,23 @@ func (a *align) TranslateByReference(phase int, geneticcode int, refseq string)
return
}

// Recombines a rate of the sequences to another sequences
// takes rate/2 seqs and copy/paste a portion of them to the other
// rate/2 seqs at a random position
// if rate < 0 : does nothing
// if rate > 1 : does nothing
// Recombine recombines a rate of the sequences into other sequences
// takes prop*nseq seqs and copy/paste a portion of them to the other
// prop*nseq seqs
// if prop < 0 : error
// if prop > 0.5 : error
// prop must be <= 0.5 because it will recombine x% of seqs based on other x% of seqs
func (a *align) Recombine(prop float64, lenprop float64) {
// if swap is true, then swaps the two portions of sequences (2*prop sequences will be impacted)
// if swap is false, then just copies the portion of seq2 into seq1 (seq2 is left unchanged)
func (a *align) Recombine(prop float64, lenprop float64, swap bool) (err error) {
var seq1, seq2 *seq

if prop < 0 || prop > 0.5 {
err = fmt.Errorf("proportion of sequence is outside of [0,0.5] range")
return
}
if lenprop < 0 || lenprop > 1 {
err = fmt.Errorf("proportion of sequence length is outside of [0,1] range")
return
}

Expand All @@ -985,9 +989,14 @@ func (a *align) Recombine(prop float64, lenprop float64) {
seq1 = a.seqs[permutation[i]]
seq2 = a.seqs[permutation[i+nb]]
for j := pos; j < pos+lentorecomb; j++ {
tmp := seq1.sequence[j]
seq1.sequence[j] = seq2.sequence[j]
if swap {
seq2.sequence[j] = tmp
}
}
}
return
}

// Add prop*100% gaps to lenprop*100% of the sequences
Expand Down
27 changes: 24 additions & 3 deletions cmd/recomb.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,46 @@ import (

var recombNb float64
var recombProp float64
var recombSwap bool

// recombCmd represents the recomb command
var recombCmd = &cobra.Command{
Use: "recomb",
Short: "Recombine sequences in the input alignment",
Long: `Recombine of sequences in the input alignment.
This command recombines a proportion of the input sequences into other sequences
It takes prop*nseq seqs and copy/paste a portion of them to the other prop*nseq sequences.
- if prop < 0 or prop > 0.5 : error (prop must be <= 0.5 because it will recombine x% of
seqs based on other x% of seqs)
- if swap is true, then swaps the two portions of sequences (2*prop sequences will be impacted)
- if swap is false, then just transfers the portion of seq1 to seq2
It may take Fasta or Phylip input alignment.
If the input alignment contains several alignments, will process all of them
Two options:
Three options:
1 - The proportion of recommbining sequences. It will take n sequences
and will copy/paste a portion of another n sequences;
2 - The proportion of the sequence length to recombine.
3 - Swap or not
Recombine 25% of sequences by 50%:
Recombine 25% of sequences by 50% (no swap):
s1 CCCCCCCCCCCCCC s1 CCCCCCCCCCCCCC
s2 AAAAAAAAAAAAAA => s2 AAAATTTTTTTAAA
s3 GGGGGGGGGGGGGG s3 GGGGGGGGGGGGGG
s4 TTTTTTTTTTTTTT s4 TTTTTTTTTTTTTT
Recombine 2x25% of sequences by 50% (swap):
s1 CCCCCCCCCCCCCC s1 CCCCCCCCCCCCCC
s2 AAAAAAAAAAAAAA => s2 AAAATTTTTTTAAA
s3 GGGGGGGGGGGGGG s3 GGGGGGGGGGGGGG
s4 TTTTTTTTTTTTTT s4 TTTTAAAAAAATTT
Example of usage:
goalign shuffle recomb -i align.phylip -p -n 1 -l 0.5
Expand All @@ -52,7 +69,10 @@ goalign shuffle recomb -i align.fasta -r 0.5 -n 1 -l 0.5
defer utils.CloseWriteFile(f, shuffleOutput)

for al := range aligns.Achan {
al.Recombine(recombNb, recombProp)
if err = al.Recombine(recombNb, recombProp, recombSwap); err != nil {
io.LogError(err)
return
}
writeAlign(al, f)
}

Expand All @@ -69,4 +89,5 @@ func init() {

recombCmd.PersistentFlags().Float64VarP(&recombNb, "prop-seq", "n", 0.5, "Proportion of the sequences to recombine")
recombCmd.PersistentFlags().Float64VarP(&recombProp, "prop-length", "l", 0.5, "Proportion of length of sequences to recombine")
recombCmd.PersistentFlags().BoolVar(&recombSwap, "swap", false, "If true, swaps sequences, otherwise just transfer seq1 subseq to seq2")
}
10 changes: 8 additions & 2 deletions docs/commands/shuffle.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,19 @@

### shuffle
This command adds different types of noise to an input alignment, with these sub-commands:
* `goalign shuffle recomb`:Recombine a given proportion of the length of a given proportion of the sequences with other sequences (copy/paste).
Example:
* `goalign shuffle recomb`: Recombine a given proportion of the length of a given proportion of the sequences with other sequences (copy/paste). If `--swap` is given, the two portions are exchanged, so 2 x proportion of the sequences are impacted (Example 2).
Example (no swap):
```
s1 CCCCCCCCCCCCCC s1 CCCCCCCCCCCCCC
s2 AAAAAAAAAAAAAA => s2 AAAATTTTTTTAAA
s3 GGGGGGGGGGGGGG s3 GGGGGGGGGGGGGG
s4 TTTTTTTTTTTTTT s4 TTTTTTTTTTTTTT
```
Example 2 (swap):
```
s1 CCCCCCCCCCCCCC s1 CCCCCCCCCCCCCC
s2 AAAAAAAAAAAAAA => s2 AAAATTTTTTTAAA
s3 GGGGGGGGGGGGGG s3 GGGGGGGGGGGGGG
s4 TTTTTTTTTTTTTT s4 TTTTAAAAAAATTT
```
* `goalign shuffle rogue`: Simulate rogue taxa, by shuffling (horizontally) a given proportion of the sites of a given proportion of the sequences.
Example:
Expand Down

0 comments on commit c13a469

Please sign in to comment.