Skip to content
Pierre Morisse edited this page Aug 31, 2021 · 7 revisions

LRez is a project that provides both a standalone toolkit and a C++ API allowing to work with barcoded linked-reads.

Using the API

The documentation of the various modules and of their corresponding functions is available at https://morispi.github.io/LRez/files.html.

LRez can easily be integrated to existing projects, and should be compiled using -llrez_bamtools -llrez -L/path/to/LRez/lib/.

Examples

We provide a few examples on how to use the different functionalities of the API below.

Extract the barcodes from a given region of a BAM file

string bamFile = "bamFile.bam";
string region = "chr1:0-1000";
robin_hood::unordered_set<string> barcodes;
barcodes = extractBarcodesSeqsFromRegion(bamFile, region);

// Print extracted barcodes
for (string s : barcodes) {
    std::cout << s << std::endl;
}

Alternatively, if the BAM file needs to be queried with multiple regions, the BAM file can be opened a single time

string bamFile = "bamFile.bam";
BamReader reader;
reader.Open(bamFile);
// Check if the BAM file was properly opened

robin_hood::unordered_set<string> barcodes;
string region = "chr1:0-1000";
barcodes = extractBarcodesSeqsFromRegion_BamReader(reader, region);

// Print extracted barcodes
for (string s : barcodes) {
    std::cout << s << std::endl;
}

region = "chr2:5000-6000";
barcodes = extractBarcodesSeqsFromRegion_BamReader(reader, region);

// Print extracted barcodes
for (string s : barcodes) {
    std::cout << s << std::endl;
}

reader.Close();

Index the barcodes from a BAM file

string bamFile = "bamFile.bam";
bool onlyIndexPrimary = false;
unsigned minQuality = 0;
BarcodesOffsetsIndex index = indexBarcodesOffsetsFromBam(bamFile, onlyIndexPrimary, minQuality);

Query the index to retrieve alignments in a BAM file

string bamFile = "bamFile.bam";
BamReader reader;
reader.Open(bamFile);
// Check if the BAM file was properly opened

vector<BamAlignment> alignments;
string barcode = "ACGTTGCGATGGTCTG";
alignments = retrieveAlignmentsWithBarcode_BamReader(reader, index, barcode);

// Print extracted alignments
for (BamAlignment al : alignments) {
    std::cout << convertToSam(al, reader.GetReferenceData()) << std::endl;
}

Alternatively, if the index needs to be queried with multiple barcodes, the BAM file can be opened a single time

string bamFile = "bamFile.bam";
BamReader reader;
reader.Open(bamFile);
// Check if the BAM file was properly opened

vector<BamAlignment> alignments;
string barcode = "ACGTTGCGATGGTCTG";
alignments = retrieveAlignmentsWithBarcode_BamReader(reader, index, barcode);

// Print extracted alignments
for (BamAlignment al : alignments) {
    std::cout << convertToSam(al, reader.GetReferenceData()) << std::endl;
}


barcode = "TTGCTAGCTAGGTAGT";
alignments = retrieveAlignmentsWithBarcode_BamReader(reader, index, barcode);

// Print extracted alignments
for (BamAlignment al : alignments) {
    std::cout << convertToSam(al, reader.GetReferenceData()) << std::endl;
}

reader.Close();

Use the index to compute the number of common barcodes between the ends of a given contig, and the ends of all other contigs

unsigned endSize = 1000;
string contig = "chr12";
robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> commonBarcodes;
commonBarcodes = compareContig(bamFile, index, contig, endSize);

// Print the counts of common barcodes
for (auto c : commonBarcodes) {
    std::cout << c.first.first << " " << c.first.second << " " << c.second << std::endl;
}

Index the barcodes from a FASTQ file

string fastqFile = "fastqFile.fastq";
BarcodesIndex index = indexBarcodesFromFastq(fastqFile);

Alternatively, if the FASTQ file is gzipped

string fastqFile = "fastqFile.fastq";
BarcodesIndex index = indexBarcodesFromFastqGz(fastqFile);

Query the index to retrieve reads in a FASTQ file

vector<string> reads;
string barcode = "ACGTTGCGATGGTCTG";
reads = retrieveReadsWithBarcode(fastqFile, index, barcode);

// Print extracted reads
for (string r : reads) {
    std::cout << r << std::endl;
}

Alternatively, if the index needs to be queried with multiple barcodes, the FASTQ file can be opened a single time

string fastqFile = "fastqFile.fastq";
ifstream if;
if.open(fastqFile);
// Check if the FASTQ file was properly opened

vector<string> reads;
string barcode = "ACGTTGCGATGGTCTG";
reads = retrieveReadsWithBarcodes_Stream(if, index, barcode);

// Print extracted reads
for (string r : reads) {
    std::cout << r << std::endl;
}

barcode = "TTGCTAGCTAGGTAGT";
reads = retrieveReadsWithBarcodes_Stream(if, index, barcode);

// Print extracted reads
for (string r : reads) {
    std::cout << r << std::endl;
}

if.close();

Query the index to retrieve reads in a gzipped FASTQ file

vector<string> reads;
string barcode = "ACGTTGCGATGGTCTG";
reads = retrieveReadsWithBarcode_Gzip(fastqFile, index, barcode);

// Print extracted reads
for (string r : reads) {
    std::cout << r << std::endl;
}

Alternatively, if the index needs to be queried with multiple barcodes, the FASTQ file can be opened a single time

string fastqFile = "fastqFile.fastq";
FILE* if;
if = fopen(fastqFile.c_str(), "rb");
// Check if the FASTQ file was properly opened

struct access* gzIndex = NULL;
gzIndex = deserializeGzIndex(gzIndex, fastqFile + "i");

vector<string> reads;
string barcode = "ACGTTGCGATGGTCTG";
reads = retrieveReadsWithBarcode_Gzip_Stream_Index(if, gzIndex, index, barcode);

// Print extracted reads
for (string r : reads) {
    std::cout << r << std::endl;
}

barcode = "TTGCTAGCTAGGTAGT";
reads = retrieveReadsWithBarcode_Gzip_Stream_Index(if, gzIndex, index, barcode);

// Print extracted reads
for (string r : reads) {
    std::cout << r << std::endl;
}

fclose(if);
freeGzIndex(gzIndex);

Retrieve statistics from a BAM file

string bamFile = "bamFile.bam";
BamReader reader;
reader.Open(bamFile);
// Check if the BAM file was properly opened

// Compute stats
vector<string> regionsList = extractRegionsList(reader, regionSize);
pair<vector<unsigned>, vector<unsigned>> res = extractBarcodesAndCommonBarcodesCounts(bamFile, regionsList, numberOfRegions, nbThreads);
vector<unsigned> barcodesPerRegion = res.first;
vector<unsigned> commonBarcodes = res.second;

// Print the stats
std::cout << "Number of barcodes: " << stats.nbBarcodes << std::endl;
std::cout << "Number of mapped reads: " << stats.nbMappedReads << std::endl;
std::cout << std::endl;

std::cout << "Number of reads per barcode:" << std::endl;
std::cout << "\t 1st quantile: " << stats.readsPerBarcode[0.25 * stats.readsPerBarcode.size()] << std::endl;
std::cout << "\t median: " << stats.readsPerBarcode[0.5 * stats.readsPerBarcode.size()] << std::endl;
std::cout << "\t 3rd quantile: " << stats.readsPerBarcode[0.75 * stats.readsPerBarcode.size()] << std::endl;
std::cout << std::endl;

std::cout << "Number of barcodes per region of size " << regionSize << ":" << std::endl;
std::cout << "\t 1st quantile: " << barcodesPerRegion[0.25 * barcodesPerRegion.size()] << std::endl;
std::cout << "\t median: " << barcodesPerRegion[0.5 * barcodesPerRegion.size()] << std::endl;
std::cout << "\t 3rd quantile: " << barcodesPerRegion[0.75 * barcodesPerRegion.size()] << std::endl;
std::cout << std::endl;

std::cout << "Number of common barcodes between adjacent regions of size " << regionSize << ":" << std::endl;
std::cout << "\t 1st quantile: " << commonBarcodes[0.25 * commonBarcodes.size()] << std::endl;
std::cout << "\t median: " << commonBarcodes[0.5 * commonBarcodes.size()] << std::endl;
std::cout << "\t 3rd quantile: " << commonBarcodes[0.75 * commonBarcodes.size()] << std::endl;