Source code for singlecellmultiomics.universalBamTagger.scar

from singlecellmultiomics.universalBamTagger.digest import DigestFlagger
from singlecellmultiomics.tagtools import tagtools
from singlecellmultiomics.utils.sequtils import phred_to_prob
import numpy as np


[docs]class ScarFlagger(DigestFlagger): def __init__(self, **kwargs): DigestFlagger.__init__(self, **kwargs)
[docs] def addSite(self, reads, scarChromosome, scarPrimerStart): R1_primer_length = 20 R2_primer_length = 18 sample = reads[0].get_tag(self.sampleTag) allele = None if not reads[0].has_tag( self.alleleTag) else reads[0].get_tag( self.alleleTag) R1, R2 = reads if R1 is None or R2 is None: for read in reads: if read is not None: self.setRejectionReason(read, 'NotPaired') return None # find all deletions: scarDescription = set() qualities = [] for read in reads: firstCigarOperation, firstCigarLen = read.cigartuples[0] insertPos = 0 lastNonNoneRefPos = read.reference_start if firstCigarOperation != 4 else read.reference_start - firstCigarLen expandedCigar = [] for cigarOperation, cigarLen in read.cigartuples: expandedCigar += [cigarOperation] * cigarLen for queryPos, referencePos in read.get_aligned_pairs( matches_only=False): if queryPos is not None: qualities.append(phred_to_prob(read.qual[queryPos])) if queryPos is None and referencePos is None: continue if referencePos is not None: lastNonNoneRefPos = referencePos insertPos = 0 if queryPos is not None: # If both the reference and query match, there is not scar information continue if queryPos is None: scarDescription.add(f'{referencePos}.D') elif referencePos is None: # insert or clip: operation = expandedCigar[queryPos] if operation == 1: if lastNonNoneRefPos is None: raise ValueError('Unsolvable :(') queryBase = read.seq[queryPos] scarDescription.add( f'{queryBase}.{insertPos+lastNonNoneRefPos}.I') insertPos += 1 scarDescription = ','.join(sorted(list(scarDescription))) siteInfo = tuple( [x for x in [allele, scarDescription] if x is not None]) moleculeId = self.increaseAndRecordOversequencing( sample, scarChromosome, scarPrimerStart, siteInfo=siteInfo) # Add average base calling quality excluding primers: meanQ = np.mean(qualities) for read in reads: read.set_tag('SQ', 1 - meanQ) self.setSiteOversequencing(read, moleculeId) if len(scarDescription) == 0: scarDescription = 'WT' read.set_tag('SD', scarDescription) self.setSiteCoordinate(read, R1.reference_start) self.setRecognizedSequence(read, 'SCAR') self.setSource(read, 'SCAR') if allele is not None: self.setAllele(read, allele)
[docs] def digest(self, reads): if len(reads) != 2: return None # Only made for mate pair R1, R2 = reads self.addAlleleInfo(reads) if R1 is None or R1.is_unmapped or R2 is None or R2.is_unmapped: return(None) self.addSite(reads, scarChromosome=R1.reference_name, scarPrimerStart=R1.reference_start)