Source code for singlecellmultiomics.universalBamTagger.nlaIII

from singlecellmultiomics.universalBamTagger.digest import DigestFlagger
from singlecellmultiomics.tagtools import tagtools


[docs]class NlaIIIFlagger(DigestFlagger): def __init__(self, **kwargs): DigestFlagger.__init__(self, **kwargs)
[docs] def addSite(self, reads, strand, restrictionChrom, restrictionPos): if not reads[0].has_tag( self.sampleTag) or not reads[0].has_tag( self.umiTag): return sample = reads[0].get_tag(self.sampleTag) umi = reads[0].get_tag(self.umiTag) allele = None if not reads[0].has_tag( self.alleleTag) else reads[0].get_tag( self.alleleTag) siteInfo = tuple([x for x in [strand, allele, umi] if x is not None]) moleculeId = self.increaseAndRecordOversequencing( sample, restrictionChrom, restrictionPos, siteInfo=siteInfo) for read in reads: if read is None: continue self.setSiteOversequencing(read, moleculeId) self.setSiteCoordinate(read, restrictionPos) self.setSource(read, 'NLA'), {} if allele is not None: self.setAllele(read, allele) self.setStrand(read, '+' if strand == 1 else ('-' if strand == 0 else '?'))
[docs] def digest(self, reads): if len(reads) != 2: if len(reads) == 1: self.setRejectionReason(reads[0], 'unmapped mate') else: self.setRejectionReason(reads[0], 'nopair') return None # Only made for mate pair R1, R2 = reads self.addAlleleInfo([read for read in reads if read is not None]) """ Valid configs: CATG######## R1 ########## ^ ########## R2 ########## ############ R2 ########## ^ ########### R1 #####CATG reverse case !BWA inverts the query sequence if it maps to the negative strand! or R2.is_unmapped: if R1.is_unmapped and R2.is_unmapped: self.setRejectionReason(R1, 'unmapped R1;R2') elif R1.is_unmapped: self.setRejectionReason(R1, 'unmapped R1') self.setRejectionReason(R2, 'unmapped R1') else: self.setRejectionReason(R1, 'unmapped R2') self.setRejectionReason(R2, 'unmapped R2') return(None) """ # Obtain RT hexamer: if R2 is not None: hstart, hseq = tagtools.getRandomPrimerHash( R2, onStart=True, primerLength=6) self.setRandomPrimer(R1, R2, hstart, hseq) if R1 is None or R1.is_unmapped: self.setRejectionReason(R1, 'unmapped R1') self.setRejectionReason(R2, 'unmapped R1') return None if R1.seq[:4] == 'CATG' and not R1.is_reverse: rpos = (R1.reference_name, R1.reference_start) self.addSite([R1, R2], strand=0, restrictionChrom=rpos[0], restrictionPos=rpos[1]) self.setRecognizedSequence(R1, 'CATG') self.setRecognizedSequence(R2, 'CATG') return(rpos) elif R1.seq[-4:] == 'CATG' and R1.is_reverse: rpos = (R1.reference_name, R1.reference_end - 4) self.addSite([R1, R2], strand=1, restrictionChrom=rpos[0], restrictionPos=rpos[1]) self.setRecognizedSequence(R1, 'CATG') self.setRecognizedSequence(R2, 'CATG') return(rpos) # Sometimes the cycle is off elif R1.seq[:3] == 'ATG' and not R1.is_reverse: rpos = (R1.reference_name, R1.reference_start - 1) self.addSite([R1, R2], strand=0, restrictionChrom=rpos[0], restrictionPos=rpos[1]) self.setRecognizedSequence(R1, 'ATG') self.setRecognizedSequence(R2, 'ATG') return(rpos) elif R1.seq[-3:] == 'CAT' and R1.is_reverse: # First base was trimmed or lost rpos = (R1.reference_name, R1.reference_end - 3) self.addSite([R1, R2], strand=1, restrictionChrom=rpos[0], restrictionPos=rpos[1]) self.setRecognizedSequence(R1, 'CAT') self.setRecognizedSequence(R2, 'CAT') return(rpos) else: if R1.seq[:4] == 'CATG' and R1.is_reverse: self.setRejectionReason(R1, 'found CATG R1 REV exp FWD') self.setRejectionReason(R2, 'found CATG R1 REV exp FWD') elif R1.seq[-4:] == 'CATG' and not R1.is_reverse: self.setRejectionReason(R1, 'found CATG R1 FWD exp REV') self.setRejectionReason(R2, 'found CATG R1 FWD exp REV') else: self.setRejectionReason(R1, 'no CATG') self.setRejectionReason(R2, 'no CATG') return None try: start, end = tagtools.getPairGenomicLocations( R1, R2, R1PrimerLength=4, R2PrimerLength=6) self.setFragmentSize(R1, end - start) self.setFragmentSize(R2, end - start) self.setFragmentTrust(R1, start, end) self.setFragmentTrust(R2, start, end) except Exception as e: self.setFragmentSize(R1, 'unknown') self.setFragmentSize(R2, 'unknown') """ if R1.seq[:4]=='CATG' and R1.reference_start<=R2.reference_start: # Site on the start of R1, R2 should map behind self.addSite( [R1,R2], strand=0, restrictionChrom=R1.reference_name, restrictionPos=R1.reference_start ) return(( R1.reference_name, R1.reference_start)) if R1.seq[-4:]=='CATG' and R1.reference_start>=R2.reference_start: # Site on the end of R1, R2 should map before self.addSite( [R1,R2], strand=1, restrictionChrom=R1.reference_name, restrictionPos=R1.reference_end-4 ) return( (R1.reference_name, R1.reference_end-4)) """