Source code for singlecellmultiomics.universalBamTagger.scchic

from singlecellmultiomics.universalBamTagger.digest import DigestFlagger
from singlecellmultiomics.tagtools import tagtools
complement = str.maketrans('ATGC', 'TACG')


[docs]class ChicSeqFlagger(DigestFlagger): def __init__(self, **kwargs): DigestFlagger.__init__(self, **kwargs)
[docs] def addSite( self, reads, strand, restrictionChrom, restrictionPos, is_trimmed=False): sample = reads[0].get_tag(self.sampleTag) umi = reads[0].get_tag(self.umiTag) allele = None if not reads[0].has_tag( self.alleleTag) else reads[0].get_tag( self.alleleTag) siteInfo = tuple([x for x in [strand, allele, umi] if x is not None]) moleculeId = self.increaseAndRecordOversequencing( sample, restrictionChrom, restrictionPos, siteInfo=siteInfo) for read in reads: if read is None: continue self.setSiteOversequencing(read, moleculeId) self.setSiteCoordinate(read, restrictionPos) if is_trimmed: self.setRecognizedSequence(read, read.get_tag('lh')) else: if reads[0].is_reverse: # the first two base, this should be A{A:80%, N:20%}, we # take the complement because the reads captures the # complement strand self.setRecognizedSequence( read, reads[0].seq[-2:][::-1].translate(complement)) else: self.setRecognizedSequence( read, reads[0].seq[:2]) # the last two bases self.setSource(read, 'CHIC') if allele is not None: self.setAllele(read, allele) # Note: strand 1 is + self.setStrand(read, '+' if strand == 1 else ('-' if strand == 0 else '?'))
[docs] def digest(self, reads): if len(reads) != 2: return None # Only made for mate pair R1, R2 = reads if R1 is None or R2 is None: return None self.addAlleleInfo([read for read in reads if read is not None]) """ Valid configs: Observed read pair: T######## R1 ########## ^ ########## R2 ########## Real molecule: A########################################################### Real molecule: and adapter: ADAPTER: 5' NNNNT 3' A{A:80%,N:20%}NNN [CHIC MOLECULE] ^ real cut site """ is_trimmed = (R1.has_tag('MX') and R1.get_tag( 'MX').startswith('scCHIC')) if R1.is_unmapped or R2.is_unmapped: return(None) try: start, end = tagtools.getPairGenomicLocations( R1, R2, R1PrimerLength=1 - int(is_trimmed), R2PrimerLength=6) self.setFragmentSize(R1, end - start) self.setFragmentSize(R2, end - start) self.setFragmentTrust(R1, start, end) self.setFragmentTrust(R2, start, end) except Exception as e: self.setFragmentSize(R1, 'unknown') self.setFragmentSize(R2, 'unknown') # if R1.seq[0]=='T': # Site on the start of R1, R2 should map behind if is_trimmed: # The first base of the read has been taken off and the lh tag is # already set, this can be copied to RZ self.addSite([R1, R2], # We sequence the other strand (Starting with a T, # this is an A in the molecule), the digestion thus # happened on the other strand strand=int(R1.is_reverse), # On the next line we asume that the mnsase cut is one # base after the ligated A, but it can be more bases # upstream restrictionChrom=R1.reference_name, restrictionPos=( R1.reference_end if R1.is_reverse else R1.reference_start), is_trimmed=True ) else: self.addSite([R1, R2], # We sequence the other strand (Starting with a T, # this is an A in the molecule), the digestion thus # happened on the other strand strand=int(R1.is_reverse), # On the next line we asume that the mnsase cut is one # base after the ligated A, but it can be more bases # upstream restrictionChrom=R1.reference_name, restrictionPos=( R1.reference_end - 1 if R1.is_reverse else R1.reference_start + 1), is_trimmed=False) return((R1.reference_name, R1.reference_start))