Source code for singlecellmultiomics.molecule.fourthiouridine

from singlecellmultiomics.molecule import Molecule


class FourThiouridine(Molecule):
    """Molecule that counts T>C (or A>G) consensus/reference differences.

    The count is stored in ``self.converted_bases`` and handed to
    ``set_meta`` under the key ``'4U'`` when the molecule is finalised.
    """

    def __init__(self, fragments=None, classifier=None, **kwargs):
        """Initialise the FourThiouridine molecule.

        Args:
            fragments (list): list of fragments to associate with the
                Molecule.
            classifier: fitted sklearn classifier, when supplied this
                classifier is used to obtain the consensus from which the
                conversion calls are generated.
            **kwargs: forwarded unchanged to ``Molecule.__init__``.
        """
        Molecule.__init__(self, fragments=fragments, **kwargs)
        self.classifier = classifier
        # Filled in by __finalise__ from the associated fragments.
        self.gene = None

    def __finalise__(self):
        """Finalise the molecule: count conversions, then pick up the gene.

        Raises:
            ValueError: propagated from ``obtain_conversions`` when no
                consensus can be obtained.
        """
        super().__finalise__()
        self.obtain_conversions(self.classifier)

        # Every fragment is visited, so the last fragment that carries a
        # gene determines self.gene.
        for frag in self:
            if frag.gene is not None:
                self.gene = frag.gene

    def is_valid(self, set_rejection_reasons=False):
        """Check the base class validity and that a consensus can be built.

        Args:
            set_rejection_reasons (bool): when True, the reason for a failed
                consensus is recorded through ``set_rejection_reason``.

        Returns:
            bool: False when the base class rejects the molecule or when
            ``get_consensus`` raises ``ValueError`` or ``TypeError``,
            True otherwise.
        """
        if not super().is_valid(set_rejection_reasons=set_rejection_reasons):
            return False

        # Only the success or failure of the call matters here; the
        # consensus itself is discarded.
        # NOTE(review): this probe does not pass self.classifier, whereas
        # __finalise__ obtains the consensus with it - confirm intended.
        try:
            self.get_consensus()
        except ValueError:
            if set_rejection_reasons:
                self.set_rejection_reason('no_consensus')
            return False
        except TypeError:
            if set_rejection_reasons:
                self.set_rejection_reason('getPairGenomicLocations_failed')
            return False

        return True

    def obtain_conversions(self, classifier=None):
        """Count the converted bases of this molecule.

        The count is stored in ``self.converted_bases`` and passed to
        ``set_meta`` under the key ``'4U'``.

        Args:
            classifier: classifier used for consensus determination.

        Returns:
            None

        Raises:
            ValueError: when ``get_consensus`` raises a ValueError; the
                original error is attached as the cause.
        """
        # Find all aligned positions and corresponding reference bases,
        # upper-cased so the comparisons below are case independent.
        aligned_reference_positions = {}  # (chrom, pos) -> base
        for read in self.iter_reads():
            for _read_pos, ref_pos, ref_base in read.get_aligned_pairs(
                    with_seq=True, matches_only=True):
                aligned_reference_positions[
                    (read.reference_name, ref_pos)] = ref_base.upper()

        # Obtain consensus:
        try:
            consensus = self.get_consensus(classifier=classifier)
        except ValueError as error:
            raise ValueError(
                'Cannot obtain a safe consensus for this molecule'
            ) from error

        # A falsy strand is scored as T>C, a truthy strand as A>G.
        if self.strand:
            expected_ref, expected_obs = 'A', 'G'
        else:
            expected_ref, expected_obs = 'T', 'C'

        # Strict equality on the observed base: a substring test such as
        # `base in 'G'` would also accept an empty consensus base.
        self.converted_bases = 0
        for location, reference_base in aligned_reference_positions.items():
            if reference_base != expected_ref:
                continue
            if location not in consensus:
                continue
            if consensus[location] == expected_obs:
                self.converted_bases += 1

        self.set_meta('4U', self.converted_bases)