from singlecellmultiomics.molecule.molecule import Molecule
from singlecellmultiomics.molecule.featureannotatedmolecule import FeatureAnnotatedMolecule
import collections
[docs]class CHICMolecule(Molecule):
"""CHIC Molecule class
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
**kwargs: extra args
"""
def __init__(self, fragment,
**kwargs):
self.site_location = None
Molecule.__init__(self, fragment, **kwargs)
self.ligation_motif = None
# Extracted from fragments:
self.assignment_radius=None
def _add_fragment(self,fragment):
# Update the cut coordinate tho the (left most extreme value)
self.assignment_radius =fragment.assignment_radius
if fragment.site_location is not None:
if self.site_location is None:
self.site_location = [fragment.site_location[0],fragment.site_location[1]]
elif fragment.strand: # Reverse:
self.site_location[1] = max(fragment.site_location[1], self.site_location[1]) # this is the coordinate
else:
self.site_location[1] = min(fragment.site_location[1], self.site_location[1]) # this is the coordinate
# else : writing a fragment which has no cut location associated
Molecule._add_fragment(self, fragment)
[docs] def get_cut_site(self):
"""For restriction based protocol data, obtain genomic location of cut site
Returns:
None if site is not available
chromosome (str)
position (int)
strand (bool)
"""
return (*self.site_location, self.strand)
[docs] def is_valid(self, set_rejection_reasons=False):
try:
chrom, start, strand = self.get_cut_site()
except Exception as e:
if set_rejection_reasons:
self.set_rejection_reason(f'no_cut_site_found')
return False
return True
[docs] def get_fragment_span_sequence(self, reference=None):
"""Obtain the sequence between the start and end of the molecule
Args:
reference(pysam.FastaFile) : reference to use.
If not specified `self.reference` is used
Returns:
sequence (str)
"""
if reference is None:
if self.reference is None:
raise ValueError('Please supply a reference (PySAM.FastaFile)')
reference = self.reference
return reference.fetch(
self.chromosome,
self.spanStart,
self.spanEnd).upper()
def __finalise__(self):
# Obtain ligation motif(s)
self.update_ligation_motif()
Molecule.__finalise__(self)
[docs] def update_ligation_motif(self):
"""
Extract lh tag from associated reads and set most common one to the RZ tag
"""
ligation_motif_counts = collections.Counter()
for fragment in self:
if fragment.ligation_motif is not None:
ligation_motif_counts[fragment.ligation_motif]+=1
if len(ligation_motif_counts)>0:
self.ligation_motif = ligation_motif_counts.most_common(1)[0][0]
self.set_meta('RZ', self.ligation_motif)
[docs]class CHICNLAMolecule(Molecule):
"""CHIC NLA Molecule class
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
**kwargs: extra args
"""
[docs] def get_upstream_site(self, scan_extra_bp=3):
read = None
for fragment in self:
if fragment[0] is not None:
read = fragment[0]
break
if read is None:
raise ValueError() # Read 1 is not mapped
if self.strand==0:
start = read.reference_start
return self.reference.fetch( self.chromosome, start-4-scan_extra_bp, start)
else:
start = read.reference_end
return self.reference.fetch( self.chromosome, start, start+4+scan_extra_bp)
def __init__(self, fragment, reference, **kwargs):
self.reference = reference
CHICMolecule.__init__(self, fragment, reference=reference, **kwargs)
[docs]class AnnotatedCHICMolecule(CHICMolecule, FeatureAnnotatedMolecule):
"""Chic Molecule which is annotated with features (genes/exons/introns, .. )
Args:
fragments (singlecellmultiomics.fragment.Fragment): Fragments to associate to the molecule
features (singlecellmultiomics.features.FeatureContainer) : container to use to obtain features from
**kwargs: extra args
"""
def __init__(self, fragment, features, **kwargs):
CHICMolecule.__init__(self, fragment, **kwargs)
FeatureAnnotatedMolecule.__init__(self, fragment, features, **kwargs)