from singlecellmultiomics.fragment import Fragment
from singlecellmultiomics.utils.sequtils import hamming_distance
[docs]class CHICFragment(Fragment):
def __init__(self,
reads,
R1_primer_length=4,
R2_primer_length=6,
assignment_radius=1_000,
umi_hamming_distance=1,
invert_strand=True
):
self.invert_strand = invert_strand
Fragment.__init__(self,
reads,
assignment_radius=assignment_radius,
R1_primer_length=R1_primer_length,
R2_primer_length=R2_primer_length,
umi_hamming_distance=umi_hamming_distance
)
# set CHIC cut site given reads
self.strand = None
self.ligation_motif = None
self.site_location = None
self.cut_site_strand = None
self.identify_site()
if self.is_valid():
self.match_hash = (
self.strand,
self.cut_site_strand,
self.site_location[0],
self.site_location[1],
self.sample)
else:
self.match_hash = None
[docs] def set_site(
self,
site_chrom,
site_pos,
site_strand=None,
is_trimmed=False):
self.set_meta('DS', site_pos)
if site_strand is not None:
self.set_meta('RS', site_strand)
self.set_strand(site_strand)
self.site_location = (site_chrom, site_pos)
self.cut_site_strand = site_strand
[docs] def identify_site(self):
R1 = self.get_R1()
if R1 is None:
self.set_rejection_reason("R1_undefined")
return None
if R1.has_tag('lh'):
self.ligation_motif = R1.get_tag('lh')
""" Valid configs:
Observed read pair:
T######## R1 ########## ^ ########## R2 ##########
Real molecule:
A###########################################################
Real molecule: and adapter:
ADAPTER: 5' NNNNT 3'
A{A:80%,N:20%}NNN [CHIC MOLECULE]
^ real cut site
"""
is_trimmed = (R1.has_tag('MX') and R1.get_tag(
'MX').startswith('scCHIC'))
if R1.is_unmapped:
self.set_rejection_reason("R1_unmapped")
return(None)
# if R1.seq[0]=='T': # Site on the start of R1, R2 should map behind
if is_trimmed:
# The first base of the read has been taken off and the lh tag is
# already set, this can be copied to RZ
self.set_site(
site_strand=R1.is_reverse if not self.invert_strand else not R1.is_reverse,
# We sequence the other strand (Starting with a T, this is an A in the molecule), the digestion thus happened on the other strand
# On the next line we asume that the mnsase cut is one base after the ligated A, but it can be more bases upstream
site_chrom=R1.reference_name,
site_pos=(R1.reference_end if R1.is_reverse else R1.reference_start),
is_trimmed=True
)
else:
self.set_site(
site_strand=R1.is_reverse if not self.invert_strand else not R1.is_reverse,
# We sequence the other strand (Starting with a T, this is an A in the molecule), the digestion thus happened on the other strand
# On the next line we asume that the mnsase cut is one base after the ligated A, but it can be more bases upstream
site_chrom=R1.reference_name,
site_pos=(R1.reference_end - 1 if R1.is_reverse else R1.reference_start + 1),
is_trimmed=False)
[docs] def is_valid(self):
return self.site_location is not None
[docs] def get_site_location(self):
return self.site_location
def __repr__(self):
return Fragment.__repr__(
self) + f'\n\tMNase cut site:{self.get_site_location()}'
def __eq__(self, other):
# Make sure fragments map to the same strand, cheap comparisons
if self.match_hash != other.match_hash:
return False
# Make sure UMI's are similar enough, more expensive hamming distance
# calculation
return self.umi_eq(other)