import itertools
from singlecellmultiomics.molecule import Molecule
from singlecellmultiomics.molecule.nlaIII import NlaIIIMolecule
from singlecellmultiomics.molecule.nlaIII import AnnotatedNLAIIIMolecule
from singlecellmultiomics.molecule.chic import CHICMolecule
from singlecellmultiomics.molecule.chic import AnnotatedCHICMolecule
complement = str.maketrans('ATGC', 'TACG')
[docs]class TAPS():
# Methylated Cs get converted into T readout
def __init__(self, reference, reference_variants=None, **kwargs):
"""
Intialise the TAPS class
Args:
reference (pysam.FastaFile) : reference fasta file
reference_variants(pysam.VariantFile) : variants in comparison to supplied reference. @todo
"""
if reference_variants is not None:
raise NotImplementedError()
self.overlap_tag = 'XM'
self.reference = reference
if self.reference is None:
raise ValueError('A reference fasta file is required!')
"""
z unmethylated C in CpG context (CG)
Z methylated C in CpG context (CG)
x unmethylated C in CHG context ( C[ACT]G )
X methylated C in CHG context ( C[ACT]G )
h unmethylated C in CHH context ( C[ACT][ACT] )
H methylated C in CHH context ( C[ACT][ACT] )
"""
self.context_mapping = {}
self.context_mapping[False] = {
'CGA': 'z',
'CGT': 'z',
'CGC': 'z',
'CGG': 'z',
'CAG': 'x',
'CCG': 'x',
'CTG': 'x',
}
self.context_mapping[True] = {
context: letter.upper() for context,
letter in self.context_mapping[False].items()}
for x in list(itertools.product('ACT', repeat=2)):
self.context_mapping[True][''.join(['C'] + list(x))] = 'H'
self.context_mapping[False][''.join(['C'] + list(x))] = 'h'
[docs] def position_to_context(
self,
chromosome,
position,
observed_base='N',
strand=0):
"""Extract bismark call letter from a chromosomal location given the observed base
Args:
chromosome (str) : chromosome / contig of location to test
position (int) : genomic location of location to test
observed_base(str) : base observed in the read
strand(str) : mapping strand
Returns:
context(str) : 3 basepair context
bismark_letter(str) : bismark call
"""
qbase = observed_base.upper()
ref_base = self.reference.fetch(
chromosome, position, position + 1).upper()
# if a vcf file is supplied we can extract the possible reference bases
# @todo
context = None
methylated = False
rpos = position
if ref_base == 'C' and strand == 0:
context = self.reference.fetch(chromosome, rpos, rpos + 3).upper()
if qbase == 'T':
methylated = True
methylationStateString = self.context_mapping[methylated].get(context, 'uU'[
methylated])
elif ref_base == 'G' and strand == 1:
origin = self.reference.fetch(
chromosome, rpos - 2, rpos + 1).upper()
context = origin.translate(complement)[::-1]
if qbase == 'A':
methylated = True
return context, self.context_mapping[methylated].get(context, '.')
[docs] def molecule_to_context_call_dict(self, molecule):
"""Extract bismark call_string dictionary from a molecule
Args:
molecule(singlecellmultiomics.molecule.Molecule) : molecule to extract calls from
Returns:
context_call_dict(dict) : {(chrom,pos):bismark call letter}
"""
call_dict = {} # (chrom,pos)-> "bismark" call_string
for (chrom, pos), base in molecule.get_consensus().items():
context, letter = self.position_to_context(chromosome=molecule.chromosome,
position=pos,
observed_base=base,
strand=molecule.get_strand())
if letter is not None:
call_dict[(chrom, pos)] = letter
return call_dict
[docs]class TAPSMolecule(Molecule):
def __init__(self, fragments=None, taps=None, classifier=None, **kwargs):
""" TAPSMolecule
Args:
fragments(list) : list of fragments to associate with the Molecule
taps(singlecellmultiomics.molecule.TAPS) : TAPS class which performs the methylation calling
classifier : fitted sklearn classifier, when supplied this classifier is used to obtain a consensus from which the methylation calls are generated.
"""
Molecule.__init__(self, fragments=fragments, **kwargs)
if taps is None:
raise ValueError("""Supply initialised TAPS class
taps = singlecellmultiomics.molecule.TAPS( reference )
""")
self.taps = taps # initialised TAPS class
self.methylation_call_dict = None
self.classifier = classifier
def __finalise__(self):
super().__finalise__()
try:
self.obtain_methylation_calls(classifier=self.classifier)
except ValueError:
pass
[docs] def is_valid(self, set_rejection_reasons=False):
if not super().is_valid(set_rejection_reasons=set_rejection_reasons):
return False
try:
consensus = self.get_consensus()
except ValueError:
if set_rejection_reasons:
self.set_rejection_reason('no_consensus')
return False
except TypeError:
if set_rejection_reasons:
self.set_rejection_reason('getPairGenomicLocations_failed')
return False
if self.methylation_call_dict is None:
if set_rejection_reasons:
self.set_rejection_reason('methylation_calls_failed')
return False
return True
[docs] def obtain_methylation_calls(self, classifier=None):
""" This methods returns a methylation call dictionary
Args:
classifier : classifier used for consensus determination
returns:
mcalls(dict) : (chrom,pos) : {'consensus': consensusBase, 'reference': referenceBase, 'call': call}
"""
# Find all aligned positions and corresponding reference bases:
aligned_reference_positions = {} # (chrom,pos)->base
for read in self.iter_reads():
for read_pos, ref_pos, ref_base in read.get_aligned_pairs(
with_seq=True, matches_only=True):
aligned_reference_positions[(
read.reference_name, ref_pos)] = ref_base.upper()
# Obtain consensus:
try:
consensus = self.get_consensus(classifier=classifier)
except ValueError:
raise ValueError(
'Cannot obtain a safe consensus for this molecule')
# find all locations where a C/G was converted into A/T, now strand
# specific
converted_bases = 0
conversions = {}
for location, reference_base in aligned_reference_positions.items():
if location not in consensus:
continue
if (not self.strand and reference_base == 'C' and consensus[location] in 'CT') or \
self.strand and reference_base == 'G' and consensus[location] in 'AG':
conversions[location] = {
'ref': reference_base, 'obs': consensus[location]}
if consensus[location] in 'TA':
converted_bases += 1
# obtain the context of the conversions:
conversion_contexts = {
location:
{'consensus': consensus[location],
'reference_base': conversions[location]['ref'],
'context': self.taps.position_to_context(
*location,
observed_base=observations['obs'],
strand=self.strand)[1]}
for location, observations in conversions.items()}
# Write bismark tags:
self.set_methylation_call_tags(conversion_contexts)
[docs]class TAPSNlaIIIMolecule(NlaIIIMolecule, TAPSMolecule):
"""Molecule class for combined TAPS and NLAIII """
def __init__(self, fragments=None, taps=None, **kwargs):
NlaIIIMolecule.__init__(self, fragments, **kwargs)
TAPSMolecule.__init__(self, fragments=fragments, taps=taps, **kwargs)
[docs] def is_valid(self, set_rejection_reasons=False):
return NlaIIIMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons) and TAPSMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons)
[docs]class AnnotatedTAPSNlaIIIMolecule(AnnotatedNLAIIIMolecule, TAPSMolecule):
"""Molecule class for combined TAPS, NLAIII and transcriptome """
def __init__(self, fragments=None, features=None, taps=None, **kwargs):
assert features is not None, "Supply features!"
AnnotatedNLAIIIMolecule.__init__(
self, fragments, features=features, **kwargs)
TAPSMolecule.__init__(self, fragments=fragments, taps=taps, **kwargs)
[docs] def is_valid(self, set_rejection_reasons=False):
return AnnotatedNLAIIIMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons) and TAPSMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons)
[docs]class TAPSCHICMolecule(CHICMolecule, TAPSMolecule):
"""Molecule class for combined TAPS and CHIC """
def __init__(self, fragments=None, taps=None, **kwargs):
CHICMolecule.__init__(self, fragments, **kwargs)
TAPSMolecule.__init__(self, fragments=fragments, taps=taps, **kwargs)
[docs] def is_valid(self, set_rejection_reasons=False):
return CHICMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons) and TAPSMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons)
[docs]class AnnotatedTAPSCHICMolecule(AnnotatedCHICMolecule, TAPSMolecule):
"""Molecule class for combined TAPS, CHIC and transcriptome """
def __init__(self, fragments=None, features=None, taps=None, **kwargs):
assert features is not None, "Supply features!"
AnnotatedCHICMolecule.__init__(
self, fragments, features=features, **kwargs)
TAPSMolecule.__init__(self, fragments=fragments, taps=taps, **kwargs)
[docs] def is_valid(self, set_rejection_reasons=False):
return AnnotatedCHICMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons) and TAPSMolecule.is_valid(
self, set_rejection_reasons=set_rejection_reasons)