Source code for singlecellmultiomics.modularDemultiplexer.demultiplexModules.scCHIC

from singlecellmultiomics.modularDemultiplexer.baseDemultiplexMethods import UmiBarcodeDemuxMethod, NonMultiplexable, IlluminaBaseDemultiplexer
from singlecellmultiomics.modularDemultiplexer.demultiplexModules import CELSeq2_c8_u6
# scCHIC using the NlaIII adapter, 384-well format: 3 bp UMI followed by an
# 8 bp cell barcode and a single "A" base


class SCCHIC_384w_c8_u3(UmiBarcodeDemuxMethod):
    """scCHIC demultiplexer: 3 bp UMI then 8 bp cell barcode at the start of read 1.

    The UMI is read from positions 0-2 and the barcode from positions 3-10 of
    the first read. By default a 6 bp random primer is configured on read
    index 1. The captured sequence of read 1 starts one base after the
    UMI + barcode.

    NOTE(review): the description string speaks of "a single A" after the
    barcode, while the probe check in ``demultiplex`` tests for 'T' at that
    position - confirm which orientation is intended.
    """

    def __init__(self, barcodeFileParser, random_primer_read=1,
                 random_primer_length=6, **kwargs):
        """Configure UMI/barcode layout and descriptive metadata.

        Args:
            barcodeFileParser: forwarded to ``UmiBarcodeDemuxMethod``.
            random_primer_read: index of the read carrying the random primer
                (forwarded to the base class).
            random_primer_length: length of the random primer (forwarded to
                the base class).
            **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod``.
        """
        # The alias is set before the base initialiser runs and is also
        # passed to it explicitly.
        self.barcodeFileAlias = 'maya_384NLA'
        UmiBarcodeDemuxMethod.__init__(
            self,
            umiRead=0,
            umiStart=0,
            umiLength=3,
            barcodeRead=0,
            barcodeStart=3,
            barcodeLength=8,
            random_primer_read=random_primer_read,
            random_primer_length=random_primer_length,
            barcodeFileAlias=self.barcodeFileAlias,
            barcodeFileParser=barcodeFileParser,
            **kwargs)
        self.shortName = 'scCHIC384C8U3'
        self.longName = 'Single cell CHIC, 384well CB: 8bp UMI: 3bp, RP: 6BP'
        self.autoDetectable = True
        self.description = '384 well format. 3bp umi followed by 8bp barcode and a single A. R2 ends with a 6bp random primer'

        # Do not capture the first base following the UMI + barcode.
        self.sequenceCapture[0] = slice(
            self.barcodeLength + self.umiLength + 1, None)

    def demultiplex(self, records, **kwargs):
        """Demultiplex a read pair and attach the ligation motif as tags.

        The two bases directly after the UMI + barcode of ``records[0]`` and
        their qualities are stored as the 'lh' and 'lq' tags on the first two
        tagged records.

        Args:
            records: read records; ``records[0]`` must expose ``sequence``
                and ``qual``.
            **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
                ``probe`` is truthy, the base after the UMI + barcode must be
                'T'.

        Returns:
            The tagged records produced by the base class, with 'lh' and 'lq'
            added to the records at index 0 and 1.

        Raises:
            NonMultiplexable: in probe mode when the base after the
                UMI + barcode is not 'T' (including reads too short to have
                that base).
        """
        read1 = records[0]
        ligation_start = self.barcodeLength + self.umiLength
        ligation_end = ligation_start + 2

        # Slice rather than index: a read that is too short yields '' and is
        # rejected as NonMultiplexable instead of raising IndexError.
        if kwargs.get('probe') and \
                read1.sequence[ligation_start:ligation_start + 1] != 'T':
            raise NonMultiplexable

        # Take the ligation motif from the raw read before the base class
        # processes the records.
        ligation_sequence = read1.sequence[ligation_start:ligation_end]
        ligation_qualities = read1.qual[ligation_start:ligation_end]

        taggedRecords = UmiBarcodeDemuxMethod.demultiplex(
            self, records, **kwargs)

        # Both mates receive the same ligation tags.
        for mate in (0, 1):
            taggedRecords[mate].addTagByTag(
                'lh', ligation_sequence, isPhred=False, make_safe=False)
            taggedRecords[mate].addTagByTag(
                'lq', ligation_qualities, isPhred=True, make_safe=False)
        return taggedRecords
class SCCHIC_384w_c8_u3_direct_ligation(UmiBarcodeDemuxMethod):
    """scCHIC demultiplexer (3 bp UMI, 8 bp barcode) without a random primer.

    Uses the same read-1 layout as the other 384-well scCHIC methods in this
    file, but by default passes no random primer read/length to the base
    class. This method is not auto-detectable.

    NOTE(review): the description string speaks of "a single A" after the
    barcode, while the probe check in ``demultiplex`` tests for 'T' at that
    position - confirm which orientation is intended.
    """

    def __init__(self, barcodeFileParser, random_primer_read=None,
                 random_primer_length=None, **kwargs):
        """Configure UMI/barcode layout and descriptive metadata.

        Args:
            barcodeFileParser: forwarded to ``UmiBarcodeDemuxMethod``.
            random_primer_read: forwarded to the base class (default None).
            random_primer_length: forwarded to the base class (default None).
            **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod``.
        """
        # The alias is set before the base initialiser runs and is also
        # passed to it explicitly.
        self.barcodeFileAlias = 'maya_384NLA'
        UmiBarcodeDemuxMethod.__init__(
            self,
            umiRead=0,
            umiStart=0,
            umiLength=3,
            barcodeRead=0,
            barcodeStart=3,
            barcodeLength=8,
            random_primer_read=random_primer_read,
            random_primer_length=random_primer_length,
            barcodeFileAlias=self.barcodeFileAlias,
            barcodeFileParser=barcodeFileParser,
            **kwargs)
        self.shortName = 'scCHIC384C8U3l'
        self.longName = 'Single cell CHIC, 384well CB: 8bp UMI: 3bp, no RP'
        self.autoDetectable = False
        self.description = '384 well format. 3bp umi followed by 8bp barcode and a single A. R2 does not contain a random primer'

        # Do not capture the first base following the UMI + barcode.
        self.sequenceCapture[0] = slice(
            self.barcodeLength + self.umiLength + 1, None)

    def demultiplex(self, records, **kwargs):
        """Demultiplex a read pair and attach the ligation motif as tags.

        The two bases directly after the UMI + barcode of ``records[0]`` and
        their qualities are stored as the 'lh' and 'lq' tags on the first two
        tagged records.

        Args:
            records: read records; ``records[0]`` must expose ``sequence``
                and ``qual``.
            **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
                ``probe`` is truthy, the base after the UMI + barcode must be
                'T'.

        Returns:
            The tagged records produced by the base class, with 'lh' and 'lq'
            added to the records at index 0 and 1.

        Raises:
            NonMultiplexable: in probe mode when the base after the
                UMI + barcode is not 'T' (including reads too short to have
                that base).
        """
        read1 = records[0]
        ligation_start = self.barcodeLength + self.umiLength
        ligation_end = ligation_start + 2

        # Slice rather than index: a read that is too short yields '' and is
        # rejected as NonMultiplexable instead of raising IndexError.
        if kwargs.get('probe') and \
                read1.sequence[ligation_start:ligation_start + 1] != 'T':
            raise NonMultiplexable

        # Take the ligation motif from the raw read before the base class
        # processes the records.
        ligation_sequence = read1.sequence[ligation_start:ligation_end]
        ligation_qualities = read1.qual[ligation_start:ligation_end]

        taggedRecords = UmiBarcodeDemuxMethod.demultiplex(
            self, records, **kwargs)

        # Both mates receive the same ligation tags.
        for mate in (0, 1):
            taggedRecords[mate].addTagByTag(
                'lh', ligation_sequence, isPhred=False, make_safe=False)
            taggedRecords[mate].addTagByTag(
                'lq', ligation_qualities, isPhred=True, make_safe=False)
        return taggedRecords
class SCCHIC_384w_c8_u3_direct_ligation_SINGLE_END(UmiBarcodeDemuxMethod):
    """Single-end scCHIC demultiplexer (3 bp UMI, 8 bp barcode, no read 2).

    Accepts exactly one read per fragment. No random primer is configured:
    the base class always receives ``None`` for the random primer read and
    length.

    NOTE(review): the description string speaks of "a single A" after the
    barcode, while the probe check in ``demultiplex`` tests for 'T' at that
    position - confirm which orientation is intended.
    """

    def __init__(self, barcodeFileParser, random_primer_read=None,
                 random_primer_length=None, **kwargs):
        """Configure UMI/barcode layout and descriptive metadata.

        Args:
            barcodeFileParser: forwarded to ``UmiBarcodeDemuxMethod``.
            random_primer_read: accepted for signature compatibility; not
                forwarded (the base class always receives None).
            random_primer_length: accepted for signature compatibility; not
                forwarded (the base class always receives None).
            **kwargs: forwarded unchanged to ``UmiBarcodeDemuxMethod``.
        """
        # The alias is set before the base initialiser runs and is also
        # passed to it explicitly.
        self.barcodeFileAlias = 'maya_384NLA'
        UmiBarcodeDemuxMethod.__init__(
            self,
            umiRead=0,
            umiStart=0,
            umiLength=3,
            barcodeRead=0,
            barcodeStart=3,
            barcodeLength=8,
            random_primer_read=None,
            random_primer_length=None,
            barcodeFileAlias=self.barcodeFileAlias,
            barcodeFileParser=barcodeFileParser,
            **kwargs)
        self.shortName = 'scCHIC384C8U3se'
        self.longName = 'Single cell CHIC, 384well CB: 8bp UMI: 3bp, single end, no RP'
        self.autoDetectable = True
        self.description = '384 well format. 3bp umi followed by 8bp barcode and a single A. No read 2'

        # Do not capture the first base following the UMI + barcode.
        self.sequenceCapture[0] = slice(
            self.barcodeLength + self.umiLength + 1, None)

    def demultiplex(self, records, **kwargs):
        """Demultiplex a single read and attach the ligation motif as tags.

        The two bases directly after the UMI + barcode of ``records[0]`` and
        their qualities are stored as the 'lh' and 'lq' tags on the tagged
        record.

        Args:
            records: read records; must contain exactly one record exposing
                ``sequence`` and ``qual``.
            **kwargs: forwarded to ``UmiBarcodeDemuxMethod.demultiplex``. When
                ``probe`` is truthy, the base after the UMI + barcode must be
                'T'.

        Returns:
            The tagged records produced by the base class, with 'lh' and 'lq'
            added to the record at index 0.

        Raises:
            NonMultiplexable: when the number of records is not exactly one,
                or in probe mode when the base after the UMI + barcode is not
                'T' (including reads too short to have that base).
        """
        # Validate the record count before touching records[0], so that an
        # empty input is reported as NonMultiplexable, not IndexError.
        if len(records) != 1:
            raise NonMultiplexable

        read1 = records[0]
        ligation_start = self.barcodeLength + self.umiLength
        ligation_end = ligation_start + 2

        # Slice rather than index: a read that is too short yields '' and is
        # rejected as NonMultiplexable instead of raising IndexError.
        if kwargs.get('probe') and \
                read1.sequence[ligation_start:ligation_start + 1] != 'T':
            raise NonMultiplexable

        # Take the ligation motif from the raw read before the base class
        # processes the records.
        ligation_sequence = read1.sequence[ligation_start:ligation_end]
        ligation_qualities = read1.qual[ligation_start:ligation_end]

        taggedRecords = UmiBarcodeDemuxMethod.demultiplex(
            self, records, **kwargs)
        taggedRecords[0].addTagByTag(
            'lh', ligation_sequence, isPhred=False, make_safe=False)
        taggedRecords[0].addTagByTag(
            'lq', ligation_qualities, isPhred=True, make_safe=False)
        return taggedRecords
class SCCHIC_384w_c8_u3_pdt(IlluminaBaseDemultiplexer):
    """Demultiplexer for read pairs whose read 1 contains 'AGACTCTTT'.

    Read pairs are first demultiplexed with an internal ``SCCHIC_384w_c8_u3``
    instance (created without a random primer read). The read is then clipped
    at the oligo and the bases directly in front of it are stored as an extra
    UMI tag.
    """

    def __init__(self, barcodeFileParser=None, indexFileParser=None,
                 indexFileAlias='illumina_merged_ThruPlex48S_RP', **kwargs):
        """Set up the Illumina base demultiplexer and the wrapped CHiC one.

        Args:
            barcodeFileParser: passed to the wrapped ``SCCHIC_384w_c8_u3``.
            indexFileParser: passed to ``IlluminaBaseDemultiplexer``.
            indexFileAlias: passed to ``IlluminaBaseDemultiplexer``.
            **kwargs: passed to the wrapped ``SCCHIC_384w_c8_u3``.
        """
        IlluminaBaseDemultiplexer.__init__(
            self,
            indexFileParser=indexFileParser,
            indexFileAlias=indexFileAlias)

        self.description = '384 well format, mixed transcriptome and CHiC. scCHiC: 3bp umi followed by 8bp barcode and a single A. R2 ends with a 6bp random primer. Transcriptome: cs2 + template switching oligo'
        self.shortName = 'CHICTV'
        self.autoDetectable = False

        # Delegate used for the CHiC part of every read pair.
        self.chic_demux = SCCHIC_384w_c8_u3(
            barcodeFileParser=barcodeFileParser,
            random_primer_read=None,
            **kwargs)

        self.barcodeSummary = self.chic_demux.barcodeSummary
        self.longName = f'{self.chic_demux.longName} and TV primer '

    def __repr__(self):
        return f'{self.longName} {self.description}'

    def demultiplex(self, records, **kwargs):
        """Demultiplex a mate pair and clip read 1 at the 'AGACTCTTT' oligo.

        Args:
            records: exactly two read records; ``records[0]`` must expose
                ``sequence``.
            **kwargs: forwarded to the wrapped CHiC demultiplexer.

        Returns:
            The tagged records from the CHiC demultiplexer. Every record gets
            ``tags['tu']`` (up to six bases preceding the oligo in the tagged
            first read) and ``tags['MX'] = "CTV"``; the first record's
            sequence and qualities are truncated at the oligo position.

        Raises:
            NonMultiplexable: if the input is not a pair, or the oligo is
                missing from read 1 before or after CHiC demultiplexing.
        """
        # Only mate pairs are supported.
        if len(records) != 2:
            raise NonMultiplexable('Not mate pair')

        # The TSO oligo has to be present in the raw first read.
        if 'AGACTCTTT' not in records[0].sequence:
            raise NonMultiplexable('No match to transcriptome or CHiC')

        tagged = self.chic_demux.demultiplex(records, **kwargs)
        first = tagged[0]

        # ...and must still be present once the CHiC demultiplexer is done.
        if 'AGACTCTTT' not in first.sequence:
            raise NonMultiplexable('No match to transcriptome or CHiC')

        cut = first.sequence.index('AGACTCTTT')
        # Up to six bases immediately in front of the oligo form the UMI.
        umi = first.sequence[max(0, cut - 6):cut]

        for rec in tagged:
            rec.tags['tu'] = umi
            rec.tags['MX'] = "CTV"

        # Clip the first read down to the part preceding the oligo.
        first.sequence = first.sequence[:cut]
        first.qualities = first.qualities[:cut]
        return tagged
class SCCHIC_384w_c8_u3_cs2(IlluminaBaseDemultiplexer):
    """Demultiplexer that tries CELSeq2 first and falls back to scCHIC.

    Holds one ``CELSeq2_c8_u6`` and one ``SCCHIC_384w_c8_u3`` instance, both
    built from the same barcode file parser and keyword arguments.
    """

    def __init__(self, barcodeFileParser=None, indexFileParser=None,
                 indexFileAlias='illumina_merged_ThruPlex48S_RP', **kwargs):
        """Set up the Illumina base demultiplexer and both delegates.

        Args:
            barcodeFileParser: passed to both wrapped demultiplexers.
            indexFileParser: passed to ``IlluminaBaseDemultiplexer``.
            indexFileAlias: passed to ``IlluminaBaseDemultiplexer``.
            **kwargs: passed to both wrapped demultiplexers.
        """
        IlluminaBaseDemultiplexer.__init__(
            self,
            indexFileParser=indexFileParser,
            indexFileAlias=indexFileAlias)

        self.description = '384 well format, mixed transcriptome and CHiC. scCHiC: 3bp umi followed by 8bp barcode and a single A. R2 ends with a 6bp random primer. Transcriptome: cs2 + template switching oligo'
        self.shortName = 'CHICT'
        self.autoDetectable = False

        # Delegate for transcriptome reads.
        self.transcriptome_demux = CELSeq2_c8_u6(
            barcodeFileParser=barcodeFileParser, **kwargs)
        # Delegate for CHiC reads.
        self.chic_demux = SCCHIC_384w_c8_u3(
            barcodeFileParser=barcodeFileParser, **kwargs)

        self.barcodeSummary = f'{self.chic_demux.barcodeSummary} and {self.transcriptome_demux.barcodeSummary}'
        self.longName = f'{self.chic_demux.longName} and {self.transcriptome_demux.longName}'

    def __repr__(self):
        return f'{self.longName} {self.description}'

    def demultiplex(self, records, **kwargs):
        """Demultiplex a mate pair as transcriptome, else as CHiC.

        Args:
            records: exactly two read records.
            **kwargs: forwarded to whichever delegate is tried.

        Returns:
            The result of the transcriptome demultiplexer if it accepts the
            pair, otherwise the result of the CHiC demultiplexer.

        Raises:
            NonMultiplexable: if the input is not a pair, or both delegates
                reject it.
        """
        # Only mate pairs are supported.
        if len(records) != 2:
            raise NonMultiplexable('Not mate pair')

        # First attempt: treat the pair as transcriptome.
        try:
            return self.transcriptome_demux.demultiplex(records, **kwargs)
        except NonMultiplexable:
            # Not transcriptome; fall through to the CHiC attempt.
            pass

        # Second attempt: treat the pair as CHiC.
        try:
            return self.chic_demux.demultiplex(records, **kwargs)
        except NonMultiplexable:
            raise NonMultiplexable('No match to transcriptome or CHiC')