Source code for singlecellmultiomics.universalBamTagger.customreads

#!/usr/bin/env python3
from singlecellmultiomics.universalBamTagger.digest import DigestFlagger
from singlecellmultiomics.utils import split_nth


class VaninsbergheQueryNameFlagger(DigestFlagger):
    """Flagger that copies trailing query-name fields into BAM tags.

    The query name is expected to end in four colon-separated fields, in
    this order: ``<origin>:<MI>:<cell barcode>:<UMI>:<cell index>``.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``DigestFlagger.__init__``."""
        DigestFlagger.__init__(self, **kwargs)

    def digest(self, reads):
        """Tag every non-``None`` read with values parsed from its name.

        Args:
            reads: iterable of read objects exposing ``query_name`` and
                ``set_tag`` (presumably pysam aligned segments - confirm
                against callers); ``None`` entries are skipped.

        Raises:
            ValueError: if the query name does not split into exactly five
                parts on its last four colons, or if the cell index field
                is not an integer literal.
        """
        for read in (candidate for candidate in reads if candidate is not None):
            # Split from the right so colons inside the origin stay intact.
            name_fields = read.query_name.rsplit(':', 4)
            _origin, mi_value, barcode, umi_seq, index_field = name_fields

            read.set_tag('MI', mi_value)
            read.set_tag('RX', umi_seq)
            # The index is stored as an integer tag, the others as strings.
            read.set_tag('bi', int(index_field))
            read.set_tag('SM', barcode)


class BulkFlagger(DigestFlagger):
    """Flagger that writes the same fixed set of tags onto every read."""

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``DigestFlagger.__init__``."""
        DigestFlagger.__init__(self, **kwargs)

    def digest(self, reads):
        """Set constant ``MI``, ``RX``, ``bi`` and ``SM`` tags on each read.

        Args:
            reads: iterable of read objects exposing ``set_tag``; ``None``
                entries are skipped.
        """
        # Fixed tag values, written in this order on every read.
        constant_tags = (
            ('MI', "A"),
            ('RX', "A"),
            ('bi', 0),
            ('SM', "BULK"),
        )
        for read in reads:
            if read is not None:
                for tag_name, tag_value in constant_tags:
                    read.set_tag(tag_name, tag_value)


class CustomAssingmentQueryNameFlagger(DigestFlagger):
    """Query name flagger that converts values between colons ":" to tags."""

    def __init__(self, block_assignments, **kwargs):
        """Initialise CustomAssingmentQueryNameFlagger.

        Args:
            block_assignments (list): two letter tag codes; the n-th code
                receives the n-th colon-separated value that follows the
                original read name.
            **kwargs: forwarded unchanged to ``DigestFlagger.__init__``.

        Raises:
            ValueError: if any entry of ``block_assignments`` is not exactly
                two characters long.
        """
        self.block_assignments = block_assignments
        # Amount of ':' in the original read name.
        self.origin_colons = 7

        # Every assignment has to be a two letter tag; report the first
        # one that is not.
        for tag_code in block_assignments:
            if len(tag_code) != 2:
                raise ValueError(f'Tag {tag_code} is not two letters long')

        DigestFlagger.__init__(self, **kwargs)

    def digest(self, reads):
        """Restore each read's original name and store appended values as tags.

        Args:
            reads: iterable of read objects exposing ``query_name`` and
                ``set_tag``; ``None`` entries are skipped.
        """
        for read in reads:
            if read is not None:
                # Separate the original read name from the appended data.
                # NOTE(review): relies on split_nth returning an
                # (origin, remainder) pair - confirm against utils.
                original_name, appended = split_nth(
                    read.query_name, ':', self.origin_colons)

                # Reset the read name.
                read.query_name = original_name

                # Pair tags with values positionally; zip stops at the
                # shorter of the two sequences.
                appended_values = appended.split(':')
                for tag_code, tag_value in zip(self.block_assignments,
                                               appended_values):
                    read.set_tag(tag_code, tag_value)