# Source code for singlecellmultiomics.fastqProcessing.fastqIterator

# Fastq iterator class, Buys de Barbanson
import collections
import gzip
import os

# One FASTQ entry: the four lines of a record, stored in file order.
FastqRecord = collections.namedtuple(
    'FastqRecord',
    ['header', 'sequence', 'plus', 'qual'],
)


class FastqIterator():
    """Iterate over one or more FASTQ files in lockstep.

    Every iteration step reads one four-line record from each opened file
    and returns them together as a tuple of FastqRecord, in the order the
    paths were supplied. Iteration ends as soon as any file produces a
    record whose header line is empty; all file handles are closed at that
    point.

    The iterator also works as a context manager, which closes the handles
    on exit even when iteration is abandoned early.

    Example:
        for rec1, rec2 in FastqIterator('./R1.fastq', './R2.fastq'):
            ...
    """

    def __init__(self, *args):
        """Open every supplied FASTQ file for reading.

        Args:
            *args: Paths to FASTQ files. A path whose extension is exactly
                ``.gz`` is opened through gzip in text mode; every other
                path is opened as a plain text file.

        Raises:
            OSError: If one of the files cannot be opened. Files that were
                opened before the failure are closed again.
        """
        opened = []
        try:
            for path in args:
                if os.path.splitext(path)[1] == '.gz':
                    opened.append(gzip.open(path, 'rt'))
                else:
                    opened.append(open(path, 'r'))
        except Exception:
            # Do not leak the handles that were opened before the failure.
            for handle in opened:
                handle.close()
            raise
        self.handles = tuple(opened)
        self.readIndex = 0

    def close(self):
        """Close all underlying file handles; safe to call more than once."""
        for handle in self.handles:
            handle.close()

    def __enter__(self):
        """Return the iterator itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file handles; exceptions are not suppressed."""
        self.close()
        return False

    def _readFastqRecord(self, handle):
        """Read the next four lines from ``handle`` into a FastqRecord.

        Trailing whitespace (including the newline) is stripped from each
        line. At end of file ``readline`` returns an empty string, so every
        field of the resulting record is empty.
        """
        return FastqRecord(
            handle.readline().rstrip(),
            handle.readline().rstrip(),
            handle.readline().rstrip(),
            handle.readline().rstrip(),
        )

    def __iter__(self):
        """Executed upon iteration start; the object is its own iterator."""
        return self

    def __next__(self):
        """Obtain the next FASTQ record from every opened file.

        Returns:
            A tuple with one FastqRecord per opened file.

        Raises:
            StopIteration: When no files were given, when the handles have
                already been closed, or when any file returns a record with
                an empty header (end of file reached).
        """
        # The counter is incremented on every call, including the final one
        # that ends the iteration.
        self.readIndex += 1

        # Without any handle there is nothing to read; without this guard the
        # iterator would yield empty tuples forever. Closed handles mean the
        # iterator was exhausted or closed earlier.
        if not self.handles or any(handle.closed for handle in self.handles):
            raise StopIteration

        records = tuple(
            self._readFastqRecord(handle) for handle in self.handles)

        # Stop when empty records are being returned; the file end is reached.
        if any(not record.header for record in records):
            self.close()
            raise StopIteration
        return records