# Fastq iterator class, Buys de Barbanson
import collections
import gzip
import os
# One fastq entry: the four consecutive lines of a record, in file order
# (header, sequence, plus, qual), as filled in by
# FastqIterator._readFastqRecord with trailing whitespace stripped.
FastqRecord = collections.namedtuple(
    'FastqRecord', 'header sequence plus qual')


class FastqIterator():
    """FastqIterator, iterates over one or more fastq files in lockstep.

    Every iteration step reads one four-line record from each opened file
    and yields them together as a tuple of FastqRecord, in the order the
    paths were supplied. Iteration ends as soon as any of the files returns
    a record with an empty header line.

    The iterator can be used as a context manager so that the underlying
    file handles are closed deterministically:

        with FastqIterator('./R1.fastq', './R2.fastq.gz') as reader:
            for rec1, rec2 in reader:
                ...
    """

    def __init__(self, *args):
        """Initialise FastqIterator.

        Argument(s):
        path to fastq file, path to fastq file 2 , ...
        Paths whose extension is exactly '.gz' are opened with gzip in text
        mode, all other paths are opened as plain text.

        example: for rec1, rec2 in FastqIterator('./R1.fastq', './R2.fastq'):

        Raises:
        whatever open() / gzip.open() raises for a path that cannot be
        opened; files opened before the failure are closed first.
        """
        opened = []
        try:
            for path in args:
                # Load as GZIP when the extension is .gz
                if os.path.splitext(path)[1] == '.gz':
                    opened.append(gzip.open(path, 'rt'))
                else:
                    opened.append(open(path, 'r'))
        except BaseException:
            # Do not leak the handles opened so far when a later path fails
            for handle in opened:
                handle.close()
            raise
        self.handles = tuple(opened)
        # Number of record tuples returned by __next__ so far
        self.readIndex = 0

    def _readFastqRecord(self, handle):
        """Read four lines from handle and load them into a FastqRecord.

        At the end of the file readline() returns '', so all four fields of
        the returned record are empty strings.
        """
        return FastqRecord(
            handle.readline().rstrip(),
            handle.readline().rstrip(),
            handle.readline().rstrip(),
            handle.readline().rstrip()
        )

    def __iter__(self):
        """Executed upon iterator initiation; the object is its own iterator."""
        return self

    def __next__(self):
        """Obtain the next fastq record for all opened files.

        Returns:
        tuple with one FastqRecord per opened file.

        Raises:
        StopIteration when no files were opened, or when any file returns
        a record with an empty header (the file end is reached).
        """
        records = tuple(self._readFastqRecord(handle)
                        for handle in self.handles)
        # Without any input file there is nothing to yield; without this
        # guard an empty tuple would be returned forever.
        if not records or any(not rec.header for rec in records):
            raise StopIteration
        # Only count records that are actually handed to the caller
        self.readIndex += 1
        return records

    def close(self):
        """Close all file handles opened by this iterator."""
        for handle in self.handles:
            handle.close()

    def __enter__(self):
        """Enter a with-block; returns the iterator itself."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave a with-block; closes all file handles."""
        self.close()