Source code for singlecellmultiomics.statistic.fragmentsize

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
from .statistic import StatisticHistogram
import singlecellmultiomics.pyutils as pyutils
import collections

import matplotlib
matplotlib.rcParams['figure.dpi'] = 160
matplotlib.use('Agg')


[docs]def readIsDuplicate(read): return (read.has_tag('RC') and read.get_tag('RC') > 1) or read.is_duplicate
[docs]class FragmentSizeHistogram(StatisticHistogram): def __init__(self, args): StatisticHistogram.__init__(self, args) self.histogram = collections.Counter() self.histogramReject = collections.Counter() self.histogramAccept = collections.Counter()
[docs] def processRead(self, read): if not read.is_proper_pair or not read.is_paired or not read.is_read1 or readIsDuplicate( read): return mateStart = read.next_reference_start if mateStart is None: return readLen = read.infer_query_length() if readLen is None: return readA = (read.reference_start, read.reference_end) readB = (mateStart, mateStart + readLen) end = max( read.reference_start, read.reference_end, mateStart, mateStart + readLen) start = min( read.reference_start, read.reference_end, mateStart, mateStart + readLen) fragmentSize = end - start if fragmentSize > 1_000: return #print(fragmentSize, read.reference_start, read.reference_end,mateStart,readLen ) self.histogram[fragmentSize] += 1 if read.has_tag('DS') and not read.has_tag('RR'): self.histogramAccept[fragmentSize] += 1 else: self.histogramReject[fragmentSize] += 1
def __repr__(self): return f'The average fragment size is {pyutils.meanOfCounter(self.histogram)}, SD:{pyutils.varianceOfCounter(self.histogram)}'
[docs] def plot(self, target_path, title=None): fig, ax = plt.subplots() ax.bar( list( self.histogram.keys()), list( self.histogram.values()), width=1) if title is not None: ax.set_title(title) ax.set_xlabel("Fragment size [bp]") ax.set_ylabel("# Fragments") plt.tight_layout() plt.savefig(target_path) plt.close() fig, ax = plt.subplots() plt.bar( list( self.histogramReject.keys()), list( self.histogramReject.values()), width=1, color='r') if title is not None: plt.title(title) ax.set_xlabel("Rejected fragment size [bp]") ax.set_ylabel("# Fragments") plt.tight_layout() plt.savefig(target_path.replace('.png', '.rejected.png')) plt.close() fig, ax = plt.subplots() ax.bar( list( self.histogramAccept.keys()), list( self.histogramAccept.values()), width=1, color='g') if title is not None: plt.title(title) ax.set_xlabel("Accepted fragment size [bp]") ax.set_ylabel("# Fragments") plt.tight_layout() plt.savefig(target_path.replace('.png', '.accepted.png')) plt.close()