Source code for singlecellmultiomics.statistic.cellreadcount

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
from .statistic import StatisticHistogram
import singlecellmultiomics.pyutils as pyutils
import collections
import pandas as pd
import matplotlib
import numpy as np
matplotlib.rcParams['figure.dpi'] = 160
matplotlib.use('Agg')
import seaborn as sns


[docs]def readIsDuplicate(read):
    return (read.has_tag('RC') and read.get_tag('RC') > 1) or read.is_duplicate


[docs]class CellReadCount(StatisticHistogram):
    def __init__(self, args):
        StatisticHistogram.__init__(self, args)
        self.read_counts = collections.Counter()
        self.molecule_counts = collections.Counter()

[docs]    def processRead(self, R1,R2=None):

        for read in [R1,R2]:
            if read is None:
                continue

            if not read.has_tag('SM'):
                continue

            cell = read.get_tag('SM')

            self.read_counts[cell] +=1

            if not read.is_duplicate:
                self.molecule_counts[cell] +=1
            break

[docs]    def to_csv(self, path):
        pd.DataFrame({'reads':self.read_counts, 'umis':self.molecule_counts}).to_csv(path)

    def __repr__(self):
        return f'The average amount of reads is {np.mean(list(self.read_counts.values()))}'

[docs]    def plot(self, target_path, title=None):
        fig, ax = plt.subplots()
        print(self.read_counts)
        ax.hist(list(self.read_counts.values()), bins=25, zorder=1)

        if title is not None:
            ax.set_title(title)

        ax.set_xlabel("# Reads")
        ax.set_ylabel("# Cells")
        ax.grid(zorder=0)
        sns.despine()
        plt.tight_layout()
        plt.savefig(target_path)
        plt.close()

        fig, ax = plt.subplots()
        ax.hist(list(self.molecule_counts.values()), bins=25,zorder=1)
        ax.grid(zorder=0)
        sns.despine()
        if title is not None:
            plt.title(title)

        ax.set_xlabel("# Molecules")
        ax.set_ylabel("# Cells")
        plt.tight_layout()
        plt.savefig(target_path.replace('.png', '.molecules.png'))
        plt.close()