Source code for singlecellmultiomics.statistic.scchicligation

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
from .statistic import StatisticHistogram
import singlecellmultiomics.pyutils as pyutils
import collections
import pandas as pd
import seaborn as sns
import matplotlib
matplotlib.rcParams['figure.dpi'] = 160
matplotlib.use('Agg')


class ScCHICLigation():
    """Per-cell tally of how many reads carry an ``RZ`` tag of ``TA`` / starting with ``A``.

    Two parallel tables are kept, both keyed by the read's ``SM`` tag:

    * ``per_cell_ta_obs[cell]`` holds ``'TA_start'`` and ``'total'``
    * ``per_cell_a_obs[cell]`` holds ``'A_start'`` and ``'total'``

    NOTE(review): judging by the axis labels used in ``plot``, the ``RZ`` tag
    is presumably the ligation motif of the molecule -- confirm against the
    tagging code.
    """

    def __init__(self, args):
        """Create empty per-cell counters.

        Args:
            args: accepted for interface compatibility; not used here.
        """
        # cell -> { A_start: count, total: count }
        self.per_cell_a_obs = collections.defaultdict(collections.Counter)
        # cell -> { TA_start: count, total: count }
        self.per_cell_ta_obs = collections.defaultdict(collections.Counter)

    def processRead(self, R1, R2):
        """Update the per-cell counters from one read pair.

        Only ``R1`` is inspected. It is counted when it has an ``RZ`` tag, is
        not flagged as duplicate and is read 1; everything else is ignored.

        Args:
            R1: read object exposing ``has_tag``, ``get_tag``, ``is_duplicate``
                and ``is_read1`` (presumably a pysam aligned segment), or None.
            R2: unused.
        """
        if R1 is None:
            return
        read = R1
        if not read.has_tag('RZ') or read.is_duplicate or not read.is_read1:
            return

        sample = read.get_tag('SM')
        # Fetch the tag once; startswith() is safe for an empty tag value,
        # whereas indexing [0] would raise IndexError.
        motif = read.get_tag('RZ')

        if motif == 'TA':
            self.per_cell_ta_obs[sample]['TA_start'] += 1
        if motif.startswith('A'):
            self.per_cell_a_obs[sample]['A_start'] += 1

        # Every counted read contributes to both denominators.
        self.per_cell_ta_obs[sample]['total'] += 1
        self.per_cell_a_obs[sample]['total'] += 1

    def __repr__(self):
        return 'ScCHICLigation: no description'

    def __iter__(self):
        """Yield ``(total, TA fraction)`` for every cell seen so far."""
        for cell, cell_data in self.per_cell_ta_obs.items():
            yield cell_data['total'], cell_data['TA_start'] / cell_data['total']

    def _fractions(self, observations, key):
        """Return ``(totals, fractions)`` lists, one entry per cell.

        Args:
            observations: mapping cell -> Counter containing ``'total'`` and ``key``.
            key: name of the numerator counter (``'TA_start'`` or ``'A_start'``).
        """
        totals = []
        fractions = []
        for cell_data in observations.values():
            totals.append(cell_data['total'])
            fractions.append(cell_data[key] / cell_data['total'])
        return totals, fractions

    def _plot_fraction(self, observations, key, ylabel, out_path, title=None):
        """Scatter per-cell fraction against per-cell total and save to ``out_path``."""
        x, y = self._fractions(observations, key)

        fig, ax = plt.subplots(figsize=(4, 4))
        ax.scatter(x, y, s=3, c='k')
        ax.set_xscale('log')
        if title is not None:
            ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel("# Molecules")
        ax.set_xlim(1, None)
        ax.set_ylim(-0.1, 1.05)
        sns.despine()
        plt.tight_layout()
        plt.savefig(out_path)
        plt.close(fig)

    def plot(self, target_path, title=None):
        """Write the TA and A fraction scatter plots.

        Two files are produced by substituting ``'.png'`` in ``target_path``
        with ``'.TA.png'`` and ``'.A.png'`` respectively.

        Args:
            target_path: output path containing ``'.png'``.
            title: optional title placed on both figures.
        """
        ########### TA ###########
        self._plot_fraction(
            self.per_cell_ta_obs,
            'TA_start',
            "Fraction unique cuts starting with TA",
            target_path.replace('.png', '.TA.png'),
            title=title)

        ########### A ###########
        # A_start is recorded in per_cell_a_obs; reading it from the TA table
        # (as was done previously) always yields 0.
        self._plot_fraction(
            self.per_cell_a_obs,
            'A_start',
            "Fraction unique cuts starting with A",
            target_path.replace('.png', '.A.png'),
            title=title)

    def to_csv(self, path):
        """Write the TA table (counter names as rows, cells as columns) to CSV.

        The output file name is ``path`` with ``'.csv'`` replaced by
        ``'TA_obs_per_cell.csv'``. Only the TA table is written.
        """
        pd.DataFrame(
            self.per_cell_ta_obs).sort_index().to_csv(
            path.replace(
                '.csv',
                'TA_obs_per_cell.csv'))