#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
from .statistic import StatisticHistogram
import singlecellmultiomics.pyutils as pyutils
import collections
import pandas as pd
import matplotlib
matplotlib.rcParams['figure.dpi'] = 160
matplotlib.use('Agg')
[docs]class MethylationContextHistogram(StatisticHistogram):
def __init__(self, args):
StatisticHistogram.__init__(self, args)
self.context_obs = collections.Counter() # (bismark_call_tag)=> observations
self.hasdata = False
[docs] def processRead(self, R1,R2):
for read in [R2,R2]:
if read is None:
continue
if not read.is_paired or not read.has_tag(
'XM') or not read.is_read1 or read.is_duplicate or read.has_tag('RR'):
return
self.hasdata = True
tags = dict(read.tags)
for tag in 'zhx':
self.context_obs[tag] += tags.get(f's{tag}', 0)
self.context_obs[tag.upper()] += tags.get(f's{tag.upper()}', 0)
def __repr__(self):
return f'Methylation status.'
[docs] def get_df(self):
x = self.context_obs
return pd.DataFrame(
{'ratio':
{
'CpG': (x['Z'] / (x['z'] + x['Z'])) if x['z'] + x['Z'] > 0 else 0 ,
'CHG': (x['X'] / (x['x'] + x['X'])) if x['x'] + x['X'] >0 else 0,
'CHH': (x['H'] / (x['h'] + x['H'])) if x['h'] + x['H'] > 0 else 0},
'methylated': {
'CpG': x['Z'],
'CHG': x['X'],
'CHH': x['H']
},
'unmethylated': {
'CpG': x['z'],
'CHG': x['x'],
'CHH': x['h']
}
})
[docs] def plot(self, target_path, title=None):
df = self.get_df()
# Methylation ratio plot:
name = 'methylation_pct'
(df['ratio'] * 100).plot.bar()
ax = plt.gca()
for p in ax.patches:
ax.annotate(
f'{p.get_height():.1f}%',
(p.get_x() + p.get_width() / 2, p.get_height() * 1.005),
va='bottom', ha='center')
ax.set_ylim(0, 110)
ax.set_xlabel('Methylation context')
ax.set_ylabel('% methylated')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
plt.show()
if title is not None:
plt.title(title)
plt.tight_layout()
plt.savefig(target_path.replace('.png', f'.{name}.png'))
ax.set_yscale('log')
ax.set_ylim(0.01, 110)
plt.tight_layout()
plt.savefig(target_path.replace('.png', f'.{name}.log.png'))
plt.close()
########
name = 'methylation_absolute'
(df[['methylated', 'unmethylated']]).plot.bar()
ax = plt.gca()
for p in ax.patches:
ax.annotate(
f'{p.get_height()}',
(p.get_x() + p.get_width() / 2, p.get_height() * 1.005),
va='bottom', ha='center')
ax.set_xlabel('Methylation context')
ax.set_ylabel('Bases total')
plt.tight_layout()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
if title is not None:
plt.title(title)
plt.tight_layout()
plt.savefig(target_path.replace('.png', f'.{name}.png'))
ax.set_yscale('log')
plt.tight_layout()
plt.savefig(target_path.replace('.png', f'.{name}.log.png'))
plt.close()
[docs] def to_csv(self, path):
self.get_df().to_csv(path)