Source code for snputils.visualization.admixture_manhattan_plot
import os
import numpy as np
import pandas as pd
from typing import Optional, Tuple, Dict
import matplotlib.pyplot as plt
[docs]
def manhattan_plot(
    input_file: str,
    colors: list,
    significance_threshold: float = 0.05,
    figsize: Optional[Tuple[float, float]] = None,
    title: Optional[str] = None,
    fontsize: Optional[Dict[str, float]] = None,
    save: Optional[bool] = None,
    output_filename: Optional[str] = None,
):
    """
    Draw a Manhattan plot from the results of an admixture mapping association.

    Every chromosome is given a slot of equal width on the x-axis (the width
    is the largest 'POS' value found in the file), points are colored per
    chromosome, and a dashed red line marks the Bonferroni-corrected
    significance threshold (``significance_threshold / number_of_rows``).

    Args:
        input_file: Path to a tab-separated file containing the columns
            '#CHROM', 'POS', and 'P' (e.g. default PLINK 2 association
            output). '#CHROM' must hold integer chromosome numbers 1-22.
        colors: Non-empty list of colors. The color of chromosome ``c`` is
            ``colors[(c + 1) % len(colors)]``.
        significance_threshold: Significance threshold for p-values before
            Bonferroni correction. Default is 0.05.
        figsize: Optional tuple to specify figure dimensions (width, height).
        title: Plot title. If None or empty, no title is shown.
        fontsize: Optional dictionary with font sizes under the keys
            'title', 'xlabel', 'ylabel', and 'legend'. Missing keys (or
            None) fall back to 20 for the title and 15 for everything else.
        save: If True, saves the plot to ``output_filename``. If None or
            False, the plot is not saved.
        output_filename: Filename for saving the plot. Required when
            ``save`` is True.

    Raises:
        ValueError: If ``colors`` is empty, if ``save`` is truthy without an
            ``output_filename``, or if the input file contains no rows.
        KeyError: If a required column is missing or '#CHROM' contains a
            value outside 1-22.
    """
    # Validate cheap preconditions up front so we fail before doing any I/O
    # or plotting work.
    if not colors:
        raise ValueError("'colors' must contain at least one color.")
    if save and output_filename is None:
        raise ValueError("'output_filename' is required when 'save' is True.")

    # 'fontsize' defaults to None; normalise it so the .get() lookups below
    # always have a mapping to work with.
    font_sizes = fontsize if fontsize is not None else {}

    # The x-axis layout assumes the 22 autosomes, each in a slot of equal width.
    n_chromosomes = 22
    chromosomes = range(1, n_chromosomes + 1)

    # Read the input file
    df = pd.read_csv(input_file, sep='\t')
    if df.empty:
        raise ValueError(f"Input file '{input_file}' contains no rows.")

    # The largest position seen on any chromosome is the width of one slot.
    max_distance = df.groupby('#CHROM')['POS'].max().max()

    # Bonferroni threshold: divide by the number of tests (rows).
    bonferroni_threshold = significance_threshold / len(df)

    # Create the plot
    plt.figure(figsize=figsize)
    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in chromosomes}

    # Display Manhattan plot points for each chromosome
    for chrom, chrom_data in df.groupby('#CHROM'):
        # Compute absolute positions in a standalone Series instead of
        # assigning onto the group slice (avoids SettingWithCopyWarning).
        abs_pos = chrom_data['POS'] + chrom_offsets[chrom]
        plt.scatter(
            abs_pos,
            -np.log10(chrom_data['P']),
            color=colors[int(chrom + 1) % len(colors)],
        )

    # X-axis settings: one centered tick per chromosome slot
    plt.xlim(0, n_chromosomes * max_distance)
    chrom_labels = [str(c) for c in chromosomes]
    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in chromosomes]
    plt.xticks(chrom_positions, chrom_labels)

    # Significance threshold
    plt.axhline(
        y=-np.log10(bonferroni_threshold),
        color='r',
        linestyle='--',
        label='Bonferroni',
    )

    # Labels and title
    if title:
        plt.title(title, fontsize=font_sizes.get('title', 20))
    plt.xlabel('Chromosomes', fontsize=font_sizes.get('xlabel', 15))
    plt.ylabel('-log10(p-value)', fontsize=font_sizes.get('ylabel', 15))
    plt.legend(fontsize=font_sizes.get('legend', 15))

    # Save the plot
    plt.tight_layout()
    if save:
        plt.savefig(output_filename)
    plt.show()