Source code for snputils.visualization.admixture_manhattan_plot

import os
import numpy as np
import pandas as pd
from typing import Optional, Tuple, Dict
import matplotlib.pyplot as plt


[docs]
def manhattan_plot(
    input_file: str, 
    colors: list,
    significance_threshold: float = 0.05,
    figsize: Optional[Tuple[float, float]] = None,
    title: Optional[str] = None,
    fontsize: Optional[Dict[str, float]] = None,
    save: Optional[bool] = None,
    output_filename: Optional[str] = None,
):
    """
    Generate a Manhattan plot from a file containing the results of an admixture mapping association.

    Each chromosome is drawn in its own equally wide slot on the x-axis (the slot width is the
    largest 'POS' value found in the file), and a dashed horizontal line marks the
    Bonferroni-corrected significance threshold.

    Args:
        input_file: Path to a tab-separated input file containing the columns '#CHROM', 'POS',
            and 'P'. Default plink2 output files are supported.
        colors: Non-empty list of colors. The color used for chromosome ``c`` is
            ``colors[(c + 1) % len(colors)]``.
        significance_threshold: Significance threshold for p-values before Bonferroni
            correction. It is divided by the number of rows in the file. Default is 0.05.
        figsize: Optional tuple to specify figure dimensions (width, height).
        title: Plot title. If None or empty, no title is shown.
        fontsize: Dictionary with font sizes under the keys 'title', 'xlabel', 'ylabel' and
            'legend'. Missing keys (or None) fall back to 20 for the title and 15 for the rest.
        save: If True, saves the plot to ``output_filename``. If None or False, the plot is
            not saved.
        output_filename: Filename for saving the plot (PNG). Required when ``save`` is True.

    Raises:
        ValueError: If ``colors`` is empty, if ``save`` is truthy but ``output_filename`` is
            not given, or if the input file contains no rows.
        KeyError: If '#CHROM' contains a value outside 1..22; only those chromosomes have an
            x-axis slot.
    """
    # Validate the cheap preconditions first so that a bad call fails before any
    # file I/O or figure construction takes place.
    if len(colors) == 0:
        raise ValueError("`colors` must contain at least one color.")
    if save and not output_filename:
        raise ValueError("`output_filename` must be provided when `save` is True.")

    # The signature allows fontsize=None; treat it as "use every default size".
    fontsize = {} if fontsize is None else fontsize

    # Read the input file
    df = pd.read_csv(input_file, sep='\t')
    if df.empty:
        # Without rows the Bonferroni divisor would be zero.
        raise ValueError(f"Input file '{input_file}' contains no rows.")

    # Width of one chromosome slot: the largest position over all chromosomes.
    # max(0, ...) keeps the 0 fallback when no valid maximum exists (NaN result).
    max_distance = max(0, df.groupby('#CHROM')['POS'].max().max())

    # Bonferroni threshold
    bonferroni_threshold = significance_threshold / len(df)

    # Create the plot
    plt.figure(figsize=figsize)
    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in range(1, 23)}

    # Display Manhattan plot points for each chromosome. The absolute position is
    # kept in a local Series instead of being written back into the group slice,
    # which would trigger pandas' SettingWithCopyWarning.
    for chrom, chrom_data in df.groupby('#CHROM'):
        abs_pos = chrom_data['POS'] + chrom_offsets[chrom]
        plt.scatter(abs_pos, -np.log10(chrom_data['P']),
                    color=colors[int(chrom + 1) % len(colors)])

    # X-axis settings: one tick, centered in its slot, per chromosome 1..22
    plt.xlim(0, 22 * max_distance)
    chrom_labels = [str(c) for c in range(1, 23)]
    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in range(1, 23)]
    plt.xticks(chrom_positions, chrom_labels)

    # Significance thresholds
    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')

    # Labels and title
    if title:
        plt.title(title, fontsize=fontsize.get('title', 20))
    plt.xlabel('Chromosomes', fontsize=fontsize.get('xlabel', 15))
    plt.ylabel('-log10(p-value)', fontsize=fontsize.get('ylabel', 15))
    plt.legend(fontsize=fontsize.get('legend', 15))

    # Save the plot
    plt.tight_layout()
    if save:
        plt.savefig(output_filename)
    plt.show()