snputils.visualization.admixture_manhattan_plot

View Source

 1import os
 2import numpy as np
 3import pandas as pd
 4from typing import Optional, Tuple, Dict
 5import matplotlib.pyplot as plt
 6import statsmodels.api as sm
 7
 8def manhattan_plot(
 9    input_file: str, 
10    colors: list,
11    significance_threshold: float = 0.05,
12    figsize: Optional[Tuple[float, float]] = None,
13    title: Optional[str] = None,
14    fontsize: Optional[Dict[str, float]] = None,
15    save: Optional[bool] = None,
16    output_filename: Optional[str] = None,
17):
18    """
19    Generates a Manhattan plot from an input file the results of an admixture mapping association. 
20    The plot is highly customizable, allowing users to specify colors for each chromosome and apply bonferroni correction for p-values.
21
22    Args:
23        input_file: Path to the input file containing columns '#CHROM', 'POS', and 'P'. default plik2 output files are supported.
24        colors: List of colors to apply to each chromosome. the chromosome number is used as an index to select the color.
25        significance_threshold: Significance threshold for p-values. Default is 0.05.
26        figsize: Optional tuple to specify figure dimensions (width, height).
27        title: Plot title. If None, no title is shown.
28        fontsize: Dictionary with font sizes for title, labels, and legend.
29        save: If True, saves the plot to a file. If None, the plot is not saved.
30        output_filename: Filename for saving the plot (PNG).
31    """
32    # Read the input file
33    df = pd.read_csv(input_file, sep='\t')
34
35    # Calculate the maximum distance within each chromosome to scale absolute positions
36    max_distance = 0
37    for chrom, chrom_data in df.groupby('#CHROM'):
38        chrom_max_pos = chrom_data['POS'].max()
39        if chrom_max_pos > max_distance:
40            max_distance = chrom_max_pos
41
42    # Calculate absolute positions for each SNP
43    df['ABS_POS'] = df['POS'] + max_distance * df['#CHROM']
44
45    # Bonferroni threshold
46    bonferroni_threshold = significance_threshold / len(df)
47
48    # Create the plot
49    plt.figure(figsize=figsize)
50    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in range(1, 23)}
51
52    # Display Manhattan plot points for each chromosome
53    for i, (chrom, chrom_data) in enumerate(df.groupby('#CHROM')):
54        chrom_data['ABS_POS'] = chrom_data['POS'] + chrom_offsets[chrom]
55        plt.scatter(chrom_data['ABS_POS'], -np.log10(chrom_data['P']), 
56                    color=colors[int(chrom+1) % len(colors)])
57
58    # X-axis settings
59    plt.xlim(0, 22 * max_distance)
60    chrom_labels = [str(c) for c in range(1, 23)]
61    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in range(1, 23)]
62    plt.xticks(chrom_positions, chrom_labels)
63
64    # Significance thresholds
65    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')
66
67    # Labels and title
68    if title:
69        plt.title(title, fontsize=fontsize.get('title', 20))
70    plt.xlabel('Chromosomes', fontsize=fontsize.get('xlabel', 15))
71    plt.ylabel('-log10(p-value)', fontsize=fontsize.get('ylabel', 15))
72    plt.legend(fontsize=fontsize.get('legend', 15))
73
74    # Save the plot
75    plt.tight_layout()
76    if save:
77        plt.savefig(output_filename)
78    plt.show()

def manhattan_plot( input_file: str, colors: list, significance_threshold: float = 0.05, figsize: Optional[Tuple[float, float]] = None, title: Optional[str] = None, fontsize: Optional[Dict[str, float]] = None, save: Optional[bool] = None, output_filename: Optional[str] = None): View Source

 9def manhattan_plot(
10    input_file: str, 
11    colors: list,
12    significance_threshold: float = 0.05,
13    figsize: Optional[Tuple[float, float]] = None,
14    title: Optional[str] = None,
15    fontsize: Optional[Dict[str, float]] = None,
16    save: Optional[bool] = None,
17    output_filename: Optional[str] = None,
18):
19    """
20    Generates a Manhattan plot from an input file the results of an admixture mapping association. 
21    The plot is highly customizable, allowing users to specify colors for each chromosome and apply bonferroni correction for p-values.
22
23    Args:
24        input_file: Path to the input file containing columns '#CHROM', 'POS', and 'P'. default plik2 output files are supported.
25        colors: List of colors to apply to each chromosome. the chromosome number is used as an index to select the color.
26        significance_threshold: Significance threshold for p-values. Default is 0.05.
27        figsize: Optional tuple to specify figure dimensions (width, height).
28        title: Plot title. If None, no title is shown.
29        fontsize: Dictionary with font sizes for title, labels, and legend.
30        save: If True, saves the plot to a file. If None, the plot is not saved.
31        output_filename: Filename for saving the plot (PNG).
32    """
33    # Read the input file
34    df = pd.read_csv(input_file, sep='\t')
35
36    # Calculate the maximum distance within each chromosome to scale absolute positions
37    max_distance = 0
38    for chrom, chrom_data in df.groupby('#CHROM'):
39        chrom_max_pos = chrom_data['POS'].max()
40        if chrom_max_pos > max_distance:
41            max_distance = chrom_max_pos
42
43    # Calculate absolute positions for each SNP
44    df['ABS_POS'] = df['POS'] + max_distance * df['#CHROM']
45
46    # Bonferroni threshold
47    bonferroni_threshold = significance_threshold / len(df)
48
49    # Create the plot
50    plt.figure(figsize=figsize)
51    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in range(1, 23)}
52
53    # Display Manhattan plot points for each chromosome
54    for i, (chrom, chrom_data) in enumerate(df.groupby('#CHROM')):
55        chrom_data['ABS_POS'] = chrom_data['POS'] + chrom_offsets[chrom]
56        plt.scatter(chrom_data['ABS_POS'], -np.log10(chrom_data['P']), 
57                    color=colors[int(chrom+1) % len(colors)])
58
59    # X-axis settings
60    plt.xlim(0, 22 * max_distance)
61    chrom_labels = [str(c) for c in range(1, 23)]
62    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in range(1, 23)]
63    plt.xticks(chrom_positions, chrom_labels)
64
65    # Significance thresholds
66    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')
67
68    # Labels and title
69    if title:
70        plt.title(title, fontsize=fontsize.get('title', 20))
71    plt.xlabel('Chromosomes', fontsize=fontsize.get('xlabel', 15))
72    plt.ylabel('-log10(p-value)', fontsize=fontsize.get('ylabel', 15))
73    plt.legend(fontsize=fontsize.get('legend', 15))
74
75    # Save the plot
76    plt.tight_layout()
77    if save:
78        plt.savefig(output_filename)
79    plt.show()

Generates a Manhattan plot from an input file containing the results of an admixture mapping association. The plot is highly customizable, allowing users to specify colors for each chromosome and to apply a Bonferroni correction to the p-value significance threshold.

Arguments:

input_file: Path to the input file containing columns '#CHROM', 'POS', and 'P'. Default PLINK 2 output files are supported.
colors: List of colors to apply to each chromosome. The chromosome number is used as an index to select the color, wrapping around when the list is shorter than the number of chromosomes.
significance_threshold: Significance threshold for p-values. Default is 0.05.
figsize: Optional tuple to specify figure dimensions (width, height).
title: Plot title. If None, no title is shown.
fontsize: Dictionary with font sizes for title, labels, and legend.
save: If True, saves the plot to a file. If None, the plot is not saved.
output_filename: Filename for saving the plot (PNG).