snputils.visualization.admixture_manhattan_plot

 1import os
 2import numpy as np
 3import pandas as pd
 4from typing import Optional, Tuple, Dict
 5import matplotlib.pyplot as plt
 6
 7def manhattan_plot(
 8    input_file: str, 
 9    colors: list,
10    significance_threshold: float = 0.05,
11    figsize: Optional[Tuple[float, float]] = None,
12    title: Optional[str] = None,
13    fontsize: Optional[Dict[str, float]] = None,
14    save: Optional[bool] = None,
15    output_filename: Optional[str] = None,
16):
17    """
18    Generates a Manhattan plot from an input file the results of an admixture mapping association. 
19    The plot is highly customizable, allowing users to specify colors for each chromosome and apply bonferroni correction for p-values.
20
21    Args:
22        input_file: Path to the input file containing columns '#CHROM', 'POS', and 'P'. default plik2 output files are supported.
23        colors: List of colors to apply to each chromosome. the chromosome number is used as an index to select the color.
24        significance_threshold: Significance threshold for p-values. Default is 0.05.
25        figsize: Optional tuple to specify figure dimensions (width, height).
26        title: Plot title. If None, no title is shown.
27        fontsize: Dictionary with font sizes for title, labels, and legend.
28        save: If True, saves the plot to a file. If None, the plot is not saved.
29        output_filename: Filename for saving the plot (PNG).
30    """
31    # Read the input file
32    df = pd.read_csv(input_file, sep='\t')
33
34    # Calculate the maximum distance within each chromosome to scale absolute positions
35    max_distance = 0
36    for chrom, chrom_data in df.groupby('#CHROM'):
37        chrom_max_pos = chrom_data['POS'].max()
38        if chrom_max_pos > max_distance:
39            max_distance = chrom_max_pos
40
41    # Calculate absolute positions for each SNP
42    df['ABS_POS'] = df['POS'] + max_distance * df['#CHROM']
43
44    # Bonferroni threshold
45    bonferroni_threshold = significance_threshold / len(df)
46
47    # Create the plot
48    plt.figure(figsize=figsize)
49    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in range(1, 23)}
50
51    # Display Manhattan plot points for each chromosome
52    for i, (chrom, chrom_data) in enumerate(df.groupby('#CHROM')):
53        chrom_data['ABS_POS'] = chrom_data['POS'] + chrom_offsets[chrom]
54        plt.scatter(chrom_data['ABS_POS'], -np.log10(chrom_data['P']), 
55                    color=colors[int(chrom+1) % len(colors)])
56
57    # X-axis settings
58    plt.xlim(0, 22 * max_distance)
59    chrom_labels = [str(c) for c in range(1, 23)]
60    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in range(1, 23)]
61    plt.xticks(chrom_positions, chrom_labels)
62
63    # Significance thresholds
64    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')
65
66    # Labels and title
67    if title:
68        plt.title(title, fontsize=fontsize.get('title', 20))
69    plt.xlabel('Chromosomes', fontsize=fontsize.get('xlabel', 15))
70    plt.ylabel('-log10(p-value)', fontsize=fontsize.get('ylabel', 15))
71    plt.legend(fontsize=fontsize.get('legend', 15))
72
73    # Save the plot
74    plt.tight_layout()
75    if save:
76        plt.savefig(output_filename)
77    plt.show()
def manhattan_plot( input_file: str, colors: list, significance_threshold: float = 0.05, figsize: Optional[Tuple[float, float]] = None, title: Optional[str] = None, fontsize: Optional[Dict[str, float]] = None, save: Optional[bool] = None, output_filename: Optional[str] = None):
 8def manhattan_plot(
 9    input_file: str, 
10    colors: list,
11    significance_threshold: float = 0.05,
12    figsize: Optional[Tuple[float, float]] = None,
13    title: Optional[str] = None,
14    fontsize: Optional[Dict[str, float]] = None,
15    save: Optional[bool] = None,
16    output_filename: Optional[str] = None,
17):
18    """
19    Generates a Manhattan plot from an input file the results of an admixture mapping association. 
20    The plot is highly customizable, allowing users to specify colors for each chromosome and apply bonferroni correction for p-values.
21
22    Args:
23        input_file: Path to the input file containing columns '#CHROM', 'POS', and 'P'. default plik2 output files are supported.
24        colors: List of colors to apply to each chromosome. the chromosome number is used as an index to select the color.
25        significance_threshold: Significance threshold for p-values. Default is 0.05.
26        figsize: Optional tuple to specify figure dimensions (width, height).
27        title: Plot title. If None, no title is shown.
28        fontsize: Dictionary with font sizes for title, labels, and legend.
29        save: If True, saves the plot to a file. If None, the plot is not saved.
30        output_filename: Filename for saving the plot (PNG).
31    """
32    # Read the input file
33    df = pd.read_csv(input_file, sep='\t')
34
35    # Calculate the maximum distance within each chromosome to scale absolute positions
36    max_distance = 0
37    for chrom, chrom_data in df.groupby('#CHROM'):
38        chrom_max_pos = chrom_data['POS'].max()
39        if chrom_max_pos > max_distance:
40            max_distance = chrom_max_pos
41
42    # Calculate absolute positions for each SNP
43    df['ABS_POS'] = df['POS'] + max_distance * df['#CHROM']
44
45    # Bonferroni threshold
46    bonferroni_threshold = significance_threshold / len(df)
47
48    # Create the plot
49    plt.figure(figsize=figsize)
50    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in range(1, 23)}
51
52    # Display Manhattan plot points for each chromosome
53    for i, (chrom, chrom_data) in enumerate(df.groupby('#CHROM')):
54        chrom_data['ABS_POS'] = chrom_data['POS'] + chrom_offsets[chrom]
55        plt.scatter(chrom_data['ABS_POS'], -np.log10(chrom_data['P']), 
56                    color=colors[int(chrom+1) % len(colors)])
57
58    # X-axis settings
59    plt.xlim(0, 22 * max_distance)
60    chrom_labels = [str(c) for c in range(1, 23)]
61    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in range(1, 23)]
62    plt.xticks(chrom_positions, chrom_labels)
63
64    # Significance thresholds
65    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')
66
67    # Labels and title
68    if title:
69        plt.title(title, fontsize=fontsize.get('title', 20))
70    plt.xlabel('Chromosomes', fontsize=fontsize.get('xlabel', 15))
71    plt.ylabel('-log10(p-value)', fontsize=fontsize.get('ylabel', 15))
72    plt.legend(fontsize=fontsize.get('legend', 15))
73
74    # Save the plot
75    plt.tight_layout()
76    if save:
77        plt.savefig(output_filename)
78    plt.show()

Generates a Manhattan plot from an input file containing the results of an admixture mapping association. The plot is customizable: users can specify the colors used for the chromosomes, and a Bonferroni-corrected significance threshold is drawn for the p-values.

Arguments:
  • input_file: Path to the tab-separated input file containing the columns '#CHROM', 'POS', and 'P'. Default PLINK 2 output files are supported.
  • colors: List of colors to apply to the chromosomes. The color for a chromosome is selected by index from its chromosome number (chromosome number + 1, modulo the number of colors).
  • significance_threshold: Significance threshold for p-values. Default is 0.05.
  • figsize: Optional tuple to specify figure dimensions (width, height).
  • title: Plot title. If None, no title is shown.
  • fontsize: Dictionary with font sizes for title, labels, and legend.
  • save: If True, saves the plot to a file. If None, the plot is not saved.
  • output_filename: Filename for saving the plot (PNG).