snputils.visualization.admixture_manhattan_plot
import os
import numpy as np
import pandas as pd
from typing import Optional, Tuple, Dict
import matplotlib.pyplot as plt
import statsmodels.api as sm


def manhattan_plot(
    input_file: str,
    colors: list,
    significance_threshold: float = 0.05,
    figsize: Optional[Tuple[float, float]] = None,
    title: Optional[str] = None,
    fontsize: Optional[Dict[str, float]] = None,
    save: Optional[bool] = None,
    output_filename: Optional[str] = None,
):
    """
    Generate a Manhattan plot from a file containing the results of an admixture mapping association.

    Each chromosome is drawn in its own equally wide slot on the x-axis, and a dashed red line
    marks the Bonferroni-corrected significance threshold (`significance_threshold / number of rows`).

    Args:
        input_file: Path to a tab-separated file containing the columns '#CHROM', 'POS', and 'P'.
            Chromosomes must be numbered 1 to 22, since the x-axis layout is built for those values.
        colors: Non-empty list of colors. The color for a chromosome is
            `colors[(chromosome + 1) % len(colors)]`.
        significance_threshold: Significance threshold for p-values before Bonferroni correction.
            Default is 0.05.
        figsize: Optional tuple to specify figure dimensions (width, height).
        title: Plot title. If None or empty, no title is shown.
        fontsize: Optional dictionary with font sizes under the keys 'title', 'xlabel', 'ylabel',
            and 'legend'. Missing keys (or None) fall back to 20 for the title and 15 otherwise.
        save: If True, saves the plot to `output_filename`. If None or False, the plot is not saved.
        output_filename: Filename for saving the plot. Required when `save` is True.

    Raises:
        ValueError: If `colors` is empty, if `save` is true without an `output_filename`,
            or if the input file contains no rows.
    """
    # Validate arguments up front so that mistakes fail fast, before any file or figure work.
    if len(colors) == 0:
        raise ValueError("`colors` must contain at least one color.")
    if save and output_filename is None:
        raise ValueError("`output_filename` must be provided when `save` is True.")

    # `fontsize` defaults to None; use an empty mapping so the per-key defaults below apply.
    font_sizes = fontsize if fontsize is not None else {}

    # Read the input file
    df = pd.read_csv(input_file, sep='\t')
    if df.empty:
        # The Bonferroni correction divides by the number of tests, so there must be at least one.
        raise ValueError(f"Input file '{input_file}' contains no rows to plot.")

    # Width of one chromosome slot on the x-axis: the largest position seen on any chromosome.
    max_distance = df.groupby('#CHROM')['POS'].max().max()

    # Bonferroni threshold
    bonferroni_threshold = significance_threshold / len(df)

    # Create the plot
    n_chromosomes = 22
    chromosomes = range(1, n_chromosomes + 1)
    plt.figure(figsize=figsize)
    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in chromosomes}

    # Display Manhattan plot points for each chromosome
    for chrom, chrom_data in df.groupby('#CHROM'):
        # Keep the shifted positions in a local Series rather than writing into the group slice,
        # which would trigger pandas' SettingWithCopyWarning.
        abs_pos = chrom_data['POS'] + chrom_offsets[chrom]
        plt.scatter(
            abs_pos,
            -np.log10(chrom_data['P']),
            color=colors[int(chrom + 1) % len(colors)],
        )

    # X-axis settings: one tick per chromosome, centered in its slot
    plt.xlim(0, n_chromosomes * max_distance)
    chrom_labels = [str(c) for c in chromosomes]
    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in chromosomes]
    plt.xticks(chrom_positions, chrom_labels)

    # Significance threshold
    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')

    # Labels and title
    if title:
        plt.title(title, fontsize=font_sizes.get('title', 20))
    plt.xlabel('Chromosomes', fontsize=font_sizes.get('xlabel', 15))
    plt.ylabel('-log10(p-value)', fontsize=font_sizes.get('ylabel', 15))
    plt.legend(fontsize=font_sizes.get('legend', 15))

    # Save the plot
    plt.tight_layout()
    if save:
        plt.savefig(output_filename)
    plt.show()
def
manhattan_plot( input_file: str, colors: list, significance_threshold: float = 0.05, figsize: Optional[Tuple[float, float]] = None, title: Optional[str] = None, fontsize: Optional[Dict[str, float]] = None, save: Optional[bool] = None, output_filename: Optional[str] = None):
def manhattan_plot(
    input_file: str,
    colors: list,
    significance_threshold: float = 0.05,
    figsize: Optional[Tuple[float, float]] = None,
    title: Optional[str] = None,
    fontsize: Optional[Dict[str, float]] = None,
    save: Optional[bool] = None,
    output_filename: Optional[str] = None,
):
    """
    Generate a Manhattan plot from a file containing the results of an admixture mapping association.

    Each chromosome is drawn in its own equally wide slot on the x-axis, and a dashed red line
    marks the Bonferroni-corrected significance threshold (`significance_threshold / number of rows`).

    Args:
        input_file: Path to a tab-separated file containing the columns '#CHROM', 'POS', and 'P'.
            Chromosomes must be numbered 1 to 22, since the x-axis layout is built for those values.
        colors: Non-empty list of colors. The color for a chromosome is
            `colors[(chromosome + 1) % len(colors)]`.
        significance_threshold: Significance threshold for p-values before Bonferroni correction.
            Default is 0.05.
        figsize: Optional tuple to specify figure dimensions (width, height).
        title: Plot title. If None or empty, no title is shown.
        fontsize: Optional dictionary with font sizes under the keys 'title', 'xlabel', 'ylabel',
            and 'legend'. Missing keys (or None) fall back to 20 for the title and 15 otherwise.
        save: If True, saves the plot to `output_filename`. If None or False, the plot is not saved.
        output_filename: Filename for saving the plot. Required when `save` is True.

    Raises:
        ValueError: If `colors` is empty, if `save` is true without an `output_filename`,
            or if the input file contains no rows.
    """
    # Validate arguments up front so that mistakes fail fast, before any file or figure work.
    if len(colors) == 0:
        raise ValueError("`colors` must contain at least one color.")
    if save and output_filename is None:
        raise ValueError("`output_filename` must be provided when `save` is True.")

    # `fontsize` defaults to None; use an empty mapping so the per-key defaults below apply.
    font_sizes = fontsize if fontsize is not None else {}

    # Read the input file
    df = pd.read_csv(input_file, sep='\t')
    if df.empty:
        # The Bonferroni correction divides by the number of tests, so there must be at least one.
        raise ValueError(f"Input file '{input_file}' contains no rows to plot.")

    # Width of one chromosome slot on the x-axis: the largest position seen on any chromosome.
    max_distance = df.groupby('#CHROM')['POS'].max().max()

    # Bonferroni threshold
    bonferroni_threshold = significance_threshold / len(df)

    # Create the plot
    n_chromosomes = 22
    chromosomes = range(1, n_chromosomes + 1)
    plt.figure(figsize=figsize)
    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in chromosomes}

    # Display Manhattan plot points for each chromosome
    for chrom, chrom_data in df.groupby('#CHROM'):
        # Keep the shifted positions in a local Series rather than writing into the group slice,
        # which would trigger pandas' SettingWithCopyWarning.
        abs_pos = chrom_data['POS'] + chrom_offsets[chrom]
        plt.scatter(
            abs_pos,
            -np.log10(chrom_data['P']),
            color=colors[int(chrom + 1) % len(colors)],
        )

    # X-axis settings: one tick per chromosome, centered in its slot
    plt.xlim(0, n_chromosomes * max_distance)
    chrom_labels = [str(c) for c in chromosomes]
    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in chromosomes]
    plt.xticks(chrom_positions, chrom_labels)

    # Significance threshold
    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')

    # Labels and title
    if title:
        plt.title(title, fontsize=font_sizes.get('title', 20))
    plt.xlabel('Chromosomes', fontsize=font_sizes.get('xlabel', 15))
    plt.ylabel('-log10(p-value)', fontsize=font_sizes.get('ylabel', 15))
    plt.legend(fontsize=font_sizes.get('legend', 15))

    # Save the plot
    plt.tight_layout()
    if save:
        plt.savefig(output_filename)
    plt.show()
Generates a Manhattan plot from an input file containing the results of an admixture mapping association. The plot is highly customizable, allowing users to specify colors for each chromosome and apply Bonferroni correction for p-values.
Arguments:
- input_file: Path to the tab-separated input file containing columns '#CHROM', 'POS', and 'P'. Default PLINK 2 output files are supported.
- colors: List of colors to apply to each chromosome. The chromosome number is used to select the color, as `colors[(chromosome + 1) % len(colors)]`.
- significance_threshold: Significance threshold for p-values. Default is 0.05.
- figsize: Optional tuple to specify figure dimensions (width, height).
- title: Plot title. If None, no title is shown.
- fontsize: Dictionary with font sizes for title, labels, and legend.
- save: If True, saves the plot to a file. If None, the plot is not saved.
- output_filename: Filename for saving the plot (PNG).