snputils.visualization.admixture_manhattan_plot
import os
import numpy as np
import pandas as pd
from typing import Optional, Tuple, Dict
import matplotlib.pyplot as plt


def manhattan_plot(
    input_file: str,
    colors: list,
    significance_threshold: float = 0.05,
    figsize: Optional[Tuple[float, float]] = None,
    title: Optional[str] = None,
    fontsize: Optional[Dict[str, float]] = None,
    save: Optional[bool] = None,
    output_filename: Optional[str] = None,
):
    """
    Generate a Manhattan plot from the results of an admixture mapping association.

    Every SNP is drawn at an absolute x position (chromosome offset + position) against
    -log10(p-value), and a Bonferroni-corrected significance line is added.

    Args:
        input_file: Path to a tab-separated file containing the columns '#CHROM', 'POS',
            and 'P'. Chromosomes must be numbered 1-22.
        colors: List of colors. The color of chromosome ``c`` is
            ``colors[(c + 1) % len(colors)]``, so the list is cycled when it is shorter
            than the number of chromosomes.
        significance_threshold: Significance threshold for p-values before the Bonferroni
            correction (it is divided by the number of rows). Default is 0.05.
        figsize: Optional tuple to specify figure dimensions (width, height).
        title: Plot title. If None or empty, no title is shown.
        fontsize: Optional dictionary with font sizes under the keys 'title', 'xlabel',
            'ylabel' and 'legend'. Missing keys (or None) fall back to 20 for the title
            and 15 for everything else.
        save: If True, saves the plot to ``output_filename``. If None or False, the plot
            is not saved.
        output_filename: Filename for saving the plot. Required when ``save`` is True.

    Raises:
        ValueError: If ``save`` is True but no ``output_filename`` is given.
        KeyError: If the file contains a chromosome outside 1-22.
    """
    # Fail before any work is done instead of deep inside matplotlib's savefig.
    if save and not output_filename:
        raise ValueError("output_filename must be provided when save is True.")

    # `fontsize` defaults to None; use an empty mapping so the .get() fallbacks apply.
    font_sizes = fontsize if fontsize is not None else {}

    # Read the input file
    df = pd.read_csv(input_file, sep='\t')

    # The largest position over all chromosomes is the width of every chromosome slot
    max_distance = df['POS'].max()

    # Bonferroni threshold
    bonferroni_threshold = significance_threshold / len(df)

    # Create the plot
    plt.figure(figsize=figsize)
    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in range(1, 23)}

    # Display Manhattan plot points for each chromosome
    for chrom, chrom_data in df.groupby('#CHROM'):
        # Use a local series rather than writing a column into the groupby slice
        abs_pos = chrom_data['POS'] + chrom_offsets[chrom]
        plt.scatter(
            abs_pos,
            -np.log10(chrom_data['P']),
            color=colors[int(chrom + 1) % len(colors)],
        )

    # X-axis settings: one tick in the middle of each chromosome slot
    plt.xlim(0, 22 * max_distance)
    chrom_labels = [str(c) for c in range(1, 23)]
    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in range(1, 23)]
    plt.xticks(chrom_positions, chrom_labels)

    # Significance thresholds
    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')

    # Labels and title
    if title:
        plt.title(title, fontsize=font_sizes.get('title', 20))
    plt.xlabel('Chromosomes', fontsize=font_sizes.get('xlabel', 15))
    plt.ylabel('-log10(p-value)', fontsize=font_sizes.get('ylabel', 15))
    plt.legend(fontsize=font_sizes.get('legend', 15))

    # Save the plot
    plt.tight_layout()
    if save:
        plt.savefig(output_filename)
    plt.show()
def
manhattan_plot( input_file: str, colors: list, significance_threshold: float = 0.05, figsize: Optional[Tuple[float, float]] = None, title: Optional[str] = None, fontsize: Optional[Dict[str, float]] = None, save: Optional[bool] = None, output_filename: Optional[str] = None):
def manhattan_plot(
    input_file: str,
    colors: list,
    significance_threshold: float = 0.05,
    figsize: Optional[Tuple[float, float]] = None,
    title: Optional[str] = None,
    fontsize: Optional[Dict[str, float]] = None,
    save: Optional[bool] = None,
    output_filename: Optional[str] = None,
):
    """
    Generate a Manhattan plot from the results of an admixture mapping association.

    Every SNP is drawn at an absolute x position (chromosome offset + position) against
    -log10(p-value), and a Bonferroni-corrected significance line is added.

    Args:
        input_file: Path to a tab-separated file containing the columns '#CHROM', 'POS',
            and 'P'. Chromosomes must be numbered 1-22.
        colors: List of colors. The color of chromosome ``c`` is
            ``colors[(c + 1) % len(colors)]``, so the list is cycled when it is shorter
            than the number of chromosomes.
        significance_threshold: Significance threshold for p-values before the Bonferroni
            correction (it is divided by the number of rows). Default is 0.05.
        figsize: Optional tuple to specify figure dimensions (width, height).
        title: Plot title. If None or empty, no title is shown.
        fontsize: Optional dictionary with font sizes under the keys 'title', 'xlabel',
            'ylabel' and 'legend'. Missing keys (or None) fall back to 20 for the title
            and 15 for everything else.
        save: If True, saves the plot to ``output_filename``. If None or False, the plot
            is not saved.
        output_filename: Filename for saving the plot. Required when ``save`` is True.

    Raises:
        ValueError: If ``save`` is True but no ``output_filename`` is given.
        KeyError: If the file contains a chromosome outside 1-22.
    """
    # Fail before any work is done instead of deep inside matplotlib's savefig.
    if save and not output_filename:
        raise ValueError("output_filename must be provided when save is True.")

    # `fontsize` defaults to None; use an empty mapping so the .get() fallbacks apply.
    font_sizes = fontsize if fontsize is not None else {}

    # Read the input file
    df = pd.read_csv(input_file, sep='\t')

    # The largest position over all chromosomes is the width of every chromosome slot
    max_distance = df['POS'].max()

    # Bonferroni threshold
    bonferroni_threshold = significance_threshold / len(df)

    # Create the plot
    plt.figure(figsize=figsize)
    chrom_offsets = {chrom: max_distance * (chrom - 1) for chrom in range(1, 23)}

    # Display Manhattan plot points for each chromosome
    for chrom, chrom_data in df.groupby('#CHROM'):
        # Use a local series rather than writing a column into the groupby slice
        abs_pos = chrom_data['POS'] + chrom_offsets[chrom]
        plt.scatter(
            abs_pos,
            -np.log10(chrom_data['P']),
            color=colors[int(chrom + 1) % len(colors)],
        )

    # X-axis settings: one tick in the middle of each chromosome slot
    plt.xlim(0, 22 * max_distance)
    chrom_labels = [str(c) for c in range(1, 23)]
    chrom_positions = [chrom_offsets[c] + max_distance / 2 for c in range(1, 23)]
    plt.xticks(chrom_positions, chrom_labels)

    # Significance thresholds
    plt.axhline(y=-np.log10(bonferroni_threshold), color='r', linestyle='--', label='Bonferroni')

    # Labels and title
    if title:
        plt.title(title, fontsize=font_sizes.get('title', 20))
    plt.xlabel('Chromosomes', fontsize=font_sizes.get('xlabel', 15))
    plt.ylabel('-log10(p-value)', fontsize=font_sizes.get('ylabel', 15))
    plt.legend(fontsize=font_sizes.get('legend', 15))

    # Save the plot
    plt.tight_layout()
    if save:
        plt.savefig(output_filename)
    plt.show()
Generates a Manhattan plot from an input file containing the results of an admixture mapping association. The plot is highly customizable, allowing users to specify colors for each chromosome and apply Bonferroni correction for p-values.
Arguments:
- input_file: Path to the input file containing columns '#CHROM', 'POS', and 'P'. Default PLINK2 output files are supported.
- colors: List of colors to apply to each chromosome. The chromosome number is used as an index to select the color, wrapping around when the list is shorter than the number of chromosomes.
- significance_threshold: Significance threshold for p-values. Default is 0.05.
- figsize: Optional tuple to specify figure dimensions (width, height).
- title: Plot title. If None, no title is shown.
- fontsize: Dictionary with font sizes for title, labels, and legend.
- save: If True, saves the plot to a file. If None, the plot is not saved.
- output_filename: Filename for saving the plot (PNG).