# Source code for snputils.visualization.qq_plot

import numpy as np
import pandas as pd
from typing import Dict, Optional, Tuple, Union
import matplotlib.pyplot as plt

from ._figure_export import (
    default_savefig_kwargs,
    scatter_rasterized_for_path,
    style_association_axes,
)

# Axis-label fragment for "-log10(p)", written with ``$...$`` math markup; it is
# interpolated into both the x ("Expected ...") and y ("Observed ...") labels.
_LOG10_P_LABEL = r"$-\log_{10}(p)$"


def qq_plot(
    data: Union[str, pd.DataFrame],
    color: str = "black",
    significance_threshold: float = 0.05,
    point_size: float = 7.0,
    line_width: float = 1.0,
    expected_line_color: str = "red",
    threshold_line_color: str = "orange",
    figsize: Optional[Tuple[float, float]] = None,
    title: Optional[str] = None,
    fontsize: Optional[Dict[str, float]] = None,
    save: Optional[bool] = None,
    output_filename: Optional[str] = None,
) -> None:
    """Generate a quantile-quantile (QQ) plot of association study p-values.

    Plots observed ``-log10(p)`` against the expected ``-log10(p)`` under the
    null hypothesis of no association (uniform distribution), together with the
    identity reference line and a Bonferroni significance threshold.

    Accepts either a file path or an in-memory :class:`pandas.DataFrame`. The
    input must contain a column ``P`` with p-values. Missing values in ``P``
    are dropped before plotting.

    Args:
        data: Path to a tab-separated results file or an in-memory
            :class:`~pandas.DataFrame` with a column ``P``. PLINK2-style
            output files are supported directly. A DataFrame is only read,
            never modified.
        color: Color for the scatter points. Defaults to ``"black"``.
        significance_threshold: Nominal significance threshold used to derive
            the Bonferroni-corrected threshold
            (``significance_threshold / n_variants``). Default is 0.05.
        point_size: Marker area for scatter points (matplotlib ``s``).
            Default is 7.0.
        line_width: Width of the expected-null and Bonferroni reference
            lines. Default is 1.0.
        expected_line_color: Color of the identity (expected under null)
            reference line. Default is ``"red"``.
        threshold_line_color: Color of the Bonferroni threshold line.
            Default is ``"orange"``.
        figsize: Optional ``(width, height)`` tuple passed to
            :func:`matplotlib.pyplot.figure`.
        title: Plot title. Default is ``None`` (no title).
        fontsize: Mapping with optional keys ``'title'``, ``'xlabel'``, and
            ``'ylabel'`` controlling font sizes. Missing keys fall back to
            defaults (20 for title, 15 for axis labels).
        save: If ``True``, saves the figure to ``output_filename``.
        output_filename: Destination path for the saved figure (``.pdf``,
            ``.svg``, ``.png``, …). When ``None``, the figure is displayed
            with :func:`matplotlib.pyplot.show` instead. When a path is given
            but ``save`` is not truthy, the figure is neither saved nor shown
            and is left as the current pyplot figure.

    Returns:
        None. The figure is created through the pyplot state machine.

    Raises:
        ValueError: If ``save`` is truthy but ``output_filename`` is ``None``,
            or if column ``P`` contains no non-missing p-values.
        KeyError: If the input has no column ``P``.
    """
    # Reject an unusable save request before doing any work or opening a figure.
    if save and output_filename is None:
        raise ValueError("`output_filename` must be provided when `save` is True.")

    # Only column ``P`` is read and nothing is written back, so an in-memory
    # frame can be used as-is without a defensive copy.
    df = data if isinstance(data, pd.DataFrame) else pd.read_csv(data, sep="\t")

    p_values = df["P"].dropna().to_numpy(dtype=float)
    n = len(p_values)
    if n == 0:
        # Without this guard the Bonferroni division below fails with an
        # uninformative ZeroDivisionError.
        raise ValueError("Column 'P' contains no non-missing p-values to plot.")

    _fs = fontsize or {}

    # Observed values in descending order, paired with the expected quantiles
    # i / (n + 1) of a uniform distribution (also descending on the -log10 scale).
    observed = np.sort(-np.log10(p_values))[::-1]
    expected = -np.log10(np.arange(1, n + 1) / (n + 1))
    bonferroni_threshold = -np.log10(significance_threshold / n)

    _rz = scatter_rasterized_for_path(output_filename) if output_filename else False

    plt.figure(figsize=figsize)
    plt.scatter(expected, observed, color=color, s=point_size, rasterized=_rz)

    # Identity reference line (expected under null). A p-value of exactly 0
    # maps to +inf on the -log10 scale; size the line from finite values only
    # so it keeps a finite end point.
    max_val = expected.max()
    finite_observed = observed[np.isfinite(observed)]
    if finite_observed.size:
        max_val = max(max_val, finite_observed.max())
    plt.plot(
        [0, max_val],
        [0, max_val],
        color=expected_line_color,
        linestyle="--",
        linewidth=line_width,
    )

    # Bonferroni threshold
    plt.axhline(
        y=bonferroni_threshold,
        color=threshold_line_color,
        linestyle=":",
        linewidth=line_width,
    )

    if title:
        plt.title(title, fontsize=_fs.get("title", 20))
    plt.xlabel(f"Expected {_LOG10_P_LABEL}", fontsize=_fs.get("xlabel", 15))
    plt.ylabel(f"Observed {_LOG10_P_LABEL}", fontsize=_fs.get("ylabel", 15))
    style_association_axes(y_floor=0, x_floor=0)
    plt.tight_layout()

    if save:
        skw = default_savefig_kwargs(output_filename)
        plt.savefig(output_filename, **skw)
    if output_filename is None:
        plt.show()