Source code for snputils.phenotype.genobj.covarobj
from __future__ import annotations
from pathlib import Path
from typing import Any, Optional, Sequence, Union
import numpy as np
from snputils._utils.printing import array_shape, format_repr
[docs]
class CovariateObject:
"""Sample-aligned covariate matrix for association analyses."""
def __init__(
self,
samples: Sequence[str],
values: Sequence[Sequence[float]],
covariate_names: Optional[Sequence[str]] = None,
) -> None:
sample_ids = [str(sample) for sample in samples]
if len(sample_ids) == 0:
raise ValueError("CovariateObject contains no samples.")
if len(set(sample_ids)) != len(sample_ids):
raise ValueError("Covariate sample IDs must be unique.")
try:
values_f64 = np.asarray(values, dtype=np.float64)
except (TypeError, ValueError) as exc:
raise ValueError("Covariate values must be numeric.") from exc
if values_f64.ndim != 2:
raise ValueError("Covariate values must be a 2-dimensional matrix.")
if values_f64.shape[0] != len(sample_ids):
raise ValueError(
"Covariate sample/value length mismatch: "
f"{len(sample_ids)} samples but {values_f64.shape[0]} rows."
)
if values_f64.shape[1] == 0:
raise ValueError("CovariateObject must contain at least one covariate column.")
if not np.all(np.isfinite(values_f64)):
raise ValueError("Covariates contain non-finite values (NaN/Inf).")
if covariate_names is None:
names = [f"COV{i + 1}" for i in range(values_f64.shape[1])]
else:
names = [str(name) for name in covariate_names]
if len(names) != values_f64.shape[1]:
raise ValueError(
"Covariate name/value width mismatch: "
f"{len(names)} names but {values_f64.shape[1]} columns."
)
if len(set(names)) != len(names):
raise ValueError("Covariate names must be unique.")
self._samples = sample_ids
self._values = values_f64
self._covariate_names = names
def __repr__(self) -> str:
return format_repr(
self,
shape=self.shape,
n_samples=self.n_samples,
n_covariates=self.n_covariates,
)
def __str__(self) -> str:
return self.__repr__()
@property
def samples(self) -> list[str]:
return self._samples
@property
def values(self) -> np.ndarray:
return self._values
@property
def covariate_names(self) -> list[str]:
return self._covariate_names
@property
def names(self) -> list[str]:
return self._covariate_names
@property
def shape(self) -> tuple[int, ...]:
return array_shape(self._values) or self._values.shape
@property
def n_samples(self) -> int:
return len(self._samples)
@property
def n_covariates(self) -> int:
return int(self._values.shape[1])
@classmethod
def from_file(
cls,
path: Union[str, Path],
col_nums: Optional[str] = None,
) -> CovariateObject:
from snputils.phenotype.covariates import covariate_object_from_file
return covariate_object_from_file(path, col_nums=col_nums)
@classmethod
def from_embedding(
cls,
model: Any,
n_components: Optional[int] = None,
component_names: Optional[Sequence[str]] = None,
) -> CovariateObject:
from snputils.phenotype.covariates import covariate_object_from_embedding
return covariate_object_from_embedding(
model,
n_components=n_components,
component_names=component_names,
)
@classmethod
def from_global_ancestry(
cls,
admobj: Any,
columns: Optional[Sequence[int]] = None,
drop_ancestry: int = -1,
ancestry_names: Optional[Sequence[str]] = None,
) -> CovariateObject:
from snputils.phenotype.covariates import covariate_object_from_global_ancestry
return covariate_object_from_global_ancestry(
admobj,
columns=columns,
drop_ancestry=drop_ancestry,
ancestry_names=ancestry_names,
)
@classmethod
def merge(cls, *objs: CovariateObject) -> CovariateObject:
from snputils.phenotype.covariates import merge_covariates
return merge_covariates(*objs)