# Source code for snputils.ancestry.io.wide.write.admixture
import logging
from pathlib import Path
from typing import Union, Optional
import numpy as np
from .base import WideBaseWriter
from snputils.ancestry.genobj.wide import GlobalAncestryObject
# Module-level logger, named after this module via `__name__`.
log = logging.getLogger(__name__)
# [docs]
class AdmixtureWriter(WideBaseWriter):
    """
    A writer class for exporting global ancestry data from a
    `snputils.ancestry.genobj.GlobalAncestryObject` into multiple ADMIXTURE files.

    The output paths are derived once, at construction time, by appending a
    file-specific suffix to `file_prefix`:

    - `<file_prefix>.K.Q`: Q matrix, where `K` is `wideobj.n_ancestries`.
    - `<file_prefix>.K.P`: P matrix.
    - `<file_prefix>.sample_ids.txt`: only if `wideobj.samples` is not None.
    - `<file_prefix>.snp_ids.txt`: only if `wideobj.snps` is not None.
    - `<file_prefix>.map`: only if `wideobj.ancestries` is not None.
    """

    def __init__(
        self,
        wideobj: GlobalAncestryObject,
        file_prefix: Union[str, Path]
    ) -> None:
        """
        Args:
            wideobj (GlobalAncestryObject):
                A GlobalAncestryObject instance.
            file_prefix (str or pathlib.Path):
                Prefix for output file names, including directory path but excluding file extensions.
                The prefix is used to generate specific file names for each output, with file-specific
                suffixes appended (e.g., `file_prefix.n_ancestries.Q` for the Q matrix file).
        """
        super().__init__(wideobj, file_prefix)

        # Double-underscore attributes are name-mangled per class, so the
        # `wideobj` / `file_prefix` properties below read
        # `_AdmixtureWriter__wideobj` / `_AdmixtureWriter__file_prefix`.
        # Nothing a base class stores is visible under those names, so the
        # values must be stored here for the properties to work.
        self.__wideobj = wideobj
        self.__file_prefix = Path(file_prefix)

        n_ancestries = self.wideobj.n_ancestries
        self.__Q_file = self._with_appended_suffix(f".{n_ancestries}.Q")
        self.__P_file = self._with_appended_suffix(f".{n_ancestries}.P")

        # Optional outputs: a path is only assigned when the data is present.
        self.__sample_file = (
            self._with_appended_suffix(".sample_ids.txt")
            if self.wideobj.samples is not None else None
        )
        self.__snp_file = (
            self._with_appended_suffix(".snp_ids.txt")
            if self.wideobj.snps is not None else None
        )
        self.__ancestry_file = (
            self._with_appended_suffix(".map")
            if self.wideobj.ancestries is not None else None
        )

    def _with_appended_suffix(self, suffix: str) -> Path:
        """
        Build an output path by appending `suffix` to the full prefix name.

        `Path.with_suffix` is deliberately not used: it replaces an existing
        suffix, so a prefix such as `out/1000G.chr22` would lose `.chr22`.

        Args:
            suffix (str): Text to append, including the leading dot.

        Returns:
            pathlib.Path: `file_prefix` with `suffix` appended to its final component.
        """
        prefix = self.file_prefix
        return prefix.with_name(prefix.name + suffix)

    @property
    def wideobj(self) -> GlobalAncestryObject:
        """
        Retrieve `wideobj`.

        Returns:
            GlobalAncestryObject: A GlobalAncestryObject instance.
        """
        return self.__wideobj

    @property
    def file_prefix(self) -> Path:
        """
        Retrieve `file_prefix`.

        Returns:
            pathlib.Path:
                Prefix for output file names, including directory path but excluding file extensions.
                The prefix is used to generate specific file names for each output, with file-specific
                suffixes appended (e.g., `file_prefix.n_ancestries.Q` for the Q matrix file).
        """
        return self.__file_prefix

    @property
    def Q_file(self) -> Path:
        """
        Retrieve `Q_file`.

        Returns:
            pathlib.Path:
                Path to the `.Q` file that will store the Q matrix (per-sample ancestry proportions).
        """
        return self.__Q_file

    @property
    def P_file(self) -> Path:
        """
        Retrieve `P_file`.

        Returns:
            pathlib.Path:
                Path to the `.P` file that will store the P/F matrix (per-ancestry SNP frequencies).
        """
        return self.__P_file

    @property
    def sample_file(self) -> Optional[Path]:
        """
        Retrieve `sample_file`.

        Returns:
            pathlib.Path:
                Path to the `.txt` file that will store sample identifiers.
                If None, sample identifiers are not saved.
        """
        return self.__sample_file

    @property
    def snp_file(self) -> Optional[Path]:
        """
        Retrieve `snp_file`.

        Returns:
            pathlib.Path:
                Path to the `.txt` file that will store SNP identifiers.
                If None, SNP identifiers are not saved.
        """
        return self.__snp_file

    @property
    def ancestry_file(self) -> Optional[Path]:
        """
        Retrieve `ancestry_file`.

        Returns:
            pathlib.Path:
                Path to the `.map` file that will store ancestry labels for each sample.
                If None, ancestries are not saved.
        """
        return self.__ancestry_file

    def _write_Q(self) -> None:
        """Save `wideobj.Q` to `Q_file` as space-delimited text."""
        log.info(f"Writing Q matrix to '{self.Q_file}'...")
        np.savetxt(self.Q_file, self.wideobj.Q, delimiter=" ")
        log.info(f"Finished writing Q matrix to '{self.Q_file}'.")

    def _write_P(self) -> None:
        """Save `wideobj.P` to `P_file` as space-delimited text."""
        log.info(f"Writing P matrix to '{self.P_file}'...")
        np.savetxt(self.P_file, self.wideobj.P, delimiter=" ")
        log.info(f"Finished writing P matrix to '{self.P_file}'.")

    def _write_sample_ids(self) -> None:
        """Save `wideobj.samples` to `sample_file`; do nothing if samples is None."""
        if self.wideobj.samples is None:
            return
        log.info(f"Writing sample IDs to '{self.sample_file}'...")
        np.savetxt(self.sample_file, self.wideobj.samples, fmt="%s")
        log.info(f"Finished writing sample IDs to '{self.sample_file}'.")

    def _write_snps(self) -> None:
        """Save `wideobj.snps` to `snp_file`; do nothing if snps is None."""
        if self.wideobj.snps is None:
            return
        log.info(f"Writing SNP IDs to '{self.snp_file}'...")
        np.savetxt(self.snp_file, self.wideobj.snps, fmt="%s")
        log.info(f"Finished writing SNP IDs to '{self.snp_file}'.")

    def _write_ancestries(self) -> None:
        """Save `wideobj.ancestries` to `ancestry_file`; do nothing if ancestries is None."""
        if self.wideobj.ancestries is None:
            return
        log.info(f"Writing ancestry information to '{self.ancestry_file}'...")
        np.savetxt(self.ancestry_file, self.wideobj.ancestries, fmt="%s")
        log.info(f"Finished writing ancestry information to '{self.ancestry_file}'.")

    def write(self) -> None:
        """
        Write the data contained in the `wideobj` instance into the multiple ADMIXTURE files
        with the specified `file_prefix`. If the files already exist, they will be overwritten.

        Output files:

        - `<file_prefix>.K.Q`: Q matrix file. The file uses space (' ') as the delimiter.
        - `<file_prefix>.K.P`: P matrix file. The file uses space (' ') as the delimiter.
        - `<file_prefix>.sample_ids.txt`: Sample IDs file (if sample IDs are available).
        - `<file_prefix>.snp_ids.txt`: SNP IDs file (if SNP IDs are available).
        - `<file_prefix>.map`: Ancestry file (if ancestries information is available).

        where `K` is the total number of ancestries.
        """
        log.info(f"Preparing to write ADMIXTURE files with prefix '{self.file_prefix}'...")

        # Create the destination directory (and any missing parents) up front
        # so the individual writers can assume it exists.
        self.file_prefix.parent.mkdir(parents=True, exist_ok=True)

        self._write_Q()
        self._write_P()
        self._write_sample_ids()
        self._write_snps()
        self._write_ancestries()

        log.info(f"Finished writing all ADMIXTURE files with prefix '{self.file_prefix}'.")
# Register AdmixtureWriter with WideBaseWriter through its `register` hook.
# NOTE(review): AdmixtureWriter already inherits from WideBaseWriter, so if this is
# `abc.ABCMeta.register` the call looks redundant — confirm before removing.
WideBaseWriter.register(AdmixtureWriter)