snputils.phenotype.genobj

View Source

1from .multi_phenobj import MultiPhenotypeObject
2from .ukb_phenobj import UKBPhenotypeObject
3
4__all__ = ['MultiPhenotypeObject', 'UKBPhenotypeObject']

class MultiPhenotypeObject: View Source

  9class MultiPhenotypeObject():
 10    """
 11    A class for multi-phenotype data.
 12
 13    This class serves as a container for phenotype data, allowing for
 14    operations such as filtering samples and accessing phenotype information.
 15    It uses a DataFrame to store the data, with the first column reserved for the sample identifers.
 16    """
 17    def __init__(
 18        self,
 19        phen_df: pd.DataFrame
 20    ) -> None:
 21        """
 22        Args:
 23            phen_df (pd.DataFrame): 
 24                A Pandas DataFrame containing phenotype data, with the first column 
 25                representing sample identifiers.
 26        """
 27        self.__phen_df = phen_df
 28
 29    def __getitem__(self, key):
 30        """
 31        To access an attribute of the class using the square bracket notation,
 32        similar to a dictionary.
 33        """
 34        try:
 35            return getattr(self, key)
 36        except:
 37            raise KeyError(f'Invalid key: {key}')
 38
 39    def __setitem__(self, key, value):
 40        """
 41        To set an attribute of the class using the square bracket notation,
 42        similar to a dictionary.
 43        """
 44        try:
 45            setattr(self, key, value)
 46        except AttributeError:
 47            raise KeyError(f'Invalid key: {key}')
 48
 49    @property
 50    def phen_df(self) -> pd.DataFrame:
 51        """
 52        Retrieve `phen_df`.
 53
 54        Returns:
 55            pd.DataFrame: 
 56                A Pandas DataFrame containing phenotype data, with the first column 
 57                representing sample identifiers.
 58        """
 59        return self.__phen_df
 60    
 61    @phen_df.setter
 62    def phen_df(self, x: pd.DataFrame):
 63        """
 64        Update `phen_df`.
 65        """
 66        self.__phen_df = x
 67    
 68    @property
 69    def n_samples(self) -> int:
 70        """
 71        Retrieve `n_samples`.
 72
 73        Returns:
 74            int: The total number of samples.
 75        """
 76        return len(self.phen_df)
 77
 78    def copy(self):
 79        """
 80        Create and return a copy of the current `MultiPhenotypeObject` instance.
 81
 82        Returns:
 83            MultiPhenotypeObject: A new instance of the current object.
 84        """
 85        return copy.copy(self)
 86    
 87    def filter_samples(
 88            self, 
 89            samples: Optional[Union[str, Sequence[str], np.ndarray]] = None, 
 90            indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None, 
 91            include: bool = True, 
 92            inplace: bool = False
 93        ) -> Optional['MultiPhenotypeObject']:
 94        """
 95        Filter samples in the `MultiPhenotypeObject` based on sample names or indexes.
 96
 97        This method allows you to include or exclude specific samples by their names,
 98        indexes, or both. When both samples and indexes are provided, the union of
 99        the specified samples is used. Negative indexes are supported and follow NumPy's indexing 
100        conventions. It updates the `lai`, `samples`, and `haplotypes` attributes accordingly.
101
102        Args:
103            samples (str or array_like of str, optional): 
104                 Names of the samples to include or exclude. Can be a single sample name or a
105                 sequence of sample names. Default is None.
106            indexes (int or array_like of int, optional):
107                Indexes of the samples to include or exclude. Can be a single index or a sequence
108                of indexes. Negative indexes are supported. Default is None.
109            include (bool, default=True): 
110                If True, includes only the specified samples. If False, excludes the specified
111                samples. Default is True.
112            inplace (bool, default=False): 
113                If True, modifies the object in place. If False, returns a new
114                `MultiPhenotypeObject` with the samples filtered. Default is False.
115
116        Returns:
117            Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples 
118            filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None.
119        """
120        # Ensure at least one of samples or indexes is provided
121        if samples is None and indexes is None:
122            raise ValueError("At least one of 'samples' or 'indexes' must be provided.")
123
124        n_samples = self.n_samples
125
126        # Create mask based on sample names
127        if samples is not None:
128            samples = np.atleast_1d(samples)
129            # Extract sample names from the DataFrame
130            sample_names = self.__phen_df.iloc[:, 0].values
131            # Create mask for samples belonging to specified names
132            mask_samples = np.isin(sample_names, samples)
133        else:
134            mask_samples = np.zeros(n_samples, dtype=bool)
135
136        # Create mask based on sample indexes
137        if indexes is not None:
138            indexes = np.atleast_1d(indexes)
139            # Adjust negative indexes
140            indexes = np.mod(indexes, n_samples)
141            if np.any((indexes < 0) | (indexes >= n_samples)):
142                raise IndexError("One or more sample indexes are out of bounds.")
143            # Create mask for samples at specified indexes
144            mask_indexes = np.zeros(n_samples, dtype=bool)
145            mask_indexes[indexes] = True
146        else:
147            mask_indexes = np.zeros(n_samples, dtype=bool)
148
149        # Combine masks using logical OR (union of samples)
150        mask_combined = mask_samples | mask_indexes
151
152        if not include:
153            # Invert mask if excluding samples
154            mask_combined = ~mask_combined
155
156        # Filter the phenotype DataFrame
157        if inplace:
158            self['phen_df'] = self['phen_df'][mask_combined].reset_index(drop=True)
159            return None
160        else:
161            phen_obj = self.copy()
162            phen_obj['phen_df'] = phen_obj['phen_df'][mask_combined].reset_index(drop=True)
163            return phen_obj

A class for multi-phenotype data.

This class serves as a container for phenotype data, allowing for operations such as filtering samples and accessing phenotype information. It uses a DataFrame to store the data, with the first column reserved for the sample identifiers.

MultiPhenotypeObject(phen_df: pandas.core.frame.DataFrame) View Source

17    def __init__(
18        self,
19        phen_df: pd.DataFrame
20    ) -> None:
21        """
22        Args:
23            phen_df (pd.DataFrame): 
24                A Pandas DataFrame containing phenotype data, with the first column 
25                representing sample identifiers.
26        """
27        self.__phen_df = phen_df

Arguments:

phen_df (pd.DataFrame): A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.

phen_df: pandas.core.frame.DataFrame View Source

49    @property
50    def phen_df(self) -> pd.DataFrame:
51        """
52        Retrieve `phen_df`.
53
54        Returns:
55            pd.DataFrame: 
56                A Pandas DataFrame containing phenotype data, with the first column 
57                representing sample identifiers.
58        """
59        return self.__phen_df

Retrieve phen_df.

Returns:

pd.DataFrame: A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.

n_samples: int View Source

68    @property
69    def n_samples(self) -> int:
70        """
71        Retrieve `n_samples`.
72
73        Returns:
74            int: The total number of samples.
75        """
76        return len(self.phen_df)

Retrieve n_samples.

Returns:

int: The total number of samples.

def copy(self): View Source

78    def copy(self):
79        """
80        Create and return a copy of the current `MultiPhenotypeObject` instance.
81
82        Returns:
83            MultiPhenotypeObject: A new instance of the current object.
84        """
85        return copy.copy(self)

Create and return a copy of the current MultiPhenotypeObject instance.

Returns:

MultiPhenotypeObject: A new instance of the current object.

def filter_samples( self, samples: Union[str, Sequence[str], numpy.ndarray, NoneType] = None, indexes: Union[int, Sequence[int], numpy.ndarray, NoneType] = None, include: bool = True, inplace: bool = False) -> Optional[MultiPhenotypeObject]: View Source

 87    def filter_samples(
 88            self, 
 89            samples: Optional[Union[str, Sequence[str], np.ndarray]] = None, 
 90            indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None, 
 91            include: bool = True, 
 92            inplace: bool = False
 93        ) -> Optional['MultiPhenotypeObject']:
 94        """
 95        Filter samples in the `MultiPhenotypeObject` based on sample names or indexes.
 96
 97        This method allows you to include or exclude specific samples by their names,
 98        indexes, or both. When both samples and indexes are provided, the union of
 99        the specified samples is used. Negative indexes are supported and follow NumPy's indexing 
100        conventions. It updates the `lai`, `samples`, and `haplotypes` attributes accordingly.
101
102        Args:
103            samples (str or array_like of str, optional): 
104                 Names of the samples to include or exclude. Can be a single sample name or a
105                 sequence of sample names. Default is None.
106            indexes (int or array_like of int, optional):
107                Indexes of the samples to include or exclude. Can be a single index or a sequence
108                of indexes. Negative indexes are supported. Default is None.
109            include (bool, default=True): 
110                If True, includes only the specified samples. If False, excludes the specified
111                samples. Default is True.
112            inplace (bool, default=False): 
113                If True, modifies the object in place. If False, returns a new
114                `MultiPhenotypeObject` with the samples filtered. Default is False.
115
116        Returns:
117            Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples 
118            filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None.
119        """
120        # Ensure at least one of samples or indexes is provided
121        if samples is None and indexes is None:
122            raise ValueError("At least one of 'samples' or 'indexes' must be provided.")
123
124        n_samples = self.n_samples
125
126        # Create mask based on sample names
127        if samples is not None:
128            samples = np.atleast_1d(samples)
129            # Extract sample names from the DataFrame
130            sample_names = self.__phen_df.iloc[:, 0].values
131            # Create mask for samples belonging to specified names
132            mask_samples = np.isin(sample_names, samples)
133        else:
134            mask_samples = np.zeros(n_samples, dtype=bool)
135
136        # Create mask based on sample indexes
137        if indexes is not None:
138            indexes = np.atleast_1d(indexes)
139            # Adjust negative indexes
140            indexes = np.mod(indexes, n_samples)
141            if np.any((indexes < 0) | (indexes >= n_samples)):
142                raise IndexError("One or more sample indexes are out of bounds.")
143            # Create mask for samples at specified indexes
144            mask_indexes = np.zeros(n_samples, dtype=bool)
145            mask_indexes[indexes] = True
146        else:
147            mask_indexes = np.zeros(n_samples, dtype=bool)
148
149        # Combine masks using logical OR (union of samples)
150        mask_combined = mask_samples | mask_indexes
151
152        if not include:
153            # Invert mask if excluding samples
154            mask_combined = ~mask_combined
155
156        # Filter the phenotype DataFrame
157        if inplace:
158            self['phen_df'] = self['phen_df'][mask_combined].reset_index(drop=True)
159            return None
160        else:
161            phen_obj = self.copy()
162            phen_obj['phen_df'] = phen_obj['phen_df'][mask_combined].reset_index(drop=True)
163            return phen_obj

Filter samples in the MultiPhenotypeObject based on sample names or indexes.

This method allows you to include or exclude specific samples by their names, indexes, or both. When both samples and indexes are provided, the union of the specified samples is used. Negative indexes are supported and follow NumPy's indexing conventions. The rows of phen_df are filtered accordingly and its row index is reset.

Arguments:

samples (str or array_like of str, optional): Names of the samples to include or exclude. Can be a single sample name or a sequence of sample names. Default is None.
indexes (int or array_like of int, optional): Indexes of the samples to include or exclude. Can be a single index or a sequence of indexes. Negative indexes are supported. Default is None.
include (bool, default=True): If True, includes only the specified samples. If False, excludes the specified samples. Default is True.
inplace (bool, default=False): If True, modifies the object in place. If False, returns a new MultiPhenotypeObject with the samples filtered. Default is False.

Returns:

Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples filtered if inplace=False. If inplace=True, modifies the object in place and returns None.