snputils.phenotype.genobj
class MultiPhenotypeObject():
    """
    A class for multi-phenotype data.

    This class serves as a container for phenotype data, allowing for
    operations such as filtering samples and accessing phenotype information.
    It uses a DataFrame to store the data, with the first column reserved for
    the sample identifiers.
    """
    def __init__(
        self,
        phen_df: pd.DataFrame
    ) -> None:
        """
        Args:
            phen_df (pd.DataFrame):
                A Pandas DataFrame containing phenotype data, with the first column
                representing sample identifiers.
        """
        self.__phen_df = phen_df

    def __getitem__(self, key):
        """
        To access an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If `key` does not name an attribute of the instance.
        """
        try:
            return getattr(self, key)
        except (AttributeError, TypeError) as err:
            # TypeError covers non-string keys, which `getattr` rejects.
            raise KeyError(f'Invalid key: {key}') from err

    def __setitem__(self, key, value):
        """
        To set an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If the attribute cannot be set (e.g. a read-only property).
        """
        try:
            setattr(self, key, value)
        except AttributeError as err:
            raise KeyError(f'Invalid key: {key}') from err

    @property
    def phen_df(self) -> pd.DataFrame:
        """
        Retrieve `phen_df`.

        Returns:
            pd.DataFrame:
                A Pandas DataFrame containing phenotype data, with the first column
                representing sample identifiers.
        """
        return self.__phen_df

    @phen_df.setter
    def phen_df(self, x: pd.DataFrame):
        """
        Update `phen_df`.
        """
        self.__phen_df = x

    @property
    def n_samples(self) -> int:
        """
        Retrieve `n_samples`.

        Returns:
            int: The total number of samples.
        """
        return len(self.phen_df)

    def copy(self):
        """
        Create and return a copy of the current `MultiPhenotypeObject` instance.

        Returns:
            MultiPhenotypeObject: A new instance of the current object.
        """
        return copy.copy(self)

    def filter_samples(
        self,
        samples: Optional[Union[str, Sequence[str], np.ndarray]] = None,
        indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None,
        include: bool = True,
        inplace: bool = False
    ) -> Optional['MultiPhenotypeObject']:
        """
        Filter samples in the `MultiPhenotypeObject` based on sample names or indexes.

        This method allows you to include or exclude specific samples by their names,
        indexes, or both. When both samples and indexes are provided, the union of
        the specified samples is used. Negative indexes are supported and follow NumPy's
        indexing conventions. It updates the `phen_df` attribute accordingly.

        Args:
            samples (str or array_like of str, optional):
                Names of the samples to include or exclude. Can be a single sample name or a
                sequence of sample names. Default is None.
            indexes (int or array_like of int, optional):
                Indexes of the samples to include or exclude. Can be a single index or a sequence
                of indexes. Negative indexes are supported. Default is None.
            include (bool, default=True):
                If True, includes only the specified samples. If False, excludes the specified
                samples. Default is True.
            inplace (bool, default=False):
                If True, modifies the object in place. If False, returns a new
                `MultiPhenotypeObject` with the samples filtered. Default is False.

        Returns:
            Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified
            samples filtered if `inplace=False`. If `inplace=True`, modifies the object in place
            and returns None.

        Raises:
            ValueError: If neither `samples` nor `indexes` is provided.
            IndexError: If any index lies outside `[-n_samples, n_samples)`.
        """
        # Ensure at least one of samples or indexes is provided
        if samples is None and indexes is None:
            raise ValueError("At least one of 'samples' or 'indexes' must be provided.")

        n_samples = self.n_samples

        # Create mask based on sample names (first column holds the identifiers)
        if samples is not None:
            sample_names = self.phen_df.iloc[:, 0].values
            mask_samples = np.isin(sample_names, np.atleast_1d(samples))
        else:
            mask_samples = np.zeros(n_samples, dtype=bool)

        # Create mask based on sample indexes
        mask_indexes = np.zeros(n_samples, dtype=bool)
        if indexes is not None:
            indexes = np.atleast_1d(indexes)
            if indexes.size == 0:
                # An empty selection defaults to a float dtype, which cannot index.
                indexes = indexes.astype(np.intp)
            # Validate against the raw values: wrapping them first (e.g. with a
            # modulo) would make every index look valid and hide caller errors.
            if np.any((indexes < -n_samples) | (indexes >= n_samples)):
                raise IndexError("One or more sample indexes are out of bounds.")
            # NumPy resolves negative indexes itself.
            mask_indexes[indexes] = True

        # Combine masks using logical OR (union of samples)
        mask_combined = mask_samples | mask_indexes

        if not include:
            # Invert mask if excluding samples
            mask_combined = ~mask_combined

        # Filter the phenotype DataFrame
        filtered = self.phen_df[mask_combined].reset_index(drop=True)
        if inplace:
            self.phen_df = filtered
            return None

        phen_obj = self.copy()
        phen_obj.phen_df = filtered
        return phen_obj
A class for multi-phenotype data.
This class serves as a container for phenotype data, allowing for operations such as filtering samples and accessing phenotype information. It uses a DataFrame to store the data, with the first column reserved for the sample identifiers.
17 def __init__( 18 self, 19 phen_df: pd.DataFrame 20 ) -> None: 21 """ 22 Args: 23 phen_df (pd.DataFrame): 24 A Pandas DataFrame containing phenotype data, with the first column 25 representing sample identifiers. 26 """ 27 self.__phen_df = phen_df
Arguments:
- phen_df (pd.DataFrame): A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.
49 @property 50 def phen_df(self) -> pd.DataFrame: 51 """ 52 Retrieve `phen_df`. 53 54 Returns: 55 pd.DataFrame: 56 A Pandas DataFrame containing phenotype data, with the first column 57 representing sample identifiers. 58 """ 59 return self.__phen_df
Retrieve `phen_df`.
Returns:
pd.DataFrame: A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.
78 def copy(self): 79 """ 80 Create and return a copy of the current `MultiPhenotypeObject` instance. 81 82 Returns: 83 MultiPhenotypeObject: A new instance of the current object. 84 """ 85 return copy.copy(self)
Create and return a copy of the current `MultiPhenotypeObject` instance.
Returns:
MultiPhenotypeObject: A new instance of the current object.
87 def filter_samples( 88 self, 89 samples: Optional[Union[str, Sequence[str], np.ndarray]] = None, 90 indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None, 91 include: bool = True, 92 inplace: bool = False 93 ) -> Optional['MultiPhenotypeObject']: 94 """ 95 Filter samples in the `MultiPhenotypeObject` based on sample names or indexes. 96 97 This method allows you to include or exclude specific samples by their names, 98 indexes, or both. When both samples and indexes are provided, the union of 99 the specified samples is used. Negative indexes are supported and follow NumPy's indexing 100 conventions. It updates the `lai`, `samples`, and `haplotypes` attributes accordingly. 101 102 Args: 103 samples (str or array_like of str, optional): 104 Names of the samples to include or exclude. Can be a single sample name or a 105 sequence of sample names. Default is None. 106 indexes (int or array_like of int, optional): 107 Indexes of the samples to include or exclude. Can be a single index or a sequence 108 of indexes. Negative indexes are supported. Default is None. 109 include (bool, default=True): 110 If True, includes only the specified samples. If False, excludes the specified 111 samples. Default is True. 112 inplace (bool, default=False): 113 If True, modifies the object in place. If False, returns a new 114 `MultiPhenotypeObject` with the samples filtered. Default is False. 115 116 Returns: 117 Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples 118 filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None. 
119 """ 120 # Ensure at least one of samples or indexes is provided 121 if samples is None and indexes is None: 122 raise ValueError("At least one of 'samples' or 'indexes' must be provided.") 123 124 n_samples = self.n_samples 125 126 # Create mask based on sample names 127 if samples is not None: 128 samples = np.atleast_1d(samples) 129 # Extract sample names from the DataFrame 130 sample_names = self.__phen_df.iloc[:, 0].values 131 # Create mask for samples belonging to specified names 132 mask_samples = np.isin(sample_names, samples) 133 else: 134 mask_samples = np.zeros(n_samples, dtype=bool) 135 136 # Create mask based on sample indexes 137 if indexes is not None: 138 indexes = np.atleast_1d(indexes) 139 # Adjust negative indexes 140 indexes = np.mod(indexes, n_samples) 141 if np.any((indexes < 0) | (indexes >= n_samples)): 142 raise IndexError("One or more sample indexes are out of bounds.") 143 # Create mask for samples at specified indexes 144 mask_indexes = np.zeros(n_samples, dtype=bool) 145 mask_indexes[indexes] = True 146 else: 147 mask_indexes = np.zeros(n_samples, dtype=bool) 148 149 # Combine masks using logical OR (union of samples) 150 mask_combined = mask_samples | mask_indexes 151 152 if not include: 153 # Invert mask if excluding samples 154 mask_combined = ~mask_combined 155 156 # Filter the phenotype DataFrame 157 if inplace: 158 self['phen_df'] = self['phen_df'][mask_combined].reset_index(drop=True) 159 return None 160 else: 161 phen_obj = self.copy() 162 phen_obj['phen_df'] = phen_obj['phen_df'][mask_combined].reset_index(drop=True) 163 return phen_obj
Filter samples in the `MultiPhenotypeObject` based on sample names or indexes.
This method allows you to include or exclude specific samples by their names, indexes, or both. When both samples and indexes are provided, the union of the specified samples is used. Negative indexes are supported and follow NumPy's indexing conventions. It updates the `phen_df` attribute accordingly.
Arguments:
- samples (str or array_like of str, optional): Names of the samples to include or exclude. Can be a single sample name or a sequence of sample names. Default is None.
- indexes (int or array_like of int, optional): Indexes of the samples to include or exclude. Can be a single index or a sequence of indexes. Negative indexes are supported. Default is None.
- include (bool, default=True): If True, includes only the specified samples. If False, excludes the specified samples. Default is True.
- inplace (bool, default=False): If True, modifies the object in place. If False, returns a new `MultiPhenotypeObject` with the samples filtered. Default is False.
Returns:
Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None.
class UKBPhenotypeObject():
    """
    A class for UK Biobank (UKB) phenotype data.

    This class provides a structured way to handle phenotype information, including sample identifiers,
    the counts of cases and controls, and haplotype data.
    """
    def __init__(
        self,
        samples: List,
        n_samples: int,
        cases: List,
        n_cases: int,
        controls: List,
        n_controls: int,
        all_haplotypes: List,
        cases_haplotypes: List,
        controls_haplotypes: List
    ) -> None:
        """
        Initialize the UKBPhenotypeObject with phenotype data.

        Args:
            samples (list of str):
                A list of sample identifiers.
            n_samples (int):
                The total number of samples.
            cases (list of str):
                A list of identifiers for the cases.
            n_cases (int):
                The total number of cases.
            controls (list of str):
                A list of identifiers for the controls.
            n_controls (int):
                The total number of controls.
            all_haplotypes (list of str):
                A list of haplotypes for all samples.
            cases_haplotypes (list of str):
                A list of haplotypes for the cases.
            controls_haplotypes (list of str):
                A list of haplotypes for the controls.
        """
        # Assignment order is the order in which `keys()` reports the attributes.
        self.__samples = samples
        self.__n_samples = n_samples
        self.__cases = cases
        self.__n_cases = n_cases
        self.__controls = controls
        self.__n_controls = n_controls
        self.__all_haplotypes = all_haplotypes
        self.__cases_haplotypes = cases_haplotypes
        self.__controls_haplotypes = controls_haplotypes

    def __getitem__(self, key):
        """
        To access an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If `key` does not name an attribute of the instance.
        """
        try:
            return getattr(self, key)
        except (AttributeError, TypeError) as err:
            # TypeError covers non-string keys, which `getattr` rejects.
            raise KeyError(f'Invalid key: {key}') from err

    def __setitem__(self, key, value):
        """
        To set an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If the attribute cannot be set; the properties of this
                class define no setters, so assigning to them fails.
        """
        try:
            setattr(self, key, value)
        except AttributeError as err:
            raise KeyError(f'Invalid key: {key}') from err

    @property
    def samples(self) -> List:
        """
        Retrieve `samples`.

        Returns:
            List of str: A list of sample identifiers.
        """
        return self.__samples

    @property
    def n_samples(self) -> int:
        """
        Retrieve `n_samples`.

        Returns:
            int: The total number of samples.
        """
        return self.__n_samples

    @property
    def cases(self) -> List:
        """
        Retrieve `cases`.

        Returns:
            List of str: A list of identifiers for the cases.
        """
        return self.__cases

    @property
    def n_cases(self) -> int:
        """
        Retrieve `n_cases`.

        Returns:
            int: The total number of cases.
        """
        return self.__n_cases

    @property
    def controls(self) -> List:
        """
        Retrieve `controls`.

        Returns:
            List of str: A list of identifiers for the controls.
        """
        return self.__controls

    @property
    def n_controls(self) -> int:
        """
        Retrieve `n_controls`.

        Returns:
            int: The total number of controls.
        """
        return self.__n_controls

    @property
    def all_haplotypes(self) -> List:
        """
        Retrieve `all_haplotypes`.

        Returns:
            List of str: A list of haplotypes for all samples.
        """
        return self.__all_haplotypes

    @property
    def cases_haplotypes(self) -> List:
        """
        Retrieve `cases_haplotypes`.

        Returns:
            List of str: A list of haplotypes for the cases.
        """
        return self.__cases_haplotypes

    @property
    def controls_haplotypes(self) -> List:
        """
        Retrieve `controls_haplotypes`.

        Returns:
            List of str: A list of haplotypes for the controls.
        """
        return self.__controls_haplotypes

    def copy(self):
        """
        Create and return a copy of the current `UKBPhenotypeObject` instance.

        Returns:
            UKBPhenotypeObject: A new instance of the current object.
        """
        return copy.copy(self)

    def keys(self) -> List:
        """
        Retrieve a list of public attribute names for this `UKBPhenotypeObject` instance.

        Returns:
            List: A list of attribute names, with internal name-mangling removed,
            for easier reference to public attributes in the instance.
        """
        return [attr.replace('_UKBPhenotypeObject__', '') for attr in vars(self)]
A class for UK Biobank (UKB) phenotype data.
This class provides a structured way to handle phenotype information, including sample identifiers, the counts of cases and controls, and haplotype data.
def __init__(
    self,
    samples: List,
    n_samples: int,
    cases: List,
    n_cases: int,
    controls: List,
    n_controls: int,
    all_haplotypes: List,
    cases_haplotypes: List,
    controls_haplotypes: List
) -> None:
    """
    Initialize the UKBPhenotypeObject with phenotype data.

    Every argument is stored as given; no validation or copying is done here.

    Args:
        samples (list of str):
            A list of sample identifiers.
        n_samples (int):
            The total number of samples.
        cases (list of str):
            A list of identifiers for the cases.
        n_cases (int):
            The total number of cases.
        controls (list of str):
            A list of identifiers for the controls.
        n_controls (int):
            The total number of controls.
        all_haplotypes (list of str):
            A list of haplotypes for all samples.
        cases_haplotypes (list of str):
            A list of haplotypes for the cases.
        controls_haplotypes (list of str):
            A list of haplotypes for the controls.
    """
    # Keep this assignment order: `keys()` iterates `vars(self)`, so it
    # reports the attributes in the order they are set here.
    self.__samples = samples
    self.__n_samples = n_samples
    self.__cases = cases
    self.__n_cases = n_cases
    self.__controls = controls
    self.__n_controls = n_controls
    self.__all_haplotypes = all_haplotypes
    self.__cases_haplotypes = cases_haplotypes
    self.__controls_haplotypes = controls_haplotypes
Initialize the UKBPhenotypeObject with phenotype data.
Arguments:
- samples (list of str): A list of sample identifiers.
- n_samples (int): The total number of samples.
- cases (list of str): A list of identifiers for the cases.
- n_cases (int): The total number of cases.
- controls (list of str): A list of identifiers for the controls.
- n_controls (int): The total number of controls.
- all_haplotypes (list of str): A list of haplotypes for all samples.
- cases_haplotypes (list of str): A list of haplotypes for the cases.
- controls_haplotypes (list of str): A list of haplotypes for the controls.
168 def copy(self): 169 """ 170 Create and return a copy of the current `UKBPhenotypeObject` instance. 171 172 Returns: 173 UKBPhenotypeObject: A new instance of the current object. 174 """ 175 return copy.copy(self)
Create and return a copy of the current `UKBPhenotypeObject` instance.
Returns:
UKBPhenotypeObject: A new instance of the current object.
177 def keys(self) -> List: 178 """ 179 Retrieve a list of public attribute names for this `UKBPhenotypeObject` instance. 180 181 Returns: 182 List: A list of attribute names, with internal name-mangling removed, 183 for easier reference to public attributes in the instance. 184 """ 185 return [attr.replace('_UKBPhenotypeObject__', '') for attr in vars(self)]
Retrieve a list of public attribute names for this `UKBPhenotypeObject` instance.
Returns:
List: A list of attribute names, with internal name-mangling removed, for easier reference to public attributes in the instance.