snputils.phenotype.genobj
class MultiPhenotypeObject():
    """
    A class for multi-phenotype data.

    This class serves as a container for phenotype data, allowing for
    operations such as filtering samples and accessing phenotype information.
    It uses a DataFrame to store the data, with the first column reserved for the sample identifiers.
    """
    def __init__(
        self,
        phen_df: pd.DataFrame
    ) -> None:
        """
        Args:
            phen_df (pd.DataFrame):
                A Pandas DataFrame containing phenotype data, with the first column
                representing sample identifiers.
        """
        self.__phen_df = phen_df

    def __getitem__(self, key):
        """
        To access an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If `key` does not name an attribute of the instance.
        """
        try:
            return getattr(self, key)
        except (AttributeError, TypeError) as err:
            # TypeError covers non-string keys, which `getattr` rejects.
            raise KeyError(f'Invalid key: {key}') from err

    def __setitem__(self, key, value):
        """
        To set an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If the attribute cannot be set (e.g. a read-only property).
        """
        try:
            setattr(self, key, value)
        except AttributeError as err:
            raise KeyError(f'Invalid key: {key}') from err

    @property
    def phen_df(self) -> pd.DataFrame:
        """
        Retrieve `phen_df`.

        Returns:
            pd.DataFrame:
                A Pandas DataFrame containing phenotype data, with the first column
                representing sample identifiers.
        """
        return self.__phen_df

    @phen_df.setter
    def phen_df(self, x: pd.DataFrame):
        """
        Update `phen_df`.
        """
        self.__phen_df = x

    @property
    def n_samples(self) -> int:
        """
        Retrieve `n_samples`.

        Returns:
            int: The total number of samples (rows of `phen_df`).
        """
        return len(self.phen_df)

    def copy(self):
        """
        Create and return a copy of the current `MultiPhenotypeObject` instance.

        The copy is shallow (`copy.copy`): the new object references the same
        underlying DataFrame until one of them is reassigned.

        Returns:
            MultiPhenotypeObject: A new instance of the current object.
        """
        return copy.copy(self)

    def filter_samples(
        self,
        samples: Optional[Union[str, Sequence[str], np.ndarray]] = None,
        indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None,
        include: bool = True,
        reorder: bool = False,
        inplace: bool = False
    ) -> Optional['MultiPhenotypeObject']:
        """
        Filter samples in the `MultiPhenotypeObject` based on sample names or indexes.

        This method allows you to include or exclude specific samples by their names,
        indexes, or both. When both samples and indexes are provided, the union of
        the specified samples is used. Negative indexes are supported and follow NumPy's indexing
        conventions. Set `reorder=True` to match the ordering of the provided `samples` and/or
        `indexes` lists when including.

        Args:
            samples (str or array_like of str, optional):
                Names of the samples to include or exclude. Can be a single sample name or a
                sequence of sample names. Default is None.
            indexes (int or array_like of int, optional):
                Indexes of the samples to include or exclude. Can be a single index or a sequence
                of indexes. Negative indexes are supported. Default is None.
            include (bool, default=True):
                If True, includes only the specified samples. If False, excludes the specified
                samples. Default is True.
            reorder (bool, default=False):
                Only used when `include=True`. If True, the selected rows are ordered following
                `samples` first, then `indexes`; otherwise the original row order is kept.
                Default is False.
            inplace (bool, default=False):
                If True, modifies the object in place. If False, returns a new
                `MultiPhenotypeObject` with the samples filtered. Default is False.

        Returns:
            Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples
            filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None.

        Raises:
            ValueError: If neither `samples` nor `indexes` is provided.
            IndexError: If any index lies outside `[-n_samples, n_samples)`.
        """
        # Ensure at least one of samples or indexes is provided
        if samples is None and indexes is None:
            raise ValueError("At least one of 'samples' or 'indexes' must be provided.")

        n_samples = self.n_samples
        sample_names = None

        # Create mask based on sample names
        if samples is not None:
            samples = np.asarray(samples).ravel()
            # Sample identifiers live in the first column of the DataFrame
            sample_names = self.phen_df.iloc[:, 0].values
            mask_samples = np.isin(sample_names, samples)
        else:
            mask_samples = np.zeros(n_samples, dtype=bool)

        # Create mask based on sample indexes
        mask_indexes = np.zeros(n_samples, dtype=bool)
        if indexes is not None:
            indexes = np.asarray(indexes).ravel()
            if indexes.size == 0:
                # An empty sequence becomes a float array, which cannot index the mask
                indexes = indexes.astype(int)
            # Validate BEFORE wrapping: after `np.mod` every value is in range,
            # so an out-of-bounds index would otherwise silently pick another row.
            if np.any((indexes < -n_samples) | (indexes >= n_samples)):
                raise IndexError("One or more sample indexes are out of bounds.")
            # Map negative indexes onto their non-negative equivalents
            indexes = np.mod(indexes, n_samples)
            mask_indexes[indexes] = True

        # Combine masks using logical OR (union of samples)
        mask_combined = mask_samples | mask_indexes

        if not include:
            # Invert mask if excluding samples
            mask_combined = ~mask_combined

        # If requested, compute an ordering of selected rows that follows the provided lists
        ordered_indices = None
        if include and reorder:
            ordered_list = []
            added = np.zeros(n_samples, dtype=bool)

            # Respect the order provided in `samples` (supports duplicate sample names)
            if samples is not None:
                for s in samples:
                    for idx in np.flatnonzero(sample_names == s):
                        if mask_combined[idx] and not added[idx]:
                            ordered_list.append(int(idx))
                            added[idx] = True

            # Then respect the order in `indexes` (already wrapped to non-negative)
            if indexes is not None:
                for idx in indexes:
                    if mask_combined[idx] and not added[idx]:
                        ordered_list.append(int(idx))
                        added[idx] = True

            # Finally, append any remaining selected rows in their original order
            for idx in np.flatnonzero(mask_combined):
                if not added[idx]:
                    ordered_list.append(int(idx))

            ordered_indices = np.asarray(ordered_list, dtype=int)

        # Filter the phenotype DataFrame once, then attach it to the right object
        phen_df = self.phen_df
        if ordered_indices is not None:
            filtered = phen_df.iloc[ordered_indices]
        else:
            filtered = phen_df[mask_combined]
        filtered = filtered.reset_index(drop=True)

        if inplace:
            self.phen_df = filtered
            return None

        phen_obj = self.copy()
        phen_obj.phen_df = filtered
        return phen_obj
A class for multi-phenotype data.
This class serves as a container for phenotype data, allowing for operations such as filtering samples and accessing phenotype information. It uses a DataFrame to store the data, with the first column reserved for the sample identifiers.
def __init__(
    self,
    phen_df: pd.DataFrame
) -> None:
    """
    Store the phenotype table on the instance.

    Args:
        phen_df (pd.DataFrame):
            A Pandas DataFrame containing phenotype data, with the first column
            representing sample identifiers.
    """
    # The DataFrame is stored by reference (no copy is made) in a
    # name-mangled private attribute.
    self.__phen_df = phen_df
Arguments:
- phen_df (pd.DataFrame): A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.
@property
def phen_df(self) -> pd.DataFrame:
    """
    Retrieve `phen_df`.

    Returns:
        pd.DataFrame:
            A Pandas DataFrame containing phenotype data, with the first column
            representing sample identifiers.
    """
    # Returns the stored object itself, not a copy.
    return self.__phen_df
Retrieve phen_df.
Returns:
pd.DataFrame: A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.
78 def copy(self): 79 """ 80 Create and return a copy of the current `MultiPhenotypeObject` instance. 81 82 Returns: 83 MultiPhenotypeObject: A new instance of the current object. 84 """ 85 return copy.copy(self)
Create and return a copy of the current MultiPhenotypeObject instance.
Returns:
MultiPhenotypeObject: A new instance of the current object.
87 def filter_samples( 88 self, 89 samples: Optional[Union[str, Sequence[str], np.ndarray]] = None, 90 indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None, 91 include: bool = True, 92 reorder: bool = False, 93 inplace: bool = False 94 ) -> Optional['MultiPhenotypeObject']: 95 """ 96 Filter samples in the `MultiPhenotypeObject` based on sample names or indexes. 97 98 This method allows you to include or exclude specific samples by their names, 99 indexes, or both. When both samples and indexes are provided, the union of 100 the specified samples is used. Negative indexes are supported and follow NumPy's indexing 101 conventions. Set `reorder=True` to match the ordering of the provided `samples` and/or 102 `indexes` lists when including. 103 104 Args: 105 samples (str or array_like of str, optional): 106 Names of the samples to include or exclude. Can be a single sample name or a 107 sequence of sample names. Default is None. 108 indexes (int or array_like of int, optional): 109 Indexes of the samples to include or exclude. Can be a single index or a sequence 110 of indexes. Negative indexes are supported. Default is None. 111 include (bool, default=True): 112 If True, includes only the specified samples. If False, excludes the specified 113 samples. Default is True. 114 inplace (bool, default=False): 115 If True, modifies the object in place. If False, returns a new 116 `MultiPhenotypeObject` with the samples filtered. Default is False. 117 118 Returns: 119 Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples 120 filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None. 
121 """ 122 # Ensure at least one of samples or indexes is provided 123 if samples is None and indexes is None: 124 raise ValueError("At least one of 'samples' or 'indexes' must be provided.") 125 126 n_samples = self.n_samples 127 128 # Create mask based on sample names 129 if samples is not None: 130 samples = np.asarray(samples).ravel() 131 # Extract sample names from the DataFrame 132 sample_names = self.__phen_df.iloc[:, 0].values 133 # Create mask for samples belonging to specified names 134 mask_samples = np.isin(sample_names, samples) 135 else: 136 mask_samples = np.zeros(n_samples, dtype=bool) 137 138 # Create mask based on sample indexes 139 if indexes is not None: 140 indexes = np.asarray(indexes).ravel() 141 # Adjust negative indexes 142 indexes = np.mod(indexes, n_samples) 143 if np.any((indexes < 0) | (indexes >= n_samples)): 144 raise IndexError("One or more sample indexes are out of bounds.") 145 # Create mask for samples at specified indexes 146 mask_indexes = np.zeros(n_samples, dtype=bool) 147 mask_indexes[indexes] = True 148 else: 149 mask_indexes = np.zeros(n_samples, dtype=bool) 150 151 # Combine masks using logical OR (union of samples) 152 mask_combined = mask_samples | mask_indexes 153 154 if not include: 155 # Invert mask if excluding samples 156 mask_combined = ~mask_combined 157 158 # If requested, compute an ordering of selected rows that follows the provided lists 159 ordered_indices = None 160 if include and reorder: 161 sel_indices = np.where(mask_combined)[0] 162 sample_names = self.__phen_df.iloc[:, 0].values 163 ordered_list = [] 164 added = np.zeros(n_samples, dtype=bool) 165 166 # Respect the order provided in `samples` (supports duplicate sample names) 167 if samples is not None: 168 for s in samples: 169 matches = np.where(sample_names == s)[0] 170 for idx in matches: 171 if mask_combined[idx] and not added[idx]: 172 ordered_list.append(int(idx)) 173 added[idx] = True 174 175 # Then respect the order in `indexes` 176 if 
indexes is not None: 177 adj_idx = np.mod(np.atleast_1d(indexes), n_samples) 178 for idx in adj_idx: 179 if mask_combined[idx] and not added[idx]: 180 ordered_list.append(int(idx)) 181 added[idx] = True 182 183 # Finally, append any remaining selected rows in their original order 184 for idx in sel_indices: 185 if not added[idx]: 186 ordered_list.append(int(idx)) 187 188 ordered_indices = np.asarray(ordered_list, dtype=int) 189 190 # Filter the phenotype DataFrame 191 if inplace: 192 if ordered_indices is not None: 193 self['phen_df'] = self['phen_df'].iloc[ordered_indices].reset_index(drop=True) 194 else: 195 self['phen_df'] = self['phen_df'][mask_combined].reset_index(drop=True) 196 return None 197 else: 198 phen_obj = self.copy() 199 if ordered_indices is not None: 200 phen_obj['phen_df'] = phen_obj['phen_df'].iloc[ordered_indices].reset_index(drop=True) 201 else: 202 phen_obj['phen_df'] = phen_obj['phen_df'][mask_combined].reset_index(drop=True) 203 return phen_obj
Filter samples in the MultiPhenotypeObject based on sample names or indexes.
This method allows you to include or exclude specific samples by their names,
indexes, or both. When both samples and indexes are provided, the union of
the specified samples is used. Negative indexes are supported and follow NumPy's indexing
conventions. Set reorder=True to match the ordering of the provided samples and/or
indexes lists when including.
Arguments:
- samples (str or array_like of str, optional): Names of the samples to include or exclude. Can be a single sample name or a sequence of sample names. Default is None.
- indexes (int or array_like of int, optional): Indexes of the samples to include or exclude. Can be a single index or a sequence of indexes. Negative indexes are supported. Default is None.
- include (bool, default=True): If True, includes only the specified samples. If False, excludes the specified samples. Default is True.
- inplace (bool, default=False): If True, modifies the object in place. If False, returns a new
MultiPhenotypeObject with the samples filtered. Default is False.
Returns:
Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples filtered if
inplace=False. If inplace=True, modifies the object in place and returns None.
class UKBPhenotypeObject():
    """
    A class for UK Biobank (UKB) phenotype data.

    This class provides a structured way to handle phenotype information, including sample identifiers,
    the counts of cases and controls, and haplotype data.
    """
    def __init__(
        self,
        samples: List,
        n_samples: int,
        cases: List,
        n_cases: int,
        controls: List,
        n_controls: int,
        all_haplotypes: List,
        cases_haplotypes: List,
        controls_haplotypes: List
    ) -> None:
        """
        Initialize the UKBPhenotypeObject with phenotype data.

        Args:
            samples (list of str):
                A list of sample identifiers.
            n_samples (int):
                The total number of samples.
            cases (list of str):
                A list of identifiers for the cases.
            n_cases (int):
                The total number of cases.
            controls (list of str):
                A list of identifiers for the controls.
            n_controls (int):
                The total number of controls.
            all_haplotypes (list of str):
                A list of haplotypes for all samples.
            cases_haplotypes (list of str):
                A list of haplotypes for the cases.
            controls_haplotypes (list of str):
                A list of haplotypes for the controls.
        """
        # Values are stored as given; the counts are not recomputed from the lists.
        self.__samples = samples
        self.__n_samples = n_samples
        self.__cases = cases
        self.__n_cases = n_cases
        self.__controls = controls
        self.__n_controls = n_controls
        self.__all_haplotypes = all_haplotypes
        self.__cases_haplotypes = cases_haplotypes
        self.__controls_haplotypes = controls_haplotypes

    def __getitem__(self, key):
        """
        To access an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If `key` does not name an attribute of the instance.
        """
        try:
            return getattr(self, key)
        except (AttributeError, TypeError) as err:
            # TypeError covers non-string keys, which `getattr` rejects.
            raise KeyError(f'Invalid key: {key}') from err

    def __setitem__(self, key, value):
        """
        To set an attribute of the class using the square bracket notation,
        similar to a dictionary.

        Raises:
            KeyError: If the attribute cannot be set. The properties of this
                class define no setters, so assigning to them fails this way.
        """
        try:
            setattr(self, key, value)
        except AttributeError as err:
            raise KeyError(f'Invalid key: {key}') from err

    @property
    def samples(self) -> List:
        """
        Retrieve `samples`.

        Returns:
            List of str: A list of sample identifiers.
        """
        return self.__samples

    @property
    def n_samples(self) -> int:
        """
        Retrieve `n_samples`.

        Returns:
            int: The total number of samples.
        """
        return self.__n_samples

    @property
    def cases(self) -> List:
        """
        Retrieve `cases`.

        Returns:
            List of str: A list of identifiers for the cases.
        """
        return self.__cases

    @property
    def n_cases(self) -> int:
        """
        Retrieve `n_cases`.

        Returns:
            int: The total number of cases.
        """
        return self.__n_cases

    @property
    def controls(self) -> List:
        """
        Retrieve `controls`.

        Returns:
            List of str: A list of identifiers for the controls.
        """
        return self.__controls

    @property
    def n_controls(self) -> int:
        """
        Retrieve `n_controls`.

        Returns:
            int: The total number of controls.
        """
        return self.__n_controls

    @property
    def all_haplotypes(self) -> List:
        """
        Retrieve `all_haplotypes`.

        Returns:
            List of str: A list of haplotypes for all samples.
        """
        return self.__all_haplotypes

    @property
    def cases_haplotypes(self) -> List:
        """
        Retrieve `cases_haplotypes`.

        Returns:
            List of str: A list of haplotypes for the cases.
        """
        return self.__cases_haplotypes

    @property
    def controls_haplotypes(self) -> List:
        """
        Retrieve `controls_haplotypes`.

        Returns:
            List of str: A list of haplotypes for the controls.
        """
        return self.__controls_haplotypes

    def copy(self):
        """
        Create and return a copy of the current `UKBPhenotypeObject` instance.

        The copy is shallow (`copy.copy`): the new object references the same
        underlying lists as the original.

        Returns:
            UKBPhenotypeObject: A new instance of the current object.
        """
        return copy.copy(self)

    def keys(self) -> List:
        """
        Retrieve a list of public attribute names for this `UKBPhenotypeObject` instance.

        Returns:
            List: A list of attribute names, with internal name-mangling removed,
            for easier reference to public attributes in the instance.
        """
        return [attr.replace('_UKBPhenotypeObject__', '') for attr in vars(self)]
A class for UK Biobank (UKB) phenotype data.
This class provides a structured way to handle phenotype information, including sample identifiers, the counts of cases and controls, and haplotype data.
def __init__(
    self,
    samples: List,
    n_samples: int,
    cases: List,
    n_cases: int,
    controls: List,
    n_controls: int,
    all_haplotypes: List,
    cases_haplotypes: List,
    controls_haplotypes: List
) -> None:
    """
    Initialize the UKBPhenotypeObject with phenotype data.

    Args:
        samples (list of str):
            A list of sample identifiers.
        n_samples (int):
            The total number of samples.
        cases (list of str):
            A list of identifiers for the cases.
        n_cases (int):
            The total number of cases.
        controls (list of str):
            A list of identifiers for the controls.
        n_controls (int):
            The total number of controls.
        all_haplotypes (list of str):
            A list of haplotypes for all samples.
        cases_haplotypes (list of str):
            A list of haplotypes for the cases.
        controls_haplotypes (list of str):
            A list of haplotypes for the controls.
    """
    # Every argument is stored as given in a name-mangled private attribute.
    # The counts are taken from the caller and are not checked against the
    # lengths of the corresponding lists.
    self.__samples = samples
    self.__n_samples = n_samples
    self.__cases = cases
    self.__n_cases = n_cases
    self.__controls = controls
    self.__n_controls = n_controls
    self.__all_haplotypes = all_haplotypes
    self.__cases_haplotypes = cases_haplotypes
    self.__controls_haplotypes = controls_haplotypes
Initialize the UKBPhenotypeObject with phenotype data.
Arguments:
- samples (list of str): A list of sample identifiers.
- n_samples (int): The total number of samples.
- cases (list of str): A list of identifiers for the cases.
- n_cases (int): The total number of cases.
- controls (list of str): A list of identifiers for the controls.
- n_controls (int): The total number of controls.
- all_haplotypes (list of str): A list of haplotypes for all samples.
- cases_haplotypes (list of str): A list of haplotypes for the cases.
- controls_haplotypes (list of str): A list of haplotypes for the controls.
168 def copy(self): 169 """ 170 Create and return a copy of the current `UKBPhenotypeObject` instance. 171 172 Returns: 173 UKBPhenotypeObject: A new instance of the current object. 174 """ 175 return copy.copy(self)
Create and return a copy of the current UKBPhenotypeObject instance.
Returns:
UKBPhenotypeObject: A new instance of the current object.
177 def keys(self) -> List: 178 """ 179 Retrieve a list of public attribute names for this `UKBPhenotypeObject` instance. 180 181 Returns: 182 List: A list of attribute names, with internal name-mangling removed, 183 for easier reference to public attributes in the instance. 184 """ 185 return [attr.replace('_UKBPhenotypeObject__', '') for attr in vars(self)]
Retrieve a list of public attribute names for this UKBPhenotypeObject instance.
Returns:
List: A list of attribute names, with internal name-mangling removed, for easier reference to public attributes in the instance.