snputils.phenotype.genobj

1from .multi_phenobj import MultiPhenotypeObject
2from .ukb_phenobj import UKBPhenotypeObject
3
4__all__ = ['MultiPhenotypeObject', 'UKBPhenotypeObject']
class MultiPhenotypeObject:
  9class MultiPhenotypeObject():
 10    """
 11    A class for multi-phenotype data.
 12
 13    This class serves as a container for phenotype data, allowing for
 14    operations such as filtering samples and accessing phenotype information.
 15    It uses a DataFrame to store the data, with the first column reserved for the sample identifers.
 16    """
 17    def __init__(
 18        self,
 19        phen_df: pd.DataFrame
 20    ) -> None:
 21        """
 22        Args:
 23            phen_df (pd.DataFrame): 
 24                A Pandas DataFrame containing phenotype data, with the first column 
 25                representing sample identifiers.
 26        """
 27        self.__phen_df = phen_df
 28
 29    def __getitem__(self, key):
 30        """
 31        To access an attribute of the class using the square bracket notation,
 32        similar to a dictionary.
 33        """
 34        try:
 35            return getattr(self, key)
 36        except:
 37            raise KeyError(f'Invalid key: {key}')
 38
 39    def __setitem__(self, key, value):
 40        """
 41        To set an attribute of the class using the square bracket notation,
 42        similar to a dictionary.
 43        """
 44        try:
 45            setattr(self, key, value)
 46        except AttributeError:
 47            raise KeyError(f'Invalid key: {key}')
 48
 49    @property
 50    def phen_df(self) -> pd.DataFrame:
 51        """
 52        Retrieve `phen_df`.
 53
 54        Returns:
 55            pd.DataFrame: 
 56                A Pandas DataFrame containing phenotype data, with the first column 
 57                representing sample identifiers.
 58        """
 59        return self.__phen_df
 60    
 61    @phen_df.setter
 62    def phen_df(self, x: pd.DataFrame):
 63        """
 64        Update `phen_df`.
 65        """
 66        self.__phen_df = x
 67    
 68    @property
 69    def n_samples(self) -> int:
 70        """
 71        Retrieve `n_samples`.
 72
 73        Returns:
 74            int: The total number of samples.
 75        """
 76        return len(self.phen_df)
 77
 78    def copy(self):
 79        """
 80        Create and return a copy of the current `MultiPhenotypeObject` instance.
 81
 82        Returns:
 83            MultiPhenotypeObject: A new instance of the current object.
 84        """
 85        return copy.copy(self)
 86    
 87    def filter_samples(
 88            self, 
 89            samples: Optional[Union[str, Sequence[str], np.ndarray]] = None, 
 90            indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None, 
 91            include: bool = True, 
 92            inplace: bool = False
 93        ) -> Optional['MultiPhenotypeObject']:
 94        """
 95        Filter samples in the `MultiPhenotypeObject` based on sample names or indexes.
 96
 97        This method allows you to include or exclude specific samples by their names,
 98        indexes, or both. When both samples and indexes are provided, the union of
 99        the specified samples is used. Negative indexes are supported and follow NumPy's indexing 
100        conventions. It updates the `lai`, `samples`, and `haplotypes` attributes accordingly.
101
102        Args:
103            samples (str or array_like of str, optional): 
104                 Names of the samples to include or exclude. Can be a single sample name or a
105                 sequence of sample names. Default is None.
106            indexes (int or array_like of int, optional):
107                Indexes of the samples to include or exclude. Can be a single index or a sequence
108                of indexes. Negative indexes are supported. Default is None.
109            include (bool, default=True): 
110                If True, includes only the specified samples. If False, excludes the specified
111                samples. Default is True.
112            inplace (bool, default=False): 
113                If True, modifies the object in place. If False, returns a new
114                `MultiPhenotypeObject` with the samples filtered. Default is False.
115
116        Returns:
117            Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples 
118            filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None.
119        """
120        # Ensure at least one of samples or indexes is provided
121        if samples is None and indexes is None:
122            raise ValueError("At least one of 'samples' or 'indexes' must be provided.")
123
124        n_samples = self.n_samples
125
126        # Create mask based on sample names
127        if samples is not None:
128            samples = np.atleast_1d(samples)
129            # Extract sample names from the DataFrame
130            sample_names = self.__phen_df.iloc[:, 0].values
131            # Create mask for samples belonging to specified names
132            mask_samples = np.isin(sample_names, samples)
133        else:
134            mask_samples = np.zeros(n_samples, dtype=bool)
135
136        # Create mask based on sample indexes
137        if indexes is not None:
138            indexes = np.atleast_1d(indexes)
139            # Adjust negative indexes
140            indexes = np.mod(indexes, n_samples)
141            if np.any((indexes < 0) | (indexes >= n_samples)):
142                raise IndexError("One or more sample indexes are out of bounds.")
143            # Create mask for samples at specified indexes
144            mask_indexes = np.zeros(n_samples, dtype=bool)
145            mask_indexes[indexes] = True
146        else:
147            mask_indexes = np.zeros(n_samples, dtype=bool)
148
149        # Combine masks using logical OR (union of samples)
150        mask_combined = mask_samples | mask_indexes
151
152        if not include:
153            # Invert mask if excluding samples
154            mask_combined = ~mask_combined
155
156        # Filter the phenotype DataFrame
157        if inplace:
158            self['phen_df'] = self['phen_df'][mask_combined].reset_index(drop=True)
159            return None
160        else:
161            phen_obj = self.copy()
162            phen_obj['phen_df'] = phen_obj['phen_df'][mask_combined].reset_index(drop=True)
163            return phen_obj

A class for multi-phenotype data.

This class serves as a container for phenotype data, allowing for operations such as filtering samples and accessing phenotype information. It uses a DataFrame to store the data, with the first column reserved for the sample identifiers.

MultiPhenotypeObject(phen_df: pandas.core.frame.DataFrame)
17    def __init__(
18        self,
19        phen_df: pd.DataFrame
20    ) -> None:
21        """
22        Args:
23            phen_df (pd.DataFrame): 
24                A Pandas DataFrame containing phenotype data, with the first column 
25                representing sample identifiers.
26        """
27        self.__phen_df = phen_df
Arguments:
  • phen_df (pd.DataFrame): A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.
phen_df: pandas.core.frame.DataFrame
49    @property
50    def phen_df(self) -> pd.DataFrame:
51        """
52        Retrieve `phen_df`.
53
54        Returns:
55            pd.DataFrame: 
56                A Pandas DataFrame containing phenotype data, with the first column 
57                representing sample identifiers.
58        """
59        return self.__phen_df

Retrieve phen_df.

Returns:

pd.DataFrame: A Pandas DataFrame containing phenotype data, with the first column representing sample identifiers.

n_samples: int
68    @property
69    def n_samples(self) -> int:
70        """
71        Retrieve `n_samples`.
72
73        Returns:
74            int: The total number of samples.
75        """
76        return len(self.phen_df)

Retrieve n_samples.

Returns:

int: The total number of samples.

def copy(self):
78    def copy(self):
79        """
80        Create and return a copy of the current `MultiPhenotypeObject` instance.
81
82        Returns:
83            MultiPhenotypeObject: A new instance of the current object.
84        """
85        return copy.copy(self)

Create and return a copy of the current MultiPhenotypeObject instance.

Returns:

MultiPhenotypeObject: A new instance of the current object.

def filter_samples( self, samples: Union[str, Sequence[str], numpy.ndarray, NoneType] = None, indexes: Union[int, Sequence[int], numpy.ndarray, NoneType] = None, include: bool = True, inplace: bool = False) -> Optional[MultiPhenotypeObject]:
 87    def filter_samples(
 88            self, 
 89            samples: Optional[Union[str, Sequence[str], np.ndarray]] = None, 
 90            indexes: Optional[Union[int, Sequence[int], np.ndarray]] = None, 
 91            include: bool = True, 
 92            inplace: bool = False
 93        ) -> Optional['MultiPhenotypeObject']:
 94        """
 95        Filter samples in the `MultiPhenotypeObject` based on sample names or indexes.
 96
 97        This method allows you to include or exclude specific samples by their names,
 98        indexes, or both. When both samples and indexes are provided, the union of
 99        the specified samples is used. Negative indexes are supported and follow NumPy's indexing 
100        conventions. It updates the `lai`, `samples`, and `haplotypes` attributes accordingly.
101
102        Args:
103            samples (str or array_like of str, optional): 
104                 Names of the samples to include or exclude. Can be a single sample name or a
105                 sequence of sample names. Default is None.
106            indexes (int or array_like of int, optional):
107                Indexes of the samples to include or exclude. Can be a single index or a sequence
108                of indexes. Negative indexes are supported. Default is None.
109            include (bool, default=True): 
110                If True, includes only the specified samples. If False, excludes the specified
111                samples. Default is True.
112            inplace (bool, default=False): 
113                If True, modifies the object in place. If False, returns a new
114                `MultiPhenotypeObject` with the samples filtered. Default is False.
115
116        Returns:
117            Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples 
118            filtered if `inplace=False`. If `inplace=True`, modifies the object in place and returns None.
119        """
120        # Ensure at least one of samples or indexes is provided
121        if samples is None and indexes is None:
122            raise ValueError("At least one of 'samples' or 'indexes' must be provided.")
123
124        n_samples = self.n_samples
125
126        # Create mask based on sample names
127        if samples is not None:
128            samples = np.atleast_1d(samples)
129            # Extract sample names from the DataFrame
130            sample_names = self.__phen_df.iloc[:, 0].values
131            # Create mask for samples belonging to specified names
132            mask_samples = np.isin(sample_names, samples)
133        else:
134            mask_samples = np.zeros(n_samples, dtype=bool)
135
136        # Create mask based on sample indexes
137        if indexes is not None:
138            indexes = np.atleast_1d(indexes)
139            # Adjust negative indexes
140            indexes = np.mod(indexes, n_samples)
141            if np.any((indexes < 0) | (indexes >= n_samples)):
142                raise IndexError("One or more sample indexes are out of bounds.")
143            # Create mask for samples at specified indexes
144            mask_indexes = np.zeros(n_samples, dtype=bool)
145            mask_indexes[indexes] = True
146        else:
147            mask_indexes = np.zeros(n_samples, dtype=bool)
148
149        # Combine masks using logical OR (union of samples)
150        mask_combined = mask_samples | mask_indexes
151
152        if not include:
153            # Invert mask if excluding samples
154            mask_combined = ~mask_combined
155
156        # Filter the phenotype DataFrame
157        if inplace:
158            self['phen_df'] = self['phen_df'][mask_combined].reset_index(drop=True)
159            return None
160        else:
161            phen_obj = self.copy()
162            phen_obj['phen_df'] = phen_obj['phen_df'][mask_combined].reset_index(drop=True)
163            return phen_obj

Filter samples in the MultiPhenotypeObject based on sample names or indexes.

This method allows you to include or exclude specific samples by their names, indexes, or both. When both samples and indexes are provided, the union of the specified samples is used. Negative indexes are supported and follow NumPy's indexing conventions. It updates the phen_df attribute accordingly.

Arguments:
  • samples (str or array_like of str, optional): Names of the samples to include or exclude. Can be a single sample name or a sequence of sample names. Default is None.
  • indexes (int or array_like of int, optional): Indexes of the samples to include or exclude. Can be a single index or a sequence of indexes. Negative indexes are supported. Default is None.
  • include (bool, default=True): If True, includes only the specified samples. If False, excludes the specified samples. Default is True.
  • inplace (bool, default=False): If True, modifies the object in place. If False, returns a new MultiPhenotypeObject with the samples filtered. Default is False.
Returns:

Optional[MultiPhenotypeObject]: Returns a new MultiPhenotypeObject with the specified samples filtered if inplace=False. If inplace=True, modifies the object in place and returns None.

class UKBPhenotypeObject:
  6class UKBPhenotypeObject():
  7    """
  8    A class for UK Biobank (UKB) phenotype data.
  9
 10    This class provides a structured way to handle phenotype information, including sample identifiers,
 11    the counts of cases and controls, and haplotype data.
 12    """
 13    def __init__(
 14        self, 
 15        samples: List, 
 16        n_samples: int, 
 17        cases: List, 
 18        n_cases: int, 
 19        controls: List, 
 20        n_controls: int, 
 21        all_haplotypes: List, 
 22        cases_haplotypes: List, 
 23        controls_haplotypes: List
 24    ) -> None:
 25        """
 26        Initialize the UKBPhenotypeObject with phenotype data.
 27
 28        Args:
 29            samples (list of str): 
 30                A list of sample identifiers.
 31            n_samples (int): 
 32                The total number of samples.
 33            cases (list of str): 
 34                A list of identifiers for the cases.
 35            n_cases (int): 
 36                The total number of cases.
 37            controls (list of str): 
 38                A list of identifiers for the controls.
 39            n_controls (int): 
 40                The total number of controls.
 41            all_haplotypes (list of str): 
 42                A list of haplotypes for all samples.
 43            cases_haplotypes (list of str): 
 44                A list of haplotypes for the cases.
 45            controls_haplotypes (list of str): 
 46                A list of haplotypes for the controls.
 47        """
 48        self.__samples = samples
 49        self.__n_samples = n_samples
 50        self.__cases = cases
 51        self.__n_cases = n_cases
 52        self.__controls = controls
 53        self.__n_controls = n_controls
 54        self.__all_haplotypes = all_haplotypes
 55        self.__cases_haplotypes = cases_haplotypes
 56        self.__controls_haplotypes = controls_haplotypes
 57
 58    def __getitem__(self, key):
 59        """
 60        To access an attribute of the class using the square bracket notation,
 61        similar to a dictionary.
 62        """
 63        try:
 64            return getattr(self, key)
 65        except:
 66            raise KeyError(f'Invalid key: {key}')
 67
 68    def __setitem__(self, key, value):
 69        """
 70        To set an attribute of the class using the square bracket notation,
 71        similar to a dictionary.
 72        """
 73        try:
 74            setattr(self, key, value)
 75        except AttributeError:
 76            raise KeyError(f'Invalid key: {key}')
 77
 78    @property
 79    def samples(self) -> List:
 80        """
 81        Retrieve `samples`.
 82
 83        Returns:
 84            List of str: A list of sample identifiers.
 85        """
 86        return self.__samples
 87    
 88    @property
 89    def n_samples(self) -> int:
 90        """
 91        Retrieve `n_samples`.
 92
 93        Returns:
 94            int: The total number of samples.
 95        """
 96        return self.__n_samples
 97    
 98    @property
 99    def cases(self) -> List:
100        """
101        Retrieve `cases`.
102
103        Returns:
104            List of str: A list of identifiers for the cases.
105        """
106        return self.__cases
107    
108    @property
109    def n_cases(self) -> int:
110        """
111        Retrieve `n_cases`.
112
113        Returns:
114            int: The total number of cases.
115        """
116        return self.__n_cases
117    
118    @property
119    def controls(self) -> List:
120        """
121        Retrieve `controls`.
122
123        Returns:
124            List of str: A list of identifiers for the controls.
125        """
126        return self.__controls
127    
128    @property
129    def n_controls(self) -> int:
130        """
131        Retrieve `n_controls`.
132
133        Returns:
134            int: The total number of controls.
135        """
136        return self.__n_controls
137    
138    @property
139    def all_haplotypes(self) -> List:
140        """
141        Retrieve `all_haplotypes`.
142
143        Returns:
144            List of str: A list of haplotypes for all samples.
145        """
146        return self.__all_haplotypes
147    
148    @property
149    def cases_haplotypes(self) -> List:
150        """
151        Retrieve `cases_haplotypes`.
152
153        Returns:
154            List of str: A list of haplotypes for the cases.
155        """
156        return self.__cases_haplotypes
157    
158    @property
159    def controls_haplotypes(self) -> List:
160        """
161        Retrieve `controls_haplotypes`.
162
163        Returns:
164            List of str: A list of haplotypes for the controls.
165        """
166        return self.__controls_haplotypes
167
168    def copy(self):
169        """
170        Create and return a copy of the current `UKBPhenotypeObject` instance.
171
172        Returns:
173            UKBPhenotypeObject: A new instance of the current object.
174        """
175        return copy.copy(self)
176
177    def keys(self) -> List:
178        """
179        Retrieve a list of public attribute names for this `UKBPhenotypeObject` instance.
180
181        Returns:
182            List: A list of attribute names, with internal name-mangling removed, 
183                  for easier reference to public attributes in the instance.
184        """
185        return [attr.replace('_UKBPhenotypeObject__', '') for attr in vars(self)]

A class for UK Biobank (UKB) phenotype data.

This class provides a structured way to handle phenotype information, including sample identifiers, the counts of cases and controls, and haplotype data.

UKBPhenotypeObject( samples: List, n_samples: int, cases: List, n_cases: int, controls: List, n_controls: int, all_haplotypes: List, cases_haplotypes: List, controls_haplotypes: List)
13    def __init__(
14        self, 
15        samples: List, 
16        n_samples: int, 
17        cases: List, 
18        n_cases: int, 
19        controls: List, 
20        n_controls: int, 
21        all_haplotypes: List, 
22        cases_haplotypes: List, 
23        controls_haplotypes: List
24    ) -> None:
25        """
26        Initialize the UKBPhenotypeObject with phenotype data.
27
28        Args:
29            samples (list of str): 
30                A list of sample identifiers.
31            n_samples (int): 
32                The total number of samples.
33            cases (list of str): 
34                A list of identifiers for the cases.
35            n_cases (int): 
36                The total number of cases.
37            controls (list of str): 
38                A list of identifiers for the controls.
39            n_controls (int): 
40                The total number of controls.
41            all_haplotypes (list of str): 
42                A list of haplotypes for all samples.
43            cases_haplotypes (list of str): 
44                A list of haplotypes for the cases.
45            controls_haplotypes (list of str): 
46                A list of haplotypes for the controls.
47        """
48        self.__samples = samples
49        self.__n_samples = n_samples
50        self.__cases = cases
51        self.__n_cases = n_cases
52        self.__controls = controls
53        self.__n_controls = n_controls
54        self.__all_haplotypes = all_haplotypes
55        self.__cases_haplotypes = cases_haplotypes
56        self.__controls_haplotypes = controls_haplotypes

Initialize the UKBPhenotypeObject with phenotype data.

Arguments:
  • samples (list of str): A list of sample identifiers.
  • n_samples (int): The total number of samples.
  • cases (list of str): A list of identifiers for the cases.
  • n_cases (int): The total number of cases.
  • controls (list of str): A list of identifiers for the controls.
  • n_controls (int): The total number of controls.
  • all_haplotypes (list of str): A list of haplotypes for all samples.
  • cases_haplotypes (list of str): A list of haplotypes for the cases.
  • controls_haplotypes (list of str): A list of haplotypes for the controls.
samples: List
78    @property
79    def samples(self) -> List:
80        """
81        Retrieve `samples`.
82
83        Returns:
84            List of str: A list of sample identifiers.
85        """
86        return self.__samples

Retrieve samples.

Returns:

List of str: A list of sample identifiers.

n_samples: int
88    @property
89    def n_samples(self) -> int:
90        """
91        Retrieve `n_samples`.
92
93        Returns:
94            int: The total number of samples.
95        """
96        return self.__n_samples

Retrieve n_samples.

Returns:

int: The total number of samples.

cases: List
 98    @property
 99    def cases(self) -> List:
100        """
101        Retrieve `cases`.
102
103        Returns:
104            List of str: A list of identifiers for the cases.
105        """
106        return self.__cases

Retrieve cases.

Returns:

List of str: A list of identifiers for the cases.

n_cases: int
108    @property
109    def n_cases(self) -> int:
110        """
111        Retrieve `n_cases`.
112
113        Returns:
114            int: The total number of cases.
115        """
116        return self.__n_cases

Retrieve n_cases.

Returns:

int: The total number of cases.

controls: List
118    @property
119    def controls(self) -> List:
120        """
121        Retrieve `controls`.
122
123        Returns:
124            List of str: A list of identifiers for the controls.
125        """
126        return self.__controls

Retrieve controls.

Returns:

List of str: A list of identifiers for the controls.

n_controls: int
128    @property
129    def n_controls(self) -> int:
130        """
131        Retrieve `n_controls`.
132
133        Returns:
134            int: The total number of controls.
135        """
136        return self.__n_controls

Retrieve n_controls.

Returns:

int: The total number of controls.

all_haplotypes: List
138    @property
139    def all_haplotypes(self) -> List:
140        """
141        Retrieve `all_haplotypes`.
142
143        Returns:
144            List of str: A list of haplotypes for all samples.
145        """
146        return self.__all_haplotypes

Retrieve all_haplotypes.

Returns:

List of str: A list of haplotypes for all samples.

cases_haplotypes: List
148    @property
149    def cases_haplotypes(self) -> List:
150        """
151        Retrieve `cases_haplotypes`.
152
153        Returns:
154            List of str: A list of haplotypes for the cases.
155        """
156        return self.__cases_haplotypes

Retrieve cases_haplotypes.

Returns:

List of str: A list of haplotypes for the cases.

controls_haplotypes: List
158    @property
159    def controls_haplotypes(self) -> List:
160        """
161        Retrieve `controls_haplotypes`.
162
163        Returns:
164            List of str: A list of haplotypes for the controls.
165        """
166        return self.__controls_haplotypes

Retrieve controls_haplotypes.

Returns:

List of str: A list of haplotypes for the controls.

def copy(self):
168    def copy(self):
169        """
170        Create and return a copy of the current `UKBPhenotypeObject` instance.
171
172        Returns:
173            UKBPhenotypeObject: A new instance of the current object.
174        """
175        return copy.copy(self)

Create and return a copy of the current UKBPhenotypeObject instance.

Returns:

UKBPhenotypeObject: A new instance of the current object.

def keys(self) -> List:
177    def keys(self) -> List:
178        """
179        Retrieve a list of public attribute names for this `UKBPhenotypeObject` instance.
180
181        Returns:
182            List: A list of attribute names, with internal name-mangling removed, 
183                  for easier reference to public attributes in the instance.
184        """
185        return [attr.replace('_UKBPhenotypeObject__', '') for attr in vars(self)]

Retrieve a list of public attribute names for this UKBPhenotypeObject instance.

Returns:

List: A list of attribute names, with internal name-mangling removed, for easier reference to public attributes in the instance.