Source code for skcyto.preprocessing
import numpy as np
from flowutils import logicle_c
from numpy.typing import NDArray
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
from sklearn.utils.validation import check_is_fitted
class LogicleTransformer(BaseEstimator, TransformerMixin, OneToOneFeatureMixin):
    """Logicle Transformation

    Implemented according to GatingML2.0 specification.

    See also
    Moore WA and Parks DR. Update for the logicle data scale including operational
    code implementations. Cytometry A., 2012:81A(4):273–277.

    Parameters
    ----------
    t : float
        Upper bound of the linear scale, by default 262144
    m : float
        Number of decades for the logarithmic scale, by default 4.5
    w : float
        Number of decades for linear scale, by default 0.5
    a : float
        Number of negative decades, by default 0
    """

    # No parameter constraints are declared for this estimator.
    _parameter_constraints: dict = {}

    # Float dtypes that are passed through validation unchanged; any other
    # numeric dtype (e.g. integers) is converted to the first entry so that the
    # transformed values are not truncated when written back into the array.
    _FLOAT_DTYPES = (np.float64, np.float32, np.float16)

    def __init__(self,
                 t: float = 262144,
                 m: float = 4.5,
                 w: float = 0.5,
                 a: float = 0):
        self.t = t
        self.m = m
        self.w = w
        self.a = a

    def _reset(self):
        """Drop the sample counter recorded by a previous call to ``fit``."""
        if hasattr(self, "n_samples_seen_"):
            del self.n_samples_seen_

    def _validated_float_copy(self, X: NDArray) -> NDArray:
        """Check that the estimator is fitted and return a float copy of ``X``.

        The copy keeps the caller's array untouched, because the transforms
        overwrite the columns of the returned array in place.
        """
        check_is_fitted(self)
        return self._validate_data(
            X,
            reset=False,
            dtype=self._FLOAT_DTYPES,
            copy=True,
            force_all_finite=True,
        )

    def fit(self, X: NDArray, y: NDArray = None):
        """Validate parameters and data; the transform itself is fully user-defined.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        self
            The estimator, with ``n_samples_seen_`` set to the number of rows of X.
        """
        self._validate_params()
        # Every call to fit starts from scratch: forget the previous state and
        # let the validation (re)set the n_features_in_ attribute.
        self._reset()
        X = self._validate_data(
            X,
            reset=True,
            force_all_finite=True,
        )
        self.n_samples_seen_ = X.shape[0]
        return self

    def transform(self, X: NDArray) -> NDArray:
        """Transform according to specified logicle

        Parameters
        ----------
        X : NDArray
            Data

        Returns
        -------
        NDArray
            Transformed data
        """
        X = self._validated_float_copy(X)
        # The scaling routine is applied to one column at a time.
        for col in range(X.shape[1]):
            X[:, col] = logicle_c.logicle_scale(
                self.t, self.w, self.m, self.a, X[:, col]
            )
        return X

    def inverse_transform(self, X: NDArray) -> NDArray:
        """Perform inverse logicle transform

        Parameters
        ----------
        X : NDArray
            Input data

        Returns
        -------
        NDArray
            inversely transformed data
        """
        X = self._validated_float_copy(X)
        for col in range(X.shape[1]):
            X[:, col] = logicle_c.logicle_inverse(
                self.t, self.w, self.m, self.a, X[:, col]
            )
        return X

    def fit_transform(self, X: NDArray, y=None) -> NDArray:
        """Fit and transform data

        As fit only validates its input, this is equivalent to running transform
        and is only implemented for API conventions. The call is delegated to the
        parent class implementation.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        NDArray
            Transformed data
        """
        return super().fit_transform(X, y)
class HyperlogTransformer(BaseEstimator, TransformerMixin, OneToOneFeatureMixin):
    """Hyperlog transform

    Implemented according to GatingML2.0 specification.

    See also
    Bagwell CB. Hyperlog-a flexible log-like transform for negative, zero, and
    positive valued data. Cytometry A., 2005:64(1):34–42.

    Parameters
    ----------
    t : float
        Upper bound of the linear scale, by default 262144
    m : float
        Number of decades for the logarithmic scale, by default 4.5
    w : float
        Number of decades for linear scale, by default 0.5
    a : float
        Number of negative decades, by default 0
    """

    # No parameter constraints are declared for this estimator.
    _parameter_constraints: dict = {}

    # Float dtypes that are passed through validation unchanged; any other
    # numeric dtype (e.g. integers) is converted to the first entry so that the
    # transformed values are not truncated when written back into the array.
    _FLOAT_DTYPES = (np.float64, np.float32, np.float16)

    def __init__(self,
                 t: float = 262144,
                 m: float = 4.5,
                 w: float = 0.5,
                 a: float = 0):
        self.t = t
        self.m = m
        self.w = w
        self.a = a

    def _reset(self):
        """Drop the sample counter recorded by a previous call to ``fit``."""
        if hasattr(self, "n_samples_seen_"):
            del self.n_samples_seen_

    def _validated_float_copy(self, X: NDArray) -> NDArray:
        """Check that the estimator is fitted and return a float copy of ``X``.

        The copy keeps the caller's array untouched, because the transforms
        overwrite the columns of the returned array in place.
        """
        check_is_fitted(self)
        return self._validate_data(
            X,
            reset=False,
            dtype=self._FLOAT_DTYPES,
            copy=True,
            force_all_finite=True,
        )

    def fit(self, X: NDArray, y: NDArray = None):
        """Validate parameters and data; the transform itself is fully user-defined.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        self
            The estimator, with ``n_samples_seen_`` set to the number of rows of X.
        """
        self._validate_params()
        # Every call to fit starts from scratch: forget the previous state and
        # let the validation (re)set the n_features_in_ attribute. Without the
        # reset, a second fit would be checked against the first one.
        self._reset()
        X = self._validate_data(
            X,
            reset=True,
            force_all_finite=True,
        )
        self.n_samples_seen_ = X.shape[0]
        return self

    def transform(self, X: NDArray) -> NDArray:
        """Transform according to specified hyperlog

        Parameters
        ----------
        X : NDArray
            Data

        Returns
        -------
        NDArray
            Transformed data
        """
        X = self._validated_float_copy(X)
        # The scaling routine is applied to one column at a time.
        for col in range(X.shape[1]):
            X[:, col] = logicle_c.hyperlog_scale(
                self.t, self.w, self.m, self.a, X[:, col]
            )
        return X

    def inverse_transform(self, X: NDArray) -> NDArray:
        """Perform inverse hyperlog transform

        Parameters
        ----------
        X : NDArray
            Input data

        Returns
        -------
        NDArray
            inversely transformed data
        """
        X = self._validated_float_copy(X)
        for col in range(X.shape[1]):
            X[:, col] = logicle_c.hyperlog_inverse(
                self.t, self.w, self.m, self.a, X[:, col]
            )
        return X

    def fit_transform(self, X: NDArray, y=None) -> NDArray:
        """Fit and transform data

        As fit only validates its input, this is equivalent to running transform
        and is only implemented for API conventions. The call is delegated to the
        parent class implementation.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        NDArray
            Transformed data
        """
        return super().fit_transform(X, y)
class AsinhTransformer(BaseEstimator, TransformerMixin, OneToOneFeatureMixin):
    """Parametrized Arcsinh transform

    Implemented according to GatingML2.0 specification.
    Note that this is equivalent to a logicle transform with w = 0, which is
    how it is computed here.

    Parameters
    ----------
    t : float
        Upper bound of the linear scale, by default 262144
    m : float
        Number of decades for the logarithmic scale, by default 4.5
    a : float
        Number of negative decades, by default 0
    """

    # No parameter constraints are declared for this estimator.
    _parameter_constraints: dict = {}

    # Float dtypes that are passed through validation unchanged; any other
    # numeric dtype (e.g. integers) is converted to the first entry so that the
    # transformed values are not truncated when written back into the array.
    _FLOAT_DTYPES = (np.float64, np.float32, np.float16)

    def __init__(self,
                 t: float = 262144,
                 m: float = 4.5,
                 a: float = 0):
        self.t = t
        self.m = m
        self.a = a

    def _reset(self):
        """Drop the sample counter recorded by a previous call to ``fit``."""
        if hasattr(self, "n_samples_seen_"):
            del self.n_samples_seen_

    def _validated_float_copy(self, X: NDArray) -> NDArray:
        """Check that the estimator is fitted and return a float copy of ``X``.

        The copy keeps the caller's array untouched, because the transforms
        overwrite the columns of the returned array in place.
        """
        check_is_fitted(self)
        return self._validate_data(
            X,
            reset=False,
            dtype=self._FLOAT_DTYPES,
            copy=True,
            force_all_finite=True,
        )

    def fit(self, X: NDArray, y: NDArray = None):
        """Validate parameters and data; the transform itself is fully user-defined.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        self
            The estimator, with ``n_samples_seen_`` set to the number of rows of X.
        """
        self._validate_params()
        # Every call to fit starts from scratch: forget the previous state and
        # let the validation (re)set the n_features_in_ attribute.
        self._reset()
        X = self._validate_data(
            X,
            reset=True,
            force_all_finite=True,
        )
        self.n_samples_seen_ = X.shape[0]
        return self

    def transform(self, X: NDArray) -> NDArray:
        """Transform according to specified asinh transform

        Parameters
        ----------
        X : NDArray
            Data

        Returns
        -------
        NDArray
            Transformed data
        """
        X = self._validated_float_copy(X)
        # Logicle scaling with the linearisation width fixed to 0, one column
        # at a time.
        for col in range(X.shape[1]):
            X[:, col] = logicle_c.logicle_scale(
                self.t, 0, self.m, self.a, X[:, col]
            )
        return X

    def inverse_transform(self, X: NDArray) -> NDArray:
        """Perform inverse asinh transform

        Parameters
        ----------
        X : NDArray
            Input data

        Returns
        -------
        NDArray
            inversely transformed data
        """
        X = self._validated_float_copy(X)
        for col in range(X.shape[1]):
            X[:, col] = logicle_c.logicle_inverse(
                self.t, 0, self.m, self.a, X[:, col]
            )
        return X

    def fit_transform(self, X: NDArray, y=None) -> NDArray:
        """Fit and transform data

        As fit only validates its input, this is equivalent to running transform
        and is only implemented for API conventions. The call is delegated to the
        parent class implementation.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        NDArray
            Transformed data
        """
        return super().fit_transform(X, y)
class LogTransformer(BaseEstimator, TransformerMixin, OneToOneFeatureMixin):
    """Parametrized Log transform

    Implemented according to GatingML2.0 specification.

    The forward transform computes ``(1 / m) * log10(x / t) + 1`` and the
    inverse computes ``10 ** ((y - 1) * m) * t``.

    Parameters
    ----------
    t : float
        Upper bound of the linear scale, by default 262144
    m : float
        Number of decades for the logarithmic scale, by default 4.5
    """

    # No parameter constraints are declared for this estimator.
    _parameter_constraints: dict = {}

    # Float dtypes that are passed through validation unchanged; any other
    # numeric dtype (e.g. integers) is converted to the first entry so that the
    # transformed values are not truncated to integers.
    _FLOAT_DTYPES = (np.float64, np.float32, np.float16)

    def __init__(self,
                 t: float = 262144,
                 m: float = 4.5):
        # The transform methods read both parameters from the instance, so
        # they have to be stored here.
        self.t = t
        self.m = m

    def _reset(self):
        """Drop the sample counter recorded by a previous call to ``fit``."""
        if hasattr(self, "n_samples_seen_"):
            del self.n_samples_seen_

    def _validated_float(self, X: NDArray) -> NDArray:
        """Check that the estimator is fitted and return ``X`` as a float array."""
        check_is_fitted(self)
        return self._validate_data(
            X,
            reset=False,
            dtype=self._FLOAT_DTYPES,
            force_all_finite=True,
        )

    def fit(self, X: NDArray, y: NDArray = None):
        """Validate parameters and data; the transform itself is fully user-defined.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        self
            The estimator, with ``n_samples_seen_`` set to the number of rows of X.
        """
        self._validate_params()
        # Every call to fit starts from scratch: forget the previous state and
        # let the validation (re)set the n_features_in_ attribute.
        self._reset()
        X = self._validate_data(
            X,
            reset=True,
            force_all_finite=True,
        )
        self.n_samples_seen_ = X.shape[0]
        return self

    def transform(self, X: NDArray) -> NDArray:
        """Transform according to specified log transform

        Parameters
        ----------
        X : NDArray
            Data

        Returns
        -------
        NDArray
            Transformed data
        """
        X = self._validated_float(X)
        # The arithmetic allocates a new array, so the input is left untouched.
        scaled = (1 / self.m) * np.log10(X / self.t) + 1
        # Keep the dtype of the validated input, whatever the parameter types.
        return scaled.astype(X.dtype, copy=False)

    def inverse_transform(self, X: NDArray) -> NDArray:
        """Perform inverse log transform

        Parameters
        ----------
        X : NDArray
            Input data

        Returns
        -------
        NDArray
            inversely transformed data
        """
        X = self._validated_float(X)
        restored = 10 ** ((X - 1) * self.m) * self.t
        return restored.astype(X.dtype, copy=False)

    def fit_transform(self, X: NDArray, y=None) -> NDArray:
        """Fit and transform data

        As fit only validates its input, this is equivalent to running transform
        and is only implemented for API conventions. The call is delegated to the
        parent class implementation.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        NDArray
            Transformed data
        """
        return super().fit_transform(X, y)
class CompensationTransformer(BaseEstimator, TransformerMixin, OneToOneFeatureMixin):
    """Compensate data with a user-supplied compensation matrix

    ``transform`` solves ``Y @ C = X`` for ``Y`` and ``inverse_transform``
    computes ``X @ C``.

    Parameters
    ----------
    C : NDArray
        Square compensation matrix with one row and one column per feature of
        the data, by default None. A matrix has to be set before the
        transformer is used.
    """

    def __init__(self, C: NDArray = None):
        # Stored untouched; conversion and checks happen when it is used.
        self.C = C

    def fit(self, X: NDArray, y: NDArray = None):
        """Fit only checks compatibility between input data and compensation matrix.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        self
            The estimator itself.

        Raises
        ------
        ValueError
            If no compensation matrix is set or it does not match the data.
        """
        X = self._validate_data(
            X,
            force_all_finite=True,
        )
        self._check_X_C_compatible(X)
        return self

    def transform(self, X: NDArray) -> NDArray:
        """Compensate data with specified compensation matrix

        Parameters
        ----------
        X : NDArray
            Data

        Returns
        -------
        NDArray
            Compensated data

        Raises
        ------
        ValueError
            If no compensation matrix is set or it does not match the data.
        """
        X = self._validate_data(
            X,
            force_all_finite=True,
        )
        self._check_X_C_compatible(X)
        matrix = np.asarray(self.C)
        # Solving C.T @ Y.T = X.T gives the same result as X @ inv(C) without
        # forming the inverse explicitly.
        return np.linalg.solve(matrix.T, X.T).T

    def inverse_transform(self, X: NDArray) -> NDArray:
        """Decompensate data

        Parameters
        ----------
        X : NDArray
            Input data

        Returns
        -------
        NDArray
            Decompensated data

        Raises
        ------
        ValueError
            If no compensation matrix is set or it does not match the data.
        """
        X = self._validate_data(
            X,
            force_all_finite=True,
        )
        self._check_X_C_compatible(X)
        return np.dot(X, np.asarray(self.C))

    def fit_transform(self, X: NDArray, y=None) -> NDArray:
        """Fit and transform data

        As fit only checks its input, this is equivalent to running transform
        and is only implemented for API conventions. The call is delegated to the
        parent class implementation.

        Parameters
        ----------
        X : NDArray
            Input data
        y : Ignored.
            Not used, only present for API conventions.

        Returns
        -------
        NDArray
            Compensated data
        """
        return super().fit_transform(X, y)

    def _check_X_C_compatible(self, X: NDArray):
        """Raise ``ValueError`` unless the compensation matrix fits the data ``X``.

        The matrix has to be two-dimensional and square, with as many rows and
        columns as ``X`` has features.
        """
        matrix = getattr(self, "C", None)
        if matrix is None:
            raise ValueError("No compensation matrix was provided.")
        matrix = np.asarray(matrix)
        n_features = X.shape[1]
        # Both the solve in transform and the product in inverse_transform need
        # a square (n_features, n_features) matrix.
        if matrix.ndim != 2 or matrix.shape != (n_features, n_features):
            raise ValueError("Compensation matrix is incompatible with data.")
# The Flin (linear) transformation corresponds to a MinMaxScaler.