Source code for casimac.casimac

# -*- coding: utf-8 -*-
"""CASIMAC: multi-class/single-label classifier with gradients. 
"""


__version__ = "1.2.4"


import warnings
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_is_fitted, check_random_state
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import pairwise_distances
from scipy.special import erf
from scipy import optimize


[docs] class CASIMAClassifier(BaseEstimator, ClassifierMixin): """Multi-class/single-label classifier. Parameters ---------- model_constructor : callable Method that returns an sklearn estimator. This estimator is trained on the estimation of latent variables from features. In particular, the estimator must provide a ``fit`` method (for training) and a ``predict`` method (for predictions). For the prediction of class probabilities, the ``predict`` method must also support a second argument ``return_std``, which returns the standard deviations of the predictions together with the mean values if set to True. It is assumed that the predictions of the estimator obey a Gaussian probability distribution with the aforementioned mean and variance. repulsion_strength : float, optional (default: 1) Scalar strength used for the repulsion term (``beta``). Should be non-negative. Choose 0 to disable repulsions. repulsion_number : int, optional (default: 1) Number of nearest neighbors used for the repulsion term (``k_beta``). attraction_strength : float, optional (default: 1) Scalar strength used for the attraction term (``alpha``). Should be non-negative. Choose 0 to disable attraction. attraction_number : int, optional (default: 1) Number of nearest neighbors used for the attraction term (``k_alpha``). metric : str or callable, optional (default: 'euclidean') Metric options used in ``sklearn.metrics.pairwise_distances``. See the respective documentation for more details. proba_calc_method : 'analytical', 'MC' or 'MC-fast', optional (default: 'analytical') Determines the method used for the prediction of class probabilities. Choose 'analytical' for an analytical calculation (can only be used for two classes, otherwise fall back to 'MC'). Choose 'MC' for a sequential Monte Carlo implementation (slower, less memory) and 'MC-fast' for a simultaneous Monte Carlo implementation (faster, more memory). proba_NMC : int, optional (default: 1000) Number of Monte Carlo samples (per dimension) for the prediciton of class probabilities. p_calc_method: 'iterative', 'explicit', optional (default: 'iterative') Determines the method for the calculation of the simplex vectors. random_state : int, RandomState instance or None, optional (default: None) The random generator to use for the prediction of class probabilities. If an integer is given, a new random generator with this seed is created. ``None`` leads to a newly generated seed. l_repulsion_reduce : callable, optional (default: numpy.nanmean) Legacy option, not recommended! Function to reduce the set of nearest neighbor distances to a single number used in the repulsion term. Note that numpy.nan may occur in the list of distances. l_repulsion_fun : callable or None, optional (default: None) Legacy option, not recommended! Final function that is applied to the repulsion term. Set to ``None`` to disable the function call. l_attraction_reduce : callable, optional (default: numpy.nanmean) Legacy option, not recommended! Function to reduce the set of nearest neighbor distances to a single number used in the attraction term. Note that numpy.nan may occur in the list of distances. l_attraction_fun : callable or None, optional (default: numpy.reciprocal) Legacy option, not recommended! Final function that is applied to the attraction term. Set to ``None`` to disable the function call. l_c_transformation_fun : callable or None, optional (default: None) Legacy option, not recommended! Optional transformtion function (e.g., for rescaling) of the latent variable coefficients. Set to ``None`` to disable the function call. Attributes ---------- X_ : array-like of shape (n_samples, n_features) Feature vectors in training data. y_ : array-like of shape (n_samples,) Target labels in training data. classes_ : array of shape (n_classes,) Unique class labels in y\_. d_ : array-like of shape (n_samples,) or (n_samples, n_targets) Vector of latent variables calculated from X\_ and y\_. model_ : obj Instance of the model trained on the estimation of latent variables from features. Is created by the call of model_constructor. random_state_ : numpy.random.RandomState Instance of the random state used for Monte Carlo predictions of the class probabilities. """ def __init__(self, model_constructor, repulsion_strength=1, repulsion_number=1, attraction_strength=0, attraction_number=1, metric='euclidean', proba_calc_method='analytical', proba_NMC=1000, p_calc_method='iterative', random_state=None, l_repulsion_reduce=np.nanmean, l_repulsion_fun=None, l_attraction_reduce=np.nanmean, l_attraction_fun=np.reciprocal, l_c_transformation_fun=None): # Settings self.model_constructor = model_constructor self.repulsion_strength = repulsion_strength self.repulsion_number = repulsion_number self.attraction_strength = attraction_strength self.attraction_number = attraction_number self.metric = metric self.proba_calc_method = proba_calc_method # [MC, MC-fast, analytical] self.proba_NMC = proba_NMC # per dimension self.p_calc_method = p_calc_method self.random_state = random_state self.l_repulsion_reduce = l_repulsion_reduce # legacy option self.l_repulsion_fun = l_repulsion_fun # legacy option self.l_attraction_reduce = l_attraction_reduce # legacy option self.l_attraction_fun = l_attraction_fun # legacy option self.l_c_transformation_fun = l_c_transformation_fun # legacy option
[docs] def _calc_class_normals(self, n): """Calculate class normals (i.e., the negative vertices) of an (n-1)-simplex. The results are stored in the attribute ``_class_normals`` during a call of ``fit``. """ # Choose calculation method if self.p_calc_method == 'iterative': return self._calc_class_normals_iterative(n) elif self.p_calc_method == 'explicit': return self._calc_class_normals_explicit(n) else: raise NotImplementedError("Unknown class normal calculation method '{}'!".format(self.p_calc_method))
[docs] def _calc_class_normals_iterative(self, n): """Calculate the class normals (i.e., the negative vertices) of an (n-1)-simplex using an iterative method. """ # Calculate class normals v = np.zeros((n-1,n)) angle = -1/(n-1) v[0,0] = 1 v[0,1:] = angle for i in range(1,n-1): v[i,i] = np.sqrt(1 - np.sum(v[:i,i]**2)) for j in range(i+1,n): v[i,j] = (angle - np.dot(v[:i,i],v[:i,j])) / v[i,i] # Return class normals in rows: (vector 1, ..., vector n), each of dimension n-1 return v.T
[docs] def _calc_class_normals_explicit(self, n): """Calculate the class normals (i.e., the negative vertices) of an (n-1)-simplex using an explicit method. """ # Calculate simplex vertices q = np.zeros((n,n-1)) for i in range(1,n): q[i-1,i-1] = 1 q[n-1,:] = (1+np.sqrt(n))/(n-1) v = np.empty((n,n-1)) c = (1+1/np.sqrt(n))/(n-1) nu = np.sqrt(1-1/n) for i in range(1,n+1): v[i-1,:] = (q[i-1] - c) / nu # Return class normals in rows: (vector 1, ..., vector n), each of dimension n-1 return -v
[docs] def _calc_binary_projectors(self): """Calculate binary projectors (normalized segmentation planes), which are used to obtain the decision function. They are stored in the attribute ``_binary_projectors`` during a call of ``fit``. """ # Calculate (normalized) binary projectors bp = np.zeros((self._num_classes,self._num_classes,self._num_classes-1)) for i in range(self._num_classes): for j in range(self._num_classes): if i != j: bp[i,j,:] = self._class_normals[i,:]-self._class_normals[j,:] bp[i,j,:] /= np.linalg.norm(bp[i,j,:]) # Return projectors as array [class i, class j, vector] return bp
[docs] def _calc_latent_coefficients(self, distance_to_own, distance_to_other_list): """Calculate coefficients (combined from repulsion and attraction terms) for the transformation to the latent space. Specifically, map from the distance arrays of the own and the other class to an array of reduced distances. This mapping is performed for each class. Notes: 1) Nearest neighbor calculation may fail if there are not enough neighbors available. 2) Returned coefficients must be non-negative. """ # Setup coefficients c = np.zeros((len(distance_to_other_list), distance_to_own.shape[0])) # Repulsion (based on distances to other) if self.repulsion_number > 0 and self.repulsion_strength != 0: #for idx in range(len(distance_to_other_list)): # distance_to_other_list[idx][distance_to_other_list[idx]<=0] = np.nan # remove invalids idx_list = [np.argpartition(d, min(self.repulsion_number, d.shape[1]-1), axis=1) for d in distance_to_other_list] repulse = np.array([[self.l_repulsion_reduce(d[i[:self.repulsion_number]]) for i, d in zip(idx,distances)] for idx, distances in zip(idx_list,distance_to_other_list) ]) if self.l_repulsion_fun is not None: repulse = self.l_repulsion_fun(repulse) c += self.repulsion_strength * repulse # Attraction (based on distance to own) if self.attraction_number > 0 and self.attraction_strength != 0: distance_to_own[distance_to_own<=0] = np.nan # remove invalids idx_list = np.argpartition(distance_to_own, min(self.attraction_number, distance_to_own.shape[1]-1), axis=1) attract = np.array([self.l_attraction_reduce(d[i[:self.attraction_number]]) for i, d in zip(idx_list,distance_to_own)]) if self.l_attraction_fun is not None: attract = self.l_attraction_fun(attract) attract = np.tile(attract,(c.shape[0],1)) c += self.attraction_strength * attract # Return coefficients if self.l_c_transformation_fun is not None: c = self.l_c_transformation_fun(c) return c
[docs] def _calc_distance_features(self, X, y): """Calculate distance features. """ d = np.zeros((X.shape[0],self._num_classes-1)) D = pairwise_distances(X, metric=self.metric) X_class_idxa_list = [np.where(y == self.classes_[class_idx])[0] for class_idx in range(self._num_classes)] for class_idx in range(self._num_classes): other_class_idx_list = [other_class_idx for other_class_idx in range(self._num_classes) if other_class_idx != class_idx] distance_to_own = D[np.array(X_class_idxa_list[class_idx]).reshape(-1,1),X_class_idxa_list[class_idx]] distance_to_other_list = [D[np.array(X_class_idxa_list[class_idx]).reshape(-1,1),np.array(X_class_idxa_list[other_class_idx])] for other_class_idx in other_class_idx_list] c = self._calc_latent_coefficients(distance_to_own, distance_to_other_list) d[X_class_idxa_list[class_idx]] = -np.tensordot(c.T,self._class_normals[other_class_idx_list,:],axes=1) # transformation return d
[docs] def _calc_edge_distances(self, d): """Calculate distances to edge points, which can be used to determine the correct classes. """ d = np.asarray(d).reshape(-1,self._num_classes-1) edge_distances = np.zeros((d.shape[0],self._num_classes)) for j in range(self._num_classes): edge_distances[:,j] = np.linalg.norm(self._class_normals[j,:]-d, axis=1) return edge_distances
[docs] def _calc_distance_features_to_class(self, d): """Map from distance feature space d to class space y. Minimize distance to edge points to determine the correct classes. """ best_classes = np.array(np.argmin(self._calc_edge_distances(d),axis=1),dtype=np.int64) # return first minimum by default return np.array(self.classes_)[best_classes]
[docs] def _calc_proba_analytical(self, mu, sigma, return_std): """Calculate binary class probabilities (and their standard deviations) with analytical formulas. """ mu = mu.ravel() sigma = sigma.ravel() sigma[sigma==0] = np.nan p_pos = (1+erf(mu/(np.sqrt(2)*sigma)))/2 p_pos[np.isnan(p_pos)] = (np.sign(mu[np.isnan(p_pos)])+1)/2 p_neg = 1 - p_pos p = np.concatenate((p_pos[:,np.newaxis], p_neg[:,np.newaxis]), axis=1) if return_std: p_sigma = np.zeros(p.shape) return p, p_sigma else: return p
[docs] def _calc_proba_mc(self, mu, sigma, return_std, method): """Calculate (binary or multi-class) class probabilities (and their standard devitions) with a Monte Carlo approach. """ # Setup p = np.zeros((mu.shape[0],self._num_classes)) # p[X index, class index] N = self.proba_NMC # Check shape of sigma: multivariate or univariate (i.e., a uniform gaussian), reshape accordingly if sigma.size != mu.size: sigma = np.repeat(sigma,mu.shape[1]) # Run MC: Both methods should lead to the same result. if method == 'simultaneous': # Method 1: Simultaneous calculation of query points. Fast, but needs more memory. (Try to store all in matrix v to reduce memory.) v = mu.ravel() # v = mu v = self.random_state_.normal(v,sigma.ravel(),(N,mu.size)).reshape((N*mu.shape[0],*mu.shape[1:])) # v = d v = self._calc_distance_features_to_class(v).reshape((N,mu.shape[0])) # v = y for class_idx in range(self._num_classes): p[:,class_idx] = np.sum(v==self.classes_[class_idx],axis=0) / N elif method == 'sequential': # Method 2: Sequential calculation of query points. Slower, but needs less memory. sigma = sigma.reshape(mu.shape) for idx in range(mu.shape[0]): d = self.random_state_.normal(mu[idx,:], sigma[idx,:], (N, mu.shape[1])) y = self._calc_distance_features_to_class(d) for class_idx in range(self._num_classes): p[idx,class_idx] = y[y==self.classes_[class_idx]].size / N else: raise NotImplementedError("_calc_proba_mc method '{}' not implemented!".format(method)) # Return result if return_std: p_sigma = np.sqrt((p * (1-p)**2 + (1-p) * p**2) / (N-1)) return p, p_sigma else: return p
[docs] def _calc_proba(self, mu, sigma, return_std): """Call suitable probability calculator depending on options. """ if self.proba_calc_method == 'analytical': if self._num_classes == 2: self.used_proba_calc_method_ = 'analytical' return self._calc_proba_analytical(mu, sigma, return_std) else: warnings.warn("Probability calculation method 'analytical' can only be used for 2 classes; here we have {} classes. Fall back to calculation method 'MC'.".format(self._num_classes), stacklevel=2) self.used_proba_calc_method_ = 'MC' return self._calc_proba_mc(mu, sigma, return_std, "sequential") elif self.proba_calc_method == 'MC': self.used_proba_calc_method_ = 'MC' return self._calc_proba_mc(mu, sigma, return_std, "sequential") elif self.proba_calc_method == 'MC-fast': self.used_proba_calc_method_ = 'MC-fast' return self._calc_proba_mc(mu, sigma, return_std, "simultaneous") else: raise NotImplementedError("Unknown probability calculation method '{}'!".format(self.proba_calc_method))
[docs] def _calc_proba_grad_analytical(self, mu, sigma, dmu, dsigma): """Calculate binary class probability gradients with analytical formulas. """ mu = mu.ravel() sigma = sigma.ravel() sigma[sigma==0] = np.nan dp_pos_mu = np.exp(-mu**2/(2*sigma**2))/(np.sqrt(2*np.pi)*sigma) # dp/dmu dp_pos_mu[np.isnan(dp_pos_mu)] = 0 # ~ np.inf dp_pos_sigma = -(mu*np.exp(-mu**2/(2*sigma**2)))/(np.sqrt(2*np.pi)*sigma**2) # dp/dsigma dp_pos_sigma[np.isnan(dp_pos_sigma)] = 0 # ~ np.inf dp_pos = dp_pos_mu[:,np.newaxis]*dmu[:,:,0] + dp_pos_sigma[:,np.newaxis]*dsigma[:,:,0] # dp dp_neg = -dp_pos return np.concatenate((dp_pos[:,:,np.newaxis],dp_neg[:,:,np.newaxis]), axis=2)
[docs] def _calc_proba_grad_mc(self, mu, sigma, dmu, dsigma): """Calculate (binary or multi-class) class probability gradients with a Monte Carlo approach. Notes: 1) This method is very experimental and not guaranteed to work. 2) A more stable method should be used instead. """ # TODO: replace with more stable method # Show warning message warnings.warn("The function _calc_proba_grad_mc is very experimental, use with care!") # Perform gradient calculations def mu_func(x, sample_idx, class_idx): return self._calc_proba_mc(x, sigma[sample_idx,:].reshape(1,-1))[0,class_idx] def sigma_func(x, sample_idx, class_idx): return self._calc_proba_mc(mu[sample_idx,:].reshape(1,-1), x)[0,class_idx] dp_mu = np.empty((mu.shape[0], self._num_classes, mu.shape[1])) # dp/dmu for sample_idx in range(mu.shape[0]): for class_idx in range(self._num_classes): grad = optimize.approx_fprime(mu[sample_idx,:].reshape(1,-1), mu_func, self._dproba_eps, sample_idx, class_idx) dp_mu[sample_idx, class_idx,:] = grad dp_sigma = np.empty((mu.shape[0], self._num_classes, mu.shape[1])) # dp/dsigma for sample_idx in range(mu.shape[0]): for class_idx in range(self._num_classes): grad = optimize.approx_fprime(sigma[sample_idx,:].reshape(1,-1), sigma_func, self._dproba_eps, sample_idx, class_idx) dp_sigma[sample_idx, class_idx,:] = grad dp = np.empty((mu.shape[0], dmu.shape[1], self._num_classes)) # dp for var_idx in range(dmu.shape[1]): for class_idx in range(self._num_classes): dp[:,var_idx,class_idx] = np.sum(dp_mu[:,class_idx,:] * dmu[:,var_idx,:],axis=1) + np.sum(dp_sigma[:,class_idx,:] * dsigma[:,var_idx,:],axis=1) return dp
[docs] def _calc_proba_grad(self, mu, sigma, dmu, dsigma): """Call suitable probability gradient calculator depending on the number of classes. """ if self._num_classes == 2: return self._calc_proba_grad_analytical(mu, sigma, dmu, dsigma) else: return self._calc_proba_grad_mc(mu, sigma, dmu, dsigma)
[docs] def _calc_decision_function(self, d_predict, return_idx_col_map): """Calculate decision function. """ # determine decision borders for all class combinations decision = np.zeros((d_predict.shape[0], self._num_classes*(self._num_classes-1)//2)) idx_col_map = [] for i in range(self._num_classes): for j in range(self._num_classes): if i < j: decision[:,len(idx_col_map)] = np.dot(d_predict, self._binary_projectors[i,j,:]) idx_col_map.append((i,j)) # return results in a suitable format if self._num_classes == 2: return decision.ravel() # [n_sample] else: # [n_sample, n_class * (n_class-1) / 2], border names in idx_col_map if return_idx_col_map: return decision, idx_col_map return decision
[docs] def _calc_decision_function_grad(self, dmean, return_idx_col_map): """Calculate gradient of the decision function. """ # determine decision border gradients for all class combinations decision_grad = np.zeros((dmean.shape[0], dmean.shape[1], self._num_classes*(self._num_classes-1)//2)) idx_col_map = [] for i in range(self._num_classes): for j in range(self._num_classes): if i < j: decision_grad[:,:,len(idx_col_map)] = np.dot(dmean, self._binary_projectors[i,j,:]) idx_col_map.append((i,j)) # return results in a suitable format if self._num_classes == 2: return decision_grad.reshape(dmean.shape[0], dmean.shape[1]) # [n_sample, n_var] else: # [n_sample, n_var, n_class * (n_class-1) / 2], border names in idx_col_map if return_idx_col_map: return decision_grad, idx_col_map return decision_grad
[docs] def _transform_ref(self, d, tau): """Calculate the reference transformation. """ projections = np.zeros((d.shape[0],self._num_classes)) for i in range(self._num_classes): projections[:,i] = np.dot(d*tau,self._class_normals[i,:]) transformed_projections = np.exp(projections) unit_simplex_norm = np.repeat(np.sum(transformed_projections,axis=1),self._num_classes).reshape((d.shape[0],self._num_classes)) s = transformed_projections / unit_simplex_norm return s
[docs] def _transform_scale(self, d, tau): """Calculate the scale transformation. """ s = np.zeros((d.shape[0],self._num_classes)) for j in range(d.shape[0]): exp_list = [np.exp(-1*tau*np.dot(self._class_normals[i,:],d[j,:])) for i in range(self._num_classes)] s[j,:] = np.array(exp_list) / np.sum(exp_list) return s
[docs] def _inverse_transform_ref(self, s, tau): """Calculate the inverse reference transformation. """ d = np.zeros((s.shape[0],self._num_classes-1)) projections = np.log(s) # shifted by np.sum(shifted_projections, axis=1)/self._num_classes (however, this shift is eliminated by summing over the outer product) projection_normal = (self._num_classes-1)/self._num_classes for i in range(self._num_classes): d += np.outer(projections[:,i],self._class_normals[i,:]) d *= projection_normal / tau return d
[docs] def _inverse_transform_scale(self, s, tau): """Calculate the inverse scale transformation. """ d = np.zeros((s.shape[0], self._num_classes-1)) for j in range(s.shape[0]): for i in range(self._num_classes): d[j,:] -= np.log(s[j,i])*self._class_normals[i,:] d *= (self._num_classes-1)/(tau*self._num_classes) return d
[docs] def _calc_default_tau(self, d): """Calculate the data-dependent scaling factor for transformations. """ return 1/np.min(np.std(d,axis=0))
[docs] def fit(self, X, y, d=None): """Fit Classifier. Note that the estimator may depend on the naming of the labels. That is, because the set of unique labels (stored in the attribute ``classes_``) determines the association of classes to simplex vertices and therefore different associations lead to different latent spaces. All these latent spaces are linearly homeomorphic to each other, but can lead to a different behavior of the regression model (stored in the attribute ``model_``). Parameters ---------- X : array-like of shape (n_samples, n_features) Feature vectors of training data. y : array-like of shape (n_samples,) Target labels of training data. d : latent variables, array-like of shape (n_samples, n_classes-1) or None, optional (default: None) Precalculated vector of latent variables. Set to ``None`` to calculate ``d`` automatically based on ``X`` and ``y`` (recommended). Returns ------- self : returns an instance of self. """ # Check that X and y have correct shape X, y = check_X_y(X, y) self.X_ = X self.y_ = y # Store the classes seen during fit self.classes_ = unique_labels(self.y_) self._num_classes = self.classes_.size if self._num_classes < 2: raise ValueError("At least 2 classes required!") self._class_normals = self._calc_class_normals(self._num_classes) self._binary_projectors = self._calc_binary_projectors() # Prepare rng self.random_state_ = check_random_state(self.random_state) # Prepare model self.model_ = self.model_constructor() if not hasattr(self.model_, 'fit') or not hasattr(self.model_, 'predict'): raise ValueError("Model must provide methods 'fit' and 'predict'!") # Distance features self.d_ = d if self.d_ is None: self.d_ = self._calc_distance_features(self.X_,self.y_) else: self.d_ = np.asarray(self.d_) if self.d_.shape != (self.X_.shape[0],self._num_classes-1): raise ValueError("Provided distance features have an invalid shape: {} instead of {}!".format(self.d_.shape, (self.X_.shape[0],self._num_classes-1))) # Fit model self.model_.fit(self.X_,self.d_) # Return the classifier return self
[docs] def predict(self, X): """Perform classification on an array of test vectors X. Requires a previous call of ``fit``. Parameters ---------- X : array-like of shape (n_samples, n_features) Query points where the classifier is evaluated. Returns ------- C : ndarray of shape (n_samples,) Predicted target values for X, values are from ``classes_``. """ # Check if fitted check_is_fitted(self, ['d_', 'y_']) # Make model prediction d_predict = self.model_.predict(X) return self._calc_distance_features_to_class(d_predict)
[docs] def predict_proba(self, X, return_std=False): """Return probability estimates for the test vector X. Requires a previous call of ``fit``. Note that it is assumed that the predictions of the regression model (stored in the attribute ``model_``) obey a Gaussian probability distribution. The ``predict`` method of the regression model must support a second argument ``return_std``, which returns the standard deviations of the predictions together with the mean values if set to True so that ``(mean, std) = model_.predict(X, return_std=True)``. Parameters ---------- X : array-like of shape (n_samples, n_features) Query points where the classifier is evaluated. return_std : bool, optional (default: False) If True, the standard-deviation of the predictive distribution at the query points is returned along with the mean. Returns ------- p : array-like of shape (n_samples, n_classes) Returns the probability of the samples for each class in the model. The columns correspond to the classes in sorted order, as they appear in the attribute ``classes_``. p_std : array-like of shape (n_samples,), optional Best estimate of the standard deviation of the predicted probabilities at the query points. Only returned when return_std is True. """ # Check if fitted check_is_fitted(self, ['d_', 'y_']) # Make model prediction mu, sigma = self.model_.predict(X, return_std=True) return self._calc_proba(mu, sigma, return_std)
[docs] def predict_proba_grad(self, X): """Return the gradient of the probability estimates with respect to the features. Requires a previous call of ``fit``. Note that it is assumed that the predictions of the regression model (stored in the attribute ``model_``) obey a Gaussian probability distribution. The ``predict`` method of the regression model must support a second argument ``return_std``, which returns the standard deviations of the predictions together with the mean values if set to True so that ``(mean, std) = model_.predict(X, return_std=True)``. Furthermore, the model must provide a function ``predict_grad``, which predicts the gradients of the ``(mean, std)`` predictions from the ``predict`` method with respect to the features in the same way so that ``(dmean, dstd) = model_.predict_grad(X, return_std=True)``. Parameters ---------- X : array-like of shape (n_samples, n_features) Query points where the classifier is evaluated. Returns ------- dp : array-like of shape (n_samples, n_features, n_classes) Returns the gradient of the probability of the samples with respect to each feature for each class in the model. """ # Check if fitted check_is_fitted(self, ['d_', 'y_']) # Check availability of gradients if not hasattr(self.model_, 'predict_grad'): raise NotImplementedError("Gradients not available: model does not provide a predict_grad function!") # Calculate gradients mu, sigma = self.model_.predict(X, return_std=True) dmean, dstd = self.model_.predict_grad(X, return_std=True) return self._calc_proba_grad(mu, sigma, dmean, dstd)
[docs] def decision_function(self, X, return_idx_col_map=False): """Return the binary decision functions for the test vector X. Requires a previous call of ``fit``. Parameters ---------- X : array-like of shape (n_samples, n_features) Query points where the classifier is evaluated. return_idx_col_map : bool, optional (default: False) If True, ``idx_col_map`` is returned. Returns ------- d : array-like of shape (n_samples,) for a binary classification or (n_sample, n_class * (n_class-1) / 2) otherwise Returns the decision functions in the form of an array of the form (first class index, second class index) sorted according to idx_col_map. In case of a binary classification problem, the returned array is flattened. idx_col_map : array-like of shape (n_class*(n_class-1)/2,), optional List of tuples (first class index, second class index) to identify the contents of d for a multi-class classification. The indices correspond to the classes in sorted order, as they appear in the attribute ``classes_``. Only returned when return_idx_col_map is True and there are more than two classes. In case of two classes, idx_col_map would always correspond to ((0,1),) and is therefore not returned. """ # Check if fitted check_is_fitted(self, ['d_', 'y_']) # Make model prediction d_predict = self.model_.predict(X) if len(d_predict.shape) == 1: # ensure correct shape of model output d_predict = d_predict[:,None] # Determine decision function (and optionally the idx_col_map) return self._calc_decision_function(d_predict, return_idx_col_map)
[docs] def decision_function_grad(self, X, return_idx_col_map=False): """Return the gradient of the ecision function with respect to the features. Requires a previous call of ``fit``. Note that it is assumed that the regression model (stored in the attribute ``model_``) must provide a function ``predict_grad``, which predicts the gradients of the predictions with respect to the features. Parameters ---------- X : array-like of shape (n_samples, n_features) Query points where the classifier is evaluated. return_idx_col_map : bool, optional (default: False) If ``True``, ``idx_col_map`` is returned. Returns ------- dd : array-like of shape (n_samples, n_fetaures) for a binary classification or (n_sample, n_features, n_class * (n_class-1) / 2) otherwise. Returns the gradient of the decision function with repect to the features. idx_col_map : array-like of shape (n_class*(n_class-1)/2,), optional List of tuples (first class index, second class index) to identify the contents of d for a multi-class classification. Only returned when return_idx_col_map is True and there are more than two classes. """ # Check if fitted check_is_fitted(self, ['d_', 'y_']) # Check availability of gradients if not hasattr(self.model_, 'predict_grad'): raise NotImplementedError("Gradients not available: model does not provide a predict_grad function!") # Calculate gradients dmean = self.model_.predict_grad(X) return self._calc_decision_function_grad(dmean, return_idx_col_map)
[docs] def fit_transform(self, X, y, d=None, tau=None, method='reference'): """Fit the model and transforms all latent space coordinates to another simplex space (dimensions+1). Also store the scaling factor in the attribute ``tau_``. Requires a previous call of ``fit``. Parameters ---------- X : array-like of shape (n_samples, n_features) Feature vectors of training data. y : array-like of shape (n_samples,) Target labels of training data. d : latent variables, array-like of shape (n_samples, n_classes-1) or None, optional (default: None) Precalculated vector of latent variables. Set to ``None`` to calculate ``d`` automatically based on ``X`` and ``y`` (recommended). tau : float or None, optional (default: None) Scaling factor > 0. If set to ``None``, a data-dependent scaling is used (and returned). method: 'reference' or 'scale', optional (default: 'reference') Determines the transformation method. 'reference': transformation of the simplex into rotated cones highlighting the inter-class distances (default method for visualization). 'scale': rescaling of the simplex to a unit simplex. Returns ------- s : array-like of shape (n_samples, n_classes+1) Simplex vector space coordinates as a representation of the attribute ``d_``. tau : float Scaling factor used for the transformation. Only returned when ``tau`` is set to ``None``. """ # Fit self.fit(X, y, d=d) # Perform transformation return self.transform(self.d_, tau, method)
[docs] def transform(self, d, tau=None, method='reference'): """Transform latent space coordinates to another simplex space (dimensions+1). Requires a previous call of ``fit``. Parameters ---------- d : array-like of shape (n_samples, n_classes) Latent space coordinates to transform. tau : float or None, optional (default: None) Scaling factor > 0. If set to ``None``, a data-dependent scaling is used (and returned). method: 'reference' or 'scale', optional (default: 'reference') Determines the transformation method. 'reference': transformation of the simplex into rotated cones highlighting the inter-class distances (default method for visualization). 'scale': rescaling of the simplex to a unit simplex. Returns ------- s : array-like of shape (n_samples, n_classes+1) Reference simplex vector space coordinates as a representation of the attribute ``d_``. tau : float Scaling factor used for the transformation. Only returned when ``tau`` is set to ``None``. """ # Check if fitted check_is_fitted(self, ['d_', 'y_']) # Determine tau if tau is None: # use default data-dependent scaling tau = self._calc_default_tau(d) return_tau = True else: return_tau = False # Choose transformation method if method == 'reference': s = self._transform_ref(d, tau) elif method == 'scale': s = self._transform_scale(d, tau) else: raise NotImplementedError("Unknown transformation method '{}'!".format(method)) # Return results if return_tau: return s, tau return s
[docs] def inverse_transform(self, s, tau, method='reference'): """Transform back from the transformed simplex space to the latent space. Requires a previous call of ``fit``. Parameters ---------- s : array-like of shape (n_samples, n_classes+1) Reference simplex vector space coordinates to transform. tau : float Scaling factor > 0. method: 'reference' or 'scale', optional (default: 'reference') Determines the transformation method. 'reference': transformation of the simplex into rotated cones highlighting the inter-class distances (default method for visualization). 'scale': rescaling of the simplex to a unit simplex. Returns ------- d : array-like of shape (n_samples, n_classes) Inverse transformation of the reference simplex vector space coordinates ``s``. """ # Check if fitted check_is_fitted(self, ['d_', 'y_']) # Choose transformation method if method == 'reference': return self._inverse_transform_ref(s, tau) elif method == 'scale': return self._inverse_transform_scale(s, tau) else: raise NotImplementedError("Unknown transformation method '{}'!".format(method))
[docs] def train(self, X, y, d=None): """Alias for ``fit`` for backward compatibility, see there. """ return self.fit(X, y, d=d)
[docs] def predict_class_label(self, X): """Alias for ``predict`` for backward compatibility, see there. """ return self.predict(X)
[docs] def predict_class_label_probability(self, X, return_std=False): """Alias for ``predict_proba`` for backward compatibility, see there. """ return self.predict_proba(X, return_std=return_std)
[docs] def inflate(self, d, tau=None): """Alias for ``transform`` with ``method='reference'`` for backward compatibility, see there. """ return self.transform(d, tau=None, method='reference')
[docs] def compress(self, s, tau): """Alias for ``inverse_transform`` with ``method='reference`` for backward compatibility, see there. """ return self.inverse_transform(s, tau, method='reference')