# -*- coding: utf-8 -*-
"""CASIMAC: multi-class/single-label classifier with gradients.
"""
__version__ = "1.2.4"
import warnings
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_is_fitted, check_random_state
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import pairwise_distances
from scipy.special import erf
from scipy import optimize
[docs]
class CASIMAClassifier(BaseEstimator, ClassifierMixin):
"""Multi-class/single-label classifier.
Parameters
----------
model_constructor : callable
Method that returns an sklearn estimator. This estimator is trained on
the estimation of latent variables from features. In particular, the
estimator must provide a ``fit`` method (for training) and a
``predict`` method (for predictions). For the prediction of class
probabilities, the ``predict`` method must also support a second
argument ``return_std``, which returns the standard deviations of the
predictions together with the mean values if set to True. It is
assumed that the predictions of the estimator obey a Gaussian
probability distribution with the aforementioned mean and variance.
repulsion_strength : float, optional (default: 1)
Scalar strength used for the repulsion term (``beta``). Should be
non-negative. Choose 0 to disable repulsions.
repulsion_number : int, optional (default: 1)
Number of nearest neighbors used for the repulsion term (``k_beta``).
attraction_strength : float, optional (default: 1)
Scalar strength used for the attraction term (``alpha``). Should be
non-negative. Choose 0 to disable attraction.
attraction_number : int, optional (default: 1)
Number of nearest neighbors used for the attraction term (``k_alpha``).
metric : str or callable, optional (default: 'euclidean')
Metric options used in ``sklearn.metrics.pairwise_distances``. See the
respective documentation for more details.
proba_calc_method : 'analytical', 'MC' or 'MC-fast', optional (default: 'analytical')
Determines the method used for the prediction of class probabilities.
Choose 'analytical' for an analytical calculation (can only be used
for two classes, otherwise fall back to 'MC'). Choose 'MC' for a
sequential Monte Carlo implementation (slower, less memory) and
'MC-fast' for a simultaneous Monte Carlo implementation (faster,
more memory).
proba_NMC : int, optional (default: 1000)
Number of Monte Carlo samples (per dimension) for the prediciton of
class probabilities.
p_calc_method: 'iterative', 'explicit', optional (default: 'iterative')
Determines the method for the calculation of the simplex vectors.
random_state : int, RandomState instance or None, optional (default: None)
The random generator to use for the prediction of class probabilities.
If an integer is given, a new random generator with this seed is
created. ``None`` leads to a newly generated seed.
l_repulsion_reduce : callable, optional (default: numpy.nanmean)
Legacy option, not recommended! Function to reduce the set of nearest
neighbor distances to a single number used in the repulsion term. Note
that numpy.nan may occur in the list of distances.
l_repulsion_fun : callable or None, optional (default: None)
Legacy option, not recommended! Final function that is applied to the
repulsion term. Set to ``None`` to disable the function call.
l_attraction_reduce : callable, optional (default: numpy.nanmean)
Legacy option, not recommended! Function to reduce the set of nearest
neighbor distances to a single number used in the attraction term. Note
that numpy.nan may occur in the list of distances.
l_attraction_fun : callable or None, optional (default: numpy.reciprocal)
Legacy option, not recommended! Final function that is applied to the
attraction term. Set to ``None`` to disable the function call.
l_c_transformation_fun : callable or None, optional (default: None)
Legacy option, not recommended! Optional transformtion function (e.g.,
for rescaling) of the latent variable coefficients. Set to ``None`` to
disable the function call.
Attributes
----------
X_ : array-like of shape (n_samples, n_features)
Feature vectors in training data.
y_ : array-like of shape (n_samples,)
Target labels in training data.
classes_ : array of shape (n_classes,)
Unique class labels in y\_.
d_ : array-like of shape (n_samples,) or (n_samples, n_targets)
Vector of latent variables calculated from X\_ and y\_.
model_ : obj
Instance of the model trained on the estimation of latent variables
from features. Is created by the call of model_constructor.
random_state_ : numpy.random.RandomState
Instance of the random state used for Monte Carlo predictions of
the class probabilities.
"""
def __init__(self, model_constructor,
repulsion_strength=1, repulsion_number=1,
attraction_strength=0, attraction_number=1,
metric='euclidean', proba_calc_method='analytical',
proba_NMC=1000, p_calc_method='iterative', random_state=None,
l_repulsion_reduce=np.nanmean, l_repulsion_fun=None,
l_attraction_reduce=np.nanmean, l_attraction_fun=np.reciprocal,
l_c_transformation_fun=None):
# Settings
self.model_constructor = model_constructor
self.repulsion_strength = repulsion_strength
self.repulsion_number = repulsion_number
self.attraction_strength = attraction_strength
self.attraction_number = attraction_number
self.metric = metric
self.proba_calc_method = proba_calc_method # [MC, MC-fast, analytical]
self.proba_NMC = proba_NMC # per dimension
self.p_calc_method = p_calc_method
self.random_state = random_state
self.l_repulsion_reduce = l_repulsion_reduce # legacy option
self.l_repulsion_fun = l_repulsion_fun # legacy option
self.l_attraction_reduce = l_attraction_reduce # legacy option
self.l_attraction_fun = l_attraction_fun # legacy option
self.l_c_transformation_fun = l_c_transformation_fun # legacy option
[docs]
def _calc_class_normals(self, n):
"""Calculate class normals (i.e., the negative vertices) of an
(n-1)-simplex. The results are stored in the attribute
``_class_normals`` during a call of ``fit``.
"""
# Choose calculation method
if self.p_calc_method == 'iterative':
return self._calc_class_normals_iterative(n)
elif self.p_calc_method == 'explicit':
return self._calc_class_normals_explicit(n)
else:
raise NotImplementedError("Unknown class normal calculation method '{}'!".format(self.p_calc_method))
[docs]
def _calc_class_normals_iterative(self, n):
"""Calculate the class normals (i.e., the negative vertices) of an
(n-1)-simplex using an iterative method.
"""
# Calculate class normals
v = np.zeros((n-1,n))
angle = -1/(n-1)
v[0,0] = 1
v[0,1:] = angle
for i in range(1,n-1):
v[i,i] = np.sqrt(1 - np.sum(v[:i,i]**2))
for j in range(i+1,n):
v[i,j] = (angle - np.dot(v[:i,i],v[:i,j])) / v[i,i]
# Return class normals in rows: (vector 1, ..., vector n), each of dimension n-1
return v.T
[docs]
def _calc_class_normals_explicit(self, n):
"""Calculate the class normals (i.e., the negative vertices) of an
(n-1)-simplex using an explicit method.
"""
# Calculate simplex vertices
q = np.zeros((n,n-1))
for i in range(1,n):
q[i-1,i-1] = 1
q[n-1,:] = (1+np.sqrt(n))/(n-1)
v = np.empty((n,n-1))
c = (1+1/np.sqrt(n))/(n-1)
nu = np.sqrt(1-1/n)
for i in range(1,n+1):
v[i-1,:] = (q[i-1] - c) / nu
# Return class normals in rows: (vector 1, ..., vector n), each of dimension n-1
return -v
[docs]
def _calc_binary_projectors(self):
"""Calculate binary projectors (normalized segmentation planes),
which are used to obtain the decision function. They are stored in the
attribute ``_binary_projectors`` during a call of ``fit``.
"""
# Calculate (normalized) binary projectors
bp = np.zeros((self._num_classes,self._num_classes,self._num_classes-1))
for i in range(self._num_classes):
for j in range(self._num_classes):
if i != j:
bp[i,j,:] = self._class_normals[i,:]-self._class_normals[j,:]
bp[i,j,:] /= np.linalg.norm(bp[i,j,:])
# Return projectors as array [class i, class j, vector]
return bp
[docs]
def _calc_latent_coefficients(self, distance_to_own, distance_to_other_list):
"""Calculate coefficients (combined from repulsion and attraction
terms) for the transformation to the latent space. Specifically, map
from the distance arrays of the own and the other class to an array of
reduced distances. This mapping is performed for each class.
Notes:
1) Nearest neighbor calculation may fail if there are not enough
neighbors available.
2) Returned coefficients must be non-negative.
"""
# Setup coefficients
c = np.zeros((len(distance_to_other_list), distance_to_own.shape[0]))
# Repulsion (based on distances to other)
if self.repulsion_number > 0 and self.repulsion_strength != 0:
#for idx in range(len(distance_to_other_list)):
# distance_to_other_list[idx][distance_to_other_list[idx]<=0] = np.nan # remove invalids
idx_list = [np.argpartition(d, min(self.repulsion_number, d.shape[1]-1), axis=1) for d in distance_to_other_list]
repulse = np.array([[self.l_repulsion_reduce(d[i[:self.repulsion_number]]) for i, d in zip(idx,distances)] for idx, distances in zip(idx_list,distance_to_other_list) ])
if self.l_repulsion_fun is not None:
repulse = self.l_repulsion_fun(repulse)
c += self.repulsion_strength * repulse
# Attraction (based on distance to own)
if self.attraction_number > 0 and self.attraction_strength != 0:
distance_to_own[distance_to_own<=0] = np.nan # remove invalids
idx_list = np.argpartition(distance_to_own, min(self.attraction_number, distance_to_own.shape[1]-1), axis=1)
attract = np.array([self.l_attraction_reduce(d[i[:self.attraction_number]]) for i, d in zip(idx_list,distance_to_own)])
if self.l_attraction_fun is not None:
attract = self.l_attraction_fun(attract)
attract = np.tile(attract,(c.shape[0],1))
c += self.attraction_strength * attract
# Return coefficients
if self.l_c_transformation_fun is not None:
c = self.l_c_transformation_fun(c)
return c
[docs]
def _calc_distance_features(self, X, y):
"""Calculate distance features.
"""
d = np.zeros((X.shape[0],self._num_classes-1))
D = pairwise_distances(X, metric=self.metric)
X_class_idxa_list = [np.where(y == self.classes_[class_idx])[0] for class_idx in range(self._num_classes)]
for class_idx in range(self._num_classes):
other_class_idx_list = [other_class_idx for other_class_idx in range(self._num_classes) if other_class_idx != class_idx]
distance_to_own = D[np.array(X_class_idxa_list[class_idx]).reshape(-1,1),X_class_idxa_list[class_idx]]
distance_to_other_list = [D[np.array(X_class_idxa_list[class_idx]).reshape(-1,1),np.array(X_class_idxa_list[other_class_idx])] for other_class_idx in other_class_idx_list]
c = self._calc_latent_coefficients(distance_to_own, distance_to_other_list)
d[X_class_idxa_list[class_idx]] = -np.tensordot(c.T,self._class_normals[other_class_idx_list,:],axes=1) # transformation
return d
[docs]
def _calc_edge_distances(self, d):
"""Calculate distances to edge points, which can be used to determine
the correct classes.
"""
d = np.asarray(d).reshape(-1,self._num_classes-1)
edge_distances = np.zeros((d.shape[0],self._num_classes))
for j in range(self._num_classes):
edge_distances[:,j] = np.linalg.norm(self._class_normals[j,:]-d, axis=1)
return edge_distances
[docs]
def _calc_distance_features_to_class(self, d):
"""Map from distance feature space d to class space y.
Minimize distance to edge points to determine the correct classes.
"""
best_classes = np.array(np.argmin(self._calc_edge_distances(d),axis=1),dtype=np.int64) # return first minimum by default
return np.array(self.classes_)[best_classes]
[docs]
def _calc_proba_analytical(self, mu, sigma, return_std):
"""Calculate binary class probabilities (and their standard deviations)
with analytical formulas.
"""
mu = mu.ravel()
sigma = sigma.ravel()
sigma[sigma==0] = np.nan
p_pos = (1+erf(mu/(np.sqrt(2)*sigma)))/2
p_pos[np.isnan(p_pos)] = (np.sign(mu[np.isnan(p_pos)])+1)/2
p_neg = 1 - p_pos
p = np.concatenate((p_pos[:,np.newaxis], p_neg[:,np.newaxis]), axis=1)
if return_std:
p_sigma = np.zeros(p.shape)
return p, p_sigma
else:
return p
[docs]
def _calc_proba_mc(self, mu, sigma, return_std, method):
"""Calculate (binary or multi-class) class probabilities (and their
standard devitions) with a Monte Carlo approach.
"""
# Setup
p = np.zeros((mu.shape[0],self._num_classes)) # p[X index, class index]
N = self.proba_NMC
# Check shape of sigma: multivariate or univariate (i.e., a uniform gaussian), reshape accordingly
if sigma.size != mu.size:
sigma = np.repeat(sigma,mu.shape[1])
# Run MC: Both methods should lead to the same result.
if method == 'simultaneous':
# Method 1: Simultaneous calculation of query points. Fast, but needs more memory. (Try to store all in matrix v to reduce memory.)
v = mu.ravel() # v = mu
v = self.random_state_.normal(v,sigma.ravel(),(N,mu.size)).reshape((N*mu.shape[0],*mu.shape[1:])) # v = d
v = self._calc_distance_features_to_class(v).reshape((N,mu.shape[0])) # v = y
for class_idx in range(self._num_classes):
p[:,class_idx] = np.sum(v==self.classes_[class_idx],axis=0) / N
elif method == 'sequential':
# Method 2: Sequential calculation of query points. Slower, but needs less memory.
sigma = sigma.reshape(mu.shape)
for idx in range(mu.shape[0]):
d = self.random_state_.normal(mu[idx,:], sigma[idx,:], (N, mu.shape[1]))
y = self._calc_distance_features_to_class(d)
for class_idx in range(self._num_classes):
p[idx,class_idx] = y[y==self.classes_[class_idx]].size / N
else:
raise NotImplementedError("_calc_proba_mc method '{}' not implemented!".format(method))
# Return result
if return_std:
p_sigma = np.sqrt((p * (1-p)**2 + (1-p) * p**2) / (N-1))
return p, p_sigma
else:
return p
[docs]
def _calc_proba(self, mu, sigma, return_std):
"""Call suitable probability calculator depending on options.
"""
if self.proba_calc_method == 'analytical':
if self._num_classes == 2:
self.used_proba_calc_method_ = 'analytical'
return self._calc_proba_analytical(mu, sigma, return_std)
else:
warnings.warn("Probability calculation method 'analytical' can only be used for 2 classes; here we have {} classes. Fall back to calculation method 'MC'.".format(self._num_classes), stacklevel=2)
self.used_proba_calc_method_ = 'MC'
return self._calc_proba_mc(mu, sigma, return_std, "sequential")
elif self.proba_calc_method == 'MC':
self.used_proba_calc_method_ = 'MC'
return self._calc_proba_mc(mu, sigma, return_std, "sequential")
elif self.proba_calc_method == 'MC-fast':
self.used_proba_calc_method_ = 'MC-fast'
return self._calc_proba_mc(mu, sigma, return_std, "simultaneous")
else:
raise NotImplementedError("Unknown probability calculation method '{}'!".format(self.proba_calc_method))
[docs]
def _calc_proba_grad_analytical(self, mu, sigma, dmu, dsigma):
"""Calculate binary class probability gradients with analytical
formulas.
"""
mu = mu.ravel()
sigma = sigma.ravel()
sigma[sigma==0] = np.nan
dp_pos_mu = np.exp(-mu**2/(2*sigma**2))/(np.sqrt(2*np.pi)*sigma) # dp/dmu
dp_pos_mu[np.isnan(dp_pos_mu)] = 0 # ~ np.inf
dp_pos_sigma = -(mu*np.exp(-mu**2/(2*sigma**2)))/(np.sqrt(2*np.pi)*sigma**2) # dp/dsigma
dp_pos_sigma[np.isnan(dp_pos_sigma)] = 0 # ~ np.inf
dp_pos = dp_pos_mu[:,np.newaxis]*dmu[:,:,0] + dp_pos_sigma[:,np.newaxis]*dsigma[:,:,0] # dp
dp_neg = -dp_pos
return np.concatenate((dp_pos[:,:,np.newaxis],dp_neg[:,:,np.newaxis]), axis=2)
[docs]
def _calc_proba_grad_mc(self, mu, sigma, dmu, dsigma):
"""Calculate (binary or multi-class) class probability gradients with a
Monte Carlo approach.
Notes:
1) This method is very experimental and not guaranteed to work.
2) A more stable method should be used instead.
"""
# TODO: replace with more stable method
# Show warning message
warnings.warn("The function _calc_proba_grad_mc is very experimental, use with care!")
# Perform gradient calculations
def mu_func(x, sample_idx, class_idx):
return self._calc_proba_mc(x, sigma[sample_idx,:].reshape(1,-1))[0,class_idx]
def sigma_func(x, sample_idx, class_idx):
return self._calc_proba_mc(mu[sample_idx,:].reshape(1,-1), x)[0,class_idx]
dp_mu = np.empty((mu.shape[0], self._num_classes, mu.shape[1])) # dp/dmu
for sample_idx in range(mu.shape[0]):
for class_idx in range(self._num_classes):
grad = optimize.approx_fprime(mu[sample_idx,:].reshape(1,-1), mu_func, self._dproba_eps, sample_idx, class_idx)
dp_mu[sample_idx, class_idx,:] = grad
dp_sigma = np.empty((mu.shape[0], self._num_classes, mu.shape[1])) # dp/dsigma
for sample_idx in range(mu.shape[0]):
for class_idx in range(self._num_classes):
grad = optimize.approx_fprime(sigma[sample_idx,:].reshape(1,-1), sigma_func, self._dproba_eps, sample_idx, class_idx)
dp_sigma[sample_idx, class_idx,:] = grad
dp = np.empty((mu.shape[0], dmu.shape[1], self._num_classes)) # dp
for var_idx in range(dmu.shape[1]):
for class_idx in range(self._num_classes):
dp[:,var_idx,class_idx] = np.sum(dp_mu[:,class_idx,:] * dmu[:,var_idx,:],axis=1) + np.sum(dp_sigma[:,class_idx,:] * dsigma[:,var_idx,:],axis=1)
return dp
[docs]
def _calc_proba_grad(self, mu, sigma, dmu, dsigma):
"""Call suitable probability gradient calculator depending on the
number of classes.
"""
if self._num_classes == 2:
return self._calc_proba_grad_analytical(mu, sigma, dmu, dsigma)
else:
return self._calc_proba_grad_mc(mu, sigma, dmu, dsigma)
[docs]
def _calc_decision_function(self, d_predict, return_idx_col_map):
"""Calculate decision function.
"""
# determine decision borders for all class combinations
decision = np.zeros((d_predict.shape[0], self._num_classes*(self._num_classes-1)//2))
idx_col_map = []
for i in range(self._num_classes):
for j in range(self._num_classes):
if i < j:
decision[:,len(idx_col_map)] = np.dot(d_predict, self._binary_projectors[i,j,:])
idx_col_map.append((i,j))
# return results in a suitable format
if self._num_classes == 2:
return decision.ravel() # [n_sample]
else: # [n_sample, n_class * (n_class-1) / 2], border names in idx_col_map
if return_idx_col_map:
return decision, idx_col_map
return decision
[docs]
def _calc_decision_function_grad(self, dmean, return_idx_col_map):
"""Calculate gradient of the decision function.
"""
# determine decision border gradients for all class combinations
decision_grad = np.zeros((dmean.shape[0], dmean.shape[1], self._num_classes*(self._num_classes-1)//2))
idx_col_map = []
for i in range(self._num_classes):
for j in range(self._num_classes):
if i < j:
decision_grad[:,:,len(idx_col_map)] = np.dot(dmean, self._binary_projectors[i,j,:])
idx_col_map.append((i,j))
# return results in a suitable format
if self._num_classes == 2:
return decision_grad.reshape(dmean.shape[0], dmean.shape[1]) # [n_sample, n_var]
else: # [n_sample, n_var, n_class * (n_class-1) / 2], border names in idx_col_map
if return_idx_col_map:
return decision_grad, idx_col_map
return decision_grad
[docs]
def _calc_default_tau(self, d):
"""Calculate the data-dependent scaling factor for transformations.
"""
return 1/np.min(np.std(d,axis=0))
[docs]
def fit(self, X, y, d=None):
"""Fit Classifier.
Note that the estimator may depend on the naming of the labels. That
is, because the set of unique labels (stored in the attribute
``classes_``) determines the association of classes to simplex
vertices and therefore different associations lead to different latent
spaces. All these latent spaces are linearly homeomorphic to each
other, but can lead to a different behavior of the regression model
(stored in the attribute ``model_``).
Parameters
----------
X : array-like of shape (n_samples, n_features)
Feature vectors of training data.
y : array-like of shape (n_samples,)
Target labels of training data.
d : latent variables, array-like of shape (n_samples, n_classes-1) or None, optional (default: None)
Precalculated vector of latent variables. Set to ``None`` to
calculate ``d`` automatically based on ``X`` and ``y``
(recommended).
Returns
-------
self : returns an instance of self.
"""
# Check that X and y have correct shape
X, y = check_X_y(X, y)
self.X_ = X
self.y_ = y
# Store the classes seen during fit
self.classes_ = unique_labels(self.y_)
self._num_classes = self.classes_.size
if self._num_classes < 2:
raise ValueError("At least 2 classes required!")
self._class_normals = self._calc_class_normals(self._num_classes)
self._binary_projectors = self._calc_binary_projectors()
# Prepare rng
self.random_state_ = check_random_state(self.random_state)
# Prepare model
self.model_ = self.model_constructor()
if not hasattr(self.model_, 'fit') or not hasattr(self.model_, 'predict'):
raise ValueError("Model must provide methods 'fit' and 'predict'!")
# Distance features
self.d_ = d
if self.d_ is None:
self.d_ = self._calc_distance_features(self.X_,self.y_)
else:
self.d_ = np.asarray(self.d_)
if self.d_.shape != (self.X_.shape[0],self._num_classes-1):
raise ValueError("Provided distance features have an invalid shape: {} instead of {}!".format(self.d_.shape, (self.X_.shape[0],self._num_classes-1)))
# Fit model
self.model_.fit(self.X_,self.d_)
# Return the classifier
return self
[docs]
def predict(self, X):
"""Perform classification on an array of test vectors X. Requires a
previous call of ``fit``.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Query points where the classifier is evaluated.
Returns
-------
C : ndarray of shape (n_samples,)
Predicted target values for X, values are from ``classes_``.
"""
# Check if fitted
check_is_fitted(self, ['d_', 'y_'])
# Make model prediction
d_predict = self.model_.predict(X)
return self._calc_distance_features_to_class(d_predict)
[docs]
def predict_proba(self, X, return_std=False):
"""Return probability estimates for the test vector X. Requires a
previous call of ``fit``.
Note that it is assumed that the predictions of the regression model
(stored in the attribute ``model_``) obey a Gaussian probability
distribution. The ``predict`` method of the regression model must
support a second argument ``return_std``, which returns the standard
deviations of the predictions together with the mean values if set to
True so that ``(mean, std) = model_.predict(X, return_std=True)``.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Query points where the classifier is evaluated.
return_std : bool, optional (default: False)
If True, the standard-deviation of the predictive distribution at
the query points is returned along with the mean.
Returns
-------
p : array-like of shape (n_samples, n_classes)
Returns the probability of the samples for each class in
the model. The columns correspond to the classes in sorted
order, as they appear in the attribute ``classes_``.
p_std : array-like of shape (n_samples,), optional
Best estimate of the standard deviation of the predicted
probabilities at the query points.
Only returned when return_std is True.
"""
# Check if fitted
check_is_fitted(self, ['d_', 'y_'])
# Make model prediction
mu, sigma = self.model_.predict(X, return_std=True)
return self._calc_proba(mu, sigma, return_std)
[docs]
def predict_proba_grad(self, X):
"""Return the gradient of the probability estimates with respect to the
features. Requires a previous call of ``fit``.
Note that it is assumed that the predictions of the regression model
(stored in the attribute ``model_``) obey a Gaussian probability
distribution. The ``predict`` method of the regression model must
support a second argument ``return_std``, which returns the standard
deviations of the predictions together with the mean values if set to
True so that ``(mean, std) = model_.predict(X, return_std=True)``.
Furthermore, the model must provide a function ``predict_grad``, which
predicts the gradients of the ``(mean, std)`` predictions from the
``predict`` method with respect to the features in the same way so that
``(dmean, dstd) = model_.predict_grad(X, return_std=True)``.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Query points where the classifier is evaluated.
Returns
-------
dp : array-like of shape (n_samples, n_features, n_classes)
Returns the gradient of the probability of the samples with respect
to each feature for each class in the model.
"""
# Check if fitted
check_is_fitted(self, ['d_', 'y_'])
# Check availability of gradients
if not hasattr(self.model_, 'predict_grad'):
raise NotImplementedError("Gradients not available: model does not provide a predict_grad function!")
# Calculate gradients
mu, sigma = self.model_.predict(X, return_std=True)
dmean, dstd = self.model_.predict_grad(X, return_std=True)
return self._calc_proba_grad(mu, sigma, dmean, dstd)
[docs]
def decision_function(self, X, return_idx_col_map=False):
"""Return the binary decision functions for the test vector X. Requires
a previous call of ``fit``.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Query points where the classifier is evaluated.
return_idx_col_map : bool, optional (default: False)
If True, ``idx_col_map`` is returned.
Returns
-------
d : array-like of shape (n_samples,) for a binary classification or (n_sample, n_class * (n_class-1) / 2) otherwise
Returns the decision functions in the form of an array of the form
(first class index, second class index) sorted according to
idx_col_map. In case of a binary classification problem, the
returned array is flattened.
idx_col_map : array-like of shape (n_class*(n_class-1)/2,), optional
List of tuples (first class index, second class index) to identify
the contents of d for a multi-class classification. The indices
correspond to the classes in sorted order, as they appear in the
attribute ``classes_``.
Only returned when return_idx_col_map is True and there are more
than two classes. In case of two classes, idx_col_map would always
correspond to ((0,1),) and is therefore not returned.
"""
# Check if fitted
check_is_fitted(self, ['d_', 'y_'])
# Make model prediction
d_predict = self.model_.predict(X)
if len(d_predict.shape) == 1: # ensure correct shape of model output
d_predict = d_predict[:,None]
# Determine decision function (and optionally the idx_col_map)
return self._calc_decision_function(d_predict, return_idx_col_map)
[docs]
def decision_function_grad(self, X, return_idx_col_map=False):
"""Return the gradient of the ecision function with respect to the
features. Requires a previous call of ``fit``.
Note that it is assumed that the regression model (stored in the
attribute ``model_``) must provide a function ``predict_grad``, which
predicts the gradients of the predictions with respect to the features.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Query points where the classifier is evaluated.
return_idx_col_map : bool, optional (default: False)
If ``True``, ``idx_col_map`` is returned.
Returns
-------
dd : array-like of shape (n_samples, n_fetaures) for a binary classification or (n_sample, n_features, n_class * (n_class-1) / 2) otherwise.
Returns the gradient of the decision function with repect to the
features.
idx_col_map : array-like of shape (n_class*(n_class-1)/2,), optional
List of tuples (first class index, second class index) to identify
the contents of d for a multi-class classification.
Only returned when return_idx_col_map is True and there are more
than two classes.
"""
# Check if fitted
check_is_fitted(self, ['d_', 'y_'])
# Check availability of gradients
if not hasattr(self.model_, 'predict_grad'):
raise NotImplementedError("Gradients not available: model does not provide a predict_grad function!")
# Calculate gradients
dmean = self.model_.predict_grad(X)
return self._calc_decision_function_grad(dmean, return_idx_col_map)
[docs]
def train(self, X, y, d=None):
"""Alias for ``fit`` for backward compatibility, see there.
"""
return self.fit(X, y, d=d)
[docs]
def predict_class_label(self, X):
"""Alias for ``predict`` for backward compatibility, see there.
"""
return self.predict(X)
[docs]
def predict_class_label_probability(self, X, return_std=False):
"""Alias for ``predict_proba`` for backward compatibility, see there.
"""
return self.predict_proba(X, return_std=return_std)
[docs]
def inflate(self, d, tau=None):
"""Alias for ``transform`` with ``method='reference'`` for backward
compatibility, see there.
"""
return self.transform(d, tau=None, method='reference')
[docs]
def compress(self, s, tau):
"""Alias for ``inverse_transform`` with ``method='reference`` for
backward compatibility, see there.
"""
return self.inverse_transform(s, tau, method='reference')