"""
Cost Sensitive Minimax Risk Classification. Copyright (C) 2025 Jorge Angulo Rodriguez
This program is free software: you can redistribute it and/or modify it under the terms of the
GNU General Public License as published by the Free Software Foundation,
either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program.
If not, see https://www.gnu.org/licenses/.
"""
import sys
from cvxpy.utilities.key_utils import to_str
from scipy.spatial import ConvexHull, convex_hull_plot_2d, Delaunay, distance
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.preprocessing import normalize
# Import the MRC super class
from MRCpy import BaseMRC
from MRCpy.solvers.cvx import *
from MRCpy.solvers.cg import *
[docs]class LMRC(BaseMRC):
r'''
Cost Sensitive Minimax Risk Classifier.
The class LMRC implements Cost Sensitive Minimax Risk Classifiers (MRC),
an MRC that can use any loss defined by a loss matrix.
It implements two kinds of prediction methods.
Parameters
----------
n_classes : int
Number of different possible labels for an instance.
s : float, default=0.3
Parameter that tunes the estimation of expected values
of feature mapping function. It is used to calculate :math:`\boldsymbol{\lambda}`
(variance in the mean estimates
for the expectations of the feature mappings) in the following way
.. math::
\boldsymbol{\lambda} = s \cdot \text{std}(\Phi(X,Y)) / \sqrt{|X|}
where (X,Y) is the dataset of training samples and their
labels respectively and
:math:`\text{std}(\Phi(X,Y))` stands for standard deviation
of :math:`\Phi(X,Y)` in the supervised dataset (X,Y).
loss : str or array-like of shape (n_classes_classification, n_classes)
If a string, one of ``'0-1'``, ``'ordinal'``, ``'abstention'``,
``'random'``, ``'ordinal-squared'``. Generates the corresponding
loss matrix. If an array, it will be used as the transposed loss matrix.
alpha : float, optional
Penalty for abstention in that type of loss.
Defaults to ``1 / (n_classes + 1)``.
max_iters : int, default=2000
Maximum number of iterations to use
for finding the solution of optimization in
the subgradient approach.
phi : str or BasePhi instance, default='linear'
Type of feature mapping function to use for mapping the input data.
The currently available feature mapping methods are
``'fourier'`` and ``'linear'``.
The users can also implement their own feature mapping object
(should be a ``BasePhi`` instance) and pass it to this argument.
Note that when using ``'fourier'`` feature mapping,
training and testing instances are expected to be normalized.
To implement a feature mapping, please go through the
:ref:`Feature Mapping` section.
``'linear'``
It uses the identity feature map referred to as Linear feature map.
See class ``BasePhi``.
``'fourier'``
It uses Random Fourier Feature map. See class ``RandomFourierPhi``.
eps : float, default=1e-4
Threshold for comparing values. For instance, testing if two values
are different is done as ``abs(value1 - value2) < eps``
instead of ``value1 == value2``.
use_closed_form : bool, default=False
If True, and if the loss matrix is squared, then it will use the
closed form method for classification.
**phi_kwargs : Additional parameters for feature mappings.
Groups the multiple optional parameters
for the corresponding feature mappings (``phi``).
For example in case of fourier features,
the number of features is given by ``n_components``
parameter which can be passed as argument
``LMRC(phi='fourier', n_components=500)``.
The list of arguments for each feature mappings class
can be found in the corresponding documentation.
'''
def __generate_loss_matrix(self, loss, n, alpha=None, Beta=10):
'''
This function generates some common loss matrices: abstention,
0-1, ordinal, ordinal-squared, weighted (only for binary) and random.
Parameters
----------
loss : `str` (0-1, ordinal, abstention, random, ordinal-squared) or
`array`-like of shape (`n_classes_classification`, `n_classes`)
If it's a string, it generates the loss matrix.
If it's an array, it will use as the transposed loss maxtrix
n : `int`
number of classes
alpha : `float`
abstention penalty (optional parameter). Defaults to 1/(n_classes+1)
Returns
-------
self :
transposed loss matrix or validated loss matrix.
'''
#Check if it is an string and generate the matrix
# if it is one of the defined types
if isinstance(loss, str):
if "abstention" in loss:
if alpha is None:
alpha = 1 / (n + 1)
LT = np.ones((n, n)) - np.identity(n)
LT = np.hstack((LT, alpha * np.ones((n, 1))))
return LT
if loss == "0-1":
LT = np.ones((n, n)) - np.identity(n)
return LT
elif loss == "ordinal":
LT = [[np.abs(i - j) for i in range(n)] for j in range(n)]
LT = np.array(LT)
return LT
elif loss == "ordinal-squared":
LT = [[(i - j) * (i - j) for i in range(n)] for j in range(n)]
LT = np.array(LT)
return LT
elif loss == "random":
LT = np.random.uniform(0, 1, (n, n))
# Set the diagonal elements to 0
np.fill_diagonal(LT, 0)
return LT
elif loss == "weighted" and n == 2:
LT = np.array([[0, 1], [Beta, 0]])
return LT
return 1
else: #If it is not a string, we validate it as an array
if not np.issubdtype(loss.dtype, np.number):
raise TypeError("Array must be numeric")
if loss.ndim != 2:
raise ValueError(f"Expected {2} dimensions")
return loss
[docs] def __init__(self,
s=0.3,
loss = "0-1",
n_classes=2,
alpha = None,
eps=1e-4,
phi='linear',
use_closed_form = False,
**phi_kwargs):
self.eps = eps
self.use_closed_form = use_closed_form
self.cvx_solvers = ['GUROBI', 'MOSEK', 'SCS', 'CLARABEL', 'CVXOPT',
'ECOS', 'ECOS_BB', 'GLPK', 'GLPK_MI', 'OSQP',
"COPT", "XPRESS", "PROXQP", "CPLEX", 'SCIPY']
self.loss_matrix = self.__generate_loss_matrix(loss, n_classes, alpha)
self.n_classes = n_classes #|Y|
self.n_classification_classes_ = self.loss_matrix.shape[1] # |T|
super().__init__(s=s,
loss = loss,
phi=phi, **phi_kwargs)
[docs] def minimax_risk(self, X, tau_, lambda_, n_classes):
'''
Solves the minimax risk problem
for different the given loss matrix
The solution of the default L-MRC optimization
gives the upper bound of the error.
Parameters
----------
X : `array`-like of shape (`n_samples`, `n_dimensions`)
Training instances used for solving
the minimax risk optimization problem.
tau_ : `array`-like of shape (`n_features` * `n_classes`)
Mean estimates
for the expectations of feature mappings.
lambda_ : `array`-like of shape (`n_features` * `n_classes`)
Variance in the mean estimates
for the expectations of the feature mappings.
n_classes : `int`
Number of labels in the dataset.
Returns
-------
self :
Fitted estimator
'''
self.n_classes = n_classes
self.tau_ = check_array(tau_, accept_sparse=True, ensure_2d=False)
self.lambda_ = check_array(lambda_, accept_sparse=True,
ensure_2d=False)
phi = self.compute_phi(X)
self.tau_ = self.tau_.flatten()
self.lambda_ = self.lambda_.flatten()
phi = np.unique(phi, axis=0)
# Constants
m = phi.shape[2]
n = phi.shape[0]
# Save the phi configurations for finding the lower bounds
self.lowerPhiConfigs = phi
# Get H-form of the polyhedron of restrictions defined by the Convex learning problem given by the loss matrix
self.b, self.A = self.__get_A_b(self.loss_matrix)
# Let's compute homogenization matrix N
n_0 = np.hstack([np.ones(self.n_classification_classes_), np.zeros(self.n_classes)])
N = np.hstack([self.loss_matrix, np.identity(self.n_classes)])
N = np.vstack([n_0, N])
self.N = N
# We compute the intersection between the rays generating cone(N) and Hyperplane H(alpha): alpha*z_0+z_1+...+z_{|Y|}
points = self.__compute_intersection_points(self.loss_matrix, self.n_classes, self.n_classification_classes_)
#Triangulate the result using Dalaunay triangulation
try:
tri = Delaunay(points, furthest_site=True)
except:
tri = Delaunay(points)
#Learing problem
#Define cvxpy problem
mu = cvx.Variable(m)
eta = cvx.Variable(m)
nu = cvx.Variable(1)
an = self.tau_ - self.lambda_ / np.sqrt(n)
bn = self.tau_ + self.lambda_ / np.sqrt(n)
objective = cvx.Minimize(0.5 * (bn - an) @ eta - 0.5 * (bn + an) @ mu - nu)
constraints = [
self.A @ (cvx.matmul(phi[i, :, :], mu) + cvx.matmul(nu, np.ones((1, self.n_classes)))) <= self.b
for i in range(n)
]
constraints.append(eta + mu >= 0)
constraints.append(eta - mu >= 0)
problem = cvx.Problem(objective, constraints)
for s in self.cvx_solvers:
try:
problem.solve(solver=s) # , max_iters=m_iter)
self.mu_ = mu.value
self.nu_ = nu.value
except:
pass
if mu.value is not None and nu.value is not None:
break
if mu.value is None or nu.value is None:
raise RuntimeError("Learning problem is not feasible")
# Upper bounds are now available.
self.upper_ = (0.5 * (bn - an) @ eta.value -
0.5 * (bn + an) @ mu.value - nu.value)
# Get submatrices of N and compute the inverse matrices
flag = True
for T in tri.simplices:
for j in T:
if (np.abs(np.sum(tri.points[j] - points[j])) > 0.0001):
flag = False
if not flag:
"Error in array ordering during triangulation"
exit()
# We store the inverted simplex matrices for later, as they are needed for
# classification
TN = [[N[:, np.sort(T)], np.sort(T)] for T in tri.simplices]
self.Inverted_Matrices = [[np.linalg.inv(TT[0]), TT[1]] for TT in TN]
self.is_fitted_ = True
return self
[docs] def predict_proba(self, X):
'''
Conditional probabilities corresponding to each class
for each unlabeled input instance
Parameters
----------
X : `array`-like of shape (`n_samples`, `n_dimensions`)
Testing instances for which
the prediction probabilities are calculated for each class.
Returns
-------
hy_x : `ndarray` of shape (`n_samples`, `n_classes`)
Probabilities :math:`(p(y|x))` corresponding to the predictions
for each class.
'''
X = check_array(X, accept_sparse=True)
check_is_fitted(self, "is_fitted_")
phi = self.compute_phi(X)
hy_x = []
for i, x in enumerate(X):
cx = self.nu_ * np.ones(self.n_classes) + phi[i, :, :] @ self.mu_
cx_1 = np.hstack((1, -cx))
max_index = None
# If the matrix is squared and the user selects the option, we use the
# closed classificiation rule
if self.use_closed_form and self.n_classes == self.n_classification_classes_:
h_t = self.__find_closed_form_classification_rule(cx,
self.loss_matrix, self.n_classes)
else: # Otherwise we use the general classifcation rule
h_t = self.__find_classification_rule(self.Inverted_Matrices,
cx_1,
(self.n_classes +
self.n_classification_classes_),
self.eps)
# If all elements are positive then we have found a valid classification rule
# Otherwise the point falls outside the polyhedron of restrictions,
# and we find and alternative classification rule based on projceting the point
if not np.all(h_t >= -self.eps) and max_index is None:
h_t = self.__find_alternative_classification_rule_2(self.Inverted_Matrices,
cx_1, self.N)
if h_t is not None and np.all(h_t >= -self.eps):
# Remove auxiliary dimensions corresponding to Slack variables
h = h_t[:self.n_classification_classes_]
else:
# If all fails, we just return the "uniform" with respect to the loss
h = self.__uniform_distribution()
hy_x.append(h)
return np.array(hy_x)
    def get_upper_bound(self):
        '''
        Returns the upper bound on the expected loss for the fitted classifier.

        Returns
        -------
        upper_bound : `float`
            Upper bound of the expected loss for the fitted classifier.
        '''
        # If model is fitted, we already have the upper bound
        # (minimax_risk stores it in self.upper_).
        if self.is_fitted_:
            return self.upper_
        if self.deterministic:
            # Number of instances in training
            n = self.lowerPhiConfigs.shape[0]
            # Feature mapping length
            m = self.phi.len_
            mu = cvx.Variable(m)
            eta = cvx.Variable(m)
            nu = cvx.Variable(1)
            # Box around the mean estimates: tau_ +- lambda_ / sqrt(n).
            an = self.tau_ - self.lambda_ / np.sqrt(n)
            bn = self.tau_ + self.lambda_ / np.sqrt(n)
            objective = cvx.Minimize(0.5 * (bn - an) @ eta - 0.5 * (bn + an) @ mu - nu)
            constraints = [
                self.A @ (cvx.matmul(self.lowerPhiConfigs[i, :, :], mu) +
                          cvx.matmul(nu, np.ones((1, self.n_classes)))) <= self.b
                for i in range(n)
            ]
            # eta >= |mu| encoded as two linear constraints.
            constraints.append(eta + mu >= 0)
            constraints.append(eta - mu >= 0)
            problem = cvx.Problem(objective, constraints)
            # Try each configured solver in order until one succeeds.
            for s in self.cvx_solvers:
                try:
                    problem.solve(solver=s)
                except Exception as e:
                    print(e)
                    print("Failed cvxpy problem using solver: ", s)
                    pass
                if mu.value is not None and nu.value is not None:
                    break
            if mu.value is None or nu.value is None:
                raise RuntimeError("Learning problem is not feasible")
            self.upper_ = (0.5 * (bn - an) @ eta.value -
                           0.5 * (bn + an) @ mu.value - nu.value)
        # NOTE(review): if the model is neither fitted nor deterministic,
        # self.upper_ may not exist at this point — confirm the intended
        # behavior for that case.
        return self.upper_
[docs] def get_lower_bound(self, X, Y):
'''
Obtains the lower bound on the expected loss for the fitted classifier.
Parameters
----------
X : `array`-like of shape (`n_samples`, `n_dimensions`)
Test instances for which the labels are to be predicted
by the MRC model.
Y : `array`-like of shape (`n_samples`, 1), default = `None`
Labels corresponding to the testing instances
used to compute the error in the prediction.
Returns
-------
lower_bound : `float`
Lower bound of the error for the fitted classifier.
'''
# Classifier should be fitted to obtain the lower bound
check_is_fitted(self, "is_fitted_")
# Learned feature mappings
phi = self.lowerPhiConfigs
# Variables
n = phi.shape[0]
m = phi.shape[2]
#Get all errors
errors = self.error(X,Y,False)
#Define the cvx problem that determines the lower bound
mu = cvx.Variable(m)
eta = cvx.Variable(m)
nu = cvx.Variable(1)
an = self.tau_ - self.lambda_ / np.sqrt(n)
bn = self.tau_ + self.lambda_ / np.sqrt(n)
objective = cvx.Maximize(- 0.5 * (bn - an) @ eta + 0.5 * (bn + an) @ mu + nu)
constraints = [
cvx.matmul(self.lowerPhiConfigs[i, :, :], mu) +
cvx.matmul(nu, np.ones((1, self.n_classes))) <= errors[i]
for i in range(n)
]
constraints.append(eta + mu >= 0)
constraints.append(eta - mu >= 0)
problem = cvx.Problem(objective, constraints)
for s in self.cvx_solvers:
try:
problem.solve(solver=s)
except:
pass
self.lower_ = (- 0.5 * (bn - an) @ eta.value +
0.5 * (bn + an) @ mu.value + nu.value)
return self.lower_
[docs] def error(self, X, Y, mean=True):
'''
Return the error obtained for the given test
data and labels using the loss function given
Parameters
----------
X : `array`-like of shape (`n_samples`, `n_dimensions`)
Test instances for which the labels are to be predicted
by the MRC model.
Y : `array`-like of shape (`n_samples`, 1), default = `None`
Labels corresponding to the testing instances
used to compute the error in the prediction.
mean: `Boolean`, defaults to `True`. If true, the method returns the mean error.
If false, the method returns an array with all the errors.
Returns
-------
error : `float` (if mean is `True`)
Mean error of the learned MRC classifier
OR
error: `array`-like of shape (`n_samples`) (if mean is `False`)
Array with the errors commited when classifying each instance
'''
X = check_array(X, accept_sparse=True)
check_is_fitted(self, "is_fitted_")
Y_pred = self.predict(X)
error = []
for i, y in enumerate(Y):
if isinstance(Y_pred[i], (int, np.int64)):
q = np.zeros(self.n_classification_classes_)
q[Y_pred[i]] = 1
else:
q = Y_pred[i]
p = np.zeros(self.n_classes)
p[y] = 1
if q is not None and np.all(q >= -self.eps):
q_t = q[:self.n_classification_classes_]
q_t = normalize([q_t], norm="l1")[0]
max_index = np.argmax(q_t)
q_max = np.zeros_like(q_t)
q_max[max_index] = 1
e = p @ self.loss_matrix @ q_max
error.append(e)
error = np.array(error)
if mean:
return np.average(error)
else:
return error
# This version of the predict(X) function
# is almost the same as the parent's class method
# However, small adjustments need to be done in order to prevent
# issues with non-squared loss matrices
[docs] def predict(self, X):
'''
Predict classes for new instances using a fitted model.
Returns the predicted classes for the given instances in ``X``
using the probabilities given by the function ``predict_proba``.
This method overwrites the parent's class method so that
non-square loss matrices are allowed.
Parameters
----------
X : array-like of shape (n_samples, n_dimensions)
Test instances for which the labels are to be predicted
by the MRC model.
Returns
-------
y_pred : array-like of shape (n_samples,)
Predicted labels corresponding to the given instances.
'''
X = check_array(X, accept_sparse=True)
check_is_fitted(self, "is_fitted_")
# Get the prediction probabilities for the classifier
proba = self.predict_proba(X)
if self.deterministic:
y_pred = np.argmax(proba, axis=1)
else:
np.random.seed(self.random_state)
y_pred = [np.random.choice(self.n_classification_classes_, size=1, p=pc)[0]
for pc in proba]
return y_pred
##################################################################################
######### AUXILIARY FUNCTIONS ####################################################
##################################################################################
    # Transforms the problem restrictions into A w_x <= b form given LT.
    # Returns A, b.
    def __get_A_b(self, LT):
        r'''
        Computes the H-form of the polyhedron of restrictions.
        That is, polyhedron L is given as ${\omega \in R^m | A\omega < b}$.

        Parameters
        ----------
        LT : `array`-like of shape (`n_classes`, `n_classification_classes_`)
            Transposed loss matrix.

        Returns
        -------
        A : `array`-like of shape (`restrictions`, `m`),
            where `restrictions` is not known a-priori.
        b : `array`-like of shape (`restrictions`,)

        Raises
        ------
        ImportError
            If the optional dependency pycddlib is not installed.
        '''
        # The CP matrix encodes the restrictions. It transforms them into
        # equality restrictions by adding slack variables, and adds a new
        # restriction encoding that ||h|| = 1:
        # CP = | ones(|T|)  zeros(|Y|) |
        #      | -L^T       -Id(|Y|)   |
        try:
            import cdd
        except ImportError:
            raise ImportError(
                "pycddlib is required for LMRC. "
                "See installation guide: "
                "https://pycddlib.readthedocs.io/en/latest/quickstart.html#installation"
            )
        d_1 = LT.shape[0]  # |Y|
        d_2 = LT.shape[1]  # |T|
        CP = np.hstack((-LT, -np.identity(d_1)))
        v = np.hstack((np.ones(d_2), np.zeros(d_1)))
        CP = np.vstack((v, CP))
        # Transform the CP matrix into the shape cdd can use
        # (generator/V-representation rows, homogenized with a leading 0,
        # plus the origin row with a leading 1).
        V = np.vstack(
            [
                np.hstack([np.zeros((CP.shape[1], 1)), CP.T]),
                np.hstack([1, np.zeros(CP.shape[0])]),
            ]
        )
        # Define the cdd polyhedron from the given restriction matrix and
        # convert it to its inequality (H) representation.
        mat = cdd.matrix_from_array(V)
        mat.rep_type = cdd.RepType.GENERATOR
        P = cdd.polyhedron_from_matrix(mat)
        ineq = cdd.copy_inequalities(P)
        H = np.array(ineq.array)
        # cdd rows are [b | -A]; split and negate to recover A.
        A = []
        for i in range(H.shape[0]):
            A.append(-H[i, 1:])
        A = np.array(A)
        b = np.zeros(A.shape[0])
        m = A.shape[1]
        # Add the intersection with the hyperplane x_0 = 1, expressed as
        # the two inequalities x_0 <= 1 and -x_0 <= -1.
        v = np.zeros((2, m))
        v[0][0] = 1
        v[1][0] = -1
        b = np.concatenate((b, np.array([1, -1])))
        A = np.vstack([A, v])
        b = b.reshape((b.shape[0], 1))
        # Canonicalize through the generator representation: convert to
        # generators, drop the (now constant) first coordinate, and go
        # back to the inequality representation.
        mat = cdd.matrix_from_array(np.hstack([b, -A]))
        mat.rep_type = cdd.RepType.INEQUALITY
        cdd.matrix_canonicalize(mat)
        g = cdd.copy_generators(cdd.polyhedron_from_matrix(mat))
        gg = np.array(g.array)
        gg = np.hstack((np.array(gg[:, 0][:, np.newaxis]), np.array(gg[:, 2:])))
        mat = cdd.matrix_from_array(gg)
        mat.rep_type = cdd.RepType.GENERATOR
        g = cdd.polyhedron_from_matrix(mat)
        bA = np.array(cdd.copy_inequalities(g).array)
        # Final H-form: rows of bA are [b | -A].
        b, A = np.array(bA[:, 0]), -np.array(bA[:, 1:])
        return b, A
    # Computes the intersection points between the cone generated by
    # matrix N and a hyperplane that intersects all rays of the cone.
    # Coordinates are given in a reference frame such that the hyperplane
    # has coordinate z_0 = 1. Points are projected to one dimension less,
    # and the first coordinate is removed (see proof details in the paper).
    def __compute_intersection_points(self, LT, d_1, d_2):
        '''
        Intersect the rays of cone(N) with the hyperplane H(alpha).

        Parameters
        ----------
        LT : `array`-like of shape (`d_1`, `d_2`)
            Transposed loss matrix.
        d_1 : `int`
            Number of labels (|Y|).
        d_2 : `int`
            Number of classification actions (|T|).

        Returns
        -------
        points : `ndarray` of shape (`d_2` + `d_1`, `d_1`)
            Projected intersection points, ordered as the columns of N.
        '''
        # L1 norm of each column of the transposed loss matrix.
        L_1_norms = [np.dot(LT[:, i], np.ones(d_1)) for i in range(d_2)]
        # Compute alpha strictly larger than every column norm so the
        # hyperplane cuts all rays.
        alpha = np.max(L_1_norms)
        alpha += 0.1 * np.abs(alpha)
        # Gamma scaling for the first |T| rays: 1 / (alpha + <l_(i,.), 1>).
        gama = [1 / (alpha + L_1_norms[i]) for i in range(d_2)]
        # Intersection points for those rays.
        int_points_1 = [gama[i] * np.hstack((1, LT[:, i])) for i in range(d_2)]
        # Intersection points for the last |Y| rays, of the form
        # gamma_i * (0, e_i)^T with gamma_i = 1.
        int_points_2 = [np.zeros(d_1 + 1) for i in range(d_1)]
        for i in range(d_1):
            int_points_2[i][i + 1] = 1
        int_points = np.concatenate((int_points_1, int_points_2))
        # Perform a change of reference to a new basis where hyperplane H
        # is given by z_0 = 1.
        # Normal vector (alpha, 1, 1, ..., 1):
        nv = np.ones(d_1 + 1)
        nv[0] = alpha
        # B has the normal vector as its first column. The other columns
        # are the generators of H, (1, -alpha*e_i)^T for 1 <= i <= |Y|.
        B = -alpha * np.identity(d_1 + 1)
        B[0, :] = 1
        B[:, 0] = nv
        # K matrix necessary to calculate the change-of-reference matrix.
        # NOTE(review): K is built but unused — the alternative formula
        # below that uses it is disabled. Confirm it can be removed.
        K = np.zeros((d_1 + 1, d_1 + 1))
        K[0, 1:] = np.ones(d_1)
        # Change-of-reference matrix D. Disabled alternative:
        # D = (np.identity(d_1+1)+K) @ np.linalg.inv(B)
        D = np.linalg.inv(B)
        # Origin offset of the new frame.
        op = np.zeros(d_1 + 1)
        op[0] = 1
        o = (1 / alpha) * op
        # Points in the new reference frame.
        new_int_points = [(D @ (v - o) + op) for v in int_points]
        # Take all but the first coordinate.
        projected_points = [p[1:] for p in new_int_points]
        # Points are ordered in the same order as the columns of N.
        points = np.array(projected_points)
        return points
def __find_classification_rule(self, Inverted_Matrices, cx_1, n, eps):
for NI in Inverted_Matrices:
h_t = np.zeros(n) # Create an array of zeros at the start of each iteration
# For each simplex, we calculate h_t as the coordinates of cx_1 in that simplex
# So, cx_1 = N_s @ h_t, where N is the matrix that generates the simplex.
# NI = N_x^-1 of that matrix, so h_t = NI @ cx_1
aux = NI[0] @ cx_1
# We just need to adjust the coordinates, since NI squared matrix of size n_clases^2
# And we need the coordinates for h_t in the general coordinate framework with
# dimensions = n_classes + n_classification_classes_
h_t[NI[1]] = aux
# If we find a non-negative rule, then is valid and we are done. Otherwise keep looking
if np.all(h_t >= -eps):
return h_t
return h_t # Return the final h_t if no break condition is met
# Clossed for classification. See article for explanation.
# In this case we need squared and non-singular loss matrix
def __find_closed_form_classification_rule(self, cx, LL, n):
h_3 = - np.linalg.inv(LL) @ (cx + (self.lambda_ @ np.abs(self.mu_)) / np.sqrt(n))
h_3 = np.where(h_3 > 0, h_3, 0)
h_3 = h_3 / np.sum(h_3)
return h_3
# This is the same as __find_classification_rule, but we just return all
# possible classification rules (one per simplex).
# Many might not be valid classification rules
# See __find_classification_rule for comments on how it works
def __find_all_classification_rules(self, Inverted_Matrices, cx_1, n):
h_list = []
for NI in Inverted_Matrices:
h_t = np.zeros(n)
aux = NI[0] @ cx_1
h_t[NI[1]] = aux
h_list.append(h_t)
return h_list
# This function is used when no valid classification rule is found by standard methods.
# This means that cx is not in the learned polyhedron of restrictions, so we need an approximation.
# This is done by projecting all the possible rules and finding the closest one
def __find_alternative_classification_rule_2(self, Inverted_Matrices, cx_1, N):
h_t = None
d = sys.maxsize
# We find all potential classification rules, that is, the coordinates of cx_1 for each simplex
# If this function is used, it means that they are all non-valid solutions:
# they will have negative components.
h_list = self.__find_all_classification_rules(Inverted_Matrices, cx_1,
(self.n_classes + self.n_classification_classes_))
for hh in h_list:
# Substitute all negative components with 0
h_proj = np.maximum(0, hh)
# Normalize, so that the resulting array is still a provability distribution
h_proj /= np.sum(hh)
# Now we use the new coordinates to "project" cx_1 into the simplex corresponding to this
# particular h_t
cx_1_proj = N @ h_proj
# We compute the distance to the ordiginal, and we will choose the projected point that is
# closest to the original.
d_aux = distance.euclidean(cx_1_proj, cx_1)
if d_aux < d:
h_t = h_proj
d = d_aux
return h_t
def __uniform_distribution(self):
q = cvx.Variable(self.n_classification_classes_)
p = np.ones(self.n_classes)
p = (1 / self.n_classes) * p
objective = cvx.Minimize((p @ self.loss_matrix) @ q)
constraints = [(np.ones(self.n_classification_classes_) @ q) == 1, q >= 0]
problem = cvx.Problem(objective, constraints)
problem.solve(solver=self.cvx_solvers[0])
uniform = q.value
return uniform