diff --git a/pyPLNmodels/VEM.py b/pyPLNmodels/VEM.py
index d12564f05fb1210021e11ea1357587e267fc70cf..0e21099045c3f8630c76fdd1b8d3be9822909282 100644
--- a/pyPLNmodels/VEM.py
+++ b/pyPLNmodels/VEM.py
@@ -1,28 +1,26 @@
 import time
 from abc import ABC, abstractmethod
+import pickle
 
 import torch
 import numpy as np
 import seaborn as sns
 import matplotlib.pyplot as plt
-import pickle
 from sklearn.decomposition import PCA
 
 from ._closed_forms import closed_formula_beta, closed_formula_Sigma, closed_formula_pi
-from .elbos import ELBOPLN, ELBOPLNPCA, ELBOZIPLN, profiledELBOPLN
+from .elbos import ELBOPLNPCA, ELBOZIPLN, profiledELBOPLN
 from ._utils import (
     PLNPlotArgs,
-    init_Sigma,
-    init_C,
+    init_sigma,
+    init_c,
     init_beta,
-    get_O_from_sum_of_Y,
+    get_offsets_from_sum_of_counts,
     check_dimensions_are_equal,
     init_M,
-    init_S,
-    NotFitError,
     format_data,
     check_parameters_shape,
-    extract_cov_O_Oformula,
+    extract_cov_offsets_offsetsformula,
     nice_string_of_dict,
 )
@@ -38,36 +36,42 @@ class _PLN(ABC):
     """
     Virtual class for all the PLN models.
 
-    This class must be derivatived. The methods `get_Sigma`, `compute_ELBO`,
+    This class must be derived. The methods `get_Sigma`, `compute_elbo`,
     `random_init_var_parameters` and `list_of_parameters_needing_gradient` must
     be defined.
     """
 
+    WINDOW = 3
+
     def __init__(self):
         """
         Simple initialization method.
         """
-        self.window = 3
         self._fitted = False
-        self.plotargs = PLNPlotArgs(self.window)
+        self.plotargs = PLNPlotArgs(self.WINDOW)
 
-    def format_datas(self, Y, covariates, O, O_formula):
-        self.Y = format_data(Y)
+    def format_datas(self, counts, covariates, offsets, offsets_formula):
+        self.counts = format_data(counts)
         if covariates is None:
             self.covariates = torch.full(
-                (self.Y.shape[0], 1), 1, device=DEVICE
+                (self.counts.shape[0], 1), 1, device=DEVICE
             ).double()
         else:
             self.covariates = format_data(covariates)
-        if O is None:
-            if O_formula == "sum":
-                print("Setting the offsets O as the log of the sum of Y")
-                self.O = torch.log(get_O_from_sum_of_Y(self.Y)).double().to(DEVICE)
+        if offsets is None:
+            if offsets_formula == "sum":
+                print("Setting the offsets as the log of the sum of counts")
+                self.offsets = (
+                    torch.log(get_offsets_from_sum_of_counts(self.counts))
+                    .double()
+                    .to(DEVICE)
+                )
             else:
-                self.O = torch.zeros(self.Y.shape, device=DEVICE)
+                self.offsets = torch.zeros(self.counts.shape, device=DEVICE)
         else:
-            self.O = format_data(O).to(DEVICE)
-        self._n, self._p = self.Y.shape
+            self.offsets = format_data(offsets).to(DEVICE)
+        self._n, self._p = self.counts.shape
         self._d = self.covariates.shape[1]
 
     @property
@@ -83,7 +87,7 @@ class _PLN(ABC):
         return self._d
 
     def smart_init_beta(self):
-        self._beta = init_beta(self.Y, self.covariates, self.O)
+        self._beta = init_beta(self.counts, self.covariates, self.offsets)
 
     def random_init_beta(self):
         self._beta = torch.randn((self._d, self._p), device=DEVICE)
@@ -112,9 +116,9 @@ class _PLN(ABC):
         self.random_init_model_parameters()
         self.random_init_var_parameters()
         print("Initialization finished")
-        self.putParametersToDevice()
+        self.put_parameters_to_device()
 
-    def putParametersToDevice(self):
+    def put_parameters_to_device(self):
         for parameter in self.list_of_parameters_needing_gradient:
             parameter.requires_grad_(True)
 
@@ -123,40 +127,41 @@ class _PLN(ABC):
         """
-        A list containing all the parameters that needs to be upgraded via
-        a gradient step.
+        A list containing all the parameters that need to be updated via
+        a gradient step.
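+
+        For instance, `PLN` below returns `[self._M, self._S]`, while
+        `_PLNPCA` also includes `self._C` and `self._beta`.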
""" - pass def fit( self, - Y, + counts, covariates=None, - O=None, + offsets=None, nb_max_iteration=50000, lr=0.01, class_optimizer=torch.optim.Rprop, tol=1e-3, do_smart_init=True, verbose=False, - O_formula="sum", + offsets_formula="sum", keep_going=False, ): """ Main function of the class. Fit a PLN to the data. Parameters ---------- - Y : torch.tensor or ndarray or DataFrame. + counts : torch.tensor or ndarray or DataFrame. 2-d count data. - covariates : torch.tensor or ndarray or DataFrame or None, default = None - If not `None`, the first dimension should equal the first dimension of `Y`. - O : torch.tensor or ndarray or DataFrame or None, default = None - Model offset. If not `None`, size should be the same as `Y`. + covariates : torch.tensor or ndarray or DataFrame or + None, default = None + If not `None`, the first dimension should equal the first + dimension of `counts`. + offsets : torch.tensor or ndarray or DataFrame or None, default = None + Model offset. If not `None`, size should be the same as `counts`. """ - self.t0 = time.time() + self.beginnning_time = time.time() if keep_going is False: - self.format_datas(Y, covariates, O, O_formula) - check_parameters_shape(self.Y, self.covariates, self.O) + self.format_datas(counts, covariates, offsets, offsets_formula) + check_parameters_shape(self.counts, self.covariates, self.offsets) self.init_parameters(do_smart_init) if self._fitted is True and keep_going is True: - self.t0 -= self.plotargs.running_times[-1] + self.beginnning_time -= self.plotargs.running_times[-1] self.optim = class_optimizer(self.list_of_parameters_needing_gradient, lr=lr) nb_iteration_done = 0 stop_condition = False @@ -176,7 +181,7 @@ class _PLN(ABC): simple docstrings with black errors """ self.optim.zero_grad() - loss = -self.compute_ELBO() + loss = -self.compute_elbo() loss.backward() self.optim.step() self.update_closed_forms() @@ -199,15 +204,15 @@ class _PLN(ABC): print("-------UPDATE-------") print("Iteration number: ", self.plotargs.iteration_number) print("Criterion: ", np.round(self.plotargs.criterions[-1], 8)) - print("ELBO:", np.round(self.plotargs.ELBOs_list[-1], 6)) + print("ELBO:", np.round(self.plotargs.elbos_list[-1], 6)) def compute_criterion_and_update_plotargs(self, loss, tol): - self.plotargs.ELBOs_list.append(-loss.item() / self._n) - self.plotargs.running_times.append(time.time() - self.t0) - if self.plotargs.iteration_number > self.window: + self.plotargs.elbos_list.append(-loss.item() / self._n) + self.plotargs.running_times.append(time.time() - self.beginnning_time) + if self.plotargs.iteration_number > self.WINDOW: criterion = abs( - self.plotargs.ELBOs_list[-1] - - self.plotargs.ELBOs_list[-1 - self.window] + self.plotargs.elbos_list[-1] + - self.plotargs.elbos_list[-1 - self.WINDOW] ) self.plotargs.criterions.append(criterion) return criterion @@ -217,11 +222,10 @@ class _PLN(ABC): pass @abstractmethod - def compute_ELBO(self): + def compute_elbo(self): """ Compute the Evidence Lower BOund (ELBO) that will be maximized by pytorch. 
""" - pass def display_Sigma(self, ax=None, savefig=False, name_file=""): """ @@ -249,12 +253,12 @@ class _PLN(ABC): plt.show() # to avoid displaying a blanck screen def __str__(self): - string = "A multivariate Poisson Lognormal with " + self.description + "\n" + string = f"A multivariate Poisson Lognormal with {self.description}" string += nice_string_of_dict(self.dict_for_printing) return string def show(self, axes=None): - print("Best likelihood:", np.max(-self.plotargs.ELBOs_list[-1])) + print("Best likelihood:", np.max(-self.plotargs.elbos_list[-1])) if axes is None: _, axes = plt.subplots(1, 3, figsize=(23, 5)) self.plotargs.show_loss(ax=axes[-3]) @@ -263,14 +267,16 @@ class _PLN(ABC): plt.show() @property - def ELBOs_list(self): - return self.plotargs.ELBOs_list + def elbos_list(self): + return self.plotargs.elbos_list @property def loglike(self): if self._fitted is False: - raise NotFitError() - return self._n * self.ELBOs_list[-1] + raise AttributeError( + "The model is not fitted so that it did not" "computed likelihood" + ) + return self._n * self.elbos_list[-1] @property def BIC(self): @@ -290,7 +296,11 @@ class _PLN(ABC): @property def dict_data(self): - return {"Y": self.Y, "covariates": self.covariates, "O": self.O} + return { + "counts": self.counts, + "covariates": self.covariates, + "offsets": self.offsets, + } @property def model_in_a_dict(self): @@ -328,13 +338,15 @@ class _PLN(ABC): self.set_parameters_from_dict(model_in_a_dict) def set_data_from_dict(self, model_in_a_dict): - Y = model_in_a_dict["Y"] - covariates, O, O_formula = extract_cov_O_Oformula(model_in_a_dict) - self.format_datas(Y, covariates, O, O_formula) - check_parameters_shape(self.Y, self.covariates, self.O) - self.Y = Y + counts = model_in_a_dict["counts"] + covariates, offsets, offsets_formula = extract_cov_offsets_offsetsformula( + model_in_a_dict + ) + self.format_datas(counts, covariates, offsets, offsets_formula) + check_parameters_shape(self.counts, self.covariates, self.offsets) + self.counts = counts self.covariates = covariates - self.O = O + self.offsets = offsets @abstractmethod def set_parameters_from_dict(self, model_in_a_dict): @@ -368,12 +380,15 @@ class PLN(_PLN): def list_of_parameters_needing_gradient(self): return [self._M, self._S] - def compute_ELBO(self): + def compute_elbo(self): """ - Compute the Evidence Lower BOund (ELBO) that will be maximized by pytorch. Here we use the profiled ELBO + Compute the Evidence Lower BOund (ELBO) that will be + maximized by pytorch. Here we use the profiled ELBO for the full covariance matrix. 
""" - return profiledELBOPLN(self.Y, self.covariates, self.O, self._M, self._S) + return profiledELBOPLN( + self.counts, self.covariates, self.offsets, self._M, self._S + ) def smart_init_model_parameters(self): # no model parameters since we are doing a profiled ELBO @@ -433,15 +448,15 @@ class PLNPCA: def __init__(self, ranks): if isinstance(ranks, list): self.ranks = ranks - self.dict_PLNPCA = {} + self.dict_models = {} for rank in ranks: if isinstance(rank, int): - self.dict_PLNPCA[rank] = _PLNPCA(rank) + self.dict_models[rank] = _PLNPCA(rank) else: TypeError("Please instantiate with either a list of integers.") elif isinstance(ranks, int): self.ranks = [ranks] - self.dict_PLNPCA = {ranks: _PLNPCA(ranks)} + self.dict_models = {ranks: _PLNPCA(ranks)} else: raise TypeError( "Please instantiate with either a list of integer or an integer" @@ -449,49 +464,53 @@ class PLNPCA: @property def models(self): - return list(self.dict_PLNPCA.values()) + return list(self.dict_models.values()) def fit( self, - Y, + counts, covariates=None, - O=None, + offsets=None, nb_max_iteration=100000, lr=0.01, class_optimizer=torch.optim.Rprop, tol=1e-3, do_smart_init=True, verbose=False, - O_formula="sum", + offsets_formula="sum", ): - for pca in self.dict_PLNPCA.values(): + for pca in self.dict_models.values(): pca.fit( - Y, + counts, covariates, - O, + offsets, nb_max_iteration, lr, class_optimizer, tol, do_smart_init, verbose, - O_formula, + offsets_formula, ) def __getitem__(self, rank): - return self.dict_PLNPCA[rank] + return self.dict_models[rank] @property def BIC(self): - return {model._q: np.round(model.BIC, 3) for model in self.dict_PLNPCA.values()} + return { + model._rank: np.round(model.BIC, 3) for model in self.dict_models.values() + } @property def AIC(self): - return {model._q: np.round(model.AIC, 3) for model in self.dict_PLNPCA.values()} + return { + model._rank: np.round(model.AIC, 3) for model in self.dict_models.values() + } @property def loglikes(self): - return {model._q: model.loglike for model in self.dict_PLNPCA.values()} + return {model._rank: model.loglike for model in self.dict_models.values()} def show(self): bic = self.BIC @@ -521,13 +540,13 @@ class PLNPCA: return self[self.ranks[np.argmin(list(self.AIC.values()))]] def save_model(self, rank, filename): - self.dict_PLNPCA[rank].save_model(filename) + self.dict_models[rank].save_model(filename) with open(filename, "wb") as fp: pickle.dump(self.model_in_a_dict, fp) def save_models(self, filename): for model in self.models: - model_filename = filename + str(model._q) + model_filename = filename + str(model._rank) model.save_model(model_filename) @property @@ -542,25 +561,27 @@ class PLNPCA: to_print += f"Ranks considered:{self.ranks} \n \n" to_print += f"BIC metric:{self.BIC}\n" to_print += ( - f"Best model (lower BIC):{self.best_model(criterion = 'BIC')._q}\n \n" + f"Best model (lower BIC):{self.best_model(criterion = 'BIC')._rank}\n \n" ) to_print += f"AIC metric:{self.AIC}\n" - to_print += f"Best model (lower AIC):{self.best_model(criterion = 'AIC')._q}\n" + to_print += ( + f"Best model (lower AIC):{self.best_model(criterion = 'AIC')._rank}\n" + ) return to_print def load_model_from_file(self, rank, path_of_file): with open(path_of_file, "rb") as fp: model_in_a_dict = pickle.load(fp) rank = model_in_a_dict["rank"] - self.dict_PLNPCA[rank].model_in_a_dict = model_in_a_dict + self.dict_models[rank].model_in_a_dict = model_in_a_dict class _PLNPCA(_PLN): NAME = "PLNPCA" - def __init__(self, q): + def __init__(self, rank): 
         super().__init__()
-        self._q = q
+        self._rank = rank
 
     @property
     def dict_model_parameters(self):
@@ -571,23 +592,25 @@ class _PLNPCA(_PLN):
 
     def smart_init_model_parameters(self):
         super().smart_init_beta()
-        self._C = init_C(self.Y, self.covariates, self.O, self._beta, self._q)
+        self._C = init_c(
+            self.counts, self.covariates, self.offsets, self._beta, self._rank
+        )
 
     def random_init_model_parameters(self):
         super().random_init_beta()
-        self._C = torch.randn((self._p, self._q)).to(DEVICE)
+        self._C = torch.randn((self._p, self._rank)).to(DEVICE)
 
     def random_init_var_parameters(self):
-        self._S = 1 / 2 * torch.ones((self._n, self._q)).to(DEVICE)
-        self._M = torch.ones((self._n, self._q)).to(DEVICE)
+        self._S = 1 / 2 * torch.ones((self._n, self._rank)).to(DEVICE)
+        self._M = torch.ones((self._n, self._rank)).to(DEVICE)
 
     def smart_init_var_parameters(self):
         self._M = (
-            init_M(self.Y, self.covariates, self.O, self._beta, self._C)
+            init_M(self.counts, self.covariates, self.offsets, self._beta, self._C)
             .to(DEVICE)
             .detach()
         )
-        self._S = 1 / 2 * torch.ones((self._n, self._q)).to(DEVICE)
+        self._S = 1 / 2 * torch.ones((self._n, self._rank)).to(DEVICE)
         self._M.requires_grad_(True)
         self._S.requires_grad_(True)
 
@@ -595,11 +618,11 @@ class _PLNPCA(_PLN):
     def list_of_parameters_needing_gradient(self):
         return [self._C, self._beta, self._M, self._S]
 
-    def compute_ELBO(self):
+    def compute_elbo(self):
         return ELBOPLNPCA(
-            self.Y,
+            self.counts,
             self.covariates,
-            self.O,
+            self.offsets,
             self._M,
             self._S,
             self._C,
@@ -608,7 +631,7 @@ class _PLNPCA(_PLN):
 
     @property
     def number_of_parameters(self):
-        return self._p * (self._d + self._q) - self._q * (self._q - 1) / 2
+        return self._p * (self._d + self._rank) - self._rank * (self._rank - 1) / 2
 
     def set_parameters_from_dict(self, model_in_a_dict):
         S = format_data(model_in_a_dict["S"])
@@ -634,16 +657,16 @@ class _PLNPCA(_PLN):
 
     @property
     def description(self):
-        return f" with {self._q} principal component."
+        return f"{self._rank} principal components."
     @property
     def latent_variables(self):
         return torch.matmul(self._M, self._C.T).detach()
 
     def get_projected_latent_variables(self, nb_dim):
-        if nb_dim > self._q:
+        if nb_dim > self._rank:
             raise AttributeError(
-                "The number of dimension {nb_dim} is larger than the rank {self._q}"
+                f"The number of dimensions {nb_dim} is larger than the rank {self._rank}"
             )
         ortho_C = torch.linalg.qr(self._C, "reduced")[0]
         return torch.mm(self.latent_variables, ortho_C[:, :nb_dim]).detach()
@@ -654,7 +677,7 @@ class _PLNPCA(_PLN):
 
     @property
     def model_in_a_dict(self):
-        return super().model_in_a_dict | {"rank": self._q}
+        return super().model_in_a_dict | {"rank": self._rank}
 
     @model_in_a_dict.setter
     def model_in_a_dict(self, model_in_a_dict):
@@ -690,20 +713,20 @@ class ZIPLN(PLN):
     # should change the good initialization, especially for Theta_zero
     def smart_init_model_parameters(self):
         super().smart_init_model_parameters()
-        self._Sigma = init_Sigma(self.Y, self.covariates, self.O, self._beta)
+        self._Sigma = init_sigma(self.counts, self.covariates, self.offsets, self._beta)
         self._Theta_zero = torch.randn(self._d, self._p)
 
     def random_init_var_parameters(self):
-        self.dirac = self.Y == 0
+        self.dirac = self.counts == 0
         self._M = torch.randn(self._n, self._p)
         self._S = torch.randn(self._n, self._p)
         self.pi = torch.empty(self._n, self._p).uniform_(0, 1).to(DEVICE) * self.dirac
 
-    def compute_ELBO(self):
+    def compute_elbo(self):
         return ELBOZIPLN(
-            self.Y,
+            self.counts,
             self.covariates,
-            self.O,
+            self.offsets,
             self._M,
             self._S,
             self.pi,
@@ -723,7 +746,12 @@ class ZIPLN(PLN):
             self.covariates, self._M, self._S, self._beta, self._n
         )
         self.pi = closed_formula_pi(
-            self.O, self._M, self._S, self.dirac, self.covariates, self._Theta_zero
+            self.offsets,
+            self._M,
+            self._S,
+            self.dirac,
+            self.covariates,
+            self._Theta_zero,
         )
 
     @property
diff --git a/pyPLNmodels/_closed_forms.py b/pyPLNmodels/_closed_forms.py
index d9d7dd9c7c84dfe885122f6307f6cb9c5b9a2151..5964d801ac2abed5bd886f09b097f34c72853289 100644
--- a/pyPLNmodels/_closed_forms.py
+++ b/pyPLNmodels/_closed_forms.py
@@ -16,6 +16,6 @@ def closed_formula_beta(covariates, M):
     )
 
 
-def closed_formula_pi(O, M, S, dirac, covariates, Theta_zero):
-    A = torch.exp(O + M + torch.multiply(S, S) / 2)
+def closed_formula_pi(offsets, M, S, dirac, covariates, Theta_zero):
+    A = torch.exp(offsets + M + torch.multiply(S, S) / 2)
     return torch.multiply(torch.sigmoid(A + torch.mm(covariates, Theta_zero)), dirac)
diff --git a/pyPLNmodels/_utils.py b/pyPLNmodels/_utils.py
index ef44b7c6c81d8f359278281975cfb9a67a7eaaa5..b2b60638fd46ffc49f7633c3701c76df3febc235 100644
--- a/pyPLNmodels/_utils.py
+++ b/pyPLNmodels/_utils.py
@@ -1,40 +1,42 @@
-import math
+import math  # pylint:disable=[C0114]
+from scipy.linalg import toeplitz
 
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.linalg as TLA
 import pandas as pd
-from scipy.linalg import toeplitz
 
 torch.set_default_dtype(torch.float64)
 
-# O is not doing anything in the initialization of Sigma. should be fixed.
+
+# offsets are not used in the initialization of Sigma; this should be fixed.
 if torch.cuda.is_available():
     DEVICE = torch.device("cuda")
 else:
     DEVICE = torch.device("cpu")
-# DEVICE = torch.device('cpu') # have to deal with this
 
 
 class PLNPlotArgs:
     def __init__(self, window):
         self.window = window
-        self.running_times = list()
+        self.running_times = []
         self.criterions = [1] * window
-        self.ELBOs_list = list()
+        self.elbos_list = []
 
     @property
     def iteration_number(self):
-        return len(self.ELBOs_list)
+        return len(self.elbos_list)
 
-    def show_loss(self, ax=None, savefig=False, nameDoss=""):
+    def show_loss(self, ax=None, savefig=False, name_doss=""):
         """Show the ELBO of the algorithm along the iterations.
 
         args:
             'ax': AxesSubplot object. The ELBO will be displayed in this ax
-                if not None. If None, will simply create an axis. Default is None.
-            'name_file': str. The name of the file the graphic will be saved to.
+                if not None. If None, will simply create an axis. Default
+                is None.
+            'name_doss': str. The name of the file the graphic
+                will be saved to.
                 Default is 'fastPLNPCA_ELBO'.
         returns: None but displays the ELBO.
         """
@@ -42,27 +44,29 @@ class PLNPlotArgs:
         if ax is None:
             ax = plt.gca()
         ax.plot(
             self.running_times,
-            -np.array(self.ELBOs_list),
+            -np.array(self.elbos_list),
             label="Negative ELBO",
         )
-        ax.set_title(
-            "Negative ELBO. Best ELBO = " + str(np.round(self.ELBOs_list[-1], 6))
-        )
+        last_elbos = np.round(self.elbos_list[-1], 6)
+        ax.set_title(f"Negative ELBO. Best ELBO = {last_elbos}")
         ax.set_yscale("log")
         ax.set_xlabel("Seconds")
         ax.set_ylabel("ELBO")
         ax.legend()
         # save the graphic if needed
         if savefig:
-            plt.savefig(nameDoss)
+            plt.savefig(name_doss)
 
-    def show_stopping_criterion(self, ax=None, savefig=False, nameDoss=""):
+    def show_stopping_criterion(self, ax=None, savefig=False, name_doss=""):
         """Show the criterion of the algorithm along the iterations.
 
         args:
-            'ax': AxesSubplot object. The criterion will be displayed in this ax
-                if not None. If None, will simply create an axis. Default is None.
-            'name_file': str. The name of the file the graphic will be saved to.
+            'ax': AxesSubplot object. The criterion will be displayed
+                in this ax
+                if not None. If None, will simply create an axis.
+                Default is None.
+            'name_doss': str. The name of the file the graphic will
+                be saved to.
                 Default is 'fastPLN_criterion'.
         returns: None but displays the criterion.
         """
@@ -80,71 +84,69 @@ class PLNPlotArgs:
             ax.legend()
         # save the graphic if needed
         if savefig:
-            plt.savefig(nameDoss)
+            plt.savefig(name_doss)
 
 
-def init_Sigma(Y, covariates, O, beta):
-    """Initialization for Sigma for the PLN model. Take the log of Y
-    (careful when Y=0), remove the covariates effects X@beta and
-    then do as a MLE for Gaussians samples.
+def init_sigma(counts, covariates, offsets, beta):
+    """Initialization for Sigma for the PLN model. Take the log of counts
+    (careful when counts=0), remove the covariates effects X@beta and
+    then compute the MLE as for Gaussian samples.
     Args :
-        Y: torch.tensor. Samples with size (n,p)
-        0: torch.tensor. Offset, size (n,p)
+        counts: torch.tensor. Samples with size (n,p)
+        offsets: torch.tensor. Offset, size (n,p)
         covariates: torch.tensor. Covariates, size (n,d)
         beta: torch.tensor of size (d,p)
     Returns : torch.tensor of size (p,p).
     """
-    # Take the log of Y, and be careful when Y = 0. If Y = 0,
-    # then we set the log(Y) as 0.
+    # Take the log of counts; entries with counts = 0 get the log value -2
+    # (i.e. log(exp(-2))) rather than -inf.
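+    # e.g. counts = [[0., 3.]] gives log_y = [[-2., log(3.)]]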
+    log_y = torch.log(counts + (counts == 0) * math.exp(-2))
     # we remove the mean so that we see only the covariances
-    log_Y_centered = (
-        log_Y - torch.matmul(covariates.unsqueeze(1), beta.unsqueeze(0)).squeeze()
+    log_y_centered = (
+        log_y - torch.matmul(covariates.unsqueeze(1), beta.unsqueeze(0)).squeeze()
     )
     # MLE in a Gaussian setting
-    n = Y.shape[0]
-    Sigma_hat = 1 / (n - 1) * (log_Y_centered.T) @ log_Y_centered
+    n = counts.shape[0]
+    Sigma_hat = 1 / (n - 1) * (log_y_centered.T) @ log_y_centered
     return Sigma_hat
 
 
-def init_C(Y, covariates, O, beta, q):
+def init_c(counts, covariates, offsets, beta, rank):
     """Initialization for C for the PLN model. Get a first guess for Sigma
     that is easier to estimate and then takes
-    the q largest eigenvectors to get C.
+    the rank largest eigenvectors to get C.
     Args :
-        Y: torch.tensor. Samples with size (n,p)
-        0: torch.tensor. Offset, size (n,p)
-        covarites: torch.tensor. Covariates, size (n,d)
+        counts: torch.tensor. Samples with size (n,p)
+        offsets: torch.tensor. Offset, size (n,p)
+        covariates: torch.tensor. Covariates, size (n,d)
         beta: torch.tensor of size (d,p)
-        q: int. The dimension of the latent space, i.e. the reducted dimension.
+        rank: int. The dimension of the latent space, i.e. the reduced dimension.
     Returns :
-        torch.tensor of size (p,q). The initialization of C.
+        torch.tensor of size (p,rank). The initialization of C.
     """
-    # get a guess for Sigma
-    Sigma_hat = init_Sigma(Y, covariates, O, beta).detach()
-    # taking the q largest eigenvectors
-    C = C_from_Sigma(Sigma_hat, q)
+    Sigma_hat = init_sigma(counts, covariates, offsets, beta).detach()
+    C = C_from_Sigma(Sigma_hat, rank)
     return C
 
 
-def init_M(Y, covariates, O, beta, C, N_iter_max=500, lr=0.01, eps=7e-3):
+def init_M(counts, covariates, offsets, beta, C, N_iter_max=500, lr=0.01, eps=7e-3):
     """Initialization for the variational parameter M. Basically,
     the mode of the log_posterior is computed.
 
     Args:
-        Y: torch.tensor. Samples with size (n,p)
-        0: torch.tensor. Offset, size (n,p)
+        counts: torch.tensor. Samples with size (n,p)
+        offsets: torch.tensor. Offset, size (n,p)
         covariates: torch.tensor. Covariates, size (n,d)
         beta: torch.tensor of size (d,p)
         N_iter_max: int. The maximum number of iteration in the
            gradient ascent.
        lr: positive float. The learning rate of the optimizer.
-        eps: positive float, optional. The tolerance. The algorithm will stop if
-            the maximum of |W_t-W_{t-1}| is lower than eps, where W_t
+        eps: positive float, optional. The tolerance. The algorithm will stop
+            if the maximum of |W_t-W_{t-1}| is lower than eps, where W_t
             is the t-th iteration of the algorithm. This parameter
-            changes a lot the resulting time of the algorithm. Default is 9e-3.
+            strongly affects the running time of the algorithm. Default is 7e-3.
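+        Returns:
+            torch.tensor of size (n, rank): the mode of the posterior, used to
+            initialize the variational parameter M.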
""" - W = torch.randn(Y.shape[0], C.shape[1], device=DEVICE) + W = torch.randn(counts.shape[0], C.shape[1], device=DEVICE) W.requires_grad_(True) optimizer = torch.optim.Rprop([W], lr=lr) crit = 2 * eps @@ -152,7 +154,8 @@ def init_M(Y, covariates, O, beta, C, N_iter_max=500, lr=0.01, eps=7e-3): keep_condition = True i = 0 while i < N_iter_max and keep_condition: - loss = -torch.mean(log_PW_given_Y(Y, covariates, O, W, C, beta)) + batch_loss = log_PW_given_Y(counts, covariates, offsets, W, C, beta) + loss = -torch.mean(batch_loss) loss.backward() optimizer.step() crit = torch.max(torch.abs(W - old_W)) @@ -164,17 +167,17 @@ def init_M(Y, covariates, O, beta, C, N_iter_max=500, lr=0.01, eps=7e-3): return W -def sigmoid(x): +def sigmoid(tens): """Compute the sigmoid function of x element-wise.""" - return 1 / (1 + torch.exp(-x)) + return 1 / (1 + torch.exp(-tens)) -def sample_PLN(C, beta, covariates, O, B_zero=None): +def sample_PLN(C, beta, covariates, offsets, B_zero=None): """Sample Poisson log Normal variables. If B_zero is not None, the model will be zero inflated. Args: - C: torch.tensor of size (p,q). The matrix C of the PLN model + C: torch.tensor of size (p,rank). The matrix C of the PLN model beta: torch.tensor of size (d,p). Regression parameter. 0: torch.tensor of size (n,p). Offsets. covariates : torch.tensor of size (n,d). Covariates. @@ -182,48 +185,46 @@ def sample_PLN(C, beta, covariates, O, B_zero=None): the ZIPLN model is chosen, so that it will add a Bernouilli layer. Default is None. Returns : - Y: torch.tensor of size (n,p), the count variables. + counts: torch.tensor of size (n,p), the count variables. Z: torch.tensor of size (n,p), the gaussian latent variables. ksi: torch.tensor of size (n,p), the bernoulli latent variables (full of zeros if B_zero is None). """ - n = O.shape[0] - q = C.shape[1] - Z = torch.mm(torch.randn(n, q, device=DEVICE), C.T) + covariates @ beta - parameter = torch.exp(O + Z) + n = offsets.shape[0] + rank = C.shape[1] + Z = torch.mm(torch.randn(n, rank, device=DEVICE), C.T) + covariates @ beta + parameter = torch.exp(offsets + Z) if B_zero is not None: print("ZIPLN is sampled") ZI_cov = covariates @ B_zero ksi = torch.bernoulli(1 / (1 + torch.exp(-ZI_cov))) else: ksi = 0 - Y = (1 - ksi) * torch.poisson(parameter) - return Y, Z, ksi + counts = (1 - ksi) * torch.poisson(parameter) + return counts, Z, ksi -def logit(x): +def logit(tens): """logit function. If x is too close from 1, we set the result to 0. performs logit element wise.""" - return torch.nan_to_num(torch.log(x / (1 - x)), nan=0, neginf=0, posinf=0) + return torch.nan_to_num(torch.log(x / (1 - tens)), nan=0, neginf=0, posinf=0) def build_block_Sigma(p, block_size): """Build a matrix per block of size (p,p). There will be p//block_size+1 blocks of size block_size. The first p//block_size ones will be the same - size. The last one will have a smaller size (size (0,0) if p%block_size = 0). + size. The last one will have a smaller size (size (0,0) + if p%block_size = 0). Args: p: int. block_size: int. Should be lower than p. Returns: a torch.tensor of size (p,p) and symmetric. """ - # np.random.seed(0) k = p // block_size # number of matrices of size p//block_size. 
-    # will multiply each block by some random quantities
     alea = np.random.randn(k + 1) ** 2 + 1
     Sigma = np.zeros((p, p))
     last_block_size = p - k * block_size
-    # We need to form the k matrics of size p//block_size
     for i in range(k):
         Sigma[
             i * block_size : (i + 1) * block_size, i * block_size : (i + 1) * block_size
@@ -236,28 +237,29 @@ def build_block_Sigma(p, block_size):
     return Sigma
 
 
-def C_from_Sigma(Sigma, q):
-    """Get the best matrix of size (p,q) when Sigma is of
-    size (p,p). i.e. reduces norm(Sigma-C@C.T)
+def C_from_Sigma(Sigma, rank):
+    """Get the best matrix of size (p,rank) when Sigma is of
+    size (p,p), i.e. the one that minimizes norm(Sigma-C@C.T).
     Args :
-        Sigma: torch.tensor of size (p,p). Should be positive definite and symmetric.
-        q: int. The number of columns wanted for C
+        Sigma: torch.tensor of size (p,p). Should be positive definite and
+            symmetric.
+        rank: int. The number of columns wanted for C
 
     Returns:
-        C_reduct: torch.tensor of size (p,q) containing the q eigenvectors with largest eigenvalues.
+        C_reduct: torch.tensor of size (p,rank) containing the rank eigenvectors
+            with largest eigenvalues.
     """
-    w, v = TLA.eigh(Sigma)  # Get the eigenvaluues and eigenvectors
-    # Take only the q largest
-    C_reduct = v[:, -q:] @ torch.diag(torch.sqrt(w[-q:]))
+    w, v = TLA.eigh(Sigma)
+    C_reduct = v[:, -rank:] @ torch.diag(torch.sqrt(w[-rank:]))
     return C_reduct
 
 
-def init_beta(Y, covariates, O):
-    log_Y = torch.log(Y + (Y == 0) * math.exp(-2))
-    log_Y = log_Y.to(DEVICE)
+def init_beta(counts, covariates, offsets):
+    log_y = torch.log(counts + (counts == 0) * math.exp(-2))
+    log_y = log_y.to(DEVICE)
     return torch.matmul(
         torch.inverse(torch.matmul(covariates.T, covariates)),
-        torch.matmul(covariates.T, log_Y),
+        torch.matmul(covariates.T, log_y),
     )
 
 
@@ -270,62 +272,53 @@ def log_stirling(n):
         An approximation of log(n_!) element-wise.
     """
     n_ = n + (n == 0)  # Replace the 0 with 1. It doesn't change anything since 0! = 1!
-    return torch.log(torch.sqrt(2 * np.pi * n_)) + n_ * torch.log(
-        n_ / math.exp(1)
-    )  # Stirling formula
+    return torch.log(torch.sqrt(2 * np.pi * n_)) + n_ * torch.log(n_ / math.exp(1))
 
 
-def log_PW_given_Y(Y_b, covariates_b, O_b, W, C, beta):
+def log_PW_given_Y(counts_b, covariates_b, offsets_b, W, C, beta):
     """Compute the log posterior of the PLN model. Compute it either
-    for W of size (N_samples, N_batch,q) or (batch_size, q). Need to have
+    for W of size (N_samples, N_batch,rank) or (batch_size, rank). Need to have
     both cases since it is done for both cases after. Please refer to the
     mathematical description of the package for the formula.
 
     Args :
-        Y_b : torch.tensor of size (batch_size, p)
+        counts_b : torch.tensor of size (batch_size, p)
         covariates_b : torch.tensor of size (batch_size, d) or (d)
     Returns:
         torch.tensor of size (N_samples, batch_size) or (batch_size).
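+
+        With A_b = offsets_b + W @ C.T + covariates_b @ beta, the returned value is
+        -rank/2 * log(2*pi) - ||W||^2 / 2
+        + sum(counts_b * A_b - exp(A_b) - log(counts_b!), over the last axis).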
""" length = len(W.shape) - q = W.shape[-1] + rank = W.shape[-1] if length == 2: CW = torch.matmul(C.unsqueeze(0), W.unsqueeze(2)).squeeze() elif length == 3: CW = torch.matmul(C.unsqueeze(0).unsqueeze(1), W.unsqueeze(3)).squeeze() - A_b = O_b + CW + covariates_b @ beta - first_term = -q / 2 * math.log(2 * math.pi) - 1 / 2 * torch.norm(W, dim=-1) ** 2 - second_term = torch.sum(-torch.exp(A_b) + A_b * Y_b - log_stirling(Y_b), axis=-1) + A_b = offsets_b + CW + covariates_b @ beta + first_term = -rank / 2 * math.log(2 * math.pi) - 1 / 2 * torch.norm(W, dim=-1) ** 2 + second_term = torch.sum( + -torch.exp(A_b) + A_b * counts_b - log_stirling(counts_b), axis=-1 + ) return first_term + second_term -def plot_list(myList, label, ax=None): - if ax == None: - ax = plt.gca() - ax.plot(np.arange(len(myList)), myList, label=label) - - -def trunc_log(x, eps=1e-16): - y = torch.min(torch.max(x, torch.tensor([eps])), torch.tensor([1 - eps])) +def trunc_log(tens, eps=1e-16): + y = torch.min(torch.max(tens, torch.tensor([eps])), torch.tensor([1 - eps])) return torch.log(y) -def get_O_from_sum_of_Y(Y): - sumOfY = torch.sum(Y, axis=1) - return sumOfY.repeat((Y.shape[1], 1)).T +def get_offsets_from_sum_of_counts(counts): + sum_of_counts = torch.sum(counts, axis=1) + return sum_of_counts.repeat((counts.shape[1], 1)).T def raise_wrong_dimension_error( str_first_array, str_second_array, dim_first_array, dim_second_array, dim_of_error ): - raise ValueError( - "The size of tensor {} ({}) must mach the size of tensor {} ({}) at non-singleton dimension {}".format( - str_first_array, - dim_first_array, - str_second_array, - dim_second_array, - dim_of_error, - ) + msg = ( + f"The size of tensor {str_first_array} ({dim_first_array}) must match" + f"the size of tensor {str_second_array} ({dim_second_array}) at" + f"non-singleton dimension {dim_of_error}" ) + raise ValueError(msg) def check_dimensions_are_equal( @@ -341,24 +334,18 @@ def check_dimensions_are_equal( ) -def init_S(Y, covariates, O, beta, C, M): - n, q = M.shape +def init_S(counts, covariates, offsets, beta, C, M): + n, rank = M.shape batch_matrix = torch.matmul(C.unsqueeze(2), C.unsqueeze(1)).unsqueeze(0) CW = torch.matmul(C.unsqueeze(0), M.unsqueeze(2)).squeeze() - common = torch.exp(O + covariates @ beta + CW).unsqueeze(2).unsqueeze(3) + common = torch.exp(offsets + covariates @ beta + CW).unsqueeze(2).unsqueeze(3) prod = batch_matrix * common - # The hessian of the posterior - hess_posterior = torch.sum(prod, axis=1) + torch.eye(q).to(DEVICE) + hess_posterior = torch.sum(prod, axis=1) + torch.eye(rank).to(DEVICE) inv_hess_posterior = -torch.inverse(hess_posterior) hess_posterior = torch.diagonal(inv_hess_posterior, dim1=-2, dim2=-1) return hess_posterior -class NotFitError(Exception): - def __init__(self, message="Please fit your model.", *args, **kwargs): - super().__init__(message, *args, **kwargs) - - def format_data(data): if isinstance(data, pd.DataFrame): return torch.from_numpy(data.values).double().to(DEVICE) @@ -366,33 +353,32 @@ def format_data(data): return torch.from_numpy(data).double().to(DEVICE) if isinstance(data, torch.Tensor): return data - else: - raise AttributeError( - "Please insert either a numpy array, pandas.DataFrame or torch.tensor" - ) + raise AttributeError( + "Please insert either a numpy array, pandas.DataFrame or torch.tensor" + ) -def check_parameters_shape(Y, covariates, O): - nY, pY = Y.shape - nO, pO = O.shape - nCov, _ = covariates.shape - check_dimensions_are_equal("Y", "O", nY, nO, 0) - 
check_dimensions_are_equal("Y", "covariates", nY, nCov, 0) - check_dimensions_are_equal("Y", "O", pY, pO, 1) +def check_parameters_shape(counts, covariates, offsets): + n_counts, p_counts = counts.shape + n_offsets, p_offsets = offsets.shape + n_cov, _ = covariates.shape + check_dimensions_are_equal("counts", "offsets", n_counts, n_offsets, 0) + check_dimensions_are_equal("counts", "covariates", n_counts, n_cov, 0) + check_dimensions_are_equal("counts", "offsets", p_counts, p_offsets, 1) def extract_data(dictionnary, parameter_in_string): try: return dictionnary[parameter_in_string] - except: + except KeyError: return None -def extract_cov_O_Oformula(dictionnary): +def extract_cov_offsets_offsetsformula(dictionnary): covariates = extract_data(dictionnary, "covariates") - O = extract_data(dictionnary, "O") - O_formula = extract_data(dictionnary, "O_formula") - return covariates, O, O_formula + offsets = extract_data(dictionnary, "offsets") + offsets_formula = extract_data(dictionnary, "offsets_formula") + return covariates, offsets, offsets_formula def nice_string_of_dict(dictionnary): diff --git a/pyPLNmodels/elbos.py b/pyPLNmodels/elbos.py index cc6ea1c0ef044c3c6a035f3acb15e504b8bb1950..81590770fb0c0da4640fa1d19492e4468e604980 100644 --- a/pyPLNmodels/elbos.py +++ b/pyPLNmodels/elbos.py @@ -3,13 +3,13 @@ from ._utils import log_stirling, trunc_log from ._closed_forms import closed_formula_Sigma, closed_formula_beta -def ELBOPLN(Y, covariates, O, M, S, Sigma, beta): +def ELBOPLN(counts, covariates, offsets, M, S, Sigma, beta): """ Compute the ELBO (Evidence LOwer Bound) for the PLN model. See the doc for more details on the computation. Args: - Y: torch.tensor. Counts with size (n,p) + counts: torch.tensor. Counts with size (n,p) 0: torch.tensor. Offset, size (n,p) covariates: torch.tensor. Covariates, size (n,d) M: torch.tensor. Variational parameter with size (n,p) @@ -19,14 +19,14 @@ def ELBOPLN(Y, covariates, O, M, S, Sigma, beta): Returns: torch.tensor of size 1 with a gradient. """ - n, p = Y.shape + n, p = counts.shape SrondS = torch.multiply(S, S) - OplusM = O + M + offsetsplusM = offsets + M MmoinsXB = M - torch.mm(covariates, beta) elbo = -n / 2 * torch.logdet(Sigma) elbo += torch.sum( - torch.multiply(Y, OplusM) - - torch.exp(OplusM + SrondS / 2) + torch.multiply(counts, offsetsplusM) + - torch.exp(offsetsplusM + SrondS / 2) + 1 / 2 * torch.log(SrondS) ) DplusMmoinsXB2 = torch.diag(torch.sum(SrondS, dim=0)) + torch.mm( @@ -34,19 +34,19 @@ def ELBOPLN(Y, covariates, O, M, S, Sigma, beta): ) moinspsur2n = 1 / 2 * torch.trace(torch.mm(torch.inverse(Sigma), DplusMmoinsXB2)) elbo -= 1 / 2 * torch.trace(torch.mm(torch.inverse(Sigma), DplusMmoinsXB2)) - elbo -= torch.sum(log_stirling(Y)) + elbo -= torch.sum(log_stirling(counts)) elbo += n * p / 2 return elbo -def profiledELBOPLN(Y, covariates, O, M, S): +def profiledELBOPLN(counts, covariates, offsets, M, S): """ Compute the ELBO (Evidence LOwer Bound) for the PLN model. We use the fact that Sigma and beta are completely determined by M,S, and the covariates. See the doc for more details on the computation. Args: - Y: torch.tensor. Counts with size (n,p) + counts: torch.tensor. Counts with size (n,p) 0: torch.tensor. Offset, size (n,p) covariates: torch.tensor. Covariates, size (n,d) M: torch.tensor. Variational parameter with size (n,p) @@ -56,28 +56,28 @@ def profiledELBOPLN(Y, covariates, O, M, S): Returns: torch.tensor of size 1 with a gradient. 
""" - n, p = Y.shape + n, p = counts.shape SrondS = torch.multiply(S, S) - OplusM = O + M + offsetsplusM = offsets + M closed_beta = closed_formula_beta(covariates, M) closed_Sigma = closed_formula_Sigma(covariates, M, S, closed_beta, n) elbo = -n / 2 * torch.logdet(closed_Sigma) elbo += torch.sum( - torch.multiply(Y, OplusM) - - torch.exp(OplusM + SrondS / 2) + torch.multiply(counts, offsetsplusM) + - torch.exp(offsetsplusM + SrondS / 2) + 1 / 2 * torch.log(SrondS) ) - elbo -= torch.sum(log_stirling(Y)) + elbo -= torch.sum(log_stirling(counts)) return elbo -def ELBOPLNPCA(Y, covariates, O, M, S, C, beta): +def ELBOPLNPCA(counts, covariates, offsets, M, S, C, beta): """ Compute the ELBO (Evidence LOwer Bound) for the PLN model with a PCA parametrization. See the doc for more details on the computation. Args: - Y: torch.tensor. Counts with size (n,p) + counts: torch.tensor. Counts with size (n,p) 0: torch.tensor. Offset, size (n,p) covariates: torch.tensor. Covariates, size (n,d) M: torch.tensor. Variational parameter with size (n,p) @@ -87,34 +87,34 @@ def ELBOPLNPCA(Y, covariates, O, M, S, C, beta): Returns: torch.tensor of size 1 with a gradient. """ - n = Y.shape[0] - q = C.shape[1] - A = O + torch.mm(covariates, beta) + torch.mm(M, C.T) + n = counts.shape[0] + rank = C.shape[1] + A = offsets + torch.mm(covariates, beta) + torch.mm(M, C.T) SrondS = torch.multiply(S, S) - YA = torch.sum(torch.multiply(Y, A)) + countsA = torch.sum(torch.multiply(counts, A)) moinsexpAplusSrondSCCT = torch.sum( -torch.exp(A + 1 / 2 * torch.mm(SrondS, torch.multiply(C, C).T)) ) moinslogSrondS = 1 / 2 * torch.sum(torch.log(SrondS)) MMplusSrondS = torch.sum(-1 / 2 * (torch.multiply(M, M) + torch.multiply(S, S))) - log_stirlingY = torch.sum(log_stirling(Y)) + log_stirlingcounts = torch.sum(log_stirling(counts)) return ( - YA + countsA + moinsexpAplusSrondSCCT + moinslogSrondS + MMplusSrondS - - log_stirlingY - + n * q / 2 + - log_stirlingcounts + + n * rank / 2 ) ## should rename some variables so that is is clearer when we see the formula -def ELBOZIPLN(Y, covariates, O, M, S, pi, Sigma, beta, B_zero, dirac): +def ELBOZIPLN(counts, covariates, offsets, M, S, pi, Sigma, beta, B_zero, dirac): """Compute the ELBO (Evidence LOwer Bound) for the Zero Inflated PLN model. See the doc for more details on the computation. Args: - Y: torch.tensor. Counts with size (n,p) + counts: torch.tensor. Counts with size (n,p) 0: torch.tensor. Offset, size (n,p) covariates: torch.tensor. Covariates, size (n,d) M: torch.tensor. 
            Variational parameter with size (n,p)
@@ -129,18 +129,18 @@ def ELBOZIPLN(Y, covariates, O, M, S, pi, Sigma, beta, B_zero, dirac):
     if torch.norm(pi * dirac - pi) > 0.0001:
         print("Bug")
         return False
-    n = Y.shape[0]
-    p = Y.shape[1]
+    n = counts.shape[0]
+    p = counts.shape[1]
     SrondS = torch.multiply(S, S)
-    OplusM = O + M
+    offsetsplusM = offsets + M
     MmoinsXB = M - torch.mm(covariates, beta)
     XB_zero = torch.mm(covariates, B_zero)
     elbo = torch.sum(
         torch.multiply(
             1 - pi,
-            torch.multiply(Y, OplusM)
-            - torch.exp(OplusM + SrondS / 2)
-            - log_stirling(Y),
+            torch.multiply(counts, offsetsplusM)
+            - torch.exp(offsetsplusM + SrondS / 2)
+            - log_stirling(counts),
         )
         + pi
     )
diff --git a/tests/test_common.py b/tests/test_common.py
index 4956aff5f85162f31df4844d14bf1cd94fa993d7..f9c8e22c60dccd5746d782cd0650b4bb2db2d61d 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -11,7 +11,7 @@ Y_sim, covariates_sim, O_sim, true_Sigma, true_beta = get_simulated_data()
 Y_real, covariates_real, O_real = get_real_data()
 O_real = np.log(O_real)
 
-q = 8
+rank = 8
 
 
 @pytest.fixture
@@ -22,7 +22,7 @@ def my_instance_pln():
 
 @pytest.fixture
 def my_instance__plnpca():
-    plnpca = _PLNPCA(q=q)
+    plnpca = _PLNPCA(rank=rank)
     return plnpca
 
 
@@ -42,21 +42,21 @@ def my_real_fitted_pln():
 
 @pytest.fixture
 def my_real_fitted__plnpca():
-    plnpca = _PLNPCA(q=q)
+    plnpca = _PLNPCA(rank=rank)
     plnpca.fit(Y_real, covariates_real, O_real)
     return plnpca
 
 
 @pytest.fixture
 def my_simulated_fitted__plnpca():
-    plnpca = _PLNPCA(q=q)
+    plnpca = _PLNPCA(rank=rank)
     plnpca.fit(Y_sim, covariates_sim, O_sim)
     return plnpca
 
 
-@pytest.fixture
-def my_simulated_fitted__plnpca():
-    plnpca = _PLNPCA(q=q)
-    plnpca.fit(Y_sim, covariates_sim, O_sim)
-    return plnpca
-
-
@@ -79,7 +79,7 @@ def test_find_right_beta(pln):
 
 
 def test_number_of_iterations(my_simulated_fitted_pln):
-    nb_iterations = len(my_simulated_fitted_pln.ELBOs_list)
+    nb_iterations = len(my_simulated_fitted_pln.elbos_list)
     assert 40 < nb_iterations < 60
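# ---------------------------------------------------------------------------
# Usage sketch (not part of the patch): a minimal end-to-end run of the
# renamed API on simulated data. The shapes and the `true_C` / `true_beta`
# values below are illustrative assumptions, not values from the repository.
import torch

from pyPLNmodels.VEM import PLN, PLNPCA
from pyPLNmodels._utils import DEVICE, sample_PLN

n, p, d, rank = 100, 10, 1, 3
covariates = torch.ones(n, d, device=DEVICE)
offsets = torch.zeros(n, p, device=DEVICE)
true_C = torch.randn(p, rank, device=DEVICE)
true_beta = torch.randn(d, p, device=DEVICE)
# sample_PLN returns (counts, Z, ksi); only the counts are needed here
counts, _, _ = sample_PLN(true_C, true_beta, covariates, offsets)

# full-covariance model
pln = PLN()
pln.fit(counts, covariates, offsets)

# rank-constrained models, with model selection on the BIC
pca = PLNPCA(ranks=[3, 4])
pca.fit(counts, covariates, offsets)
best = pca.best_model(criterion="BIC")  # picks the rank with the lowest BIC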