Module nmtf.modules.nmtf_base
Non-negative matrix and tensor factorization basic functions
"""Non-negative matrix and tensor factorization basic functions
"""
# Author: Paul Fogel
# License: MIT
# Jan 4, '20
# Initialize progressbar
import pandas as pd
import math
import numpy as np
from scipy.sparse.linalg import svds
from tqdm import tqdm
from scipy.stats import hypergeom
from scipy.optimize import nnls
from .nmtf_core import *
from .nmtf_utils import *
import sys
if not hasattr(sys, 'argv'):
sys.argv = ['']
EPSILON = np.finfo(np.float32).eps
def NMFInit(M, Mmis, Mt0, Mw0, nc, tolerance, LogIter, myStatusBox):
"""Initialize NMF components using NNSVD
Input:
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
Mt0: Initial left hand matrix (may be empty)
Mw0: Initial right hand matrix (may be empty)
nc: NMF rank
Output:
Mt: Left hand matrix
Mw: Right hand matrix
Reference
---------
C. Boutsidis, E. Gallopoulos (2008) SVD based initialization: A head start for nonnegative matrix factorization
    Pattern Recognition, Volume 41, Issue 4, April 2008, Pages 1350-1362
"""
n, p = M.shape
    Mmis = Mmis.astype(int)
n_Mmis = Mmis.shape[0]
if n_Mmis == 0:
        ID = np.where(np.isnan(M))
        n_Mmis = ID[0].size
        if n_Mmis > 0:
            Mmis = (~np.isnan(M)).astype(int)
            M[Mmis == 0] = 0
nc = int(nc)
Mt = np.copy(Mt0)
Mw = np.copy(Mw0)
if (Mt.shape[0] == 0) or (Mw.shape[0] == 0):
if n_Mmis == 0:
if nc >= min(n,p):
                # arpack cannot factorize at full rank -> duplicate the matrix in both dimensions to work around this
t, d, w = svds(np.concatenate((np.concatenate((M, M), axis=1),np.concatenate((M, M), axis=1)), axis=0), k=nc)
t *= np.sqrt(2)
w *= np.sqrt(2)
d /= 2
# svd causes mem allocation problem with large matrices
# t, d, w = np.linalg.svd(M)
# Mt = t
# Mw = w.T
else:
t, d, w = svds(M, k=nc)
Mt = t[:n,:]
Mw = w[:,:p].T
#svds returns singular vectors in reverse order
Mt = Mt[:,::-1]
Mw = Mw[:,::-1]
d = d[::-1]
else:
Mt, d, Mw, Mmis, Mmsr, Mmsr2, AddMessage, ErrMessage, cancel_pressed = rSVDSolve(
M, Mmis, nc, tolerance, LogIter, 0, "", 200,
1, 1, 1, myStatusBox)
for k in range(0, nc):
U1 = Mt[:, k]
U2 = -Mt[:, k]
U1[U1 < 0] = 0
U2[U2 < 0] = 0
V1 = Mw[:, k]
V2 = -Mw[:, k]
V1[V1 < 0] = 0
V2[V2 < 0] = 0
U1 = np.reshape(U1, (n, 1))
V1 = np.reshape(V1, (1, p))
U2 = np.reshape(U2, (n, 1))
V2 = np.reshape(V2, (1, p))
if np.linalg.norm(U1 @ V1) > np.linalg.norm(U2 @ V2):
Mt[:, k] = np.reshape(U1, n)
Mw[:, k] = np.reshape(V1, p)
else:
Mt[:, k] = np.reshape(U2, n)
Mw[:, k] = np.reshape(V2, p)
return [Mt, Mw]
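# Illustrative usage of NMFInit (a sketch, not part of the original module; assumes
# StatusBoxTqdm from .nmtf_utils, imported above, provides the status box):
#     M = np.abs(np.random.randn(50, 20))
#     Mt, Mw = NMFInit(M, np.array([]), np.array([]), np.array([]), nc=4,
#                      tolerance=1e-6, LogIter=0, myStatusBox=StatusBoxTqdm(verbose=0))
#     # Mt.shape == (50, 4), Mw.shape == (20, 4)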
def rNMFSolve(
M, Mmis, Mt0, Mw0, nc, tolerance, precision, LogIter, MaxIterations, NMFAlgo, NMFFixUserLHE,
NMFFixUserRHE, NMFMaxInterm,
NMFSparseLevel, NMFRobustResampleColumns, NMFRobustNRuns, NMFCalculateLeverage, NMFUseRobustLeverage,
NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors, myStatusBox):
"""Estimate left and right hand matrices (robust version)
Input:
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
Mt0: Initial left hand matrix
Mw0: Initial right hand matrix
nc: NMF rank
tolerance: Convergence threshold
precision: Replace 0-values in multiplication rules
LogIter: Log results through iterations
MaxIterations: Max iterations
NMFAlgo: =1,3: Divergence; =2,4: Least squares;
NMFFixUserLHE: = 1 => fixed left hand matrix columns
NMFFixUserRHE: = 1 => fixed right hand matrix columns
NMFMaxInterm: Max iterations for warmup multiplication rules
NMFSparseLevel: Requested sparsity in terms of relative number of rows with 0 values in right hand matrix
NMFRobustResampleColumns: Resample columns during bootstrap
NMFRobustNRuns: Number of bootstrap runs
NMFCalculateLeverage: Calculate leverages
NMFUseRobustLeverage: Calculate leverages based on robust max across factoring columns
NMFFindParts: Enforce convexity on left hand matrix
NMFFindCentroids: Enforce convexity on right hand matrix
        NMFKernel: Type of kernel used; 1: linear; 2: quadratic; 3: radial
NMFReweighColumns: Reweigh columns in 2nd step of parts-based NMF
NMFPriors: Priors on right hand matrix
Output:
Mt: Left hand matrix
Mw: Right hand matrix
MtPct: Percent robust clustered rows
MwPct: Percent robust clustered columns
diff: Objective minimum achieved
Mh: Convexity matrix
flagNonconvex: Updated non-convexity flag on left hand matrix
"""
# Check parameter consistency (and correct if needed)
AddMessage = []
    ErrMessage = ''
cancel_pressed = 0
nc = int(nc)
if NMFFixUserLHE*NMFFixUserRHE == 1:
return Mt0, Mw0, np.array([]), np.array([]), 0, np.array([]), 0, AddMessage, ErrMessage, cancel_pressed
if (nc == 1) & (NMFAlgo > 2):
NMFAlgo -= 2
if NMFAlgo <= 2:
NMFRobustNRuns = 0
    Mmis = Mmis.astype(int)
n_Mmis = Mmis.shape[0]
if n_Mmis == 0:
        ID = np.where(np.isnan(M))
        n_Mmis = ID[0].size
        if n_Mmis > 0:
            Mmis = (~np.isnan(M)).astype(int)
            M[Mmis == 0] = 0
else:
M[Mmis == 0] = 0
if NMFRobustResampleColumns > 0:
M = np.copy(M).T
if n_Mmis > 0:
Mmis = np.copy(Mmis).T
Mtemp = np.copy(Mw0)
Mw0 = np.copy(Mt0)
Mt0 = Mtemp
NMFFixUserLHEtemp = NMFFixUserLHE
NMFFixUserLHE = NMFFixUserRHE
NMFFixUserRHE = NMFFixUserLHEtemp
n, p = M.shape
try:
n_NMFPriors, nc = NMFPriors.shape
    except (AttributeError, ValueError):
n_NMFPriors = 0
NMFRobustNRuns = int(NMFRobustNRuns)
MtPct = np.nan
MwPct = np.nan
flagNonconvex = 0
# Step 1: NMF
Status = "Step 1 - NMF Ncomp=" + str(nc) + ": "
Mt, Mw, diffsup, Mhsup, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
M, Mmis, Mt0, Mw0, nc, tolerance, precision, LogIter, Status, MaxIterations, NMFAlgo,
NMFFixUserLHE, NMFFixUserRHE, NMFMaxInterm, 100, NMFSparseLevel,
NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors, flagNonconvex, AddMessage, myStatusBox)
Mtsup = np.copy(Mt)
Mwsup = np.copy(Mw)
if (n_NMFPriors > 0) & (NMFReweighColumns > 0):
# Run again with fixed LHE & no priors
Status = "Step 1bis - NMF (fixed LHE) Ncomp=" + str(nc) + ": "
Mw = np.ones((p, nc)) / math.sqrt(p)
Mt, Mw, diffsup, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
M, Mmis, Mtsup, Mw, nc, tolerance, precision, LogIter, Status, MaxIterations, NMFAlgo, nc, 0, NMFMaxInterm, 100,
NMFSparseLevel, NMFFindParts, NMFFindCentroids, NMFKernel, 0, NMFPriors, flagNonconvex, AddMessage,
myStatusBox)
Mtsup = np.copy(Mt)
Mwsup = np.copy(Mw)
# Bootstrap to assess robust clustering
if NMFRobustNRuns > 1:
# Update Mwsup
MwPct = np.zeros((p, nc))
MwBlk = np.zeros((p, NMFRobustNRuns * nc))
for iBootstrap in range(0, NMFRobustNRuns):
Boot = np.random.randint(n, size=n)
Status = "Step 2 - " + \
"Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NMF Ncomp=" + str(nc) + ": "
if n_Mmis > 0:
Mt, Mw, diff, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
M[Boot, :], Mmis[Boot, :], Mtsup[Boot, :], Mwsup, nc, 1.e-3, precision, LogIter, Status, MaxIterations, NMFAlgo, nc, 0,
NMFMaxInterm, 20, NMFSparseLevel, NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns,
NMFPriors, flagNonconvex, AddMessage, myStatusBox)
else:
Mt, Mw, diff, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
M[Boot, :], Mmis, Mtsup[Boot, :], Mwsup, nc, 1.e-3, precision, LogIter, Status, MaxIterations, NMFAlgo, nc, 0,
NMFMaxInterm, 20, NMFSparseLevel, NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns,
NMFPriors, flagNonconvex, AddMessage, myStatusBox)
for k in range(0, nc):
MwBlk[:, k * NMFRobustNRuns + iBootstrap] = Mw[:, k]
Mwn = np.zeros((p, nc))
for k in range(0, nc):
if (NMFAlgo == 2) | (NMFAlgo == 4):
ScaleMw = np.linalg.norm(MwBlk[:, k * NMFRobustNRuns + iBootstrap])
else:
ScaleMw = np.sum(MwBlk[:, k * NMFRobustNRuns + iBootstrap])
if ScaleMw > 0:
MwBlk[:, k * NMFRobustNRuns + iBootstrap] = \
MwBlk[:, k * NMFRobustNRuns + iBootstrap] / ScaleMw
Mwn[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]
ColClust = np.zeros(p, dtype=int)
if NMFCalculateLeverage > 0:
Mwn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mwn, NMFUseRobustLeverage, AddMessage,
myStatusBox)
for j in range(0, p):
ColClust[j] = np.argmax(np.array(Mwn[j, :]))
MwPct[j, ColClust[j]] = MwPct[j, ColClust[j]] + 1
MwPct = MwPct / NMFRobustNRuns
# Update Mtsup
MtPct = np.zeros((n, nc))
for iBootstrap in range(0, NMFRobustNRuns):
Status = "Step 3 - " + \
"Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NMF Ncomp=" + str(nc) + ": "
Mw = np.zeros((p, nc))
for k in range(0, nc):
Mw[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]
Mt, Mw, diff, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
M, Mmis, Mtsup, Mw, nc, 1.e-3, precision, LogIter, Status, MaxIterations, NMFAlgo, 0, nc, NMFMaxInterm, 20,
NMFSparseLevel, NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors, flagNonconvex,
AddMessage, myStatusBox)
RowClust = np.zeros(n, dtype=int)
if NMFCalculateLeverage > 0:
Mtn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mt, NMFUseRobustLeverage, AddMessage,
myStatusBox)
else:
Mtn = Mt
for i in range(0, n):
RowClust[i] = np.argmax(Mtn[i, :])
MtPct[i, RowClust[i]] = MtPct[i, RowClust[i]] + 1
MtPct = MtPct / NMFRobustNRuns
Mt = Mtsup
Mw = Mwsup
Mh = Mhsup
diff = diffsup
if NMFRobustResampleColumns > 0:
Mtemp = np.copy(Mt)
Mt = np.copy(Mw)
Mw = Mtemp
Mtemp = np.copy(MtPct)
MtPct = np.copy(MwPct)
MwPct = Mtemp
return Mt, Mw, MtPct, MwPct, diff, Mh, flagNonconvex, AddMessage, ErrMessage, cancel_pressed
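# Illustrative call of rNMFSolve (a sketch; internal API, positional arguments follow
# the signature above: rank 4, least squares (NMFAlgo=4), 10 bootstrap runs, robust leverage):
#     Mt, Mw, MtPct, MwPct, diff, Mh, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = rNMFSolve(
#         M, np.array([]), Mt0, Mw0, 4, 1e-6, EPSILON, 0, 150, 4, 0, 0, 20,
#         0, 0, 10, 1, 1, 0, 0, 1, 0, np.array([]), StatusBoxTqdm(verbose=0))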
def NTFInit(M, Mmis, Mt_nmf, Mw_nmf, nc, tolerance, precision, LogIter, NTFUnimodal,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, init_type, myStatusBox):
"""Initialize NTF components for HALS
Input:
M: Input tensor
Mmis: Define missing values (0 = missing cell, 1 = real cell)
        Mt_nmf: Initial LHM from NMF of the unstacked tensor (may be empty)
        Mw_nmf: Initial RHM from NMF of the unstacked tensor (may be empty)
nc: NTF rank
tolerance: Convergence threshold
precision: Replace 0-values in multiplication rules
LogIter: Log results through iterations
NTFUnimodal: Apply Unimodal constraint on factoring vectors
NTFLeftComponents: Apply Unimodal/Smooth constraint on left hand matrix
NTFRightComponents: Apply Unimodal/Smooth constraint on right hand matrix
NTFBlockComponents: Apply Unimodal/Smooth constraint on block hand matrix
NBlocks: Number of NTF blocks
init_type : integer, default 0
init_type = 0 : NMF initialization applied on the reshaped matrix [1st dim x vectorized (2nd & 3rd dim)]
init_type = 1 : NMF initialization applied on the reshaped matrix [vectorized (1st & 2nd dim) x 3rd dim]
Output:
Mt: Left hand matrix
Mw: Right hand matrix
Mb: Block hand matrix
"""
AddMessage = []
n, p = M.shape
    Mmis = Mmis.astype(int)
n_Mmis = Mmis.shape[0]
if n_Mmis == 0:
        ID = np.where(np.isnan(M))
        n_Mmis = ID[0].size
        if n_Mmis > 0:
            Mmis = (~np.isnan(M)).astype(int)
            M[Mmis == 0] = 0
nc = int(nc)
NBlocks = int(NBlocks)
init_type = int(init_type)
Status0 = "Step 1 - Quick NMF Ncomp=" + str(nc) + ": "
if init_type == 1:
#Init legacy
Mstacked, Mmis_stacked = NTFStack(M, Mmis, NBlocks)
nc2 = min(nc, NBlocks) # factorization rank can't be > number of blocks
if (Mt_nmf.shape[0] == 0) or (Mw_nmf.shape[0] == 0):
Mt_nmf, Mw_nmf = NMFInit(Mstacked, Mmis_stacked, np.array([]), np.array([]), nc2, tolerance, LogIter, myStatusBox)
else:
Mt_nmf, Mw_nmf = NMFInit(Mstacked, Mmis_stacked, Mt_nmf, Mw_nmf, nc2, tolerance, LogIter, myStatusBox)
# Quick NMF
Mt_nmf, Mw_nmf, diff, Mh, dummy1, dummy2, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
Mstacked, Mmis_stacked, Mt_nmf, Mw_nmf, nc2, tolerance, precision, LogIter, Status0,
10, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, np.array([]), 0, AddMessage, myStatusBox)
# Factorize Left vectors and distribute multiple factors if nc2 < nc
Mt = np.zeros((n, nc))
Mw = np.zeros((int(p / NBlocks), nc))
Mb = np.zeros((NBlocks, nc))
NFact = int(np.ceil(nc / NBlocks))
for k in range(0, nc2):
myStatusBox.update_status(delay=1, status="Start SVD...")
U, d, V = svds(np.reshape(Mt_nmf[:, k], (int(p / NBlocks), n)).T, k=NFact)
V = V.T
#svds returns singular vectors in reverse order
U = U[:,::-1]
V = V[:,::-1]
d = d[::-1]
myStatusBox.update_status(delay=1, status="SVD completed")
for iFact in range(0, NFact):
ind = iFact * NBlocks + k
if ind < nc:
U1 = U[:, iFact]
U2 = -U[:, iFact]
U1[U1 < 0] = 0
U2[U2 < 0] = 0
V1 = V[:, iFact]
V2 = -V[:, iFact]
V1[V1 < 0] = 0
V2[V2 < 0] = 0
U1 = np.reshape(U1, (n, 1))
V1 = np.reshape(V1, (1, int(p / NBlocks)))
U2 = np.reshape(U2, (n, 1))
                    V2 = np.reshape(V2, (1, int(p / NBlocks)))
if np.linalg.norm(U1 @ V1) > np.linalg.norm(U2 @ V2):
Mt[:, ind] = np.reshape(U1, n)
Mw[:, ind] = d[iFact] * np.reshape(V1, int(p / NBlocks))
else:
Mt[:, ind] = np.reshape(U2, n)
Mw[:, ind] = d[iFact] * np.reshape(V2, int(p / NBlocks))
Mb[:, ind] = Mw_nmf[:, k]
else:
#Init default
if (Mt_nmf.shape[0] == 0) or (Mw_nmf.shape[0] == 0):
Mt_nmf, Mw_nmf = NMFInit(M, Mmis, np.array([]), np.array([]), nc, tolerance, LogIter, myStatusBox)
else:
Mt_nmf, Mw_nmf = NMFInit(M, Mmis, Mt_nmf, Mw_nmf, nc, tolerance, LogIter, myStatusBox)
# Quick NMF
Mt_nmf, Mw_nmf, diff, Mh, dummy1, dummy2, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
M, Mmis, Mt_nmf, Mw_nmf, nc, tolerance, precision, LogIter, Status0,
10, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, np.array([]), 0, AddMessage, myStatusBox)
#Factorize Left vectors
Mt = np.zeros((n, nc))
Mw = np.zeros((int(p / NBlocks), nc))
Mb = np.zeros((NBlocks, nc))
for k in range(0, nc):
myStatusBox.update_status(delay=1, status="Start SVD...")
U, d, V = svds(np.reshape(Mw_nmf[:, k], (int(p / NBlocks), NBlocks)), k=1)
V = V.T
U = np.abs(U)
V = np.abs(V)
myStatusBox.update_status(delay=1, status="SVD completed")
Mt[:, k] = Mt_nmf[:, k]
Mw[:, k] = d[0] * np.reshape(U, int(p / NBlocks))
Mb[:, k] = np.reshape(V, NBlocks)
for k in range(0, nc):
if (NTFUnimodal > 0) & (NTFLeftComponents > 0):
# Enforce unimodal distribution
tmax = np.argmax(Mt[:, k])
for i in range(tmax + 1, n):
Mt[i, k] = min(Mt[i - 1, k], Mt[i, k])
for i in range(tmax - 1, -1, -1):
Mt[i, k] = min(Mt[i + 1, k], Mt[i, k])
if (NTFUnimodal > 0) & (NTFRightComponents > 0):
# Enforce unimodal distribution
wmax = np.argmax(Mw[:, k])
for j in range(wmax + 1, int(p / NBlocks)):
Mw[j, k] = min(Mw[j - 1, k], Mw[j, k])
for j in range(wmax - 1, -1, -1):
Mw[j, k] = min(Mw[j + 1, k], Mw[j, k])
if (NTFUnimodal > 0) & (NTFBlockComponents > 0):
# Enforce unimodal distribution
bmax = np.argmax(Mb[:, k])
for iBlock in range(bmax + 1, NBlocks):
Mb[iBlock, k] = min(Mb[iBlock - 1, k], Mb[iBlock, k])
for iBlock in range(bmax - 1, -1, -1):
Mb[iBlock, k] = min(Mb[iBlock + 1, k], Mb[iBlock, k])
return [Mt, Mw, Mb, AddMessage, ErrMessage, cancel_pressed]
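# Illustrative call of NTFInit (a sketch; M is the tensor unfolded to (n, p0), made of
# NBlocks contiguous column blocks of p0 / NBlocks features each):
#     Mt, Mw, Mb, AddMessage, ErrMessage, cancel_pressed = NTFInit(
#         M, np.array([]), np.array([]), np.array([]), 3, 1e-6, EPSILON, 0,
#         0, 0, 0, 0, 4, 0, StatusBoxTqdm(verbose=0))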
def rNTFSolve(M, Mmis, Mt0, Mw0, Mb0, nc, tolerance, precision, LogIter, MaxIterations, NMFFixUserLHE, NMFFixUserRHE,
NMFFixUserBHE, NMFAlgo, NMFRobustNRuns, NMFCalculateLeverage, NMFUseRobustLeverage, NTFFastHALS, NTFNIterations,
NMFSparseLevel, NTFUnimodal, NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv,
NMFPriors, myStatusBox):
"""Estimate NTF matrices (robust version)
Input:
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
Mt0: Initial left hand matrix
Mw0: Initial right hand matrix
Mb0: Initial block hand matrix
nc: NTF rank
tolerance: Convergence threshold
precision: Replace 0-values in multiplication rules
LogIter: Log results through iterations
MaxIterations: Max iterations
NMFFixUserLHE: fix left hand matrix columns: = 1, else = 0
NMFFixUserRHE: fix right hand matrix columns: = 1, else = 0
NMFFixUserBHE: fix block hand matrix columns: = 1, else = 0
NMFAlgo: =5: Non-robust version, =6: Robust version
NMFRobustNRuns: Number of bootstrap runs
NMFCalculateLeverage: Calculate leverages
NMFUseRobustLeverage: Calculate leverages based on robust max across factoring columns
        NTFFastHALS: Use Fast HALS (does not handle missing values or convolution)
        NTFNIterations: Warmup iterations for fast HALS
        NMFSparseLevel: Sparsity level (as defined by Hoyer); +/- = make RHE/LHE sparse
NTFUnimodal: Apply Unimodal constraint on factoring vectors
NTFSmooth: Apply Smooth constraint on factoring vectors
NTFLeftComponents: Apply Unimodal/Smooth constraint on left hand matrix
NTFRightComponents: Apply Unimodal/Smooth constraint on right hand matrix
NTFBlockComponents: Apply Unimodal/Smooth constraint on block hand matrix
NBlocks: Number of NTF blocks
NTFNConv: Half-Size of the convolution window on 3rd-dimension of the tensor
NMFPriors: Elements in Mw that should be updated (others remain 0)
Output:
Mt_conv: Convolutional Left hand matrix
Mt: Left hand matrix
Mw: Right hand matrix
Mb: Block hand matrix
MtPct: Percent robust clustered rows
MwPct: Percent robust clustered columns
diff : Objective minimum achieved
"""
AddMessage = []
ErrMessage = ''
cancel_pressed = 0
n, p0 = M.shape
nc = int(nc)
NBlocks = int(NBlocks)
p = int(p0 / NBlocks)
NTFNConv = int(NTFNConv)
if NMFFixUserLHE*NMFFixUserRHE*NMFFixUserBHE == 1:
return np.zeros((n, nc*(2*NTFNConv+1))), Mt0, Mw0, Mb0, np.zeros((n, p0)), np.ones((n, nc)), np.ones((p, nc)), AddMessage, ErrMessage, cancel_pressed
    Mmis = Mmis.astype(int)
n_Mmis = Mmis.shape[0]
if n_Mmis == 0:
        ID = np.where(np.isnan(M))
        n_Mmis = ID[0].size
        if n_Mmis > 0:
            Mmis = (~np.isnan(M)).astype(int)
            M[Mmis == 0] = 0
else:
M[Mmis == 0] = 0
NTFNIterations = int(NTFNIterations)
NMFRobustNRuns = int(NMFRobustNRuns)
Mt = np.copy(Mt0)
Mw = np.copy(Mw0)
Mb = np.copy(Mb0)
Mt_conv = np.array([])
# Check parameter consistency (and correct if needed)
if (nc == 1) | (NMFAlgo == 5):
NMFRobustNRuns = 0
if NMFRobustNRuns == 0:
MtPct = np.nan
MwPct = np.nan
if (n_Mmis > 0 or NTFNConv > 0 or NMFSparseLevel != 0) and NTFFastHALS > 0:
NTFFastHALS = 0
reverse2HALS = 1
else:
reverse2HALS = 0
# Step 1: NTF
Status0 = "Step 1 - NTF Ncomp=" + str(nc) + ": "
if NTFFastHALS > 0:
if NTFNIterations > 0:
Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
M, Mmis, Mt, Mw, Mb, nc, tolerance, LogIter, Status0,
NTFNIterations, NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, 0, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv, NMFPriors, myStatusBox)
Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
M, Mmis, Mt, Mw, Mb, nc, tolerance, precision, LogIter, Status0,
MaxIterations, NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
else:
Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
M, Mmis, Mt, Mw, Mb, nc, tolerance, LogIter, Status0,
MaxIterations, NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, NMFSparseLevel, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv, NMFPriors, myStatusBox)
Mtsup = np.copy(Mt)
Mwsup = np.copy(Mw)
Mbsup = np.copy(Mb)
diff_sup = diff
# Bootstrap to assess robust clustering
if NMFRobustNRuns > 1:
# Update Mwsup
MwPct = np.zeros((p, nc))
MwBlk = np.zeros((p, NMFRobustNRuns * nc))
for iBootstrap in range(0, NMFRobustNRuns):
Boot = np.random.randint(n, size=n)
Status0 = "Step 2 - " + \
"Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NTF Ncomp=" + str(nc) + ": "
if NTFFastHALS > 0:
if n_Mmis > 0:
Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
M[Boot, :], Mmis[Boot, :], Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, precision, LogIter, Status0,
MaxIterations, 1, 0, NMFFixUserBHE, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
else:
Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
M[Boot, :], np.array([]), Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, precision, LogIter, Status0,
MaxIterations, 1, 0, NMFFixUserBHE, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
else:
if n_Mmis > 0:
Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
M[Boot, :], Mmis[Boot, :], Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, LogIter, Status0,
MaxIterations, 1, 0, NMFFixUserBHE, NMFSparseLevel, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv, NMFPriors, myStatusBox)
else:
Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
M[Boot, :], np.array([]), Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, LogIter, Status0,
MaxIterations, 1, 0, NMFFixUserBHE, NMFSparseLevel, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv, NMFPriors, myStatusBox)
for k in range(0, nc):
MwBlk[:, k * NMFRobustNRuns + iBootstrap] = Mw[:, k]
Mwn = np.zeros((p, nc))
for k in range(0, nc):
ScaleMw = np.linalg.norm(MwBlk[:, k * NMFRobustNRuns + iBootstrap])
if ScaleMw > 0:
MwBlk[:, k * NMFRobustNRuns + iBootstrap] = \
MwBlk[:, k * NMFRobustNRuns + iBootstrap] / ScaleMw
Mwn[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]
ColClust = np.zeros(p, dtype=int)
if NMFCalculateLeverage > 0:
Mwn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mwn, NMFUseRobustLeverage, AddMessage,
myStatusBox)
for j in range(0, p):
ColClust[j] = np.argmax(np.array(Mwn[j, :]))
MwPct[j, ColClust[j]] = MwPct[j, ColClust[j]] + 1
MwPct = MwPct / NMFRobustNRuns
# Update Mtsup
MtPct = np.zeros((n, nc))
for iBootstrap in range(0, NMFRobustNRuns):
Status0 = "Step 3 - " + \
"Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NTF Ncomp=" + str(nc) + ": "
Mw = np.zeros((p, nc))
for k in range(0, nc):
Mw[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]
if NTFFastHALS > 0:
Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
M, Mmis, Mtsup, Mw, Mb, nc, 1.e-3, precision, LogIter, Status0, MaxIterations, 0, 1, NMFFixUserBHE,
NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
else:
Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
M, Mmis, Mtsup, Mw, Mb, nc, 1.e-3, LogIter, Status0, MaxIterations, 0, 1, NMFFixUserBHE,
NMFSparseLevel, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv, NMFPriors, myStatusBox)
RowClust = np.zeros(n, dtype=int)
if NMFCalculateLeverage > 0:
Mtn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mt, NMFUseRobustLeverage, AddMessage,
myStatusBox)
else:
Mtn = Mt
for i in range(0, n):
RowClust[i] = np.argmax(Mtn[i, :])
MtPct[i, RowClust[i]] = MtPct[i, RowClust[i]] + 1
MtPct = MtPct / NMFRobustNRuns
Mt = Mtsup
Mw = Mwsup
Mb = Mbsup
diff = diff_sup
if reverse2HALS > 0:
        AddMessage.insert(len(AddMessage), 'Currently, Fast HALS cannot be applied with missing data or a convolution window; reverted to simple HALS.')
return Mt_conv, Mt, Mw, Mb, MtPct, MwPct, diff, AddMessage, ErrMessage, cancel_pressed
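# Illustrative call of rNTFSolve (a sketch mirroring the call issued by
# non_negative_factorization when use_hals=True: single block, no convolution,
# no bootstrap, simple HALS):
#     Mt_conv, Mt, Mw, Mb, MtPct, MwPct, diff, AddMessage, ErrMessage, cancel_pressed = rNTFSolve(
#         M, np.array([]), Mt0, Mw0, np.array([]), 4, 1e-6, EPSILON, 0, 150, 0, 0,
#         1, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, np.array([]), StatusBoxTqdm(verbose=0))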
def rSVDSolve(M, Mmis, nc, tolerance, LogIter, LogTrials, Status0, MaxIterations,
SVDAlgo, SVDCoverage, SVDNTrials, myStatusBox):
"""Estimate SVD matrices (robust version)
Input:
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
nc: SVD rank
tolerance: Convergence threshold
LogIter: Log results through iterations
LogTrials: Log results through trials
Status0: Initial displayed status to be updated during iterations
MaxIterations: Max iterations
SVDAlgo: =1: Non-robust version, =2: Robust version
SVDCoverage: Coverage non-outliers (robust version)
SVDNTrials: Number of trials (robust version)
Output:
Mt: Left hand matrix
Mev: Scaling factors
Mw: Right hand matrix
Mmis: Matrix of missing/flagged outliers
        Mmsr: Vector of residual SSQ
        Mmsr2: Vector of residual variance
Reference
---------
L. Liu et al (2003) Robust singular value decomposition analysis of microarray data
PNAS November 11, 2003 vol. 100 no. 23 13167–13172
"""
AddMessage = []
ErrMessage = ''
cancel_pressed = 0
# M0 is the running matrix (to be factorized, initialized from M)
M0 = np.copy(M)
n, p = M0.shape
Mmis = Mmis.astype(np.bool_)
n_Mmis = Mmis.shape[0]
if n_Mmis > 0:
        M0[~Mmis] = np.nan
else:
        Mmis = ~np.isnan(M0)
n_Mmis = Mmis.shape[0]
trace0 = np.sum(M0[Mmis] ** 2)
nc = int(nc)
SVDNTrials = int(SVDNTrials)
nxp = n * p
nxpcov = int(round(nxp * SVDCoverage, 0))
Mmsr = np.zeros(nc)
Mmsr2 = np.zeros(nc)
Mev = np.zeros(nc)
if SVDAlgo == 2:
MaxTrial = SVDNTrials
else:
MaxTrial = 1
Mw = np.zeros((p, nc))
Mt = np.zeros((n, nc))
Mdiff = np.zeros((n, p))
w = np.zeros(p)
t = np.zeros(n)
wTrial = np.zeros(p)
tTrial = np.zeros(n)
    MmisTrial = np.zeros((n, p), dtype=bool)
# Outer-reference M becomes local reference M, which is the running matrix within ALS/LTS loop.
M = np.zeros((n, p))
wnorm = np.zeros((p, n))
tnorm = np.zeros((n, p))
denomw = np.zeros(n)
denomt = np.zeros(p)
StepIter = math.ceil(MaxIterations / 100)
pbar_step = 100 * StepIter / MaxIterations
if (n_Mmis == 0) & (SVDAlgo == 1):
FastCode = 1
else:
FastCode = 0
if (FastCode == 0) and (SVDAlgo == 1):
denomw[np.count_nonzero(Mmis, axis=1) < 2] = np.nan
denomt[np.count_nonzero(Mmis, axis=0) < 2] = np.nan
for k in range(0, nc):
for iTrial in range(0, MaxTrial):
myStatusBox.init_bar(delay=1)
# Copy values of M0 into M
M[:, :] = M0
Status1 = Status0 + "Ncomp " + str(k + 1) + " Trial " + str(iTrial + 1) + ": "
if SVDAlgo == 2:
# Select a random subset
M = np.reshape(M, (nxp, 1))
M[np.argsort(np.random.rand(nxp))[nxpcov:nxp]] = np.nan
M = np.reshape(M, (n, p))
                Mmis[:, :] = ~np.isnan(M)
# Initialize w
for j in range(0, p):
w[j] = np.median(M[Mmis[:, j], j])
if np.where(w > 0)[0].size == 0:
w[:] = 1
w /= np.linalg.norm(w)
# Replace missing values by 0's before regression
            M[~Mmis] = 0
# initialize t (LTS =stochastic)
if FastCode == 0:
wnorm[:, :] = np.repeat(w[:, np.newaxis]**2, n, axis=1) * Mmis.T
denomw[:] = np.sum(wnorm, axis=0)
# Request at least 2 non-missing values to perform row regression
if SVDAlgo == 2:
denomw[np.count_nonzero(Mmis, axis=1) < 2] = np.nan
t[:] = M @ w / denomw
else:
t[:] = M @ w / np.linalg.norm(w) ** 2
            t[np.isnan(t)] = np.median(t[~np.isnan(t)])
if SVDAlgo == 2:
Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
# Restore missing values instead of 0's
                M[~Mmis] = M0[~Mmis]
                M = np.reshape(M, (nxp, 1))
                M[np.argsort(np.reshape(Mdiff, nxp))[nxpcov:nxp]] = np.nan
                M = np.reshape(M, (n, p))
                Mmis[:, :] = ~np.isnan(M)
                # Replace missing values by 0's before regression
                M[~Mmis] = 0
iIter = 0
cont = 1
while (cont > 0) & (iIter < MaxIterations):
# build w
if FastCode == 0:
tnorm[:, :] = np.repeat(t[:, np.newaxis]**2, p, axis=1) * Mmis
denomt[:] = np.sum(tnorm, axis=0)
#Request at least 2 non-missing values to perform column regression
if SVDAlgo == 2:
denomt[np.count_nonzero(Mmis, axis=0) < 2] = np.nan
w[:] = M.T @ t / denomt
else:
w[:] = M.T @ t / np.linalg.norm(t) ** 2
                w[np.isnan(w)] = np.median(w[~np.isnan(w)])
# normalize w
w /= np.linalg.norm(w)
if SVDAlgo == 2:
Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
# Restore missing values instead of 0's
                    M[~Mmis] = M0[~Mmis]
                    M = np.reshape(M, (nxp, 1))
                    # Outliers resume to missing values
                    M[np.argsort(np.reshape(Mdiff, nxp))[nxpcov:nxp]] = np.nan
                    M = np.reshape(M, (n, p))
                    Mmis[:, :] = ~np.isnan(M)
                    # Replace missing values by 0's before regression
                    M[~Mmis] = 0
# build t
if FastCode == 0:
wnorm[:, :] = np.repeat(w[:, np.newaxis] ** 2, n, axis=1) * Mmis.T
denomw[:] = np.sum(wnorm, axis=0)
# Request at least 2 non-missing values to perform row regression
if SVDAlgo == 2:
denomw[np.count_nonzero(Mmis, axis=1) < 2] = np.nan
t[:] = M @ w / denomw
else:
t[:] = M @ w / np.linalg.norm(w) ** 2
                t[np.isnan(t)] = np.median(t[~np.isnan(t)])
# note: only w is normalized within loop, t is normalized after convergence
if SVDAlgo == 2:
Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
# Restore missing values instead of 0's
                    M[~Mmis] = M0[~Mmis]
                    M = np.reshape(M, (nxp, 1))
                    # Outliers resume to missing values
                    M[np.argsort(np.reshape(Mdiff, nxp))[nxpcov:nxp]] = np.nan
                    M = np.reshape(M, (n, p))
                    Mmis[:, :] = ~np.isnan(M)
                    # Replace missing values by 0's before regression
                    M[~Mmis] = 0
if iIter % StepIter == 0:
if SVDAlgo == 1:
Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
Status = Status1 + 'Iteration: %s' % int(iIter)
myStatusBox.update_status(delay=1, status=Status)
myStatusBox.update_bar(delay=1, step=pbar_step)
if myStatusBox.cancel_pressed:
cancel_pressed = 1
return [Mt, Mev, Mw, Mmis, Mmsr, Mmsr2, AddMessage, ErrMessage, cancel_pressed]
diff = np.linalg.norm(Mdiff[Mmis]) ** 2 / np.where(Mmis)[0].size
if LogIter == 1:
if SVDAlgo == 2:
myStatusBox.myPrint("Ncomp: " + str(k) + " Trial: " + str(iTrial) + " Iter: " + str(
iIter) + " MSR: " + str(diff))
else:
myStatusBox.myPrint("Ncomp: " + str(k) + " Iter: " + str(iIter) + " MSR: " + str(diff))
if iIter > 0:
if abs(diff - diff0) / diff0 < tolerance:
cont = 0
diff0 = diff
iIter += 1
# save trial
if iTrial == 0:
BestTrial = iTrial
DiffTrial = diff
tTrial[:] = t
wTrial[:] = w
MmisTrial[:, :] = Mmis
elif diff < DiffTrial:
BestTrial = iTrial
DiffTrial = diff
tTrial[:] = t
wTrial[:] = w
MmisTrial[:, :] = Mmis
if LogTrials == 1:
myStatusBox.myPrint("Ncomp: " + str(k) + " Trial: " + str(iTrial) + " MSR: " + str(diff))
if LogTrials:
myStatusBox.myPrint("Ncomp: " + str(k) + " Best trial: " + str(BestTrial) + " MSR: " + str(DiffTrial))
t[:] = tTrial
w[:] = wTrial
Mw[:, k] = w
# compute eigen value
if SVDAlgo == 2:
            # Robust regression of M on t @ w.T
Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
RMdiff = np.argsort(np.reshape(Mdiff, nxp))
t /= np.linalg.norm(t) # Normalize t
Mt[:, k] = t
Mmis = np.reshape(Mmis, nxp)
Mmis[RMdiff[nxpcov:nxp]] = False
Ycells = np.reshape(M0, (nxp, 1))[Mmis]
Xcells = np.reshape(np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)), (nxp, 1))[Mmis]
Mev[k] = Ycells.T @ Xcells / np.linalg.norm(Xcells) ** 2
Mmis = np.reshape(Mmis, (n, p))
else:
Mev[k] = np.linalg.norm(t)
Mt[:, k] = t / Mev[k] # normalize t
if k == 0:
Mmsr[k] = Mev[k] ** 2
else:
Mmsr[k] = Mmsr[k - 1] + Mev[k] ** 2
Mmsr2[k] = Mmsr[k] - Mev[0] ** 2
# M0 is deflated before calculating next component
M0 = M0 - Mev[k] * np.reshape(Mt[:, k], (n, 1)) @ np.reshape(Mw[:, k].T, (1, p))
trace02 = trace0 - Mev[0] ** 2
Mmsr = 1 - Mmsr / trace0
Mmsr[Mmsr > 1] = 1
Mmsr[Mmsr < 0] = 0
Mmsr2 = 1 - Mmsr2 / trace02
Mmsr2[Mmsr2 > 1] = 1
Mmsr2[Mmsr2 < 0] = 0
if nc > 1:
RMev = np.argsort(-Mev)
Mev = Mev[RMev]
        Mw0 = np.copy(Mw)
        Mt0 = np.copy(Mt)
for k in range(0, nc):
Mw[:, k] = Mw0[:, RMev[k]]
Mt[:, k] = Mt0[:, RMev[k]]
Mmis[:, :] = True
    Mmis[~MmisTrial] = False
return [Mt, Mev, Mw, Mmis, Mmsr, Mmsr2, AddMessage, ErrMessage, cancel_pressed]
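# Illustrative call of rSVDSolve (a sketch; robust rank-3 SVD with 90% coverage and 5 LTS trials):
#     Mt, Mev, Mw, Mmis_out, Mmsr, Mmsr2, AddMessage, ErrMessage, cancel_pressed = rSVDSolve(
#         M, np.array([]), 3, 1e-6, 0, 0, "", 200, 2, 0.9, 5, StatusBoxTqdm(verbose=0))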
def non_negative_factorization(X, W=None, H=None, n_components=None,
update_W=True,
update_H=True,
beta_loss='frobenius',
use_hals=False,
n_bootstrap=None,
tol=1e-6,
max_iter=150, max_iter_mult=20,
regularization=None, sparsity=0,
leverage='standard',
convex=None, kernel='linear',
skewness=False,
null_priors=False,
random_state=None,
verbose=0):
"""Compute Non-negative Matrix Factorization (NMF)
    Find two non-negative matrices (W, H) such that X = W @ H.T + Error.
This factorization can be used for example for
dimensionality reduction, source separation or topic extraction.
The objective function is minimized with an alternating minimization of W
and H.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Constant matrix.
    W : array-like, shape (n_samples, n_components)
        prior W
        If update_W is False, it is used as a constant, to solve for H only.
    H : array-like, shape (n_features, n_components)
        prior H
        If update_H is False, it is used as a constant, to solve for W only.
n_components : integer
Number of components, if n_components is not set : n_components = min(n_samples, n_features)
update_W : boolean, default: True
Update or keep W fixed
update_H : boolean, default: True
Update or keep H fixed
beta_loss : string, default 'frobenius'
String must be in {'frobenius', 'kullback-leibler'}.
Beta divergence to be minimized, measuring the distance between X
and the dot product WH. Note that values different from 'frobenius'
(or 2) and 'kullback-leibler' (or 1) lead to significantly slower
fits. Note that for beta_loss == 'kullback-leibler', the input
matrix X cannot contain zeros.
use_hals : boolean
        True -> HALS algorithm (note that convex and kullback-leibler loss options are not supported)
        False -> Projected gradient
    n_bootstrap : integer, default: None
Number of bootstrap runs.
tol : float, default: 1e-6
Tolerance of the stopping condition.
    max_iter : integer, default: 150
Maximum number of iterations.
max_iter_mult : integer, default: 20
Maximum number of iterations in multiplicative warm-up to projected gradient (beta_loss = 'frobenius' only).
regularization : None | 'components' | 'transformation'
Select whether the regularization affects the components (H), the
transformation (W) or none of them.
sparsity : float, default: 0
Sparsity target with 0 <= sparsity <= 1 representing either:
- the % rows in W or H set to 0 (when use_hals = False)
- the mean % rows per column in W or H set to 0 (when use_hals = True)
leverage : None | 'standard' | 'robust', default 'standard'
Calculate leverage of W and H rows on each component.
convex : None | 'components' | 'transformation', default None
Apply convex constraint on W or H.
kernel : 'linear', 'quadratic', 'radial', default 'linear'
Can be set if convex = 'transformation'.
null_priors : boolean, default False
Cells of H with prior cells = 0 will not be updated.
Can be set only if prior H has been defined.
skewness : boolean, default False
When solving mixture problems, columns of X at the extremities of the convex hull will be given largest weights.
The column weight is a function of the skewness and its sign.
The expected sign of the skewness is based on the skewness of W components, as returned by the first pass
of a 2-steps convex NMF. Thus, during the first pass, skewness must be set to False.
Can be set only if convex = 'transformation' and prior W and H have been defined.
random_state : int, RandomState instance or None, optional, default: None
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
verbose : integer, default: 0
The verbosity level (0/1).
Returns
-------
Estimator (dictionary) with following entries
W : array-like, shape (n_samples, n_components)
Solution to the non-negative least squares problem.
H : array-like, shape (n_features, n_components)
Solution to the non-negative least squares problem.
volume : scalar, volume occupied by W and H
WB : array-like, shape (n_samples, n_components)
Percent consistently clustered rows for each component.
only if n_bootstrap > 0.
HB : array-like, shape (n_features, n_components)
Percent consistently clustered columns for each component.
only if n_bootstrap > 0.
B : array-like, shape (n_observations, n_components) or (n_features, n_components)
        only if the convex variant is active; then H = B.T @ X or W = X @ B
diff : Objective minimum achieved
"""
if use_hals:
#convex and kullback-leibler loss options are not supported
beta_loss='frobenius'
convex=None
M = X
n, p = M.shape
if n_components is None:
nc = min(n, p)
else:
nc = n_components
if beta_loss == 'frobenius':
NMFAlgo = 2
else:
NMFAlgo = 1
LogIter = verbose
myStatusBox = StatusBoxTqdm(verbose=LogIter)
tolerance = tol
precision = EPSILON
if (W is None) & (H is None):
Mt, Mw = NMFInit(M, np.array([]), np.array([]), np.array([]), nc, tolerance, LogIter, myStatusBox)
init = 'nndsvd'
else:
        # Start from the provided prior(s); fill the missing factor with ones
        if H is None:
            Mt = np.copy(W)
            Mw = np.ones((p, nc))
            init = 'custom_W'
        elif W is None:
            Mt = np.ones((n, nc))
            Mw = np.copy(H)
            init = 'custom_H'
        else:
            Mt = np.copy(W)
            Mw = np.copy(H)
            init = 'custom'
for k in range(0, nc):
if NMFAlgo == 2:
Mt[:, k] = Mt[:, k] / np.linalg.norm(Mt[:, k])
Mw[:, k] = Mw[:, k] / np.linalg.norm(Mw[:, k])
else:
Mt[:, k] = Mt[:, k] / np.sum(Mt[:, k])
Mw[:, k] = Mw[:, k] / np.sum(Mw[:, k])
if n_bootstrap is None:
NMFRobustNRuns = 0
else:
NMFRobustNRuns = n_bootstrap
if NMFRobustNRuns > 1:
NMFAlgo += 2
if update_W is True:
NMFFixUserLHE = 0
else:
NMFFixUserLHE = 1
if update_H is True:
NMFFixUserRHE = 0
else:
NMFFixUserRHE = 1
MaxIterations = max_iter
NMFMaxInterm = max_iter_mult
if regularization is None:
NMFSparseLevel = 0
else:
if regularization == 'components':
NMFSparseLevel = sparsity
elif regularization == 'transformation':
NMFSparseLevel = -sparsity
else:
NMFSparseLevel = 0
NMFRobustResampleColumns = 0
if leverage == 'standard':
NMFCalculateLeverage = 1
NMFUseRobustLeverage = 0
elif leverage == 'robust':
NMFCalculateLeverage = 1
NMFUseRobustLeverage = 1
else:
NMFCalculateLeverage = 0
NMFUseRobustLeverage = 0
if convex is None:
NMFFindParts = 0
NMFFindCentroids = 0
NMFKernel = 1
elif convex == 'transformation':
NMFFindParts = 1
NMFFindCentroids = 0
NMFKernel = 1
elif convex == 'components':
NMFFindParts = 0
NMFFindCentroids = 1
if kernel == 'linear':
NMFKernel = 1
elif kernel == 'quadratic':
NMFKernel = 2
elif kernel == 'radial':
NMFKernel = 3
else:
NMFKernel = 1
if (null_priors is True) & ((init == 'custom') | (init == 'custom_H')):
NMFPriors = H
else:
NMFPriors = np.array([])
if convex is None:
NMFReweighColumns = 0
else:
if (convex == 'transformation') & (init == 'custom'):
if skewness is True:
NMFReweighColumns = 1
else:
NMFReweighColumns = 0
else:
NMFReweighColumns = 0
if random_state is not None:
RandomSeed = random_state
np.random.seed(RandomSeed)
if use_hals:
if NMFAlgo <=2:
NTFAlgo = 5
else:
NTFAlgo = 6
Mt_conv, Mt, Mw, Mb, MtPct, MwPct, diff, AddMessage, ErrMessage, cancel_pressed = rNTFSolve(
M, np.array([]), Mt, Mw, np.array([]), nc, tolerance, precision, LogIter, MaxIterations, NMFFixUserLHE, NMFFixUserRHE,
1, NTFAlgo, NMFRobustNRuns, NMFCalculateLeverage, NMFUseRobustLeverage,
0, 0, NMFSparseLevel, 0, 0, 0, 0, 0, 1, 0, np.array([]), myStatusBox)
Mev = np.ones(nc)
if (NMFFixUserLHE == 0) & (NMFFixUserRHE == 0):
# Scale
for k in range(0, nc):
ScaleMt = np.linalg.norm(Mt[:, k])
ScaleMw = np.linalg.norm(Mw[:, k])
Mev[k] = ScaleMt * ScaleMw
if Mev[k] > 0:
Mt[:, k] = Mt[:, k] / ScaleMt
Mw[:, k] = Mw[:, k] / ScaleMw
else:
Mt, Mw, MtPct, MwPct, diff, Mh, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = rNMFSolve(
M, np.array([]), Mt, Mw, nc, tolerance, precision, LogIter, MaxIterations, NMFAlgo, NMFFixUserLHE,
NMFFixUserRHE, NMFMaxInterm,
NMFSparseLevel, NMFRobustResampleColumns, NMFRobustNRuns, NMFCalculateLeverage, NMFUseRobustLeverage,
NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors, myStatusBox)
Mev = np.ones(nc)
if (NMFFindParts == 0) & (NMFFindCentroids == 0) & (NMFFixUserLHE == 0) & (NMFFixUserRHE == 0):
# Scale
for k in range(0, nc):
if (NMFAlgo == 2) | (NMFAlgo == 4):
ScaleMt = np.linalg.norm(Mt[:, k])
ScaleMw = np.linalg.norm(Mw[:, k])
else:
ScaleMt = np.sum(Mt[:, k])
ScaleMw = np.sum(Mw[:, k])
Mev[k] = ScaleMt * ScaleMw
if Mev[k] > 0:
Mt[:, k] = Mt[:, k] / ScaleMt
Mw[:, k] = Mw[:, k] / ScaleMw
volume = NMFDet(Mt, Mw, 1)
for message in AddMessage:
print(message)
myStatusBox.close()
# Order by decreasing scale
RMev = np.argsort(-Mev)
Mev = Mev[RMev]
Mt = Mt[:, RMev]
Mw = Mw[:, RMev]
if isinstance(MtPct, np.ndarray):
MtPct = MtPct[:, RMev]
MwPct = MwPct[:, RMev]
if (NMFFindParts == 0) & (NMFFindCentroids == 0):
        # Scale each column of Mt by its max and transfer the scale to Mw
for k in range(0, nc):
MaxCol = np.max(Mt[:, k])
if MaxCol > 0:
Mt[:, k] /= MaxCol
Mw[:, k] *= Mev[k] * MaxCol
Mev[k] = 1
else:
Mev[k] = 0
estimator = {}
if NMFRobustNRuns <= 1:
if (NMFFindParts == 0) & (NMFFindCentroids == 0):
estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('diff', diff)])
else:
estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('B', Mh), ('diff', diff)])
else:
if (NMFFindParts == 0) & (NMFFindCentroids == 0):
estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('WB', MtPct), ('HB', MwPct), ('diff', diff)])
else:
estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('B', Mh), ('WB', MtPct), ('HB', MwPct), ('diff', diff)])
return estimator
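# Illustrative usage of non_negative_factorization (a sketch; X is any non-negative 2-D array):
#     X = np.abs(np.random.randn(100, 30))
#     estimator = non_negative_factorization(X, n_components=5, n_bootstrap=10, random_state=0)
#     W, H = estimator['W'], estimator['H']        # X ~ W @ H.T
#     WB, HB = estimator['WB'], estimator['HB']    # robust clustering %, present since n_bootstrap > 1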
def nmf_predict(estimator, leverage='robust', blocks=None, cluster_by_stability=False, custom_order=False, verbose=0):
"""Derives ordered sample and feature indexes for future use in ordered heatmaps
Parameters
----------
    estimator : dictionary as returned by non_negative_factorization
leverage : None | 'standard' | 'robust', default 'robust'
Calculate leverage of W and H rows on each component.
blocks : array-like, shape(n_blocks), default None
Size of each block (if any) in ordered heatmap.
cluster_by_stability : boolean, default False
Use stability instead of leverage to assign samples/features to clusters
custom_order : boolean, default False
if False samples/features with highest leverage or stability appear on top of each cluster
if True within cluster ordering is modified to suggest a continuum between adjacent clusters
verbose : integer, default: 0
The verbosity level (0/1).
Returns
-------
Completed estimator with following entries:
WL : array-like, shape (n_samples, n_components)
Sample leverage on each component
HL : array-like, shape (n_features, n_components)
Feature leverage on each component
QL : array-like, shape (n_blocks, n_components)
Block leverage on each component (NTF only)
WR : vector-like, shape (n_samples)
Ranked sample indexes (by cluster and leverage or stability)
Used to produce ordered heatmaps
HR : vector-like, shape (n_features)
Ranked feature indexes (by cluster and leverage or stability)
Used to produce ordered heatmaps
WN : vector-like, shape (n_components)
Sample cluster bounds in ordered heatmap
HN : vector-like, shape (n_components)
Feature cluster bounds in ordered heatmap
WC : vector-like, shape (n_samples)
Sample assigned cluster
HC : vector-like, shape (n_features)
Feature assigned cluster
QC : vector-like, shape (size(blocks))
Block assigned cluster (NTF only)
"""
Mt = estimator['W']
Mw = estimator['H']
if 'Q' in estimator:
        # X is a 3-D tensor given in unfolded form: a 2-D array made of
        # horizontally concatenated blocks of equal size.
Mb = estimator['Q']
NMFAlgo = 5
NBlocks = Mb.shape[0]
BlkSize = Mw.shape[0] * np.ones(NBlocks)
else:
Mb = np.array([])
NMFAlgo = 0
if blocks is None:
NBlocks = 1
BlkSize = np.array([Mw.shape[0]])
else:
NBlocks = blocks.shape[0]
BlkSize = blocks
if 'WB' in estimator:
MtPct = estimator['WB']
else:
MtPct = None
if 'HB' in estimator:
MwPct = estimator['HB']
else:
MwPct = None
if leverage == 'standard':
NMFCalculateLeverage = 1
NMFUseRobustLeverage = 0
elif leverage == 'robust':
NMFCalculateLeverage = 1
NMFUseRobustLeverage = 1
else:
NMFCalculateLeverage = 0
NMFUseRobustLeverage = 0
if cluster_by_stability is True:
NMFRobustClusterByStability = 1
else:
NMFRobustClusterByStability = 0
if custom_order is True:
CellPlotOrderedClusters = 1
else:
CellPlotOrderedClusters = 0
AddMessage = []
myStatusBox = StatusBoxTqdm(verbose=verbose)
Mtn, Mwn, Mbn, RCt, RCw, NCt, NCw, RowClust, ColClust, BlockClust, AddMessage, ErrMessage, cancel_pressed = \
BuildClusters(Mt, Mw, Mb, MtPct, MwPct, NBlocks, BlkSize, NMFCalculateLeverage, NMFUseRobustLeverage, NMFAlgo,
NMFRobustClusterByStability, CellPlotOrderedClusters, AddMessage, myStatusBox)
for message in AddMessage:
print(message)
myStatusBox.close()
if 'Q' in estimator:
estimator.update([('WL', Mtn), ('HL', Mwn), ('WR', RCt), ('HR', RCw), ('WN', NCt), ('HN', NCw),
('WC', RowClust), ('HC', ColClust), ('QL', Mbn), ('QC', BlockClust)])
else:
estimator.update([('WL', Mtn), ('HL', Mwn), ('WR', RCt), ('HR', RCw), ('WN', NCt), ('HN', NCw),
('WC', RowClust), ('HC', ColClust), ('QL', None), ('QC', None)])
return estimator
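# Illustrative chaining of nmf_predict after non_negative_factorization (a sketch):
#     estimator = non_negative_factorization(X, n_components=5, n_bootstrap=10)
#     estimator = nmf_predict(estimator, leverage='robust')
#     X_ordered = X[estimator['WR'], :][:, estimator['HR']]   # heatmap-ready reordering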
def nmf_permutation_test_score(estimator, y, n_permutations=100, verbose=0):
"""Do a permutation test to assess association between ordered samples and some covariate
Parameters
----------
    estimator : dictionary as returned by non_negative_factorization and nmf_predict
y : array-like, group to be predicted
n_permutations : integer, default: 100
verbose : integer, default: 0
The verbosity level (0/1).
Returns
-------
Completed estimator with following entries:
score : float
The true score without permuting targets.
pvalue : float
The p-value, which approximates the probability that the score would be obtained by chance.
CS : array-like, shape(n_components)
The size of each cluster
CP : array-like, shape(n_components)
The pvalue of the most significant group within each cluster
CG : array-like, shape(n_components)
The index of the most significant group within each cluster
CN : array-like, shape(n_components, n_groups)
The size of each group within each cluster
"""
Mt = estimator['W']
RCt = estimator['WR']
NCt = estimator['WN']
RowGroups = y
    uniques, index = np.unique(RowGroups, return_index=True)
ListGroups = RowGroups[index]
nbGroups = ListGroups.shape[0]
Ngroup = np.zeros(nbGroups)
for group in range(0, nbGroups):
Ngroup[group] = np.where(RowGroups == ListGroups[group])[0].shape[0]
Nrun = n_permutations
myStatusBox = StatusBoxTqdm(verbose=verbose)
ClusterSize, Pglob, prun, ClusterProb, ClusterGroup, ClusterNgroup, cancel_pressed = \
GlobalSign(Nrun, nbGroups, Mt, RCt, NCt, RowGroups, ListGroups, Ngroup, myStatusBox)
estimator.update(
[('score', prun), ('pvalue', Pglob), ('CS', ClusterSize), ('CP', ClusterProb), ('CG', ClusterGroup),
('CN', ClusterNgroup)])
return estimator
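# Illustrative usage (a sketch; the estimator must first pass through nmf_predict so that
# 'WR' and 'WN' exist; y is a hypothetical label vector aligned with the rows of X):
#     y = np.array(['A', 'B'] * (X.shape[0] // 2))
#     estimator = nmf_permutation_test_score(estimator, y, n_permutations=100)
#     estimator['pvalue'], estimator['CS']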
def non_negative_tensor_factorization(X, n_blocks, W=None, H=None, Q=None, n_components=None,
update_W=True,
update_H=True,
update_Q=True,
fast_hals=True, n_iter_hals=2, n_shift=0,
regularization=None, sparsity=0,
unimodal=False, smooth=False,
apply_left=False, apply_right=False, apply_block=False,
n_bootstrap=None,
tol=1e-6,
max_iter=150,
leverage='standard',
random_state=None,
init_type=0,
verbose=0):
"""Compute Non-negative Tensor Factorization (NTF)
    Find three non-negative matrices (W, H, Q) such that X = W @@ H @@ Q + Error (@@ = tensor product).
This factorization can be used for example for
dimensionality reduction, source separation or topic extraction.
The objective function is minimized with an alternating minimization of W
and H.
Parameters
----------
X : array-like, shape (n_samples, n_features x n_blocks)
Constant matrix.
X is a tensor with shape (n_samples, n_features, n_blocks), however unfolded along 2nd and 3rd dimensions.
n_blocks : integer
W : array-like, shape (n_samples, n_components)
prior W
H : array-like, shape (n_features, n_components)
prior H
Q : array-like, shape (n_blocks, n_components)
prior Q
n_components : integer
Number of components, if n_components is not set : n_components = min(n_samples, n_features)
update_W : boolean, default: True
Update or keep W fixed
update_H : boolean, default: True
Update or keep H fixed
update_Q : boolean, default: True
Update or keep Q fixed
fast_hals : boolean, default: True
Use fast implementation of HALS
n_iter_hals : integer, default: 2
Number of HALS iterations prior to fast HALS
n_shift : integer, default: 0
max shifting in convolutional NTF
regularization : None | 'components' | 'transformation'
Select whether the regularization affects the components (H), the
transformation (W) or none of them.
sparsity : float, default: 0
Sparsity target with 0 <= sparsity <= 1 representing the mean % rows per column in W or H set to 0
unimodal : Boolean, default: False
smooth : Boolean, default: False
apply_left : Boolean, default: False
apply_right : Boolean, default: False
apply_block : Boolean, default: False
    n_bootstrap : integer, default: None
Number of bootstrap runs.
tol : float, default: 1e-6
Tolerance of the stopping condition.
    max_iter : integer, default: 150
Maximum number of iterations.
leverage : None | 'standard' | 'robust', default 'standard'
Calculate leverage of W and H rows on each component.
random_state : int, RandomState instance or None, optional, default: None
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
init_type : integer, default 0
init_type = 0 : NMF initialization applied on the reshaped matrix [1st dim x vectorized (2nd & 3rd dim)]
init_type = 1 : NMF initialization applied on the reshaped matrix [vectorized (1st & 2nd dim) x 3rd dim]
verbose : integer, default: 0
The verbosity level (0/1).
Returns
-------
Estimator (dictionary) with following entries
W : array-like, shape (n_samples, n_components)
Solution to the non-negative least squares problem.
H : array-like, shape (n_features, n_components)
Solution to the non-negative least squares problem.
Q : array-like, shape (n_blocks, n_components)
Solution to the non-negative least squares problem.
volume : scalar, volume occupied by W and H
WB : array-like, shape (n_samples, n_components)
Percent consistently clustered rows for each component.
only if n_bootstrap > 0.
HB : array-like, shape (n_features, n_components)
Percent consistently clustered columns for each component.
only if n_bootstrap > 0.
Reference
---------
A. Cichocki, P.H.A.N. Anh-Huym, Fast local algorithms for large scale nonnegative matrix and tensor factorizations,
IEICE Trans. Fundam. Electron. Commun. Comput. Sci. 92 (3) (2009) 708–721.
"""
M = X
n, p = M.shape
if n_components is None:
nc = min(n, p)
else:
nc = n_components
NBlocks = n_blocks
p_block = int(p / NBlocks)
tolerance = tol
precision = EPSILON
LogIter = verbose
if regularization is None:
NMFSparseLevel = 0
else:
if regularization == 'components':
NMFSparseLevel = sparsity
elif regularization == 'transformation':
NMFSparseLevel = -sparsity
else:
NMFSparseLevel = 0
NTFUnimodal = unimodal
NTFSmooth = smooth
NTFLeftComponents = apply_left
NTFRightComponents = apply_right
NTFBlockComponents = apply_block
if random_state is not None:
RandomSeed = random_state
np.random.seed(RandomSeed)
myStatusBox = StatusBoxTqdm(verbose=LogIter)
if (W is None) & (H is None) & (Q is None):
Mt0, Mw0, Mb0, AddMessage, ErrMessage, cancel_pressed = NTFInit(M, np.array([]), np.array([]), np.array([]), nc,
tolerance, precision, LogIter, NTFUnimodal,
NTFLeftComponents, NTFRightComponents,
NTFBlockComponents, NBlocks, init_type, myStatusBox)
else:
if W is None:
Mt0 = np.ones((n, nc))
else:
Mt0 = np.copy(W)
if H is None:
            Mw0 = np.ones((p_block, nc))
else:
Mw0 = np.copy(H)
if Q is None:
Mb0 = np.ones((NBlocks, nc))
else:
Mb0 = np.copy(Q)
Mfit = np.zeros((n, p))
for k in range(0, nc):
for iBlock in range(0, NBlocks):
Mfit[:, iBlock*p_block:(iBlock+1)*p_block] += Mb0[iBlock, k] * \
np.reshape(Mt0[:, k], (n, 1)) @ np.reshape(Mw0[:, k], (1, p_block))
ScaleRatio = (np.linalg.norm(Mfit) / np.linalg.norm(M))**(1/3)
for k in range(0, nc):
Mt0[:, k] /= ScaleRatio
Mw0[:, k] /= ScaleRatio
Mb0[:, k] /= ScaleRatio
Mfit = np.zeros((n, p))
for k in range(0, nc):
for iBlock in range(0, NBlocks):
Mfit[:, iBlock*p_block:(iBlock+1)*p_block] += Mb0[iBlock, k] * \
np.reshape(Mt0[:, k], (n, 1)) @ np.reshape(Mw0[:, k], (1, p_block))
NTFFastHALS = fast_hals
NTFNIterations = n_iter_hals
MaxIterations = max_iter
NTFNConv = n_shift
if n_bootstrap is None:
NMFRobustNRuns = 0
else:
NMFRobustNRuns = n_bootstrap
if NMFRobustNRuns <= 1:
NMFAlgo = 5
else:
NMFAlgo = 6
if leverage == 'standard':
NMFCalculateLeverage = 1
NMFUseRobustLeverage = 0
elif leverage == 'robust':
NMFCalculateLeverage = 1
NMFUseRobustLeverage = 1
else:
NMFCalculateLeverage = 0
NMFUseRobustLeverage = 0
if random_state is not None:
RandomSeed = random_state
np.random.seed(RandomSeed)
if update_W:
NMFFixUserLHE = 0
else:
NMFFixUserLHE = 1
if update_H:
NMFFixUserRHE = 0
else:
NMFFixUserRHE = 1
if update_Q:
NMFFixUserBHE = 0
else:
NMFFixUserBHE = 1
Mt_conv, Mt, Mw, Mb, MtPct, MwPct, diff, AddMessage, ErrMessage, cancel_pressed = rNTFSolve(
M, np.array([]), Mt0, Mw0, Mb0, nc, tolerance, precision, LogIter, MaxIterations, NMFFixUserLHE, NMFFixUserRHE,
NMFFixUserBHE, NMFAlgo, NMFRobustNRuns,
NMFCalculateLeverage, NMFUseRobustLeverage, NTFFastHALS, NTFNIterations, NMFSparseLevel, NTFUnimodal, NTFSmooth,
NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv, np.array([]), myStatusBox)
volume = NMFDet(Mt, Mw, 1)
for message in AddMessage:
print(message)
myStatusBox.close()
estimator = {}
if NMFRobustNRuns <= 1:
estimator.update([('W', Mt), ('H', Mw), ('Q', Mb), ('volume', volume), ('diff', diff)])
else:
estimator.update([('W', Mt), ('H', Mw), ('Q', Mb), ('volume', volume), ('WB', MtPct), ('HB', MwPct), ('diff', diff)])
return estimator
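# Illustrative usage of non_negative_tensor_factorization (a sketch; a (60, 5, 4) tensor
# unfolded along the 2nd and 3rd dimensions into a (60, 20) array with 4 blocks):
#     X = np.abs(np.random.randn(60, 5 * 4))
#     estimator = non_negative_tensor_factorization(X, n_blocks=4, n_components=3)
#     # estimator['W'].shape == (60, 3); estimator['H'].shape == (5, 3); estimator['Q'].shape == (4, 3)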
Functions
def NMFInit(M, Mmis, Mt0, Mw0, nc, tolerance, LogIter, myStatusBox)
Initialize NMF components using NNDSVD.
Input
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
Mt0: Initial left hand matrix (may be empty)
Mw0: Initial right hand matrix (may be empty)
nc: NMF rank
Output
Mt: Left hand matrix
Mw: Right hand matrix
Reference
C. Boutsidis, E. Gallopoulos (2008) SVD based initialization: A head start for nonnegative matrix factorization. Pattern Recognition, Volume 41, Issue 4, April 2008, Pages 1350-1362
def NTFInit(M, Mmis, Mt_nmf, Mw_nmf, nc, tolerance, precision, LogIter, NTFUnimodal, NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, init_type, myStatusBox)
Initialize NTF components for HALS.
Input
M: Input tensor
Mmis: Define missing values (0 = missing cell, 1 = real cell)
Mt_nmf: Initial LHM from NMF of the unstacked tensor (may be empty)
Mw_nmf: Initial RHM from NMF of the unstacked tensor (may be empty)
nc: NTF rank
tolerance: Convergence threshold
precision: Replace 0-values in multiplication rules
LogIter: Log results through iterations
NTFUnimodal: Apply Unimodal constraint on factoring vectors
NTFLeftComponents: Apply Unimodal/Smooth constraint on left hand matrix
NTFRightComponents: Apply Unimodal/Smooth constraint on right hand matrix
NTFBlockComponents: Apply Unimodal/Smooth constraint on block hand matrix
NBlocks: Number of NTF blocks
init_type: integer, default 0
    init_type = 0: NMF initialization applied on the reshaped matrix [1st dim x vectorized (2nd & 3rd dim)]
    init_type = 1: NMF initialization applied on the reshaped matrix [vectorized (1st & 2nd dim) x 3rd dim]
Output
Mt: Left hand matrix
Mw: Right hand matrix
Mb: Block hand matrix
Expand source code
def NTFInit(M, Mmis, Mt_nmf, Mw_nmf, nc, tolerance, precision, LogIter, NTFUnimodal,
            NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, init_type, myStatusBox):
    """Initialize NTF components for HALS

    Input:
        M: Input tensor
        Mmis: Define missing values (0 = missing cell, 1 = real cell)
        Mt_nmf: initialization of LHM in NMF (unstacked tensor), may be empty
        Mw_nmf: initialization of RHM of NMF (unstacked tensor), may be empty
        nc: NTF rank
        tolerance: Convergence threshold
        precision: Replace 0-values in multiplication rules
        LogIter: Log results through iterations
        NTFUnimodal: Apply Unimodal constraint on factoring vectors
        NTFLeftComponents: Apply Unimodal/Smooth constraint on left hand matrix
        NTFRightComponents: Apply Unimodal/Smooth constraint on right hand matrix
        NTFBlockComponents: Apply Unimodal/Smooth constraint on block hand matrix
        NBlocks: Number of NTF blocks
        init_type: integer, default 0
            init_type = 0: NMF initialization applied on the reshaped matrix [1st dim x vectorized (2nd & 3rd dim)]
            init_type = 1: NMF initialization applied on the reshaped matrix [vectorized (1st & 2nd dim) x 3rd dim]
    Output:
        Mt: Left hand matrix
        Mw: Right hand matrix
        Mb: Block hand matrix
    """
    AddMessage = []
    n, p = M.shape
    Mmis = Mmis.astype(np.int)
    n_Mmis = Mmis.shape[0]
    if n_Mmis == 0:
        ID = np.where(np.isnan(M) == True)
        n_Mmis = ID[0].size
        if n_Mmis > 0:
            Mmis = (np.isnan(M) == False)
            Mmis = Mmis.astype(np.int)
            M[Mmis == 0] = 0

    nc = int(nc)
    NBlocks = int(NBlocks)
    init_type = int(init_type)
    Status0 = "Step 1 - Quick NMF Ncomp=" + str(nc) + ": "
    if init_type == 1:
        # Init legacy
        Mstacked, Mmis_stacked = NTFStack(M, Mmis, NBlocks)
        nc2 = min(nc, NBlocks)  # factorization rank can't be > number of blocks
        if (Mt_nmf.shape[0] == 0) or (Mw_nmf.shape[0] == 0):
            Mt_nmf, Mw_nmf = NMFInit(Mstacked, Mmis_stacked, np.array([]), np.array([]), nc2,
                                     tolerance, LogIter, myStatusBox)
        else:
            Mt_nmf, Mw_nmf = NMFInit(Mstacked, Mmis_stacked, Mt_nmf, Mw_nmf, nc2,
                                     tolerance, LogIter, myStatusBox)

        # Quick NMF
        Mt_nmf, Mw_nmf, diff, Mh, dummy1, dummy2, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
            Mstacked, Mmis_stacked, Mt_nmf, Mw_nmf, nc2, tolerance, precision, LogIter, Status0,
            10, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, np.array([]), 0, AddMessage, myStatusBox)

        # Factorize Left vectors and distribute multiple factors if nc2 < nc
        Mt = np.zeros((n, nc))
        Mw = np.zeros((int(p / NBlocks), nc))
        Mb = np.zeros((NBlocks, nc))
        NFact = int(np.ceil(nc / NBlocks))
        for k in range(0, nc2):
            myStatusBox.update_status(delay=1, status="Start SVD...")
            U, d, V = svds(np.reshape(Mt_nmf[:, k], (int(p / NBlocks), n)).T, k=NFact)
            V = V.T
            # svds returns singular vectors in reverse order
            U = U[:, ::-1]
            V = V[:, ::-1]
            d = d[::-1]
            myStatusBox.update_status(delay=1, status="SVD completed")
            for iFact in range(0, NFact):
                ind = iFact * NBlocks + k
                if ind < nc:
                    U1 = U[:, iFact]
                    U2 = -U[:, iFact]
                    U1[U1 < 0] = 0
                    U2[U2 < 0] = 0
                    V1 = V[:, iFact]
                    V2 = -V[:, iFact]
                    V1[V1 < 0] = 0
                    V2[V2 < 0] = 0
                    U1 = np.reshape(U1, (n, 1))
                    V1 = np.reshape(V1, (1, int(p / NBlocks)))
                    U2 = np.reshape(U2, (n, 1))
                    V2 = np.reshape(V2, (1, int(p / NBlocks)))
                    if np.linalg.norm(U1 @ V1) > np.linalg.norm(U2 @ V2):
                        Mt[:, ind] = np.reshape(U1, n)
                        Mw[:, ind] = d[iFact] * np.reshape(V1, int(p / NBlocks))
                    else:
                        Mt[:, ind] = np.reshape(U2, n)
                        Mw[:, ind] = d[iFact] * np.reshape(V2, int(p / NBlocks))

                    Mb[:, ind] = Mw_nmf[:, k]
    else:
        # Init default
        if (Mt_nmf.shape[0] == 0) or (Mw_nmf.shape[0] == 0):
            Mt_nmf, Mw_nmf = NMFInit(M, Mmis, np.array([]), np.array([]), nc, tolerance, LogIter, myStatusBox)
        else:
            Mt_nmf, Mw_nmf = NMFInit(M, Mmis, Mt_nmf, Mw_nmf, nc, tolerance, LogIter, myStatusBox)

        # Quick NMF
        Mt_nmf, Mw_nmf, diff, Mh, dummy1, dummy2, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
            M, Mmis, Mt_nmf, Mw_nmf, nc, tolerance, precision, LogIter, Status0,
            10, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, np.array([]), 0, AddMessage, myStatusBox)

        # Factorize Left vectors
        Mt = np.zeros((n, nc))
        Mw = np.zeros((int(p / NBlocks), nc))
        Mb = np.zeros((NBlocks, nc))
        for k in range(0, nc):
            myStatusBox.update_status(delay=1, status="Start SVD...")
            U, d, V = svds(np.reshape(Mw_nmf[:, k], (int(p / NBlocks), NBlocks)), k=1)
            V = V.T
            U = np.abs(U)
            V = np.abs(V)
            myStatusBox.update_status(delay=1, status="SVD completed")
            Mt[:, k] = Mt_nmf[:, k]
            Mw[:, k] = d[0] * np.reshape(U, int(p / NBlocks))
            Mb[:, k] = np.reshape(V, NBlocks)

    for k in range(0, nc):
        if (NTFUnimodal > 0) & (NTFLeftComponents > 0):
            # Enforce unimodal distribution
            tmax = np.argmax(Mt[:, k])
            for i in range(tmax + 1, n):
                Mt[i, k] = min(Mt[i - 1, k], Mt[i, k])

            for i in range(tmax - 1, -1, -1):
                Mt[i, k] = min(Mt[i + 1, k], Mt[i, k])

        if (NTFUnimodal > 0) & (NTFRightComponents > 0):
            # Enforce unimodal distribution
            wmax = np.argmax(Mw[:, k])
            for j in range(wmax + 1, int(p / NBlocks)):
                Mw[j, k] = min(Mw[j - 1, k], Mw[j, k])

            for j in range(wmax - 1, -1, -1):
                Mw[j, k] = min(Mw[j + 1, k], Mw[j, k])

        if (NTFUnimodal > 0) & (NTFBlockComponents > 0):
            # Enforce unimodal distribution
            bmax = np.argmax(Mb[:, k])
            for iBlock in range(bmax + 1, NBlocks):
                Mb[iBlock, k] = min(Mb[iBlock - 1, k], Mb[iBlock, k])

            for iBlock in range(bmax - 1, -1, -1):
                Mb[iBlock, k] = min(Mb[iBlock + 1, k], Mb[iBlock, k])

    return [Mt, Mw, Mb, AddMessage, ErrMessage, cancel_pressed]
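The unimodal constraint applied at the end of NTFInit is a simple monotone projection: moving away from each component's peak, values are clipped so they never increase. A minimal, self-contained sketch of that projection on a single vector (enforce_unimodal and v are illustrative names, not part of the module):

    import numpy as np

    def enforce_unimodal(v):
        # Make v non-increasing on both sides of its maximum,
        # mirroring the loops at the end of NTFInit.
        v = v.copy()
        peak = np.argmax(v)
        for i in range(peak + 1, v.size):       # right of the peak
            v[i] = min(v[i - 1], v[i])
        for i in range(peak - 1, -1, -1):       # left of the peak
            v[i] = min(v[i + 1], v[i])
        return v

    print(enforce_unimodal(np.array([0.1, 0.5, 0.3, 0.7, 0.2])))
    # -> [0.1 0.3 0.3 0.7 0.2]: rises to the peak, then decreases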
def nmf_permutation_test_score(estimator, y, n_permutations=100, verbose=0)

Do a permutation test to assess association between ordered samples and some covariate

Parameters
estimator : tuple as returned by non_negative_factorization() and nmf_predict()
y : array-like, group to be predicted
n_permutations : integer, default: 100
verbose : integer, default: 0
    The verbosity level (0/1).

Returns
Completed estimator with following entries:
score : float
    The true score without permuting targets.
pvalue : float
    The p-value, which approximates the probability that the score would be obtained by chance.
CS : array-like, shape (n_components)
    The size of each cluster
CP : array-like, shape (n_components)
    The p-value of the most significant group within each cluster
CG : array-like, shape (n_components)
    The index of the most significant group within each cluster
CN : array-like, shape (n_components, n_groups)
    The size of each group within each cluster
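A hedged usage sketch: the estimator passed in must already contain the 'W', 'WR' and 'WN' entries, so it should come out of non_negative_factorization() followed by nmf_predict(). X and y below are hypothetical data, with group labels following the sample order:

    import numpy as np
    X = np.abs(np.random.randn(20, 10))     # hypothetical non-negative data
    y = np.repeat(np.array([0, 1]), 10)     # two known sample groups

    estimator = non_negative_factorization(X, n_components=2, random_state=0)
    estimator = nmf_predict(estimator, leverage='robust')   # adds 'WR' and 'WN'
    estimator = nmf_permutation_test_score(estimator, y, n_permutations=100)
    print(estimator['score'], estimator['pvalue'])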
Expand source code
def nmf_permutation_test_score(estimator, y, n_permutations=100, verbose=0):
    """Do a permutation test to assess association between ordered samples and some covariate

    Parameters
    ----------
    estimator : tuple as returned by non_negative_factorization and nmf_predict
    y : array-like, group to be predicted
    n_permutations : integer, default: 100
    verbose : integer, default: 0
        The verbosity level (0/1).

    Returns
    -------
    Completed estimator with following entries:
    score : float
        The true score without permuting targets.
    pvalue : float
        The p-value, which approximates the probability that the score would be obtained by chance.
    CS : array-like, shape (n_components)
        The size of each cluster
    CP : array-like, shape (n_components)
        The p-value of the most significant group within each cluster
    CG : array-like, shape (n_components)
        The index of the most significant group within each cluster
    CN : array-like, shape (n_components, n_groups)
        The size of each group within each cluster
    """
    Mt = estimator['W']
    RCt = estimator['WR']
    NCt = estimator['WN']
    RowGroups = y
    uniques, index = np.unique([row for row in RowGroups], return_index=True)
    ListGroups = RowGroups[index]
    nbGroups = ListGroups.shape[0]
    Ngroup = np.zeros(nbGroups)
    for group in range(0, nbGroups):
        Ngroup[group] = np.where(RowGroups == ListGroups[group])[0].shape[0]

    Nrun = n_permutations
    myStatusBox = StatusBoxTqdm(verbose=verbose)
    ClusterSize, Pglob, prun, ClusterProb, ClusterGroup, ClusterNgroup, cancel_pressed = \
        GlobalSign(Nrun, nbGroups, Mt, RCt, NCt, RowGroups, ListGroups, Ngroup, myStatusBox)

    estimator.update([('score', prun), ('pvalue', Pglob), ('CS', ClusterSize), ('CP', ClusterProb),
                      ('CG', ClusterGroup), ('CN', ClusterNgroup)])
    return estimator
def nmf_predict(estimator, leverage='robust', blocks=None, cluster_by_stability=False, custom_order=False, verbose=0)

Derives ordered sample and feature indexes for future use in ordered heatmaps

Parameters
estimator : tuple as returned by non_negative_factorization()
leverage : None | 'standard' | 'robust', default: 'robust'
    Calculate leverage of W and H rows on each component.
blocks : array-like, shape (n_blocks), default: None
    Size of each block (if any) in ordered heatmap.
cluster_by_stability : boolean, default: False
    Use stability instead of leverage to assign samples/features to clusters
custom_order : boolean, default: False
    If False, samples/features with highest leverage or stability appear on top of each cluster;
    if True, within-cluster ordering is modified to suggest a continuum between adjacent clusters.
verbose : integer, default: 0
    The verbosity level (0/1).

Returns
Completed estimator with following entries:
WL : array-like, shape (n_samples, n_components)
    Sample leverage on each component
HL : array-like, shape (n_features, n_components)
    Feature leverage on each component
QL : array-like, shape (n_blocks, n_components)
    Block leverage on each component (NTF only)
WR : vector-like, shape (n_samples)
    Ranked sample indexes (by cluster and leverage or stability); used to produce ordered heatmaps
HR : vector-like, shape (n_features)
    Ranked feature indexes (by cluster and leverage or stability); used to produce ordered heatmaps
WN : vector-like, shape (n_components)
    Sample cluster bounds in ordered heatmap
HN : vector-like, shape (n_components)
    Feature cluster bounds in ordered heatmap
WC : vector-like, shape (n_samples)
    Sample assigned cluster
HC : vector-like, shape (n_features)
    Feature assigned cluster
QC : vector-like, shape (size(blocks))
    Block assigned cluster (NTF only)
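A hedged sketch of the intended workflow: factorize, derive the ordered indexes, then reorder the data matrix for a heatmap. This assumes WR and HR hold 0-based row and column indexes; X is hypothetical:

    import numpy as np
    X = np.abs(np.random.randn(60, 25))     # hypothetical non-negative data
    estimator = non_negative_factorization(X, n_components=3, n_bootstrap=10)
    estimator = nmf_predict(estimator, leverage='robust')

    rows = np.asarray(estimator['WR'], dtype=int)   # ranked sample indexes
    cols = np.asarray(estimator['HR'], dtype=int)   # ranked feature indexes
    X_ordered = X[rows, :][:, cols]                 # input to an ordered heatmap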
Expand source code
def nmf_predict(estimator, leverage='robust', blocks=None, cluster_by_stability=False, custom_order=False, verbose=0):
    """Derives ordered sample and feature indexes for future use in ordered heatmaps

    Parameters
    ----------
    estimator : tuple as returned by non_negative_factorization
    leverage : None | 'standard' | 'robust', default 'robust'
        Calculate leverage of W and H rows on each component.
    blocks : array-like, shape (n_blocks), default None
        Size of each block (if any) in ordered heatmap.
    cluster_by_stability : boolean, default False
        Use stability instead of leverage to assign samples/features to clusters
    custom_order : boolean, default False
        If False, samples/features with highest leverage or stability appear on top of each cluster;
        if True, within-cluster ordering is modified to suggest a continuum between adjacent clusters.
    verbose : integer, default: 0
        The verbosity level (0/1).

    Returns
    -------
    Completed estimator with following entries:
    WL : array-like, shape (n_samples, n_components)
        Sample leverage on each component
    HL : array-like, shape (n_features, n_components)
        Feature leverage on each component
    QL : array-like, shape (n_blocks, n_components)
        Block leverage on each component (NTF only)
    WR : vector-like, shape (n_samples)
        Ranked sample indexes (by cluster and leverage or stability); used to produce ordered heatmaps
    HR : vector-like, shape (n_features)
        Ranked feature indexes (by cluster and leverage or stability); used to produce ordered heatmaps
    WN : vector-like, shape (n_components)
        Sample cluster bounds in ordered heatmap
    HN : vector-like, shape (n_components)
        Feature cluster bounds in ordered heatmap
    WC : vector-like, shape (n_samples)
        Sample assigned cluster
    HC : vector-like, shape (n_features)
        Feature assigned cluster
    QC : vector-like, shape (size(blocks))
        Block assigned cluster (NTF only)
    """
    Mt = estimator['W']
    Mw = estimator['H']
    if 'Q' in estimator:
        # X is a 3D tensor, in unfolded form of a 2D array:
        # horizontal concatenation of blocks of equal size.
        Mb = estimator['Q']
        NMFAlgo = 5
        NBlocks = Mb.shape[0]
        BlkSize = Mw.shape[0] * np.ones(NBlocks)
    else:
        Mb = np.array([])
        NMFAlgo = 0
        if blocks is None:
            NBlocks = 1
            BlkSize = np.array([Mw.shape[0]])
        else:
            NBlocks = blocks.shape[0]
            BlkSize = blocks

    if 'WB' in estimator:
        MtPct = estimator['WB']
    else:
        MtPct = None

    if 'HB' in estimator:
        MwPct = estimator['HB']
    else:
        MwPct = None

    if leverage == 'standard':
        NMFCalculateLeverage = 1
        NMFUseRobustLeverage = 0
    elif leverage == 'robust':
        NMFCalculateLeverage = 1
        NMFUseRobustLeverage = 1
    else:
        NMFCalculateLeverage = 0
        NMFUseRobustLeverage = 0

    if cluster_by_stability is True:
        NMFRobustClusterByStability = 1
    else:
        NMFRobustClusterByStability = 0

    if custom_order is True:
        CellPlotOrderedClusters = 1
    else:
        CellPlotOrderedClusters = 0

    AddMessage = []
    myStatusBox = StatusBoxTqdm(verbose=verbose)
    Mtn, Mwn, Mbn, RCt, RCw, NCt, NCw, RowClust, ColClust, BlockClust, AddMessage, ErrMessage, cancel_pressed = \
        BuildClusters(Mt, Mw, Mb, MtPct, MwPct, NBlocks, BlkSize, NMFCalculateLeverage, NMFUseRobustLeverage,
                      NMFAlgo, NMFRobustClusterByStability, CellPlotOrderedClusters, AddMessage, myStatusBox)
    for message in AddMessage:
        print(message)

    myStatusBox.close()
    if 'Q' in estimator:
        estimator.update([('WL', Mtn), ('HL', Mwn), ('WR', RCt), ('HR', RCw), ('WN', NCt), ('HN', NCw),
                          ('WC', RowClust), ('HC', ColClust), ('QL', Mbn), ('QC', BlockClust)])
    else:
        estimator.update([('WL', Mtn), ('HL', Mwn), ('WR', RCt), ('HR', RCw), ('WN', NCt), ('HN', NCw),
                          ('WC', RowClust), ('HC', ColClust), ('QL', None), ('QC', None)])
    return estimator
def non_negative_factorization(X, W=None, H=None, n_components=None, update_W=True, update_H=True, beta_loss='frobenius', use_hals=False, n_bootstrap=None, tol=1e-06, max_iter=150, max_iter_mult=20, regularization=None, sparsity=0, leverage='standard', convex=None, kernel='linear', skewness=False, null_priors=False, random_state=None, verbose=0)

Compute Non-negative Matrix Factorization (NMF)

Find two non-negative matrices (W, H) such that X = W @ H.T + Error. This factorization can be used for example for dimensionality reduction, source separation or topic extraction.
The objective function is minimized with an alternating minimization of W and H.

Parameters
X : array-like, shape (n_samples, n_features)
    Constant matrix.
W : array-like, shape (n_samples, n_components)
    Prior W. If update_W == False, it is used as a constant, to solve for H only.
H : array-like, shape (n_features, n_components)
    Prior H. If update_H == False, it is used as a constant, to solve for W only.
n_components : integer
    Number of components; if n_components is not set: n_components = min(n_samples, n_features)
update_W : boolean, default: True
    Update or keep W fixed
update_H : boolean, default: True
    Update or keep H fixed
beta_loss : string, default: 'frobenius'
    String must be in {'frobenius', 'kullback-leibler'}. Beta divergence to be minimized, measuring the distance between X and the dot product WH. Note that values different from 'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits. Note that for beta_loss == 'kullback-leibler', the input matrix X cannot contain zeros.
use_hals : boolean, default: False
    True -> HALS algorithm (note that convex and kullback-leibler loss options are not supported); False -> projected gradient
n_bootstrap : integer, default: None
    Number of bootstrap runs.
tol : float, default: 1e-6
    Tolerance of the stopping condition.
max_iter : integer, default: 150
    Maximum number of iterations.
max_iter_mult : integer, default: 20
    Maximum number of iterations in multiplicative warm-up to projected gradient (beta_loss = 'frobenius' only).
regularization : None | 'components' | 'transformation'
    Select whether the regularization affects the components (H), the transformation (W) or none of them.
sparsity : float, default: 0
    Sparsity target with 0 <= sparsity <= 1 representing either:
    - the % rows in W or H set to 0 (when use_hals = False)
    - the mean % rows per column in W or H set to 0 (when use_hals = True)
leverage : None | 'standard' | 'robust', default: 'standard'
    Calculate leverage of W and H rows on each component.
convex : None | 'components' | 'transformation', default: None
    Apply convex constraint on W or H.
kernel : 'linear' | 'quadratic' | 'radial', default: 'linear'
    Can be set if convex = 'transformation'.
null_priors : boolean, default: False
    Cells of H with prior cells = 0 will not be updated. Can be set only if prior H has been defined.
skewness : boolean, default: False
    When solving mixture problems, columns of X at the extremities of the convex hull will be given largest weights. The column weight is a function of the skewness and its sign. The expected sign of the skewness is based on the skewness of W components, as returned by the first pass of a two-step convex NMF. Thus, during the first pass, skewness must be set to False. Can be set only if convex = 'transformation' and prior W and H have been defined.
random_state : int, RandomState instance or None, optional, default: None
    If int, random_state is the seed used by the random number generator;
    if RandomState instance, random_state is the random number generator;
    if None, the random number generator is the RandomState instance used by np.random.
verbose : integer, default: 0
    The verbosity level (0/1).

Returns
Estimator (dictionary) with following entries:
W : array-like, shape (n_samples, n_components)
    Solution to the non-negative least squares problem.
H : array-like, shape (n_features, n_components)
    Solution to the non-negative least squares problem.
volume : scalar
    Volume occupied by W and H.
WB : array-like, shape (n_samples, n_components)
    Percent consistently clustered rows for each component; only if n_bootstrap > 0.
HB : array-like, shape (n_features, n_components)
    Percent consistently clustered columns for each component; only if n_bootstrap > 0.
B : array-like, shape (n_observations, n_components) or (n_features, n_components)
    Only if active convex variant: H = B.T @ X or W = X @ B.
diff : scalar
    Objective minimum achieved.
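A minimal usage sketch based on the signature above; X is hypothetical random data, and the shapes follow the Returns section (note H has shape (n_features, n_components), so the model is X ≈ W @ H.T):

    import numpy as np
    X = np.abs(np.random.randn(100, 30))    # hypothetical non-negative data
    estimator = non_negative_factorization(X, n_components=5, beta_loss='frobenius',
                                           n_bootstrap=10, random_state=42)
    W, H = estimator['W'], estimator['H']   # shapes (100, 5) and (30, 5)
    approx = W @ H.T                        # reconstruction of X
    print(estimator['volume'], estimator['diff'], estimator['WB'].shape)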
Expand source code
def non_negative_factorization(X, W=None, H=None, n_components=None, update_W=True, update_H=True,
                               beta_loss='frobenius', use_hals=False, n_bootstrap=None, tol=1e-6,
                               max_iter=150, max_iter_mult=20, regularization=None, sparsity=0,
                               leverage='standard', convex=None, kernel='linear', skewness=False,
                               null_priors=False, random_state=None, verbose=0):
    """Compute Non-negative Matrix Factorization (NMF)

    Find two non-negative matrices (W, H) such that X = W @ H.T + Error. This factorization
    can be used for example for dimensionality reduction, source separation or topic extraction.
    The objective function is minimized with an alternating minimization of W and H.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Constant matrix.
    W : array-like, shape (n_samples, n_components)
        Prior W. If update_W == False, it is used as a constant, to solve for H only.
    H : array-like, shape (n_features, n_components)
        Prior H. If update_H == False, it is used as a constant, to solve for W only.
    n_components : integer
        Number of components; if n_components is not set: n_components = min(n_samples, n_features)
    update_W : boolean, default: True
        Update or keep W fixed
    update_H : boolean, default: True
        Update or keep H fixed
    beta_loss : string, default: 'frobenius'
        String must be in {'frobenius', 'kullback-leibler'}. Beta divergence to be minimized,
        measuring the distance between X and the dot product WH. Note that values different from
        'frobenius' (or 2) and 'kullback-leibler' (or 1) lead to significantly slower fits.
        Note that for beta_loss == 'kullback-leibler', the input matrix X cannot contain zeros.
    use_hals : boolean, default: False
        True -> HALS algorithm (note that convex and kullback-leibler loss options are not
        supported); False -> projected gradient
    n_bootstrap : integer, default: None
        Number of bootstrap runs.
    tol : float, default: 1e-6
        Tolerance of the stopping condition.
    max_iter : integer, default: 150
        Maximum number of iterations.
    max_iter_mult : integer, default: 20
        Maximum number of iterations in multiplicative warm-up to projected gradient
        (beta_loss = 'frobenius' only).
    regularization : None | 'components' | 'transformation'
        Select whether the regularization affects the components (H), the transformation (W)
        or none of them.
    sparsity : float, default: 0
        Sparsity target with 0 <= sparsity <= 1 representing either:
        - the % rows in W or H set to 0 (when use_hals = False)
        - the mean % rows per column in W or H set to 0 (when use_hals = True)
    leverage : None | 'standard' | 'robust', default: 'standard'
        Calculate leverage of W and H rows on each component.
    convex : None | 'components' | 'transformation', default: None
        Apply convex constraint on W or H.
    kernel : 'linear' | 'quadratic' | 'radial', default: 'linear'
        Can be set if convex = 'transformation'.
    null_priors : boolean, default: False
        Cells of H with prior cells = 0 will not be updated.
        Can be set only if prior H has been defined.
    skewness : boolean, default: False
        When solving mixture problems, columns of X at the extremities of the convex hull will
        be given largest weights. The column weight is a function of the skewness and its sign.
        The expected sign of the skewness is based on the skewness of W components, as returned
        by the first pass of a two-step convex NMF. Thus, during the first pass, skewness must
        be set to False. Can be set only if convex = 'transformation' and prior W and H have
        been defined.
    random_state : int, RandomState instance or None, optional, default: None
        If int, random_state is the seed used by the random number generator;
        if RandomState instance, random_state is the random number generator;
        if None, the random number generator is the RandomState instance used by np.random.
    verbose : integer, default: 0
        The verbosity level (0/1).

    Returns
    -------
    Estimator (dictionary) with following entries:
    W : array-like, shape (n_samples, n_components)
        Solution to the non-negative least squares problem.
    H : array-like, shape (n_features, n_components)
        Solution to the non-negative least squares problem.
    volume : scalar
        Volume occupied by W and H.
    WB : array-like, shape (n_samples, n_components)
        Percent consistently clustered rows for each component; only if n_bootstrap > 0.
    HB : array-like, shape (n_features, n_components)
        Percent consistently clustered columns for each component; only if n_bootstrap > 0.
    B : array-like, shape (n_observations, n_components) or (n_features, n_components)
        Only if active convex variant: H = B.T @ X or W = X @ B.
    diff : scalar
        Objective minimum achieved.
    """
    if use_hals:
        # convex and kullback-leibler loss options are not supported
        beta_loss = 'frobenius'
        convex = None

    M = X
    n, p = M.shape
    if n_components is None:
        nc = min(n, p)
    else:
        nc = n_components

    if beta_loss == 'frobenius':
        NMFAlgo = 2
    else:
        NMFAlgo = 1

    LogIter = verbose
    myStatusBox = StatusBoxTqdm(verbose=LogIter)
    tolerance = tol
    precision = EPSILON
    if (W is None) & (H is None):
        Mt, Mw = NMFInit(M, np.array([]), np.array([]), np.array([]), nc, tolerance, LogIter, myStatusBox)
        init = 'nndsvd'
    else:
        # Bind user-supplied priors (Mt/Mw must be defined before the normalization loop below)
        if H is None:
            Mt = np.copy(W)
            Mw = np.ones((p, nc))
            init = 'custom_W'
        elif W is None:
            Mt = np.ones((n, nc))
            Mw = np.copy(H)
            init = 'custom_H'
        else:
            Mt = np.copy(W)
            Mw = np.copy(H)
            init = 'custom'

        for k in range(0, nc):
            if NMFAlgo == 2:
                Mt[:, k] = Mt[:, k] / np.linalg.norm(Mt[:, k])
                Mw[:, k] = Mw[:, k] / np.linalg.norm(Mw[:, k])
            else:
                Mt[:, k] = Mt[:, k] / np.sum(Mt[:, k])
                Mw[:, k] = Mw[:, k] / np.sum(Mw[:, k])

    if n_bootstrap is None:
        NMFRobustNRuns = 0
    else:
        NMFRobustNRuns = n_bootstrap

    if NMFRobustNRuns > 1:
        NMFAlgo += 2

    if update_W is True:
        NMFFixUserLHE = 0
    else:
        NMFFixUserLHE = 1

    if update_H is True:
        NMFFixUserRHE = 0
    else:
        NMFFixUserRHE = 1

    MaxIterations = max_iter
    NMFMaxInterm = max_iter_mult
    if regularization is None:
        NMFSparseLevel = 0
    else:
        if regularization == 'components':
            NMFSparseLevel = sparsity
        elif regularization == 'transformation':
            NMFSparseLevel = -sparsity
        else:
            NMFSparseLevel = 0

    NMFRobustResampleColumns = 0

    if leverage == 'standard':
        NMFCalculateLeverage = 1
        NMFUseRobustLeverage = 0
    elif leverage == 'robust':
        NMFCalculateLeverage = 1
        NMFUseRobustLeverage = 1
    else:
        NMFCalculateLeverage = 0
        NMFUseRobustLeverage = 0

    if convex is None:
        NMFFindParts = 0
        NMFFindCentroids = 0
        NMFKernel = 1
    elif convex == 'transformation':
        NMFFindParts = 1
        NMFFindCentroids = 0
        NMFKernel = 1
    elif convex == 'components':
        NMFFindParts = 0
        NMFFindCentroids = 1
        if kernel == 'linear':
            NMFKernel = 1
        elif kernel == 'quadratic':
            NMFKernel = 2
        elif kernel == 'radial':
            NMFKernel = 3
        else:
            NMFKernel = 1

    if (null_priors is True) & ((init == 'custom') | (init == 'custom_H')):
        NMFPriors = H
    else:
        NMFPriors = np.array([])

    if convex is None:
        NMFReweighColumns = 0
    else:
        if (convex == 'transformation') & (init == 'custom'):
            if skewness is True:
                NMFReweighColumns = 1
            else:
                NMFReweighColumns = 0
        else:
            NMFReweighColumns = 0

    if random_state is not None:
        RandomSeed = random_state
        np.random.seed(RandomSeed)

    if use_hals:
        if NMFAlgo <= 2:
            NTFAlgo = 5
        else:
            NTFAlgo = 6

        Mt_conv, Mt, Mw, Mb, MtPct, MwPct, diff, AddMessage, ErrMessage, cancel_pressed = rNTFSolve(
            M, np.array([]), Mt, Mw, np.array([]), nc, tolerance, precision, LogIter, MaxIterations,
            NMFFixUserLHE, NMFFixUserRHE, 1, NTFAlgo, NMFRobustNRuns, NMFCalculateLeverage,
            NMFUseRobustLeverage, 0, 0, NMFSparseLevel, 0, 0, 0, 0, 0, 1, 0, np.array([]), myStatusBox)
        Mev = np.ones(nc)
        if (NMFFixUserLHE == 0) & (NMFFixUserRHE == 0):
            # Scale
            for k in range(0, nc):
                ScaleMt = np.linalg.norm(Mt[:, k])
                ScaleMw = np.linalg.norm(Mw[:, k])
                Mev[k] = ScaleMt * ScaleMw
                if Mev[k] > 0:
                    Mt[:, k] = Mt[:, k] / ScaleMt
                    Mw[:, k] = Mw[:, k] / ScaleMw
    else:
        Mt, Mw, MtPct, MwPct, diff, Mh, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = rNMFSolve(
            M, np.array([]), Mt, Mw, nc, tolerance, precision, LogIter, MaxIterations, NMFAlgo,
            NMFFixUserLHE, NMFFixUserRHE, NMFMaxInterm, NMFSparseLevel, NMFRobustResampleColumns,
            NMFRobustNRuns, NMFCalculateLeverage, NMFUseRobustLeverage, NMFFindParts, NMFFindCentroids,
            NMFKernel, NMFReweighColumns, NMFPriors, myStatusBox)
        Mev = np.ones(nc)
        if (NMFFindParts == 0) & (NMFFindCentroids == 0) & (NMFFixUserLHE == 0) & (NMFFixUserRHE == 0):
            # Scale
            for k in range(0, nc):
                if (NMFAlgo == 2) | (NMFAlgo == 4):
                    ScaleMt = np.linalg.norm(Mt[:, k])
                    ScaleMw = np.linalg.norm(Mw[:, k])
                else:
                    ScaleMt = np.sum(Mt[:, k])
                    ScaleMw = np.sum(Mw[:, k])

                Mev[k] = ScaleMt * ScaleMw
                if Mev[k] > 0:
                    Mt[:, k] = Mt[:, k] / ScaleMt
                    Mw[:, k] = Mw[:, k] / ScaleMw

    volume = NMFDet(Mt, Mw, 1)

    for message in AddMessage:
        print(message)

    myStatusBox.close()

    # Order by decreasing scale
    RMev = np.argsort(-Mev)
    Mev = Mev[RMev]
    Mt = Mt[:, RMev]
    Mw = Mw[:, RMev]
    if isinstance(MtPct, np.ndarray):
        MtPct = MtPct[:, RMev]
        MwPct = MwPct[:, RMev]

    if (NMFFindParts == 0) & (NMFFindCentroids == 0):
        # Scale by max component
        for k in range(0, nc):
            MaxCol = np.max(Mt[:, k])
            if MaxCol > 0:
                Mt[:, k] /= MaxCol
                Mw[:, k] *= Mev[k] * MaxCol
                Mev[k] = 1
            else:
                Mev[k] = 0

    estimator = {}
    if NMFRobustNRuns <= 1:
        if (NMFFindParts == 0) & (NMFFindCentroids == 0):
            estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('diff', diff)])
        else:
            estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('B', Mh), ('diff', diff)])
    else:
        if (NMFFindParts == 0) & (NMFFindCentroids == 0):
            estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('WB', MtPct), ('HB', MwPct),
                              ('diff', diff)])
        else:
            estimator.update([('W', Mt), ('H', Mw), ('volume', volume), ('B', Mh), ('WB', MtPct),
                              ('HB', MwPct), ('diff', diff)])

    return estimator
def non_negative_tensor_factorization(X, n_blocks, W=None, H=None, Q=None, n_components=None, update_W=True, update_H=True, update_Q=True, fast_hals=True, n_iter_hals=2, n_shift=0, regularization=None, sparsity=0, unimodal=False, smooth=False, apply_left=False, apply_right=False, apply_block=False, n_bootstrap=None, tol=1e-06, max_iter=150, leverage='standard', random_state=None, init_type=0, verbose=0)

Compute Non-negative Tensor Factorization (NTF)

Find three non-negative matrices (W, H, F) such that X = W @@ H @@ F + Error (@@ = tensor product). This factorization can be used for example for dimensionality reduction, source separation or topic extraction.
The objective function is minimized with an alternating minimization of W and H.

Parameters
X : array-like, shape (n_samples, n_features x n_blocks)
    Constant matrix. X is a tensor with shape (n_samples, n_features, n_blocks), however unfolded along 2nd and 3rd dimensions.
n_blocks : integer
    Number of blocks defining the 3rd dimension of the tensor.
W : array-like, shape (n_samples, n_components)
    Prior W
H : array-like, shape (n_features, n_components)
    Prior H
Q : array-like, shape (n_blocks, n_components)
    Prior Q
n_components : integer
    Number of components; if n_components is not set: n_components = min(n_samples, n_features)
update_W : boolean, default: True
    Update or keep W fixed
update_H : boolean, default: True
    Update or keep H fixed
update_Q : boolean, default: True
    Update or keep Q fixed
fast_hals : boolean, default: True
    Use fast implementation of HALS
n_iter_hals : integer, default: 2
    Number of HALS iterations prior to fast HALS
n_shift : integer, default: 0
    Max shifting in convolutional NTF
regularization : None | 'components' | 'transformation'
    Select whether the regularization affects the components (H), the transformation (W) or none of them.
sparsity : float, default: 0
    Sparsity target with 0 <= sparsity <= 1 representing the mean % rows per column in W or H set to 0
unimodal : boolean, default: False
smooth : boolean, default: False
apply_left : boolean, default: False
apply_right : boolean, default: False
apply_block : boolean, default: False
n_bootstrap : integer, default: None
    Number of bootstrap runs.
tol : float, default: 1e-6
    Tolerance of the stopping condition.
max_iter : integer, default: 150
    Maximum number of iterations.
leverage : None | 'standard' | 'robust', default: 'standard'
    Calculate leverage of W and H rows on each component.
random_state : int, RandomState instance or None, optional, default: None
    If int, random_state is the seed used by the random number generator;
    if RandomState instance, random_state is the random number generator;
    if None, the random number generator is the RandomState instance used by np.random.
init_type : integer, default: 0
    init_type = 0: NMF initialization applied on the reshaped matrix [1st dim x vectorized (2nd & 3rd dim)];
    init_type = 1: NMF initialization applied on the reshaped matrix [vectorized (1st & 2nd dim) x 3rd dim]
verbose : integer, default: 0
    The verbosity level (0/1).

Returns
Estimator (dictionary) with following entries:
W : array-like, shape (n_samples, n_components)
    Solution to the non-negative least squares problem.
H : array-like, shape (n_features, n_components)
    Solution to the non-negative least squares problem.
Q : array-like, shape (n_blocks, n_components)
    Solution to the non-negative least squares problem.
volume : scalar
    Volume occupied by W and H.
WB : array-like, shape (n_samples, n_components)
    Percent consistently clustered rows for each component; only if n_bootstrap > 0.
HB : array-like, shape (n_features, n_components)
    Percent consistently clustered columns for each component; only if n_bootstrap > 0.

Reference
A. Cichocki, A.-H. Phan, Fast local algorithms for large scale nonnegative matrix and tensor factorizations, IEICE Trans. Fundam. Electron. Commun. Comput. Sci. 92 (3) (2009) 708–721.
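Since X must be the tensor unfolded along its 2nd and 3rd dimensions (horizontal concatenation of the n_blocks slices, as the nmf_predict source also notes), here is a hedged sketch of building that layout and calling the function; T, n, p and b are hypothetical:

    import numpy as np
    n, p, b = 50, 20, 4                             # hypothetical tensor dimensions
    T = np.abs(np.random.randn(n, p, b))            # tensor (n_samples, n_features, n_blocks)
    X = np.hstack([T[:, :, j] for j in range(b)])   # unfolded: shape (n, p * b)

    estimator = non_negative_tensor_factorization(X, n_blocks=b, n_components=3,
                                                  random_state=0)
    W, H, Q = estimator['W'], estimator['H'], estimator['Q']   # (n, 3), (p, 3), (b, 3)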
Expand source code
def non_negative_tensor_factorization(X, n_blocks, W=None, H=None, Q=None, n_components=None,
                                      update_W=True, update_H=True, update_Q=True, fast_hals=True,
                                      n_iter_hals=2, n_shift=0, regularization=None, sparsity=0,
                                      unimodal=False, smooth=False, apply_left=False, apply_right=False,
                                      apply_block=False, n_bootstrap=None, tol=1e-6, max_iter=150,
                                      leverage='standard', random_state=None, init_type=0, verbose=0):
    """Compute Non-negative Tensor Factorization (NTF)

    Find three non-negative matrices (W, H, F) such that X = W @@ H @@ F + Error
    (@@ = tensor product). This factorization can be used for example for dimensionality
    reduction, source separation or topic extraction.
    The objective function is minimized with an alternating minimization of W and H.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features x n_blocks)
        Constant matrix. X is a tensor with shape (n_samples, n_features, n_blocks),
        however unfolded along 2nd and 3rd dimensions.
    n_blocks : integer
        Number of blocks defining the 3rd dimension of the tensor.
    W : array-like, shape (n_samples, n_components)
        Prior W
    H : array-like, shape (n_features, n_components)
        Prior H
    Q : array-like, shape (n_blocks, n_components)
        Prior Q
    n_components : integer
        Number of components; if n_components is not set: n_components = min(n_samples, n_features)
    update_W : boolean, default: True
        Update or keep W fixed
    update_H : boolean, default: True
        Update or keep H fixed
    update_Q : boolean, default: True
        Update or keep Q fixed
    fast_hals : boolean, default: True
        Use fast implementation of HALS
    n_iter_hals : integer, default: 2
        Number of HALS iterations prior to fast HALS
    n_shift : integer, default: 0
        Max shifting in convolutional NTF
    regularization : None | 'components' | 'transformation'
        Select whether the regularization affects the components (H), the transformation (W)
        or none of them.
    sparsity : float, default: 0
        Sparsity target with 0 <= sparsity <= 1 representing the mean % rows per column
        in W or H set to 0
    unimodal : boolean, default: False
    smooth : boolean, default: False
    apply_left : boolean, default: False
    apply_right : boolean, default: False
    apply_block : boolean, default: False
    n_bootstrap : integer, default: None
        Number of bootstrap runs.
    tol : float, default: 1e-6
        Tolerance of the stopping condition.
    max_iter : integer, default: 150
        Maximum number of iterations.
    leverage : None | 'standard' | 'robust', default: 'standard'
        Calculate leverage of W and H rows on each component.
    random_state : int, RandomState instance or None, optional, default: None
        If int, random_state is the seed used by the random number generator;
        if RandomState instance, random_state is the random number generator;
        if None, the random number generator is the RandomState instance used by np.random.
    init_type : integer, default: 0
        init_type = 0: NMF initialization applied on the reshaped matrix
        [1st dim x vectorized (2nd & 3rd dim)];
        init_type = 1: NMF initialization applied on the reshaped matrix
        [vectorized (1st & 2nd dim) x 3rd dim]
    verbose : integer, default: 0
        The verbosity level (0/1).

    Returns
    -------
    Estimator (dictionary) with following entries:
    W : array-like, shape (n_samples, n_components)
        Solution to the non-negative least squares problem.
    H : array-like, shape (n_features, n_components)
        Solution to the non-negative least squares problem.
    Q : array-like, shape (n_blocks, n_components)
        Solution to the non-negative least squares problem.
    volume : scalar
        Volume occupied by W and H.
    WB : array-like, shape (n_samples, n_components)
        Percent consistently clustered rows for each component; only if n_bootstrap > 0.
    HB : array-like, shape (n_features, n_components)
        Percent consistently clustered columns for each component; only if n_bootstrap > 0.

    Reference
    ---------
    A. Cichocki, A.-H. Phan, Fast local algorithms for large scale nonnegative matrix and
    tensor factorizations, IEICE Trans. Fundam. Electron. Commun. Comput. Sci. 92 (3) (2009) 708-721.
    """
    M = X
    n, p = M.shape
    if n_components is None:
        nc = min(n, p)
    else:
        nc = n_components

    NBlocks = n_blocks
    p_block = int(p / NBlocks)
    tolerance = tol
    precision = EPSILON
    LogIter = verbose
    if regularization is None:
        NMFSparseLevel = 0
    else:
        if regularization == 'components':
            NMFSparseLevel = sparsity
        elif regularization == 'transformation':
            NMFSparseLevel = -sparsity
        else:
            NMFSparseLevel = 0

    NTFUnimodal = unimodal
    NTFSmooth = smooth
    NTFLeftComponents = apply_left
    NTFRightComponents = apply_right
    NTFBlockComponents = apply_block
    if random_state is not None:
        RandomSeed = random_state
        np.random.seed(RandomSeed)

    myStatusBox = StatusBoxTqdm(verbose=LogIter)

    if (W is None) & (H is None) & (Q is None):
        Mt0, Mw0, Mb0, AddMessage, ErrMessage, cancel_pressed = NTFInit(
            M, np.array([]), np.array([]), np.array([]), nc, tolerance, precision, LogIter,
            NTFUnimodal, NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks,
            init_type, myStatusBox)
    else:
        if W is None:
            Mt0 = np.ones((n, nc))
        else:
            Mt0 = np.copy(W)

        if H is None:
            Mw0 = np.ones((p_block, nc))
        else:
            Mw0 = np.copy(H)

        if Q is None:
            Mb0 = np.ones((NBlocks, nc))
        else:
            Mb0 = np.copy(Q)

        Mfit = np.zeros((n, p))
        for k in range(0, nc):
            for iBlock in range(0, NBlocks):
                Mfit[:, iBlock * p_block:(iBlock + 1) * p_block] += Mb0[iBlock, k] * \
                    np.reshape(Mt0[:, k], (n, 1)) @ np.reshape(Mw0[:, k], (1, p_block))

        ScaleRatio = (np.linalg.norm(Mfit) / np.linalg.norm(M)) ** (1 / 3)
        for k in range(0, nc):
            Mt0[:, k] /= ScaleRatio
            Mw0[:, k] /= ScaleRatio
            Mb0[:, k] /= ScaleRatio

        Mfit = np.zeros((n, p))
        for k in range(0, nc):
            for iBlock in range(0, NBlocks):
                Mfit[:, iBlock * p_block:(iBlock + 1) * p_block] += Mb0[iBlock, k] * \
                    np.reshape(Mt0[:, k], (n, 1)) @ np.reshape(Mw0[:, k], (1, p_block))

    NTFFastHALS = fast_hals
    NTFNIterations = n_iter_hals
    MaxIterations = max_iter
    NTFNConv = n_shift
    if n_bootstrap is None:
        NMFRobustNRuns = 0
    else:
        NMFRobustNRuns = n_bootstrap

    if NMFRobustNRuns <= 1:
        NMFAlgo = 5
    else:
        NMFAlgo = 6

    if leverage == 'standard':
        NMFCalculateLeverage = 1
        NMFUseRobustLeverage = 0
    elif leverage == 'robust':
        NMFCalculateLeverage = 1
        NMFUseRobustLeverage = 1
    else:
        NMFCalculateLeverage = 0
        NMFUseRobustLeverage = 0

    if random_state is not None:
        RandomSeed = random_state
        np.random.seed(RandomSeed)

    if update_W:
        NMFFixUserLHE = 0
    else:
        NMFFixUserLHE = 1

    if update_H:
        NMFFixUserRHE = 0
    else:
        NMFFixUserRHE = 1

    if update_Q:
        NMFFixUserBHE = 0
    else:
        NMFFixUserBHE = 1

    Mt_conv, Mt, Mw, Mb, MtPct, MwPct, diff, AddMessage, ErrMessage, cancel_pressed = rNTFSolve(
        M, np.array([]), Mt0, Mw0, Mb0, nc, tolerance, precision, LogIter, MaxIterations,
        NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, NMFAlgo, NMFRobustNRuns,
        NMFCalculateLeverage, NMFUseRobustLeverage, NTFFastHALS, NTFNIterations, NMFSparseLevel,
        NTFUnimodal, NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents,
        NBlocks, NTFNConv, np.array([]), myStatusBox)

    volume = NMFDet(Mt, Mw, 1)

    for message in AddMessage:
        print(message)

    myStatusBox.close()

    estimator = {}
    if NMFRobustNRuns <= 1:
        estimator.update([('W', Mt), ('H', Mw), ('Q', Mb), ('volume', volume), ('diff', diff)])
    else:
        estimator.update([('W', Mt), ('H', Mw), ('Q', Mb), ('volume', volume), ('WB', MtPct),
                          ('HB', MwPct), ('diff', diff)])

    return estimator
def rNMFSolve(M, Mmis, Mt0, Mw0, nc, tolerance, precision, LogIter, MaxIterations, NMFAlgo, NMFFixUserLHE, NMFFixUserRHE, NMFMaxInterm, NMFSparseLevel, NMFRobustResampleColumns, NMFRobustNRuns, NMFCalculateLeverage, NMFUseRobustLeverage, NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors, myStatusBox)
Estimate left and right hand matrices (robust version)

Input
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
Mt0: Initial left hand matrix
Mw0: Initial right hand matrix
nc: NMF rank
tolerance: Convergence threshold
precision: Replace 0-values in multiplication rules
LogIter: Log results through iterations
MaxIterations: Max iterations
NMFAlgo: =1,3: Divergence; =2,4: Least squares
NMFFixUserLHE: = 1 => fixed left hand matrix columns
NMFFixUserRHE: = 1 => fixed right hand matrix columns
NMFMaxInterm: Max iterations for warmup multiplication rules
NMFSparseLevel: Requested sparsity in terms of relative number of rows with 0 values in right hand matrix
NMFRobustResampleColumns: Resample columns during bootstrap
NMFRobustNRuns: Number of bootstrap runs
NMFCalculateLeverage: Calculate leverages
NMFUseRobustLeverage: Calculate leverages based on robust max across factoring columns
NMFFindParts: Enforce convexity on left hand matrix
NMFFindCentroids: Enforce convexity on right hand matrix
NMFKernel: Type of kernel used; 1: linear; 2: quadratic; 3: radial
NMFReweighColumns: Reweigh columns in 2nd step of parts-based NMF
NMFPriors: Priors on right hand matrix

Output
Mt: Left hand matrix
Mw: Right hand matrix
MtPct: Percent robust clustered rows
MwPct: Percent robust clustered columns
diff: Objective minimum achieved
Mh: Convexity matrix
flagNonconvex: Updated non-convexity flag on left hand matrix
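The MtPct/MwPct outputs are simple bootstrap tallies: in each run, every row (or column) is assigned to its highest-leverage component, and the tallies are divided by the number of runs. A minimal sketch of that accounting, independent of the solver (robust_cluster_pct and the assignment arrays are illustrative, not part of the module):

    import numpy as np

    def robust_cluster_pct(cluster_runs, n_items, nc):
        # cluster_runs: one int array of per-item cluster assignments per run;
        # returns the fraction of runs each item fell in each cluster
        # (each row of the result sums to 1).
        pct = np.zeros((n_items, nc))
        for assign in cluster_runs:
            pct[np.arange(n_items), assign] += 1
        return pct / len(cluster_runs)

    runs = [np.array([0, 0, 1, 1]), np.array([0, 1, 1, 1]), np.array([0, 0, 1, 0])]
    print(robust_cluster_pct(runs, 4, 2))
    # item 0 is consistently clustered (100% in cluster 0); items 1 and 3 are not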
Expand source code
def rNMFSolve(
        M, Mmis, Mt0, Mw0, nc, tolerance, precision, LogIter, MaxIterations, NMFAlgo, NMFFixUserLHE,
        NMFFixUserRHE, NMFMaxInterm,
        NMFSparseLevel, NMFRobustResampleColumns, NMFRobustNRuns, NMFCalculateLeverage,
        NMFUseRobustLeverage, NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns,
        NMFPriors, myStatusBox):
    """Estimate left and right hand matrices (robust version)

    Input:
        M: Input matrix
        Mmis: Define missing values (0 = missing cell, 1 = real cell)
        Mt0: Initial left hand matrix
        Mw0: Initial right hand matrix
        nc: NMF rank
        tolerance: Convergence threshold
        precision: Replace 0-values in multiplication rules
        LogIter: Log results through iterations
        MaxIterations: Max iterations
        NMFAlgo: =1,3: Divergence; =2,4: Least squares
        NMFFixUserLHE: = 1 => fixed left hand matrix columns
        NMFFixUserRHE: = 1 => fixed right hand matrix columns
        NMFMaxInterm: Max iterations for warmup multiplication rules
        NMFSparseLevel: Requested sparsity in terms of relative number of rows with 0 values in right hand matrix
        NMFRobustResampleColumns: Resample columns during bootstrap
        NMFRobustNRuns: Number of bootstrap runs
        NMFCalculateLeverage: Calculate leverages
        NMFUseRobustLeverage: Calculate leverages based on robust max across factoring columns
        NMFFindParts: Enforce convexity on left hand matrix
        NMFFindCentroids: Enforce convexity on right hand matrix
        NMFKernel: Type of kernel used; 1: linear; 2: quadratic; 3: radial
        NMFReweighColumns: Reweigh columns in 2nd step of parts-based NMF
        NMFPriors: Priors on right hand matrix
    Output:
        Mt: Left hand matrix
        Mw: Right hand matrix
        MtPct: Percent robust clustered rows
        MwPct: Percent robust clustered columns
        diff: Objective minimum achieved
        Mh: Convexity matrix
        flagNonconvex: Updated non-convexity flag on left hand matrix
    """
    # Check parameter consistency (and correct if needed)
    AddMessage = []
    ErrMessage = ''
    cancel_pressed = 0
    nc = int(nc)
    if NMFFixUserLHE * NMFFixUserRHE == 1:
        return Mt0, Mw0, np.array([]), np.array([]), 0, np.array([]), 0, \
            AddMessage, ErrMessage, cancel_pressed

    if (nc == 1) & (NMFAlgo > 2):
        NMFAlgo -= 2

    if NMFAlgo <= 2:
        NMFRobustNRuns = 0

    Mmis = Mmis.astype(np.int)
    n_Mmis = Mmis.shape[0]
    if n_Mmis == 0:
        ID = np.where(np.isnan(M) == True)
        n_Mmis = ID[0].size
        if n_Mmis > 0:
            Mmis = (np.isnan(M) == False)
            Mmis = Mmis.astype(np.int)
            M[Mmis == 0] = 0
    else:
        M[Mmis == 0] = 0

    if NMFRobustResampleColumns > 0:
        M = np.copy(M).T
        if n_Mmis > 0:
            Mmis = np.copy(Mmis).T

        Mtemp = np.copy(Mw0)
        Mw0 = np.copy(Mt0)
        Mt0 = Mtemp
        NMFFixUserLHEtemp = NMFFixUserLHE
        NMFFixUserLHE = NMFFixUserRHE
        NMFFixUserRHE = NMFFixUserLHEtemp

    n, p = M.shape
    try:
        n_NMFPriors, nc = NMFPriors.shape
    except:
        n_NMFPriors = 0

    NMFRobustNRuns = int(NMFRobustNRuns)
    MtPct = np.nan
    MwPct = np.nan
    flagNonconvex = 0

    # Step 1: NMF
    Status = "Step 1 - NMF Ncomp=" + str(nc) + ": "
    Mt, Mw, diffsup, Mhsup, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
        M, Mmis, Mt0, Mw0, nc, tolerance, precision, LogIter, Status, MaxIterations, NMFAlgo,
        NMFFixUserLHE, NMFFixUserRHE, NMFMaxInterm, 100, NMFSparseLevel,
        NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors,
        flagNonconvex, AddMessage, myStatusBox)
    Mtsup = np.copy(Mt)
    Mwsup = np.copy(Mw)
    if (n_NMFPriors > 0) & (NMFReweighColumns > 0):
        # Run again with fixed LHE & no priors
        Status = "Step 1bis - NMF (fixed LHE) Ncomp=" + str(nc) + ": "
        Mw = np.ones((p, nc)) / math.sqrt(p)
        Mt, Mw, diffsup, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
            M, Mmis, Mtsup, Mw, nc, tolerance, precision, LogIter, Status, MaxIterations, NMFAlgo,
            nc, 0, NMFMaxInterm, 100, NMFSparseLevel, NMFFindParts, NMFFindCentroids, NMFKernel,
            0, NMFPriors, flagNonconvex, AddMessage, myStatusBox)
        Mtsup = np.copy(Mt)
        Mwsup = np.copy(Mw)

    # Bootstrap to assess robust clustering
    if NMFRobustNRuns > 1:
        # Update Mwsup
        MwPct = np.zeros((p, nc))
        MwBlk = np.zeros((p, NMFRobustNRuns * nc))
        for iBootstrap in range(0, NMFRobustNRuns):
            Boot = np.random.randint(n, size=n)
            Status = "Step 2 - " + \
                     "Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NMF Ncomp=" + str(nc) + ": "
            if n_Mmis > 0:
                Mt, Mw, diff, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
                    M[Boot, :], Mmis[Boot, :], Mtsup[Boot, :], Mwsup, nc, 1.e-3, precision, LogIter,
                    Status, MaxIterations, NMFAlgo, nc, 0, NMFMaxInterm, 20, NMFSparseLevel,
                    NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors,
                    flagNonconvex, AddMessage, myStatusBox)
            else:
                Mt, Mw, diff, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
                    M[Boot, :], Mmis, Mtsup[Boot, :], Mwsup, nc, 1.e-3, precision, LogIter,
                    Status, MaxIterations, NMFAlgo, nc, 0, NMFMaxInterm, 20, NMFSparseLevel,
                    NMFFindParts, NMFFindCentroids, NMFKernel, NMFReweighColumns, NMFPriors,
                    flagNonconvex, AddMessage, myStatusBox)

            for k in range(0, nc):
                MwBlk[:, k * NMFRobustNRuns + iBootstrap] = Mw[:, k]

            Mwn = np.zeros((p, nc))
            for k in range(0, nc):
                if (NMFAlgo == 2) | (NMFAlgo == 4):
                    ScaleMw = np.linalg.norm(MwBlk[:, k * NMFRobustNRuns + iBootstrap])
                else:
                    ScaleMw = np.sum(MwBlk[:, k * NMFRobustNRuns + iBootstrap])

                if ScaleMw > 0:
                    MwBlk[:, k * NMFRobustNRuns + iBootstrap] = \
                        MwBlk[:, k * NMFRobustNRuns + iBootstrap] / ScaleMw

                Mwn[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]

            ColClust = np.zeros(p, dtype=int)
            if NMFCalculateLeverage > 0:
                Mwn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mwn, NMFUseRobustLeverage,
                                                                       AddMessage, myStatusBox)

            for j in range(0, p):
                ColClust[j] = np.argmax(np.array(Mwn[j, :]))
                MwPct[j, ColClust[j]] = MwPct[j, ColClust[j]] + 1

        MwPct = MwPct / NMFRobustNRuns

        # Update Mtsup
        MtPct = np.zeros((n, nc))
        for iBootstrap in range(0, NMFRobustNRuns):
            Status = "Step 3 - " + \
                     "Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NMF Ncomp=" + str(nc) + ": "
            Mw = np.zeros((p, nc))
            for k in range(0, nc):
                Mw[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]

            Mt, Mw, diff, Mh, NMFPriors, flagNonconvex, AddMessage, ErrMessage, cancel_pressed = NMFSolve(
                M, Mmis, Mtsup, Mw, nc, 1.e-3, precision, LogIter, Status, MaxIterations, NMFAlgo,
                0, nc, NMFMaxInterm, 20, NMFSparseLevel, NMFFindParts, NMFFindCentroids, NMFKernel,
                NMFReweighColumns, NMFPriors, flagNonconvex, AddMessage, myStatusBox)
            RowClust = np.zeros(n, dtype=int)
            if NMFCalculateLeverage > 0:
                Mtn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mt, NMFUseRobustLeverage,
                                                                      AddMessage, myStatusBox)
            else:
                Mtn = Mt

            for i in range(0, n):
                RowClust[i] = np.argmax(Mtn[i, :])
                MtPct[i, RowClust[i]] = MtPct[i, RowClust[i]] + 1

        MtPct = MtPct / NMFRobustNRuns

    Mt = Mtsup
    Mw = Mwsup
    Mh = Mhsup
    diff = diffsup

    if NMFRobustResampleColumns > 0:
        Mtemp = np.copy(Mt)
        Mt = np.copy(Mw)
        Mw = Mtemp
        Mtemp = np.copy(MtPct)
        MtPct = np.copy(MwPct)
        MwPct = Mtemp

    return Mt, Mw, MtPct, MwPct, diff, Mh, flagNonconvex, AddMessage, ErrMessage, cancel_pressed
def rNTFSolve(M, Mmis, Mt0, Mw0, Mb0, nc, tolerance, precision, LogIter, MaxIterations, NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, NMFAlgo, NMFRobustNRuns, NMFCalculateLeverage, NMFUseRobustLeverage, NTFFastHALS, NTFNIterations, NMFSparseLevel, NTFUnimodal, NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv, NMFPriors, myStatusBox)
Estimate NTF matrices (robust version)

Input
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
Mt0: Initial left hand matrix
Mw0: Initial right hand matrix
Mb0: Initial block hand matrix
nc: NTF rank
tolerance: Convergence threshold
precision: Replace 0-values in multiplication rules
LogIter: Log results through iterations
MaxIterations: Max iterations
NMFFixUserLHE: fix left hand matrix columns: = 1, else = 0
NMFFixUserRHE: fix right hand matrix columns: = 1, else = 0
NMFFixUserBHE: fix block hand matrix columns: = 1, else = 0
NMFAlgo: =5: Non-robust version, =6: Robust version
NMFRobustNRuns: Number of bootstrap runs
NMFCalculateLeverage: Calculate leverages
NMFUseRobustLeverage: Calculate leverages based on robust max across factoring columns
NTFFastHALS: Use Fast HALS (does not handle missing values or convolution)
NTFNIterations: Warmup iterations for fast HALS
NMFSparseLevel: Sparsity level (as defined by Hoyer); +/- = make RHE/LHE sparse
NTFUnimodal: Apply Unimodal constraint on factoring vectors
NTFSmooth: Apply Smooth constraint on factoring vectors
NTFLeftComponents: Apply Unimodal/Smooth constraint on left hand matrix
NTFRightComponents: Apply Unimodal/Smooth constraint on right hand matrix
NTFBlockComponents: Apply Unimodal/Smooth constraint on block hand matrix
NBlocks: Number of NTF blocks
NTFNConv: Half-size of the convolution window on 3rd-dimension of the tensor
NMFPriors: Elements in Mw that should be updated (others remain 0)

Output
Mt_conv: Convolutional left hand matrix
Mt: Left hand matrix
Mw: Right hand matrix
Mb: Block hand matrix
MtPct: Percent robust clustered rows
MwPct: Percent robust clustered columns
diff: Objective minimum achieved
Expand source code
def rNTFSolve(M, Mmis, Mt0, Mw0, Mb0, nc, tolerance, precision, LogIter, MaxIterations, NMFFixUserLHE,
              NMFFixUserRHE, NMFFixUserBHE, NMFAlgo, NMFRobustNRuns, NMFCalculateLeverage,
              NMFUseRobustLeverage, NTFFastHALS, NTFNIterations, NMFSparseLevel, NTFUnimodal,
              NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks,
              NTFNConv, NMFPriors, myStatusBox):
    """Estimate NTF matrices (robust version)

    Input:
        M: Input matrix
        Mmis: Define missing values (0 = missing cell, 1 = real cell)
        Mt0: Initial left hand matrix
        Mw0: Initial right hand matrix
        Mb0: Initial block hand matrix
        nc: NTF rank
        tolerance: Convergence threshold
        precision: Replace 0-values in multiplication rules
        LogIter: Log results through iterations
        MaxIterations: Max iterations
        NMFFixUserLHE: fix left hand matrix columns: = 1, else = 0
        NMFFixUserRHE: fix right hand matrix columns: = 1, else = 0
        NMFFixUserBHE: fix block hand matrix columns: = 1, else = 0
        NMFAlgo: =5: Non-robust version, =6: Robust version
        NMFRobustNRuns: Number of bootstrap runs
        NMFCalculateLeverage: Calculate leverages
        NMFUseRobustLeverage: Calculate leverages based on robust max across factoring columns
        NTFFastHALS: Use Fast HALS (does not handle missing values or convolution)
        NTFNIterations: Warmup iterations for fast HALS
        NMFSparseLevel: Sparsity level (as defined by Hoyer); +/- = make RHE/LHE sparse
        NTFUnimodal: Apply Unimodal constraint on factoring vectors
        NTFSmooth: Apply Smooth constraint on factoring vectors
        NTFLeftComponents: Apply Unimodal/Smooth constraint on left hand matrix
        NTFRightComponents: Apply Unimodal/Smooth constraint on right hand matrix
        NTFBlockComponents: Apply Unimodal/Smooth constraint on block hand matrix
        NBlocks: Number of NTF blocks
        NTFNConv: Half-size of the convolution window on 3rd-dimension of the tensor
        NMFPriors: Elements in Mw that should be updated (others remain 0)
    Output:
        Mt_conv: Convolutional left hand matrix
        Mt: Left hand matrix
        Mw: Right hand matrix
        Mb: Block hand matrix
        MtPct: Percent robust clustered rows
        MwPct: Percent robust clustered columns
        diff: Objective minimum achieved
    """
    AddMessage = []
    ErrMessage = ''
    cancel_pressed = 0
    n, p0 = M.shape
    nc = int(nc)
    NBlocks = int(NBlocks)
    p = int(p0 / NBlocks)
    NTFNConv = int(NTFNConv)
    if NMFFixUserLHE * NMFFixUserRHE * NMFFixUserBHE == 1:
        return np.zeros((n, nc * (2 * NTFNConv + 1))), Mt0, Mw0, Mb0, np.zeros((n, p0)), \
            np.ones((n, nc)), np.ones((p, nc)), AddMessage, ErrMessage, cancel_pressed

    Mmis = Mmis.astype(np.int)
    n_Mmis = Mmis.shape[0]
    if n_Mmis == 0:
        ID = np.where(np.isnan(M) == True)
        n_Mmis = ID[0].size
        if n_Mmis > 0:
            Mmis = (np.isnan(M) == False)
            Mmis = Mmis.astype(np.int)
            M[Mmis == 0] = 0
    else:
        M[Mmis == 0] = 0

    NTFNIterations = int(NTFNIterations)
    NMFRobustNRuns = int(NMFRobustNRuns)
    Mt = np.copy(Mt0)
    Mw = np.copy(Mw0)
    Mb = np.copy(Mb0)
    Mt_conv = np.array([])

    # Check parameter consistency (and correct if needed)
    if (nc == 1) | (NMFAlgo == 5):
        NMFRobustNRuns = 0

    if NMFRobustNRuns == 0:
        MtPct = np.nan
        MwPct = np.nan

    if (n_Mmis > 0 or NTFNConv > 0 or NMFSparseLevel != 0) and NTFFastHALS > 0:
        NTFFastHALS = 0
        reverse2HALS = 1
    else:
        reverse2HALS = 0

    # Step 1: NTF
    Status0 = "Step 1 - NTF Ncomp=" + str(nc) + ": "
    if NTFFastHALS > 0:
        if NTFNIterations > 0:
            Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
                M, Mmis, Mt, Mw, Mb, nc, tolerance, LogIter, Status0,
                NTFNIterations, NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, 0, NTFUnimodal,
                NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks,
                NTFNConv, NMFPriors, myStatusBox)

        Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
            M, Mmis, Mt, Mw, Mb, nc, tolerance, precision, LogIter, Status0,
            MaxIterations, NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, NTFUnimodal, NTFSmooth,
            NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
    else:
        Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
            M, Mmis, Mt, Mw, Mb, nc, tolerance, LogIter, Status0,
            MaxIterations, NMFFixUserLHE, NMFFixUserRHE, NMFFixUserBHE, NMFSparseLevel,
            NTFUnimodal, NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents,
            NBlocks, NTFNConv, NMFPriors, myStatusBox)

    Mtsup = np.copy(Mt)
    Mwsup = np.copy(Mw)
    Mbsup = np.copy(Mb)
    diff_sup = diff

    # Bootstrap to assess robust clustering
    if NMFRobustNRuns > 1:
        # Update Mwsup
        MwPct = np.zeros((p, nc))
        MwBlk = np.zeros((p, NMFRobustNRuns * nc))
        for iBootstrap in range(0, NMFRobustNRuns):
            Boot = np.random.randint(n, size=n)
            Status0 = "Step 2 - " + \
                      "Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NTF Ncomp=" + str(nc) + ": "
            if NTFFastHALS > 0:
                if n_Mmis > 0:
                    Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
                        M[Boot, :], Mmis[Boot, :], Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, precision,
                        LogIter, Status0, MaxIterations, 1, 0, NMFFixUserBHE, NTFUnimodal, NTFSmooth,
                        NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
                else:
                    Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
                        M[Boot, :], np.array([]), Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, precision,
                        LogIter, Status0, MaxIterations, 1, 0, NMFFixUserBHE, NTFUnimodal, NTFSmooth,
                        NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
            else:
                if n_Mmis > 0:
                    Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
                        M[Boot, :], Mmis[Boot, :], Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, LogIter,
                        Status0, MaxIterations, 1, 0, NMFFixUserBHE, NMFSparseLevel, NTFUnimodal,
                        NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents,
                        NBlocks, NTFNConv, NMFPriors, myStatusBox)
                else:
                    Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
                        M[Boot, :], np.array([]), Mtsup[Boot, :], Mwsup, Mb, nc, 1.e-3, LogIter,
                        Status0, MaxIterations, 1, 0, NMFFixUserBHE, NMFSparseLevel, NTFUnimodal,
                        NTFSmooth, NTFLeftComponents, NTFRightComponents, NTFBlockComponents,
                        NBlocks, NTFNConv, NMFPriors, myStatusBox)

            for k in range(0, nc):
                MwBlk[:, k * NMFRobustNRuns + iBootstrap] = Mw[:, k]

            Mwn = np.zeros((p, nc))
            for k in range(0, nc):
                ScaleMw = np.linalg.norm(MwBlk[:, k * NMFRobustNRuns + iBootstrap])
                if ScaleMw > 0:
                    MwBlk[:, k * NMFRobustNRuns + iBootstrap] = \
                        MwBlk[:, k * NMFRobustNRuns + iBootstrap] / ScaleMw

                Mwn[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]

            ColClust = np.zeros(p, dtype=int)
            if NMFCalculateLeverage > 0:
                Mwn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mwn, NMFUseRobustLeverage,
                                                                       AddMessage, myStatusBox)

            for j in range(0, p):
                ColClust[j] = np.argmax(np.array(Mwn[j, :]))
                MwPct[j, ColClust[j]] = MwPct[j, ColClust[j]] + 1

        MwPct = MwPct / NMFRobustNRuns

        # Update Mtsup
        MtPct = np.zeros((n, nc))
        for iBootstrap in range(0, NMFRobustNRuns):
            Status0 = "Step 3 - " + \
                      "Boot " + str(iBootstrap + 1) + "/" + str(NMFRobustNRuns) + " NTF Ncomp=" + str(nc) + ": "
            Mw = np.zeros((p, nc))
            for k in range(0, nc):
                Mw[:, k] = MwBlk[:, k * NMFRobustNRuns + iBootstrap]

            if NTFFastHALS > 0:
                Mt, Mw, Mb, diff, cancel_pressed = NTFSolveFast(
                    M, Mmis, Mtsup, Mw, Mb, nc, 1.e-3, precision, LogIter, Status0, MaxIterations,
                    0, 1, NMFFixUserBHE, NTFUnimodal, NTFSmooth,
                    NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, myStatusBox)
            else:
                Mt_conv, Mt, Mw, Mb, diff, cancel_pressed = NTFSolve(
                    M, Mmis, Mtsup, Mw, Mb, nc, 1.e-3, LogIter, Status0, MaxIterations,
                    0, 1, NMFFixUserBHE, NMFSparseLevel, NTFUnimodal, NTFSmooth,
                    NTFLeftComponents, NTFRightComponents, NTFBlockComponents, NBlocks, NTFNConv,
                    NMFPriors, myStatusBox)

            RowClust = np.zeros(n, dtype=int)
            if NMFCalculateLeverage > 0:
                Mtn, AddMessage, ErrMessage, cancel_pressed = Leverage(Mt, NMFUseRobustLeverage,
                                                                       AddMessage, myStatusBox)
            else:
                Mtn = Mt

            for i in range(0, n):
                RowClust[i] = np.argmax(Mtn[i, :])
                MtPct[i, RowClust[i]] = MtPct[i, RowClust[i]] + 1

        MtPct = MtPct / NMFRobustNRuns

    Mt = Mtsup
    Mw = Mwsup
    Mb = Mbsup
    diff = diff_sup
    if reverse2HALS > 0:
        AddMessage.insert(len(AddMessage),
                          'Currently, Fast HALS cannot be applied with missing data or convolution window '
                          'and was reverted to Simple HALS.')

    return Mt_conv, Mt, Mw, Mb, MtPct, MwPct, diff, AddMessage, ErrMessage, cancel_pressed
def rSVDSolve(M, Mmis, nc, tolerance, LogIter, LogTrials, Status0, MaxIterations, SVDAlgo, SVDCoverage, SVDNTrials, myStatusBox)
Estimate SVD matrices (robust version)

Input
M: Input matrix
Mmis: Define missing values (0 = missing cell, 1 = real cell)
nc: SVD rank
tolerance: Convergence threshold
LogIter: Log results through iterations
LogTrials: Log results through trials
Status0: Initial displayed status to be updated during iterations
MaxIterations: Max iterations
SVDAlgo: =1: Non-robust version, =2: Robust version
SVDCoverage: Coverage non-outliers (robust version)
SVDNTrials: Number of trials (robust version)

Output
Mt: Left hand matrix
Mev: Scaling factors
Mw: Right hand matrix
Mmis: Matrix of missing/flagged outliers
Mmsr: Vector of residual SSQ
Mmsr2: Vector of residual variance
Reference
L. Liu et al (2003) Robust singular value decomposition analysis of microarray data PNAS November 11, 2003 vol. 100 no. 23 13167–13172
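At its core, the non-robust path (SVDAlgo=1) is an alternating least-squares power iteration for the leading singular triplet that skips missing cells; the robust path repeats it on trimmed subsets. A minimal sketch of the missing-data rank-1 step, assuming every row and column has at least two observed cells (rank1_als is an illustrative name, not part of the module):

    import numpy as np

    def rank1_als(M, mask, max_iter=100, tol=1e-6):
        # Alternating least squares for the leading singular triplet of a
        # matrix with missing entries (mask: 1 = observed, 0 = missing).
        M = np.where(mask, M, 0.0)
        w = np.nanmedian(np.where(mask, M, np.nan), axis=0)   # initialize w from column medians
        w /= np.linalg.norm(w)
        diff0 = None
        for _ in range(max_iter):
            t = (M @ w) / (mask @ (w ** 2))          # per-row regressions on w
            w = (M.T @ t) / (mask.T @ (t ** 2))      # per-column regressions on t
            w /= np.linalg.norm(w)                   # only w is normalized in the loop
            resid = mask * (M - np.outer(t, w))
            diff = (resid ** 2).sum() / mask.sum()   # mean squared residual on observed cells
            if diff0 is not None and abs(diff - diff0) / max(diff0, 1e-12) < tol:
                break
            diff0 = diff
        ev = np.linalg.norm(t)                       # scaling factor (singular value estimate)
        return t / ev, ev, w

    M = np.outer(np.arange(1.0, 7.0), np.arange(1.0, 5.0))   # rank-1 test matrix
    mask = (np.random.rand(6, 4) > 0.2).astype(int)          # ~80% observed
    t, ev, w = rank1_als(M, mask)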
Expand source code
def rSVDSolve(M, Mmis, nc, tolerance, LogIter, LogTrials, Status0, MaxIterations,
              SVDAlgo, SVDCoverage, SVDNTrials, myStatusBox):
    """Estimate SVD matrices (robust version)

    Input:
        M: Input matrix
        Mmis: Define missing values (0 = missing cell, 1 = real cell)
        nc: SVD rank
        tolerance: Convergence threshold
        LogIter: Log results through iterations
        LogTrials: Log results through trials
        Status0: Initial displayed status to be updated during iterations
        MaxIterations: Max iterations
        SVDAlgo: =1: Non-robust version, =2: Robust version
        SVDCoverage: Coverage non-outliers (robust version)
        SVDNTrials: Number of trials (robust version)
    Output:
        Mt: Left hand matrix
        Mev: Scaling factors
        Mw: Right hand matrix
        Mmis: Matrix of missing/flagged outliers
        Mmsr: Vector of residual SSQ
        Mmsr2: Vector of residual variance

    Reference
    ---------
    L. Liu et al (2003) Robust singular value decomposition analysis of microarray data
    PNAS November 11, 2003 vol. 100 no. 23 13167-13172
    """
    AddMessage = []
    ErrMessage = ''
    cancel_pressed = 0

    # M0 is the running matrix (to be factorized, initialized from M)
    M0 = np.copy(M)
    n, p = M0.shape
    Mmis = Mmis.astype(np.bool_)
    n_Mmis = Mmis.shape[0]
    if n_Mmis > 0:
        M0[Mmis == False] = np.nan
    else:
        Mmis = (np.isnan(M0) == False)
        Mmis = Mmis.astype(np.bool_)
        n_Mmis = Mmis.shape[0]

    trace0 = np.sum(M0[Mmis] ** 2)
    nc = int(nc)
    SVDNTrials = int(SVDNTrials)
    nxp = n * p
    nxpcov = int(round(nxp * SVDCoverage, 0))
    Mmsr = np.zeros(nc)
    Mmsr2 = np.zeros(nc)
    Mev = np.zeros(nc)
    if SVDAlgo == 2:
        MaxTrial = SVDNTrials
    else:
        MaxTrial = 1

    Mw = np.zeros((p, nc))
    Mt = np.zeros((n, nc))
    Mdiff = np.zeros((n, p))
    w = np.zeros(p)
    t = np.zeros(n)
    wTrial = np.zeros(p)
    tTrial = np.zeros(n)
    MmisTrial = np.zeros((n, p), dtype=np.bool_)
    # Outer-reference M becomes local reference M, which is the running matrix
    # within the ALS/LTS loop.
    M = np.zeros((n, p))
    wnorm = np.zeros((p, n))
    tnorm = np.zeros((n, p))
    denomw = np.zeros(n)
    denomt = np.zeros(p)
    StepIter = math.ceil(MaxIterations / 100)
    pbar_step = 100 * StepIter / MaxIterations
    if (n_Mmis == 0) & (SVDAlgo == 1):
        FastCode = 1
    else:
        FastCode = 0

    if (FastCode == 0) and (SVDAlgo == 1):
        denomw[np.count_nonzero(Mmis, axis=1) < 2] = np.nan
        denomt[np.count_nonzero(Mmis, axis=0) < 2] = np.nan

    for k in range(0, nc):
        for iTrial in range(0, MaxTrial):
            myStatusBox.init_bar(delay=1)
            # Copy values of M0 into M
            M[:, :] = M0
            Status1 = Status0 + "Ncomp " + str(k + 1) + " Trial " + str(iTrial + 1) + ": "
            if SVDAlgo == 2:
                # Select a random subset
                M = np.reshape(M, (nxp, 1))
                M[np.argsort(np.random.rand(nxp))[nxpcov:nxp]] = np.nan
                M = np.reshape(M, (n, p))

            Mmis[:, :] = (np.isnan(M) == False)
            # Initialize w
            for j in range(0, p):
                w[j] = np.median(M[Mmis[:, j], j])

            if np.where(w > 0)[0].size == 0:
                w[:] = 1

            w /= np.linalg.norm(w)
            # Replace missing values by 0's before regression
            M[Mmis == False] = 0

            # Initialize t (LTS = stochastic)
            if FastCode == 0:
                wnorm[:, :] = np.repeat(w[:, np.newaxis] ** 2, n, axis=1) * Mmis.T
                denomw[:] = np.sum(wnorm, axis=0)
                # Request at least 2 non-missing values to perform row regression
                if SVDAlgo == 2:
                    denomw[np.count_nonzero(Mmis, axis=1) < 2] = np.nan

                t[:] = M @ w / denomw
            else:
                t[:] = M @ w / np.linalg.norm(w) ** 2

            t[np.isnan(t) == True] = np.median(t[np.isnan(t) == False])

            if SVDAlgo == 2:
                Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
                # Restore missing values instead of 0's
                M[Mmis == False] = M0[Mmis == False]
                M = np.reshape(M, (nxp, 1))
                M[np.argsort(np.reshape(Mdiff, nxp))[nxpcov:nxp]] = np.nan
                M = np.reshape(M, (n, p))
                Mmis[:, :] = (np.isnan(M) == False)
                # Replace missing values by 0's before regression
                M[Mmis == False] = 0

            iIter = 0
            cont = 1
            while (cont > 0) & (iIter < MaxIterations):
                # build w
                if FastCode == 0:
                    tnorm[:, :] = np.repeat(t[:, np.newaxis] ** 2, p, axis=1) * Mmis
                    denomt[:] = np.sum(tnorm, axis=0)
                    # Request at least 2 non-missing values to perform column regression
                    if SVDAlgo == 2:
                        denomt[np.count_nonzero(Mmis, axis=0) < 2] = np.nan

                    w[:] = M.T @ t / denomt
                else:
                    w[:] = M.T @ t / np.linalg.norm(t) ** 2

                w[np.isnan(w) == True] = np.median(w[np.isnan(w) == False])
                # normalize w
                w /= np.linalg.norm(w)
                if SVDAlgo == 2:
                    Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
                    # Restore missing values instead of 0's
                    M[Mmis == False] = M0[Mmis == False]
                    M = np.reshape(M, (nxp, 1))
                    # Outliers resume to missing values
                    M[np.argsort(np.reshape(Mdiff, nxp))[nxpcov:nxp]] = np.nan
                    M = np.reshape(M, (n, p))
                    Mmis[:, :] = (np.isnan(M) == False)
                    # Replace missing values by 0's before regression
                    M[Mmis == False] = 0

                # build t
                if FastCode == 0:
                    wnorm[:, :] = np.repeat(w[:, np.newaxis] ** 2, n, axis=1) * Mmis.T
                    denomw[:] = np.sum(wnorm, axis=0)
                    # Request at least 2 non-missing values to perform row regression
                    if SVDAlgo == 2:
                        denomw[np.count_nonzero(Mmis, axis=1) < 2] = np.nan

                    t[:] = M @ w / denomw
                else:
                    t[:] = M @ w / np.linalg.norm(w) ** 2

                t[np.isnan(t) == True] = np.median(t[np.isnan(t) == False])
                # note: only w is normalized within loop, t is normalized after convergence
                if SVDAlgo == 2:
                    Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
                    # Restore missing values instead of 0's
                    M[Mmis == False] = M0[Mmis == False]
                    M = np.reshape(M, (nxp, 1))
                    # Outliers resume to missing values
                    M[np.argsort(np.reshape(Mdiff, nxp))[nxpcov:nxp]] = np.nan
                    M = np.reshape(M, (n, p))
                    Mmis[:, :] = (np.isnan(M) == False)
                    # Replace missing values by 0's before regression
                    M[Mmis == False] = 0

                if iIter % StepIter == 0:
                    if SVDAlgo == 1:
                        Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))

                    Status = Status1 + 'Iteration: %s' % int(iIter)
                    myStatusBox.update_status(delay=1, status=Status)
                    myStatusBox.update_bar(delay=1, step=pbar_step)
                    if myStatusBox.cancel_pressed:
                        cancel_pressed = 1
                        return [Mt, Mev, Mw, Mmis, Mmsr, Mmsr2, AddMessage, ErrMessage, cancel_pressed]

                    diff = np.linalg.norm(Mdiff[Mmis]) ** 2 / np.where(Mmis)[0].size
                    if LogIter == 1:
                        if SVDAlgo == 2:
                            myStatusBox.myPrint("Ncomp: " + str(k) + " Trial: " + str(iTrial) +
                                                " Iter: " + str(iIter) + " MSR: " + str(diff))
                        else:
                            myStatusBox.myPrint("Ncomp: " + str(k) + " Iter: " + str(iIter) +
                                                " MSR: " + str(diff))

                    if iIter > 0:
                        if abs(diff - diff0) / diff0 < tolerance:
                            cont = 0

                    diff0 = diff

                iIter += 1

            # save trial
            if iTrial == 0:
                BestTrial = iTrial
                DiffTrial = diff
                tTrial[:] = t
                wTrial[:] = w
                MmisTrial[:, :] = Mmis
            elif diff < DiffTrial:
                BestTrial = iTrial
                DiffTrial = diff
                tTrial[:] = t
                wTrial[:] = w
                MmisTrial[:, :] = Mmis

            if LogTrials == 1:
                myStatusBox.myPrint("Ncomp: " + str(k) + " Trial: " + str(iTrial) + " MSR: " + str(diff))

        if LogTrials:
            myStatusBox.myPrint("Ncomp: " + str(k) + " Best trial: " + str(BestTrial) +
                                " MSR: " + str(DiffTrial))

        t[:] = tTrial
        w[:] = wTrial
        Mw[:, k] = w
        # compute eigen value
        if SVDAlgo == 2:
            # Robust regression of M on t w'
            Mdiff[:, :] = np.abs(M0 - np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)))
            RMdiff = np.argsort(np.reshape(Mdiff, nxp))
            t /= np.linalg.norm(t)  # Normalize t
            Mt[:, k] = t
            Mmis = np.reshape(Mmis, nxp)
            Mmis[RMdiff[nxpcov:nxp]] = False
            Ycells = np.reshape(M0, (nxp, 1))[Mmis]
            Xcells = np.reshape(np.reshape(t, (n, 1)) @ np.reshape(w, (1, p)), (nxp, 1))[Mmis]
            Mev[k] = Ycells.T @ Xcells / np.linalg.norm(Xcells) ** 2
            Mmis = np.reshape(Mmis, (n, p))
        else:
            Mev[k] = np.linalg.norm(t)
            Mt[:, k] = t / Mev[k]  # normalize t

        if k == 0:
            Mmsr[k] = Mev[k] ** 2
        else:
            Mmsr[k] = Mmsr[k - 1] + Mev[k] ** 2
            Mmsr2[k] = Mmsr[k] - Mev[0] ** 2

        # M0 is deflated before calculating next component
        M0 = M0 - Mev[k] * np.reshape(Mt[:, k], (n, 1)) @ np.reshape(Mw[:, k].T, (1, p))

    trace02 = trace0 - Mev[0] ** 2
    Mmsr = 1 - Mmsr / trace0
    Mmsr[Mmsr > 1] = 1
    Mmsr[Mmsr < 0] = 0
    Mmsr2 = 1 - Mmsr2 / trace02
    Mmsr2[Mmsr2 > 1] = 1
    Mmsr2[Mmsr2 < 0] = 0
    if nc > 1:
        RMev = np.argsort(-Mev)
        Mev = Mev[RMev]
        Mw0 = Mw
        Mt0 = Mt
        for k in range(0, nc):
            Mw[:, k] = Mw0[:, RMev[k]]
            Mt[:, k] = Mt0[:, RMev[k]]

    Mmis[:, :] = True
    Mmis[MmisTrial == False] = False
    # Mmis.astype(dtype=int)

    return [Mt, Mev, Mw, Mmis, Mmsr, Mmsr2, AddMessage, ErrMessage, cancel_pressed]