# library// VMA2GFM # Set of R functions for sparse estimations of graphical factor model (GFM) # by variationa mean-field annealing algorithm (VMA2) # # Contents: # vma2gfm (09/01/2010) VMA2 for GFM # hypers_solve (16/03/2009) Estimation function for identification of hyperparameters # # Descriptions: # vma2gfm conducts the maximum a posteriori (MAP) estimation of the GFM using a variational mean-field # method coupled with annealing-based optimization. Sparse solutions of linear, Gaussian latent factor # models are explored during iterative-imporivements of an augmented posterior distribution on model # parameters and sparse configurations of the factor loadings. # For algorithmic details, see Yoshida and West (2010). # # Authors: # Ryo Yoshida Institute of Statistical Mathematics (ISM) # Research Organization of Information and Systems # yoshidar@ism.ac.jp # http://daweb.ism.ac.jp/~yoshidar/ # # Usage: # # # Read data (sample data, p=30, n=100) # x <- as.matrix( read.delim("sdata_p30n100.txt", header = FALSE) ) # # Cooling schedule (linear decay of length 2500) # tmpr <- seq(3, 5.0*( 10^{-10} ), length=2000) # tmpr <- c( tmpr, rep( 5.0*(10^{-10}), length=500 ) ) # # Max number of factors # k <- 8 # # Initial values for hyperparameters (p times k matrix) # hypers <- matrix(1.3, nrow=nrow(x), ncol=k ) # # Run VMA2GFM # mapEst <- vma2gfm( x, k, tmpr, hypers, stc.search=FALSE, est.hyper=FALSE, gprm = c(2.9, 7.7) ) # # # References: # Ryo Yoshida and Mike West (2010) Bayesian Learning in Sparse Graphical Factor Models via Annealed Entropy # # Last update: # 13/05/2009 Version 1 released # 09/01/2010 Version 2 released. # #------------------------------------------------------------------------------ # function// vma2gfm # Sparse estimation of GFM by VMA2 # # Arguments: # x; Data matrix (rows: variables; column; samples) # k; Maximum dimension of factor variables # tmpr; Cooing schedule (vector of decreasing temperatures) # (NOTE) The last value is replaced by zero in anneals_gfm () # hypers; p times k matrix of hyperparameters to control spaseness degree # (needed to specify initial values beforehand) # stc.search; TRUE; stochastic search, FALSE; non-stochastic search # est.hypers; TRUE; joint estimation of "hypers", FALSE; fixe values # gprm; Vector of hyper-hyper parameters (mean and variance of Gaussian distribution) at the lowest hierarcy #------------------------------------------------------------------------------ vma2gfm <- function( x, # Data matrix (rows: variables; column; samples) k, # Maximum dimension of factor variables tmpr, # Cooing schedule (vector of decreasing temperature) hypers, # Matrix of hyperparameters to control spaseness degree stc.search=FALSE, # TRUE; stochastic search, FALSE; non-stochastic search est.hypers=TRUE, # TRUE; joint estimation of "hypers", FALSE; fixe value for "hyper" gprm = c(2.9, 7.7) # Hyper-hyper parameters (mean and variance) at lowest hierarcy ) { cmat <- x%*%t(x) # Squared-sum matrix of data n <- ncol(x) # Number of samples p <- nrow(x) # Dimension of data maxitr <- length(tmpr) - 1 # Max number of iteration steps bnry <- matrix(1, nrow=p, ncol=k) # Initial values for binary matrix ## Set initial paramaeters eig <- eigen( cmat, symmetric=TRUE ) # Eigenvalue decomposition of data load <- eig$vectors[ , 1:k] # Factor loading matrix (k dominat eigenvectors) psi <- 0.01*diag(cmat)/n # Noise variances (vector) delta <- eig$values[1:k]/n # Square of singular values (k dominat eigenvalue/n) snr <- delta/(delta+1) # Signal-to-Noise ratio cpr.load <- matrix(1/2, ncol=k, nrow=p) # Sparsity configuration probabilities ## Other control variables unit <- diag( rep(1,length=p) ) # Identity matrix ubnd <- 70 # Upper-bound to avoid overflow in computation of the exponential function if(stc.search==FALSE){act <- c(1:p) } act <- rep(1, length=p) ## Interative optimization by annealing for(itr in 1:maxitr){ # Loop@ Start ## Scaling of squared-sum matrix of data scl.cmat <- outer( psi^{-1/2}, psi^{-1/2})*cmat ## Sparsity configuration probabilities eng <- c() for(i in 1:k){ omg <- matrix( rep( cpr.load[ ,i], length=p ), ncol=p, nrow=p ) diag( omg ) <- rep( 1, length=p ) eng <- cbind(eng, ( snr[i]*load[ ,i]*( ( omg*scl.cmat )%*%load[ ,i] )) ) } eng <- (eng - hypers)/tmpr[itr] cpr.load[ bnry==1 ] <- exp( eng[bnry==1] )/( exp(eng[bnry==1]) + 1 ) cpr.load[ eng > ubnd ] <- 1 ## Factor loading matrix psi.tmp <- diag(rep(0, length=p)) bnry <-c() if(stc.search==TRUE){ act <-c() for(i in 1:k){ for(j in 1:p){ act[j] <- sample( c(1,0), 1, prob=c(cpr.load[j,i], 1-cpr.load[j,i]) ) } bnry <- cbind( bnry, act ) } }else{ bnry <- matrix(1, nrow=p, ncol=k) } for(i in 1:k){ if(delta[i]!=0){ if(stc.search == FALSE){ prj <- unit - ( cpr.load[ , i] * load[ , -i] ) %*%t( cpr.load[ , i]*load[ , -i] ) }else{ prj <- unit - ( load[ , -i] ) %*%t( load[ , -i] ) } omg <- cpr.load[ , i]%o%cpr.load[ , i] diag(omg) <- cpr.load[ , i] smat <- omg*scl.cmat if( (itr==1495)&&(i==5)){print(omg)} if( (itr==1495)&&(i==5)){print(scl.cmat)} if(stc.search==TRUE){act <- which( bnry[ ,i]==1 ) } if(stc.search==FALSE){act <- c(1:p)} if(length(act)>0){ eig <- eigen( prj[act, act]%*%smat[act, act]%*%t(prj[act, act]), symmetric=TRUE) load[ , i] <- rep(0, length=p) load[act ,i] <- t( prj[act, act] )%*%eig$vectors[ ,1] load[act ,i] <- load[act , i]/sqrt( t(load[act , i])%*%load[act , i] ) }else{ load[ ,i] <- rep(0, length=p) } psi.tmp <- psi.tmp - snr[i]*( omg*(load[ ,i]%o%load[ ,i]) ) delta[i] <- t( load[ , i] )%*%smat%*%load[ , i]/n if(itr >= 1){ delta[i] <- delta[i]-1 } } } ## Sigular value components delta[delta<=0] <- 0 snr <- delta/(delta+1) ## Noise variances tmp <- diag( (unit- diag(psi^{1/2})%*%psi.tmp%*%diag(psi^{-1/2}) )%*%cmat )/n psi[ tmp > 0 ] <- tmp[ tmp > 0 ] print(psi) cpr.load[ ,snr==0] <- 0 load[ ,snr==0] <- 0 print( round( load, digits=5) ) ## Hyperparameters if( est.hypers==TRUE ){ for(i in 1:p){ for(j in 1:k){ w <- cpr.load[i,j] res.hypers <- uniroot( hypers_solve, c(-100,100), w=w, mu=gprm[1], sigma=gprm[2]) hypers[i,j] <- res.hypers$root } } } } ## Loop@ End ## cpr <- cpr.load cpr[cpr.load > 0.5] <- 1 cpr[cpr.load <= 0.5] <- 0 adj <- (cpr*load)%*%diag(delta)%*%t(cpr*load)+ diag(psi) adj[ adj!=0 ] <- 1 prj.cov <- diag( t(cpr*load)%*%diag(psi^{-1/2})%*%cmat%*%diag(psi^{-1/2})%*%(cpr*load) ) map <- -n*sum(log(psi))-sum(diag(cmat)/psi) map <- map - n*sum( log(1-snr) ) + sum( snr*prj.cov) map <- map/2 return( list( load=load, prob.cnf=cpr, adjacent=adj, delta=delta, psi=psi, posterior=map, hypers=hypers ) ) } #------------------------------------------------------------------------------ # function// hypers.solve # Estimation equation used for infering hyperparameters (degree of sparseness) # # Arguments: # hyper; hyperparameter (scalar) # w; configuration probability (scalar) # mu; mean # sigma; variance #------------------------------------------------------------------------------ hypers_solve <- function( hypers, weight=0.1, mu=1, sigma=10 ) { -weight + exp(-hypers)/(1+exp(-hypers))-(hypers-mu)/sigma }