# Load up sparse PLS code (custom) source("spls_cb.R") # Load up a pls funsction (custom) source("pls.R") # Read in the 27 spectra from datafile "data" spectra <- scale(matrix(scan("transmission_data"), ncol=27, byrow=FALSE)) # And transpose them for convenience... spectra <- t(spectra) # Define the composition compositions <- c(seq(from=0.01, to=0.1, by=0.01), seq(from=0.2, to=0.9, by=0.1), seq(from=0.91, to=0.99, by=0.01)) x<-spectra y<-compositions # run full model in order to calculate total (co)variances plsmodel <- pls(spectra, compositions, ncomp=1) Xtotvar <- plsmodel$Xtotvar Ytotvar <- plsmodel$Ytotvar XYtotcov <- plsmodel$XYtotcov # Compare how the variance explained & number of active variables change for different values of eta etavalues <- seq(0,0.999999,0.01) Xpercent <- rep(0,length(etavalues)) Ypercent <- rep(0,length(etavalues)) XYpercent <- rep(0,length(etavalues)) nactive <- rep(0,length(etavalues)) active <- matrix(0,length(etavalues),dim(spectra)[2]) sumW <- rep(0,length(etavalues)) for (i in 1:length(etavalues)){ splsmodel <- spls_cb(spectra,compositions,etavalues[i]) # sumbeta[i]<-sum(abs(splsmodel$coefficients)) nactive[i]<-length(splsmodel$active.variables) active[i,splsmodel$active.variables]<-1 plsmodel <- pls(spectra[,splsmodel$active.variables], compositions, ncomp=1) Xpercent[i]<-100*plsmodel$Xvar/Xtotvar Ypercent[i]<-100*plsmodel$Yvar/Ytotvar XYpercent[i]<-100*plsmodel$XYcov/XYtotcov sumW[i] <- sum(abs(plsmodel$loading.weights)) } par(mfrow=c(1,1)) image(active,axes=FALSE,xlab="eta",ylab="variable") axis( 1, at=seq(0,1,length=length(etavalues)), labels=etavalues, tck=FALSE) axis( 2, at=seq(0,1,length=dim(spectra)[2]), labels=c(1:dim(spectra)[2]), tck=FALSE) par(mfrow=c(1,1)) plot(etavalues,Xpercent,type="l",xlab="eta",ylab="percentage of (co)variance accounted for") lines(etavalues,Ypercent,col=2) lines(etavalues,XYpercent,col=3) # plot(etavalues,nactive,type="l",xlab="eta",ylab="number of active variables") # plot(etavalues,sumW,type="l",xlab="eta",ylab="sum of absolute value of weights") # Crude algorithm for finding the value of eta that gives exactly ptarget active variables ptarget <- 100 eta <- 0.5 etamin <- 0 etamax <- 1 stop <- 0 while (stop==0){ splsmodel <- spls_cb(spectra,compositions,eta) # splsmodel <- spls(x,y,K=1,eta,fit="kernelpls",scale.x=FALSE) nactive <- length(splsmodel$active.variables) # print(c(eta,nactive)) if (nactiveptarget){ etamin <- eta eta <- (eta+etamax)/2 } if (nactive==ptarget){ stop <- 1 } } plsmodel <- pls(spectra[,splsmodel$active.variables], compositions, ncomp=1) print(eta) print(100*plsmodel$Xvar/Xtotvar) print(100*plsmodel$Yvar/Ytotvar) print(100*plsmodel$XYcov/XYtotcov) par(mfrow=c(1,1)) plot(spectra[round(length(compositions)/2),],type="l",ylab="counts/a.u.") points(splsmodel$active.variables,spectra[round(length(compositions)/2),splsmodel$active.variables],col=2) # added by JB write.table(splsmodel$active.variables, file=paste(ptarget,"_most_important_points",sep=""), col.names=F, row.names=F) # Continuous window of length ptarget ptarget <- 50 p <- dim(spectra)[2] d <- round(ptarget/2) cmin <- d+1 cmax <- p-d Xpercent <- rep(0,length(cmax-cmin+1)) Ypercent <- rep(0,length(cmax-cmin+1)) XYpercent <- rep(0,length(cmax-cmin+1)) for (i in cmin:cmax){ plsmodel <- pls(spectra[,(i-d):(i+d)], compositions, ncomp=1) Xpercent[i-d] <- 100*plsmodel$Xvar/Xtotvar Ypercent[i-d] <- 100*plsmodel$Yvar/Ytotvar XYpercent[i-d] <- 100*plsmodel$XYcov/XYtotcov } par(mfrow=c(2,1)) plot(spectra[round(length(compositions)/2),],type="l",ylab="counts/a.u.") lines(c(which.max(XYpercent),which.max(XYpercent)+2*d),c(0,0),col=2) plot(cmin:cmax,Xpercent,type="l",xlab="observation", ylab="% of (co)variance accounted for",xlim=c(1,p),ylim=c(0,100)) lines(cmin:cmax,Ypercent,col=2) lines(cmin:cmax,XYpercent,col=3) plsmodel <- pls(spectra, compositions, ncomp=1) pdf(file="Graphical_abstract.pdf", 8, 6) library(IDPmisc) plot(spectra[round(length(compositions)/2),],type="l",ylab="counts/a.u.", ylim=c(-1, 15)) points(splsmodel$active.variables,spectra[round(length(compositions)/2),splsmodel$active.variables]+7,col="black", pch=20) Arrows(250, 1, 250, 6, h.col.bo="red", sh.col="red", h.col="red", open=F, size=1) text(260, 3, "Sub-selection", col="red", adj=c(0,0))