#------------------------------------------------- # Data import and setting up data objects #------------------------------------------------- #wavenumber <- read.table(file="/home/burley/Documents/Nottingham/Teaching/Projects/2010_2011/Gideon/Notts_Uni/notts_uni_2_3_5a_clustering/wavenumber") wavenumber <- read.table(file="http://www.jonathanburley.com/Research/TRS_cocrystal_data/wavenumber") names(wavenumber) <- c("wavenumber") # MD5 sum of data set: da402e7e7c910bcbef75848d9b581f34 spectra <- data.frame(scale(matrix(scan("http://www.jonathanburley.com/Research/TRS_cocrystal_data/data_intensities_only"), ncol=35, byrow=FALSE))) mylabels <- c("Cocrystal-25-1", "Cocrystal-25-2", "Cocrystal-25-3", "Cocrystal-25-4", "Cocrystal-25-5", "Cocrystal-25-6", "Cocrystal-25-7", "Cocrystal-25-8", "Cocrystal-25-9", "Separate components-25-1", "Separate components-25-2", "Separate components-25-3", "Separate components-25-4", "Separate components-25-5", "Separate components-25-6", "Separate components-25-7", "Separate components-25-8", "Separate components-25-9", "Cocrystal-50-1", "Cocrystal-50-2", "Cocrystal-50-3", "Cocrystal-50-4", "Cocrystal-50-5", "Cocrystal-50-6", "Separate components-50-1", "Separate components-50-2", "Separate components-50-3", "Separate components-50-4", "Separate components-50-5", "Separate components-50-6", "Avicell", "Flurbiprofen", "Lactose", "Mg Stearate", "Nicotinamide") names(spectra) <- mylabels mydata <- cbind(wavenumber, spectra) # <- stick wavenumber and spectra together into an object called "mydata" #------------------------------------------------- # Analysis of data #------------------------------------------------- # PCA library(pcaMethods) # <- load up the pcaMethods library pcaresults <- pca(t(spectra), nPcs=5) # <- run PCA on the transpose of the spectra print(summary(pcaresults)) # <- give us a summary please # Now strip baseline and repeat #require("baseline") # <- load up the "baseline" library #baseline_stripped <- baseline.als(t(spectra), lambda = 10, p = 0.05, maxit = 20) #mydata <- baseline_stripped$corrected # Now PCA on the baseline stripped spectra pcaresults <- pca(t(spectra), nPcs=5) print(summary(pcaresults)) #------------------------------------------------- # Plotting graphs #------------------------------------------------- ####################### # Figure 2 - raw data # ####################### pdf(file="Figure_2_revised_as_per_referee_reference_spectra_only.pdf", width = 8, height = 6) par(mar=c(4.1, 4.1, 0.25, 5.5)) offset <- 1 for (i in 31) # <- Avicell {plot(cbind(wavenumber, spectra[,i]+0), type="l", xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", ylim=c(-1, 30))} for (i in 32) # <- Flurbiprofen {points(cbind(wavenumber, spectra[,i]+12), type="l")} for (i in 33) # <- Lactose {points(cbind(wavenumber, spectra[,i]+6), type="l")} for (i in 34) # <- Mg Stearate {points(cbind(wavenumber, spectra[,i]+2), type="l")} for (i in 35) # <- Nicotinamide {points(cbind(wavenumber, spectra[,i]+18), type="l")} mtext(" Avicell", side=4, at=c(-1.5), las=2) mtext(" Mg stearate", side=4, at=c(1.), las=2) mtext(" Lactose", side=4, at=c(5), las=2) mtext(" Flurbiprofen", side=4, at=c(12), las=2) mtext(" Nicotinamide", side=4, at=c(18), las=2) dev.off() pdf(file="Figure_2_revised_final_two_panel.pdf", width=8, height=10) layout(matrix(c(1,2), 2, 1, byrow = TRUE), c(1,1), TRUE) # <- define page layout par(mar=c(4.1, 4.1, 0.25, 7.5)) offset <- 1 for (i in 31) # <- Avicell {plot(cbind(wavenumber, spectra[,i]+0), type="l", xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", ylim=c(-1, 30))} for (i in 32) # <- Flurbiprofen {points(cbind(wavenumber, spectra[,i]+12), type="l")} for (i in 33) # <- Lactose {points(cbind(wavenumber, spectra[,i]+6), type="l")} for (i in 34) # <- Mg Stearate {points(cbind(wavenumber, spectra[,i]+2), type="l")} for (i in 35) # <- Nicotinamide {points(cbind(wavenumber, spectra[,i]+18), type="l")} mtext(" Avicell", side=4, at=c(-1.5), las=2) mtext(" Mg stearate", side=4, at=c(1.), las=2) mtext(" Lactose", side=4, at=c(5), las=2) mtext(" Flurbiprofen", side=4, at=c(12), las=2) mtext(" Nicotinamide", side=4, at=c(18), las=2) text(2400, 29, "a)", cex=2) spectra_raw <- data.frame((matrix(scan("http://www.jonathanburley.com/Research/TRS_cocrystal_data/data_intensities_only"), ncol=35, byrow=FALSE))) offset <- 3 cocrystals_25_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,1:9]))) mixtures_25_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,10:18]))) cocrystals_50_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,19:24]))) mixtures_50_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,25:30]))) plot(cocrystals_25_percent, xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", type="l", ylim=c(-1, 17.5)) points(data.frame(wavenumber, mixtures_25_percent[,2]+offset*1), type="l") points(data.frame(wavenumber, cocrystals_50_percent[,2]+offset*2), type="l") points(data.frame(wavenumber, mixtures_50_percent[,2]+offset*3), type="l") mtext(" Cocrystal 25 %", side=4, at=c(-0.5), las=2) mtext(" Separate\n components 25 %", side=4, at=c(3), las=2) mtext(" Cocrystal 50 %", side=4, at=c(5.75), las=2) mtext(" Separate\n components 50 %", side=4, at=c(9), las=2) text(2400, 16.5, "b)", cex=2) dev.off() pdf(file="Figure_2_revised_as_per_referee_tablet_spectra_only_now_SI.pdf", width = 8, height = 6) par(mar=c(5.1, 4.1, 2.1, 7.5)) offset <- 1 plot(cbind(wavenumber, spectra[,1]), xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", type="l", ylim=c(-1, offset*35+5)) for (i in 2:9) # <- 25 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)), type="l") } for (i in 10:18) # <- 25 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types), type="l") } for (i in 19:24) # <- 50 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*2), type="l") } for (i in 25:30) # <- 50 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*3), type="l") } mtext(" Cocrystal 25 %", side=4, at=c(4), las=2) mtext(" Separate\n components 25 %", side=4, at=c(13), las=2) mtext(" Cocrystal 50 %", side=4, at=c(22), las=2) mtext(" Separate\n components 50 %", side=4, at=c(29), las=2) dev.off() par(mar=c(5.1, 4.1, 4.1, 2.1)) spectra_raw <- data.frame((matrix(scan("http://www.jonathanburley.com/Research/TRS_cocrystal_data/data_intensities_only"), ncol=35, byrow=FALSE))) pdf(file="Figure_2_revised_as_per_referee_tablet_average_spectra_only.pdf", width = 8, height = 6) par(mar=c(5.1, 4.1, 4.1, 7.5)) offset <- 3 cocrystals_25_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,1:9]))) mixtures_25_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,10:18]))) cocrystals_50_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,19:24]))) mixtures_50_percent <- cbind(wavenumber, scale(rowSums(spectra_raw[,25:30]))) plot(cocrystals_25_percent, xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", type="l", ylim=c(-1, 17.5)) points(data.frame(wavenumber, mixtures_25_percent[,2]+offset*1), type="l") points(data.frame(wavenumber, cocrystals_50_percent[,2]+offset*2), type="l") points(data.frame(wavenumber, mixtures_50_percent[,2]+offset*3), type="l") mtext(" Cocrystal 25 %", side=4, at=c(-0.5), las=2) mtext(" Separate\n components 25 %", side=4, at=c(3), las=2) mtext(" Cocrystal 50 %", side=4, at=c(5.75), las=2) mtext(" Separate\n components 50 %", side=4, at=c(9), las=2) dev.off() par(mar=c(5.1, 4.1, 4.1, 2.1)) pdf(file = "Figure_2_all_spectra.pdf", width = 8, height = 12 ) # <- define pdf file for output layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE), c(1,1.2), c(2,1), TRUE) # <- define page layout par(mar=c(5.1, 4.1, 2.1, 9.5)) offset <- 1 offset_between_sample_types <- 2 # Plot the main panel plot(cbind(wavenumber, spectra[,1]), xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", type="l", ylim=c(-1, offset*35+35)) for (i in 2:9) # <- 25 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)), type="l") } for (i in 10:18) # <- 25 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types), type="l") } for (i in 19:24) # <- 50 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*2), type="l") } for (i in 25:30) # <- 50 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*3), type="l") } for (i in 31) # <- Avicell {points(cbind(wavenumber, spectra[,i]+42), type="l")} for (i in 32) # <- Flurbiprofen {points(cbind(wavenumber, spectra[,i]+54), type="l")} for (i in 33) # <- Lactose {points(cbind(wavenumber, spectra[,i]+48), type="l")} for (i in 34) # <- Mg Stearate {points(cbind(wavenumber, spectra[,i]+44), type="l")} for (i in 35) # <- Nicotinamide {points(cbind(wavenumber, spectra[,i]+60), type="l")} mtext(" Cocrystal 25 %", side=4, at=c(4), las=2) mtext(" Separate\n componentss 25 %", side=4, at=c(14), las=2) mtext(" Cocrystal 50 %", side=4, at=c(24), las=2) mtext(" Separate\n components 50 %", side=4, at=c(32), las=2) mtext(" Avicell", side=4, at=c(40), las=2) mtext(" Mg stearate", side=4, at=c(44), las=2) mtext(" Lactose", side=4, at=c(48), las=2) mtext(" Flurbiprofen", side=4, at=c(54), las=2) mtext(" Nicotinamide", side=4, at=c(60), las=2) # Plot the second panel par(mar=c(5.1, 4.1, 2.1, 2.1)) offset <- 0.5 offset_between_sample_types <- 2 plot(cbind(wavenumber, spectra[,1]), xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", type="l", ylim=c(0, offset*35+10), xlim=c(40, 300)) for (i in 2:9) # <- 25 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)), type="l") } for (i in 10:18) # <- 25 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types), type="l") } for (i in 19:24) # <- 50 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*2), type="l") } for (i in 25:30) # <- 50 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*3), type="l") } rect(40, 20.5, 80, 26.5) rect(40, 8.5, 80, 15) # Plot the third panel par(mar=c(5.1, 4.1, 2.1, 9.5)) offset <- 0.25 offset_between_sample_types <- 1 plot(cbind(wavenumber, spectra[,1]), xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (scaled)", type="l", ylim=c(-0.5, offset*35+3), xlim=c(975, 1075)) for (i in 2:9) # <- 25 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)), type="l") } for (i in 10:18) # <- 25 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types), type="l") } for (i in 19:24) # <- 50 % cocrystals { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*2), type="l") } for (i in 25:30) # <- 50 % mixtures { points(cbind(wavenumber, spectra[,i]+offset*(i-1)+offset_between_sample_types*3), type="l") } rect(1030, 8.75, 1055, 11.5) rect(1030, 3, 1055, 6.25) mtext(" Cocrystal 25 %", side=4, at=c(1), las=2) mtext(" Separate\n components 25 %", side=4, at=c(4.25), las=2) mtext(" Cocrystal 50 %", side=4, at=c(7), las=2) mtext(" Separate\n components 50 %", side=4, at=c(9.75), las=2) dev.off() ########################## # Figure 3 - dendrogram # ########################## pdf( file = "Figure_3_dendrogram.pdf", width = 12, height = 8 ) # <- define pdf file for output # Calculate distances d <- dist(t(spectra), method = "euclidean") # distance matrix # Calculate dendrogram fit <- hclust(d, method="ward") # <- apply HAC to the distance matrix using the "ward" method # Plot dendrogram plot(fit, main="", xlab = "", sub="", ylab=expression("Difference "%->%"")) #<- produce the basic dendrogram rect.hclust(fit, k=7, border="red") # <- Add red rectangles to denote seven clusters dev.off() # <- Close off that pdf ########################################################### # Figure 4 - scores plot for principal component analysis # ########################################################### pdf( file = "Figure_4_scores_plot.pdf", width = 8, height = 8 ) # <- define pdf file for output plot(pcaresults@scores[31:35,], xlab=paste("PC1 (", (pcaresults@R2[1]*100), " %)", sep=""), ylab=paste("PC2 (", (pcaresults@R2[2]*100), " %)", sep="")) # <- ingredients points(pcaresults@scores[1:9,], pch=15) # <- cocrystal 25 % rect(-1.5, -3, 6, -0.5) text(6, -4, "Cocrystal 25%") points(pcaresults@scores[10:18,], pch=16) # <- mixture 25 % rect(0, 0.5, 6, 4) text(6, 5, "Separate components 25%") points(pcaresults@scores[19:24,], pch=17) # <- cocrystal 50 % rect(-8.5, -3, -3, -1.5) text(-8.6, -4, "Cocrystal 50%") points(pcaresults@scores[25:30,], pch=18) # <- mixture 50 % rect(-8.5, 1, -5, 4) text(-8.5, 5, "Separate components 50%") text(12, 1.25, "Mg stearate") text(-10, 8.25, "Flurbiprofen") text(-13, -9, "Nicotinamide") text(16.5, -0.75, "Avicell") text(16.5, -3, "Lactose") dev.off() ############################################################# # Figure 5 - Loadings plot for principal component analysis # ############################################################# PC1_normalisation_factor <- max(abs(range(pcaresults@loadings[,1]))) PC1_loading_scaled_to_unity <- data.frame(wavenumber, pcaresults@loadings[,1]/PC1_normalisation_factor) NCT_normalisation_factor <- max(abs(range(spectra$Nicotinamide))) FPB_normalisation_factor <- max(abs(range(spectra$Flurbiprofen))) Lactose_normalisation_factor <- max(abs(range(spectra$Lactose))) offset <- 0.5 pdf( file = "Figure_5_loadings_plot.pdf", width = 12, height = 8 ) # <- define pdf file for output plot(PC1_loading_scaled_to_unity, type="l", ylim=c((-0.75-offset),(0.75+2*offset)), xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity (normalised to unity)") points(data.frame(wavenumber, (spectra$Nicotinamide/NCT_normalisation_factor-offset)), type="l", col="red") points(data.frame(wavenumber, (spectra$Flurbiprofen/FPB_normalisation_factor-offset*2)), type="l", col="Blue") points(data.frame(wavenumber, (spectra$Lactose/Lactose_normalisation_factor+offset)), type="l", col="Green") # Add in the rectangles to highlight things rect(985, -0.2, 1015, 0.3) rect(985, -1.1, 1015, -0.7) segments(1000, -0.2, 1000, -0.7) rect(1025, -0.6, 1060, 0.2) rect(1570, -0.3, 1655, 0.1) rect(1570, -1.1, 1655, -0.8) segments(1612.5, -0.3, 1612.5, -0.8) rect(710, -0.1, 750, 0.35) rect(710, -1.1, 750, -0.65) segments(730, -0.1, 730, -0.65) rect(450, 0.1, 500, 0.35) rect(450, 0.7, 500, 1.3) segments(475, 0.35, 475, 0.7) rect(340, 0.2, 390, 0.6) rect(340, 0.7, 390, 1.6) segments(365, 0.6, 365, 0.7) text(2000, -0.9, "Flurbiprofen") text(2000, -0.4, "Nicotinamide") text(2000, 0.05, "PC1") text(2000, 0.5, "Lactose") dev.off() ########################## # Figure 6 - PC1 and PC2 # ########################## pdf( file = "Figure_6_PC1_vs_PC2.pdf", width = 12, height = 8 ) # <- define pdf file for output plot(cbind(wavenumber, pcaresults@loadings[,1]), type="l", xlab=bquote(Wavenumber~(cm^-1)), ylab="Intensity", ylim=c(-0.25, 1.5), xlim=c(40, 350)) points(cbind(wavenumber, pcaresults@loadings[,2]+1.0), type="l") points(cbind(wavenumber, spectra$`Cocrystal-50-1`/20+0.15), type="l") points(cbind(wavenumber, spectra$`Separate components-50-1`/20+0.4), type="l") text(320, 0.1, "PC1") text(320, 1.1, "PC2") text(320, 0.25, "Cocrystal 50 %") text(320, 0.5, "Separate components 50 %") rect(45, 0.2, 75, 1.5, col="#E77471A0") rect(85, -0.25, 130, 0.775, col="#E77471A0") dev.off()