# # This supporting file is intended to quickly comment each package feature # analyzing two LC-MS metabolomics datasets. With this code, is possible # to reproduce the plots of the study case document. # # # Study case document http://labpib.fmrp.usp.br/methods/probmetab/resources/case-study-probmetab.pdf # To easy follow the study case text we will divide this documents following # the main sections. # ProbMetab Case Study 1: Metabolomics dataset with internal standard compounds illustrates annotation performance # Dataset available at: # http://sourceforge.net/projects/mzmatch/files/Ideom/IDEOM_demo_mzXMLfiles.zip/download # # Preprocessing with xcms # # load required libraries library(ProbMetab) library(xcms) library(CAMERA) # set nslaves for the number of available cores of your machine # Follow xcms vignette to understand data structure # http://www.bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcmsPreprocess.pdf nslaves <- 4 # positive acquisition mode, directory: 'POS/' xset <- xcmsSet( "POS/", method='centWave', ppm=2, peakwidth=c(10,50), snthresh=3, prefilter=c(3,100), integrate=1, mzdiff=-0.00005, verbose.columns=TRUE,fitgauss=FALSE, nSlaves=nslaves ) # negative mode xset2 <- xcmsSet( "NEG/", method='centWave', ppm=2, peakwidth=c(10,50), snthresh=3, prefilter=c(3,100), integrate=1, mzdiff=-0.00005, verbose.columns=TRUE,fitgauss=FALSE, nSlaves=nslaves ) # load("probmetab-case1-box00.RData") # run to avoid deal with raw data and go directly to examples # align retention times across samples, grouping and integration xsetP <- retcor(xset, method='obiwarp', plottype="none", profStep=1) #positive mode xsetPnofill <- group(xsetP, bw=5, mzwid=0.015) xsetP <- fillPeaks(xsetPnofill) xsetN <- retcor(xset2, method='obiwarp', plottype="none", profStep=1) #negative mode xsetNnofill <- group(xsetN, bw=5, mzwid=0.015) xsetN <- fillPeaks(xsetNnofill) #save(list=ls(all=TRUE), file="probmetab-case1-box01.RData") #run to save intermediary steps # standard CAMERA processing an <- xsAnnotate(xsetP) an <- groupFWHM(an, perfwhm = 0.6) an <- findIsotopes(an, mzabs = 0.01) # This step requires the raw data # If one wants to run this step # create a path with the exactly same folders as in # /home/rsilva/Desktop/Dados_Fabricio_08_2013/ # and download the data for this folder # http://sourceforge.net/projects/mzmatch/files/Ideom/IDEOM_demo_mzXMLfiles.zip/download # the same for negative mode below. an <- groupCorr(an, cor_eic_th = 0.75) anP <- findAdducts(an, polarity="positive") an <- xsAnnotate(xsetN) an <- groupFWHM(an, perfwhm = 0.6) an <- findIsotopes(an, mzabs = 0.01) an <- groupCorr(an, cor_eic_th = 0.75) anN <- findAdducts(an, polarity="negative") # load("probmetab-case1-box01.RData") # run to skip the previous block # ProbMetab functions examples # # Example of functions for: Ion annotation extraction and database matching # # combine positive and negative acquisition modes, keeping track of individual modes. # It is possible to combine the acquisition modes setting positive or negative # as reference input table. # comb 1 - used on Table 1 camAnot <- combinexsAnnos(anP, anN) camAnot <- combineMolIon(peaklist=camAnot, cameraobj=anP, polarity="pos") # Extract and format a set of non redundant putative molecular ions from CAMERA annotation # with ProbMetab # comb 2- used on Table 1 ionAnnotP <- get.annot(anP) ionAnnotN <- get.annot(anN, polarity="negative") # number of isotopic peaks and non redundant putative molecules sum(ionAnnotP$molIon[,3]==1) sum(ionAnnotP$molIon[,3]==0) # This is the file automatically generated by IDEOM [10]⁠: http://mzmatch.sourceforge.net/ # that was used to produce the analysis of filter_comp.xls table. # # Below also follows how to integrate the mzMatch analysis to ProbMetab. # The mzMatch package can be downloaded at # http://mzmatch.sourceforge.net/tutorial.mzmatch.r.advanced.php # Please set the working directory to where your files are setwd("mzXMLfiles/POS") rawfiles <- dir (full.names=TRUE,pattern="\\.mzXML*",recursive=TRUE) outputfiles <- paste(sub(".mzXML*","",rawfiles),".peakml",sep="") for (i in 1:length(rawfiles)){ xset <- xcmsSet(rawfiles[i], method='centWave', ppm=2, peakwidth=c(5,100), snthresh=3, prefilter=c(3,1000), integrate=1, mzdiff=0.001, verbose.columns=TRUE, fitgauss=FALSE, nSlaves=2 ) PeakML.xcms.write.SingleMeasurement(xset=xset,filename=outputfiles[i], ionisation="negative",addscans=2, writeRejected=FALSE,ApodisationFilter=TRUE ) } library (mzmatch.R) mzmatch.init (4000) MainClasses <- dir () dir.create ("combined_RSD_filtered") dir.create ("combined_RSD_rejected") dir.create ("combined") for (i in 1:length(MainClasses)){ FILESf <- dir (MainClasses[i],full.names=TRUE,pattern="\\.peakml$",recursive=TRUE) OUTPUTf <- paste ("combined/",MainClasses[i],".peakml",sep="") if(length(FILESf)>0){ mzmatch.ipeak.Combine(i=paste(FILESf,collapse=","), v=T,rtwindow=30,o=OUTPUTf,combination="set", ppm=5,label=paste(MainClasses[i],sep="") ) RSDf <- paste ("combined_RSD_filtered/",MainClasses[i],".peakml",sep="") REJf <- paste ("combined_RSD_rejected/",MainClasses[i],".peakml",sep="") if(length(FILESf)>1){ mzmatch.ipeak.filter.RSDFilter(i=OUTPUTf,o=RSDf,rejected=REJf, rsd=0.8,v=T ) else{ file.copy(OUTPUTf,RSDf) } } } INPUTDIR <- "combined_RSD_filtered" FILESf <- dir (INPUTDIR,full.names=TRUE,pattern="\\.peakml$") mzmatch.ipeak.Combine(i=paste(FILESf,collapse=","),v=T,rtwindow=30,o="combined.peakml",combination="set",ppm=5) mzmatch.ipeak.filter.NoiseFilter(i="combined.peakml",o="combined_noisef.peakml",v=T,codadw=0.8) mzmatch.ipeak.filter.SimpleFilter(i="combined_noisef.peakml", o="combined_sfdet.peakml", mindetections=3, v=T) mzmatch.ipeak.filter.SimpleFilter(i="combined_sfdet.peakml",o="combined_highintensity.peakml", minintensity=1000, v=T) PeakML.GapFiller(filename = "combined_highintensity.peakml", ionisation = "detect", Rawpath = NULL, outputfile = "highintensity_gapfilled.peakml", ppm = 0, rtwin = 0) mzmatch.ipeak.sort.RelatedPeak(i="highintensity_gapfilled.peakml",v=T,o="mzMatch_output.peakml",basepeaks="mzMatch_basepeaks.peakml",ppm=3,rtwindow=6) annot <- paste("relation.id,relation.ship,codadw,charge") mzmatch.ipeak.convert.ConvertToText(i="mzMatch_output.peakml",o="mzMATCHoutput.txt",v=T,annotations=annot) # The PeakML importing function does not work with gap filled files # so, we have to prepare a PeakML annotated file. mzmatch.ipeak.sort.RelatedPeak(i="combined_highintensity.peakml",v=T,o="mzMatch_outputPOS.peakml",basepeaks="mzMatch_basepeaks.peakml",ppm=3,rtwindow=6) # import PeakML file as xcmsSet object mzAnnotP <- get.Mzmatch.annot("POS/mzMatch_outputPOS.peakml", onlyBP=FALSE) # Repeat the process above to Negative mode to create a separate PeakML file # this is how comb 3 (see below) was obtained comb3 <- combineMolIon(mzAnnotP, mzAnnotN) # include CAMERA non-annotated compounds, and snr retrieval # comb 2+ - used on Table 1 ionAnnotP2plus <- get.annot(anP, allowMiss=TRUE, xset=xsetPnofill, toexclude=c("blank", "medium", "QC")) ionAnnotN2plus <- get.annot(anN, polarity="negative", allowMiss=TRUE, xset=xsetNnofill, toexclude=c("blank", "medium", "QC")) # Following with comb 2+ comb2plus <- combineMolIon(ionAnnotP2plus, ionAnnotN2plus) sum(comb2plus$molIon[,3]==1) sum(comb2plus$molIon[,3]==0) # this mapping of compounds-to-reactions from KEGG is automatically loaded with ProbMetab DB <- KEGGcpds reactionM <- create.reactionM(DB, comb2plus, ppm.tol=8) # number of masses with candidates inside the fixed mass window # and masses with more than one candidate length(unique(reactionM[reactionM[,"id"]!="unknown",1])) sum(table(reactionM[reactionM[,"id"]!="unknown",1])>1) # ProbMetab functions examples # # Example of functions for: Probability modeling and estimation # # Calculate the ratio between observed and theoretical isotopic patterns. # If you don't have an assessment of carbon offset to carbon number prediction # skip this step and use the reactionM as input to weigthM function. isoPatt <- incorporate.isotopes(comb2plus, reactionM, comb=1, samp=12:23, DB=DB) # calculate the likelihood of each mass to compound assignment using mass accuracy, and isotopic pattern, when present wl <- weightM(isoPatt,intervals=seq(0,1000,by=500), offset=c(3.115712, 3.434146, 2.350798)) # codify the relation between compounds, given by reaction present in the biological database DB w <- design.connection(reactionM) # Example of generic mass differences to ProbMetab modeling framework exp_masses <- matrix(c( 1035, 242.09100, 500, 224.07900, 711, 90.03215), nrow=3 ) colnames(exp_masses) <- c("rt", "massObs") reac_matrix <- matrix(0, ncol=4) ppm.tol <- 10 # match masses in a given mass window for(i in 1:nrow(exp_masses)) { logical <- abs(((exp_masses[i,2]-db.mass)/db.mass)*10^6) < ppm.tol if(sum(logical)){ reac_matrix0 <- cbind(matrix(exp_masses[i,], nrow=1), as.matrix(valid_formulas[logical,c("mass", "id")])) } reac_matrix <- rbind(reac_matrix, reac_matrix0) } reac_matrix <- reac_matrix[-1,] reac_matrix <- cbind(reac_matrix, rep("", nrow(reac_matrix))) m_diff <- outer(as.numeric(reac_matrix[,3]), as.numeric(reac_matrix[,3]), "-" ) for(i in 1:nrow(m_diff)){ for(j in 1:ncol(m_diff)){ log <- abs(m_diff[i,j])> gen_reactions[,3]-0.01 & abs(m_diff[i,j]) < gen_reactions[,3]+0.01 if(sum(log)){ reac_matrix[i,5] <- paste(gen_reactions[log,4], collapse=";") } } } cnames <- c("rt", "massObs","massDB", "id", "reactions") colnames(reac_matrix) <- cnames # Please install the suggested packages if you want to reproduce # the retention time modeling # install.packages(c( "bootstrap", "leaps", "mgcv")) # The IDEOM files can be found on "EXTERNAL FILES" link on main page db <- read.csv("IDEOM_v18_DB.csv") rtTab <- read.csv("standardRetentionTime.csv") testM <- rtTab[,5:10] predSet <- data.frame(id=reactionM[reactionM[,5]!="unknown", 5], data=as.numeric(reactionM[reactionM[,5]!="unknown", 1])/60 ) l1 <- sapply(predSet$id, function (x) which(db[,"KEGGid."]== sub("cpd:","",as.character(x)) ) ) l1[which(unlist(lapply(l1, length))==0)] <- 41639 db2 <- cbind(db[,"KEGGid."],db[,21:49]) db2 <- rbind(as.matrix(db2), rep(0,30)) sapply(which(unlist(lapply(l1, length))>1), function(x) l1[[x]] <<- l1[[x]][1] ) v1 <- unlist(l1) cols <- c(3,4,5,6,8,9) predM <- db2[v1, cols] # some compounds have missing descriptors, for those is not # possible to predict the retention time predM[which(is.na(predM), arr.ind=TRUE)[,1],] <- 0 predM <- apply(predM, 2, as.numeric) sum(apply(predM, 1, sum)==0) testSet <- data.frame(id=rtTab[,1], data=rtTab[,2]) descData <- list(predM=predM, testM=testM) # Reproducing Creek et al. (2012) to format the information to be added to the # probabilistic model myresult <- rt.predict(testSet, predSet, descData, voidTime=4.5) myweight <- weightRT(myresult, reactionM) # Example of Hadamard product (element-wise), which allows easy insertion # of information on the model. myweight$wm[1:5,1:5] wm[1:5,1:5] wm[1:5,1:5]*myweight$wm[1:5,1:5] # calculate the posterior probabilities according with the proposed model x <- 1:ncol(wl$wm) y <- 1:nrow(wl$wm) # These two will be wrapped in a the function in future # should take about 30 minutes for a regular desktop computer #(ubuntu 64bits, with 8gb of memory), with # a problem of the same size (number of masses) of the presented in this document. system.time(conn <- gibbs.samp(x, y, 5000, w, wl$wm)) # export the classification table, optionally as R matrix or .html file # This table contains the results from Table 3 on the Study Case Document. system.time(ansConn <- export.class.table(conn, reactionM, comb2plus, filename="AnalysisExample", html=TRUE, DB=DB)) # ProbMetab functions examples # # Example of functions for: Comprehensive output representation # # This script is intended to reproduce the Figure 4, # as well as to provide a working example on the main features of ProbMetab # graph functions # # Following this analysis we generate a graph with reactions overlaid with correlations # and export use additional information to provide formatting to this graph. # calculate the correlations and partial correlations and cross reference then with reactions # load(“probmetab-case1-box02.RData”) # load the necessary objects to draw the graph mw <- which(w==1,arr.ind=TRUE) corList <- reac2cor(mw, ansConn$classTable[,-c(8:18)], corprob=0) gr.cor <- ftM2graphNEL(corList$cor.vs.reac) classTable <- ansConn$classTable node.names <- apply(classTable[classTable[,4]!="",1:7], 1, function(x) paste(x[6], "-", paste(strsplit(as.matrix(x[7]), "#")[[1]][1:2], collapse="\n"), sep="") ) node.names <- sub("^\\s+", "", node.names) snode.names <- node.names[as.numeric(nodes(gr.cor))] # Example of some edge and node attributes see export2graph man page to more details form <- edgeNames(gr.cor) form <- data.frame(form, form %in% apply(corList$signif.cor[corList$signif.cor[,1]>0.75,2:3], 1, paste, collapse="~")) form <- data.frame(form, form[,1] %in% apply(corList$signif.cor[corList$signif.cor[,1]<(- 0.75),2:3], 1, paste, collapse="~") ) # Format the edge attribute table with exadecimal color codes # to export to Cytoscape. In this case red to positive correlations # higher than 0.75, and green to negative correlations smaller than # -0.75 cnames <- c("edge.name", "color.#FF0000", "color.#006400") colnames(form) <- cnames # index of known identity compounds from Supplementary File 4 csel <- "119 393 1106 661 1264 418 482 423 1114 413 459 62 1136 1067 362 767 656 92 618 109 555 1291 516 1123 1128 553 379 701 242 302 697 356 1155 47 184 896 3 40 182 4 1098 41 1031 782 45 246 560 416 1088 778 81 51 88 31 383 24 352 1121 730 744 425 1120 120 370 1086 782 757 451 52 58 498 769 376 337 551 69 477 548 543 677 526 581 766 933 90 870 317 332 212 396 594 713 748" csel <- as.numeric(strsplit(csel, " ")[[1]]) form2 <- nodes(gr.cor) form2 <- data.frame(form2, form2 %in% csel) cnames2 <- c("node.name", "lcolor.#0000FF") colnames(form2) <- cnames2 # The index of all nodes sn <- as.numeric(sub("(^\\d+)-.+", "\\1", snode.names)) # Where the known compounds are in the vector of correlated nodes coord <- sapply(csel, function(x) which(sn==x)) scoord <- coord[unlist(lapply(coord, length))!=0] scoord <- unlist(scoord) # names of known identity compounds from Supplementary File 4 cpdnames <- "AMP%Urocanate%Uracil%Pseudouridine 5'-phosphate%D-Ribose 5-phosphate%Guanine%D-Sorbitol%Propanoyl phosphate%Maleamate%L-Methionine%sn-Glycerol 3-phosphate%N6,N6,N6-Trimethyl-L-lysine%(S)-Malate%N6-Acetyl-L-lysine%L-Cysteine%Ascorbate%Glutathione%Pseudouridine%Adenosine%5'-Methylthioadenosine%L-Cystathionine%Inosine%S-Sulfo-L-cysteine%5-Amino-4-imidazole carboxylate%Mesaconate%N-Acetyl-D-glucosamine%4-Guanidinobutanal%S-Adenosyl-L-methionine%alpha-D-Glucosamine 1-phosphate%Glycine%Riboflavin%L-2,4-Diaminobutanoate%D-Xylonolactone%Xanthine%S-Adenosyl-L-homocysteine%D-Glucose%L-Arginine%L-Lysine%L-Serine%Putrescine%(R)-3-Hydroxybutanoate%L-Glutamate%Urate%D-Gluconic acid%(S)-4-Hydroxymandelonitrile%Hypoxanthine%Carnosine%L-Arabinose%L-Alanine%Citrate%Folate%Isopyridoxal%L-Cystine%L-Asparagine%L-Glutamate 5-semialdehyde%Nicotinamide%L-Valine%Taurine%2-Hydroxy-3-oxopropanoate%(S)-3-Methyl-2-oxopentanoic acid%L-Histidine%L-Threonine%Phenolsulfonphthalein%Imidazole-4-acetate%Pyruvate%L-Gulonate%Allantoin%N(pi)-Methyl-L-histidine%Pyridoxamine%L-Tyrosine%LL-2,6-Diaminoheptanedioate%3-(4-Hydroxyphenyl)pyruvate%L-1-Pyrroline-3-hydroxy-5-carboxylate%Hypotaurine%Pantothenate%N6-Acetyl-N6-hydroxy-L-lysine%D-Glucosamine%N2-(D-1-Carboxyethyl)-L-lysine%sn-glycero-3-Phosphoethanolamine%Maltose%L-Kynurenine%Cytidine%D-Glucuronolactone%Succinate%Thymidine%N-Acetylneuraminate 9-phosphate%Glycerol%Diethanolamine%D-Glucose 6-phosphate%Ethanolamine phosphate%L-Arginine phosphate%CDP-ethanolamine%Deoxyribose" cpdnames <- strsplit(cpdnames, "%")[[1]] # replace the node label of compounds identified with the true compound name scpdnames <- cpdnames[unlist(lapply(coord, length))!=0] snode.names[scoord] <- scpdnames cpdnames <- as.character(sapply(classTable[classTable[,2]!="unknown",2], function(x) DB$name[DB$id==as.character(x)]) ) classTable <- as.matrix(classTable) classTable[classTable[,2]!="unknown",2] <- cpdnames # create an initial visualization without leaving R environament createJSONToCytoscape(gr=gr.cor, node.label=snode.names) openGraph("network.json", classTable=classTable, openBrowser=TRUE) # This functions extracts pathway information from KEGG API, # and needs the KEGG codes, so we have to load the original # classification table again cpdInfo <- create.pathway.node.attributes(ansConn$classTable, graph=gr.cor, DB=DB, filename1="path1.noa", filename2="path2.noa", organismId="tbr") create.reaction.edge.attributes(classTable, graph=gr.cor, w=w, reactionM=reactionM, DB=DB, filename="reac.eda") export2cytoscape(gr.cor, node.label=snode.names, cwName="test4",node.form=form2, edge.form=form, cpdInfo=cpdInfo, classTable=classTable) # ProbMetab Case Study 2: Metabolomics dataset with internal standard compounds illustrates annotation performance # Dataset available at: # http://labpib.fmrp.usp.br/methods/probmetab/resources/sugar_cane_rawdata.zip # ProbMetab suggested application # initial parameters from http://www.nature.com/nprot/journal/v7/n3/fig_tab/nprot.2011.454_T1.html # load required packages library(ProbMetab) library(xcms) library(CAMERA) # Preprocessing xset <- xcmsSet(".", method='centWave', ppm=15, peakwidth=c(5,20), prefilter=c(0,0) ) xset <- group(xset) xset2 <- retcor(xset,method="obiwarp",profStep=0.1) xset2 <- group(xset2, mzwid=0.015,minfrac=0.5,bw =2) xset3 <- fillPeaks(xset2) an <- annotate(xset3, perfwhm=0.6, cor_eic_th=0.75, mzabs = 0.01 , polarity="positive") # load(“probmetab-case2-box00.RData”) # run to avoid to deal with raw data and go directly to examples # example of biocyc API alternative usage # Chose an organism to download metabolites on its known metabolism # ara is the organism code for Arabidopsis thaliana vpth <- get.pathway.by.organism.biocyc("ara") # optionally use parallel processing library(doMC) registerDoMC() # Retrieve all compounds associated to known Arabidopsis pathways m <- foreach(i=1:length(vpth)) %dopar% get.compounds.by.pathway.biocyc(vpth[i]) m2 <- do.call("rbind", m) m3 <- unique(m2) m4 <- m3[-grep("Error", m3[,3]),] # Retrieve all single compound reactions for each compounds rlist <- list() foreach(i=1:nrow(m4)) %dopar% get.reactions.by.compound.biocyc(m4[i,1]) # before attaching the reactions, verify if all compounds have at least one reaction which(unlist(lapply(rlist, length))==0) m4 <- cbind(m4, unlist(rlist)) colnames(m4)[4] <- "reactions" m4[,3] <- gsub("\\s", "", m4[,3]) # In a similar way we can retrieve information from KEGG keggdb <- read.table("http://rest.kegg.jp/link/compound/reaction") head(keggdb) dim(keggdb) get.name(keggdb[1,2]) get.formula.kegg(keggdb[1,2]) get.name(keggdb[2,2]) get.formula.kegg(keggdb[2,2]) # Optionally the user can extract all compound information in the standard format # with the KEGG organism code # http://www.kegg.jp/kegg/catalog/org_list.html system.time(ath <- build.database.kegg("ath")) # We provide a formated KEGG database loaded with the package # to perform the downstream analysis # Database matching DB <- KEGGcpds ionAnnot <- get.annot(an, allowMiss=TRUE, minint=1000) reactionM <- create.reactionM(DB, molIon=ionAnnot, ppm.tol=30) wl <-weightM(reactionM, useIso=FALSE) w <- design.connection(reactionM) # Probability calculations x <- 1:ncol(wl$wm) y <- 1:nrow(wl$wm) conn <- gibbs.samp(x, y, 5000, w, wl$wm) # Output representation # This table contains the results from Table 5 on the Study Case Document. system.time(ansConn <- export.class.table(conn, reactionM, ionAnnot, filename="AnalysisExample", html=TRUE, m.test="t.test", class1="F_I_Nature", class2="F_S_Nature", DB=DB) ) mw <- which(w==1,arr.ind=TRUE) corList <- reac2cor(mw, ansConn$classTable, corths=0.7, corprob=0) # creating and formatting a graph # load(“probmetab-case2-box01.RData”) # load the necessary objects to draw the graph classTable <- ansConn$classTable gr.cor <- ftM2graphNEL(corList$cor.vs.reac) node.names <- apply(classTable[classTable[,6]!="",1:7], 1, function(x) paste(x[6], "-", paste(strsplit(as.matrix(x[7]), "#")[[1]][1:2], collapse="\n"), sep="")) node.names <- sub("^\\s+", "", node.names) node.names <- node.names[as.numeric(nodes(gr.cor))] # color edges that represent correlations between mass peaks higher than # 0.7 of red form <- edgeNames(gr.cor) form <- data.frame(form, form %in% apply(corList$signif.cor[corList$signif.cor[,1]>0.70,2:3], 1, paste, collapse="~")) # there is no negative correlation form <- data.frame(form, form[,1] %in% apply(corList$signif.cor[corList$signif.cor[,1]<(-0.70),2:3], 1, paste, collapse="~")) cnames <- c("edge.name", "color.#FF0000", "color.#006400") colnames(form) <- cnames # color nodes representing differential representation of blue pvec <- as.numeric(classTable[classTable[,5]!="",5]) form2 <- nodes(gr.cor) form2 <- data.frame(form2, as.numeric(nodes(gr.cor)) %in% which(pvec<0.05)) cnames2 <- c("node.name", "lcolor.#FF0000") colnames(form2) <- cnames2 # export the basic graph format do a quick web visualization createJSONToCytoscape(gr=gr.cor, node.label=node.names) openGraph("network.json", classTable=classTable, openBrowser=TRUE) # format attribute tables to export to cytoscape visualization cpdnames <- as.character(sapply(classTable[classTable[,2]!="unknown",2], function(x) DB$name[DB$id==as.character(x)])) classTable <- as.matrix(classTable) classTable[classTable[,2]!="unknown",2] <- cpdnames cpdInfo <- create.pathway.node.attributes(ansConn$classTable, graph=gr.cor, DB=DB, filename1="path1.noa", filename2="path2.noa", organismId="zma") create.reaction.edge.attributes(classTable, graph=gr.cor, w=w, reactionM=reactionM, DB=DB, filename="reac.eda") # take care, there are not negative correlations, so the column 3 of form matrix is empty export2cytoscape(gr.cor, node.label=node.names, cwName="test4", edge.form=form[,-3], node.form=form2, cpdInfo=cpdInfo, classTable=classTable) # This script is intended to reproduce the Figure 5, # showing how to overlay information, and search for meaninful # pathway alterations. # water corList1 <- reac2cor(mw, ansConn$classTable[,-c(25:39)], corths=0.7, corprob=0) # drought corList2 <- reac2cor(mw, ansConn$classTable[,-c(8:24)], corths=0.7, corprob=0) gr.cor2 <- ftM2graphNEL(corList2$cor.vs.reac) gr.cor1 <- ftM2graphNEL(corList1$cor.vs.reac) mw1 <- t(sapply(edgeNames(gr.cor1), function(x) strsplit(x, "~")[[1]])) mw2 <- t(sapply(edgeNames(gr.cor2), function(x) strsplit(x, "~")[[1]])) mw3 <- unique(rbind(mw1, mw2)) gr.cor3 <- ftM2graphNEL(mw3) inOne <- setdiff(edgeNames(gr.cor1), edgeNames(gr.cor2)) inTwo <- setdiff(edgeNames(gr.cor2), edgeNames(gr.cor1)) form <- edgeNames(gr.cor3) form <- data.frame(form, form %in% inOne) form <- data.frame(form, form[,1] %in% inTwo) # Format and Normalize data do calculate fold change metabData <- classTable[classTable[,6]!="",] metabData2 <- apply(metabData[,8:39], 2, as.numeric) rownames(metabData2) <- metabData[,6] normalize.medFC <- function(mat) { # Perform median fold change normalisation # X - data set [Variables & Samples] medSam <- apply(mat, 1, median) medSam[which(medSam==0)] <- 0.0001 mat <- apply(mat, 2, function(mat, medSam){ medFDiSmpl <- mat/medSam vec<-mat/median(medFDiSmpl) return(vec) }, medSam) return (mat) } metabData2 <- normalize.medFC(metabData2) # Calculate differential correlations # The DiffCorr package can be found at: http://diffcorr.sourceforge.net/ source("../DiffCorr_src/R/DiffCorr.R") comp.2.cc.fdr(output.file="resM.txt", metabData2[,1:17], metabData2[,18:32], threshold=0.05) res <- read.delim("resM.txt") nres <- paste(res[,1], "~", res[,2], sep="") form <- data.frame(form, form[,1] %in% nres) # red for only in water # green for only in drought # width for differential correlation cnames <- c("edge.name", "color.#FF0000", "color.#006400", "width.5") colnames(form) <- cnames node.names <- apply(classTable[classTable[,6]!="",1:7], 1, function(x) paste(x[6], "-", paste(strsplit(as.matrix(x[7]), "#")[[1]][1:2], collapse="\n"), sep="")) node.names <- sub("^\\s+", "", node.names) node.names <- node.names[as.numeric(nodes(gr.cor3))] pvec <- as.numeric(classTable[classTable[,5]!="",5]) foldChange <- apply(metabData2, 1, function(x) mean(x[18:32])/mean(x[1:17])) colnames(classTable)[5] <- "Fold Change" classTable[classTable[,5]!="",5] <- foldChange form2 <- nodes(gr.cor3) form2 <- data.frame(form2, as.numeric(nodes(gr.cor3)) %in% which(pvec<0.05)) cnames2 <- c("node.name", "lcolor.#9400D3") colnames(form2) <- cnames2 cpdInfo <- create.pathway.node.attributes(ansConn$classTable, graph=gr.cor3, DB=DB, filename1="path1Diff.noa", filename2="path2Diff.noa", organismId="zma") create.reaction.edge.attributes(classTable, graph=gr.cor3, w=w, reactionM=reactionM, DB=DB, filename="reacDiff.eda") export2cytoscape(gr.cor3, node.label=node.names, cwName="test4", edge.form=form, node.form=form2, cpdInfo=cpdInfo, classTable=classTable) # This script is intended to reproduce the Figure 6, # and see a specific pathway in a different window classTableb <- ansConn$classTable for(i in 1:nrow(classTableb)) if(classTableb[i,6]=="") classTableb[i,6] <- classTableb[i-1,6] classTableb[,6] <- as.numeric(classTableb[,6]) classTableS <- classTableb[which(classTableb[,6] %in% nodes(gr.cor)),] kgr <- get.kgml.positions.kegg("rn00944") cnames <- sapply(sub("^cpd:(C\\d{5}).*$", "\\1", colnames(kgr$adj)), get.name) kgr1 <- as(kgr$adj, "graphNEL") form <- data.frame(nodes(kgr1)) codes <- sub("^cpd:(C\\d{5}).*$", "\\1", colnames(kgr$adj)) form <- cbind(form, codes%in% classTableS[,2]) cnames2 <- c("node.name", "lcolor.#FF0000") colnames(form) <- cnames2 export2cytoscape(kgr1, node.label=cnames, cwName="test2", node.form=form, pos=kgr) # retrieve kegg version with ProbMetab get.kegg.pathways(as.vector(form[form[,2],1]), 20)