#
# This supporting file is intended to quickly comment each package feature 
# analyzing two LC-MS metabolomics datasets. With this code, is possible
# to reproduce the plots of the study case document.
#
#

# Study case document
http://labpib.fmrp.usp.br/methods/probmetab/resources/case-study-probmetab.pdf


# To easy follow the study case text we will divide this documents following 
# the main sections.


# ProbMetab Case Study 1:  Metabolomics dataset with internal standard compounds illustrates annotation performance
# Dataset available at:
# http://sourceforge.net/projects/mzmatch/files/Ideom/IDEOM_demo_mzXMLfiles.zip/download

#
# Preprocessing with xcms
#

# load required libraries
library(ProbMetab)
library(xcms)
library(CAMERA)

# set nslaves for the number of available cores of your machine
# Follow xcms vignette to understand data structure
# http://www.bioconductor.org/packages/release/bioc/vignettes/xcms/inst/doc/xcmsPreprocess.pdf

nslaves <- 4

# positive acquisition mode, directory: 'POS/'
xset <- xcmsSet(
         "POS/", method='centWave', ppm=2, peakwidth=c(10,50),
	    snthresh=3, prefilter=c(3,100), integrate=1, mzdiff=-0.00005,
           verbose.columns=TRUE,fitgauss=FALSE, nSlaves=nslaves
        )

# negative mode
xset2 <- xcmsSet(
         "NEG/", method='centWave', ppm=2, peakwidth=c(10,50),
          snthresh=3, prefilter=c(3,100), integrate=1, mzdiff=-0.00005,
          verbose.columns=TRUE,fitgauss=FALSE, nSlaves=nslaves
         )

# load("probmetab-case1-box00.RData") # run to avoid deal with raw data and go directly to examples


# align retention times across samples, grouping and integration
xsetP <- retcor(xset, method='obiwarp', plottype="none", profStep=1) #positive mode
xsetPnofill <- group(xsetP, bw=5,  mzwid=0.015)
xsetP <- fillPeaks(xsetPnofill)

xsetN <- retcor(xset2, method='obiwarp', plottype="none", profStep=1) #negative mode
xsetNnofill <- group(xsetN, bw=5,  mzwid=0.015)
xsetN <- fillPeaks(xsetNnofill)

#save(list=ls(all=TRUE), file="probmetab-case1-box01.RData")
 #run to save intermediary steps

# standard CAMERA processing
an <- xsAnnotate(xsetP)
an <- groupFWHM(an, perfwhm = 0.6)
an <- findIsotopes(an, mzabs = 0.01)
# This step requires the raw data
# If one wants to run this step
# create a path with the exactly same folders as in
# /home/rsilva/Desktop/Dados_Fabricio_08_2013/
# and download the data for this folder
# http://sourceforge.net/projects/mzmatch/files/Ideom/IDEOM_demo_mzXMLfiles.zip/download
# the same for negative mode below.
an <- groupCorr(an, cor_eic_th = 0.75)
anP <- findAdducts(an, polarity="positive")

an <- xsAnnotate(xsetN)
an <- groupFWHM(an, perfwhm = 0.6)
an <- findIsotopes(an, mzabs = 0.01)
an <- groupCorr(an, cor_eic_th = 0.75)
anN <- findAdducts(an, polarity="negative")

# load("probmetab-case1-box01.RData") # run to skip the previous block

# ProbMetab functions examples
#
# Example of functions for: Ion annotation extraction and database matching  
#

# combine positive and negative acquisition modes, keeping track of individual modes.
# It is possible to combine the acquisition modes setting positive or negative 
# as reference input table.
# comb 1 - used on Table 1
camAnot <- combinexsAnnos(anP, anN) 
camAnot <- combineMolIon(peaklist=camAnot, cameraobj=anP, polarity="pos") 

# Extract and format a set of non redundant putative molecular ions from CAMERA annotation
# with ProbMetab
# comb 2- used on Table 1
ionAnnotP <- get.annot(anP) 
ionAnnotN <- get.annot(anN, polarity="negative") 

# number of isotopic peaks and non redundant putative molecules
sum(ionAnnotP$molIon[,3]==1)
sum(ionAnnotP$molIon[,3]==0)

# This is the file automatically generated by IDEOM [10]⁠: http://mzmatch.sourceforge.net/
# that was used to produce the analysis of filter_comp.xls table.
# 
# Below also follows how to integrate the mzMatch analysis to ProbMetab.
# The mzMatch package can be downloaded at
# http://mzmatch.sourceforge.net/tutorial.mzmatch.r.advanced.php  
# Please set the working directory to where your files are

setwd("mzXMLfiles/POS")
rawfiles <- dir (full.names=TRUE,pattern="\\.mzXML*",recursive=TRUE)
outputfiles <- paste(sub(".mzXML*","",rawfiles),".peakml",sep="")

for (i in 1:length(rawfiles)){
	xset <- xcmsSet(rawfiles[i], method='centWave', ppm=2, peakwidth=c(5,100),
 			snthresh=3, prefilter=c(3,1000), integrate=1, mzdiff=0.001,
 			verbose.columns=TRUE, fitgauss=FALSE, nSlaves=2

	)
	PeakML.xcms.write.SingleMeasurement(xset=xset,filename=outputfiles[i],
		ionisation="negative",addscans=2,
		writeRejected=FALSE,ApodisationFilter=TRUE
	)
}

library (mzmatch.R)
mzmatch.init (4000)
MainClasses <- dir ()
dir.create ("combined_RSD_filtered")
dir.create ("combined_RSD_rejected")
dir.create ("combined")

for (i in 1:length(MainClasses)){
	FILESf <- dir (MainClasses[i],full.names=TRUE,pattern="\\.peakml$",recursive=TRUE)
	OUTPUTf <- paste ("combined/",MainClasses[i],".peakml",sep="")
	if(length(FILESf)>0){
		mzmatch.ipeak.Combine(i=paste(FILESf,collapse=","),
				v=T,rtwindow=30,o=OUTPUTf,combination="set",
				ppm=5,label=paste(MainClasses[i],sep="")
		)
		RSDf <- paste ("combined_RSD_filtered/",MainClasses[i],".peakml",sep="")
		REJf <- paste ("combined_RSD_rejected/",MainClasses[i],".peakml",sep="")

		if(length(FILESf)>1){ 
			mzmatch.ipeak.filter.RSDFilter(i=OUTPUTf,o=RSDf,rejected=REJf,
				rsd=0.8,v=T
			)
	 	else{
			file.copy(OUTPUTf,RSDf)
		}
	}
}


INPUTDIR <- "combined_RSD_filtered"
FILESf <- dir (INPUTDIR,full.names=TRUE,pattern="\\.peakml$")
mzmatch.ipeak.Combine(i=paste(FILESf,collapse=","),v=T,rtwindow=30,o="combined.peakml",combination="set",ppm=5)
mzmatch.ipeak.filter.NoiseFilter(i="combined.peakml",o="combined_noisef.peakml",v=T,codadw=0.8)
mzmatch.ipeak.filter.SimpleFilter(i="combined_noisef.peakml", o="combined_sfdet.peakml", mindetections=3, v=T)
mzmatch.ipeak.filter.SimpleFilter(i="combined_sfdet.peakml",o="combined_highintensity.peakml", minintensity=1000, v=T)
PeakML.GapFiller(filename = "combined_highintensity.peakml", ionisation = "detect", Rawpath = NULL, outputfile = "highintensity_gapfilled.peakml", ppm = 0, rtwin = 0)
mzmatch.ipeak.sort.RelatedPeak(i="highintensity_gapfilled.peakml",v=T,o="mzMatch_output.peakml",basepeaks="mzMatch_basepeaks.peakml",ppm=3,rtwindow=6)
annot <- paste("relation.id,relation.ship,codadw,charge")
mzmatch.ipeak.convert.ConvertToText(i="mzMatch_output.peakml",o="mzMATCHoutput.txt",v=T,annotations=annot)

# The PeakML importing function does not work with gap filled files
# so, we have to prepare a PeakML annotated file.

mzmatch.ipeak.sort.RelatedPeak(i="combined_highintensity.peakml",v=T,o="mzMatch_outputPOS.peakml",basepeaks="mzMatch_basepeaks.peakml",ppm=3,rtwindow=6)

# import PeakML file as xcmsSet object 

mzAnnotP <- get.Mzmatch.annot("POS/mzMatch_outputPOS.peakml", onlyBP=FALSE)

# Repeat the process above to Negative mode to create a separate PeakML file
# this is how comb 3 (see below) was obtained

comb3 <- combineMolIon(mzAnnotP, mzAnnotN)

# include CAMERA non-annotated compounds, and snr retrieval 
# comb 2+ - used on Table 1	
ionAnnotP2plus <- get.annot(anP, allowMiss=TRUE, xset=xsetPnofill, toexclude=c("blank", "medium", "QC")) 
ionAnnotN2plus <- get.annot(anN, polarity="negative", allowMiss=TRUE, xset=xsetNnofill, toexclude=c("blank", "medium", "QC"))
 
# Following with comb 2+
comb2plus <- combineMolIon(ionAnnotP2plus, ionAnnotN2plus)
sum(comb2plus$molIon[,3]==1)
sum(comb2plus$molIon[,3]==0)

# this mapping of compounds-to-reactions from KEGG is automatically loaded with ProbMetab
DB <- KEGGcpds 
reactionM <- create.reactionM(DB, comb2plus, ppm.tol=8)

# number of masses with candidates inside the fixed mass window
# and masses with more than one candidate
length(unique(reactionM[reactionM[,"id"]!="unknown",1])) 
sum(table(reactionM[reactionM[,"id"]!="unknown",1])>1)

# ProbMetab functions examples
#
# Example of functions for: Probability modeling and estimation 
#

# Calculate the ratio between observed and theoretical isotopic patterns.
# If you don't have an assessment of carbon offset to carbon number prediction 
# skip this step and use the reactionM as input to weigthM function. 
isoPatt <- incorporate.isotopes(comb2plus, reactionM, comb=1, samp=12:23, DB=DB)  

# calculate the likelihood of each mass to compound assignment using mass accuracy, and isotopic pattern, when present
wl <- weightM(isoPatt,intervals=seq(0,1000,by=500), offset=c(3.115712, 3.434146, 2.350798)) 

# codify the relation between compounds, given by reaction present in the biological database DB
w <- design.connection(reactionM)

# Example of generic mass differences to ProbMetab modeling framework
exp_masses <- matrix(c(
			1035, 242.09100,
			500, 224.07900,
			711, 90.03215),
		     nrow=3
	      )

colnames(exp_masses) <- c("rt", "massObs")

reac_matrix <- matrix(0, ncol=4)
ppm.tol <- 10

# match masses in a given mass window
for(i in 1:nrow(exp_masses)) {
	logical <- abs(((exp_masses[i,2]-db.mass)/db.mass)*10^6) < ppm.tol
	if(sum(logical)){
		reac_matrix0 <- cbind(matrix(exp_masses[i,], nrow=1),
		as.matrix(valid_formulas[logical,c("mass", "id")]))
	}
	reac_matrix <- rbind(reac_matrix, reac_matrix0)
}
reac_matrix <- reac_matrix[-1,]

reac_matrix <- cbind(reac_matrix, rep("", nrow(reac_matrix)))
m_diff <- outer(as.numeric(reac_matrix[,3]),
		  as.numeric(reac_matrix[,3]), "-"
	   )
for(i in 1:nrow(m_diff)){
	for(j in 1:ncol(m_diff)){
		log <- abs(m_diff[i,j])> gen_reactions[,3]-0.01 &
			abs(m_diff[i,j]) < gen_reactions[,3]+0.01
		if(sum(log)){
			reac_matrix[i,5] <- paste(gen_reactions[log,4],
							collapse=";")
		}
	}
}
cnames <- c("rt", "massObs","massDB", "id", "reactions")
colnames(reac_matrix) <- cnames

# Please install the suggested packages if you want to reproduce 
# the retention time modeling
# install.packages(c( "bootstrap", "leaps", "mgcv"))
# The IDEOM files can be found on "EXTERNAL FILES" link on main page 
db <- read.csv("IDEOM_v18_DB.csv")
rtTab <- read.csv("standardRetentionTime.csv")
testM <- rtTab[,5:10]
predSet <- data.frame(id=reactionM[reactionM[,5]!="unknown", 5],
		data=as.numeric(reactionM[reactionM[,5]!="unknown", 1])/60
	)
l1 <- sapply(predSet$id, function (x)
				which(db[,"KEGGid."]==
					sub("cpd:","",as.character(x))
				)
		)
l1[which(unlist(lapply(l1, length))==0)] <- 41639
db2 <- cbind(db[,"KEGGid."],db[,21:49])
db2 <- rbind(as.matrix(db2), rep(0,30))
sapply(which(unlist(lapply(l1, length))>1), function(x)
							l1[[x]] <<- l1[[x]][1]
	)
v1 <- unlist(l1)
cols <- c(3,4,5,6,8,9)
predM <- db2[v1, cols]

# some compounds have missing descriptors, for those is not
# possible to predict the retention time
predM[which(is.na(predM), arr.ind=TRUE)[,1],] <- 0
predM <- apply(predM, 2, as.numeric)
sum(apply(predM, 1, sum)==0)
testSet <- data.frame(id=rtTab[,1], data=rtTab[,2])
descData <- list(predM=predM, testM=testM)

# Reproducing Creek et al. (2012) to format the information to be added to the 
# probabilistic model
myresult <- rt.predict(testSet, predSet, descData, voidTime=4.5)
myweight <- weightRT(myresult, reactionM)

# Example of Hadamard product (element-wise), which allows easy insertion
# of information on the model.

myweight$wm[1:5,1:5]
wm[1:5,1:5]
wm[1:5,1:5]*myweight$wm[1:5,1:5]

# calculate the posterior probabilities according with the proposed model
x <- 1:ncol(wl$wm)
y <- 1:nrow(wl$wm) # These two will be wrapped in a the function in future

# should take about 30 minutes for a regular desktop computer 
#(ubuntu 64bits, with 8gb of memory), with
# a problem of the same size (number of masses) of the presented in this document.
system.time(conn <- gibbs.samp(x, y, 5000, w, wl$wm)) 

# export the classification table, optionally as R matrix or .html file 
# This table contains the results from Table 3 on the Study Case Document. 
system.time(ansConn <- export.class.table(conn, reactionM, comb2plus, filename="AnalysisExample", html=TRUE, DB=DB))  

# ProbMetab functions examples
#
# Example of functions for: Comprehensive output representation
#


# This script is intended to reproduce the Figure 4,
# as well as to provide a working example on the main features of ProbMetab
# graph functions
#
# Following this analysis we generate a graph with reactions overlaid with correlations
# and export use additional information to provide formatting to this graph.

# calculate the correlations and partial correlations and cross reference then with reactions
# load(“probmetab-case1-box02.RData”) # load the necessary objects to draw the graph

mw <- which(w==1,arr.ind=TRUE)
corList <- reac2cor(mw, ansConn$classTable[,-c(8:18)], corprob=0)

gr.cor <- ftM2graphNEL(corList$cor.vs.reac)
classTable <- ansConn$classTable 
node.names <- apply(classTable[classTable[,4]!="",1:7], 1, function(x) paste(x[6], "-",
 				paste(strsplit(as.matrix(x[7]), "#")[[1]][1:2], collapse="\n"), sep="")
		)
node.names <- sub("^\\s+", "", node.names)
snode.names <- node.names[as.numeric(nodes(gr.cor))]

# Example of some edge and node attributes see export2graph man page to more details
form <- edgeNames(gr.cor)
form <- data.frame(form, form %in% apply(corList$signif.cor[corList$signif.cor[,1]>0.75,2:3], 1, paste, collapse="~"))
form <- data.frame(form, form[,1] %in% apply(corList$signif.cor[corList$signif.cor[,1]<(- 								0.75),2:3], 1, paste, collapse="~")
		)
# Format the edge attribute table with exadecimal color codes
# to export to Cytoscape. In this case red to positive correlations
# higher than 0.75, and green to negative correlations smaller than
# -0.75
cnames <- c("edge.name", "color.#FF0000", "color.#006400")
colnames(form) <- cnames

# index of known identity compounds from Supplementary File 4
csel <- "119 393 1106 661 1264 418 482 423 1114 413 459 62 1136 1067 362 767 656 92 618 109 555 1291 516 1123 1128 553 379 701 242 302 697 356 1155 47 184 896 3 40 182 4 1098 41 1031 782 45 246 560 416 1088 778 81 51 88 31 383 24 352 1121 730 744 425 1120 120 370 1086 782 757 451 52 58 498 769 376 337 551 69 477 548 543 677 526 581 766 933 90 870 317 332 212 396 594 713 748"

csel <- as.numeric(strsplit(csel, " ")[[1]]) 

form2 <- nodes(gr.cor)
form2 <- data.frame(form2, form2 %in% csel)
cnames2 <- c("node.name", "lcolor.#0000FF")
colnames(form2) <- cnames2

# The index of all nodes
sn <- as.numeric(sub("(^\\d+)-.+", "\\1", snode.names))

# Where the known compounds are in the vector of correlated nodes
coord <- sapply(csel, function(x) which(sn==x))
scoord <- coord[unlist(lapply(coord, length))!=0]
scoord <- unlist(scoord)

# names of known identity compounds from Supplementary File 4
cpdnames <- "AMP%Urocanate%Uracil%Pseudouridine 5'-phosphate%D-Ribose 5-phosphate%Guanine%D-Sorbitol%Propanoyl phosphate%Maleamate%L-Methionine%sn-Glycerol 3-phosphate%N6,N6,N6-Trimethyl-L-lysine%(S)-Malate%N6-Acetyl-L-lysine%L-Cysteine%Ascorbate%Glutathione%Pseudouridine%Adenosine%5'-Methylthioadenosine%L-Cystathionine%Inosine%S-Sulfo-L-cysteine%5-Amino-4-imidazole carboxylate%Mesaconate%N-Acetyl-D-glucosamine%4-Guanidinobutanal%S-Adenosyl-L-methionine%alpha-D-Glucosamine 1-phosphate%Glycine%Riboflavin%L-2,4-Diaminobutanoate%D-Xylonolactone%Xanthine%S-Adenosyl-L-homocysteine%D-Glucose%L-Arginine%L-Lysine%L-Serine%Putrescine%(R)-3-Hydroxybutanoate%L-Glutamate%Urate%D-Gluconic acid%(S)-4-Hydroxymandelonitrile%Hypoxanthine%Carnosine%L-Arabinose%L-Alanine%Citrate%Folate%Isopyridoxal%L-Cystine%L-Asparagine%L-Glutamate 5-semialdehyde%Nicotinamide%L-Valine%Taurine%2-Hydroxy-3-oxopropanoate%(S)-3-Methyl-2-oxopentanoic acid%L-Histidine%L-Threonine%Phenolsulfonphthalein%Imidazole-4-acetate%Pyruvate%L-Gulonate%Allantoin%N(pi)-Methyl-L-histidine%Pyridoxamine%L-Tyrosine%LL-2,6-Diaminoheptanedioate%3-(4-Hydroxyphenyl)pyruvate%L-1-Pyrroline-3-hydroxy-5-carboxylate%Hypotaurine%Pantothenate%N6-Acetyl-N6-hydroxy-L-lysine%D-Glucosamine%N2-(D-1-Carboxyethyl)-L-lysine%sn-glycero-3-Phosphoethanolamine%Maltose%L-Kynurenine%Cytidine%D-Glucuronolactone%Succinate%Thymidine%N-Acetylneuraminate 9-phosphate%Glycerol%Diethanolamine%D-Glucose 6-phosphate%Ethanolamine phosphate%L-Arginine phosphate%CDP-ethanolamine%Deoxyribose"

cpdnames <- strsplit(cpdnames, "%")[[1]]

# replace the node label of compounds identified with the true compound name
scpdnames <- cpdnames[unlist(lapply(coord, length))!=0]
snode.names[scoord] <- scpdnames

cpdnames <- as.character(sapply(classTable[classTable[,2]!="unknown",2], function(x) DB$name[DB$id==as.character(x)])
				)
classTable <- as.matrix(classTable)
classTable[classTable[,2]!="unknown",2] <- cpdnames 

# create an initial visualization without leaving R environament
createJSONToCytoscape(gr=gr.cor, node.label=snode.names) 
openGraph("network.json", classTable=classTable, openBrowser=TRUE)

# This functions extracts pathway information from KEGG API,
# and needs the KEGG codes, so we have to load the original
# classification table again

cpdInfo <- create.pathway.node.attributes(ansConn$classTable, graph=gr.cor, DB=DB, filename1="path1.noa", filename2="path2.noa", organismId="tbr") 
create.reaction.edge.attributes(classTable, graph=gr.cor, w=w, reactionM=reactionM, DB=DB, filename="reac.eda") 
export2cytoscape(gr.cor, node.label=snode.names, cwName="test4",node.form=form2, edge.form=form, cpdInfo=cpdInfo, classTable=classTable)


# ProbMetab Case Study 2:  Metabolomics dataset with internal standard compounds illustrates annotation performance
# Dataset available at:
# http://labpib.fmrp.usp.br/methods/probmetab/resources/sugar_cane_rawdata.zip 

# ProbMetab suggested application
# initial parameters from http://www.nature.com/nprot/journal/v7/n3/fig_tab/nprot.2011.454_T1.html
# load required packages
library(ProbMetab)
library(xcms)
library(CAMERA)

# Preprocessing
 xset <- xcmsSet(".", method='centWave', ppm=15, 
			peakwidth=c(5,20), prefilter=c(0,0)
	   )
 xset <- group(xset)
 xset2 <- retcor(xset,method="obiwarp",profStep=0.1)
 xset2 <- group(xset2, mzwid=0.015,minfrac=0.5,bw =2)
 xset3 <- fillPeaks(xset2)

an <- annotate(xset3, perfwhm=0.6, cor_eic_th=0.75,
         mzabs = 0.01 , polarity="positive")

# load(“probmetab-case2-box00.RData”) # run to avoid to deal with raw data and go directly to examples

# example of biocyc API alternative usage
# Chose an organism to download metabolites on its known metabolism
# ara is the organism code for Arabidopsis thaliana
vpth <- get.pathway.by.organism.biocyc("ara")
# optionally use parallel processing
library(doMC) 
registerDoMC() 

#  Retrieve all compounds associated to known Arabidopsis pathways
m <-   foreach(i=1:length(vpth)) %dopar% get.compounds.by.pathway.biocyc(vpth[i])
m2 <- do.call("rbind", m) 
m3 <- unique(m2) 
m4 <- m3[-grep("Error", m3[,3]),]

# Retrieve all single compound reactions for each compounds
rlist <- list() 
foreach(i=1:nrow(m4)) %dopar% get.reactions.by.compound.biocyc(m4[i,1]) 
# before attaching the reactions, verify if all compounds have at least one reaction
which(unlist(lapply(rlist, length))==0)
m4 <- cbind(m4, unlist(rlist))
colnames(m4)[4] <- "reactions"
m4[,3] <- gsub("\\s", "", m4[,3])

# In a similar way we can retrieve information from KEGG
keggdb <- read.table("http://rest.kegg.jp/link/compound/reaction") 
head(keggdb) 
dim(keggdb) 
get.name(keggdb[1,2]) 
get.formula.kegg(keggdb[1,2]) 
get.name(keggdb[2,2]) 
get.formula.kegg(keggdb[2,2]) 

# Optionally the user can extract all compound information in the standard format
# with the KEGG organism code
# http://www.kegg.jp/kegg/catalog/org_list.html
system.time(ath <- build.database.kegg("ath"))

# We provide a formated KEGG database loaded with the package
# to perform the downstream analysis
# Database matching
DB <- KEGGcpds
ionAnnot <- get.annot(an, allowMiss=TRUE, minint=1000)
reactionM <- create.reactionM(DB, molIon=ionAnnot, ppm.tol=30)
wl <-weightM(reactionM, useIso=FALSE) 
w <- design.connection(reactionM) 

# Probability calculations
x <- 1:ncol(wl$wm)
y <- 1:nrow(wl$wm)
conn <- gibbs.samp(x, y, 5000, w, wl$wm)

# Output representation
# This table contains the results from Table 5 on the Study Case Document. 

system.time(ansConn <- export.class.table(conn, reactionM, ionAnnot, 
							filename="AnalysisExample", 
							html=TRUE,  m.test="t.test", 
							class1="F_I_Nature", class2="F_S_Nature",
							DB=DB)
)

mw <- which(w==1,arr.ind=TRUE) 
corList <- reac2cor(mw, ansConn$classTable, corths=0.7, corprob=0) 

# creating and formatting a graph
# load(“probmetab-case2-box01.RData”) # load the necessary objects to draw the graph

classTable <- ansConn$classTable 
gr.cor <- ftM2graphNEL(corList$cor.vs.reac)
node.names <- apply(classTable[classTable[,6]!="",1:7], 1, function(x) paste(x[6], "-", paste(strsplit(as.matrix(x[7]), "#")[[1]][1:2], collapse="\n"), sep="")) 
node.names <- sub("^\\s+", "", node.names) 
node.names <- node.names[as.numeric(nodes(gr.cor))]

# color edges that represent correlations between mass peaks higher than
# 0.7 of red 
form <- edgeNames(gr.cor)
form <- data.frame(form, form %in% apply(corList$signif.cor[corList$signif.cor[,1]>0.70,2:3], 1, paste, collapse="~"))

# there is no negative correlation
form <- data.frame(form, form[,1] %in% apply(corList$signif.cor[corList$signif.cor[,1]<(-0.70),2:3], 1, paste, collapse="~"))
cnames <- c("edge.name", "color.#FF0000", "color.#006400")
colnames(form) <- cnames

# color nodes representing differential representation of blue
pvec <- as.numeric(classTable[classTable[,5]!="",5])
form2 <- nodes(gr.cor) 
form2 <- data.frame(form2, as.numeric(nodes(gr.cor)) %in% which(pvec<0.05)) 
cnames2 <- c("node.name", "lcolor.#FF0000")
colnames(form2) <- cnames2

# export the basic graph format do a quick web visualization
createJSONToCytoscape(gr=gr.cor, node.label=node.names)
openGraph("network.json", classTable=classTable, openBrowser=TRUE) 

# format attribute tables to export to cytoscape visualization
cpdnames <- as.character(sapply(classTable[classTable[,2]!="unknown",2], function(x) DB$name[DB$id==as.character(x)]))
classTable <- as.matrix(classTable)
classTable[classTable[,2]!="unknown",2] <- cpdnames 

cpdInfo <- create.pathway.node.attributes(ansConn$classTable, graph=gr.cor, DB=DB, filename1="path1.noa", filename2="path2.noa", organismId="zma")  

create.reaction.edge.attributes(classTable, graph=gr.cor, w=w, reactionM=reactionM, DB=DB, filename="reac.eda")  

# take care, there are not negative correlations, so the column 3 of form matrix is empty
export2cytoscape(gr.cor, node.label=node.names, cwName="test4", edge.form=form[,-3], node.form=form2, cpdInfo=cpdInfo, classTable=classTable)

# This script is intended to reproduce the Figure 5,
# showing how to overlay information, and search for meaninful
# pathway alterations.

# water 
corList1 <- reac2cor(mw, ansConn$classTable[,-c(25:39)], corths=0.7, corprob=0) 

# drought 
corList2 <- reac2cor(mw, ansConn$classTable[,-c(8:24)], corths=0.7, corprob=0) 
gr.cor2 <- ftM2graphNEL(corList2$cor.vs.reac) 
gr.cor1 <- ftM2graphNEL(corList1$cor.vs.reac) 

mw1 <- t(sapply(edgeNames(gr.cor1), function(x) strsplit(x, "~")[[1]])) 
mw2 <- t(sapply(edgeNames(gr.cor2), function(x) strsplit(x, "~")[[1]])) 
mw3 <- unique(rbind(mw1, mw2)) 
gr.cor3 <- ftM2graphNEL(mw3) 

inOne <- setdiff(edgeNames(gr.cor1), edgeNames(gr.cor2)) 
inTwo <- setdiff(edgeNames(gr.cor2), edgeNames(gr.cor1)) 

form <- edgeNames(gr.cor3) 
form <- data.frame(form, form %in% inOne) 
form <- data.frame(form, form[,1] %in% inTwo) 

# Format and Normalize data do calculate fold change
metabData <- classTable[classTable[,6]!="",] 
metabData2 <- apply(metabData[,8:39], 2, as.numeric) 
rownames(metabData2) <- metabData[,6] 
normalize.medFC <- function(mat) { 
   # Perform median fold change normalisation 
   #           X - data set [Variables & Samples] 
   medSam <- apply(mat, 1, median) 
   medSam[which(medSam==0)] <- 0.0001 
   mat <- apply(mat, 2, function(mat, medSam){ 
      medFDiSmpl <- mat/medSam 
      vec<-mat/median(medFDiSmpl) 
      return(vec) 
   }, medSam) 
   return (mat) 
} 
metabData2 <- normalize.medFC(metabData2) 
	  
# Calculate differential correlations
# The DiffCorr package can be found at: http://diffcorr.sourceforge.net/ 
source("../DiffCorr_src/R/DiffCorr.R") 
comp.2.cc.fdr(output.file="resM.txt", metabData2[,1:17], metabData2[,18:32], threshold=0.05) 

res <- read.delim("resM.txt") 

nres <- paste(res[,1], "~", res[,2], sep="") 
form <- data.frame(form, form[,1] %in% nres) 
# red for only in water 
# green for only in drought 
# width for differential correlation 
cnames <- c("edge.name", "color.#FF0000", "color.#006400", "width.5") 
colnames(form) <- cnames 

node.names <- apply(classTable[classTable[,6]!="",1:7], 1, function(x) paste(x[6], "-", paste(strsplit(as.matrix(x[7]), "#")[[1]][1:2], collapse="\n"), sep="")) 
node.names <- sub("^\\s+", "", node.names) 
node.names <- node.names[as.numeric(nodes(gr.cor3))] 

pvec <- as.numeric(classTable[classTable[,5]!="",5]) 
foldChange <- apply(metabData2, 1, function(x) mean(x[18:32])/mean(x[1:17])) 
colnames(classTable)[5] <- "Fold Change" 
classTable[classTable[,5]!="",5] <- foldChange 
form2 <- nodes(gr.cor3) 
form2 <- data.frame(form2, as.numeric(nodes(gr.cor3)) %in% which(pvec<0.05)) 
cnames2 <- c("node.name", "lcolor.#9400D3") 
colnames(form2) <- cnames2 

cpdInfo <- create.pathway.node.attributes(ansConn$classTable, graph=gr.cor3, DB=DB, filename1="path1Diff.noa", filename2="path2Diff.noa", organismId="zma") 
create.reaction.edge.attributes(classTable, graph=gr.cor3, w=w, reactionM=reactionM, DB=DB, filename="reacDiff.eda") 

export2cytoscape(gr.cor3, node.label=node.names, cwName="test4", edge.form=form, node.form=form2, cpdInfo=cpdInfo, classTable=classTable)


# This script is intended to reproduce the Figure 6,
# and see a specific pathway in a different window 
classTableb <- ansConn$classTable 
for(i in 1:nrow(classTableb)) if(classTableb[i,6]=="") classTableb[i,6] <- classTableb[i-1,6] 
classTableb[,6] <- as.numeric(classTableb[,6]) 
classTableS <- classTableb[which(classTableb[,6] %in% nodes(gr.cor)),] 
kgr <- get.kgml.positions.kegg("rn00944") 
cnames <- sapply(sub("^cpd:(C\\d{5}).*$", "\\1", colnames(kgr$adj)), get.name) 
kgr1 <- as(kgr$adj, "graphNEL") 
form <- data.frame(nodes(kgr1)) 
codes <- sub("^cpd:(C\\d{5}).*$", "\\1", colnames(kgr$adj)) 
form <- cbind(form, codes%in% classTableS[,2]) 
cnames2 <- c("node.name", "lcolor.#FF0000") 
colnames(form) <- cnames2 
export2cytoscape(kgr1, node.label=cnames, cwName="test2", node.form=form, pos=kgr)

# retrieve kegg version with ProbMetab
get.kegg.pathways(as.vector(form[form[,2],1]), 20)