# Requires lib/comb.R from the FEBA code base # See https://bitbucket.org/berkeleylab/feba # Load classification of gaps in IMG LoadAuxo = function() { d = read.csv("workbook/auxo/IMG_Comparison.csv",as.is=T); cat("Read",nrow(d),"rows from IMG_Comparison.csv\n"); d = d[d$category != "",]; d$category = sub("?","",d$category,fixed=T); d$Organism = sub(" $", "", d$Organism, perl=T); d$required.for = sub(", +", ", ", d$required.for, perl=T); orginfo = read.delim("data/FEBA/orginfo",as.is=T); # HerbieS was filtered out of comb stepcmp <<- merge(data.frame(Organism=paste(orginfo$genus,orginfo$species,orginfo$strain), org=orginfo$name), d); stopifnot(nrow(stepcmp) == nrow(d)); d2 = lapply(split(stepcmp, 1:nrow(stepcmp)), function(d) data.frame(org=d$org[1], ec=d$EC.number[1], desc=d$EC.description[1], category=d$category[1], aa = sub(" ","",strsplit(d$required.for, ",")[[1]]))); d2 = do.call(rbind,d2); # Lower codes are more important to report # 1 = unfilled # 2 = filled in this work # 3 = obvious # 4 = gene model or known # 5 = unspecific # (no longer used) d2$code = ifelse(d2$category == "unfilled", 1, ifelse(d2$category %in% "filled", 2, ifelse(d2$category == "obvious", 3, ifelse(d2$category %in% c("genemodel","known"), 4, ifelse(d2$category %in% "unspecific", 5, NA))))); if (any(is.na(d2$code))) warning("NA values in class, ignored"); d2 = d2[!is.na(d2$code),]; d2 = d2[order(d2$org,d2$aa,d2$code),]; d2 = d2[,c("org","aa","code")]; d2 = d2[!duplicated(d2[,c("org","aa")]),]; aacmp <<- d2; cat(nrow(aacmp),"rows in aacmp\n"); auximg <<- read.delim("workbook/auxo/IMG.tab",as.is=T); } # Load fitness data for 10 bacteria LoadFitness = function() { LoadOrgs(words("azobra BFirm Miya HerbieS Marino Phaeo psRCH2 MR1 Smeli Korea"), html="data/FEBA/html.auxo/"); SetupEssentials(base="data/FEBA"); d = read.delim("data/FEBA/orginfo",as.is=T); orginfo <<- subset(d, name %in% names(orgs)); } PlotGaps = function(file="workbook/auxo/PlotGaps.eps") { pdf(file,width=6.75,height=6.0,pointsize=10); PlotImgSimple(main=""); #PlotGapCounts2(main="B. Number of Gaps"); dev.off(); } PlotGapCounts2 = function(main="") { oldpar = par(mar=c(4,3.5,2.5,0.5), mgp=c(2,1,0)); d = stepcmp[,c("org","required.for","category")] d2 = strsplit(d$required.for, ", *", perl=T); d = do.call(rbind, lapply(1:nrow(d), function(i) data.frame(org=d$org[i], aa = d2[[i]], category=d$category[i]))); tab = table(ifelse(d$category %in% c("known","genemodel"), "other", as.character(d$category)), d$aa); tab = tab[, match(aaordering(), colnames(tab)) ]; colnames(tab) = aaordering(); # to replace NA values #tab[is.na(tab)] = 0; # keep those rows entirely blank tab = tab[words("obvious unspecific other filled unfilled"),]; cols = c("darkgrey","lightgreen","lightblue","darkgreen","red"); barplot(tab, beside=F, main=main, ylab="Number of Steps", las=2, col=cols); legend("topright", rev(ifelse(row.names(tab)=="other", "bad model/known", row.names(tab))), fill=rev(cols), border=NA, bty="n"); par(oldpar); } PlotGapsOld = function(file="workbook/auxo/PlotGaps.eps") { if(!is.null(file)) { setEPS(); postscript(file,width=7.25,height=4.0,pointsize=10); } layout(matrix(c(1,2), nrow=1, byrow=T), widths=c(2/3,1/3)); PlotImgSimple(main="A. Amino acids with Gaps"); PlotGapCounts(main="B. Classification of Gaps"); if(!is.null(file)) dev.off(); } PlotObvious = function(file="workbook/auxo/PlotObvious.eps") { cand = GetObviousToShow(); stopifnot(nrow(cand)>=8); cand$label = sub("[.]$","",cand$EC.description,perl=T); cand$label = sub(" subunit hisH","", cand$label); cand$label = paste(cand$label, " (for ", cand$required.for, ")", sep=""); if(!is.null(file)) { setEPS(); postscript(file,width=7.25,height=4.75,pointsize=10); } # extra space at top for label on top item oldpar = par(mfrow=c(1,2), mar=c(4,1,3,0), mgp=c(3,1,0)); PlotDef(cand$org[1:4], cand$LocusTag[1:4], cand$label[1:4]); abline(v=0, col=8); topLegend = 4; text(-6.5, topLegend, "without amino acids", col="red", adj=c(0,1)); text(-6.5, topLegend-1.4*strheight("A"), "+ one amino acid", col="blue", adj=c(0,1)); text(-6.5, topLegend-2*1.4*strheight("A"), "rich", col="darkgreen", adj=c(0,1)); PlotDef(cand$org[5:8], cand$LocusTag[5:8], cand$label[5:8]); abline(v=0, col=8); # text(0.5, 1.2, "without\namino acids", col=2, adj=c(0,0.5), xpd=T, cex=0.9); # text(0.5, 0.85, "with a.a.", col=1, adj=c(0,0.5), xpd=T, cex=0.9); if(!is.null(file)) dev.off(); } GetObviousToShow = function() { d = subset(stepcmp, category %in% "obvious" & fitness %in% "auxotroph"); o = data.frame(name=orginfo$name, fullname=paste(orginfo$genus,orginfo$species,orginfo$strain)); d = merge(d, o, by.x="Organism", by.y="fullname"); set.seed(10032016); # old: show ten at random #d2 = d[sample(1:nrow(d),10),]; #d2 = d2[order(d2$LocusTag),]; # new: show one at random from each bug # (except for Smeli -- the only obvious one has 2 candidates with little phenotype, # and BFirm, which has no obvious gaps, leaving 8 bugs to show) d = split(d, d$org, drop=T); d = do.call(rbind, lapply(d, function(x) x[sample(1:nrow(x),1),])); d = d[order(d$Organism, d$LocusTag),]; return(d); } PlotGapCounts = function(main="") { # and then some sort of pie chart d = ifelse(stepcmp$category %in% c("genemodel","known","various"),"other",stepcmp$category); d = table(d); d = d[c(2,1,5,4,3)]; oldpar = par(mgp=c(2,1,0), mar=c(0.5,4+1,2,6)); # extra left margin to narrow it barplot(matrix(d,ncol=1), ylab="Number of steps across 10 bacteria", main=main, col=c("lightpink","blue","lightblue","brown","grey90"), border=F); # xlim=c(0.1,1.5) top = cumsum(d); bot = c(0, top[-length(d)]); adj = 0.75 * ifelse(names(d) %in% "other", 1, ifelse(names(d) %in% "unfilled", -1, 0)); text(1.2, adj + (top+bot)/2, paste(" ",names(d), " ", "(", d, ")", sep=""), adj=c(0,0.5), xpd=T, col=c("pink3", "darkblue","blue", "brown","grey40")); #text(0.2*0.05 + 1.2*0.95, (top+bot)/2, d, adj=c(1,0.5)); } aaordering = function() words("Met Lys His Arg Leu Ile Val Phe Tyr Trp Pro Ser Thr Gly Cys Glu Gln Ala Asn Asp"); # For each aa/organism pair in the IMG Comparison, # a newline-delimited list of single-letter codes. CodesPerCell = function() { d = lapply(split(stepcmp, 1:nrow(stepcmp)), function(d) data.frame(org=d$org[1], ec=d$EC.number[1], desc=d$EC.description[1], category=d$category[1], aa = sub(" ","",strsplit(d$required.for, ",")[[1]]))); d = do.call(rbind,d); cat("Total codes from stepcmp: ", nrow(d), "\n"); d$code = ifelse(d$category == "unfilled", "?", ifelse(d$category=="filled", "F", ifelse(d$category=="known", "k", ifelse(d$category=="genemodel", "b", ifelse(d$category=="obvious", "o", ifelse(d$category=="unspecific", "*", "X")))))); d = aggregate(d[,c("code"),drop=F], d[,c("org","aa")], function(x) { cnt = table(x); #out = paste(names(x), ifelse(x > 1, paste("(",x, ")", sep=""), ""), sep=""); out = paste(names(cnt), ifelse(cnt ==1, "", ifelse(cnt == 2, names(cnt), cnt)), sep=""); paste(out, collapse="\n"); }); } # show all codes for each cell PlotImgSimple = function(main="", cexCode = 0.9) { aa = sort(unique(c(as.character(auximg$aa),as.character(aacmp$aa)))); aaorder = data.frame(aa=aa,row=1:length(aa)); codes = CodesPerCell(); # For each org, a list of values # This is used only for color coding the cells makes = lapply(split(auximg,auximg$org), function(d) { d = merge(aaorder, d, all.x=T); # makes will be NA for some values d = d[order(d$aa),]; out = d$makes; names(out) = as.character(d$aa); out; }); # 1 row per organism, 1 column per a.a. makes = t(as.data.frame(makes)); # -1 for auxotroph, 1 for prototroph, 0 for no prediction makes2 = apply(makes, 2, function(x) ifelse(is.na(x), 0, ifelse(x,1,-1))); orgtab = unique(stepcmp[,c("org","Organism")]); row.names(makes2) = orgtab$Organism[match(row.names(makes2), orgtab$org)]; makes2 = makes2[order(row.names(makes2), decreasing=T),]; # apply clustering to columns (amino acids) not rows #d = hclust(dist(t(makes2), method="manhattan")); makes2 = makes2[,aaordering() ]; oldpar = par(mar=c(5.0,1,2,12)); image(t(makes2), breaks=seq(-1.5,1.5,1), col=c("red","grey","green"), xaxt="n", yaxt="n", bty="n", main=main); x = seq(0, 1, length.out=ncol(makes2)); y = seq(0, 1, length.out=nrow(makes2)); mtext(row.names(makes2), side=4, at=y, las=1, cex=0.8, line=0.25); mtext(colnames(makes2), side=1, at=x, las=2, adj=1, line=0.25, cex=0.9); # and x and y to codes d = merge(codes, merge(aacmp[,c("org","aa")], merge(orgtab, data.frame(Organism=row.names(makes2), y=y)))); d = merge(d, data.frame(aa=colnames(makes2), x=x)); text(d$x, d$y, d$code, adj=c(0.5,0.5), cex=cexCode); # add lines between entries diffx = diff(x[1:2]); xlines = c(x[-1] - diffx/2); for (xpos in xlines) abline(v=xpos); diffy = diff(y[1:2]); for (ypos in c(y[-1] - diffy/2)) abline(h=ypos); # rectangles to emphasize filled gaps #i = which(grepl("F",d$code)); #for (mult in c(0.5 * 0.9, 0.5 * 0.95)) { # rect(d$x[i]-diffx*mult, d$y[i]-diffy*mult, d$x[i]+diffx*mult, d$y[i]+diffy*mult, border="blue",col=NA); #} w = strwidth("A")*1.25; h = strheight("A")*1.5; lx = 0.005; ly = -4*h; rect(lx, ly, lx+w, ly-h, col="green", border=NA, xpd=T); rect(lx, ly-h, lx+w, ly-2*h, col="red", border=NA, xpd=T); rect(lx, ly-2*h, lx+w, ly-3*h, col="grey", border=NA, xpd=T); lx2 = lx + w*1.5; text(lx2, ly - h/2, "IMG prototroph", adj=c(0,0.5), xpd=T); text(lx2, ly - 1.5*h, "IMG auxotroph", adj=c(0,0.5), xpd=T); text(lx2, ly - 2.5*h, "no IMG prediction", adj=c(0,0.5), xpd=T); # And legend of labels across the bottom lx1 = 0.45; #mtext("Codes: ", side=1, line=2, at=lx1, adj=1); mtext("o 'obvious'", side=1, line=2, at=lx1, adj=0); mtext("b bad gene model", side=1, line=3, at=lx1, adj=0); mtext("* nonspecific transaminase", side=1, line=4, at=lx1, adj=0); lx2 = 0.85; mtext("k known", side=1, line=2, at=lx2, adj=0); mtext("F filled", side=1, line=3, at=lx2, adj=0); mtext("? unfilled", side=1, line=4, at=lx2, adj=0); par(oldpar); } # one value per cell PlotImgSimpleOld = function(main="", cexSmall=1, cexBig=1, colSmall="black", colBig="blue") { aa = sort(unique(c(as.character(auximg$aa),as.character(aacmp$aa)))); aaorder = data.frame(aa=aa,row=1:length(aa)); # For each org, a list of values makes = lapply(split(auximg,auximg$org), function(d) { d = merge(aaorder, d, all.x=T); # makes will be NA for some values d = d[order(d$aa),]; out = d$makes; names(out) = as.character(d$aa); out; }); # 1 row per organism, 1 column per a.a. makes = t(as.data.frame(makes)); # -1 for auxotroph, 1 for prototroph, 0 for no prediction makes2 = apply(makes, 2, function(x) ifelse(is.na(x), 0, ifelse(x,1,-1))); orgtab = unique(stepcmp[,c("org","Organism")]); row.names(makes2) = orgtab$Organism[match(row.names(makes2), orgtab$org)]; makes2 = makes2[order(row.names(makes2), decreasing=T),]; # apply clustering to columns (amino acids) not rows #d = hclust(dist(t(makes2), method="manhattan")); makes2 = makes2[,aaordering() ]; oldpar = par(mar=c(7.5,1,2,12)); image(t(makes2), breaks=seq(-1.5,1.5,1), col=c("red","grey","green"), xaxt="n", yaxt="n", bty="n", main=main); x = seq(0, 1, length.out=ncol(makes2)); y = seq(0, 1, length.out=nrow(makes2)); mtext(row.names(makes2), side=4, at=y, las=1, cex=0.8, line=0.25); mtext(colnames(makes2), side=1, at=x, las=2, adj=1, line=0.25, cex=0.9) d = merge(aacmp, merge(orgtab, data.frame(Organism=row.names(makes2), y=y))); d = merge(d, data.frame(aa=colnames(makes2), x=x)); # codes are unfilled, filled, obvious, gene model or known, unspecific code_pch = c("?","F","o","b","n"); code_cex = ifelse(1:5 <= 2, cexBig, cexSmall); code_col = ifelse(1:5 <= 2, colBig, colSmall); points(d$x, d$y, pch=code_pch[d$code], cex=code_cex[d$code], col=code_col[d$code]); # fill out * for missing prototroph predictions if just 1 transaminase is needed # (these should not be in the tables) for (aa in words("Ala Asp Glu")) { icol = which(colnames(makes2)==aa); i = which(makes2[,icol] == 0); if (length(i) > 0) points(rep(x[icol],length(i)), y[i], pch=code_pch[4], cex=0.7); } diffx = diff(x[1:2]); xlines = c(x[-1] - diffx/2); for (xpos in xlines) abline(v=xpos); diffy = diff(y[1:2]); for (ypos in c(y[-1] - diffy/2)) abline(h=ypos); w = strwidth("A")*1.25; h = strheight("A")*1.5; lx = 1 + 2*w; ly = -diff(y[1:2])/2; rect(lx, ly, lx+w, ly-h, col="green", border=NA, xpd=T); rect(lx, ly-h, lx+w, ly-2*h, col="red", border=NA, xpd=T); rect(lx, ly-2*h, lx+w, ly-3*h, col="grey", border=NA, xpd=T); lx2 = lx + w*1.5; text(lx2, ly - h/2, "IMG prototroph", adj=c(0,0.5), xpd=T); text(lx2, ly - 1.5*h, "IMG auxotroph", adj=c(0,0.5), xpd=T); text(lx2, ly - 2.5*h, "no IMG prediction", adj=c(0,0.5), xpd=T); text(lx2, ly - 3.5*h, "unfilled", adj=c(0,0.5), xpd=T); text(lx2, ly - 4.5*h, "filled", adj=c(0,0.5), xpd=T); text(lx2, ly - 5.5*h, "obvious candidate", adj=c(0,0.5), xpd=T); text(lx2, ly - 6.5*h, "bad gene model or known", adj=c(0,0.5), xpd=T); text(lx2, ly - 7.5*h, "non-specific transaminase", adj=c(0,0.5), xpd=T); for (i in 1:5) { text(lx+w/2, ly-(i+2.5)*h, code_pch[i], xpd=T, cex=code_cex[i], col=code_col[i]); } par(oldpar); } # a bit complicated, no longer used PlotImgCmp = function(main="Overview of IMG Predictions and Gap Filling") { aa = sort(unique(c(as.character(auximg$aa),as.character(aacmp$aa)))); aaorder = data.frame(aa=aa,row=1:length(aa)); # For each org, a list of values makes = lapply(split(auximg,auximg$org), function(d) { d = merge(aaorder, d, all.x=T); # makes will be NA for some values d = d[order(d$aa),]; out = d$makes; names(out) = as.character(d$aa); out; }); # 1 row per organism, 1 column per a.a. makes = t(as.data.frame(makes)); # -1 for auxotroph, 1 for prototroph, 0 for no prediction makes2 = apply(makes, 2, function(x) ifelse(is.na(x), 0, ifelse(x,1,-1))); orgtab = unique(stepcmp[,c("org","Organism")]); row.names(makes2) = orgtab$Organism[match(row.names(makes2), orgtab$org)]; makes2 = makes2[order(row.names(makes2), decreasing=T),]; # apply clustering to columns (amino acids) not rows #d = hclust(dist(t(makes2), method="manhattan")); makes2 = makes2[,words("Met Lys His Arg Leu Ile Val Phe Tyr Trp Pro Ser Thr Gly Cys Glu Gln Ala Asn Asp")]; # but really I need to use image so that I can overlay stuff on top #heatmap(makes2, scale="none", # labRow=row.names(makes2), labCol=colnames(makes2), # breaks=seq(-1.5,1.5,1), col=c("red","grey","green"), margins=c(3,18)); oldpar = par(mar=c(7.5,1,2,12)); image(t(makes2), breaks=seq(-1.5,1.5,1), col=c("red","grey","green"), xaxt="n", yaxt="n", bty="n", main=""); mtext(main, side=3, line=0.75, cex=1.2, adj=0, font=2, xpd=T); x = seq(0, 1, length.out=ncol(makes2)); y = seq(0, 1, length.out=nrow(makes2)); mtext(row.names(makes2), side=4, at=y, las=1, cex=0.8, line=0.25); mtext(colnames(makes2), side=1, at=x, las=2, adj=1, line=0.25) d = merge(aacmp, merge(orgtab, data.frame(Organism=row.names(makes2), y=y))); d = merge(d, data.frame(aa=colnames(makes2), x=x)); code_pch = words("? + m x o"); points(d$x,d$y,pch=code_pch[d$code], cex=ifelse(d$code <= 2, 1.75, 0.7)); # fill out * for missing prototroph predictions if just 1 transaminase is needed # (these should not be in the tables) for (aa in words("Ala Asp Glu")) { icol = which(colnames(makes2)==aa); i = which(makes2[,icol] == 0); if (length(i) > 0) points(rep(x[icol],length(i)), y[i], pch=code_pch[4], cex=0.7); } diffx = diff(x[1:2]); xlines = c(x[-1] - diffx/2); for (xpos in xlines) abline(v=xpos); w = strwidth("A")*1.25; h = strheight("A")*1.5; lx = 1 + 2*w; ly = -diff(y[1:2])/2; rect(lx, ly, lx+w, ly-h, col="green", border=NA, xpd=T); rect(lx, ly-h, lx+w, ly-2*h, col="red", border=NA, xpd=T); rect(lx, ly-2*h, lx+w, ly-3*h, col="grey", border=NA, xpd=T); text(lx+w/2, ly-3.5*h, code_pch[2], col="black", xpd=T, cex=1.2); text(lx+w/2, ly-4.5*h, code_pch[1], col="black", xpd=T, cex=1.2); text(lx+w/2, ly-5.5*h, code_pch[5], col="black", xpd=T, cex=0.7); text(lx+w/2, ly-6.5*h, code_pch[3], col="black", xpd=T, cex=0.7); text(lx+w/2, ly-7.5*h, code_pch[4], col="black", xpd=T, cex=0.7); lx2 = lx + w*1.5; text(lx2, ly - h/2, "IMG prototroph", adj=c(0,0.5), xpd=T); text(lx2, ly - 1.5*h, "IMG auxotroph", adj=c(0,0.5), xpd=T); text(lx2, ly - 2.5*h, "no IMG prediction", adj=c(0,0.5), xpd=T); text(lx2, ly - 3.5*h, "filled gap", adj=c(0,0.5), xpd=T); text(lx2, ly - 4.5*h, "unexplained", adj=c(0,0.5), xpd=T); text(lx2, ly - 5.5*h, "obvious candidate", adj=c(0,0.5), xpd=T); text(lx2, ly - 6.5*h, "multiple candidates", adj=c(0,0.5), xpd=T); text(lx2, ly - 7.5*h, "non-specific reaction", adj=c(0,0.5), xpd=T); par(oldpar); } # These others are all based on the combined image... # aspartate kinase and asd are not shown because they are essential PlotMiyaMetThr = function(file="workbook/auxo/PlotMiyaMetThr.eps", scale=3) { if(!is.null(file)) { setEPS(); postscript(file,width=7,height=4.5,pointsize=10); } exps = metadata_by_exp("Miya"); exps2 = subset(exps, Group == "nutrient" | (short == "Defined MoLS4" & grepl("set[16]",name)) ); exps2$met = grepl("methionine", exps2$short, ignore.case=TRUE); exps2$thr = grepl("threonine", exps2$short, ignore.case=TRUE); exps2$his = grepl("histidine", exps2$short, ignore.case=T); exps2$cys = grepl("cysteine", exps2$short, ignore.case=T); exps2$ser = grepl("serine", exps2$short, ignore.case=T); exps2$label = ifelse(exps2$thr, ifelse(exps2$met, "+Met +Thr", "+Thr"), ifelse(exps2$met, "+Met", ifelse(exps2$his, "+His", ifelse(exps2$ser, "+Ser", ifelse(exps2$cys, "+Cys", "minimal"))))); exps2$o = order(ifelse(exps2$met,1,0) + 2*ifelse(exps2$thr,1,0) + 4 * ifelse(exps2$cys,1,0) + 8 * ifelse(exps2$his,1,0) + 16 * ifelse(exps2$ser,1,0)); genes = words("DvMF_1464 DvMF_0262 DvMF_0044 DvMF_0476 DvMF_1398 DvMF_2035 DvMF_1412 DvMF_0971 DvMF_1945"); desc = c("DUF39","NIL/ferredoxin","apbE-like (COG2122)", "B12-dependent methionine synthase", "B12 reactivation (DUF4445)","B12-independent methionine synthase", "homoserine dehydrogenase", "homoserine kinase", "threonine synthase"); # add blank rows sel = c(1:3,NA,4:6,NA,7:9); genes = genes[sel]; desc = desc[sel]; # top to bottom genes = rev(genes); desc = rev(desc); d = get_fit("Miya", genes); d = subset(d, exps$name %in% exps2$name); oldpar = par(mar=c(8, 0.5, 0.5, 16)); image(d[exps2$o,], col=myHeatColors(), breaks=breaksUse(scale=scale), useRaster=TRUE, xlab="", ylab="", xaxt="n", yaxt="n", bty="n"); # las = 2 (perpendicular to axis) mtext(ifelse(is.na(genes),"", paste(" ", genes,": ",desc,sep="")), las=2, side=4, at=seq(0, 1, length.out=length(genes))); # uniquify the labels on the x axis d = split(1:nrow(exps2), exps2$label[exps2$o]); # on x axis, 0 is the center of the 1st cell; 1 is the center of the last cell; # 0.5/(nexps-1) is the boundary of 1st/2nd cell nexps = nrow(exps2); for (l in names(d)) { at = (d[[l]]-1)/(nexps-1); delta = 0.5/(nexps-1); mtext(paste(l," ",sep=""), las=2, side=1, at=mean(at)); abline(v=min(at)-delta, col="white", lwd=1); abline(v=max(at)+delta, col="white", lwd=1); } # on y axis, 0 is the center of the 1st row; 1 is the center of the last row; # and 0.5/nrows is the boundary of 1st/2nd row nrows = length(genes); for (y in 0:nrows) { abline(h=y/(nrows-1) - 0.5/(nrows-1), col="white", lwd=1); } # and add scale ShowColorRange(1 + 1.75*0.5/(nrow(exps2)-1), -0.5/(length(genes)-1), scale=scale); par(oldpar); if(!is.null(file)) dev.off(); } PlotPhaeoMet = function(file="workbook/auxo/PlotPhaeoMet.eps", scale=3) { if(!is.null(file)) { setEPS(); postscript(file,width=7,height=4,pointsize=10); } labels = data.frame(Group = c(rep("carbon source",3), rep("nitrogen source",2)), Condition_1 = c("casamino acids", "Sodium L-Lactate", "Sucrose", "Ammonium chloride", "L-Methionine"), label = c("CAS + NH4", "L-lactate + NH4", "Sucrose + NH4", "Sucrose + NH4", "Sucrose + L-Methionine"), order = c(5,4,1,2,3)); exps = metadata_by_exp("Phaeo"); exps$row = 1:nrow(exps); exps2 = merge(exps, labels); exps2 = exps2[order(exps2$order),]; exps2[,c("name","Group","short","Condition_1","label")]; genes = words("PGA1_c14560 PGA1_c23540 PGA1_c13370 PGA1_c16040 PGA1_c13350 PGA1_c15200 PGA1_c13340"); desc = c("MetA", "MetZ", "methyltransferase domain", "pterin-binding domain", "vitamin B12-binding & cap", "RamA-like (DUF4445)", "DUF1638"); # add spacer i = c(1:2,NA,3:5,NA,6:7); genes = genes[i]; desc = desc[i]; # top to bottom genes = rev(genes); desc = rev(desc); d = get_fit("Phaeo",genes); oldpar = par(mar=c(11, 0.5, 0.5, 16)); image(d[exps2$row,], col=myHeatColors(), breaks=breaksUse(scale=scale), useRaster=TRUE, xlab="", ylab="", xaxt="n", yaxt="n", bty="n"); # las = 2 (perpendicular to axis) mtext(ifelse(is.na(genes),"",paste(" ", genes,": ",desc,sep="")), las=2, side=4, at=seq(0, 1, length.out=length(genes))); #mtext(paste(exps2$label, " "), las=2, side=1, # at=seq(0, 1, length.out=nrow(exps2))); # and add scale ShowColorRange(1 + 2.0*0.5/(nrow(exps2)-1), -0.5/(length(genes)-1), scale=scale); d = split(1:nrow(exps2), exps2$label); nexps = nrow(exps2); for (l in names(d)) { at = (d[[l]]-1)/(nexps-1); delta = 0.5/(nexps-1); mtext(paste(l," ",sep=""), las=2, side=1, at=mean(at)); abline(v=min(at)-delta, col="white", lwd=1); abline(v=max(at)+delta, col="white", lwd=1); } # on y axis, 0 is the center of the 1st row; 1 is the center of the last row; # and 0.5/nrows is the boundary of 1st/2nd row nrows = length(genes); for (y in 0:nrows) { abline(h=y/(nrows-1) - 0.5/(nrows-1), col="white", lwd=1); } par(oldpar); if(!is.null(file)) dev.off(); } ShowColorRange = function(xLeft, yTop, scale=3) { stopifnot(scale %in% 2:3); charH = strheight("A"); charW = strwidth("A"); # the color range keyTop = yTop - charH*1.5; keyHeight = 10 * charH; keyBottom = keyTop - keyHeight; keyLeft = xLeft + charW*1.5; keyRight = keyLeft + 2 * charW; colorsAll = myHeatColors(); nColors = length(colorsAll); rect(keyLeft, keyBottom + keyHeight * (0:(nColors-1))/nColors, keyRight, keyBottom + keyHeight * (1:nColors)/nColors, col=rev(colorsAll), border=NA, xpd=T); textShow = if (scale == 3) c("+3 or more", "+2","+1","0","-1","-2", "-3 or less") else c("+2 or more","+1","0","-1","-2 or less"); nShow = length(textShow); keyCenters = keyBottom + keyHeight * ((1:nColors)-0.5)/nColors; delta = diff(range(keyCenters)); text(keyRight+charH, keyCenters[1] + delta * seq(0,1,length.out=nShow), textShow, xpd=T, adj=c(0,0.5)) text(keyLeft, keyTop + charH*1.25, "Gene Fitness", xpd=T, adj=c(0,0.5), font=3); } PlotBFirm = function(file="workbook/auxo/PlotBFirm.eps", scale=3) { if(!is.null(file)) { setEPS(); postscript(file,width=7,height=4.5,pointsize=10); } layout(matrix(1:2, nrow=1), widths=c(0.45,0.55)); PlotBFirmHeatmap(scale=scale, main="A. Key Genes"); PlotBFirmSerVsMin(main="B. All 5,428 Genes"); if(!is.null(file)) dev.off(); } PlotBFirmHeatmap = function(scale=3,main="") { # conditions are: # glucose + NH4 (from nutrient, or from N ammonia, or from C glucose) # glucose + NH4 + CAS (nutrient) # glucose + NH4 + serine (nutrient) # glucose + NH4 + histidine (nutrient) # glucose + serine (N serine) # histidine + NH4 (C histidine) # gly-glu + NH4 (C gly-glu) ?? # genes are BPHYT_RS03150 (the dehydrogenase), BPHYT_RS14915 (phosphoserine aminotransferase, not surprising), # BPHYT_RS03625 (P-ase for histidine) # Later reduced to just 2 genes = words("BPHYT_RS03150 BPHYT_RS14915"); desc = c("novel\nphosphoglycerate\ndehydrogenase","phosphoserine\naminotransferase\n(serC)"); genes = rev(genes); desc = rev(desc); labels = data.frame(Group=c("lb", "carbon source", "nitrogen source", "nutrient", "nutrient", "nutrient", "nutrient", "nitrogen source", "carbon source"), Condition_1=c("", "D-Glucose", "Ammonium chloride", "", "casamino acids","L-Serine","L-Histidine", "L-Serine","L-Histidine"), label=c("LB", rep("minimal", 3), c("minimal + CAS", "minimal + serine", "minimal + histidine", "glucose + serine", "histidine + NH4")), order=c(1, rep(2, 3), 3, 4, 5, 7, 6)); exps = metadata_by_exp("BFirm"); exps$row = 1:nrow(exps); exps2 = merge(exps,labels); exps2 = exps2[order(exps2$order),]; oldpar = par(mar=c(7.5, 0.5, 3.5, 7.5)); d = get_fit("BFirm",genes); image(d[exps2$row,], col=myHeatColors(), breaks=breaksUse(scale=scale), useRaster=TRUE, xlab="", ylab="", xaxt="n", yaxt="n", bty="n", main=main); # las = 2 (perpendicular to axis) mtext(paste(genes,"\n",desc,sep=""), las=2, side=4, line=0.25, at=seq(0, 1, length.out=length(genes)), cex=1.0); x = aggregate(1:nrow(exps2), exps2[,"label",drop=F], mean); mtext(paste(x$label, " "), las=2, side=1, at=(x$x - 1)/(nrow(exps2)-1), cex=1.0); xmin = aggregate(1:nrow(exps2), exps2[,"label",drop=F], min)$x - 0.5; xmax = aggregate(1:nrow(exps2), exps2[,"label",drop=F], max)$x + 0.5; at = unique(c(xmin,xmax)); for(x in at) abline(v = (x-1)/(nrow(exps2)-1), col="white", lwd=1); for(y in c(1.5, 2.5)) abline(h = (y-1)/(length(genes)-1), col="white", lwd=1); # and add scale ShowColorRange(1 + 2.0*0.5/(nrow(exps2)-1), -0.5/(length(genes)-1), scale=scale); par(oldpar); } PlotBFirmSerVsHis = function(main="") { oldpar = par(mgp=c(2,1,0), mar=c(3.5,4.0,3.5,2)); exps = metadata_by_exp("BFirm"); expsHis = exps$name[exps$Group=="nutrient" & exps$Condition_1=="L-Histidine"]; expsSer = exps$name[exps$Group=="nutrient" & exps$Condition_1=="L-Serine"]; x = rowMeans(orgs$BFirm$lrn[,expsHis,drop=F]); y = rowMeans(orgs$BFirm$lrn[,expsSer,drop=F]); genes = words("BPHYT_RS03150 BPHYT_RS14915 BPHYT_RS03625 BPHYT_RS19340"); desc = c("dehydrogenase", "serC", "phosphatase", "serine\ndehydratase"); hisGenes = words("BPHYT_RS17670 BPHYT_RS17715 BPHYT_RS17690 BPHYT_RS17680 BPHYT_RS12530 BPHYT_RS17685 BPHYT_RS17700 BPHYT_RS17675"); g = orgs$BFirm$g; plot(x,y, xlab="Gene fitness in minimal + histidine", ylab="Gene fitness in minimal + serine",main=main,bty="l", pch=20, cex=ifelse(g %in% c(genes,hisGenes), 0.9, 0.6), col=ifelse(g %in% genes[1:2], "red", ifelse(g %in% genes[3], "darkgreen", ifelse(g %in% genes[4], "blue", ifelse(g %in% hisGenes, "magenta", "grey40"))))); abline(h=0); abline(v=0); abline(0,1); charH = strheight("A"); charW = strwidth("A"); d = merge(data.frame(x,y,g), data.frame(g=genes, desc=desc)); text(d$x + ifelse(d$g %in% genes[1], -6*charW, -charW/2), d$y + ifelse(d$g %in% genes[c(1,3)], -1.5*charH, 0), d$desc, col=ifelse(d$g %in% genes[1:2], "red", ifelse(d$g %in% genes[3], "darkgreen", "blue")), adj=c(0,-0.25), xpd=T); text(0.1, -6, "histidine\nsynthesis\ngenes", col="magenta", adj=c(0,0.5), xpd=T); par(oldpar); } PlotBFirmSerVsMin = function(main="") { oldpar = par(mgp=c(2,1,0), mar=c(3.5,4.0,3.5,2)); exps = metadata_by_exp("BFirm"); expsMin = exps$name[exps$Group=="nutrient" & exps$Condition_1==""]; expsSer = exps$name[exps$Group=="nutrient" & exps$Condition_1=="L-Serine"]; x = rowMeans(orgs$BFirm$lrn[,expsMin,drop=F]); y = rowMeans(orgs$BFirm$lrn[,expsSer,drop=F]); genes = words("BPHYT_RS03150 BPHYT_RS14915 BPHYT_RS19340"); desc = c("dehydrogenase", "serC", " serine\n dehydratase"); col = c("blue", "magenta", "red"); d = data.frame(g=orgs$BFirm$g, x=x, y=y); d = merge(d, data.frame(g=genes, desc=desc, col=col), all.x=T); plot(d$x, d$y, xlab="Gene fitness in minimal", ylab="Gene fitness in minimal + serine",main=main,bty="l", pch=20, cex=ifelse(is.na(d$col), 0.6, 1.0), col=ifelse(is.na(d$col), "grey40", as.character(d$col))); abline(h=0); abline(v=0); abline(0,1); charH = strheight("A"); charW = strwidth("A"); d2 = subset(d, !is.na(d$col)); for (i in 1:3) { text(d2$x[i], d2$y[i], d2$desc[i], col=as.character(d2$col[i]), adj=c(0.5, c(1.25,-0.4,-0.25)[i])); } par(oldpar); } PlotHisComb = function(file="workbook/auxo/PlotHisComb.eps", ...) { if(!is.null(file)) { setEPS(); postscript(file,width=6.75,height=6.75,pointsize=10); } oldpar = par(mgp=c(2,1,0), mar=c(3.5,4.0,2,0.5), mfrow=c(2,2), bty="l"); d = PlotHisPiece("BFirm", main="A. Burkholderia phytofirmans", hisGenes = words("BPHYT_RS17670 BPHYT_RS17715 BPHYT_RS17690 BPHYT_RS17680 BPHYT_RS12530 BPHYT_RS17685 BPHYT_RS17700 BPHYT_RS17675"), focal = "BPHYT_RS03625", ...); # text(d$x, d$y, "phosphatase (_RS03625) ", # col="blue", adj=c(1,-0.25)); # text(-6.5, -1.6, "histidine synthesis", col=hiscol, adj=c(0,0.5)); d = PlotHisPiece("psRCH2", main="B. Pseudomonas stutzeri", hisGenes = get_genes("psRCH2", words("Psest_3298 Psest_0155 Psest_0687 Psest_0153 Psest_0152 Psest_3943 Psest_3944 Psest_3299")), focal = get_genes("psRCH2","Psest_3864"), ...); # text(d$x, d$y, "phosphatase (_3864) ", # col="blue", adj=c(0.3,-0.6)); # text(-6.2, -1.0, "histidine synthesis", col=hiscol, adj=c(0,0.5)); d = PlotHisPiece("Marino", main="C. Marinobacter adhaerens", hisGenes = get_genes("Marino", words("HP15_2495 HP15_2429 HP15_2427 HP15_2428 HP15_3194 HP15_3195")), focal = get_genes("Marino", "HP15_461"), ...); # text(d$x, d$y, "phosphatase (_461) ", # col="blue", adj=c(0.5,-2)); # text(-6.0, -0.8, "histidine synthesis", col=hiscol, adj=c(0,0.5)); d = PlotHisPiece("HerbieS", main="D. Herbaspirillum seropedicae", hisGenes = words("HSERO_RS20310 HSERO_RS20315 HSERO_RS20320 HSERO_RS20325 HSERO_RS20330 HSERO_RS20345 HSERO_RS20350"), focal = NULL, # no data for "HSERO_RS03150" ...); # text(-7.0, 0, "histidine synthesis", col=hiscol, adj=c(0,1.5)); par(oldpar); if(!is.null(file)) dev.off(); } # Returns a data frame with x, y, and g of the focal gene PlotHisPiece = function(org, hisGenes=NULL, focal=NULL, bigcex=1.0, smallcex=0.6, hiscol="darkmagenta", newcol="blue", othercol="grey40", main="") { exps = metadata_by_exp(org); expsHis = exps$name[exps$Group=="nutrient" & exps$Condition_1=="L-Histidine"]; expsMin = exps$name[exps$Group=="nutrient" & exps$Condition_1==""]; cat(org, "L-Histidine Concentrations: ", exps$Concentration_1[exps$name %in% expsHis], unique(exps$Units_1[exps$name %in% expsHis]), "\n"); cat(org, "#minimal",length(expsMin),"\n"); x = rowMeans(orgs[[org]]$lrn[,expsMin,drop=F]); y = rowMeans(orgs[[org]]$lrn[,expsHis,drop=F]); g = orgs[[org]]$g; plot(x,y, xlab="Gene fitness in minimal", ylab="Gene fitness in minimal + histidine", main=main, pch=ifelse(g %in% focal, 8, ifelse(g %in% hisGenes, 1, 20)), cex=ifelse(g %in% c(focal,hisGenes), bigcex, smallcex), col=ifelse(g %in% focal, newcol, ifelse(g %in% hisGenes, hiscol, othercol))); abline(h=0); abline(v=0); abline(0,1); charH = strheight("A"); x1 = min(x)+charH/2; y1 = max(y) + charH/4; text(x1, y1, "histidine synthesis genes", col=hiscol, adj=c(0,1)); if(!is.null(focal)) { sysname = with(orgs[[org]]$genes, sysName[locusId %in% focal]); shortName = sub("^.*_", "_", sysname, perl=T); text(x1, y1-charH*1.5, paste("phosphatase (", shortName, ")", sep=""), col=newcol, adj=c(0,1)); } return(merge(data.frame(x,y,g), data.frame(g=focal))); } PlotHerbieSSerHis = function(file="workbook/auxo/PlotHerbieSSerHis.eps") { if(!is.null(file)) { setEPS(); postscript(file,width=4.0,height=4.0,pointsize=10); } oldpar = par(mgp=c(2,1,0), mar=c(3.5,4.0,0.5,0.5)); x = rowMeans(orgs$HerbieS$lrn[, words("set4IT045 set4IT046")]); # His y = rowMeans(orgs$HerbieS$lrn[, words("set4IT047 set4IT048")]); # Ser hisGenes = words("HSERO_RS20310 HSERO_RS20315 HSERO_RS20320 HSERO_RS20325 HSERO_RS20330 HSERO_RS20345 HSERO_RS20350"); thrLyase = words("HSERO_RS19510 HSERO_RS00265"); dnaGenes = words("HSERO_RS15630 HSERO_RS02090"); dnaNames = words("xth ruvA"); g = orgs$HerbieS$g; plot(x, y, pch=20, bty="l", main="", xlab="Gene fitness in minimal + histidine", ylab="Gene fitness in minimal + serine", cex=ifelse(g %in% c(hisGenes,thrLyase,dnaGenes), 0.9, 0.6), col = ifelse(g %in% hisGenes, "magenta", ifelse(g %in% thrLyase, "red", ifelse(g %in% dnaGenes, "darkgreen", "darkgrey")))); abline(h=0); abline(v=0); abline(0,1); text(-0.1,-5.5,"histidine\nsynthesis\ngenes",adj=c(1,0.5),col="magenta"); text(x[g %in% thrLyase], y[g %in% thrLyase], rep(" ilvA",2), col="red", adj=c(0,0.5)); text(x[g %in% dnaGenes[1]], y[g %in% dnaGenes[1]], paste("",dnaNames[1]), col="darkgreen", adj=c(0,0.5)); text(x[g %in% dnaGenes[2]], y[g %in% dnaGenes[2]], paste("",dnaNames[2]), col="darkgreen", adj=c(0,0.5)); par(oldpar); if(!is.null(file)) dev.off(); } PlotRCH2SerHis = function(file="workbook/auxo/PlotRCH2SerHis.eps") { if(!is.null(file)) { setEPS(); postscript(file,width=4.0,height=4.0,pointsize=10); } oldpar = par(mgp=c(2,1,0), mar=c(3.5,4.0,0.5,0.5)); d = metadata_by_exp("psRCH2"); d$x = get_fit("psRCH2","Psest_3864"); # for histidine d$y = get_fit("psRCH2","Psest_0489"); # for serine d$z = get_fit("psRCH2","Psest_2327"); # the other one (and for homoserine kinase?) d = subset(d, Aerobic_v_Anaerobic == "Aerobic" & Group %in% c("carbon source","nitrogen source","nutrient")); cat("Showing ", nrow(d), " experiments and", nrow(unique(d[,c("Group","Condition_1","Condition_2")])), "conditions\n"); class = ifelse(d$Condition_1=="casamino acids", 1, ifelse(d$Condition_1=="L-Histidine", 2, 3)); plot(d$x, d$y, col=words("red blue black")[class], xlim=c(-6.5,2), ylim=c(-6.5,2), pch=20, bty="l", xlab="Gene fitness of Psest_3864", ylab="Gene fitness of Psest_0489"); abline(0,1); abline(h=0); abline(v=0); text(-0.4, mean(d$y[class==1]), "CAS\n+ NH4", col="red", adj=c(1,0.5)); text(0.4, min(d$y[class==2]), "glucose\n+ NH4\n+ histidine", col="blue", adj=c(0,0.5)); par(oldpar); if(!is.null(file)) dev.off(); } PlotMarinoHis = function(file="workbook/auxo/PlotMarinoHis.eps") { if(!is.null(file)) { setEPS(); postscript(file,width=4.0,height=4.0,pointsize=10); } oldpar = par(mgp=c(2,1,0), mar=c(3.5,4.0,0.5,0.5)); hisGenes = get_genes("Marino", words("HP15_2495 HP15_2429 HP15_2427 HP15_2428 HP15_3194 HP15_3195")); newG = get_genes("Marino", "HP15_461"); x = rowMeans(orgs$Marino$lrn[, words("set9IT065 set9IT066")]); # minimal y = rowMeans(orgs$Marino$lrn[, words("set9IT061 set9IT062")]); # +His g = orgs$Marino$g; plot(x, y, pch=20, bty="l", main="", xlab="Gene fitness in minimal media", ylab="Gene fitness in minimal media + histidine", cex = ifelse(g %in% c(hisGenes,newG), 0.9, 0.6), col = ifelse(g %in% hisGenes, "magenta", ifelse(g %in% newG, "blue", "darkgrey"))); abline(h=0); abline(v=0); abline(0,1); text(-6, max(y[g %in% hisGenes]), "histidine synthesis genes\n", adj=c(0,0), col="magenta"); text(x[g==newG], y[g==newG], " HP15_461", adj=c(0,1), col="blue") par(oldpar); if(!is.null(file)) dev.off(); } writeDelim = function(table,file=stdout(),report=FALSE,...) { write.table(table,file,sep="\t",quote=FALSE,row.names=FALSE,...); if(report) cat("Wrote",nrow(table),"rows","to",file,"\n") } # Each argument may be a vector PlotDef = function(orglist, genelist, labels, main="", xlab="Gene Fitness\n(log2 change in mutant abundance)", xlim=c(-7,2), evenbkgnd="grey90", jitterBy=0.07, off=2.1*jitterBy, pt.cex=0.7, nLeft=0) { # For each row (length(genelist):1) there are two distributions, # -amino acids (in red) and +amino acids (in black) # These are centered at row + off and row - off and use jitter n = length(genelist); plot(xlim, c(1-jitterBy-off-0.1, n+jitterBy+off), bty="n", xlab=xlab, ylab="", yaxt="n", pch=NA, yaxs="i", main=main); for(i in 1:n) { left = xlim[1] - nLeft * strwidth("A"); top2 = n + 1 - i + off + jitterBy + 1.75*strheight("A"); # top of lab top = top2 + 1.25*strheight("A"); # top of gene/organism line if (i%%2 == 0 && !is.null(evenbkgnd)) { # rely on clipping to bound the +/-1 rect(xlim[1]-1, top+strheight("A")/2, xlim[2]+1, n+1-i-off-jitterBy - strheight("A")/2, col=evenbkgnd, border=NA); } org = as.character(orglist[i]); if(is.na(org)) next; # to allow deliberate spacing locusId = get_genes(org, as.character(genelist[i])); if(length(locusId) != 1) stop("Multiple genes from ", genelist[i]); if(is.na(locusId)) stop("Unrecognized gene in org ", org, " : ", genelist[i]); x = get_fit(org,locusId); if(any(is.na(x))) stop("No fitness data for ", locusId," ",genelist[i]," in org ",org); exps = metadata_by_exp(org); rich = exps$Group %in% c("lb","marine broth","rich moyls4") | (exps$Group %in% c("stress","pH","temperature") & exps$Media %in% c("MoYLS4","LB","marine_broth_2216")); cas = exps$Condition_1 %in% "casamino acids"; rescue = rep(FALSE, nrow(exps)); hisRescue = c("AZOBR_RS20485", "Ga0059261_1046", "Ga0059261_1048", "HP15_2428"); leuRescue = c("PGA1_c29830", "PGA1_c29780", "Psest_2590"); if (genelist[i] %in% hisRescue) { rescue = exps$Condition_1 %in% "L-Histidine"; } else if (genelist[i] %in% leuRescue) { rescue = exps$Condition_1 %in% "L-Leucine"; } def = exps$Group %in% c("carbon source","nitrogen source","nutrient","vitamin") & !cas; col = ifelse(rich | cas, "darkgreen", ifelse(rescue, "blue", ifelse(def, "red", NA))); y = n + 1 - i + ifelse(rescue, 0, ifelse(def, off, -off) + jitterBy * runif(length(x), -1, 1)); points(pmax(xlim[1], pmin(xlim[2], x)), y, col=col, pch=20, cex=pt.cex, xpd=T); points(pmax(xlim[1], pmin(xlim[2], x)), y, col=col, pch=ifelse(rescue, 3, NA), cex=pt.cex, xpd=T); o = subset(orginfo, name == org); shorten = function(g) paste(substr(g,1,1),".",sep=""); text(left, top, paste(genelist[i]," from ", shorten(o$genus)," ",o$species, sep=""), adj=c(0,1), xpd=T); # top aligned text(xlim[1], top2, labels[i], adj=c(0,1), xpd=T, cex=0.9); } } # condspec should be a data frame of Group, Condition_1, col as integer or string, and optionally cex, pch PlotCSS = function(locispec, condspec, default.col=1, default.cex=1, label.cex=1, jitterBy=0.25, xlim=c(-4,4), sepstring=" in ", nLeft = 0, # n characters left to push the species line short_genus = FALSE, show_strain = TRUE, xlab="Fitness", main="") { # The y layout is to jitter by +/- jitterBy, centered at nrow(locispec):1 plot(xlim, c(1-0.4,nrow(locispec)+0.4), bty="n", xlab=xlab, ylab="", yaxt="n", pch=NA, yaxs="i", main=main); for (i in 1:nrow(locispec)) { org = as.character(locispec$org[i]); if(is.na(org)) next; # to allow deliberate spacing locusId = get_genes(org, as.character(locispec$gene[i])); if(length(locusId) != 1) stop("Multiple genes from ", locispec$gene[i]); if(is.na(locusId)) stop("Unrecognized gene in org ", org, " : ", locispec$gene[i]); x = get_fit(org,locusId); if(any(is.na(x))) stop("No fitness data for ", locusId," ",locispec$gene[i]," in org ",org); # Ignore dimethyl sulfoxide as it is sometimes the solute, hopefully is not toxic by itself. # Should maybe do that elsewhere too. # Set up per-experiment colors is_simple = with(orgs[[org]]$exps, IsSimpleCond(Group,Condition_1,Condition_2)); col=rep(default.col, length(x)); assigned = rep(FALSE, length(x)); cex=rep(default.cex, length(x)); pch=rep(20, length(x)); for (j in 1:nrow(condspec)) { expsSet = with(orgs[[org]]$exps, name[tolower(Group) %in% tolower(condspec$Group[j]) & (Condition_1 %in% condspec$Condition_1[j] | condspec$Condition_1[j]=="") & is_simple]); u = row.names(x) %in% expsSet & !assigned; assigned[u] = TRUE; col[u] = if(is.character(default.col)) as.character(condspec$col[j]) else condspec$col[j]; if(!is.null(condspec$pch)) pch[u] = condspec$pch[j]; if(!is.null(condspec$cex)) cex[u] = condspec$cex[j]; if(!any(row.names(x) %in% expsSet)) cat("No match in ", org, " for condspec ", as.character(condspec$Group[j]), " ", as.character(condspec$Condition_1[j]),"\n"); } y = jitterBy * (runif(length(x)) - 0.5) * 2; points(pmax(xlim[1], pmin(xlim[2], x)), y + nrow(locispec) - i + 1, cex=cex, pch=pch, col=col); # emphasize assigned points by replotting on top? points(pmax(xlim[1], pmin(xlim[2], x)), y + nrow(locispec) - i + 1, cex=cex, pch=pch, col=ifelse(assigned, col, NA)); o = orginfo[orginfo$name %in% org,]; shorten = function(g) if(short_genus) paste(substr(g,1,1),".",sep="") else g; text(xlim[1] - nLeft * strwidth("A"), nrow(locispec)-i+1+0.3+0.05, paste(locispec$gene[i],sepstring,shorten(o$genus)," ",o$species," ", ifelse(show_strain,o$strain,""), sep=""), adj=c(0,0), xpd=T, cex=label.cex); # adjust to left bottom } } # Identify "simple" conditions that have a Group, have Condition_1 set, and do not have Condition_2 set # (or are stresses with Condition_2 as "Ethanol" or "Dimethyl Sulfoxide" as carriers) # returns a vector of TRUE or FALSE IsSimpleCond = function(Group, Condition_1, Condition_2) { return(!is.na(Group) & Group != "" & !is.na(Condition_1) & Condition_1 != "" & (is.na(Condition_2) | Condition_2 == "" | (tolower(Group) == "stress" & Condition_2 %in% c("Ethanol","Dimethyl Sulfoxide")))); }