mads <- function(madat) { tapply(madat$intensities$m,madat$intensities$block,mad,na.rm=T) } medianas <- function(madat) { tapply(madat$intensities$m,madat$intensities$block,median,na.rm=T) } medias <- function(madat) { tapply(madat$intensities$m,madat$intensities$block,mean,na.rm=T) } desvs <- function(madat) { tapply(madat$intensities$m,madat$intensities$block,sd,na.rm=T) } matorg <- function(madat) { A <- madat$intensities$a M <- madat$intensities$m R <- 2^M * sqrt(2^(2*A-M)) G <- sqrt(2^(2*A-M)) data.frame(R=R,G=G) } media.na.reciproca <- function(madat1,madat2) { nomes1 <- row.names(madat1$intensities) nomes2 <- row.names(madat2$intensities) nomes.intersec <- nomes2[match(nomes1,nomes2,nomatch=0)] result <- NULL for(i in 1:length(nomes.intersec)) { dad1 <- madat1$intensities[nomes.intersec[i],] dad2 <- madat2$intensities[nomes.intersec[i],] oneliner <- c(dad1$block,dad1$row,dad1$column,mean(c(dad1$m,-dad2$m)),mean(c(dad1$a,dad2$a)),min(dad1$flags,dad2$flags)) result <- rbind(result,oneliner) } colnames(result) <- c('block','row','column','m','a','flags') result <- data.frame(result,row.names=NULL) list(intensities=result,lamina=paste(madat1$lamina,madat2$lamina,sep='+')) } media.na.lamina <- function(madat,pares) { #dada uma amostra pareada proveniente de pareia.replicas(), vamos #gerar um madat com as médias das réplicas em lâmina pares <- as.matrix(pares) colnames(pares) <- NULL gera.val <- function(pares,madat) { b <- pares[1] r1 <- pares[2] c1 <- pares[3] r2 <- pares[4] c2 <- pares[5] ma1 <- as.vector(unlist(subset(madat$intensities,block == b & row == r1 & column == c1,select=c(m,a,flags)))) ma2 <- as.vector(unlist(subset(madat$intensities,block == b & row == r2 & column == c2,select=c(m,a,flags)))) if ((length(ma1) != 3) || (length(ma2) != 3)) { return(c(b,r1,c1,NA,NA,-200)) } flags <- min(c(ma1[3],ma2[3])) # we take the flags to be the min. amongst the two of them, in other words: if one is screwed up, we consider both of them to be. c(b,r1,c1,mean(c(ma1[1],ma2[1])),mean(c(ma1[2],ma2[2])),flags) } results <- apply(pares,1,gera.val,madat) results <- t(results) colnames(results) <- c('block','row','column','m','a','flags') results <- data.frame(results) list(intensities=results,lamina=madat$lamina) } intersectEXPR <- function(expr1,expr2) { # this function should take two expression sets and return a brc # containing only the spots which have a sign reversal # OBS: apparently we forgot to take in account that some expression levels # might be unavailable due to a < -200 flag in the raw data. to account for # that, we'll first clean up those spots in that situation if (dim(expr1)[1] > dim(expr2)[2]) { maior <- expr1 menor <- expr2 } else { menor <- expr1 maior <- expr2 } maior <- maior[row.names(menor),] maior <- subset(maior,!is.na(block)) menor <- menor[row.names(maior),] menor <- subset(menor,!is.na(block)) good.ind <- (maior$m * menor$m) < 0 result <- maior[good.ind,] result <- subset(result,1==1,select=c(block,row,column)) class(result) <- c('brc','data.frame') result } inten2set <- function(inten) { babyset <- subset(inten,1==1,select=c(block,row,column)) class(babyset) <- c('brc','data.frame') babyset } spot2set <- function(spot) { oneset <- data.frame(block=spot$block,row=spot$row,column=spot$column) class(oneset) <- c('brc','data.frame') oneset } setraw <- function(brc,madat) { cy3.inten <- cy5.inten <- NULL lamina <- madat$lamina for (i in 1:dim(brc)[1]) { spotinfo <- summary(cspot(lamina,brc[i,1],brc[i,2],brc[i,3])) cy3.inten <- c(cy3.inten,spotinfo$cy3.median) cy5.inten <- c(cy5.inten,spotinfo$cy5.median) } cbind(brc,cy3.inten,cy5.inten) } setexpr <- function(brc,madat) { results <- NULL for (i in 1:dim(brc)[1]) { oneliner <- subset(madat$intensities,block == brc[i,1] & row == brc[i,2] & column == brc[i,3]) results <- rbind(results,oneliner) } results } setdesc <- function(brc,madat) { geneid <- genedesc <- NULL for (i in 1:dim(brc)[1]) { spotinfo <- summary(cspot(madat$lamina,brc[i,1],brc[i,2],brc[i,3])) geneid <- c(geneid,spotinfo$gene.id) genedesc <- c(genedesc,spotinfo$desc) } cbind(brc,geneid,genedesc) } setview <- function(brc,madat,limit=10,...) { lamina <- madat$lamina if (dim(brc)[1] > limit) { stop('Set a higher limit or take a smaller set.\n') } else { for(i in 1:dim(brc)[1]) { viewspot(cspot(lamina,brc[i,1],brc[i,2],brc[i,3]),...) } } } setfinder <- function(madat) { ind <- identify(madat$intensities$a,madat$intensities$m,col='blue',labels=rep('+',length(madat$intensities$m)),offset=0) babyset <- madat$intensities[ind,] babyset <- subset(babyset,1==1,select=c(block,row,column)) class(babyset) <- c('brc','data.frame') babyset } spotfinder <- function(madat) { ind <- identify(madat$intensities$a,madat$intensities$m,n=1,col='blue',labels=rep('+',length(madat$intensities$m)),offset=0) madat$intensities[ind,]$block -> ind.block madat$intensities[ind,]$row -> ind.row madat$intensities[ind,]$column -> ind.column cspot(madat$lamina,ind.block,ind.row,ind.column) } maplot <- function(madat,...,lowess=TRUE,lowess.f=2/3,lowess.col='red',zerocurve=TRUE,main,reverse=FALSE,flaglevel=-200,highlight) { madat$intensities <- subset(madat$intensities,flags > flaglevel) M <- madat$intensities$m A <- madat$intensities$a if (reverse == TRUE) { M <- -M } if (missing(main)) { titulo <- paste('MAPlot: Lâmina',madat$lamina) if (!(is.null(madat$norm))) { titulo <- paste(titulo,'(normalizada) \n Normalização:',madat$norm) } } else { titulo <- main } if (missing(highlight)) { plot(A,M,main=titulo,...,pch=1) } else { brc.is.in <- function(brc,inten) { inten <- cbind(inten,indic=rep(FALSE,dim(inten)[1])) for (i in 1:dim(brc)[1]) { inten[inten$block == brc[i,1] & inten$row == brc[i,2] & inten$column == brc[i,3],]$indic <- TRUE } inten$indic } destacados <- brc.is.in(highlight,madat$intensities) plot(A,M,main=titulo,...,col=ifelse(destacados == TRUE,'blue','black'),pch=ifelse(destacados == TRUE,16,1)) } if (lowess == TRUE) { lines(lowess(A,M,f=lowess.f),col=lowess.col) } if (zerocurve == TRUE) { curve(0*x,col='blue',add=T) } } boxplot.pg <- function(madat,zerocurve=FALSE,...) { titulo <- paste('Log das Razões por Subarray:',madat$lamina) if (!(is.null(madat$norm))) { titulo <- paste(titulo,'(normalizada) \n Normalização:',madat$norm) } boxplot(m ~ block,data=madat$intensities,main=titulo,...) if (zerocurve == TRUE) { curve(0*x,col='blue',add=T) } } craw <- function(lamina) { con <- checkMAConnect() .data <- dbGetQuery(con,paste('SELECT block_number,row,colum,F635_Median_m_B635,F532_Median_m_B532,Flags FROM ',lamina,' ORDER BY block_number,row',sep='')) # Aqui vamos botar um Flag de -200 para qq spot com intensidade negativa # em algum dos canais (depois da correção por background) .data[.data[[4]] <= 0 | .data[[5]] <= 0,]$Flags <- .data[.data[[4]] <= 0 | .data[[5]] <= 0,]$Flags - 200 .data[[4]] <- as.real(.data[[4]]) .data[[5]] <- as.real(.data[[5]]) M <- ifelse(.data$Flags > -200,.data[[4]]/.data[[5]],NA) A <- ifelse(.data$Flags > -200,.data[[4]]*.data[[5]],NA) M = log2(M) A = log2(A)/2 intensities <- data.frame(block=.data[[1]],row=.data[[2]],column=.data[[3]],m=M,a=A,flags=.data[[6]]) list(intensities=intensities,lamina=lamina) } craw.inten <- function(lamina) { con <- checkMAConnect() .data <- dbGetQuery(con,paste('SELECT block_number,row,colum,F635_Median,B635_Median,F532_Median,B532_Median,F635_Median_m_B635,F532_Median_m_B532 FROM ',lamina,' ORDER BY block_number,row',sep='')) .data[[4]] <- as.real(.data[[4]]) .data[[5]] <- as.real(.data[[5]]) .data[[6]] <- as.real(.data[[6]]) .data[[7]] <- as.real(.data[[7]]) .data[[8]] <- as.real(.data[[8]]) .data[[9]] <- as.real(.data[[9]]) data.frame(block=.data[[1]],row=.data[[2]],column=.data[[3]],cy3fg=.data[[6]],cy3bg=.data[[7]],cy5fg=.data[[4]],cy5bg=.data[[5]],cy3c=.data[[9]],cy5c=.data[[8]]) } replica <- function(spot) { con <- checkMAConnect() tipolamina <- dbGetQuery(con,paste('SELECT lamina FROM add_info WHERE name = \'',spot$lamina,'\'',sep=''))[[1]] desc <- dbGetQuery(con,paste('SELECT description FROM ',tipolamina,' WHERE block_number = ',spot$block,' AND row = ',spot$row,' AND colum = ',spot$column,sep=''))[[1]] if (!(is.na(desc))) { if( desc != 'ESTs') { .spot <- dbGetQuery(con,paste('SELECT row,colum FROM ',tipolamina,' WHERE description = \'',desc,'\' AND block_number = ',spot$block,' AND (row != ',spot$row,' OR colum != ',spot$colum,')',sep='')) cspot(spot$lamina,spot$block,.spot[[1]],.spot[[2]]) } else { gene.id <- dbGetQuery(con,paste('SELECT gene_id FROM ',tipolamina,' WHERE block_number = ',spot$block,' AND row = ',spot$row,' AND colum = ',spot$column,sep=''))[[1]] if (!(is.na(gene.id))) { .spot <- dbGetQuery(con,paste('SELECT row,colum FROM ',tipolamina,' WHERE gene_id = \'',gene.id,'\' AND block_number = ',spot$block,' AND (row != ',spot$row,' OR colum != ',spot$colum,')',sep='')) cspot(spot$lamina,spot$block,.spot[[1]],.spot[[2]]) } } } else { NA } } gera.dif <- function(madat,pares) { #dada uma amostra pareada proveniente de pareia.replicas(), vamos #gerar os graficos que a Julia pediu pares <- as.matrix(pares) colnames(pares) <- NULL gera.val <- function(pares,madat) { b <- pares[1] r1 <- pares[2] c1 <- pares[3] r2 <- pares[4] c2 <- pares[5] lograt.r1 <- as.vector(unlist(subset(madat$intensities,block == b & row == r1 & column == c1,select=m))) lograt.r2 <- as.vector(unlist(subset(madat$intensities,block == b & row == r2 & column == c2,select=m))) if (length(lograt.r1) != 1) { lograt.r1 = NA } if (length(lograt.r2) != 1) { lograt.r2 = NA } c(lograt.r1,lograt.r2) } results <- apply(pares,1,gera.val,madat) results <- t(results) colnames(results) <- c('m1','m2') results <- data.frame(results) results } pareia.aleatoria <- function(madat) { # Mesmo que pareia.replicas() mas retorna replicas aleatorias dentro de um mesmo # subarray pares <- NULL spots <- cbind(subset(madat$intensities,select=c(block,row,column)),lixo=0) for (i in 1:dim(spots)[1]) { if (spots[i,]$lixo == 1) { next; } possiveis.replicas <- subset(spots,block == spots[i,]$block & lixo == 0 & (row != spots[i,]$row | column != spots[i,]$column)) if (dim(possiveis.replicas)[1] == 0) { next; } indice <- sample(nrow(possiveis.replicas),1) replica.row <- possiveis.replicas[indice,]$row replica.column <- possiveis.replicas[indice,]$column pares <- rbind(pares,c(spots[i,]$block,spots[i,]$row,spots[i,]$column,replica.row,replica.column)) spots[spots$block == spots[i,]$block & spots$row == replica.row & spots$column == replica.column,]$lixo = 1 write(i,'/tmp/status',append=T) } pares <- as.data.frame(pares) colnames(pares) <- c('block','r1','c1','r2','c2') pares } pareia.replicas <- function(madat) { # Recebe um arquivo raw de intensidades e retorna uma matriz na forma # bloco r1 c1 r2 c2 # Onde (r1,c1) e (r2,c2) sao replicas lixo <- data.frame(block=0,row=0,column=0) pares <- NULL spots <- subset(madat$intensities,select=c(block,row,column)) for (i in 1:dim(spots)[1]) { if (dim(subset(lixo,row == spots[i,]$row & column == spots[i,]$column & block == spots[i,]$block))[1] > 0) { next; } .replica <- adv.replica(cspot(madat$lamina,spots[i,]$block,spots[i,]$row,spots[i,]$column)) if (sum(is.na(.replica))) { next; } pares <- rbind(pares,c(spots[i,]$block,spots[i,]$row,spots[i,]$column,.replica$row,.replica$column)) lixo <- rbind(lixo,c(.replica$block,.replica$row,.replica$column)) write(i,'/tmp/status',append=T) } pares <- as.data.frame(pares) t colnames(pares) <- c('block','r1','c1','r2','c2') pares } vet.pareia.replicas <- function(madat) { # Recebe um arquivo raw de intensidades e retorna uma matriz na forma # bloco r1 c1 r2 c2 # Onde (r1,c1) e (r2,c2) sao replicas lixo <- data.frame(block=0,row=0,column=0) pares <- NULL spots <- subset(madat$intensities,select=c(block,row,column)) spots <- as.matrix(spots) colnames(spots) <- NULL fazpareamento <- function(vec,pares,lixo) { b <- vec[1] r <- vec[2] c <- vec[3] if (dim(subset(lixo,row == r,block == b,column == c))[1] > 0) { return(rep(NA,5)) } # if (sum(lixo$row == r && lixo$block == b && lixo$column == c) > 0) { # return(rep(NA,5)) # } } apply(spots,1,fazpareamento,pares,lixo) for (i in 1:dim(spots)[1]) { if (sum(lixo$row == spots[i,]$row && lixo$column == spots[i,]$column && lixo$block == spots[i,]$block) > 0) { next; } .replica <- adv.replica(cspot(madat$lamina,spots[i,]$block,spots[i,]$row,spots[i,]$column)) if (sum(is.na(.replica))) { next; } pares <- rbind(pares,c(spots[i,]$block,spots[i,]$row,spots[i,]$column,.replica$row,.replica$column)) } pares <- as.data.frame(pares) colnames(pares) <- c('block','r1','c1','r2','c2') } adv.replica <- function(x) { con <- checkMAConnect() tipolamina <- dbGetQuery(con,paste('SELECT lamina FROM add_info WHERE name = \'',x$lamina,'\'',sep=''))[[1]] desc <- dbGetQuery(con,paste('SELECT gene_id FROM ',tipolamina,' WHERE block_number = ',x$block,' AND row = ',x$row,' AND colum = ',x$column,sep=''))[[1]] if (!(is.na(desc))) { .spot <- dbGetQuery(con,paste('SELECT row,colum FROM ',tipolamina, ' WHERE gene_id = \'',desc,'\' AND block_number = ',x$block,' AND (row != ',x$row,' OR colum != ',x$colum,')',sep='')) if (dim(.spot)[1] == 0) { return(NA) } .spot <- as.matrix(.spot) matdist <- NULL for (i in 1:dim(.spot)[1]) { dist <- distancia(x$row,x$column,.spot[i,1],.spot[i,2],29) matdist <- rbind(matdist,c(.spot[i,1],.spot[i,2],dist)) } matdist <- as.data.frame(matdist) colnames(matdist) <- c('row','column','dist') choosen <- subset(matdist,dist == min(dist),select=c(row,column)) cspot(x$lamina,x$block,choosen[[1]],choosen[[2]]) } else { return(NA) } } distancia <- function(x,y,z,w,n) { # Computa a distancia entre dois spots que estao numa mesma sublamina nxn # onde as coordenadas sao spot1 : (x,y) # spot2: (z,w) # (row,colum) if (x == z) { abs(y-w) } dist <- (z-x-1)*n + (n-y)+w abs(dist) } reciproca <- function(spot) { con <- checkMAConnect() .rec <- dbGetQuery(con,paste('SELECT reciproca FROM add_info WHERE name = \'',spot$lamina,'\'',sep='')) cspot(.rec[[1]],spot$block,spot$row,spot$column) } print.summary.spot <- function(x) { cat(' Spot summary information\n\n') cat(paste(' array: ',x$lamina,' subarray: ',x$block,' row: ',x$row,' column: ',x$column,'\n',sep='')) cat(paste(' gene_id: ',x$gene.id,'\n description: ',x$desc,sep='')) cat('\n\n Corrected intensities (foreground - background):\n') cat(' \t\tMean \tMedian \tDescription\n') cat(paste(' Cy 3 \t',x$cy3.mean,'\t',x$cy3.median,'\t',x$cy3.extract,'\n',sep='')) cat(paste(' Cy 5 \t',x$cy5.median,'\t',x$cy5.median,'\t',x$cy5.extract,'\n',sep='')) invisible(x) } summary.spot <- function(x) { con <- checkMAConnect() tipolamina <- dbGetQuery(con,paste('SELECT lamina FROM add_info WHERE name = \'',x$lamina,'\'',sep=''))[[1]] spot.info <- dbGetQuery(con,paste('SELECT F635_Median_m_B635,F532_Median_m_B532,F635_Mean_m_B635,F532_Mean_m_B532,gene_id,description FROM ',x$lamina,',',tipolamina,' WHERE ',x$lamina,'.block_number = ',tipolamina,'.block_number AND ',x$lamina,'.row = ',tipolamina,'.row AND ',x$lamina,'.colum = ',tipolamina,'.colum AND ',tipolamina,'.block_number = ',x$block,' AND ',tipolamina,'.row = ',x$row,' AND ',tipolamina,'.colum = ',x$colum,sep='') ) hybrid <- dbGetQuery(con,paste('SELECT cy3_desc,cy5_desc,cy3_control FROM add_info WHERE name = \'',x$lamina,'\'',sep='')) gene.id <- spot.info[[5]] desc <- spot.info[[6]] cy3.mean <- spot.info[[4]] cy3.median <- spot.info[[2]] cy5.mean <- spot.info[[3]] cy5.median <- spot.info[[1]] cy3.extract <- hybrid[[1]] cy5.extract <- hybrid[[2]] result <- list(lamina=x$lamina,block=x$block,row=x$row,column=x$column,gene.id=gene.id,desc=desc,cy3.mean=cy3.mean,cy3.median=cy3.median,cy5.mean=cy5.mean,cy5.median=cy5.median,cy3.extract=cy3.extract,cy5.extract=cy5.extract) class(result) <- c('summary.spot','list') result } cspot <- function(lamina,block,row,column) { .spot <- list(lamina=lamina,block=block,row=row,column=column) class(.spot) <- "spot" .spot } viewspot <- function(spot,surrounds=FALSE) { con <- checkMAConnect() lamina.info <- dbGetQuery(con,paste('SELECT pixel_size,x_origin,y_origin FROM lamina WHERE name = \'',spot$lamina,'\'',sep="")) scale <- lamina.info$'pixel_size' x.origin <- lamina.info$'x_origin'/scale y.origin <- lamina.info$'y_origin'/scale spot.info <- dbGetQuery(con,paste('SELECT x,y,dia FROM ',spot$lamina,' WHERE block_number = ',spot$block,' AND row = ',spot$row,' AND colum = ',spot$column,sep="")) dia <- spot.info$dia/scale x <- spot.info$x/scale - x.origin - dia/2-1 y <- spot.info$y/scale - y.origin - dia/2-1 offset <- dia + 3 if (surrounds) { offset <- dia + 10 } titulo <- paste(spot$lamina,' B',spot$block,' R',spot$row,' C',spot$column,sep='') imgpath <- paste('/var/lib/misc/microarray/',spot$lamina,'.tif',sep="") displaycmd <- paste('display -crop ',offset,'x',offset,'+',x,'+',y,' -geometry 200x200 -title \'',titulo,'\' ',imgpath,'&',sep="") system(displaycmd) } checkMAConnect <- function() { if (!(exists('isIdCurrent'))) { library(RMySQL) } if (!(isIdCurrent(get('.MAConnect',pos=.GlobalEnv)))) { warning('DB Connection not active, setting it up.\n') assign('.MAConnect', dbConnect(dbDriver('MySQL'),'microarray',user='mentus',password='kafka'),pos=.GlobalEnv) } get('.MAConnect',pos=.GlobalEnv) } .MAConnect <- NULL