library("DESeq2") #--------------------------------------------------------------------------------------------------------------------------------- #DEG function - takes as arguments: counData (dataframe containing gene counts with sample_names as columns and genes as rownames); # colData (metadata-dataframe-columns: sample names, conditions and type(SR or PE)); condition_list (list of paired # conditions which are compared); cook (logical: T (default)-cook distance filtering turned on) #--------------------------------------------------------------------------------------------------------------------------------- multiple_deseq2_analyseR=function(countData,colData,condition_list,cook=T){ dds=DESeqDataSetFromMatrix(countData=countData,colData=colData,design=~condition) #filtering out genes with 0 counts dds=dds[rowSums(counts(dds))>1,] #differential expression analysis dds=DESeq(dds) res_list=list() for (i in seq(from=1, to=length(condition_list))){ res=results(dds,cooksCutoff=T,contrast=c("condition",condition_list[[i]])) colnames(res)=sapply(colnames(res),function(x) paste(x,sapply(condition_list[i], "[", c(1)),sep="_")) res_list[[i]]=res } #merging all result tables in one res_temp=merge(as.data.frame(res_list[[1]]),as.data.frame(res_list[[2]]),by.x="row.names",by.y="row.names") colnames(res_temp)[1]="Gene_id" if (length(condition_list)>2){ for (i in seq(from=3, to=length(condition_list))){ res_temp=merge(res_temp,as.data.frame(res_list[[i]]),by.x="Gene_id",by.y="row.names") } } assign("res_temp",res_temp,envir = .GlobalEnv) #returns the list to global environment #returns the list to global environment } #-------------------------------------------------------------------------------------------------------------------------------------- #Producing normalized reads #-------------------------------------------------------------------------------------------------------------------------------------- multiple_deseq2_analyseR_nR=function(countData,colData,cook=T){ ds=DESeqDataSetFromMatrix(countData=countData,colData=colData,design=~condition) #filtering out genes with 0 counts dds=dds[rowSums(counts(dds))>1,] #differential expression analysis dds=DESeq(dds) rld=rlog(dds,blind=F) return(rld) } #-------------------------------------------------------------------------------------------------------------------------------------- #Plotting function-returns in form of pdf file cook distance plot, dispersio plot, heatmap of top 50 disregulated genes, heatmap of #sample to sample distances and PCA plot. Arguments: colData (df with metadata) and countData (count matrix) #-------------------------------------------------------------------------------------------------------------------------------------- plotteR=function(colData,countData){ dds=DESeqDataSetFromMatrix(countData=countData,colData=colData,design=~condition) #filtering out genes with 0 counts dds=dds[rowSums(counts(dds))>1,] dds=DESeq(dds) #generatig normalised counts rld=rlog(dds,blind=F) #graph plotting pdf("RNA_seq_plots",onefile = T) boxplot(log10(assays(dds)[["cooks"]]),range=0,las=2,main="Cooks distance") plotDispEsts(dds,main="Dispersion plot") library("pheatmap") select=order(rowMeans(counts(dds,normalized=T)),decreasing = T)[1:20] pheatmap(assay(rld)[select,], cluster_rows=F, show_rownames=T, cluster_cols=T,main="Heatmap of top 50 disregulated genes") sampleDists <- dist(t(assay(rld))) #Heatmap of sample_to_sample_distance library("RColorBrewer") sampleDistMatrix <- as.matrix(sampleDists) rownames(sampleDistMatrix) <- paste(colnames(assay(rld))) colnames(sampleDistMatrix) <- NULL colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) pheatmap(sampleDistMatrix, clustering_distance_rows=sampleDists, clustering_distance_cols=sampleDists, col=colors,main="Heatmap of sample_to_sample_distance") #PCA analysis #PCA plotting function library(rgl) plotPCA_f=function(x,y, nGroup,v) { n=ncol(x) if(!(n %in% c(2,3))) { # check if 2d or 3d stop("x must have either 2 or 3 columns") } fit=hclust(dist(x), method="complete") # cluster groups= cutree(fit, k=nGroup) if(n == 3) { # 3d plot plot3d(x, col=v, type="s", size=1, axes=F) text3d(x, text=rownames(model$x[,1:2]),adj=1.5) axes3d(edges=c("x--", "y--", "z"), lwd=3, axes.len=2, labels=TRUE) grid3d("x") grid3d("y") grid3d("z") } else { # 2d plot maxes= apply(abs(x), 2, max) rangeX= c(-maxes[1], maxes[1]) rangeY= c(-maxes[2], maxes[2]) plot(x, col=colData$condition, pch=19, xlab=c(colnames(x)[1],y[1]), ylab=c(colnames(x)[2],y[2]), xlim=rangeX, ylim=rangeY,main="PCA plot") lines(c(0,0), rangeX*2) lines(rangeY*2, c(0,0)) text(x, labels=rownames(model$x[,1:2]), pos= 1 ) } } model= prcomp(t(assay(rld)), scale=TRUE) names(vector_3d)=rownames(colData) vector_3d=as.numeric(colData$condition) PoV <- (model$sdev^2/sum(model$sdev^2))*100 PoV=as.character(PoV) PoV=paste(PoV,"%") plotPCA_f(model$x[,1:2], PoV, 2,vector_3d) plotPCA_f(model$x[,1:3],PoV, 4,vector_3d) dev.off() } exporteR=function(res_df,cutoff,file_name){ padj_vector=colnames(res_df)[grepl("padj",names(res_df))] res_tmp=res_df[res_df[,padj_vector[1]]1){ for (i in seq(from=2,to=length(padj_vector))){ res_tmp=res_df[res_df[,padj_vector[i]]