diff --git a/files/Snakefile b/files/Snakefile index c67362fc71341f84123518f4bf1e8499b2cbf286..ddb07525692313ba961fe778f06a107fb320687f 100644 --- a/files/Snakefile +++ b/files/Snakefile @@ -358,6 +358,7 @@ rule edger: de_table = config["results_dir"]+'/'+config["edger_output_dir"]+'/de_table.csv', r_data = config["results_dir"]+'/'+config["edger_output_dir"]+'/data.RData', PCA = config["results_dir"]+'/'+config["edger_output_dir"]+'/PCA_mqc.png', + MD = config["results_dir"]+'/'+config["edger_output_dir"]+'/MD_mqc.png', Top_genes = config["results_dir"]+'/'+config["edger_output_dir"]+'/Top_genes_mqc.tsv', Heatmap = config["results_dir"]+'/'+config["edger_output_dir"]+'/Heatmap_mqc.png', log: config["results_dir"]+'/logs/edger/edger_log.txt' diff --git a/files/scripts/edger.script.R b/files/scripts/edger.script.R index 26720b3f283330c61254a1bd16b6185d4fd6b25a..0aa3eec20c8985e02b4ce96f1777dff88ca981d6 100644 --- a/files/scripts/edger.script.R +++ b/files/scripts/edger.script.R @@ -22,7 +22,7 @@ if (snakemake@config[["edger_tx2gene"]] == TRUE){ } cts <- txi$counts -dgelist <- DGEList(cts) +dgelist <- DGEList(cts,group=as.factor(conditions)) #Filtering countsPerMillion <- cpm(dgelist) @@ -33,25 +33,22 @@ dgelist <- dgelist[keep,] dgelist <- calcNormFactors(dgelist, method=snakemake@config[["edger_normfact"]]) #plotMDS(dgelist) -#'grep' is a string matching function. -designMat <- model.matrix(~conditions) -colnames(designMat)[2] = "Conditions" -de.com <- c() -dgelist$samples$group = designMat[,2] +groups = as.factor(conditions) +design <- model.matrix(~groups) if (snakemake@config[["edger_testType"]] == "exactTest"){ if (snakemake@config[["edger_dispersion"]] == 0){ - dgelist <- edgeR::estimateDisp(dgelist, designMat) + dgelist <- edgeR::estimateDisp(dgelist,design=design) de.com <- edgeR::exactTest(dgelist) }else{ de.com <- edgeR::exactTest(dgelist, dispersion=snakemake@config[["edger_dispersion"]]) } } else if (snakemake@config[["edger_testType"]] == "glmLRT"){ if (snakemake@config[["edger_dispersion"]] == 0){ - dgelist <- edgeR::estimateDisp(dgelist, designMat) - fit <- edgeR::glmFit(dgelist, designMat) + dgelist <- edgeR::estimateDisp(dgelist,design=design) + fit <- edgeR::glmFit(dgelist) } else { - fit <- edgeR::glmFit(dgelist, designMat, dispersion=snakemake@config[["edger_dispersion"]]) + fit <- edgeR::glmFit(dgelist, dispersion=snakemake@config[["edger_dispersion"]]) } de.com <- edgeR::glmLRT(fit,coef=2) } @@ -77,6 +74,9 @@ png(filename = paste(snakemake@output[["PCA"]],sep = "/"), height=800, width=800 ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point() dev.off() +png(filename = paste(snakemake@output[["MD"]],sep = "/"), height=800, width=800) +plotMD(de.com) +dev.off() #plotMDS(dgelist, main = "Distances between gene expression profiles") #plotSmear(lrt, de.tags=tt$table$gene.id) @@ -84,8 +84,8 @@ dev.off() topGenes = topTags(de.com,n = 40) cat("# id: Top_genes -# section_name: 'Top genes' -# description: 'Tableau des top genes' +# section_name: 'Top 40 genes' +# description: 'Tableau des 40 top genes' # format: 'tsv' # plot_type: 'table'\ngene\t", file=snakemake@output[["Top_genes"]])