# # Script Name: rblast.R # Purpose of script: Run Blast # Author: Morgane Bruno # Contact: morgane.bruno@cefe.cnrs.fr # Licence: MIT # Requirements: BLAST software (installation: https://github.com/mhahsler/rBLAST/blob/devel/INSTALL) # #_libs if (!require("rBLAST", quietly = TRUE)) { BiocManager::install("rBLAST") } if (!require("Biostrings", quietly = TRUE)) { BiocManager::install("Biostrings") } library(stringr) library(readr) library(here) library(rBLAST) library(Biostrings) #_input ---- ## MIDORI2 "https://www.reference-midori.info/" dl_midori_db <- TRUE gb_version_date <- "263_2024-10-13" #required if dl_midori_db is TRUE subdatabase <- "srRNA" #required if dl_midori_db is TRUE ## Database db_fasta <- here::here("database", "database.fasta") ## Query query_fasta <- here::here("data", "query_sequences.fasta") ## BLAST min_cov <- 80 min_identity <- 60 blast_output <- here::here("blast.out") #_function ---- download_midori_fasta <- function( midori_url = "https://www.reference-midori.info/download/Databases/", midori_gb_v_date, midori_subdatabase, out_fa, dl_method = "curl", midori_subdatabase_name = c(paste0("A", c(6,8)), paste0("CO", 1:3), paste0("ND", 1:6), paste0(c("lr", "sr"), "RNA"), "Cytb", "ND4L") ) { if (!midori_subdatabase %in% midori_subdatabase_name){ stop(paste(midori_subdatabase, " doesn't exist")) } midori_gb_v <- stringr::word(string = midori_gb_v_date, 1, 1, sep = "_") dl_url <- paste0(midori_url, "GenBank", midori_gb_v_date, "/RAW/total/MIDORI2_TOTAL_NUC_GB", midori_gb_v, "_", midori_subdatabase, "_RAW.fasta.gz") dir.create(path = dirname(out_fa), showWarnings = FALSE) if (!file.exists(out_fa)) { outfa_gz <- paste0(out_fa, ".gz") download.file(url = dl_url, destfile = outfa_gz, method = dl_method) system(command = paste("gunzip", outfa_gz)) } } #_main ---- ## Download midori2 fasta file (genbank subdatabase) ---- if (dl_midori_db) { download_midori_fasta(midori_gb_v_date = gb_version_date, midori_subdatabase = subdatabase, out_fa = db_fasta) } ## Make BLAST database ---- rBLAST::makeblastdb(file = db_fasta, dbtype = "nucl") ## Run BLASTN ---- bl <- rBLAST::blast(db = db_fasta) query_seq <- Biostrings::readDNAStringSet(filepath = query_fasta) blast_res <- predict(bl, query_seq, BLAST_args = paste("-qcov_hsp_perc", min_cov, "-perc_identity", min_identity)) readr::write_csv(x = blast_res, file = blast_output)