From 842ba5f27e67211ce6ccaa84e22a19cb2fccc538 Mon Sep 17 00:00:00 2001
From: peguerin <pierre-edouard.guerin@cefe.cnrs.fr>
Date: Mon, 11 Jan 2021 17:03:26 +0100
Subject: [PATCH] download large file

---
 usage_rentrez.R | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/usage_rentrez.R b/usage_rentrez.R
index c9f5e0c..121a566 100644
--- a/usage_rentrez.R
+++ b/usage_rentrez.R
@@ -13,7 +13,8 @@ entrez_db_searchable('nucleotide')
 
 query <- entrez_search(db="nucleotide",
                        term= "(MHC[GENE] OR (major histocompatibility complex[MeSH])) AND actinopterygii[ORGN] AND 1992:2021[PDAT]",
-                       retmax=99999)
+                       retmax=99999,
+                       use_history=TRUE)
 
 search_year <- function(year, term){
     query <- paste(term, " AND (", year, "[PDAT])")
@@ -28,3 +29,42 @@ publications <- sapply(year,
                        USE.NAMES=FALSE)
 
 plot(year, publications, type="l", main="MHC actinopterygii")
+
+## quick sample: first (up to) 30 records only
+recs <- entrez_fetch(db="nucleotide", id=head(query$ids, 30), rettype="fasta")
+write(recs, file="query.fasta")
+
+## download large queries
+## Fetch every hit in batches of 50 through the NCBI web history.
+## retstart is 0-based, so the first batch starts at 0 (starting at 1
+## would silently skip the first record). query$count is the total number
+## of hits, whereas length(query$ids) is capped by retmax.
+## The first batch overwrites query.fasta so the sample written above is
+## not duplicated in the final file.
+batch_size <- 50
+n_records <- query$count
+if (n_records > 0) {
+    for( seq_start in seq(0, n_records - 1, batch_size)){
+        recs <- entrez_fetch(db="nucleotide", web_history=query$web_history,
+                             rettype="fasta", retmax=batch_size, retstart=seq_start)
+        cat(recs, file="query.fasta", append=seq_start > 0)
+        cat(min(seq_start + batch_size, n_records), "sequences downloaded\r")
+    }
+}
+
+
+
+
+### seek MHC gene
+entrez_db_summary("gene")
+entrez_db_searchable("gene")
+
+query <- entrez_search(db="gene", term= "(MHC II DRB[GENE]) OR (major histocompatibility complex[MeSH])")
+
+## query[1] is a one-element list, not a vector of IDs; pass the IDs themselves
+link <- entrez_link(db="nucleotide", dbfrom="gene", id=query$ids)
+
+nuclinks <- link$links$gene_nuccore
+
+recs <- entrez_fetch(db="nucleotide", id=nuclinks[1], rettype="fasta")
+
-- 
GitLab