From 842ba5f27e67211ce6ccaa84e22a19cb2fccc538 Mon Sep 17 00:00:00 2001
From: peguerin <pierre-edouard.guerin@cefe.cnrs.fr>
Date: Mon, 11 Jan 2021 17:03:26 +0100
Subject: [PATCH] download large file

---
 usage_rentrez.R | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/usage_rentrez.R b/usage_rentrez.R
index c9f5e0c..121a566 100644
--- a/usage_rentrez.R
+++ b/usage_rentrez.R
@@ -13,7 +13,8 @@ entrez_db_searchable('nucleotide')
 query <- entrez_search(db="nucleotide",
                        term= "(MHC[GENE] OR (major histocompatibility
 complex[MeSH])) AND actinopterygii[ORGN] AND 1992:2021[PDAT]",
-                       retmax=99999)
+                       retmax=99999,
+                       use_history=TRUE)
 
 search_year <- function(year, term){
   query <- paste(term, " AND (", year, "[PDAT])")
@@ -28,3 +29,31 @@ publications <- sapply(year,
                  USE.NAMES=FALSE)
 
 plot(year, publications, type="l",  main="MHC actinopterygii")
+
+## Save a small sample: the first 30 hits (fewer if the search returned less).
+recs <- entrez_fetch(db="nucleotide", id=head(query$ids, 30), rettype="fasta")
+write(recs, file="query.fasta")
+
+## Download every hit in batches of 50 through the stored web history.
+## retstart is 0-based in E-utilities, and query$count (unlike query$ids,
+## which retmax caps) is the real total. Batch 1 overwrites query.fasta.
+n_batches <- ceiling(query$count / 50)
+for (seq_start in 50L * (seq_len(n_batches) - 1L)) {
+  recs <- entrez_fetch(db="nucleotide", web_history=query$web_history,
+                       rettype="fasta", retmax=50, retstart=seq_start)
+  cat(recs, file="query.fasta", append=seq_start > 0)
+  cat(min(seq_start + 50L, query$count), "sequences downloaded\r")
+}
+
+### Look up MHC genes and fetch one linked nucleotide sequence
+entrez_db_summary("gene")      # describe the gene database
+entrez_db_searchable("gene")   # list its searchable fields
+## Note: this replaces the nucleotide search result held in `query`.
+query <- entrez_search(db="gene",
+                       term="(MHC II DRB[GENE]) OR (major histocompatibility complex[MeSH])")
+## Pass the ID vector itself: query[1] is a one-element list wrapping the
+## IDs, not the character vector of gene IDs that entrez_link takes as `id`.
+link <- entrez_link(db="nucleotide", dbfrom="gene", id=query$ids)
+nuclinks <- link$links$gene_nuccore
+recs <- entrez_fetch(db="nucleotide", id=nuclinks[1], rettype="fasta")
+
-- 
GitLab