服务热线
178 0020 3020
任务一:
任务二:获得并存储 Cell 杂志 2017 年发表文章的 PubMed ID(PMID)
# Task 2: collect the PubMed IDs of every article published in Cell in 2017
# and append them to the `article` table.
#
# ESearch is queried one page (`pageSize` IDs) at a time. Each page of IDs is
# written to MySQL before the next page is requested.
library("httr")
library("RMySQL")

# Drop stale MySQL connections, then open a fresh one. The settings are the
# same local database the title/abstract step of this workflow connects to.
killDbConnections()
con <- dbConnect(
  MySQL(),
  host = "localhost", dbname = "rdb", user = "root", password = ""
)

totalNum <- 562 # initial estimate; replaced by the count ESearch reports
pageSize <- 10
totalPage <- ceiling(totalNum / pageSize)
currentPage <- 1
term <- "(cell[TA]) AND 2017[DP]"
usehistory <- "Y"
querykey <- " "
webenv <- " "
postSearchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

# `finally` closes the connection even when a request or a write fails.
invisible(tryCatch(
  {
    while (currentPage <= totalPage) {
      retstart <- (currentPage - 1) * pageSize
      r <- POST(
        postSearchUrl,
        body = list(
          db = "pubmed",
          term = term,
          retmode = "json",
          retstart = retstart,
          retmax = pageSize,
          usehistory = usehistory,
          rettype = "uilist"
        )
      )
      stop_for_status(r) # abort on any HTTP error status

      data <- content(r, "parsed", "application/json")
      # The parsed result carries $idlist, $count, $retmax, $retstart,
      # $querykey and $webenv.
      esearchresult <- data$esearchresult
      querykey <- esearchresult$querykey
      webenv <- esearchresult$webenv

      # Page over the number of hits the server reports instead of relying
      # on the hard-coded estimate, so no page is missed or requested in vain.
      reported <- esearchresult$count
      if (length(reported) == 1 && grepl("^[0-9]+$", reported)) {
        totalNum <- as.numeric(reported)
        totalPage <- ceiling(totalNum / pageSize)
      }

      # `idlist` is a list of ID strings; flatten it in one step.
      pmid <- as.character(unlist(esearchresult$idlist))

      # An empty page would give a zero-column data frame; skip the write.
      if (length(pmid) > 0) {
        article <- data.frame("pmid" = pmid, stringsAsFactors = FALSE)
        dbWriteTable(con, "article", article, append = TRUE)
      }

      currentPage <- currentPage + 1

      # Stay under NCBI's limit of three requests per second without a key.
      if (currentPage <= totalPage) {
        Sys.sleep(0.34)
      }
    }
  },
  finally = dbDisconnect(con)
))
任务三:获得并存储 Cell 杂志 2017 年发表文章的标题(title)和摘要(abstract)
# Task 3: fetch title and abstract for every row of `article` that is still
# marked isdone=0, and store them back in the same row.
#
# Pending PMIDs are read up front, sent to EFetch in batches, and each
# returned record becomes one UPDATE that also sets isdone=1.
library("RMySQL")
library("xml2")
library("httr")

postFetchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
batchSize <- 10

# Text of the first node matching `xpath` below `node`, or "" when there is
# no such node (so the literal text "NA" is never written to the database).
first_text <- function(node, xpath) {
  value <- xml_text(xml_find_first(node, xpath))
  if (is.na(value)) "" else value
}

# Quote `value` as a MySQL string literal. Escaping is done by the driver, so
# apostrophes and backslashes in titles/abstracts are kept and stay harmless.
sql_literal <- function(con, value) {
  paste0("'", dbEscapeStrings(con, value), "'")
}

killDbConnections()
con <- dbConnect(
  MySQL(),
  host = "localhost", dbname = "rdb", user = "root", password = ""
)

# on.exit() gives no reliable cleanup at the top level of a script, so the
# connection is released through `finally` instead.
invisible(tryCatch(
  {
    dbClearResult(dbSendQuery(con, "SET NAMES utf8"))

    # dbGetQuery() fetches and clears the result set, which leaves `con` free
    # for the UPDATE statements below; no second connection is needed.
    pending <- dbGetQuery(con, "SELECT pmid FROM article WHERE isdone=0")
    pmids <- as.character(pending$pmid)
    batches <- split(pmids, ceiling(seq_along(pmids) / batchSize))

    for (batch in batches) {
      r2 <- POST(
        postFetchUrl,
        body = list(
          db = "pubmed",
          id = paste(batch, collapse = ","), # "1,2,3" without a leading comma
          retmode = "xml"
        )
      )
      stop_for_status(r2) # abort on any HTTP error status

      data2 <- content(r2, "parsed", "application/xml")
      article <- xml_children(data2) # one child node per returned record

      for (cnt in seq_along(article)) {
        node <- article[[cnt]]

        pmid <- first_text(node, ".//PMID")
        if (!nzchar(pmid)) {
          next # nothing to match a row against
        }

        title <- first_text(node, ".//ArticleTitle")
        # An abstract may be split into several AbstractText sections;
        # keep all of them, in document order.
        abstract <- paste(
          xml_text(xml_find_all(node, ".//AbstractText")),
          collapse = " "
        )

        sql <- paste0(
          "UPDATE article SET title=", sql_literal(con, title),
          ",abstract=", sql_literal(con, abstract),
          ",isdone=1 where pmid=", sql_literal(con, pmid)
        )
        dbClearResult(dbSendQuery(con, sql))
      }

      # Stay under NCBI's limit of three requests per second without a key.
      Sys.sleep(0.34)
    }
  },
  finally = dbDisconnect(con)
))
附件