服务热线
178 0020 3020
任务一:


任务二:获得并存储 Cell 杂志 2017 年发表文章的 PubMed ID(PMID)
# Task 2: collect the PubMed IDs of the articles published in Cell in 2017
# through the NCBI E-utilities ESearch endpoint and append them, one page at a
# time, to the MySQL table `article`.
library("RMySQL")
library("httr")

# killDbConnections() is defined outside this script. It is called before the
# connection is opened, so the script must create its own `con` afterwards;
# the connection parameters match the ones the task 3 script uses.
killDbConnections()
con <- dbConnect(MySQL(), host = "localhost", dbname = "rdb", user = "root", password = "")

# Paging state. `totalNum` is only a starting estimate: it is replaced by the
# hit count the server reports, so the loop neither stops early nor requests
# pages that do not exist when the number of indexed articles changes.
totalNum <- 562
pageSize <- 10
totalPage <- ceiling(totalNum / pageSize)
currentPage <- 1

term <- "(cell[TA]) AND 2017[DP]"
usehistory <- "Y"
querykey <- " "
webenv <- " "
postSearchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

while (currentPage <= totalPage) {
  retstart <- (currentPage - 1) * pageSize
  r <- POST(
    postSearchUrl,
    body = list(
      db = "pubmed",
      term = term,
      retmode = "json",
      retstart = retstart,
      retmax = pageSize,
      usehistory = usehistory,
      rettype = "uilist"
    )
  )
  stop_for_status(r)  # turn an HTTP error status into an R error
  data <- content(r, "parsed", "application/json")

  # Fields used from the reply: $count, $querykey, $webenv and $idlist.
  esearchresult <- data$esearchresult

  # Prefer the server-reported number of hits over the hard-coded estimate.
  reported <- as.numeric(esearchresult$count)
  if (length(reported) == 1 && !is.na(reported)) {
    totalNum <- reported
    totalPage <- ceiling(totalNum / pageSize)
  }

  # Recorded from the latest reply; not sent back in later requests.
  querykey <- esearchresult$querykey
  webenv <- esearchresult$webenv
  idlist <- esearchresult$idlist

  # Flatten the parsed JSON list of IDs into a character vector in one step
  # instead of growing a vector element by element.
  pmid <- as.character(unlist(idlist))
  if (length(pmid) == 0) {
    # No IDs on this page: there is nothing to store and no further page.
    break
  }

  article <- data.frame(pmid = pmid, stringsAsFactors = FALSE)
  dbWriteTable(con, "article", article, append = TRUE)
  currentPage <- currentPage + 1
}
# close the connection
dbDisconnect(con)
任务三:获得并存储cell杂志2017年发表文章的title和abstract
# Task 3 script: for every row of the `article` table with isdone=0, request
# the PubMed record through the E-utilities EFetch endpoint and write the
# title and abstract back to that row, setting isdone=1.
# NOTE(review): this excerpt ends before the two open `while` loops are
# closed; the rest of the script is not visible here.
library("RMySQL")
library("xml2")
library("httr")
# killDbConnections() is not defined in this excerpt -- presumably it closes
# stale MySQL connections; verify against its definition.
killDbConnections()
con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con,'SET NAMES utf8')
# NOTE(review): on.exit() is called at script top level here, not inside a
# function -- confirm that the disconnect really runs when the script ends.
on.exit(dbDisconnect(con))
rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")
# Read the pending rows 10 at a time until the result set is exhausted.
while (!dbHasCompleted(rs)) {
chunk <- dbFetch(rs, 10)
#mode(chunk)
#print(chunk)
#chunk[x,3]
# Join column 3 of every row in this chunk into one comma-separated string.
# NOTE(review): presumably column 3 is pmid -- confirm against the table
# schema. Because pmidStr starts as "", the result begins with a leading ",".
pmidStr=""
i=1
n=nrow(chunk)
while (i<=n){
pmidStr = paste(pmidStr,chunk[i,3],sep=",")
i = i + 1
}
# One EFetch request for the whole chunk, with the reply requested as XML.
postFetchUrl="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
r2 <- POST(postFetchUrl,
body = list(
db="pubmed",
id=pmidStr,
retmode="xml"
)
)
stop_for_status(r2) # turn an HTTP error status into an R error
data2=content(r2, "parsed", "application/xml")
# The children of the parsed document are handled one by one below.
article=xml_children(data2)
#xml_length(article)
count=length(article)
cnt=1
while(cnt<=count){
# Take the text of the first ArticleTitle, AbstractText and PMID node found
# under this child; only the first match of each is used.
title=xml_text(xml_find_first(article[cnt],".//ArticleTitle"))
abstract=xml_text(xml_find_first(article[cnt],".//AbstractText"))
pmid=xml_text(xml_find_first(article[cnt],".//PMID"))
# Remove single quotes so the values can sit inside the quoted SQL literals.
# NOTE(review): the statement is assembled by string concatenation and no
# other character is escaped -- consider a parameterised query.
title = gsub("'","",title)
abstract = gsub("'","",abstract)
sql=paste("UPDATE article SET title='",title,"',abstract='",abstract,"',isdone=1"," where pmid='",pmid,"'",sep="")
# A separate connection is opened and closed for every UPDATE -- presumably
# because `con` still holds the pending result set `rs`; TODO confirm.
con2 <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con2,'SET NAMES utf8')
dbSendQuery(con2,sql)
dbDisconnect(con2)
cnt = cnt + 1
附件