R2-09-第二期-2 用 R 读取 PubMed 数据并存入 MySQL 数据库

Giant Panda 2018-02-07 19:38:09 阅读: 1278

任务一:

111.png


2222.png


任务二:获取并存储 Cell 杂志 2017 年发表文章的 PubMed ID(PMID)

# Task 2: page through the PubMed ESearch results for articles published in
# Cell during 2017 and append every returned PMID to the MySQL table `article`.
#
# NOTE(review): `con` (an open DBI/RMySQL connection) and killDbConnections()
# are not defined in this listing -- presumably they come from the Task 1
# setup; confirm before running this script on its own.
killDbConnections()
library("httr")

# Initial estimate only; it is replaced by the hit count that ESearch reports,
# so the loop stays correct when the number of matching articles changes.
totalNum <- 562
pageSize <- 10
totalPage <- ceiling(totalNum / pageSize)
currentPage <- 1
term <- "(cell[TA]) AND 2017[DP]"

usehistory <- 'Y'
querykey <- " "
webenv <- " "

postSearchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
while (currentPage <= totalPage) {
  # Zero-based offset of the first record on this page.
  retstart <- (currentPage - 1) * pageSize
  r <- POST(postSearchUrl,
            body = list(
              db = "pubmed",
              term = term,
              retmode = "json",
              retstart = retstart,
              retmax = pageSize,
              usehistory = usehistory,
              rettype = "uilist"
            )
  )

  # Turn an HTTP error status into an R error instead of parsing a bad body.
  stop_for_status(r)
  data <- content(r, "parsed", "application/json")

  # esearchresult carries idlist, count, retmax, retstart, querykey and webenv.
  esearchresult <- data$esearchresult

  # Re-derive the page count from the server-reported total rather than
  # trusting the hard-coded estimate above.
  reportedCount <- suppressWarnings(as.integer(esearchresult$count))
  if (length(reportedCount) == 1 && !is.na(reportedCount)) {
    totalNum <- reportedCount
    totalPage <- ceiling(totalNum / pageSize)
  }

  # Kept for a later history-server request; not used again in this block.
  querykey <- esearchresult$querykey
  webenv <- esearchresult$webenv

  # Flatten the parsed JSON list of ids in one step instead of growing a
  # vector element by element.
  pmid <- as.character(unlist(esearchresult$idlist))

  # An empty page would yield a zero-column data frame; skip the write then.
  if (length(pmid) > 0) {
    article <- data.frame("pmid" = pmid)
    dbWriteTable(con, "article", article, append = TRUE)
  }
  currentPage <- currentPage + 1
}
# Release the database connection once every page has been stored.
dbDisconnect(con)


任务三:获取并存储 Cell 杂志 2017 年发表文章的标题(title)和摘要(abstract)

# Task 3: for every row of `article` still flagged isdone=0, request the
# record from PubMed EFetch and write its title and abstract back to MySQL.
# NOTE(review): this listing ends inside the inner loop (the closing braces of
# both while loops are not visible), so it is a fragment, not a runnable script.
library("RMySQL")
library("xml2")
library("httr")
# NOTE(review): killDbConnections() is not defined in this listing --
# presumably a helper that closes leftover MySQL connections; verify.
killDbConnections()  
con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
# Ask the server to exchange text with this session as UTF-8.
dbSendQuery(con,'SET NAMES utf8')
# NOTE(review): on.exit() registers cleanup for the enclosing function call;
# this statement sits at script top level, so confirm it really closes `con`.
on.exit(dbDisconnect(con))
# Select the rows that have not been enriched yet.
rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")
# Consume the result set in batches of 10 rows.
while (!dbHasCompleted(rs)) {
  chunk <- dbFetch(rs, 10)
  #mode(chunk)
  #print(chunk)
  #chunk[x,3] 
  # Build the comma-separated id list for EFetch from column 3 of the batch.
  # NOTE(review): assumes column 3 of `article` is the pmid -- confirm against
  # the table schema. Because pmidStr starts as "", the joined string begins
  # with a leading comma (",id1,id2,...").
  pmidStr=""
  i=1
  n=nrow(chunk) 
  while (i<=n){
    pmidStr = paste(pmidStr,chunk[i,3],sep=",")
    i = i + 1
  }
  postFetchUrl="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
  r2 <- POST(postFetchUrl, 
             body = list(
               db="pubmed",
               id=pmidStr,
               retmode="xml"
             )
  )
  stop_for_status(r2) # raise an R error when the HTTP status signals failure
  data2=content(r2, "parsed", "application/xml")
  # The children of the parsed document root are iterated as one record each.
  article=xml_children(data2)
  #xml_length(article)
  count=length(article)
  cnt=1
  while(cnt<=count){
    # xml_find_first keeps only the first matching node under this record.
    # NOTE(review): a record with several AbstractText nodes would lose all
    # but the first one here.
    title=xml_text(xml_find_first(article[cnt],".//ArticleTitle")) 
    abstract=xml_text(xml_find_first(article[cnt],".//AbstractText"))
    pmid=xml_text(xml_find_first(article[cnt],".//PMID"))

    # Single quotes are removed (not escaped) so the values fit inside the
    # quoted SQL literals below; this alters the stored text.
    title = gsub("'","",title)
    abstract = gsub("'","",abstract)
    
    # NOTE(review): the statement is assembled by string concatenation; only
    # single quotes are handled, other special characters (e.g. backslashes)
    # pass through unchanged.
    sql=paste("UPDATE article SET title='",title,"',abstract='",abstract,"',isdone=1"," where pmid='",pmid,"'",sep="")

    # A separate connection is opened and closed for every record while `rs`
    # is still pending on `con`.
    con2 <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
    dbSendQuery(con2,'SET NAMES utf8')
    dbSendQuery(con2,sql)
    dbDisconnect(con2)
    cnt = cnt + 1

7777.png

 
邀请讨论

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
{{item.nick_name}} 受邀请回答 {{item.create_time}}
{{item.refer_comment.nick_name}} {{item.refer_comment.create_time}}

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
切换到完整回复 发送回复
赞({{item.count_zan}}) 踩({{item.count_cai}}) 删除 回复 关闭
科研狗©2015-2025 科研好助手,京ICP备20005780号-1 建议意见

服务热线

178 0020 3020

微信服务号