R2-35 第二次

德先森 2018-02-13 12:51:23 阅读: 1466

第一题

安装的黄色变不绿的为问题,主要是80的端口占用的问题,具体建议看

http://blog.csdn.net/it429/article/details/51143836

install.packages("RMySQL")

library(RMySQL)

help(package="RMySQL")

con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")

dbSendQuery(con,'SET NAMES utf8')

killDbConnections <- function () {

  all_cons <- dbListConnections(MySQL())

  print(all_cons)

  for(con in all_cons)

    +  dbDisconnect(con)

  print(paste(length(all_cons), " connections killed."))

}

dbGetInfo(con) 

dbListTables(con) 

dbRemoveTable(con,"test")

> dbGetInfo(con) 

$host

[1] "localhost"

$user

[1] "root"

$dbname

[1] "rdb"

$conType

[1] "localhost via TCP/IP"

$serverVersion

[1] "5.7.19"

$protocolVersion

[1] 10

$threadId

[1] 45

$rsId

$rsId[[1]]

<MySQLResult:0,0,0>

>

> dbListTables(con) 

[1] "article" "word"  

>

> dbRemoveTable(con,"test")

[1] FALSE

第二题

killDbConnections()

install.packages("httr")

library(DBI)

library(RMySQL)

con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")

dbSendQuery(con,'SET NAMES utf8')

library(httr)

totalNum=562

pageSize=10

totalPage=ceiling(totalNum/pageSize)

currentPage=1

term='(cell[TA]) AND 2017[DP]'

usehistory='Y'

querykey=''

webenv=''

postSearchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'

while(currentPage<=totalPage){

  retstart=(currentPage-1)*pageSize

  r <- POST(postSearchUrl,

       

            body = list(

              db='pubmed',

              term=term,

              retmode='json',

              retstart=retstart,

              retmax=pageSize,

              usehistory=usehistory,

              rettype='uilist'

            )

           

  )

  stop_for_status(r)

  data=content(r, "parsed", "application/json")

  esearchresult=data$esearchresult

  querykey=esearchresult$querykey

  webenv=esearchresult$webenv

  idlist =esearchresult$idlist

  n = length(idlist)

  pmid=c()

  i = 1

  while(i<=n){

    pmid=c(pmid, as.character(idlist[i][1]))

    i = i+1

  }

  article=data.frame('pmid'=pmid)

  dbWriteTable(con,"article",article,append=TRUE)

  currentPage = currentPage + 1

}

dbDisconnect(con)

killDbConnections()

install.packages("httr")

library(DBI)

library(RMySQL)

con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")

dbSendQuery(con,'SET NAMES utf8')

library(httr)

totalNum=562

pageSize=10

totalPage=ceiling(totalNum/pageSize)

currentPage=1

term='(cell[TA]) AND 2017[DP]'

usehistory='Y'

querykey=''

webenv=''

postSearchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'

while(currentPage<=totalPage){

  retstart=(currentPage-1)*pageSize

  r <- POST(postSearchUrl,

       

            body = list(

              db='pubmed',

              term=term,

              retmode='json',

              retstart=retstart,

              retmax=pageSize,

              usehistory=usehistory,

              rettype='uilist'

            )

           

  )

  stop_for_status(r)

  data=content(r, "parsed", "application/json")

  esearchresult=data$esearchresult

  querykey=esearchresult$querykey

  webenv=esearchresult$webenv

  idlist =esearchresult$idlist

  n = length(idlist)

  pmid=c()

  i = 1

  while(i<=n){

    pmid=c(pmid, as.character(idlist[i][1]))

    i = i+1

  }

  article=data.frame('pmid'=pmid)

  dbWriteTable(con,"article",article,append=TRUE)

  currentPage = currentPage + 1

}

dbDisconnect(con)

第三题

install.packages("xml2")
library(RMySQL)
library(xml2)
library(httr)

con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con,'SET NAMES utf8')
on.exit(dbDisconnect(con))

rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")
while (!dbHasCompleted(rs)) {
  chunk <- dbFetch(rs, 10)
  #mode(chunk)
  #print(chunk)
  #chunk[x,3] 第3列为获取到的pmid
  pmidStr=""
  i=1
  n=nrow(chunk) #获得总行数,和上面设置的10一致,最后的时候是3
  while (i<=n){ #循环将各个pmid之间用逗号连接起来
    pmidStr = paste(pmidStr,chunk[i,3],sep=",")
    i = i + 1
  }
  #pmid=",29195067,29195066,29195065,29195064,29153837,29153836,29153835,29153834,29153833,29153832"
  #去掉第一个逗号,从第2位起,到100000位,上面字符串没有这么多字符,因此到末尾
  pmidStr=substr(pmidStr,2,100000)
  #上面字符串就是我们post到pubmed上面的字符串,用于获取title和abstract
 
  #下面就是第一次作业里面获取title和abstract
  postFetchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
  r2 <- POST(postFetchUrl,
             body = list(
               db='pubmed',
               id=pmidStr,
               retmode='xml'
             )
  )
  stop_for_status(r2) #clear http status
  data2=content(r2, "parsed", "application/xml")
  article=xml_children(data2)
  #xml_length(article)为里面文章的数量
  count=length(article)
  cnt=1
  while(cnt<=count){
    #下面的xml_text和xml_find_first均为XML2包里面的函数
    title=xml_text(xml_find_first(article[cnt],".//ArticleTitle")) #找到第一个ArticleTitle节点
    abstract=xml_text(xml_find_first(article[cnt],".//AbstractText"))
    pmid=xml_text(xml_find_first(article[cnt],".//PMID"))
    title = gsub("'","",title)
    abstract = gsub("'","",abstract)
    sql=paste("UPDATE article SET title='",title,"',abstract='",abstract,"',isdone=1"," where pmid='",pmid,"'",sep="")
    con2 <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
    dbSendQuery(con2,'SET NAMES utf8')
    dbSendQuery(con2,sql)
    dbDisconnect(con2)
    cnt = cnt + 1
    Sys.sleep(1)
  }
}

在row6 开始报错,还在想办法中,先提交这些结果吧

1.PNG

 
邀请讨论

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
{{item.nick_name}} 受邀请回答 {{item.create_time}}
{{item.refer_comment.nick_name}} {{item.refer_comment.create_time}}

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
切换到完整回复 发送回复
赞({{item.count_zan}}) 踩({{item.count_cai}}) 删除 回复 关闭
科研狗©2015-2025 科研好助手,京ICP备20005780号-1 建议意见

服务热线

178 0020 3020

微信服务号