R2-39 第二期,第二次作业

redpanda 2018-02-14 05:22:50 阅读: 1372


Task1:

# Task 1: make sure the MySQL driver is available, open a connection to the
# local `rdb` database and switch the session character set to UTF-8.

# Install the packages only when they are missing, so re-running the script
# does not reinstall them every time.
for (pkg in c("RMySQL", "DBI")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(RMySQL)

con <- dbConnect(
  MySQL(),
  host = "localhost",
  dbname = "rdb",
  user = "root",
  password = ""
)

# The original statement ended with a typographic quote, which left the string
# unterminated. The result set is cleared right away so that no result stays
# pending on `con`.
dbClearResult(dbSendQuery(con, "SET NAMES utf8"))

# Print the connection details to confirm that the connection works.
dbGetInfo(con)

Task2:

# Task 2: page through the PubMed esearch results and append every PMID to the
# `article` table. Relies on `con`, the open RMySQL connection from Task 1.
library(httr)

totalNum <- 563  # total number of hits, taken from the previous assignment
pageSize <- 10   # hits per request
totalPage <- ceiling(totalNum / pageSize)  # number of pages to request
term <- "(cell[TA]) AND 2017[DP]"
usehistory <- "Y"  # ask the server to keep the search in its history
querykey <- ""
webenv <- ""
postSearchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

for (currentPage in seq_len(totalPage)) {
  retstart <- (currentPage - 1) * pageSize
  r <- POST(
    postSearchUrl,
    body = list(
      db = "pubmed",
      term = term,
      retmode = "json",
      retstart = retstart,
      retmax = pageSize,
      usehistory = usehistory,
      rettype = "uilist"
    )
  )
  stop_for_status(r)  # abort on an HTTP error status

  data <- content(r, "parsed", "application/json")
  esearchresult <- data$esearchresult
  querykey <- esearchresult$querykey
  webenv <- esearchresult$webenv

  # `idlist` holds the PMIDs of this page; flatten it in one step instead of
  # growing a vector element by element.
  idlist <- esearchresult$idlist
  pmid <- as.character(unlist(idlist))

  # An empty page would produce a data frame without columns, so stop paging
  # instead of handing that to dbWriteTable().
  if (length(pmid) == 0) {
    break
  }

  # append = TRUE adds the rows to the existing `article` table.
  article <- data.frame(pmid = pmid, stringsAsFactors = FALSE)
  dbWriteTable(con, "article", article, append = TRUE)

  # Short pause between requests to stay within the E-utilities rate limit.
  Sys.sleep(0.4)
}

# Returned TRUE in the original console session.
dbDisconnect(con)

Task3:

# Task 3: for every row of `article` with isdone = 0, fetch the PubMed record
# through efetch and store its title and abstract, marking the row as done.
library(RMySQL)
library(xml2)
library(httr)

# killDbConnections() is a helper that is not defined in this script; call it
# only when the session provides it. In the original console session it
# printed "0  connections killed.".
if (exists("killDbConnections", mode = "function")) {
  killDbConnections()
}

# Open a UTF-8 connection to the local `rdb` database.
open_rdb <- function() {
  conn <- dbConnect(
    MySQL(),
    host = "localhost",
    dbname = "rdb",
    user = "root",
    password = ""
  )
  dbClearResult(dbSendQuery(conn, "SET NAMES utf8"))
  conn
}

# Render one character value as an SQL literal; NA becomes NULL.
sql_value <- function(conn, x) {
  if (is.na(x)) {
    return("NULL")
  }
  as.character(dbQuoteString(conn, x))
}

postFetchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# `con` streams the pending SELECT, so the UPDATE statements go through a
# second connection. It is opened once instead of once per article.
con <- open_rdb()
con2 <- open_rdb()
rs <- NULL

# on.exit() has no enclosing function at top level, so cleanup is done in a
# `finally` clause; it runs whether the loop completes or fails.
tryCatch(
  {
    rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")

    while (!dbHasCompleted(rs)) {
      chunk <- dbFetch(rs, 10)
      if (nrow(chunk) == 0) {
        break  # nothing left; do not post an empty id list
      }

      # Column 3 is used as the PMID column, as in the original code.
      pmidStr <- paste(chunk[[3]], collapse = ",")

      r2 <- POST(
        postFetchUrl,
        body = list(
          db = "pubmed",
          id = pmidStr,
          retmode = "xml"
        )
      )
      stop_for_status(r2)  # abort on an HTTP error status

      data2 <- content(r2, "parsed", "application/xml")
      article <- xml_children(data2)

      for (cnt in seq_along(article)) {
        node <- article[[cnt]]
        title <- xml_text(xml_find_first(node, ".//ArticleTitle"))
        pmid <- xml_text(xml_find_first(node, ".//PMID"))
        if (is.na(pmid)) {
          next  # no PMID in this record, so there is no row to update
        }

        # An abstract may consist of several AbstractText sections; keep all
        # of them rather than only the first one.
        parts <- xml_text(xml_find_all(node, ".//Abstract/AbstractText"))
        abstract <- if (length(parts) > 0) {
          paste(parts, collapse = " ")
        } else {
          NA_character_
        }

        # Values are escaped by the driver instead of having their
        # apostrophes deleted, so the stored text stays intact and cannot
        # break the statement.
        sql <- paste0(
          "UPDATE article SET title=", sql_value(con2, title),
          ",abstract=", sql_value(con2, abstract),
          ",isdone=1 where pmid=", sql_value(con2, pmid)
        )
        dbClearResult(dbSendQuery(con2, sql))
      }

      # Pause once per efetch request (previously once per updated row).
      Sys.sleep(1)
    }
  },
  finally = {
    if (!is.null(rs)) {
      dbClearResult(rs)
    }
    dbDisconnect(con2)
    dbDisconnect(con)
  }
)


 

 
邀请讨论

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
{{item.nick_name}} 受邀请回答 {{item.create_time}}
{{item.refer_comment.nick_name}} {{item.refer_comment.create_time}}

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
切换到完整回复 发送回复
赞({{item.count_zan}}) 踩({{item.count_cai}}) 删除 回复 关闭
科研狗©2015-2025 科研好助手,京ICP备20005780号-1 建议意见

服务热线

178 0020 3020

微信服务号