服务热线
178 0020 3020
第一题
安装的黄色变不绿的为问题,主要是80的端口占用的问题,具体建议看
http://blog.csdn.net/it429/article/details/51143836
install.packages("RMySQL")
library(RMySQL)
help(package="RMySQL")
con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con,'SET NAMES utf8')
killDbConnections <- function () {
all_cons <- dbListConnections(MySQL())
print(all_cons)
for(con in all_cons)
+ dbDisconnect(con)
print(paste(length(all_cons), " connections killed."))
}
dbGetInfo(con)
dbListTables(con)
dbRemoveTable(con,"test")
> dbGetInfo(con)
$host
[1] "localhost"
$user
[1] "root"
$dbname
[1] "rdb"
$conType
[1] "localhost via TCP/IP"
$serverVersion
[1] "5.7.19"
$protocolVersion
[1] 10
$threadId
[1] 45
$rsId
$rsId[[1]]
<MySQLResult:0,0,0>
>
> dbListTables(con)
[1] "article" "word"
>
> dbRemoveTable(con,"test")
[1] FALSE
第二题
killDbConnections()
install.packages("httr")
library(DBI)
library(RMySQL)
con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con,'SET NAMES utf8')
library(httr)
totalNum=562
pageSize=10
totalPage=ceiling(totalNum/pageSize)
currentPage=1
term='(cell[TA]) AND 2017[DP]'
usehistory='Y'
querykey=''
webenv=''
postSearchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
while(currentPage<=totalPage){
retstart=(currentPage-1)*pageSize
r <- POST(postSearchUrl,
body = list(
db='pubmed',
term=term,
retmode='json',
retstart=retstart,
retmax=pageSize,
usehistory=usehistory,
rettype='uilist'
)
)
stop_for_status(r)
data=content(r, "parsed", "application/json")
esearchresult=data$esearchresult
querykey=esearchresult$querykey
webenv=esearchresult$webenv
idlist =esearchresult$idlist
n = length(idlist)
pmid=c()
i = 1
while(i<=n){
pmid=c(pmid, as.character(idlist[i][1]))
i = i+1
}
article=data.frame('pmid'=pmid)
dbWriteTable(con,"article",article,append=TRUE)
currentPage = currentPage + 1
}
dbDisconnect(con)
killDbConnections()
install.packages("httr")
library(DBI)
library(RMySQL)
con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con,'SET NAMES utf8')
library(httr)
totalNum=562
pageSize=10
totalPage=ceiling(totalNum/pageSize)
currentPage=1
term='(cell[TA]) AND 2017[DP]'
usehistory='Y'
querykey=''
webenv=''
postSearchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
while(currentPage<=totalPage){
retstart=(currentPage-1)*pageSize
r <- POST(postSearchUrl,
body = list(
db='pubmed',
term=term,
retmode='json',
retstart=retstart,
retmax=pageSize,
usehistory=usehistory,
rettype='uilist'
)
)
stop_for_status(r)
data=content(r, "parsed", "application/json")
esearchresult=data$esearchresult
querykey=esearchresult$querykey
webenv=esearchresult$webenv
idlist =esearchresult$idlist
n = length(idlist)
pmid=c()
i = 1
while(i<=n){
pmid=c(pmid, as.character(idlist[i][1]))
i = i+1
}
article=data.frame('pmid'=pmid)
dbWriteTable(con,"article",article,append=TRUE)
currentPage = currentPage + 1
}
dbDisconnect(con)
第三题
install.packages("xml2")
library(RMySQL)
library(xml2)
library(httr)
con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con,'SET NAMES utf8')
on.exit(dbDisconnect(con))
rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")
while (!dbHasCompleted(rs)) {
chunk <- dbFetch(rs, 10)
#mode(chunk)
#print(chunk)
#chunk[x,3] 第3列为获取到的pmid
pmidStr=""
i=1
n=nrow(chunk) #获得总行数,和上面设置的10一致,最后的时候是3
while (i<=n){ #循环将各个pmid之间用逗号连接起来
pmidStr = paste(pmidStr,chunk[i,3],sep=",")
i = i + 1
}
#pmid=",29195067,29195066,29195065,29195064,29153837,29153836,29153835,29153834,29153833,29153832"
#去掉第一个逗号,从第2位起,到100000位,上面字符串没有这么多字符,因此到末尾
pmidStr=substr(pmidStr,2,100000)
#上面字符串就是我们post到pubmed上面的字符串,用于获取title和abstract
#下面就是第一次作业里面获取title和abstract
postFetchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
r2 <- POST(postFetchUrl,
body = list(
db='pubmed',
id=pmidStr,
retmode='xml'
)
)
stop_for_status(r2) #clear http status
data2=content(r2, "parsed", "application/xml")
article=xml_children(data2)
#xml_length(article)为里面文章的数量
count=length(article)
cnt=1
while(cnt<=count){
#下面的xml_text和xml_find_first均为XML2包里面的函数
title=xml_text(xml_find_first(article[cnt],".//ArticleTitle")) #找到第一个ArticleTitle节点
abstract=xml_text(xml_find_first(article[cnt],".//AbstractText"))
pmid=xml_text(xml_find_first(article[cnt],".//PMID"))
title = gsub("'","",title)
abstract = gsub("'","",abstract)
sql=paste("UPDATE article SET title='",title,"',abstract='",abstract,"',isdone=1"," where pmid='",pmid,"'",sep="")
con2 <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
dbSendQuery(con2,'SET NAMES utf8')
dbSendQuery(con2,sql)
dbDisconnect(con2)
cnt = cnt + 1
Sys.sleep(1)
}
}
在row6 开始报错,还在想办法中,先提交这些结果吧
附件