服务热线
178 0020 3020
任务一:
> # Load the MySQL driver and open its package help index.
> library(RMySQL)
> help(package = "RMySQL")
> # Connect to the local "rdb" database as root with an empty password.
> con <- dbConnect(
+   MySQL(),
+   host = "localhost",
+   dbname = "rdb",
+   user = "root",
+   password = ""
+ )
> # Ask the server to use utf8 for this session's client character set.
> dbSendQuery(con, "SET NAMES utf8")
<MySQLResult:49650536,1,0>
> #summary(con)
> dbGetInfo(con)
$host
[1] "localhost"
$user
[1] "root"
$dbname
[1] "rdb"
$conType
[1] "localhost via TCP/IP"
$serverVersion
[1] "5.6.17"
$protocolVersion
[1] 10
$threadId
[1] 606
$rsId
$rsId[[1]]
<MySQLResult:0,1,0>
任务二:
> killDbConnections()
[[1]]
<MySQLConnection:0,0>
[[2]]
<MySQLConnection:0,1>
[1] "2 connections killed."
Warning messages:
1: Closing open result sets
2: Closing open result sets
> library(httr)
> # Open a fresh connection to the local "rdb" database for the import run.
> con <- dbConnect(
+   MySQL(),
+   host = "localhost",
+   dbname = "rdb",
+   user = "root",
+   password = ""
+ )
> # Ask the server to use utf8 for this session's client character set.
> dbSendQuery(con, "SET NAMES utf8")
<MySQLResult:-2,2,0>
> library(httr)
> library(RMySQL)
> # Page through a PubMed ESearch query and append every returned PMID to the
> # `article` table through the already-open connection `con`.
> #
> # totalNum is the number of hits to walk through and pageSize the number of
> # PMIDs requested per call; together they fix how many pages are requested.
> totalNum <- 562
> pageSize <- 10
> totalPage <- ceiling(totalNum / pageSize)
> currentPage <- 1
> term <- '(cell[TA]) AND 2017[DP]'
> usehistory <- 'Y'
> querykey <- ''
> webenv <- ''
> postSearchUrl <- 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
> while (currentPage <= totalPage) {
+   # Zero-based offset of the first record on this page.
+   retstart <- (currentPage - 1) * pageSize
+   r <- POST(
+     postSearchUrl,
+     body = list(
+       db = 'pubmed',
+       term = term,
+       retmode = 'json',
+       retstart = retstart,
+       retmax = pageSize,
+       usehistory = usehistory,
+       rettype = 'uilist'
+     )
+   )
+   # Abort on any HTTP error status instead of parsing an error page.
+   stop_for_status(r)
+   data <- content(r, "parsed", "application/json")
+   esearchresult <- data$esearchresult
+   # Keep the history-server handles returned with each response.
+   querykey <- esearchresult$querykey
+   webenv <- esearchresult$webenv
+   # The parsed id list is a list of strings; flatten it in one step instead
+   # of growing a vector element by element.
+   pmid <- as.character(unlist(esearchresult$idlist))
+   # An empty page means there is nothing left to store, so stop paging
+   # rather than handing an empty data frame to dbWriteTable().
+   if (length(pmid) == 0) {
+     break
+   }
+   article <- data.frame(pmid = pmid)
+   dbWriteTable(con, "article", article, append = TRUE)
+   currentPage <- currentPage + 1
+   # NCBI asks clients without an API key to stay at or below three
+   # E-utilities requests per second.
+   Sys.sleep(0.34)
+ }
> dbDisconnect(con)
[1] TRUE
>
任务三:
> library(RMySQL)
> library(xml2)
> library(httr)
> killDbConnections()
list()
[1] "0 connections killed."
> # Open the connection used to read the pending rows of the "rdb" database.
> con <- dbConnect(
+   MySQL(),
+   host = "localhost",
+   dbname = "rdb",
+   user = "root",
+   password = ""
+ )
> # Ask the server to use utf8 for this session's client character set.
> dbSendQuery(con, "SET NAMES utf8")
<MySQLResult:222872288,4,0>
> # Fill in title and abstract for every `article` row with isdone=0 by
> # fetching the records from PubMed EFetch, then mark those rows as done.
> #
> # NOTE(review): on.exit() is intended for use inside functions; confirm it
> # registers any cleanup when typed at the prompt.
> on.exit(dbDisconnect(con))
> # Read all pending rows up front. Nothing is left pending on `con`, so the
> # UPDATEs below reuse this connection instead of opening one per article.
> pending <- dbGetQuery(con, "SELECT * FROM article WHERE isdone=0")
> # Column 3 is the value sent to EFetch as the id list.
> # NOTE(review): presumably the pmid column; confirm against the table schema.
> pmids <- as.character(pending[, 3])
> # Request 10 PMIDs per EFetch call; no pending rows yields no batches.
> batches <- split(pmids, ceiling(seq_along(pmids) / 10))
> postFetchUrl <- 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
> for (batch in batches) {
+   r2 <- POST(
+     postFetchUrl,
+     body = list(
+       db = 'pubmed',
+       id = paste(batch, collapse = ","),
+       retmode = 'xml'
+     )
+   )
+   # Abort on any HTTP error status instead of parsing an error page.
+   stop_for_status(r2)
+   # State the encoding explicitly so httr does not have to guess it.
+   data2 <- content(r2, "parsed", "application/xml", encoding = "UTF-8")
+   article <- xml_children(data2)
+   # xml_find_first() yields one (possibly missing) node per record, so the
+   # three vectors line up with `article`; a missing node reads as NA.
+   title <- xml_text(xml_find_first(article, ".//ArticleTitle"))
+   abstract <- xml_text(xml_find_first(article, ".//AbstractText"))
+   pmid <- xml_text(xml_find_first(article, ".//PMID"))
+   # Store an empty string, not the text "NA", when a field is absent.
+   title[is.na(title)] <- ""
+   abstract[is.na(abstract)] <- ""
+   for (k in seq_along(pmid)) {
+     # A record without a PMID cannot be matched to a table row.
+     if (is.na(pmid[k])) {
+       next
+     }
+     # Escape the values instead of deleting apostrophes, so quotes and
+     # backslashes in titles and abstracts are stored intact.
+     sql <- paste0(
+       "UPDATE article SET title='", dbEscapeStrings(con, title[k]),
+       "',abstract='", dbEscapeStrings(con, abstract[k]),
+       "',isdone=1 where pmid='", dbEscapeStrings(con, pmid[k]), "'"
+     )
+     # Release the result right away so no result set is left open.
+     dbClearResult(dbSendQuery(con, sql))
+   }
+   # Pause once per EFetch request rather than once per article.
+   Sys.sleep(1)
+ }
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
No encoding supplied: defaulting to UTF-8.
There were 50 or more warnings (use warnings() to see the first 50)
> .................................................................................
附件