R2-09 第2阶段第三次

Giant Panda 2018-03-30 00:38:38 阅读: 1335

library("DBI")

library("RMySQL")

# Close every MySQL connection currently registered with the RMySQL driver.
#
# Prints the list of open connections, disconnects each one, then prints how
# many were closed. Takes no arguments; the value of the final print() call
# (the summary string) is the function's result.
killDbConnections <- function() {
  all_cons <- dbListConnections(MySQL())
  print(all_cons)

  # The original body was "+  dbDisconnect(con)": the leading "+" was a pasted
  # console continuation prompt and only worked as an accidental unary plus.
  for (open_con in all_cons) {
    dbDisconnect(open_con)
  }

  print(paste(length(all_cons), " connections killed."))
}

# Split a character vector into lower-case, punctuation-free words.
# NA entries and empty tokens (produced by consecutive spaces) are dropped.
tokenize_text <- function(text) {
  cleaned <- gsub("[[:punct:]]", "", tolower(text))
  tokens <- unlist(strsplit(cleaned, split = " "), use.names = FALSE)
  tokens[!is.na(tokens) & nzchar(tokens)]
}

# Drop connections left over from earlier runs, then open a fresh one.
killDbConnections()

con <- dbConnect(
  MySQL(),
  host = "localhost", dbname = "rdb", user = "root", password = ""
)

# Switch the session to UTF-8. dbExecute() releases the statement's result
# immediately, so nothing is left pending before the SELECT below.
dbExecute(con, "SET NAMES utf8")

rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")

# Collect the tokens of every fetched batch; counting happens once at the end
# instead of growing a data frame with rbind() for every single word.
token_batches <- list()

# dbHasCompleted()/dbFetch() need the result object itself, not the string "rs".
while (!dbHasCompleted(rs)) {
  chunk <- dbFetch(rs, n = 10) # fetch the next 10 rows

  # Re-encode the abstract column.
  chunk$abstract <- iconv(chunk$abstract, "WINDOWS-1252", "UTF-8")

  # NOTE(review): the text is read from column 5, as in the original code;
  # presumably that is the abstract column - confirm against the table schema.
  token_batches[[length(token_batches) + 1L]] <- tokenize_text(chunk[[5]])
}

# Release the result set and the connection now that all rows are read.
dbClearResult(rs)
dbDisconnect(con)

# Word/frequency table, with words kept in the order they were first seen.
all_tokens <- as.character(unlist(token_batches, use.names = FALSE))
distinct_tokens <- unique(all_tokens)
words <- data.frame(
  word = distinct_tokens,
  freq = tabulate(
    match(all_tokens, distinct_tokens),
    nbins = length(distinct_tokens)
  ),
  stringsAsFactors = FALSE
)

# Show the first rows of the table (the original printed the string "words").
head(words)


 1.png


# Install the plotting dependencies only when they are missing, instead of
# re-downloading them on every run of the script.
for (pkg in c("wordcloud2", "jsonlite")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(wordcloud2)

# Plot at most the first 1000 rows of the word table. head() stops at the last
# available row, whereas words[0:1000, ] pads a shorter table with NA rows.
wordcloud2(head(words, 1000))

2.png


# Order the word table from most to least frequent.
new_words <- words[order(words$freq, decreasing = TRUE), ]

# Common English words to leave out of the cloud.
# NOTE(review): 'buy' looks like it may have been meant as 'by' (which is also
# listed further down) - confirm before removing it.
del_word <- c('of','the','and','in','to','a','that','is','for','buy','with',
              'we','are','an','this','these','as','from','which','at','their',
              'have','or','our','its','but','how','be','as','here','on','can',
              'into','data','between','both','also','by','yet','than','well',
              'it','not')

# Filter the sorted table; the original sorted into new_words but then
# filtered the unsorted `words`, so the ordering step had no effect.
words2 <- new_words[!new_words$word %in% del_word, ]

wordcloud2(words2, shape = 'triangle', size = 0.5)

444.png

 
邀请讨论

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
{{item.nick_name}} 受邀请回答 {{item.create_time}}
{{item.refer_comment.nick_name}} {{item.refer_comment.create_time}}

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
切换到完整回复 发送回复
赞({{item.count_zan}}) 踩({{item.count_cai}}) 删除 回复 关闭
科研狗©2015-2024 科研好助手,京ICP备20005780号-1 建议意见

服务热线

178 0020 3020

微信服务号