wordcloud.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. '''
  4. @File : wordcloud.py
  5. @Time : 2019/05/27 22:10:32
  6. @Author : Liuyuqi
  7. @Version : 1.0
  8. @Contact : liuyuqi.gov@msn.cn
  9. @License : (C)Copyright 2019
  10. @Desc : 作图。
  11. '''
  12. import pandas as pd
  13. import numpy as np
  14. from pandas import Series,DataFrame
  15. import re
  16. import jieba
  17. import wordcloud
  18. import matplotlib.pyplot as plt
  19. from collections import Counter
  20. from PIL import Image
  21. jieba.load_userdict("new.txt")
  22. df=pd.read_excel('完美日记评价.xlsx')
  23. comments=str()
  24. for comment in df['评价']:
  25. comments=comments+comment
  26. stopwords = {}.fromkeys([ line.rstrip() for line in open('stopwords.txt') ])
  27. segs = jieba.cut(comments,cut_all=False)
  28. final =[]
  29. for seg in segs:
  30. # seg = seg.encode('gbk')、
  31. if seg not in stopwords:
  32. final.append(seg)
  33. #print(final)
  34. cloud_text=final
  35. #cloud_text="".join(final)
  36. #print(cloud_text)
  37. fre= Counter(cloud_text)
  38. #print(cloud_text)
  39. print(fre)
  40. mask = np.array(Image.open('wmrj.jpg')) # 定义词频背景
  41. wc = wordcloud.WordCloud(
  42. font_path='Hiragino Sans GB.ttc', # 设置字体格式
  43. mask=mask, # 设置背景图
  44. max_words=30, # 最多显示词数
  45. max_font_size=200 # 字体最大值
  46. )
  47. print(type(fre))
  48. dd=pd.DataFrame({'k':fre})
  49. dd.to_excel('完美日记高频词.xlsx')
  50. #print(fre)
  51. #wc=wordcloud.generate(cloud_text)
  52. wc.generate_from_frequencies(fre) # 从字典生成词云
  53. #wc.generate(cloud_text)
  54. image_colors = wordcloud.ImageColorGenerator(mask) # 从背景图建立颜色方案
  55. wc.recolor(color_func=image_colors) # 将词云颜色设置为背景图方案
  56. plt.imshow(wc) # 显示词云
  57. plt.axis('off') # 关闭坐标轴
  58. plt.show() # 显示图像
  59. wc.to_file('完美日记_pic.png')