forked from yanshengli/sina_weibo_crawler
/
plotMain.py
52 lines (51 loc) · 1.83 KB
/
plotMain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#-*-coding=utf-8-*-
import mystats
from Count import *
from pandas import *
import pylab as pl
import numpy as np
import matplotlib.pyplot as plt
#from nlp import pre_process_cn
def makeBlogFamework(path, filename):
    """Load the time-extended CSV for ``filename`` located under ``path``.

    ``mystats.extendTime(path, filename)`` is called first; afterwards the
    file ``<path>/new_<filename>.csv`` is parsed with ``read_csv``.
    NOTE(review): presumably ``extendTime`` is what produces the
    ``new_*.csv`` file -- confirm in ``mystats``.

    Parameters:
        path: directory prefix, joined to the file name with ``/``.
        filename: base name of the data set, without the ``.csv`` suffix.

    Returns:
        Whatever ``read_csv`` yields for the comma-separated file, with the
        header row inferred.
    """
    mystats.extendTime(path, filename)
    csv_location = ''.join((path, '/new_', filename, '.csv'))
    return read_csv(csv_location, header='infer', sep=',')
# This block plots time-frequency histograms for individuals.
def plot_hist(runornot, data=None):
    """Draw one frequency histogram per time component in a single figure.

    For each of the columns ``year``, ``month``, ``day``, ``hour`` and
    ``dayofweek`` a histogram of the column values is drawn in its own
    subplot, using one bin per distinct value.

    Parameters:
        runornot: switch; when falsy nothing is drawn and the function
            returns immediately.
        data: optional table indexable by column name whose columns expose
            ``.values``.  When omitted, the module-level ``blog`` is used,
            which keeps existing ``plot_hist(flag)`` calls working.

    Returns:
        None.
    """
    # The flag never changes inside the function, so it is a one-shot
    # guard: looping on it would redraw forever for a truthy value.
    if not runornot:
        return

    frame = blog if data is None else data

    plt.figure(1)
    timelist = ['year', 'month', 'day', 'hour', 'dayofweek']
    # Three-digit subplot codes on a 2x3 grid; position 3 is left unused.
    layoutlist = [231, 232, 234, 235, 236]
    for timeiterm, layout in zip(timelist, layoutlist):
        values = frame[timeiterm].values
        plt.subplot(layout)
        # One bin per distinct value; never ask for zero bins on empty data.
        plt.hist(values, bins=max(1, len(set(values))),
                 facecolor='blue', alpha=0.5)
        plt.ylabel('freq')
        plt.title(timeiterm)
    # Show once, after every subplot has been drawn.
    plt.show()
# Script section: choose the sample data set and load it.
path = './data/data10'
filename = 'smallS'
# `blog` stays at module level because plot_hist() reads it as a global.
blog = makeBlogFamework(path=path, filename=filename)
# False means the histogram-drawing code in plot_hist is skipped.
plot_hist(runornot=False)
'''
wordlist=[]
for c in contenttext:
for i in c:
wordlist.append(i.encode('utf-8'))
'''
# Plan for text mining: 1. word frequency; 2. sentiment (affective) analysis over time, i.e. how sentiment changes; 3. machine learning; 4. (to be decided).
"""
What do I need to do?
Make it clear: what are the concepts? Which packages or modules can do this work? What is the main logic of this analysis?
What are the main steps of natural language processing?
词性标注、名词短语抽取、情感分析、分类、翻译
词性标注、名词短语抽取、情感分析、分类、翻译
词性标注工具(Part-Of-Speech Tagger),N元搜索(n-gram search),情感分析(sentiment analysis),WordNet
Polyglot,136种语言的情感分析
"""