def test_snownlp():
    """Print SnowNLP's segmentation of a short Chinese sentence.

    The same sample string is segmented twice: once through the
    ``snownlp.seg.seg`` function and once through the ``words`` attribute
    of a ``snownlp.SnowNLP`` object. Both results are printed; nothing is
    asserted or returned.
    """
    import snownlp
    from snownlp.seg import seg

    sample = u'中国人民从此站起来了'

    # Segmentation via the module-level function.
    print(seg(sample))

    # Segmentation via the high-level SnowNLP wrapper.
    wrapper = snownlp.SnowNLP(sample)
    print(wrapper.words)
def get_doc_for_rank(sents):
    """Turn sentences into the list-of-word-lists shape used for ranking.

    Each sentence is segmented with ``seg.seg`` and the result is passed
    through ``filter_stop`` together with the module-level ``stopwords``.

    Args:
        sents: iterable of sentences to segment.

    Returns:
        A list holding one filtered segmentation result per sentence, in
        the same order as ``sents``.
    """
    return [filter_stop(seg.seg(sentence), stopwords) for sentence in sents]
def main():
    """Rank the sentences and keywords of the module-level ``text``.

    ``text`` is passed through ``normal.zh2hans``, split into sentences,
    and each sentence is segmented and stop-word filtered. The resulting
    document is fed to ``textrank.TextRank`` and
    ``textrank.KeywordTextRank``; whatever ``top_index(5)`` yields from
    each is printed (sentences first, then keywords).
    """
    simplified = normal.zh2hans(text)
    sentences = normal.get_sentences(simplified)
    tokenised = [normal.filter_stop(seg.seg(sentence)) for sentence in sentences]

    # Sentence ranking: top_index yields positions into `sentences`.
    sentence_rank = textrank.TextRank(tokenised)
    sentence_rank.solve()
    for position in sentence_rank.top_index(5):
        print(sentences[position])

    # Keyword ranking: the yielded values are printed as-is.
    keyword_rank = textrank.KeywordTextRank(tokenised)
    keyword_rank.solve()
    for keyword in keyword_rank.top_index(5):
        print(keyword)
def parse_keyword(text):
    """Extract keywords from *text* with ``textrank.KeywordTextRank``.

    The text is passed through ``normal.zh2hans``, split into sentences,
    and each sentence is segmented and stop-word filtered before ranking.

    Args:
        text: the document, either as UTF-8 encoded bytes or as an
            already-decoded text string.

    Returns:
        A list of the values yielded by ``top_index(5)`` of the solved
        keyword ranker.

    Raises:
        UnicodeDecodeError: if *text* is bytes that are not valid UTF-8.
    """
    # Only byte strings need decoding. Calling .decode() unconditionally
    # fails on already-decoded text (AttributeError on Python 3, implicit
    # ASCII encode on Python 2 unicode).
    if isinstance(text, bytes):
        text = text.decode("UTF-8")

    simplified = normal.zh2hans(text)
    sentences = normal.get_sentences(simplified)
    doc = [normal.filter_stop(seg.seg(sentence)) for sentence in sentences]

    keyword_rank = textrank.KeywordTextRank(doc)
    keyword_rank.solve()
    return list(keyword_rank.top_index(5))
def query_xym():
    """Read paging parameters, run a TextRank demo, return a fixed string.

    ``offset`` and ``pageSize`` are read from ``request.values`` and used
    to build an ``XymTripModel`` query. A hard-coded sample paragraph is
    then ranked with ``textrank.TextRank`` and
    ``textrank.KeywordTextRank``, and the ``top_index(5)`` results of each
    are printed.

    Returns:
        The constant placeholder string ``"\'..."``.
    """
    start = request.values['offset']
    page_size = request.values['pageSize']
    # NOTE(review): this query object is built but never read afterwards.
    resp = XymTripModel.query.offset(start).limit(page_size)

    sample = "随着智能手机和平板电脑的普及,相机也变得无处不在,而且分享照片也越来越简单。MOOC的明星教授说,把45分钟的讲座变成10分钟一段的视频让他们被迫“升级课程”。不是每个老师都能通过这种方式吸引一批学生,但是他们可以参考这个经验,为课堂制作自己的视频,例如实地考察录像。让整个班都出去跑一趟可能不可行,但利用视频和照片,可以把考察点“带”到课室中来。利用智能手机耳机上配备的话筒,还可以为视频配上讲解,从而高效地用多个视频介绍完一个知识点。"
    simplified = normal.zh2hans(sample)
    sentences = normal.get_sentences(simplified)
    tokenised = [normal.filter_stop(seg.seg(sentence)) for sentence in sentences]

    # Sentence ranking: top_index yields positions into `sentences`.
    sentence_rank = textrank.TextRank(tokenised)
    sentence_rank.solve()
    for position in sentence_rank.top_index(5):
        print(sentences[position])

    # Keyword ranking: the yielded values are printed as-is.
    keyword_rank = textrank.KeywordTextRank(tokenised)
    keyword_rank.solve()
    for keyword in keyword_rank.top_index(5):
        print(keyword)

    return "\'..."
m[-1] += (self.d * self.weight[j][i] / self.weight_sum[j] * self.vertex[j]) if abs(m[-1] - self.vertex[i]) > max_diff: max_diff = abs(m[-1] - self.vertex[i]) self.vertex = m if max_diff <= self.min_diff: break self.top = list(enumerate(self.vertex)) self.top = sorted(self.top, key=lambda x: x[1], reverse=True) def top_index(self, limit): return list(map(lambda x: x[0], self.top))[:limit] def top(self, limit): return list(map(lambda x: self.docs[x[0]], self.top)) if __name__ == '__main__': sents = lists.get_sentences(text) doc = [] for sent in sents: words = seg.seg(sent) # words = list(jieba.cut(sent)) words = bools.filter_stop(words) doc.append(words) print(doc) rank = TextRank(doc) rank.text_rank() for index in rank.top_index(3): print(sents[index])
MOOC是学校的一种新形式,欧伯恩建议在起步的时候,先为每门课程的课件加上指针,再利用软件工具,就可以轻松根据学生的学习进度添加课程。他希望,在学生使用在线社区的同时,教师也能发现参与在线社区的方式。 5.从线上到线下 MOOC的一个缺陷就是无法组建高效的学习小组,而教师在这方面可以大有作为。当学生们看到其他同学更新了课程内容,他们就知道谁掌握了所学的知识,从而邀请这些同学合作完成任务,或向他们请教。我经常向教师们介绍这个例子:我在Google+圈子里发了一条信息,例如“明天我们会讨论矛盾冲突在吸引读者注意力方面的作用。今晚,在你回家的路上,拍一张照片或一段录像。用文字介绍你的见闻,以证明这个观点,并邀请其他同学参与讨论。”我收到的作业包括交通堵塞,猫狗对峙,被泡在水里的花园以及足球训练中的射门。第二天,学生们就可以归纳整理前一天晚上在网络上收集到的评论了。 6.用好你的相机 随着智能手机和平板电脑的普及,相机也变得无处不在,而且分享照片也越来越简单。MOOC的明星教授说,把45分钟的讲座变成10分钟一段的视频让他们被迫“升级课程”。不是每个老师都能通过这种方式吸引一批学生,但是他们可以参考这个经验,为课堂制作自己的视频,例如实地考察录像。让整个班都出去跑一趟可能不可行,但利用视频和照片,可以把考察点“带”到课室中来。利用智能手机耳机上配备的话筒,还可以为视频配上讲解,从而高效地用多个视频介绍完一个知识点。 将MOOC应用到传统课堂教学 随着大规模网络公开课的发展,教师可以考虑把在线教育的方法应用到自己的课堂教学中。MOOC的课程制作涉及比较复杂的技术,但使用这些课程几乎不费吹灰之力,而且成本也远远不及课程制作。没有加入edX或Coursera的大部分学校可以进行更多自创内容的尝试,就像自出版一样,这也是许多cMOOC的尝试。教师也可以向自己的目标努力。通过打开课堂,建立网络社区和制作教学视频,可以让更多的教师和学生享受到MOOC的投入带来的收益。 ''' from snownlp import normal from snownlp import seg from snownlp.summary import textrank if __name__ == '__main__': t = normal.zh2hans(text) sents = normal.get_sentences(t) doc = [] for sent in sents: words = seg.seg(sent) words = normal.filter_stop(words) doc.append(words) rank = textrank.TextRank(doc) rank.solve() for index in rank.top_index(10): print sents[index]
def getJson(fold, filename):
    """Summarise one article file into a JSON-ready dict.

    The file at ``fold + '/' + filename`` is read line by line: the first
    line is taken as the title, the second as the time, and every
    following line is concatenated into the content. The content is then
    summarised with ``textrank.TextRank``, keywords are extracted with
    ``textrank.KeywordTextRank``, and a sentiment score is derived from
    ``SnowNLP(content).sentiments``.

    Args:
        fold: directory containing the file.
        filename: name of the file inside ``fold``.

    Returns:
        On success, a dict with the keys ``"code"`` (0), ``"message"``,
        ``"tilte"``, ``"time"``, ``"abstract"``, ``"sentiment"`` and
        ``"keywords"``. ``"keywords"`` is always a list of five dicts;
        each filled one has ``"word"`` and ``"frequency"`` (occurrences of
        the word divided by the content length), and slots without a
        keyword stay empty. If an ``IOError`` is raised while processing,
        the dict holds only ``"code"`` (1) and ``"message"``.
    """
    result = {}
    try:
        title = ''
        timestamp = ''
        body_lines = []
        path = fold + '/' + filename

        # ========================================
        # Read the title, time and content from the file
        # ========================================
        # The context manager guarantees the handle is closed even when
        # reading fails part-way through.
        with open(path, 'r') as handle:
            for line_no, line in enumerate(handle):
                if line_no == 0:
                    title = line
                elif line_no == 1:
                    timestamp = line
                else:
                    body_lines.append(line)
        cotent = u''.join(body_lines)

        # ========================================
        # Generate the summary
        # ========================================
        t = normal.zh2hans(cotent)
        sents = normal.get_sentences(t)
        doc = [normal.filter_stop(seg.seg(sent)) for sent in sents]

        rank = textrank.TextRank(doc)
        rank.solve()
        # Each selected sentence is followed by a single space.
        abstract = ''.join(sents[index] + ' ' for index in rank.top_index(5))

        keyword_rank = textrank.KeywordTextRank(doc)
        keyword_rank.solve()
        # Always expose exactly five slots; zip() fills only as many as the
        # ranker yields and leaves the remaining dicts empty.
        keywords = [{} for _ in range(5)]
        for slot, w in zip(keywords, keyword_rank.top_index(5)):
            slot["word"] = w
            slot["frequency"] = float(cotent.count(w)) / float(len(cotent))

        s = SnowNLP(cotent)
        score = (s.sentiments - 0.5) * 2  # normalise to the -1..1 range

        result["code"] = 0
        result["message"] = "sucess"
    except IOError:
        result["code"] = 1
        result["message"] = "wrong format"
        return result

    result["tilte"] = title.strip()
    result["time"] = timestamp.strip()
    result['abstract'] = abstract
    result['sentiment'] = score
    result["keywords"] = keywords
    return result