def view_entry(date, dir_name):
    """Render the aggregated news page for one entry directory.

    :param date: date component of the path under NEWSDIR
    :param dir_name: entry directory name under that date
    :return: rendered 'view_news.html', or a redirect to the error page
             when the directory does not exist
    """
    dir_path = os.path.join(NEWSDIR, date, dir_name)
    if not IsDirectory(dir_path):
        return redirect(url_for('error_page', errcode='No this directory'))
    # Collect each source's article JSON and all of their comments.
    news = {}
    comments = []
    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        # endswith('.json') instead of file_name[-4:] == 'json', which
        # also matched names like 'foojson'.
        if IsFile(file_path) and file_name.endswith('.json'):
            # Context manager guarantees the handle is closed even if
            # json.load raises on a malformed file.
            with codecs.open(file_path, 'r', 'utf-8') as f:
                js = json.load(f)
            news[js['source']] = js
            comments.extend(GetComments(js))
    # Present comments in chronological order.
    comments.sort(key=lambda x: x['time'])
    comment_abstract = GetPassageAbstract(
        '\n'.join([comment['content'] for comment in comments]),
        0.5, 0.1, '|')
    return render_template('view_news.html', news=news, comments=comments,
                           comment_abstract=comment_abstract)
def GetTermFreqFromFile(tags, file_path):
    """Compute term frequencies of *tags* over the passage stored in a JSON file.

    :param tags: terms to count (forwarded to GetTermFreqFromContent)
    :param file_path: path to a JSON file with a ['contents']['passage'] field
    :return: result of GetTermFreqFromContent, or None when the path is
             missing or not a regular file
    """
    if not IsFile(file_path):
        print(file_path + " not exists or not a file, can't get TF")
        return None
    # 'with' closes the handle even if json.load raises; utf-8 matches the
    # encoding used by the codecs.open calls elsewhere in this file.
    with open(file_path, 'r', encoding='utf-8') as f:
        js = json.load(f)
    passage = js['contents']['passage']
    return GetTermFreqFromContent(tags, passage)
def ExtractTagsFromFile(file_path, num_of_tags):
    """Extract keyword tags from the passage stored in a JSON article file.

    :param file_path: path to a JSON file with a ['contents']['passage'] field
    :param num_of_tags: number of tags to extract
    :return: tags produced by ExtractTagsFromContent
    """
    if not IsFile(file_path):
        # Was a Python-2 `print "..."` statement, a SyntaxError under
        # Python 3; the rest of this file already uses print().
        print("Path not exists or not a file")
        sys.exit(2)
    # Context manager closes the file even when json.load raises.
    with codecs.open(file_path, 'r', 'utf-8') as f:
        js = json.load(f)
    content = js['contents']['passage']
    tags = ExtractTagsFromContent(content, num_of_tags)
    return tags
def init_object(self):
    """Populate file_paths, sources, and title from the entry's JSON files.

    Scans self.parent_dir/self.dir_name for JSON article files, records
    every file path and its 'source', and takes the title from the first
    article seen. No-op when self.title is already set.
    """
    if len(self.title) == 0:
        current_path = os.path.join(self.parent_dir, self.dir_name)
        for file_name in os.listdir(current_path):
            file_path = os.path.join(current_path, file_name)
            # endswith('.json') instead of file_name[-4:] == 'json',
            # which also matched names like 'foojson'.
            if IsFile(file_path) and file_name.endswith('.json'):
                self.file_paths.append(file_path)
                # 'with' closes the handle even if json.load raises,
                # avoiding a leak per loop iteration.
                with codecs.open(file_path, 'r', 'utf-8') as f:
                    js = json.load(f)
                if len(self.title) == 0:
                    self.title = js['contents']['title']
                self.sources.append(js['source'])
def ExtractTagsFromFile(file_path, num_of_tags):
    """
    :param file_path: input article path
    :param num_of_tags: number of keywords to extract
    :return: the article's keyword tags
    """
    if not IsFile(file_path):
        print("Path not exists or not a file")
        sys.exit(2)
    # 'with' closes the handle even if json.load raises; explicit utf-8
    # instead of the locale-dependent default, matching the codecs.open
    # utf-8 convention used elsewhere in this file.
    with open(file_path, 'r', encoding='utf-8') as f:
        js = json.load(f)
    content = js['contents']['passage']
    tags = ExtractTagsFromContent(content, num_of_tags)
    return tags