def get_wordcloud_with_project(project_id=None):
    """Generate a word cloud for a project's module names, as base64 PNG.

    :param project_id: optional project identifier forwarded to the data
        helper; ``None`` presumably means "all projects" — confirm with
        ``get_model_data_with_project``.
    :return: base64-encoded PNG string of the rendered word cloud.
    """
    # CJK-capable font (Microsoft YaHei) so Chinese module names render.
    font = CONF.TTF_PATH + 'msyh.ttc'
    # Source data: module names for the project.
    str_list = get_model_data_with_project(project_id)
    # Join tokens once; str.join is linear, unlike += concatenation in a loop.
    text = " ".join(str_list)
    # collocations=False prevents WordCloud from emitting duplicate bigrams.
    pil_img = WordCloud(
        font_path=font,
        width=800,
        height=300,
        background_color="white",
        prefer_horizontal=0.6,
        collocations=False).generate(text=text).to_image()
    # Serialize the PIL image to PNG bytes in memory, then base64-encode.
    img = io.BytesIO()
    pil_img.save(img, "PNG")
    img.seek(0)
    img_base64 = base64.b64encode(img.getvalue()).decode()
    return img_base64
def get_wordcloud(text):
    """Render *text* as a word cloud and return it as a base64 PNG string."""
    # Module-level `stopwords` filters common words before layout.
    cloud = WordCloud(stopwords=stopwords).generate(text=text)
    rendered = cloud.to_image()
    # Write the PNG into an in-memory buffer and base64-encode its bytes.
    buffer = io.BytesIO()
    rendered.save(buffer, "PNG")
    buffer.seek(0)
    return base64.b64encode(buffer.getvalue()).decode()
def get_wordcloud(text):
    """Build a word cloud for *text* and return it as a base64 PNG string.

    When the module-level ``langue`` is ``'zh-CN'`` the text is segmented
    with ``jieba_processing_txt`` and rendered with a CJK font; otherwise
    the raw text is rendered with a large RGBA English layout.
    """
    if langue == 'zh-CN':
        # Chinese text needs a CJK font (module-level font_path) and
        # jieba word segmentation before WordCloud can tokenize it.
        pil_img = WordCloud(
            font_path=font_path,
            background_color="white",
            max_words=2000,
            max_font_size=100,
            random_state=42,
            width=1000,
            height=860,
            margin=2,
        ).generate(jieba_processing_txt(text))
    else:
        pil_img = WordCloud(width=1600,
                            height=800,
                            scale=20,
                            background_color='white',
                            mode="RGBA",
                            max_font_size=600).generate(text=text)
    # NOTE(review): the original also allocated an unused BytesIO here and
    # created a matplotlib figure (plt.figure/axis/tight_layout) that was
    # never saved or shown — dead code plus a leaked figure; both removed.
    pil_img = pil_img.to_image()
    img = BytesIO()  # in-memory PNG buffer
    pil_img.save(img, "PNG")
    img.seek(0)
    img_64 = base64.b64encode(img.getvalue()).decode('utf-8')
    return img_64
def results():
    """Render the results page with word frequencies and a word-cloud image.

    Currently driven by hard-coded sample data (the "if-the-internet-is-
    slow-so-local-items" tester); the request-driven path is kept commented
    for reference.
    """
    word_freq = [('hawk', 10), ('apple', 3), ('spoon', 2), ('red', 1),
                 ('mine', 1)]
    list_of_tokens = [
        'hawk', 'hawk', 'hawk', 'hawk', 'hawk', 'hawk', 'hawk', 'hawk',
        'hawk', 'hawk', 'apple', 'apple', 'apple', 'spoon', 'spoon', 'red',
        'mine'
    ]
    # url = request.form['url']
    # word_freq, list_of_tokens = quick_prime(url)
    text = " ".join(list_of_tokens)
    cloud_PIL = WordCloud(background_color='white').generate(text).to_image()
    # Serialize the PIL image to JPEG bytes in memory.
    output = BytesIO()
    cloud_PIL.save(output, format='JPEG')
    # .decode() yields the base64 str directly — replaces the fragile
    # str(bytes)[2:-1] trick for stripping the b'...' repr wrapper.
    img = base64.b64encode(output.getvalue()).decode('ascii')
    return render_template('results.html', items=word_freq, img=img)
async def _generate_word_cloud_async(self, msg_id: str, reply_msg, to_chat,
                                     search_chat, user, start: datetime,
                                     end: datetime):
    """Scan a chat's message history, count jieba-segmented words, and post
    the resulting word cloud image to ``to_chat`` as a reply to ``msg_id``.

    ``reply_msg`` is a status message that gets progress dots appended and
    is deleted at the end. ``user``/``start``/``end`` optionally restrict
    the scan by sender and date range.
    """
    # Lazy imports: bail out quietly if jieba/wordcloud are not installed.
    try:
        import jieba
        from wordcloud import WordCloud
    except ImportError as e:
        print(e)
        return
    words = defaultdict(int)  # word -> occurrence count
    count = 0                 # messages processed so far
    initial_msg = reply_msg.text + '\n'
    # iter_messages walks newest-to-oldest starting at `end`; `start`
    # (older bound) terminates the scan.
    async for msg in self._client.iter_messages(search_chat,
                                                from_user=user,
                                                offset_date=end):
        if start and msg.date < start:
            break
        if msg.text:
            # Count each segmented word unless it is in the Redis-backed
            # stop-word set (one sismember round-trip per word).
            for word in jieba.cut(msg.text):
                word = word.lower()
                if not await self.redis.sismember(
                        f'{self.prefix}stop_words', word):
                    words[word] += 1
        # words += [w for w in jieba.cut(msg.text) if not await self.redis.sismember(f'{self.prefix}stop_words', w)]
        # if msg.sticker:
        #     words += [a.alt for a in msg.sticker.attributes if isinstance(a, DocumentAttributeSticker)]
        count += 1
        # Progress indicator: once past 1000 messages, edit the status
        # message (one dot per 1000) but only at round power-of-ten counts
        # to avoid flooding Telegram's edit API.
        if count >= 1000:
            p = math.floor(math.log(count, 10))
            if count % int(math.pow(10, p)) == 0 and count // 1000:
                try:
                    await reply_msg.edit(text=initial_msg + '.' *
                                         (count // 1000))
                except Exception as _:
                    # Edits may fail (rate limits etc.); log and keep going.
                    traceback.print_exc()
    wordcloud_msg = None
    try:
        # Render the frequency table; simsun.ttf is required for CJK glyphs.
        image = WordCloud(
            font_path="simsun.ttf",
            width=800,
            height=400).generate_from_frequencies(words).to_image()
        stream = BytesIO()
        image.save(stream, 'PNG')
        # Caption describes the scanned chat, optional user, and date range.
        wordcloud_msg = await self._client.send_message(
            to_chat,
            '词云 for\n{}{}{}'.format(
                f'{search_chat.title}',
                f'\n{utils.get_display_name(user)}' if user else '',
                '\n{}-{}'.format(
                    start.strftime('%Y/%m/%d') if start else 'Join',
                    end.strftime('%Y/%m/%d') if end else 'Now')
                if start or end else ''),
            reply_to=msg_id,
            file=stream.getvalue())
    except Exception as _:
        traceback.print_exc()
    finally:
        # Always remove the progress/status message.
        await reply_msg.delete()
def wordcloud(text):
    """Render *text* as an 800x800 word cloud; return a base64 PNG string."""
    ignored = set(STOPWORDS)
    image = WordCloud(width=800,
                      height=800,
                      background_color='white',
                      stopwords=ignored,
                      min_font_size=10).generate(text).to_image()
    # Write the PNG into an in-memory buffer, then base64-encode its bytes.
    buffer = BytesIO()
    image.save(buffer, "PNG")
    buffer.seek(0)
    return base64.b64encode(buffer.getvalue()).decode()
def get_word_cloud(text):
    """Return a base64-encoded 800x300 PNG word cloud built from *text*."""
    # font = "./SimHei.ttf"
    # pil_img = WordCloud(width=500, height=500, font_path=font).generate(text=text).to_image()
    generator = WordCloud(width=800, height=300, background_color="white")
    rendered = generator.generate(text=text).to_image()
    # Serialize to PNG in memory and base64-encode the result.
    buffer = io.BytesIO()
    rendered.save(buffer, "PNG")
    buffer.seek(0)
    return base64.b64encode(buffer.getvalue()).decode()
def get_wordcloud(text):
    """Convert *text* into a word-cloud image, returned as a base64 string."""
    rendered = WordCloud().generate(text=text).to_image()
    # Dump the PIL image as PNG into an in-memory buffer.
    buffer = io.BytesIO()
    rendered.save(buffer, "PNG")
    buffer.seek(0)
    # b64encode -> base64 bytes; .decode() -> plain str for templating/JSON.
    return base64.b64encode(buffer.getvalue()).decode()
def wordcloud():
    """Build a word cloud from wordsentence() and return it as a data URI."""
    sentence = wordsentence()
    banned = set(STOPWORDS)
    generator = WordCloud(width=800,
                          height=800,
                          background_color='white',
                          stopwords=banned,
                          min_font_size=10)
    image = generator.generate(sentence).to_image()
    # PNG bytes in memory -> base64 -> inline data URI for direct embedding.
    buffer = BytesIO()
    image.save(buffer, format='PNG')
    encoded = base64.b64encode(buffer.getvalue()).decode()
    return 'data:image/png;base64,{}'.format(encoded)
async def _generate_word_cloud_async(self, chat, user, start, end):
    """Collect words from a chat's history and post a word cloud image.

    ``user``/``start``/``end`` optionally restrict the scan by sender and
    date range; ``start``/``end`` are parsed from strings. The result is
    sent to the hard-coded 'gua_mei_debug' chat.
    """
    # Lazy imports: bail out quietly if jieba/wordcloud are not installed.
    try:
        import jieba
        from wordcloud import WordCloud
    except ImportError as e:
        print(e)
        return
    # Resolve chat/user identifiers to Telegram entities.
    chat = await self._get_entity(chat)
    if user is not None:
        user = await self._get_entity(user)
    if start is not None:
        # Make `start` timezone-aware (UTC) so it compares with msg.date.
        start = parser.parse(start).replace(tzinfo=timezone.utc)
    if end is not None:
        end = parser.parse(end)
    words = []
    # iter_messages walks newest-to-oldest starting at `end`; `start`
    # (older bound) terminates the scan.
    async for msg in self._client.iter_messages(chat,
                                                from_user=user,
                                                offset_date=end):
        if start and msg.date < start:
            break
        if msg.text:
            # Echo each scanned message for debugging.
            print("[{}][{}] {}".format(
                msg.date,
                utils.get_display_name(await msg.get_sender())
                if user is None else utils.get_display_name(user), msg.text))
            # Segment with jieba's search mode, dropping stop words
            # (module-level `stop_words`).
            words += [
                w for w in jieba.cut_for_search(msg.text)
                if w not in stop_words
            ]
    # Render all collected words; simsun.ttf is required for CJK glyphs.
    image = WordCloud(font_path="simsun.ttf", width=800,
                      height=400).generate(' '.join(words)).to_image()
    stream = BytesIO()
    image.save(stream, 'PNG')
    # Caption describes the scanned chat, optional user, and date range.
    await self._client.send_message(
        'gua_mei_debug',
        '{}{}{}'.format(
            f'{chat.title}',
            f'\n{utils.get_display_name(user)}' if user else '',
            '\n{}-{}'.format(
                start.strftime('%Y/%m/%d') if start else 'Join',
                end.strftime('%Y/%m/%d') if end else 'Now')
            if start or end else ''),
        file=stream.getvalue())
def main(argv):
    """Entry point: build text and HTML word clouds from the input files.

    :param argv: sys.argv-style list; argv[1:] are file paths to read.
    :return: 0 on success, 1 when no input file was given.
    """
    # Given by Dr. Brown on Piazza
    argc = len(argv)
    if 2 > argc:
        print("More arguments, please.")
        return 1
    frequency = WordFrequency()
    for i in range(1, argc):
        frequency.open(argv[i])
    # Words too common to be interesting in a cloud.
    stop_words = ['A', 'An', 'And', 'But', 'That', 'The', 'Which',
                  'a', 'an', 'and', 'but', 'that', 'the', 'which']
    # (Removed the dead `count = 23` assignment: the loop variable below
    # overwrote it immediately.)
    for count in [5, 20, 33, 50]:
        cloud = WordCloud(count, frequency, stop_words)
        cloud.save('cloud-{0}.txt'.format(count))
        cloud = HtmlWordCloud(count, frequency, stop_words)
        cloud.save('cloud-{0}.html'.format(count))
    return 0
def index():
    """Fetch tweets matching ?q= from loklak and return a base64 PNG cloud.

    Python 2 era code (cStringIO); returns the raw base64 body directly.
    """
    r = requests.get(
        "http://loklak.org/api/search.json?q={0}&count=100".format(
            request.args.get('q')))
    data = r.json()
    # Compile the heavy URL pattern once instead of per status.
    url_re = re.compile(
        r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)"
    )
    # Strip @mentions, hashtags and URLs before feeding WordCloud; collect
    # cleaned pieces and join once (linear, unlike += in a loop).
    parts = [" "]
    for status in data["statuses"]:
        cleaned = re.sub(r"@", " ", status["text"])
        cleaned = re.sub(r"\#", " ", cleaned)
        cleaned = url_re.sub(" ", cleaned)
        parts.append(cleaned)
    text = "".join(parts)
    cloud = WordCloud(width=800, height=400,
                      stopwords=STOPWORDS).generate(text).to_image()
    # Serialize the PIL image into an in-memory buffer.
    buffer = cStringIO.StringIO()
    cloud.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue())
async def group_word(context):
    """PagerMaid plugin command: analyze the last 500 group messages and
    post a word cloud.

    Auto-installs `jieba` (and optionally `paddlepaddle-tiny` for AI
    segmentation when the command has a parameter), downloads a CJK font
    on first use, then counts jieba-segmented words and sends the image.
    The many bare ``except`` blocks are deliberate best-effort handling of
    Telegram edit/send failures.
    """
    # imported / imported_ are module-level flags (set elsewhere) for
    # whether jieba / paddle support imported successfully — TODO confirm.
    imported_1 = False
    # Any command parameter requests AI (paddle) segmentation.
    if len(context.parameter) >= 1:
        imported_1 = True
    if not imported:
        # jieba missing: try pip-installing it, then restart the client.
        try:
            await context.edit("支持库 `jieba` 未安装...\n正在尝试自动安装...")
            await execute(f'{executable} -m pip install jieba')
            await sleep(10)
            result = await execute(f'{executable} -m pip show jieba')
            if len(result) > 0:
                await context.edit('支持库 `jieba` 安装成功...\n正在尝试自动重启...')
                await context.client.disconnect()
            else:
                await context.edit(f"自动安装失败..请尝试手动安装 `{executable} -m pip install jieba` 随后,请重启 PagerMaid-Modify 。")
                return
        except:
            return
    if not imported_ and imported_1:
        # paddle requested but missing: same install-then-restart dance;
        # on failure fall back to basic segmentation.
        try:
            await context.edit("支持库 `paddlepaddle-tiny` 未安装...\n正在尝试自动安装...")
            await execute(f'{executable} -m pip install paddlepaddle-tiny')
            await sleep(10)
            result = await execute(f'{executable} -m pip show paddlepaddle-tiny')
            if len(result) > 0 and not 'WARNING' in result:
                await context.edit('支持库 `paddlepaddle-tiny` 安装成功...\n正在尝试自动重启...')
                await context.client.disconnect()
            else:
                await context.edit(f"自动安装失败,可能是系统不支持..\nAI 分词不可用,切换到基础分词。\n"
                                   f"您可以尝试手动安装 `{executable} -m pip install paddlepaddle-tiny` 。")
                await sleep(4)
        except:
            return
    try:
        await context.edit('正在生成中。。。')
    except:
        return
    # First run: create the plugin dir and download a CJK font for
    # WordCloud rendering.
    if not exists("plugins/groupword"):
        makedirs("plugins/groupword")
    if not exists("plugins/groupword/wqy-microhei.ttc"):
        await context.edit('正在拉取中文字体文件。。。(等待时间请评估你的服务器)')
        r = get('https://cdn.jsdelivr.net/gh/anthonyfok/fonts-wqy-microhei/wqy-microhei.ttc')
        with open("plugins/groupword/wqy-microhei.ttc", "wb") as code:
            code.write(r.content)
    words = defaultdict(int)  # word -> occurrence count
    count = 0                 # messages analyzed
    try:
        if imported_ and imported_1:
            # Enable paddle mode; silently fall back if it fails.
            try:
                jieba.enable_paddle()
            except:
                imported_1 = False
        async for msg in context.client.iter_messages(context.chat, limit=500):
            # Skip the triggering command message itself.
            if msg.id == context.id:
                continue
            # Skip empty messages, bot commands and URLs.
            if msg.text and not msg.text.startswith('/') and not msg.text.startswith('-') and not '//' in msg.text:
                try:
                    if imported_ and imported_1:
                        # AI (paddle) segmentation; `punctuation` is a
                        # translate table stripping punctuation chars.
                        for word in jieba.cut(msg.text.translate(punctuation), use_paddle=True):
                            word = word.lower()
                            words[word] += 1
                    else:
                        # Basic segmentation.
                        for word in jieba.cut(msg.text.translate(punctuation)):
                            word = word.lower()
                            words[word] += 1
                    count += 1
                except:
                    pass
    except:
        # iter_messages failed before anything was read: likely a Telegram
        # flood/rights restriction.
        if count == 0:
            try:
                await context.edit('您已被 TG 官方限制。')
                return
            except:
                return
    try:
        # Render the frequency table with the downloaded CJK font.
        image = WordCloud(font_path="plugins/groupword/wqy-microhei.ttc", width=800, height=400).generate_from_frequencies(
            words).to_image()
        stream = BytesIO()
        image.save(stream, 'PNG')
    except:
        await context.edit('词云生成失败。')
        return
    try:
        await context.client.send_message(context.chat, f'对最近的 {count} 条消息进行了分析。', file=stream.getvalue())
        await context.delete()
    except:
        return