def getdata4():
    """Render the global `title` list as a QQ-shaped word cloud PNG.

    Reads stopwords from stopword.txt, strips each one out of the joined
    title text, segments the result with jieba, and writes
    "大学课程名称词云化.png" via stylecloud. Relies on the module-level
    `title`, `jieba` and `gen_stylecloud`.
    """
    text = "".join(title)
    # Remove every stopword substring before segmentation.  strip("\r\n")
    # replaces the original chained .replace() calls; the per-word debug
    # print was removed as noise.
    with open("stopword.txt", "r", encoding='UTF-8') as f:
        for line in f:
            word = line.strip("\r\n")  # drop only the trailing newline
            if word:  # a blank line would be a useless no-op replace
                text = text.replace(word, "")
    result = " ".join(jieba.cut(text))  # stylecloud expects space-separated tokens
    # Other icon options: '' (flag), 'fas fa-dragon', 'fas fa-dog',
    # 'fas fa-cat', 'fas fa-dove', 'fab fa-qq'.
    icon_name = 'fab fa-qq'
    # A CJK font is mandatory, otherwise the glyphs render incorrectly.
    gen_stylecloud(text=result,
                   icon_name=icon_name,
                   font_path='simsun.ttc',
                   output_name="大学课程名称词云化.png")
def cria_nuvem_de_palavras(texto, word_cloud_space, collocations, background_color, custom_max_words, custom_stopwords, custom_seed, font_color, invertido, icone_escolhido, gradiente):
    """Render `texto` as a stylecloud and display it in `word_cloud_space`.

    Stopwords are the defaults for the (module-level) `linguagem` language,
    plus a few Twitter-specific tokens, plus the caller's extras. The image
    is written to wordcloud.png and shown via word_cloud_space.image().
    """
    import matplotlib.pyplot as plt  # local imports kept as in the rest of the module
    import stylecloud

    # Assemble the full stopword set.
    ignoradas = set(get_stopwords(linguagem))
    ignoradas |= {"tweet", "twitter", "rt"}
    ignoradas.update(custom_stopwords)

    # Translate the user-facing option strings into stylecloud arguments.
    cor, paleta = texto_2_color_and_palette(font_color)
    cor_de_fundo = texto_bg_2_color(background_color)
    icone = texto_2_icon(icone_escolhido)

    stylecloud.gen_stylecloud(text=texto,
                              icon_name=icone,
                              colors=cor,
                              palette=paleta,
                              background_color=cor_de_fundo,
                              gradient=gradiente,
                              custom_stopwords=ignoradas,
                              output_name='wordcloud.png',
                              max_words=custom_max_words,
                              collocations=collocations,
                              invert_mask=invertido,
                              random_state=custom_seed)
    word_cloud_space.image('wordcloud.png')
def an4_pic():
    """Render one stylecloud PNG per text file under ./text, cycling icons.

    Each text/<name>.txt produces "<name>词云图.png". Icons come from
    `fa_list` in order; the index now wraps with modulo, where the original
    raised IndexError once there were more files than icons.
    """
    fa_list = [
        'fas fa-play', 'fas fa-audio-description', 'fas fa-circle',
        'fas fa-eject', 'fas fa-stop', 'fas fa-video', 'fas fa-volume-off',
        'fas fa-truck', 'fas fa-apple-alt', 'fas fa-mountain', 'fas fa-tree',
        'fas fa-database', 'fas fa-wifi', 'fas fa-mobile', 'fas fa-plug'
    ]
    # Load the stopword list once instead of re-reading it for every file.
    with open("stopword.txt", "r", encoding='UTF-8') as f:
        stop_words = [line.strip("\r\n") for line in f]

    for z, filename in enumerate(os.listdir("text")):
        print(filename)  # progress indicator
        with open("text/" + filename, "r") as f:
            text = f.readlines()[0]
        for word in stop_words:
            if word:  # skip blank lines in stopword.txt
                text = text.replace(word, "")
        result = " ".join(jieba.cut(text))  # space-separated tokens
        # A CJK font is mandatory, otherwise the glyphs render incorrectly.
        gen_stylecloud(text=result,
                       icon_name=fa_list[z % len(fa_list)],
                       font_path='simsun.ttc',
                       output_name=filename.replace(".txt", "") + "词云图.png")
def word_cloud(texto, path_texto, path_imagen):
    """Build a word-cloud image from an iterable of texts.

    Parameters
    ----------
    texto : list or pandas Series
        Texts to include in the cloud.
    path_texto : str
        Path where the intermediate .txt file is written.
    path_imagen : str
        Path where the generated image is saved.

    Returns
    -------
    PIL.Image.Image
        The generated word-cloud image, opened from `path_imagen`.
    """
    # Write all texts space-separated into one .txt file.  The `with` block
    # already closes the file (the original also called f.close() redundantly).
    with open(path_texto, "w") as f:
        for fragmento in texto:
            # str() BEFORE concatenation: the original `str(text + ' ')`
            # raised TypeError whenever an entry was not already a string.
            f.write(str(fragmento) + ' ')

    stylecloud.gen_stylecloud(file_path=path_texto, output_name=path_imagen)
    return Image.open(path_imagen)
def gen_my_stylecloud(text, file_name, icon_name='fas fa-heart'):
    """Join `text` tokens with spaces and render ../image/<file_name>.png."""
    joined_text = ' '.join(text)
    target = f'../image/{file_name}.png'
    stylecloud.gen_stylecloud(
        text=joined_text,
        max_words=1000,
        collocations=False,
        font_path=r'C:\Windows\Fonts\msyh.ttc',  # CJK-capable font
        icon_name=icon_name,
        size=653,
        output_name=target,
    )
def cloud(file_name):
    """Segment `file_name` with jieba and render it as t2.png."""
    with open(file_name, 'r', encoding='utf8') as fh:
        tokens = jieba.cut(fh.read())
        segmented = " ".join(tokens)  # space-separated words for the cloud
    # The Chinese font is mandatory, otherwise the glyphs are garbled.
    gen_stylecloud(
        text=segmented,
        font_path='pachong/simhei.ttf',
        palette='cartocolors.diverging.TealRose_2',
        output_name='t2.png',
        icon_name='fas fa-plane',
    )
def jieba_cloud(file_name):
    """Extract the top-100 TF-IDF keywords from `file_name` and render them
    as <basename>.png under <picture_dir>/stylecloud/.

    Relies on module-level `extract_tags`, `gen_stylecloud`, `picture_dir`.
    """
    with open(file_name, 'r', encoding='utf8') as f:
        word_list = extract_tags(f.read(), topK=100)
    result = " ".join(word_list)  # space-separated tokens for stylecloud
    base_file_name = os.path.basename(file_name)
    # splitext instead of split('.')[0]: a name like "a.b.txt" now maps to
    # "a.b.png" rather than being truncated to "a.png".
    png_name = os.path.splitext(base_file_name)[0] + '.png'
    # A CJK font is mandatory, otherwise the glyphs render incorrectly.
    gen_stylecloud(text=result,
                   font_path="/System/Library/fonts/PingFang.ttc",
                   output_name=os.path.join(picture_dir, 'stylecloud', png_name))
def create_wordcloud(num_frames=564):
    """Render one masked stylecloud per video-frame mask image.

    For i in 1..num_frames, uses mp4_img_mask/<i>.png as the mask and writes
    work/mp4_img_analysis/result<i>.png. The cloud text comes from the
    module-level `text_content`.

    :param num_frames: number of mask images to process; the default keeps
        the original hard-coded range(1, 565) behavior.
    """
    for i in range(1, num_frames + 1):
        mask_file = os.path.join("mp4_img_mask/", f"{i}.png")
        result = os.path.join("work/mp4_img_analysis/", f"result{i}.png")
        # The CJK font is required for the Chinese text; the black background
        # keeps the frame-mask silhouette visible.
        stylecloud.gen_stylecloud(text=text_content,
                                  font_path='方正兰亭刊黑.TTF',
                                  output_name=result,
                                  background_color="black",
                                  mask_img=mask_file)
def analysis3():
    """Render 词云图.png from the 标题 (title) column of the global `data`.

    Uses full-mode jieba segmentation (cut_all=True) and a QQ-shaped icon.
    """
    # ''.join over the column values replaces the original quadratic
    # `content += data['标题'][i]` loop, which also broke on non-0-based
    # DataFrame indexes.
    content = ''.join(data['标题'])
    wl_space_split = ' '.join(jieba.cut(content, cut_all=True))
    pic = '词云图.png'
    # A CJK font is mandatory, otherwise the glyphs render incorrectly.
    gen_stylecloud(
        text=wl_space_split,
        font_path='simsun.ttc',
        icon_name='fab fa-qq',  # alternative: 'fas fa-envira'
        max_words=100,
        max_font_size=70,
        output_name=pic,
    )
def an5():
    """Render 评论内容词云.png from the `content` column of the global df_all.

    Stopwords from stopword.txt are stripped out of the joined comment text
    before jieba segmentation.
    """
    text = "".join(df_all['content'].tolist())
    # strip("\r\n") replaces the original chained .replace() calls; the
    # per-word debug print was removed as noise.
    with open("stopword.txt", "r", encoding='UTF-8') as f:
        for line in f:
            word = line.strip("\r\n")  # drop only the trailing newline
            if word:  # skip blank lines in stopword.txt
                text = text.replace(word, "")
    result = " ".join(jieba.cut(text))  # space-separated tokens
    # A CJK font is mandatory, otherwise the glyphs render incorrectly.
    gen_stylecloud(text=result,
                   icon_name='fas fa-play',
                   font_path='simsun.ttc',
                   output_name="评论内容词云.png")
def make_cloud():
    """Read title→count pairs from the disease CSV (cp949-encoded) and
    render them as a comment-bubble stylecloud at ./img/testwordcloud.png."""
    import stylecloud

    frequencies = {}
    with open("./data/total_disease2_count_FINAL.csv", 'r', encoding='cp949') as f:
        for row in csv.DictReader(f):
            frequencies[row['title']] = int(row['count'])

    # gen_stylecloud accepts a {word: count} dict directly; a file_path or
    # plain text would instead be whitespace-tokenised and counted for you.
    stylecloud.gen_stylecloud(
        text=frequencies,
        size=1028,
        icon_name="fas fa-comment-alt",
        palette='colorbrewer.qualitative.Paired_10',
        background_color='white',
        font_path='/usr/share/fonts/NanumBarunGothic.ttf',  # Korean font
        output_name="./img/testwordcloud.png")
def draw_cloud(reviews):
    """Render wordcloud.png from noun frequencies across Korean reviews.

    Nouns are extracted per review with konlpy's Hannanum analyzer and
    accumulated into a single frequency table.

    :param reviews: iterable of review strings.
    """
    analyzer = Hannanum()  # hoisted: the original constructed it per review
    tags = Counter()
    for review in reviews:
        # Counter.update adds counts in place, replacing the original
        # dict-comprehension merge that rebuilt the whole table per review
        # (O(n^2) over the vocabulary).
        tags.update(analyzer.nouns(review))
    gen_stylecloud(text=dict(tags),
                   output_name="wordcloud.png",
                   icon_name="fas fa-square-full",
                   background_color="white",
                   font_path="Jua-Regular.ttf",  # Korean font
                   size=1024)
def make_wordcloud(self, word_count):
    """POS-tag every title in self.title_list, keep nouns and adjectives,
    and render the `word_count` most common ones to petwordcloud.png.

    Returns 'Success' on completion, 'Fail' on any exception (errors are
    deliberately swallowed, matching the original contract).
    """
    twitter = Twitter()
    sentences_tag = []
    try:
        # Morphological analysis of each title (verbose debug output kept).
        for sentence in self.title_list:
            morph = twitter.pos(sentence)
            sentences_tag.append(morph)
            print(morph)
            print('-' * 30)
        print(sentences_tag)
        print('\n' * 3)

        # Keep only nouns and adjectives from the tagged tokens.
        noun_adj_list = [word
                         for tagged in sentences_tag
                         for word, tag in tagged
                         if tag in ('Noun', 'Adjective')]

        # Frequency count of the surviving tokens.
        tags = Counter(noun_adj_list).most_common(word_count)
        print(tags)

        # Draw the dog-shaped word cloud from the {word: count} mapping.
        stylecloud.gen_stylecloud(
            text=dict(tags),
            background_color='#3A3547',
            font_path='C:\\Windows\\Fonts\\HANBatangB.ttf',
            icon_name="fas fa-dog",
            palette="colorbrewer.diverging.Spectral_11",
            gradient="horizontal",
            output_name="petwordcloud.png")
        return 'Success'
    except Exception:
        return 'Fail'
def gen_twcloud(username=None,
                search=None,
                limit=500,
                colors='white',
                background_color='#1DA1F2',
                icon_name='fab fa-twitter',
                custom_stopwords=STOPWORDS,
                output_name='twcloud.png',
                **kwargs):
    """Generates a twcloud of any public Twitter account or search query!

    See stylecloud docs for additional parameters.

    :param username: Twitter @ username to gather tweets.
    :param search: Search query to use to gather tweets.
    :param limit: Number of tweets retrieved.
    """
    tweets = get_tweet_text(username, search, limit)

    # If `palette` is specified, override `colors`.
    # This is the opposite behavior of stylecloud.
    if 'palette' in kwargs:
        colors = None

    # Work on a copy: the original mutated `custom_stopwords` in place, which
    # silently grew the module-level STOPWORDS default across calls.
    stopwords = set(custom_stopwords)

    # Some stopwords (e.g. I'm, I've) must have quotes removed
    # to match removed smart quotes from tweets.
    noquote_stop = [
        re.sub(r"'", '', word) for word in stopwords if "'" in word
    ]
    stopwords.update(noquote_stop)

    print("Generating the twcloud...")
    gen_stylecloud(text=tweets,
                   output_name=output_name,
                   colors=colors,
                   background_color=background_color,
                   icon_name=icon_name,
                   custom_stopwords=stopwords,
                   **kwargs)
def jieba_cloud(file_name, icon):
    """Segment `file_name` with jieba and render "<stem><icon>.png".

    :param file_name: UTF-8 text file to segment.
    :param icon: menu choice "1".."6"; "1" (or any unknown value) uses
        stylecloud's default shape, the others select a Font Awesome icon.
    :return: path of the generated PNG.
    """
    with open(file_name, 'r', encoding='utf8') as f:
        result = " ".join(jieba.cut(f.read()))  # space-separated tokens

    # Dispatch table replaces the original if/elif chain; the original's
    # `icon_name is not None` check was dead code (it was never None).
    icon_by_choice = {
        '2': 'fas fa-dragon',  # dragon
        '3': 'fas fa-dog',     # dog
        '4': 'fas fa-cat',     # cat
        '5': 'fas fa-dove',    # dove
        '6': 'fab fa-qq',      # QQ logo
    }
    icon_name = icon_by_choice.get(icon, '')

    picp = file_name.split('.')[0] + str(icon) + '.png'
    # A CJK font is mandatory, otherwise the glyphs render incorrectly.
    if icon_name:
        gen_stylecloud(text=result,
                       icon_name=icon_name,
                       font_path='simsun.ttc',
                       output_name=picp)
    else:
        gen_stylecloud(text=result, font_path='simsun.ttc', output_name=picp)
    return picp
def get_styled_cloud(comments, extra_stop_words=None, icon_selected='fas fa-cloud'):
    """Render the joined `comments` as a stylecloud PNG with a random name.

    :param comments: iterable of comment strings.
    :param extra_stop_words: optional list appended to the English defaults.
    :param icon_selected: Font Awesome icon for the cloud silhouette.
    :return: the generated file's name (not its full path).
    """
    ignored_words = get_stop_words('en')
    if extra_stop_words:
        ignored_words += extra_stop_words

    # 7 random uppercase/digit characters keep concurrent requests from
    # clobbering each other's output file.
    random_stem = ''.join(random.choices(string.ascii_uppercase + string.digits, k=7))
    cloud_name = random_stem + '.png'
    file_path = os.path.join(PRETTY_LITTLE_WORD_CLOUD_PATH, cloud_name)

    joined_comments = ' '.join(comments)
    stylecloud.gen_stylecloud(text=joined_comments,
                              size=1024,
                              icon_name=icon_selected,
                              palette='colorbrewer.diverging.Spectral_11',
                              background_color='black',
                              gradient='horizontal',
                              custom_stopwords=ignored_words,
                              output_name=file_path)
    return cloud_name
def make(text, idx, png_name1, png_name2):
    """Render `text` as a Twitter-bird stylecloud under the frontend assets:
    ../frontend/src/assets/img/itscience/<png_name1>/<png_name2>/<idx>.png.

    :param text: the text (or {word: count} dict) to visualise.
    :param idx: file stem for the output PNG.
    :param png_name1: first-level directory name.
    :param png_name2: second-level directory name.
    """
    base_dir = "../frontend/src/assets/img/itscience/"
    target_dir = base_dir + str(png_name1) + "/" + str(png_name2)
    # exist_ok=True replaces the original two exists()/makedirs() pairs each
    # wrapped in try/except-pass — same effect, race-free, no swallowed errors.
    os.makedirs(target_dir, exist_ok=True)

    # Korean-capable font; fixed palette matching the site's color theme.
    # (gen_stylecloud returns None; the original's `wc =` assignment was dead.)
    stylecloud.gen_stylecloud(
        text=text,
        icon_name="fab fa-twitter",
        font_path='C://windows\\Fonts\\HANYGO230.ttf',
        colors=[
            '#032859', '#016F94', '#FFE4B6', '#FFB06D', '#FE6A2C', '#FCBB6D',
            '#D8737F', '#AB6C8C', '#685D79', '#475C7A'
        ],
        palette="colorbrewer.diverging.Spectral_11",
        background_color='#EFEFF0',
        # gradient="horizontal",  # kept disabled, as in the original
        output_name=target_dir + "/" + str(idx) + ".png")
# -*- coding: utf-8 -*-
"""Stylecloud configuration for the article."""
import stylecloud

# Render the pyenv README as a Python-logo-shaped word cloud on black.
render_options = dict(
    size=730,
    file_path='./pyenv-readme.txt',
    icon_name='fab fa-python',
    palette='colorbrewer.diverging.Spectral_11',
    background_color='black',
    gradient='horizontal',
    output_name='pyenv-stylecloud.png',
    invert_mask=False,
    max_font_size=100,
)
stylecloud.gen_stylecloud(**render_options)
def cloud(text, max_word, max_font, random, colormap, background_color,
          gradient_direction, icon, size2, invert_mask, gradient, font):
    """Render `text` as a stylecloud and display it via Streamlit.

    English + French stopwords are filtered. If `gradient_direction` is set,
    it overrides `gradient` and no explicit size is passed; otherwise `size2`
    ('square' or 'rectangle') picks the canvas size. The image is written to
    stylecloud.png (the library default) and shown with st.image().

    NOTE(review): the `random` and `font` parameters shadow the builtin
    module / are reused as locals — kept for interface compatibility.
    """
    stopwords = set(STOPWORDS)
    stopwords.update([
        'us', 'one', 'will', 'said', 'now', 'well', 'man', 'may', 'little',
        'say', 'must', 'way', 'long', 'yet', 'mean', 'put', 'seem', 'asked',
        'made', 'half', 'much', 'certainly', 'might', 'came', "a", "à", "â",
        "abord", "afin", "ah", "ai", "aie", "ainsi", "allaient", "allo",
        "allô", "allons", "après", "assez", "attendu", "au", "aucun",
        "aucune", "aujourd", "aujourd'hui", "auquel", "aura", "auront",
        "aussi", "autre", "autres", "aux", "auxquelles", "auxquels",
        "avaient", "avais", "avait", "avant", "avec", "avoir", "ayant", "b",
        "bah", "beaucoup", "bien", "bigre", "boum", "bravo", "brrr", "c",
        "ça", "car", "ce", "ceci", "cela", "celle", "celle-ci", "celle-là",
        "celles", "celles-ci", "celles-là", "celui", "celui-ci", "celui-là",
        "cent", "cependant", "certain", "certaine", "certaines", "certains",
        "certes", "ces", "cet", "cette", "ceux", "ceux-ci", "ceux-là",
        "chacun", "chaque", "cher", "chère", "chères", "chers", "chez",
        "chiche", "chut", "ci", "cinq", "cinquantaine", "cinquante",
        "cinquantième", "cinquième", "clac", "clic", "combien", "comme",
        "comment", "compris", "concernant", "contre", "couic", "crac", "d",
        "da", "dans", "de", "debout", "dedans", "dehors", "delà", "depuis",
        "derrière", "des", "dès", "désormais", "desquelles", "desquels",
        "dessous", "dessus", "deux", "deuxième", "deuxièmement", "devant",
        "devers", "devra", "différent", "différente", "différentes",
        "différents", "dire", "divers", "diverse", "diverses", "dix",
        "dix-huit", "dixième", "dix-neuf", "dix-sept", "doit", "doivent",
        "donc", "dont", "douze", "douzième", "dring", "du", "duquel",
        "durant", "e", "effet", "eh", "elle", "elle-même", "elles",
        "elles-mêmes", "en", "encore", "entre", "envers", "environ", "es",
        "ès", "est", "et", "etant", "étaient", "étais", "était", "étant",
        "etc", "été", "etre", "être", "eu", "euh", "eux", "eux-mêmes",
        "excepté", "f", "façon", "fais", "faisaient", "faisant", "fait",
        "feront", "fi", "flac", "floc", "font", "g", "gens", "h", "ha", "hé",
        "hein", "hélas", "hem", "hep", "hi", "ho", "holà", "hop", "hormis",
        "hors", "hou", "houp", "hue", "hui", "huit", "huitième", "hum",
        "hurrah", "i", "il", "ils", "importe", "j", "je", "jusqu", "jusque",
        "k", "l", "la", "là", "laquelle", "las", "le", "lequel", "les",
        "lès", "lesquelles", "lesquels", "leur", "leurs", "longtemps",
        "lorsque", "lui", "lui-même", "m", "ma", "maint", "mais", "malgré",
        "me", "même", "mêmes", "merci", "mes", "mien", "mienne", "miennes",
        "miens", "mille", "mince", "moi", "moi-même", "moins", "mon",
        "moyennant", "n", "na", "ne", "néanmoins", "neuf", "neuvième", "ni",
        "nombreuses", "nombreux", "non", "nos", "notre", "nôtre", "nôtres",
        "nous", "nous-mêmes", "nul", "o", "o|", "ô", "oh", "ohé", "olé",
        "ollé", "on", "ont", "onze", "onzième", "ore", "ou", "où", "ouf",
        "ouias", "oust", "ouste", "outre", "p", "paf", "pan", "par", "parmi",
        "partant", "particulier", "particulière", "particulièrement", "pas",
        "passé", "pendant", "personne", "peu", "peut", "peuvent", "peux",
        "pff", "pfft", "pfut", "pif", "plein", "plouf", "plus", "plusieurs",
        "plutôt", "pouah", "pour", "pourquoi", "premier", "première",
        "premièrement", "près", "proche", "psitt", "puisque", "q", "qu",
        "quand", "quant", "quanta", "quant-à-soi", "quarante", "quatorze",
        "quatre", "quatre-vingt", "quatrième", "quatrièmement", "que",
        "quel", "quelconque", "quelle", "quelles", "quelque", "quelques",
        "quelqu'un", "quels", "qui", "quiconque", "quinze", "quoi",
        "quoique", "r", "revoici", "revoilà", "rien", "s", "sa",
        "sacrebleu", "sans", "sapristi", "sauf", "se", "seize", "selon",
        "sept", "septième", "sera", "seront", "ses", "si", "sien", "sienne",
        "siennes", "siens", "sinon", "six", "sixième", "soi", "soi-même",
        "soit", "soixante", "son", "sont", "sous", "stop", "suis",
        "suivant", "sur", "surtout", "t", "ta", "tac", "tant", "te", "té",
        "tel", "telle", "tellement", "telles", "tels", "tenant", "tes",
        "tic", "tien", "tienne", "tiennes", "tiens", "toc", "toi",
        "toi-même", "ton", "touchant", "toujours", "tous", "tout", "toute",
        "toutes", "treize", "trente", "très", "trois", "troisième",
        "troisièmement", "trop", "tsoin", "tsouin", "tu", "u", "un", "une",
        "unes", "uns", "v", "va", "vais", "vas", "vé", "vers", "via", "vif",
        "vifs", "vingt", "vivat", "vive", "vives", "vlan", "voici", "voilà",
        "vont", "vos", "votre", "vôtre", "vôtres", "vous", "vous-mêmes",
        "vu", "w", "x", "y", "z", "zut", "alors", "aucuns", "bon",
        "devrait", "dos", "droite", "début", "essai", "faites", "fois",
        "force", "haut", "ici", "juste", "maintenant", "mine", "mot",
        "nommés", "nouveaux", "parce", "parole", "personnes", "pièce",
        "plupart", "seulement", "soyez", "sujet", "tandis", "valeur",
        "voie", "voient", "état", "étions"
    ])
    inv_mask = False
    font = font + ".ttf"
    palette = 'cartocolors.qualitative.{}'.format(colormap)
    if invert_mask == "Yes":
        inv_mask = True

    if gradient_direction is not None:
        # Explicit direction wins over the `gradient` argument.
        gradient = gradient_direction
        stylecloud.gen_stylecloud(
            text=text,
            custom_stopwords=stopwords,
            background_color=background_color,
            random_state=random,
            max_words=max_word,
            max_font_size=max_font,
            palette=palette,
            gradient=gradient,
            invert_mask=inv_mask,
            font_path=font,
            icon_name='fas fa-{}'.format(icon))
    else:
        # Default to square: the original left `size` unbound (NameError)
        # when size2 was neither 'square' nor 'rectangle'.
        size = (512, 512)
        if size2 == 'rectangle':
            size = (1024, 512)
        # generate the style cloud
        stylecloud.gen_stylecloud(text=text,
                                  custom_stopwords=stopwords,
                                  background_color=background_color,
                                  random_state=random,
                                  max_words=max_word,
                                  max_font_size=max_font,
                                  palette=palette,
                                  size=size,
                                  font_path=font,
                                  invert_mask=inv_mask,
                                  icon_name='fas fa-{}'.format(icon))
    st.image('stylecloud.png')
import stylecloud
import os
from stop_words import get_stop_words

stop_words = get_stop_words('english')

# based on https://towardsdatascience.com/how-to-easily-make-beautiful-wordclouds-in-python-55789102f6f5
# Render every .txt file in the current directory as a circular word cloud,
# writing a same-named .png next to it.
for filename in os.listdir("."):
    if not filename.endswith(".txt"):
        continue
    print("Processing " + filename)
    stylecloud.gen_stylecloud(file_path=filename,
                              icon_name="fas fa-circle",
                              background_color='white',
                              output_name=filename.replace(".txt", ".png"),
                              size=(800, 800),
                              custom_stopwords=stop_words)
Wir unterstützen Engagement, das über den Unterricht hinausgeht. Wir bringen die nötige Flexibilität gegenüber Jugendlichen mit besonderer Begabung auf, um das Nebeneinander von Schule und Talent zu ermöglichen. 4. begegnen, bereichern, Brücken schlagen Alle Menschen an unserer Schule haben dieselben Chancen. Der Austausch ist für uns eine Bereicherung und eine Möglichkeit, die eigene Position zu hinterfragen. 5. Vertrauen, Verbindlichkeit, Verständnis Wir kommunizieren fair, transparent und rechtzeitig. Wir schaffen den passenden Rahmen, wo wir einander zuhören und einander ausreden lassen. Bei Unklarheiten fragen wir nach und suchen gemeinsam nach Lösungen. An Abmachungen halten wir uns. 6. Biel, Bienne, bilingue Wir profitieren von der Zweisprachigkeit des Standorts Biel-Bienne und pflegen den Kontakt zur anderen Sprachund Kulturgruppe. Wir pflegen den Austausch mit Institutionen der Region. 7. nachhaltig, vorbildlich, weitsichtig Wir legen Wert auf einen verantwortungsvollen und nachhaltigen Umgang mit unseren Ressourcen und unserer Umwelt. Wir achten darauf, dass unser Verhalten andere nicht beeinträchtigt. Wir tragen Sorge zu den Gebäuden und Räumlichkeiten, der Einrichtung und dem Schulmaterial. 8. mitreden, mitmachen, mitwirken Wir legen Wert auf konstruktives Mitwirken und Eigenverantwortung. Kritik- und Teamfähigkeit, Toleranz und ein friedliches Miteinander sowie weltoffenes Denken prägen unser Handeln ''' stylecloud.gen_stylecloud( text=LEITBILD, custom_stopwords=[ 'und', 'zu', 'von', 'an', 'bei', 'den', 'des', 'eine', 'ein', 'auf', 'der', 'das', 'die', 'mit', 'für', 'uns', 'wir', 'über' ], icon_name='fas fa-graduation-cap', )
def eng_crawling_url(update, context, eng_title):
    """Scrape Rotten Tomatoes critic reviews for `eng_title`, classify each
    as positive/negative with a pre-trained Keras model, and send the stats
    plus two word-cloud images back through the Telegram bot.

    Flow: Google-search the title -> follow the Rotten Tomatoes result ->
    fetch 5 pages of critic reviews -> clean/lemmatize/vectorize each ->
    predict sentiment -> tally per-word frequencies -> render stylecloud
    thumbs-up/thumbs-down PNGs and send them.

    :param update: Telegram update object for the triggering message.
    :param context: Telegram callback context (used for bot send/delete).
    :param eng_title: English movie title to search for.
    Raises nothing to the caller: an IndexError (no search result / no
    reviews) is caught and reported as a chat message.
    """
    global delete_message_id
    path = (r"C:\pythonProject\sentimental_analysis\chromedriver.exe")
    driver = webdriver.Chrome(path)
    # Search Google for the English title.
    driver.get('https://www.google.com')
    search_box = driver.find_element_by_name('q')
    search_box.send_keys(f'{eng_title}')
    search_box.submit()
    try:
        # Second link of the knowledge-panel overview is assumed to be the
        # Rotten Tomatoes page — TODO confirm; brittle against layout changes.
        a = driver.find_elements_by_xpath(
            '//*[@id="kp-wp-tab-overview"]/div[1]/div[2]/div/div/div[1]/div[1]/a[2]'
        )
        driver.get(a[0].get_attribute('href'))
        # Jump to the critic-reviews tab.
        a = driver.find_elements_by_xpath('//*[@id="criticHeaders"]/a[1]')
        driver.get(a[0].get_attribute('href'))
        url = driver.current_url
        # Pre-trained English sentiment model (Keras .h5).
        model = load_model(
            r'C:\pythonProject\sentimental_analysis\models\eng_model_2.h5')
        eng_pos_num = 0   # count of reviews classified positive
        eng_neg_num = 0   # count of reviews classified negative
        eng_sum_score = 0  # accumulated model scores (not reported further here)
        # p_eng / n_eng are only used by the disabled translation path below.
        p_eng = list()
        n_eng = list()
        eng_pos_dict = dict()  # word -> frequency in positive reviews
        eng_neg_dict = dict()  # word -> frequency in negative reviews
        max_len = 300  # padding length expected by the model
        wordnet = WordNetLemmatizer()
        # Tokenizer fitted alongside the model at training time.
        with open(
                r'C:\pythonProject\sentimental_analysis\models\tokenizer_data_eng.pkl',
                'rb') as handle:
            tokenizer = pickle.load(handle)
        # Fetch review pages 1..5.
        for i in range(1, 6):
            new_url = url + f'?type=&sort=&page={i}'
            response = requests.get(new_url)
            html = response.text.strip()
            soup = BeautifulSoup(html, 'html.parser')
            selector = '#content > div > div > div > div.review_table > div > div.col-xs-16.review_container > div.review_area > div.review_desc > div.the_review'
            links = soup.select(selector)
            for link in links:
                eng_text = link.text.strip()
                # Keep letters only, lowercase, drop stopwords, lemmatize,
                # drop domain noise ('spanish', 'review') and short tokens.
                new = re.sub('[^a-zA-Z]', ' ', eng_text)
                words = new.lower().split()
                stop_words = set(stopwords.words('english'))
                meaning_words = [w for w in words if not w in stop_words]
                words = [wordnet.lemmatize(w) for w in meaning_words]
                words = [w for w in words if not w in ['spanish', 'review']]
                words = [w for w in words if len(w) > 2]
                new = tokenizer.texts_to_sequences([words])
                eng_texts_test = pad_sequences(new, maxlen=max_len)
                eng_score = float(model.predict(eng_texts_test))  # sentiment prediction
                eng_sum_score += eng_score
                ### Keep the English tokens as-is (no translation).
                # Score > 0.5 is treated as positive.
                if (eng_score > 0.5):
                    eng_pos_num += 1
                    for word in words:
                        if word not in eng_pos_dict:
                            eng_pos_dict[word] = 1
                        else:
                            eng_pos_dict[word] += 1
                else:
                    eng_neg_num += 1
                    for word in words:
                        if word not in eng_neg_dict:
                            eng_neg_dict[word] = 1
                        else:
                            eng_neg_dict[word] += 1
        # NOTE: a large commented-out alternative existed here: translate the
        # collected reviews to Korean via Papago (selenium), tokenize with
        # okt, filter Korean stopwords, and tally into the same dicts.
        # Removed for readability; restore from VCS history if needed.
        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=delete_message_id)
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text='로튼 토마토 검색 결과입니다')
        # Report positive/negative percentages.
        context.bot.send_message(
            chat_id=update.effective_chat.id,
            text=
            f'긍정 리뷰는 {eng_pos_num}개로 전체 리뷰 중 {round(eng_pos_num / (eng_pos_num + eng_neg_num) * 100, 2)}%이며,'
            + '\n' +
            f'부정 리뷰는 {eng_neg_num}개로 전체 리뷰 중 {round(eng_neg_num / (eng_pos_num + eng_neg_num) * 100, 2)}%입니다.'
        )
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'잠시 후에 리뷰를 요약한 이미지가 표출됩니다')
        # Sort both frequency tables by descending count.
        eng_pos_dict = dict(
            sorted(eng_pos_dict.items(), reverse=True,
                   key=lambda item: item[1]))
        eng_neg_dict = dict(
            sorted(eng_neg_dict.items(), reverse=True,
                   key=lambda item: item[1]))
        # NOTE: a commented-out Korean stopword purge (wcstopwords) lived
        # here for the translation path; removed with it.
        # stylecloud part: thumbs-up for positive words, thumbs-down for negative.
        stylecloud.gen_stylecloud(text=eng_pos_dict,
                                  font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',
                                  icon_name="fas fa-thumbs-up",
                                  palette="cartocolors.sequential.Peach_5",
                                  background_color='black',
                                  output_name="results/eng_positive.png")
        stylecloud.gen_stylecloud(text=eng_neg_dict,
                                  font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',
                                  icon_name="fas fa-thumbs-down",
                                  palette="colorbrewer.sequential.YlGn_4",
                                  background_color='black',
                                  output_name="results/eng_negative.png")
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/eng_positive.png', 'rb'))
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/eng_negative.png', 'rb'))
    except IndexError:
        # No search hit / review section found: tell the user instead of crashing.
        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=delete_message_id)
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text='로튼 토마토에 해당 영화의 리뷰가 등록되어 있지 않습니다!')
def crawling_url(update, context):
    """Scrape Naver Movie reviews for the title in the user's message,
    classify each review with a pre-trained Korean sentiment model, and send
    stats plus two word-cloud images back through the Telegram bot.

    Flow: search Naver for the title -> open the movie page -> extract the
    movie code and English title (stored in the global `eng_title` for the
    follow-up Rotten Tomatoes handler) -> fetch 10 pages of audience reviews
    -> tokenize with okt, filter stopwords, predict sentiment -> tally word
    frequencies -> render carrot/bomb styleclouds and send them.

    Relies on module-level `okt`, `kr_stopwords`, `tokenizer`, `max_len`,
    `model` — presumably initialised at import time; verify against caller.
    Bad input or missing results raise MissingSchema/IndexError, which are
    caught and reported as a chat message.
    """
    global eng_title
    context.bot.send_animation(animation=open('loading.gif', 'rb'),
                               chat_id=update.message.chat_id)
    first_delete_message_id = update.message.message_id + 1
    # Command prefix occupies the first 7 characters of the message text.
    movie_title = update.message.text[7:]
    # Selenium driver setup.
    path = ('chromedriver.exe')
    driver = webdriver.Chrome(path)
    driver.get('https://www.naver.com')
    # Locate Naver's main search box and search for the movie.
    search_box = driver.find_element_by_name('query')
    search_box.send_keys(f'영화 {movie_title}')
    search_box.submit()
    try:
        # First result block is assumed to be the movie page — TODO confirm.
        a = driver.find_elements_by_xpath(
            '//*[@id="main_pack"]/div[1]/div[1]/div[1]/h2/a')
        driver.get(a[0].get_attribute('href'))
        req = driver.page_source
        soup = BeautifulSoup(req, 'html.parser')
        selector = '#content > div.article > div.mv_info_area > div.mv_info > h3 > a'
        links = soup.select(selector)
        c = []
        for link in links:
            c.append(link['href'])
        # Movie code is the numeric tail of the href (6 digits, or 5 when a
        # '=' sneaks into the last 6 characters).
        code = c[0][-6:]
        if '=' in code:
            code = c[0][-5:]
        # The <strong> holds the English title, reused by the RT handler.
        selector2 = '#content > div.article > div.mv_info_area > div.mv_info > strong'
        texts = []
        links2 = soup.select(selector2)
        for link in links2:
            texts.append(link.text)
        eng_title = texts[0]
        print(eng_title)
        total_count = 100  # reviews to fetch (10 pages x 10); was int(result.replace(',', ''))
        sum_score = 0  # accumulated star ratings for the average
        pos_num = 0
        neg_num = 0
        pos_dict = dict()  # word -> frequency in positive reviews
        neg_dict = dict()  # word -> frequency in negative reviews
        for i in range(1, int(total_count / 10) + 1):
            url = (
                f'https://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?code={code}&type=after&page='
                + str(i))
            print(f'{url} is parsing...')
            resp = requests.get(url)
            html = BeautifulSoup(resp.content, 'html.parser')
            score_result = html.find('div', {'class': 'score_result'})
            lis = score_result.findAll('li')
            for li in lis:
                review_text = li.find(
                    'p').getText()  # span id = _filtered_ment_0
                # Strip the "관람객" (audience) marker prepended by Naver.
                review_text = review_text.replace("관람객", "")
                review_text = review_text.strip()
                score = int(li.find('em').getText())
                sum_score += score
                tokenized_sentence = okt.pos(review_text, stem=True)  # tokenize + stem
                # (token, tag) pairs with stopwords removed — used for tallying.
                exstopw_ts = [
                    word for word in tokenized_sentence
                    if not word[0] in kr_stopwords
                ]
                # Token strings only — fed to the model.
                exst_tok_sentence = [
                    word[0] for word in tokenized_sentence if not word[0] in
                    kr_stopwords
                ]
                encoded = tokenizer.texts_to_sequences([exst_tok_sentence
                                                        ])  # integer encoding
                pad_new = pad_sequences(encoded, maxlen=max_len)  # padding
                pd_score = float(model.predict(pad_new))  # sentiment prediction
                # Score > 0.5 is positive; only content words are tallied.
                if (pd_score > 0.5):
                    pos_num += 1
                    for word in exstopw_ts:
                        if word[1] in ['Noun', 'Adjective', 'Verb']:
                            if word[0] not in pos_dict:
                                pos_dict[word[0]] = 1
                            else:
                                pos_dict[word[0]] += 1
                else:
                    neg_num += 1
                    for word in exstopw_ts:
                        if word[1] in ['Noun', 'Adjective', 'Verb']:
                            if word[0] not in neg_dict:
                                neg_dict[word[0]] = 1
                            else:
                                neg_dict[word[0]] += 1
        avg_score = sum_score / total_count
        # Sort both frequency tables by descending count.
        pos_dict = dict(
            sorted(pos_dict.items(), reverse=True, key=lambda item: item[1]))
        neg_dict = dict(
            sorted(neg_dict.items(), reverse=True, key=lambda item: item[1]))
        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=first_delete_message_id)
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'네이버 영화리뷰 검색 결과입니다.')
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'관람객 평균 평점은 {avg_score}점 입니다.')
        # Report positive/negative percentages.
        context.bot.send_message(
            chat_id=update.effective_chat.id,
            text=
            f'긍정 리뷰는 {pos_num}개로 전체 리뷰 중 {round(pos_num / (pos_num + neg_num) * 100, 2)}%이며,'
            + '\n' +
            f'부정 리뷰는 {neg_num}개로 전체 리뷰 중 {round(neg_num / (pos_num + neg_num) * 100, 2)}%입니다.'
        )
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=f'잠시 후에 리뷰를 요약한 이미지가 표출됩니다')
        # PNG file create --------------------------------------------
        # Generic Korean words that would dominate the clouds.
        wcstopwords = {
            '영화', '보다', '되다', '있다', '없다', '아니다', '이다', '좋다', '않다', '같다',
            '많다', '때', '것', '바', '그', '수'
        }
        for w in wcstopwords:
            if w in pos_dict:
                pos_dict.pop(w)
            if w in neg_dict:
                neg_dict.pop(w)
        # stylecloud part: carrot icon for positive, bomb for negative.
        stylecloud.gen_stylecloud(
            text=pos_dict,  # positive-review frequency dict
            font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',  # Korean font
            icon_name="fas fa-carrot",  # carrot
            palette="cartocolors.sequential.Peach_5",  # orange-pink ramp
            background_color='black',
            output_name="results/positive.png")
        stylecloud.gen_stylecloud(text=neg_dict,
                                  font_path='C:/Windows/Fonts/BMJUA_ttf.ttf',
                                  icon_name="fas fa-bomb",
                                  palette="colorbrewer.sequential.YlGn_4",
                                  background_color='black',
                                  output_name="results/negative.png")
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/positive.png', 'rb'))
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open('results/negative.png', 'rb'))
        # Offer the Rotten Tomatoes follow-up as inline buttons.
        buttons = [[InlineKeyboardButton('로튼 토마토 반응도 보고싶어', callback_data=1)],
                   [InlineKeyboardButton('여기까지 볼래', callback_data=2)]]
        reply_markup = InlineKeyboardMarkup(buttons)
        context.bot.send_message(chat_id=update.message.chat_id,
                                 text='이어서 해당 영화의 로튼 토마토 반응도 살펴보실 수 있습니다.',
                                 reply_markup=reply_markup)
    except (requests.exceptions.MissingSchema, IndexError):
        # No search result / malformed URL: report instead of crashing.
        context.bot.delete_message(chat_id=update.effective_chat.id,
                                   message_id=first_delete_message_id)
        context.bot.send_message(chat_id=update.effective_chat.id,
                                 text='잘못된 입력입니다')
# -*- coding: utf-8 -*-
"""Stylecloud configuration for the article."""
import stylecloud

# Render the VS Code extensions README as a Windows-logo word cloud on
# the signature VS Code blue.
render_options = dict(
    size=730,
    file_path='./117-vscode-extensions-readme.txt',
    icon_name='fab fa-windows',
    palette='colorbrewer.diverging.Spectral_11',
    background_color='#00a2ed',
    gradient='horizontal',
    output_name='117-extensions-vs-code-stylecloud.png',
    invert_mask=False,
    max_font_size=200,
)
stylecloud.gen_stylecloud(**render_options)
import requests
import threading
import time
import stylecloud
from bs4 import BeautifulSoup  # was missing: BeautifulSoup is called below

# NOTE: a stray '₩' character sat between the imports and the main guard in
# the original — a SyntaxError that prevented the script from running at all.

if __name__ == '__main__':
    # Fetch the Arirang front page and render its visible text as a
    # crown-shaped word cloud (written to stylecloud.png, the default).
    source = 'http://arirang.com/index.asp?sys_lang=Eng'
    req = requests.get(source)
    dataset = BeautifulSoup(req.text, 'html.parser')
    print(dataset)
    stylecloud.gen_stylecloud(dataset.text,
                              icon_name='fas fa-crown',
                              palette='colorbrewer.diverging.Spectral_11',
                              gradient='vertical',  # or 'horizontal'
                              size=(1024, 512))
    # A periodic regeneration via threading.Timer(10, ...) existed here as
    # commented-out code; restore it if the cloud should refresh every 10s.
import stylecloud

# (input CSV, output PNG) pairs for the two yearly author rankings.
jobs = (
    ('top_authors_2019.csv', 'top_authors_2019.png'),
    ('top_authors_2021.csv', 'top_authors_2021.png'),
)
for src_file, dst_file in jobs:
    stylecloud.gen_stylecloud(file_path=src_file,
                              icon_name='fas fa-book-open',
                              background_color='black',
                              gradient='horizontal',
                              output_name=dst_file)
# 存储数据 df = pd.DataFrame({ 'nick_name': nick_name, 'content': content, 'comment_time': comment_time, 'praise_num': praise_num }) # 追加数据 df_all = df_all.append(df, ignore_index=True) # 休眠一秒 time.sleep(1) return df_all # 运行函数 df = get_qq_comment(page_num=20) text1 = get_cut_words(content_series=df.content) text1[:5] ['致敬', '久石', '人生', '旋转', '木马'] stylecloud.gen_stylecloud(text=' '.join(text1), max_words=1000, collocations=False, font_path='pachong/simhei.ttf', icon_name='fas fa-music', size=624, output_name='QQ音乐评论词云图.png')
# In[89]: # 分词 import jieba title_word = article['文章'] title_word = ' '.join(title_word) word = jieba.lcut(title_word) # 词云生成 from stylecloud import gen_stylecloud gen_stylecloud(text=' '.join(word), collocations=False, palette='tableau.Tableau_20', font_path=r'C:\Windows\Fonts\msyh.ttc', icon_name='fas fa-file-alt', size=400, output_name='../output/标题词云.png') # ## 文章发送成功人数的走势 # In[90]: send_peo = article[['群发时间', '发送成功人数']] # 取年,月 send_peo['群发时间'] = send_peo['群发时间'].astype(str) send_peo['群发时间'] = send_peo['群发时间'].str[:7] # 以年月分组计算这个年月中最大的数,即为当月用户数
palette='cartocolors.qualitative.Bold_5', # 调色板(通过 palettable 实现)。[default: cartocolors.qualitative.Bold_6] colors=None, background_color="white", # 背景颜色 max_font_size=200, # stylecloud 中的最大字号 max_words=2000, # stylecloud 可包含的最大单词数 stopwords=True, # 布尔值,用于筛除常见禁用词 custom_stopwords=STOPWORDS, # 去除停用词 icon_dir='.temp', output_name='stylecloud.png', # stylecloud 的输出文本名 gradient=None, # 梯度方向 font_path=os.path.join(STATIC_PATH,'Staatliches-Regular.ttf'), # stylecloud 所用字体 random_state=None, # 控制单词和颜色的随机状态 collocations=True, invert_mask=False, pro_icon_path=None, pro_css_path=None) ''' stylecloud.gen_stylecloud( text=word_list, palette='tableau.BlueRed_6', icon_name='fas fa-apple-alt', font_path='./田英章楷书3500字.ttf', output_name='词云图.png', # custom_stopwords=stopwords ) Image.open('词云图.png') print('成功生成词云图!!') end_time = time.time() print('运行共耗时 {:.1f}秒'.format(end_time - begin_time))
from os import listdir
from os.path import isfile, join

import stylecloud

data_path = "data/processed/keywords/"

# One vinyl-record-shaped word cloud per keyword file, saved under
# vizs/decades/ with the same stem and a .png extension.
keyword_files = [name for name in listdir(data_path) if isfile(join(data_path, name))]
for name in keyword_files:
    out_png = name[:-3] + "png"  # swap the 3-character extension for "png"
    stylecloud.gen_stylecloud(file_path=join(data_path, name),
                              stopwords=True,
                              background_color='#1A1A1A',
                              max_words=50,
                              icon_name="fas fa-record-vinyl",
                              output_name=f"vizs/decades/{out_png}")