def crawling(self):
    """Collect world-index quotes, save them to Excel and chart them.

    ``self.get_price`` is called once per index symbol with two shared
    lists and the shared worksheet; it is presumably responsible for
    appending the label and value of each index to those lists (verify
    against ``get_price``).  The workbook is then saved and a horizontal
    bar chart of the collected values is shown.
    """
    symbols = (
        "DJI@DJI", "LNS@FTSE100", "NAS@IXIC", "SPI@SPX", "SHS@000001",
        "HSI@HSI", "PAS@CAC40", "STX@SX5E", "IDI@JKSE", "NII@NI225",
        "XTR@DAX30", "BRI@BVSP", "RUI@RTSI", "MYI@KLSE", "NAS@SOX",
    )
    labels = []
    changes = []
    sheet = naver_finance.new_sheet
    for symbol in symbols:
        self.get_price(symbol, labels, changes, sheet)

    naver_finance.wb.save('D:\\naver_finance.xlsx')

    # The font is loaded straight from its file, so no fontconfig scan is
    # needed first (the old scan's result was discarded anyway).
    font_location = 'C:/Windows/Fonts/malgun.ttf'
    font_name = fm.FontProperties(fname=font_location).get_name()
    pyplot.rc('font', family=font_name)
    pyplot.rcParams["figure.figsize"] = (14, 7)

    ypos = np.arange(len(labels))
    pyplot.barh(ypos, changes, align='center', height=0.7)
    pyplot.yticks(ypos, labels)
    pyplot.xlabel('전일대비 변동량')
    pyplot.show()
def composer_histogram(composers):
    """Plot the share of songs credited to each frequently seen composer.

    Args:
        composers: Iterable whose items are either a float NaN (skipped) or
            a string holding a Python literal of composer names, parsed
            with ``ast.literal_eval``.

    Side effects:
        Sets the module-level ``composer_set`` to the names of composers
        with more than 20 credits, and shows a seaborn bar plot.
    """
    global composer_set
    import math

    composer_cnt = Counter()
    sz = 0
    for composer in composers:
        # Compare by value: an identity test against ``np.nan`` misses NaN
        # floats that are not that exact singleton, and those would then
        # crash ``ast.literal_eval``.
        if isinstance(composer, float) and math.isnan(composer):
            continue
        sz += 1
        for name in ast.literal_eval(composer):
            composer_cnt[name] += 1

    total = sum(composer_cnt.values())
    shares = [
        (name, round(count / total * 100, 2))
        for name, count in composer_cnt.items()
        if count > 20
    ]
    shares.sort(key=lambda pair: pair[1], reverse=True)
    xdata = [name for name, _ in shares]
    ydata = [share for _, share in shares]
    composer_set = set(xdata)

    sns.set_style("whitegrid")
    font_location = r'C:/Windows/Fonts/NanumBarunGothic.ttf'
    font_name = fm.FontProperties(fname=font_location).get_name()
    mpl.rc('font', family=font_name)

    g = sns.barplot(x=xdata, y=ydata)
    g.set_title("20곡 이상을 작업한 작곡가(총{}곡)(%)".format(sz))
    g.set_xticklabels(xdata, rotation=70)
    # Write each bar's percentage just above it.
    for p in g.patches:
        height = p.get_height()
        g.text(p.get_x() + p.get_width() / 2., height,
               '{:1.2f}'.format(height), ha="center")
    plt.show()
def set_fonts():
    """Select the NanumSquare font for Matplotlib on the current platform.

    On POSIX systems where the NanumSquare OTF file exists under
    ``/Library/Fonts`` the font is loaded from that file.  Everywhere else,
    including POSIX systems without that file such as most Linux installs,
    the font is requested by family name.
    """
    font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
    # ``os.name == 'posix'`` is also true on Linux, where this macOS path
    # does not exist; check the file before loading it.
    if os.name == 'posix' and os.path.isfile(font_location):
        font_name = fm.FontProperties(fname=font_location).get_name()
        rc('font', family=font_name)
    else:
        rc('font', family='NanumSquare')
def set_fonts(name=None):
    """Select a Korean-capable Matplotlib font for the current platform.

    Args:
        name: Optional font family to prefer.  It is only consulted when
            the macOS font file below is not used.

    On POSIX systems where the NanumSquare OTF file exists it is loaded
    from disk and ``name`` is ignored.  Otherwise ``name`` is applied when
    given, and ``'NanumSquareRound'`` is used if ``name`` is missing or
    resolves to ``'DejaVu Sans'``.
    """
    font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
    # ``os.name == 'posix'`` is also true on Linux, where this macOS path
    # does not exist; check the file before loading it.
    if os.name == 'posix' and os.path.isfile(font_location):
        font_name = fm.FontProperties(fname=font_location).get_name()
        rc('font', family=font_name)
        return

    if name is not None:
        rc('font', family=name)
        # NOTE(review): 'DejaVu Sans' is presumably what Matplotlib falls
        # back to when the requested family is not installed - confirm.
        if fm.FontProperties().get_name() != 'DejaVu Sans':
            return
    rc('font', family='NanumSquareRound')
def plot_with_labels(low_dim_embs, labels, filename='tsne_' + str(word2vec_dim) + '.png'):
    """Save a labelled 2-D scatter plot of embeddings to ``filename``.

    Args:
        low_dim_embs: 2-D array; row ``i`` unpacks to the ``(x, y)``
            position of ``labels[i]``.  Must have at least as many rows as
            there are labels.
        labels: Sequence of annotation texts, one per plotted point.
        filename: Output image path.

    Raises:
        AssertionError: If there are more labels than embedding rows.
    """
    import matplotlib
    matplotlib.use('Agg')
    # Font setup.
    import matplotlib.pyplot as plt
    from matplotlib import font_manager, rc
    print("font_list: ", font_manager.get_fontconfig_fonts())
    font_name = font_manager.FontProperties(
        fname='/Library/Fonts/NanumSquareBold.ttf').get_name()
    rc('font', family=font_name)

    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    fig = plt.figure(figsize=(18, 18))  # in inches
    try:
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label, xy=(x, y), xytext=(5, 2),
                         textcoords='offset points', ha='right', va='bottom')
        plt.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed; release it
        # so repeated calls do not accumulate 18x18 inch canvases.
        plt.close(fig)
def crnn_infer(axes, model_path="./weights/crnn.pth", visualize=False):
    """Predict a text label for every image in ``axes`` with the CRNN model.

    Args:
        axes: Iterable of images accepted by ``transforms.ToTensor``.
        model_path: Path of the saved state dict loaded into the model.
        visualize: When true, each image is shown with its predicted label
            as the title.

    Returns:
        list[str]: One predicted label per image, in input order.
    """
    font_name = fm.FontProperties(fname="./data/fonts/H2GTRM.TTF").get_name()
    plt.rc('font', family=font_name)

    to_tensor = transforms.ToTensor()
    crnn = None
    labels = []
    for img in axes:
        if crnn is None:
            # Build the network and load its weights once, on first use,
            # instead of once per image.  Loading lazily keeps an empty
            # ``axes`` from touching the weight file at all.
            # NOTE(review): the model is used in whatever mode CRNN()
            # constructs it in (no eval()/no_grad) - confirm this is wanted.
            crnn = CRNN(64, 3, 1443, 256)
            crnn.load_state_dict(torch.load(model_path, map_location="cpu"))

        # Keep an untouched copy for display before converting to a tensor.
        vis_img = deepcopy(img) if visualize else None
        batch = to_tensor(img)
        batch = batch.view(1, *batch.size())  # add a leading batch axis

        preds = crnn(batch)
        predict = mapping_seq(preds)
        label = "".join(get_seq2str(predict[0]))

        if visualize:
            plt.imshow(vis_img)
            plt.title("predict label : " + label)
            plt.show()
        labels.append(label)
    return labels
def genre_histogram(genres):
    """Plot the percentage of songs per genre and fill ``genre_fix_dict``.

    Args:
        genres: Iterable of genre strings, one per song.

    Side effects:
        For every distinct genre string, stores a replacement in the
        module-level ``genre_fix_dict``:

        * a string with exactly two comma-separated parts maps to whichever
          part is more frequent as a genre on its own (the second on a tie);
        * any other genre below 0.9 % of the songs maps to ``"etc"``;
        * everything else maps to itself.

        A seaborn bar plot of the percentages is then shown.
    """
    genre_cnt = Counter(list(genres))
    s = sum(genre_cnt.values())
    shares = sorted(
        ((genre, round(count / s * 100, 2)) for genre, count in genre_cnt.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )
    xdata = [genre for genre, _ in shares]
    ydata = [share for _, share in shares]

    for genre, share in zip(xdata, ydata):
        parts = genre.split(",")
        if len(parts) == 2:
            first, second = parts
            # Counter returns 0 for a part never seen on its own.
            if genre_cnt[first] > genre_cnt[second]:
                genre_fix_dict[genre] = first
            else:
                genre_fix_dict[genre] = second
        elif share < 0.9:
            genre_fix_dict[genre] = "etc"
        else:
            genre_fix_dict[genre] = genre

    sns.set_style("whitegrid")
    font_location = r'C:/Windows/Fonts/NanumBarunGothic.ttf'
    font_name = fm.FontProperties(fname=font_location).get_name()
    mpl.rc('font', family=font_name)

    g = sns.barplot(x=xdata, y=ydata)
    g.set_title("장르(총{}곡)(%)".format(s))
    g.set_xticklabels(xdata, rotation=70)
    # Write each bar's percentage just above it.
    for p in g.patches:
        height = p.get_height()
        g.text(p.get_x() + p.get_width() / 2., height,
               '{:1.2f}'.format(height), ha="center")
    plt.show()
def plot_word_embeddng(wv_model_ko):
    """Project word vectors to 2-D with t-SNE and save a labelled scatter.

    Args:
        wv_model_ko: Model exposing ``wv.syn0`` (the embedding matrix) and
            ``wv.index2word`` (the matching vocabulary list).

    At most the first 500 words are plotted.  If scikit-learn or
    matplotlib.pyplot cannot be imported, a hint is printed and nothing is
    drawn.
    """
    final_embeddings = wv_model_ko.wv.syn0
    labels = wv_model_ko.wv.index2word

    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import font_manager, rc
    print("font_list: ", font_manager.get_fontconfig_fonts())
    font_name = font_manager.FontProperties(
        fname='/Library/Fonts/NanumSquareBold.ttf').get_name()
    rc('font', family=font_name)

    def plot_with_labels(low_dim_embs, labels,
                         filename='./data_out/tsne_' + str(args.word2vec_dim) + '.png'):
        """Scatter every embedding row, annotate it and save the figure."""
        assert low_dim_embs.shape[0] >= len(
            labels), "More labels than embeddings"
        fig = plt.figure(figsize=(18, 18))  # in inches
        try:
            for i, label in enumerate(labels):
                x, y = low_dim_embs[i, :]
                plt.scatter(x, y)
                plt.annotate(label, xy=(x, y), xytext=(5, 2),
                             textcoords='offset points', ha='right',
                             va='bottom')
            plt.savefig(filename)
        finally:
            # Release the figure; pyplot otherwise keeps it alive.
            plt.close(fig)

    try:
        from sklearn.manifold import TSNE
        import matplotlib.pyplot as plt
    except ImportError:
        print(
            "Please install sklearn, matplotlib, and scipy to visualize embeddings."
        )
        return

    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    plot_only = 500
    low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
    # Slice instead of indexing 0..499 so that a vocabulary smaller than
    # ``plot_only`` does not raise IndexError.
    plot_with_labels(low_dim_embs, list(labels[:plot_only]))
def main(argv):
    """
    The main driving function.

    Args:
        argv: Command-line arguments.  ``argv[0]`` selects the plot:
            1 = basic hillshade, 2 = basins, 3 = channels.  With no
            arguments the welcome text is printed and the program exits.
            On a valid integer choice ``argv[0]`` is replaced by that
            integer, as before.

    Author: SMM
    Date: 08/01/2018
    """
    import sys

    if len(argv) == 0:
        print_welcome()
        sys.exit()

    print("Let me load the LSDMappingTools functions for you.")
    set_path()

    print("Let me check the fonts. They are:")
    import matplotlib.font_manager as fm
    names = []
    for fname in fm.get_fontconfig_fonts():
        try:
            names.append(fm.FontProperties(fname=fname).get_name())
        except RuntimeError:
            # An unreadable font file should not abort the whole listing.
            continue
    print(names)
    fm.findfont('Liberation Sans', rebuild_if_missing=True)

    print("The arguments are: ")
    print(argv)

    # Import the mapping tools.
    import LSDMapWrappers as LSDMW

    DataDir = os.getcwd() + os.sep
    DataFname = "WA"

    try:
        choice = int(argv[0])
    except (TypeError, ValueError):
        # A non-numeric choice gets the same message as an unknown number.
        print("I didn't understand what you wanted.")
        print("Your choice was:" + str(argv[0]))
        return
    argv[0] = choice  # kept: callers may read the converted value back

    if choice == 1:
        print("Getting basic hillshade")
        LSDMW.SimpleHillshade(DataDir, DataFname)
    elif choice == 2:
        print("Plotting some basins")
        LSDMW.PrintBasins(DataDir, DataFname)
    elif choice == 3:
        print("Plotting the channels")
        LSDMW.PrintChannels(DataDir, DataFname)
    else:
        print("I didn't understand what you wanted.")
        print("Your choice was:" + str(choice))
def _standardize_heights(rows):
    """Rescale column 1 of every row in place using its columns 3 and 4.

    The transform maps a height equal to column 3 to +1 and a height equal
    to column 4 to -1 (the original column note calls them pZ, st and sb).
    """
    for row in rows:
        st = row[3]
        sb = row[4]
        row[1] = (row[1] - (st + sb) / 2) / ((st - sb) / 2)


def _scatter_by_count(ax, rows, row_miscs, specify_count, bounds,
                      text_of=None, redraw_others=False):
    """Highlight rows whose ``row_miscs[:, 1]`` equals ``specify_count``.

    Matching rows are drawn red (with a legend label), the rest blue, and
    every matching row that lies strictly inside ``bounds`` (lb, rb, bb, tb)
    is annotated.  ``text_of(row)`` supplies the annotation text; it
    defaults to ``str(specify_count)``.  ``redraw_others`` additionally
    scatters each non-matching row on its own.
    """
    hit = np.where(row_miscs[:, 1] == specify_count)[0]
    other = np.where(row_miscs[:, 1] != specify_count)[0]
    plt.scatter(rows[hit, 0], rows[hit, 1], color='#ef2926', alpha=.5,
                s=np.pi * 50, label='{}구'.format(specify_count))
    plt.scatter(rows[other, 0], rows[other, 1], color='#3245ef', alpha=.5,
                s=np.pi * 50)

    lb, rb, bb, tb = bounds
    for r, m in zip(rows, row_miscs):
        if m[1] == specify_count:
            if (lb < r[0]) and (r[0] < rb) and (bb < r[1]) and (r[1] < tb):
                text = str(specify_count) if text_of is None else text_of(r)
                ax.text(r[0], r[1] - 0.05, text, color='white', fontsize=10,
                        horizontalalignment='center')
        elif redraw_others:
            plt.scatter(r[0], r[1], color='#3245ef', alpha=.5, s=np.pi * 50)


def plot_calls(values, miscs, title=None, specify_count=0, print_std=False,
               no_color=False, call_option=None, legends=False):
    """Scatter pitch locations over a strike-zone grid and show the figure.

    Args:
        values: 2-D array.  Column 0 is plotted on x and column 1 on y;
            column 2 is the result code; columns 3 and 4 are used for
            standardisation (original note: pX, pZ, result, st, sb).
        miscs: 2-D array row-aligned with ``values``; column 1 is compared
            with ``specify_count`` (original note: inn, bc, stuff, speed).
        title: Optional figure title.
        specify_count: When > 0, rows whose ``miscs[:, 1]`` equals it are
            highlighted and annotated instead of colouring by result.
        print_std: When ``True``, heights are rescaled per row and the zone
            is drawn from -1 to +1.
        no_color: When ``True``, everything is drawn in red without the
            per-result legend labels.
        call_option: ``None`` to plot result codes 2 (labelled strike) and
            1 (labelled ball), or a ``Results`` member name, or a list of
            such names, selecting which results to plot.
        legends: When ``True`` (and ``no_color`` is ``False``) a legend is
            drawn under the plot.

    Raises:
        SystemExit: With status 1 if ``call_option`` is neither ``None``, a
            string nor a list.
    """
    import os
    from matplotlib import rc

    font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
    # 'posix' also covers Linux, where this macOS path does not exist.
    if os.name == 'posix' and os.path.isfile(font_location):
        import matplotlib.font_manager as fm
        rc('font', family=fm.FontProperties(fname=font_location).get_name())
    else:
        rc('font', family='NanumSquare')

    lb = -1.5  # left border
    rb = +1.5  # right border
    tb = +4.0  # top border
    bb = +1.0  # bottom border
    ll = -17 / 24  # left line
    rl = +17 / 24  # right line
    tl = +3.325  # top line
    bl = +1.579  # bottom line
    oll = -17 / 24 - 1 / 8  # outer left line
    orl = +17 / 24 + 1 / 8  # outer right line
    otl = +3.325 + 1 / 8  # outer top line
    obl = +1.579 - 1 / 8  # outer bottom line
    if print_std is True:
        tb = +21 / 12
        bb = -21 / 12
        tl = +1.0
        bl = -1.0
        otl = +1.0 + 1 / 8
        obl = -1.0 - 1 / 8
    bounds = (lb, rb, bb, tb)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(4, 4)
    fig.set_dpi(80)
    fig.set_facecolor('#898f99')
    ax.set_facecolor('#898f99')
    ax.tick_params(axis='x', colors='white')
    # The original repeated axis='x' here; 'y' is the evident intent.
    ax.tick_params(axis='y', colors='white')

    if title is not None:
        heading = fig.suptitle(title, fontsize=20)
        heading.set_color('white')
        heading.set_weight('bold')
        heading.set_horizontalalignment('center')

    def pick(table, code):
        """Rows of ``table`` whose result code in ``values`` equals ``code``."""
        return table[np.where(values[:, 2] == code)[0]]

    if call_option is None:
        svalues = pick(values, 2)
        bvalues = pick(values, 1)
        if print_std is True:
            _standardize_heights(svalues)
            _standardize_heights(bvalues)
        if specify_count <= 0:
            plt.scatter(svalues[:, 0], svalues[:, 1], color='#ef2926',
                        alpha=.5, s=np.pi * 50, label='스트라이크')
            if no_color is True:
                plt.scatter(bvalues[:, 0], bvalues[:, 1], color='#ef2926',
                            alpha=.5, s=np.pi * 50)
            else:
                plt.scatter(bvalues[:, 0], bvalues[:, 1], color='#3245ef',
                            alpha=.5, s=np.pi * 50, label='볼')
        else:
            _scatter_by_count(ax, svalues, pick(miscs, 2), specify_count,
                              bounds)
            # NOTE(review): the ball annotations print int(column 4) rather
            # than specify_count, exactly as the original did - confirm.
            _scatter_by_count(ax, bvalues, pick(miscs, 1), specify_count,
                              bounds, text_of=lambda r: str(int(r[4])))
    elif isinstance(call_option, list):
        codes = [Results[co].value for co in call_option]
        # An empty list selects nothing; the zone is still drawn below.
        tvalues = np.vstack([pick(values, c) for c in codes]) if codes else None
        if print_std is True and tvalues is not None:
            _standardize_heights(tvalues)
        if specify_count <= 0:
            for co, c in zip(call_option, codes):
                chosen = np.where(tvalues[:, 2] == c)[0]
                if no_color is True:
                    plt.scatter(tvalues[chosen, 0], tvalues[chosen, 1],
                                color='#ef2926', alpha=.5, s=np.pi * 50)
                else:
                    plt.scatter(tvalues[chosen, 0], tvalues[chosen, 1],
                                color=Colors[c], alpha=.5, s=np.pi * 50,
                                label=co)
        elif tvalues is not None:
            tmiscs = np.vstack([pick(miscs, c) for c in codes])
            _scatter_by_count(ax, tvalues, tmiscs, specify_count, bounds)
    elif isinstance(call_option, str):
        c = Results[call_option].value
        tvalues = pick(values, c)
        if print_std is True:
            _standardize_heights(tvalues)
        if specify_count <= 0:
            color = '#ef2926' if no_color is True else Colors[c]
            plt.scatter(tvalues[:, 0], tvalues[:, 1], color=color, alpha=.5,
                        s=np.pi * 50, label=call_option)
        else:
            # NOTE(review): the original also re-scattered each
            # non-matching pitch individually in this branch only; kept.
            _scatter_by_count(ax, tvalues, pick(miscs, c), specify_count,
                              bounds, redraw_others=True)
    else:
        print()
        print('ERROR: call option must be string/list')
        raise SystemExit(1)

    # Inner 3x3 strike-zone grid.
    grid = dict(color='#f9f9ff', linestyle='-', lw=1)
    for gx in (ll, ll + (rl - ll) / 3, ll + (rl - ll) * 2 / 3, rl):
        plt.plot([gx, gx], [bl, tl], **grid)
    for gy in (bl, bl + (tl - bl) / 3, bl + (tl - bl) * 2 / 3, tl):
        plt.plot([ll, rl], [gy, gy], **grid)

    # Outer box, 1/8 outside the grid on every side.
    box = dict(color='#d0cfd3', linestyle='-', lw=0.5)
    plt.plot([oll, oll], [obl, otl], **box)
    plt.plot([orl, orl], [obl, otl], **box)
    plt.plot([oll, orl], [obl, obl], **box)
    plt.plot([oll, orl], [otl, otl], **box)

    plt.axis([lb, rb, bb, tb])
    plt.rcParams['axes.unicode_minus'] = False
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    plt.axis('off')
    ax.autoscale_view('tight')
    if (legends is True) and (no_color is False):
        plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=2)
    plt.show()
def liste_polices():
    """Yield the name of each font file reported by fontconfig.

    Font files whose name lookup raises ``RuntimeError`` are skipped.
    """
    for font_path in font_manager.get_fontconfig_fonts():
        try:
            font_name = font_manager.FontProperties(fname=font_path).get_name()
        except RuntimeError:
            continue
        yield font_name
# Exploratory plotting script.  It relies on names that are not defined in
# the visible code (`WordCloud`, `tmp_data`, `visual`, `font_manager`, and
# an initial `plt`), so it presumably runs after earlier setup code.

# Render a word cloud from the frequency mapping `tmp_data`.
wordcloud = WordCloud(font_path="C:/Windows/Fonts/Art.ttf", relative_scaling=0.2, background_color='white' ).generate_from_frequencies(tmp_data)
# NOTE(review): figsize is given in inches; 500x500 is enormous - confirm.
plt.figure(figsize=(500,500))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
# From here on `plt` is rebound to matplotlib.pylab.
import matplotlib as mpl
import matplotlib.pylab as plt
# Line plot of the sales-amount column.
plt.plot(visual['판매금액'])
plt.suptitle("즐")
# The next two calls discard their return values.
mpl.matplotlib_fname()
font_manager.get_fontconfig_fonts()
# NOTE(review): `fitdist` is not defined or imported anywhere in the visible
# code (`import matlab` does not bind that name) - this line looks like it
# will raise NameError; confirm before running.
import matlab
a=fitdist(visual['판매금액'],'normal')
import numpy as np
from scipy.stats import norm
import matplotlib.mlab as mlab
# Plot the normal pdf (loc 0, scale 2) evaluated at the raw sales amounts.
plt.plot(visual['판매금액'],norm.pdf(visual['판매금액'],0,2))
from sklearn.preprocessing import scale
# Standardise the column, then plot a unit-normal pdf over it.
sca = scale(visual['판매금액'])
plt.figure(figsize=(300,500))
# NOTE(review): verify that `mlab.normpdf` exists in the installed
# Matplotlib; `norm.pdf(sca, 0, 1)` (imported above) computes the same curve.
plt.plot(sca,mlab.normpdf(sca,0,1),c="b",lw=5,ls="--",marker = "o",ms=15,mec="g",mew=5, mfc="r")
def mk_portfolio(self):
    """Build the customer's portfolio, print it and render a summary image.

    ``r1`` is the ETF share of the capital and ``r2`` the bond share, both
    chosen from the customer's risk level (``self.user_info[7]``).  For the
    two shortest investment terms (``self.term_list[0]`` and ``[1]``) bonds
    are excluded by forcing ``r2`` to 0.

    Steps:
        1. Ask ``Recommender`` for stocks and the two ETF lists.
        2. Split the capital with ``self.dist`` into stocks, bond ETFs and
           general ETFs and print every position.
        3. Call ``self.portfolio_viz()``, save a bar chart of the three
           amounts, write the position names onto the template image,
           paste the chart onto it and display the result.

    Returns:
        tuple: ``(self.portfolios1, self.portfolios2, self.portfolios3)``,
        the stock, bond-ETF and general-ETF mappings returned by
        ``self.dist``.

    Raises:
        ValueError: If the risk level matches none of the first five
            entries of ``self.risk_list``.
    """
    capital = self.user_info[0] * 10000

    # (r1, r2) for risk_list[0] .. risk_list[4]; the first match wins.
    ratio_table = ((1, 0.67), (0.8, 0.4), (0.6, 0.3), (0.4, 0.1), (0.2, 0))
    for level, (r1, r2) in zip(self.risk_list, ratio_table):
        if self.user_info[7] == level:
            break
    else:
        raise ValueError(
            "unknown risk level: {!r}".format(self.user_info[7]))

    if self.user_info[1] == self.term_list[0] or self.user_info[
            1] == self.term_list[1]:
        r2 = 0  # short investment term: leave bonds out

    # Truncated percentages pick the image layout below.  Float rounding
    # makes (1 - 0.8) * 100 truncate to 19, hence the 19 key.
    real_r0 = int((1 - r1) * 100)
    real_r2 = int(r2 * 100)

    recommender = Recommender(self.path, self.stock_path, self.etf_path,
                              self.user_info[5])
    recommender.cal_weight()
    rec_stock_lst = recommender.rec_stock()

    # Order the recommended stocks by descending weight from the CSV.
    df = pd.read_csv(self.path + "/data/stock_list2.csv", encoding="cp949")
    names = [i[0] for i in rec_stock_lst]
    ranked = list(df[df["종목명"].isin(names)][["종목명", "가중치"]].sort_values(
        by="가중치", ascending=False).종목명.values)
    # TODO (from the original author): duplicate names still need handling.
    rec_stock_lst.sort(key=lambda x: ranked.index(x[0]))
    res_etf1, res_etf2 = recommender.rec_etf()

    print("\n\n고객님의 포트폴리오입니다.\n")
    stock_names = []
    bond_names = []
    general_names = []

    self.portfolios1, penny1 = self.dist(capital, rec_stock_lst, 1 - (r1), 10)
    stock_amount = capital * (1 - r1) - penny1
    print("\n주식 종목 : {}원\n".format(stock_amount))
    for name, info in self.portfolios1.items():
        print("{}, {}개 매입. {} 전략. 현재가: {}".format(name, info[0],
                                                 info[1][1], info[1][0]))
        stock_names.append(name)

    self.portfolios2, penny2 = self.dist(capital + penny1, res_etf1, r2, 5)
    bond_amount = (capital + penny1) * r2 - penny2
    print("\n채권 ETF 종목 : {}원\n".format(bond_amount))
    for name, info in self.portfolios2.items():
        print("{}, {}개 매입.기간 내 보유 권장. 현재가: {}".format(
            name, info[0], info[1][0]))
        bond_names.append(name)

    self.portfolios3, penny3 = self.dist(capital + penny2, res_etf2,
                                         r1 - r2, 5)
    general_amount = (capital + penny2) * (r1 - r2) - penny3
    print("\n일반 ETF 종목 : {}원\n".format(general_amount))
    for name, info in self.portfolios3.items():
        print("{}, {}개 매입. 20일 후 리밸런싱 권장. 현재가: {}".format(
            name, info[0], info[1][0]))
        general_names.append(name)

    # Show portfolio view 1.
    self.portfolio_viz()

    # Bar chart of the three amounts.
    kindx = ["주식", "일반 ETF", "채권 ETF"]
    values = [stock_amount, general_amount, bond_amount]
    colors = ["silver", "gold", "lightgray"]
    font_name = fm.FontProperties(fname=self.fontpath).get_name()
    plt.rc("font", family=font_name, size=20)
    plt.figure(figsize=(7, 7))
    plt.bar(kindx, values, width=0.6, color=colors, edgecolor="lightgray")
    plt.savefig(self.path + "/red/interface/image/portfolio/bar_chart.png")
    plt.close()

    # Load the template and the chart just written.
    im_tend = Image.open(self.path + "/red/interface/image/portfolio/red_3.png")
    im_chart = Image.open(self.path +
                          "/red/interface/image/portfolio/bar_chart.png")
    font = ImageFont.truetype(self.fontpath, 24)
    fill = (0, 0, 0, 0)
    draw = ImageDraw.Draw(im_tend)

    # How many stock / bond-ETF / general-ETF names are listed, and whether
    # a trailing ellipsis is drawn, keyed by (stock percent, has bonds).
    layouts = {
        (80, False): (5, 0, 2, True),
        (80, True): (5, 0, 2, True),
        (60, False): (4, 0, 3, True),
        (60, True): (4, 2, 1, True),
        (40, False): (3, 0, 4, True),
        (40, True): (3, 2, 2, True),
        (19, False): (2, 0, 5, True),
        (19, True): (2, 3, 2, True),
        (0, False): (0, 0, 5, False),
        (0, True): (0, 3, 4, True),
    }
    row_ys = (120, 164.333, 208.666, 253, 297.333, 341.666, 386)

    layout = layouts.get((real_r0, real_r2 != 0))
    if layout is not None:
        n_stock, n_bond, n_general, with_ellipsis = layout
        slots = ([(stock_names, i) for i in range(n_stock)]
                 + [(bond_names, i) for i in range(n_bond)]
                 + [(general_names, i) for i in range(n_general)])
        for y, (source, idx) in zip(row_ys, slots):
            # A list shorter than its slot count leaves the row blank.
            if idx < len(source):
                draw.text((635, y), str(source[idx]), font=font, fill=fill)
        if with_ellipsis:
            draw.text((805, 430.333), "···", font=font, fill=fill)

    # Paste the bar chart onto the template and show it.
    im_tend.paste(im_chart, (30, 10))
    display(im_tend)

    return self.portfolios1, self.portfolios2, self.portfolios3
def main(data, visulize):
    """Run the paragraph-level hierarchical attention model on one Excel file.

    The function reads the contract spreadsheet at ``data``, groups its lines
    into paragraphs, converts them to padded index tensors, loads the
    pre-trained weights from ``model/para/model_30.h5`` and predicts. The
    sentence-level attention weights are written to
    ``output/paragraph/output_par_dis/paragraph_distribution.xlsx`` and, when
    ``visulize == 'on'``, one heat-map PNG per paragraph is saved as well.

    Args:
        data: path handed to ``pandas.read_excel``; the sheet must have 7 or
            9 columns.
        visulize: the string ``'on'`` enables heat-map export; any other
            value skips it. (The parameter name is spelled this way in the
            signature and is kept for callers.)

    Returns:
        A tuple ``(paragraph_prob, num2label)``: the list of per-paragraph
        model outputs and the number -> label mapping built from
        ``./data/index_par_label.xlsx``.

    Raises:
        ValueError: if the spreadsheet does not have 7 or 9 columns.
    """
    # All imports are function-local; TensorFlow logging is silenced before
    # the Keras imports run.
    import tensorflow as tf
    import warnings
    import os
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    warnings.filterwarnings("ignore")
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import time
    import re
    import pickle
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import matplotlib.font_manager as fm
    import seaborn as sns
    from sklearn.model_selection import train_test_split
    from preprocessing.preprocessing_code_190418 import preprocess, title_catcher, date_process, phone_process, time_process, title_process
    from konlpy.tag import Komoran
    import keras
    from keras import backend as K
    from keras.layers import Input, Embedding, Bidirectional, CuDNNLSTM, BatchNormalization
    from keras.layers import RepeatVector, Permute, Multiply, Lambda, TimeDistributed
    from keras.layers import Dense, Flatten
    from keras.models import Model, Sequential, model_from_json
    from keras.callbacks import ModelCheckpoint
    from keras.optimizers import Adam
    from keras.engine.topology import Layer
    from keras.preprocessing.sequence import pad_sequences
    print("Analyzing Paragraph")

    def load_dataset(data):
        """Read the Excel file, rename its columns and add a ``split_id`` column."""
        origin_data = pd.read_excel(data)
        if len(origin_data.columns) == 9:
            origin_data.columns = [
                'doc_id', 'par_id', 'art_id', 'line_id', 'text', 'par_label',
                'line_label', 'none1', 'none2'
            ]
            origin_data['split_id'] = origin_data['doc_id'].map(
                str) + '_' + origin_data['par_label']
        elif len(origin_data.columns) == 7:
            origin_data.columns = [
                'doc_id', 'par_id', 'art_id', 'line_id', 'text', 'par_label',
                'line_label'
            ]
            origin_data['split_id'] = origin_data['doc_id'].map(
                str) + '_' + origin_data['par_label']
        else:
            raise ValueError("Columns is not 7 or 9!")
        return origin_data

    def join_date(original_data):
        """Merge runs of short 'PR-04-13' date lines into the first row of each run.

        Falls back to returning ``original_data`` unchanged when anything in
        the merge step raises.
        """
        p = re.compile(
            '[0-9]{4}[ .년]{0,3}[0-9]{1,2}[ .월]{0,3}[0-9]{1,2}[ .일]{0,3}')
        # Row positions whose text holds exactly one date match, is labelled
        # 'PR-04-13' and is at most 15 characters long.
        split_date_idx = [
            idx for idx, [lines, labels] in enumerate(original_data[
                ['text', 'line_label']].values)
            if len(p.findall(str(lines))) == 1 and labels == 'PR-04-13' and
            len(lines) <= 15
        ]
        # Positions in split_date_idx where the gap to the next hit is >= 3 rows.
        date_diff = [
            i for i in range(len(split_date_idx) - 1)
            if split_date_idx[i + 1] - split_date_idx[i] >= 3
        ]
        try:
            seq_date_idx = []
            seq_date_idx.append(split_date_idx[0:date_diff[0] + 1])
            for i in range(len(date_diff) - 1):
                seq_date_idx.append(split_date_idx[date_diff[i] + 1:date_diff[i + 1] + 1])
            # NOTE(review): hits after the last gap are never appended to
            # seq_date_idx -- confirm that is intended.
            for j in seq_date_idx:
                # Column 4 is 'text' after load_dataset's renaming.
                original_data.iloc[j[0], 4] = ' '.join(
                    original_data.iloc[j]['text'].values)
            processed_data = original_data.drop(
                np.concatenate([i[1:] for i in seq_date_idx]))
        # NOTE(review): bare except also hides unexpected errors (e.g. the
        # IndexError on an empty date_diff is the expected case) -- consider
        # narrowing.
        except:
            processed_data = original_data
        return processed_data

    def document_label_dataset(processed_data):
        """Group consecutive lines into paragraphs that start at each title line."""
        processed_data = processed_data.reset_index()
        contents = processed_data.iloc[:, 5].tolist()  # not used afterwards
        temp = []
        for text in processed_data['text']:
            try:
                result = title_catcher(text)
                temp.append(result)
            except BaseException:
                # A line title_catcher cannot handle counts as "not a title".
                temp.append(False)
        processed_data['title'] = temp
        start_idx = processed_data[processed_data['title'] == True].index.tolist()
        # Each paragraph ends where the next one starts; the last ends after
        # the final row.
        end_idx = start_idx[1:]
        end_idx.append(processed_data.index[-1] + 1)
        contract = []
        for start, end in zip(start_idx, end_idx):
            temp = processed_data['text'][start:end]
            contract.append(list(temp.values))
        new_df = pd.DataFrame({"doc": contract}).reset_index()
        return new_df

    def split_newdataset(data, standard, seed):
        """Collect the 'doc' values per unique value of ``data[standard]``.

        ``seed`` is accepted but not used.
        """
        contract_names = np.unique(data[standard])
        x_all = []
        for name in contract_names:
            temp = data[data[standard] == name]
            temp_contract = []
            for c in temp['doc'].values:
                temp_contract.append(c)
            x_all.append(temp_contract)
        return x_all

    def make_paragraph_x_dataset(x):
        """Return the first element of every entry of ``x``."""
        return [x[paragraph][0] for paragraph in range(len(x))]

    def make_paragraph_y_dataset(y):
        """Return the first element of every entry of ``y`` split on commas."""
        return [y[paragraph][0].split(',') for paragraph in range(len(y))]

    def text_preprocess(text):
        """Normalise one line and return its Komoran morphemes joined by spaces.

        Returns the placeholder ``'_빈칸_'`` when tagging fails.
        """
        text = preprocess(text)
        text = title_process(text)
        text = time_process(text)
        text = date_process(text)
        text = phone_process(text)
        # Keep only Hangul syllables and a small set of punctuation.
        text = re.sub('[^가-힣".,()~%_ ]+', '', text)
        try:
            # komoran is assigned later in main, before this is first called.
            text = ' '.join(np.array(komoran.pos(text))[:, 0])
        except BaseException:
            text = '_빈칸_'
        return text

    def word2idx(text):
        """Map one token to its vocabulary index; return 1 on any failure."""
        try:
            re_text = re.sub('[^가-힣".,()~%_ ]+', '', text)
            # Whatever is not Hangul or '_' collapses to the literal 'PUNC'.
            re_text = re.sub('[^가-힣_]+', 'PUNC', re_text)
            return vocab_to_int[re_text]
        except BaseException:
            return 1

    def sentence2idx(sentence):
        """Index every whitespace token that contains no isolated jamo."""
        p = re.compile('([ㄱ-ㅎㅏ-ㅣ]+)')
        return [
            word2idx(word) for word in sentence.split()
            if len(p.findall(word)) == 0
        ]

    def contract2idx(contract, max_len):
        """Index each line of a paragraph and pad the lines to ``max_len``."""
        temp = [sentence2idx(text_preprocess(line)) for line in contract]
        return pad_sequences(temp, maxlen=max_len)

    def x_dataset(contracts, max_row, max_len):
        """Build the model input: paragraphs post-padded to ``max_row`` lines."""
        contracts = [contract2idx(contract, max_len) for contract in contracts]
        return pad_sequences(contracts, maxlen=max_row, padding='post')

    def y_dataset(labels):
        """Sum the one-hot vectors of every label found in ``label2num``."""
        output = np.zeros(class_size)
        for label in labels:
            if label in label2num.keys():
                output += np.eye(class_size)[label2num[label]]
        return output

    class AttentionLayer(Layer):
        """Keras layer returning softmax attention scores over axis 1 of its input."""

        def __init__(self, attention_dim=100, **kwargs):
            self.attention_dim = attention_dim
            super(AttentionLayer, self).__init__(**kwargs)

        def build(self, input_shape):
            # Projection matrix, bias and context vector, all trainable.
            self.W = self.add_weight(name='Attention_Weight',
                                     shape=(input_shape[-1], self.attention_dim),
                                     initializer='random_normal',
                                     trainable=True)
            self.b = self.add_weight(name='Attention_Bias',
                                     shape=(self.attention_dim, ),
                                     initializer='random_normal',
                                     trainable=True)
            self.u = self.add_weight(name='Attention_Context_Vector',
                                     shape=(self.attention_dim, 1),
                                     initializer='random_normal',
                                     trainable=True)
            super(AttentionLayer, self).build(input_shape)

        def call(self, x):
            # tanh projection -> dot with context vector -> softmax.
            u_it = K.tanh(K.dot(x, self.W) + self.b)
            a_it = K.dot(u_it, self.u)
            a_it = K.squeeze(a_it, -1)
            a_it = K.softmax(a_it)
            return a_it

        def compute_output_shape(self, input_shape):
            return (input_shape[0], input_shape[1])

    def WeightedSum(attentions, representations):
        """Weight ``representations`` by ``attentions`` and sum over axis 1."""
        repeated_attentions = RepeatVector(
            K.int_shape(representations)[-1])(attentions)
        repeated_attentions = Permute([2, 1])(repeated_attentions)
        aggregated_representation = Multiply()(
            [representations, repeated_attentions])
        aggregated_representation = Lambda(lambda x: K.sum(x, axis=1))(
            aggregated_representation)
        return aggregated_representation

    def SenWeightedSum(attentions, representations):
        """Same computation as ``WeightedSum``; used for the sentence level."""
        repeated_attentions = RepeatVector(
            K.int_shape(representations)[-1])(attentions)
        repeated_attentions = Permute([2, 1])(repeated_attentions)
        aggregated_representation = Multiply()(
            [representations, repeated_attentions])
        aggregated_representation = Lambda(lambda x: K.sum(x, axis=1))(
            aggregated_representation)
        return aggregated_representation

    def Hie_Attention():
        """Build the hierarchical attention classifier.

        Returns the compiled classifier and a second model that shares its
        layers but outputs the sentence-level attention scores.
        """
        embedding_layer = Embedding(input_dim=max_nb_words,
                                    output_dim=embedding_dim,
                                    input_length=max_len,
                                    trainable=True,
                                    mask_zero=False)
        # Sentence Encoder
        sentence_input = Input(shape=(max_len, ), name='sentence_input')
        embedded_sentence = embedding_layer(sentence_input)
        contextualized_sentence = Bidirectional(
            CuDNNLSTM(lstm_dim, return_sequences=True),
            name="WORD_BiLSTM")(embedded_sentence)
        word_attention = AttentionLayer(attention_dim)(contextualized_sentence)
        sentence_representation = WeightedSum(word_attention,
                                              contextualized_sentence)
        sentence_encoder = Model(inputs=[sentence_input],
                                 outputs=[sentence_representation])
        # Document Encoder
        document_input = Input(shape=(
            max_row,
            max_len,
        ), name='document_input')
        embedded_document = TimeDistributed(sentence_encoder)(document_input)
        contextualized_document = Bidirectional(
            CuDNNLSTM(lstm_dim, return_sequences=True),
            name="SENTENCE_BiLSTM")(embedded_document)
        sentence_attention = AttentionLayer(attention_dim)(
            contextualized_document)
        document_representation = SenWeightedSum(sentence_attention,
                                                 contextualized_document)
        layer = Dense(dense_size, activation='relu')(document_representation)
        output = Dense(class_size, activation='sigmoid')(layer)
        model = Model(inputs=[document_input], outputs=[output])
        # Attention Extractor
        word_attention_extractor = Model(inputs=[sentence_input],
                                         outputs=[word_attention])
        # word_attentions is built but not returned or used below.
        word_attentions = TimeDistributed(word_attention_extractor)(
            document_input)
        attention_extractor = Model(inputs=[document_input],
                                    outputs=[sentence_attention])
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(learning_rate),
                      metrics=['accuracy'])
        return model, attention_extractor

    def vecs2labels(vecs):
        """Return the labels whose position in ``vecs`` equals 1."""
        output = []
        for i, vec in enumerate(vecs):
            if vec == 1:
                output.append(num2label[i])
        return output

    # NOTE(review): model_output and model_pred read `threshold`, which is
    # not assigned anywhere in this function as shown; neither helper is
    # called below. Confirm before using them.
    def model_output(output_):
        """Threshold raw outputs and convert them to label lists."""
        return [[vecs2labels((output > threshold) * 1)] for output in output_]

    def model_pred(model, input_):
        """Predict one paragraph at a time and convert to label lists."""
        out = []
        for contract in input_:
            out.extend(model.predict(np.array([contract])))
        return [[vecs2labels((output > threshold) * 1)] for output in out]

    def model_probability(model, input_):
        """Predict one paragraph at a time and return the raw outputs."""
        out = []
        for contract in input_:
            out.extend(model.predict(np.array([contract])))
        return [output for output in out]

    def multilabel_evaluate(class_pred, output, original_x):
        """Return the mean of per-item 0/1 scores.

        An item scores 1 when ``set(output[i][0])`` is a non-empty subset of
        ``set(class_pred[i][0])`` or when both sets are empty.
        """
        accuracy = []
        class_pred = [class_pred[label][0] for label in range(len(class_pred))]
        output = [output[label][0] for label in range(len(output))]
        for i, contract in enumerate(original_x):
            contract_class_pred = class_pred[i]
            contract_output = output[i]
            ans = set(contract_class_pred)
            pred = set(contract_output)
            if (pred <= ans and len(pred) > 0) or (len(pred) == 0 and len(ans) == 0):
                score = 1
            else:
                score = 0
            accuracy.append(score)
        return np.mean(accuracy)

    # Use the bundled TTF as the matplotlib font for the heat-map labels.
    fm.get_fontconfig_fonts()
    font_location = './font/H2GTRE.TTF'
    font_name = fm.FontProperties(fname=font_location).get_name()
    plt.rc('font', family=font_name)
    plt.rcParams.update({'figure.max_open_warning': 0})
    print("Load Data directory:", data)
    original_data = load_dataset(data)
    processed_data = join_date(original_data)
    new_df = document_label_dataset(processed_data)
    x_all = split_newdataset(new_df, 'index', 1103)
    x_all = make_paragraph_x_dataset(x_all)
    # Label table: after dropping the first column, column 0 is the key and
    # column 1 the value of label2num.
    para_index = pd.read_excel('./data/index_par_label.xlsx')
    para_index = para_index.iloc[:, 1:]
    para_dict = {}
    for i in para_index.values:
        para_dict[i[0]] = i[1]
    label2num = para_dict
    num2label = {word: i for i, word in label2num.items()}
    class_size = len(num2label)
    max_row = 100
    max_len = 200
    komoran = Komoran(userdic='preprocessing/userdict_190411.txt')
    with open('./preprocessing/para_int_to_vocab.pickle', 'rb') as f:
        int_to_vocab = pickle.load(f)
    with open('./preprocessing/para_vocab_to_int.pickle', 'rb') as f:
        vocab_to_int = pickle.load(f)
    x_all_ = x_dataset(x_all, max_row, max_len)
    # Hyper-parameters read by Hie_Attention through its closure.
    max_nb_words = len(int_to_vocab) + 1
    embedding_dim = 200
    attention_dim = 100
    lstm_dim = 100
    learning_rate = 0.0001
    dense_size = 256
    print("Load Pragraph Model")
    model, attention_extractor = Hie_Attention()
    model.load_weights("model/para/model_30.h5")
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(learning_rate),
                  metrics=['accuracy'])
    attention_distribution = attention_extractor.predict(x_all_)
    df_result = pd.DataFrame()
    sentence_value, attention_value = [], []
    # Keep at most the first 100 lines / attention scores per paragraph.
    # NOTE(review): both branches amount to slicing with [:100]. Also,
    # x_dataset truncates with pad_sequences' default `truncating`, so for
    # paragraphs longer than 100 lines the kept scores may not line up with
    # the first 100 lines -- confirm.
    for sentence, attention in zip(x_all, attention_distribution):
        if len(sentence) <= 100:
            sentence_value.append(sentence[:len(sentence)])
            attention_value.append(attention[:len(sentence)])
        else:
            sentence_value.append(sentence[:100])
            attention_value.append(attention[:100])
    if visulize == 'on':
        print('paragraph visual file extracting...')
        for idx_1, sentence_attention in enumerate(
                zip(sentence_value, attention_value)):
            sentence = sentence_attention[0]
            attention = sentence_attention[1]
            # One heat-map row per line, labelled with the line text.
            tmp1 = np.array(attention).reshape(-1, 1)
            tmp2 = np.array(sentence).reshape(-1, 1)
            fig, ax = plt.subplots(figsize=(10, len(tmp1)))
            ax.tick_params(labelsize=20)
            ax = sns.heatmap(data=tmp1,
                             yticklabels=tmp2,
                             annot=True,
                             cmap="Reds",
                             annot_kws={"size": 30},
                             cbar=False)
            plt.savefig("./output/paragraph/output_par_vis/" +
                        "paragraph_attention_sequence_" + str(idx_1 + 1) +
                        ".png",
                        bbox_inches="tight")
            plt.close(fig)
        print('Check the /output/paragraph/output_par_dis, output_par_vis',
              end='\n\n')
    else:
        print('skip visualize', end='\n\n')
    df_sample = pd.DataFrame({
        "Sentence": sentence_value,
        "Attention": attention_value
    })
    df_result = pd.concat([df_result, df_sample], axis=0)
    df_result.to_excel(
        'output/paragraph/output_par_dis/paragraph_distribution.xlsx',
        encoding="utf-8")  # Data to Excel
    paragraph_prob = model_probability(model, x_all_)
    return paragraph_prob, num2label
def test_get_fontconfig_fonts():
    """Check that fontconfig reports more than one font outside Windows."""
    # On win32 the check is skipped without calling get_fontconfig_fonts().
    if sys.platform == 'win32':
        return
    found = get_fontconfig_fonts()
    assert len(found) > 1
import matplotlib as mpl
from matplotlib import rc
from operator import add
import matplotlib.font_manager as fm

# Debug output: the fonts fontconfig can see, then every font-related rcParam
# key. The parenthesised form prints exactly the same under Python 2.
print(fm.get_fontconfig_fonts())
mpl.rcParams['toolbar'] = 'None'
print(filter(lambda key: key.startswith('font'), mpl.rcParams.keys()))

# Font: 20 pt "Univers LT Std 57 Cn". Earlier experiments with other family
# names and with usetex were commented out in the original and are not
# reproduced here.
rc('font', size=20, family='Univers LT Std 57 Cn')

# White-on-black theme: white text, grid, ticks and axes decorations on a
# black axes background.
rc(('text', 'grid', 'ytick', 'xtick'), color='w')
rc('axes', edgecolor='w', facecolor='k', labelcolor='w', linewidth=2)
rc(('xtick.major', 'ytick.major'), pad=10)

# Kept as the last rc call, as in the original.
rc('text', dvipnghack=True)


def _(x):
    """Identity function bound to the name ``_``."""
    return x


import wxversion
def main(prob_pkl,num2label,data,visualize):
    """Tag every line of a contract spreadsheet with the sentence-level model.

    Reads the Excel file at ``data``, builds word-index, row-index and
    paragraph-probability inputs, loads the pre-trained weights from
    ``./model/sentence/33-0.1379.hdf5``, predicts one class per line and
    writes the tagged sheet to ``./output/contract_tagging.xlsx``. When
    ``visualize == 'on'`` a word-attention heat map and an Excel sheet of
    attention scores are also written per sentence.

    Args:
        prob_pkl: per-paragraph probability vectors; indexed by paragraph and
            thresholded at 0.5 to derive the paragraph labels.
        num2label: mapping from class index to paragraph label, used to name
            the thresholded paragraph classes.
        data: path of the input Excel file.
        visualize: ``'on'`` to export visualisations, ``'off'`` to skip; any
            other value only prints a warning.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    import numpy as np
    import pandas as pd
    import sys
    # Make the parent directory importable before the project imports run.
    sys.path.append('..')
    from preprocessing.input_data_index_embedding import load_dataset,split_dataset,text_preprocess,join_date,bow_vocab,load_bow_vocab,bow_label,max_length,x_data_set,y_data_set,int_to_label,labels_to_vecs,make_index_embed,evaluate,split_newdataset_sw,split_ptl_inference,document_label_dataset_training,document_label_dataset_infer,tagging_row_index,row_embed,vecs2labels
    from preprocessing.preprocessing_code_190418 import title_catcher, preprocess, date_process
    import pickle
    import re
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score
    from sklearn.metrics import classification_report
    from sklearn.preprocessing import OneHotEncoder
    from collections import Counter
    from konlpy.tag import Komoran
    import keras
    import matplotlib.pyplot as plt
    from keras.layers import Input, Embedding, Dense, LSTM, Bidirectional, Dropout, Concatenate, Flatten, Conv1D, Conv2D, GlobalMaxPooling1D, TimeDistributed, SpatialDropout1D, GRU, multiply, Lambda, Reshape, CuDNNGRU, CuDNNLSTM, Permute, RepeatVector, Multiply
    from keras.layers import MaxPool1D
    from keras.models import Model, Sequential
    from keras import backend as K
    from keras.callbacks import ModelCheckpoint
    from keras.optimizers import Adam
    from keras.engine.topology import Layer
    from keras.preprocessing.sequence import pad_sequences
    from keras import regularizers
    from keras.layers.normalization import BatchNormalization
    from keras.layers import Activation
    import seaborn as sn
    import tensorflow as tf
    import warnings
    import os
    # Silence TensorFlow / Python warnings.
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    warnings.filterwarnings("ignore")
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    print("Analyzing Sentence")
    test_prob = prob_pkl
    data_dir = data
    vocab_to_int_dir = './preprocessing/insu_vocab_to_int.pkl'
    int_to_vocab_dir = './preprocessing/insu_int_to_vocab.pkl'
    origin_data = load_dataset(data_dir)
    origin_data = join_date(origin_data)
    origin_data.head(3)  # result discarded (likely a notebook leftover)
    ptl_df = document_label_dataset_infer(origin_data)
    ptl_df.head(3)  # result discarded

    def split_newdataset_sw_(data, standard):
        """Return the first 'doc' value per unique value of ``data[standard]``.

        Not called anywhere in this function.
        """
        contract_names = np.unique(data[standard])
        x_all = []
        for name in contract_names:
            temp = data[data[standard] == name]
            temp_contract = []
            for c in temp['doc'].values:
                temp_contract.append(c)
            x_all.append(temp_contract)
        x_all = [x_all[con][0] for con in range(len(x_all))]
        return x_all

    x_all = split_ptl_inference(ptl_df, 'index')
    # Line-level class codes; the softmax output width is len(valid_class).
    valid_class = np.array([
        '-', 'PR-02-01', 'PR-02-02', 'PR-02-03', 'PR-02-04', 'PR-02-05',
        'PR-02-06', 'PR-02-07', 'PR-02-08', 'PR-02-09', 'PR-02-10',
        'PR-02-11', 'PR-02-12', 'PR-02-13', 'PR-02-14', 'PR-02-15',
        'PR-02-16', 'PR-02-17', 'PR-02-18', 'PR-02-19', 'PR-02-20',
        'PR-02-21', 'PR-02-22', 'PR-02-24', 'PR-02-25', 'PR-02-26',
        'PR-02-27', 'PR-02-28', 'PR-02-29', 'PR-02-30', 'PR-02-31',
        'PR-02-32', 'PR-03-01', 'PR-03-02', 'PR-03-03', 'PR-03-04',
        'PR-04-01', 'PR-04-02', 'PR-04-03', 'PR-04-04', 'PR-04-05',
        'PR-04-06', 'PR-04-07', 'PR-04-08', 'PR-04-09', 'PR-04-10',
        'PR-04-11', 'PR-04-12', 'PR-04-13', 'PR-04-14', 'PR-04-15',
        'PR-04-16', 'PR-04-17', 'PR-04-18', 'PR-04-19', 'PR-04-20',
        'PR-04-21', 'PR-04-22', 'PR-04-23', 'PR-04-24', 'PR-04-25',
        'PR-04-26', 'PR-04-27', 'PR-04-28', 'PR-04-29', 'PR-04-30',
        'PR-04-31', 'PR-04-32', 'PR-04-33', 'PR-04-34', 'PR-04-35',
        'PR-04-36', 'PR-04-37', 'PR-04-38', 'PR-04-39', 'PR-04-40',
        'PR-04-41', 'PR-04-42', 'PR-04-43', 'PR-04-44', 'PR-04-45',
        'PR-04-46', 'PR-04-47', 'PR-04-48', 'PR-04-49', 'PR-04-50',
        'PR-04-51', 'PR-04-52', 'PR-05-01', 'PR-05-02', 'PR-05-03',
        'PR-05-04', 'PR-05-05', 'PR-05-06', 'PR-05-07', 'PR-05-08',
        'PR-05-09', 'PR-05-10', 'PR-05-11', 'PR-05-12', 'PR-05-13',
        'PR-05-14', 'PR-05-15', 'PR-05-16', 'PR-05-17', 'PR-05-18',
        'PR-05-19', 'PR-05-20', 'PR-05-21', 'PR-06-01', 'PR-06-02',
        'PR-06-03', 'PR-06-04', 'PR-06-05', 'PR-06-06', 'PR-06-07',
        'PR-06-08', 'PR-06-09', 'PR-06-10', 'PR-06-11', 'PR-06-12',
        'PR-06-13', 'PR-06-14', 'PR-07-01', 'PR-07-02', 'PR-07-03',
        'PR-07-04', 'PR-07-05', 'PR-07-06', 'PR-07-07', 'PR-08-01',
        'PR-08-02', 'PR-08-03', 'PR-08-04', 'PR-08-05', 'PR-08-06',
        'PR-08-07', 'PR-08-08', 'PR-08-09', 'PR-08-10', 'PR-08-11',
        'PR-08-12', 'PR-08-13', 'PR-08-14', 'PR-08-15', 'PR-08-16',
        'PR-08-17', 'PR-08-18', 'PR-08-19', 'PR-08-20', 'PR-08-21',
        'PR-08-22', 'PR-08-23', 'PR-08-24', 'PR-08-25', 'PR-08-26',
        'PR-08-27', 'PR-08-28', 'PR-08-29', 'PR-09-01', 'PR-09-02',
        'PR-09-03', 'PR-09-04', 'PR-09-05', 'PR-09-06', 'PR-09-07',
        'PR-09-08', 'PR-09-09', 'PR-09-10', 'PR-09-11', 'PR-09-12',
        'PR-09-13', 'PR-09-14', 'PR-09-15', 'PR-10-01', 'PR-10-02',
        'PR-10-03', 'PR-10-04', 'PR-10-05', 'PR-10-06', 'PR-11-01',
        'PR-11-02', 'PR-11-03', 'PR-11-04', 'PR-11-05', 'PR-11-07',
        'PR-11-08', 'PR-12-01', 'PR-13-01', 'PR-13-02', 'PR-13-03',
        'PR-14-01', 'PR-14-02', 'PR-14-03', 'PR-14-04', 'PR-14-05',
        'PR-14-06', 'PR-15-01', 'PR-16-01', 'PR-17-01', 'PR-18-01',
        'PR-19-01', 'PR-19-02', 'PR-20-01', 'PR-20-02', 'PR-20-03',
        'PR-20-04', 'PR-20-05', 'PR-20-06', 'PR-20-07', 'PR-20-08',
        'PR-20-09', 'PR-20-10', 'PR-20-11', 'PR-20-12', 'PR-20-13',
        'PR-20-14', 'PR-21-01', 'PR-21-02', 'PR-22-01', 'PR-23-01',
        'PR-23-02', 'PR-23-03', 'PR-23-04', 'PR-23-05', 'PR-23-06',
        'PR-24-01', 'PR-24-02', 'PR-24-03', 'PR-24-04', 'PR-24-05',
        'PR-24-06', 'PR-24-07', 'PR-24-08', 'PR-24-09', 'PR-24-10',
        'PR-25-01', 'PR-25-02', 'PR-25-03', 'PR-25-04', 'PR-26-01',
        'PR-27-01', 'PR-28-01', 'PR-29-01', 'PR-29-02', 'PR-29-03',
        'PR-29-04', 'PR-29-05', 'PR-29-06', 'PR-30-01'])
    class_size = len(valid_class)
    vocab_to_int, int_to_vocab = load_bow_vocab(vocab_to_int_dir, int_to_vocab_dir)
    max_len = 350
    max_row = 121
    # Paragraph class names; only their count is used below (width of the
    # 'col_input' model input).
    unique_par_class = ['정의', '인수_및_모집', '본_사채의_발행조건', '수요예측', '모집관계사항', '불성실_수요예측_참여자의_관리', '발행회사의_보장', '기업실사', '확약_또는_선약', '인수시기', '제서식의_작성_및_공고', '수수료', '비용', '원리금_상환사무의_대행', '인수_및_모집_일정의_변경', '사채권의_발행여부', '채권상장신처_및_채권등록_발행', '특약사항', '사채금_사용용도', '원리금_지급의무', '책임부담', '해지_또는_해제', '통보_및_요청', '자료제출', '평과결과_공시_등', '관할법원', '계약의_해석원칙_등', '공모금리_결정_및_배정', '개별책임']
    label_to_int = bow_label(unique_par_class)[0]
    # Repeat each paragraph's probability vector once per line of that
    # paragraph so it can be fed alongside every sentence.
    test_proba = np.concatenate([[test_prob[num] for cnt in x_all[num]] for num in range(len(x_all))])
    x_row = tagging_row_index(x_all)
    x_row_ = row_embed(x_row, max_row)
    x_all_ = x_data_set(x_all, max_len, vocab_to_int)
    # Hyper-parameters; TabSen reads several of them through its closure.
    n_words = len(int_to_vocab) + 2
    embed_size = 100
    batch_size = 8
    learning_rate = 0.0001
    epochs = 500
    sentence_wise_lstm_size = 128
    dense_dropout = 0.5
    l2_reg = regularizers.l2(0.0001)
    dense_size = 128
    attention_dim = 100
    rnn_dim = 256

    class AttentionLayer(Layer):
        """Keras layer returning softmax attention scores over axis 1 of its input."""

        def __init__(self, attention_dim, **kwargs):
            self.attention_dim = attention_dim
            super(AttentionLayer, self).__init__(**kwargs)

        def build(self, input_shape):
            # Projection matrix, bias and context vector, all trainable.
            self.W = self.add_weight(name='Attention_Weight', shape=(input_shape[-1], self.attention_dim), initializer='random_normal', trainable=True)
            self.b = self.add_weight(name='Attention_Bias', shape=(self.attention_dim, ), initializer='random_normal', trainable=True)
            self.u = self.add_weight(name='Attention_Context_Vector', shape=(self.attention_dim, 1), initializer='random_normal', trainable=True)
            super(AttentionLayer, self).build(input_shape)

        def call(self, x):
            # tanh projection -> dot with context vector -> softmax.
            u_it = K.tanh(K.dot(x, self.W) + self.b)
            a_it = K.dot(u_it, self.u)
            a_it = K.squeeze(a_it, -1)
            a_it = K.softmax(a_it)
            return a_it

        def compute_output_shape(self, input_shape):
            return (input_shape[0], input_shape[1])

    def WeightedSum(attentions, representations):
        """Weight ``representations`` by ``attentions`` and sum over axis 1."""
        repeated_attentions = RepeatVector(K.int_shape(representations)[-1])(attentions)
        repeated_attentions = Permute([2, 1])(repeated_attentions)
        aggregated_representation = Multiply()([representations, repeated_attentions])
        aggregated_representation = Lambda(lambda x: K.sum(x, axis=1))(aggregated_representation)
        return aggregated_representation

    def SenWeightedSum(attentions, representations):
        """Weight ``representations`` by ``attentions`` without summing.

        Not called anywhere in this function.
        """
        repeated_attentions = RepeatVector(K.int_shape(representations)[-1])(attentions)
        repeated_attentions = Permute([2, 1])(repeated_attentions)
        aggregated_representation = Multiply()([representations, repeated_attentions])
        return aggregated_representation

    def TabSen():
        """Build the sentence classifier and its word-attention extractor.

        The classifier takes word ids, a row-position vector and a
        paragraph-probability vector and outputs a softmax over
        ``class_size`` classes.
        """
        K.clear_session()
        np.random.seed(1201)
        row_embed = Input(shape = (max_row, ), name = 'row_input')
        col_embed = Input(shape = (len(unique_par_class), ), name = 'col_input')
        row_layer = Dense(128)(row_embed)
        col_layer = Dense(128)(col_embed)
        word_inp_embed = Input(shape = (None, ), name = 'word_input')
        word_embed = Embedding(n_words, embed_size, trainable = True)(word_inp_embed)
        lstm = Bidirectional(CuDNNLSTM(sentence_wise_lstm_size, return_sequences=True))(word_embed)
        lstm_bn = BatchNormalization()(lstm)
        attn_score = AttentionLayer(attention_dim)(lstm_bn)
        attn_out = WeightedSum(attn_score, lstm_bn)
        concat = Concatenate()([attn_out, row_layer, col_layer])
        fc_layer = Dense(dense_size, activation='relu', kernel_regularizer = keras.regularizers.l2(1e-5), bias_regularizer = keras.regularizers.l1(1e-3))(concat)
        dropout = Dropout(dense_dropout)(fc_layer)
        output = Dense(class_size, activation = 'softmax')(dropout)
        model = Model(inputs = [word_inp_embed, row_embed, col_embed], outputs = output)
        word_attention_extractor = Model(inputs=[word_inp_embed], outputs=[attn_score])
        model.compile(loss = 'categorical_crossentropy', optimizer = Adam(learning_rate), metrics = ['accuracy'])
        return model, word_attention_extractor

    print("Load Sentence Model")
    tabsen, word_attention_extractor = TabSen()
    tabsen.load_weights('./model/sentence/33-0.1379.hdf5')

    # This local definition replaces the int_to_label imported above.
    def int_to_label(y_vectors, valid_class):
        """Decode score vectors to class codes via a fitted OneHotEncoder."""
        enc = OneHotEncoder(handle_unknown='ignore')
        enc.fit(valid_class.reshape(-1,1))
        labels = enc.inverse_transform(y_vectors)
        return labels

    # Readable names for ids 0 and 1 in the heat-map tick labels.
    int_to_vocab[0] = 'pad'
    int_to_vocab[1] = 'UNK'
    import matplotlib
    import matplotlib.font_manager as fm
    fm.get_fontconfig_fonts()
    font_location = 'font/malgun.ttf' # For Windows
    font_name = fm.FontProperties(fname=font_location).get_name()
    init_x_dict = {}
    init_x_dict['sequence'] = x_all_
    split = 'sequence'
    threshold=0.5
    pred_attention = word_attention_extractor.predict(init_x_dict[split])
    pred=tabsen.predict([x_all_, x_row_, test_proba])
    labels = [i for i in np.concatenate(int_to_label(pred, valid_class))]
    if visualize=='on' :
        print('sentence visual file extracting...')
        plt.rcParams.update({'figure.max_open_warning': 0})
        words_list = []
        for sent_idx, sentence in enumerate(init_x_dict[split]):
            # Sentences whose first id is 0 are skipped.
            if sentence[0] == 0:
                continue
            # Cut the sentence at the first 0 id.
            # NOTE(review): if a sentence contains no 0, `words` and
            # `pred_att` keep the previous sentence's values (or are unbound
            # on the first one) -- confirm this cannot happen.
            for word_idx in range(max_len):
                if sentence[word_idx] == 0:
                    words = [int_to_vocab[word_id] for word_id in sentence[0:word_idx]]
                    pred_att = pred_attention[sent_idx][0:len(words)]
                    pred_att = np.expand_dims(pred_att, axis=0)
                    break
            fig, ax = plt.subplots(figsize=(len(words), 1))
            plt.rc('font', family=font_name)
            plt.rc('xtick', labelsize=12)
            # NOTE(review): assuming pred_attention rows are 1-D, pred_att
            # has a single row here, so pred_att[:, 0] is one value and
            # midpoint evaluates to 0 -- confirm intent.
            midpoint = (max(pred_att[:, 0]) - min(pred_att[:, 0])) / 2
            heatmap = sn.heatmap(pred_att, xticklabels=words, yticklabels=False, square=True, linewidths=0.1, cmap='coolwarm', center=midpoint, vmin=0, vmax=1)
            words_list.append([np.array(pred_att[0]), words, labels[sent_idx]])
            plt.xticks(rotation=45)
            plt.title(labels[sent_idx],)
            fig = plt.gcf()
            fig.savefig('./output/sentence/output_sen_vis/sentence_attention_{}_{}'.format(split,sent_idx+1), bbox_inches = "tight")
            plt.close(fig)
        scores = [i[0] for i in words_list]
        tokens = [i[1] for i in words_list]
        # One Excel sheet per visualised sentence: tokens as column headers,
        # attention scores as the single row.
        for sen_idx, (score,token) in enumerate(zip(scores, tokens)) :
            df = pd.DataFrame(score,token).T
            df.to_excel('./output/sentence/output_sen_dis/sentence_distribution_{}_{}.xlsx'.format(split, sen_idx+1), index=False)
        print('Check the /output/sentence/output_sen_dis, output_sen_vis', end='\n\n')
    elif visualize=='off':
        print('skip visualze', end='\n\n')
    else :
        print('visualize param check!')
    # Line-label table: column 1 is the key and column 0 the value; '-' maps
    # to itself.
    line_index = pd.read_excel('./data/index_line_label.xlsx', header=None)
    line_dict = {}
    for i in line_index.values:
        line_dict[i[1]] = i[0]
    line_dict['-'] = '-'
    para_dict = num2label
    output_filename = './output/contract_tagging.xlsx'

    def output_prediction(infer_pred):
        """Attach predicted line and paragraph labels and write the result sheet."""
        pred_label = [line_dict[i] for i in np.concatenate(int_to_label(infer_pred, valid_class))]
        # Paragraph classes whose probability reaches the threshold.
        par_pred = [np.where(i>=threshold)[0] for i in test_prob]
        par_pred_label = [', '.join([para_dict[j] for j in i]) for i in par_pred]
        # Repeat each paragraph's label string once per line in it.
        par_pred_length = [len(i) for i in ptl_df.doc.values]
        par_pred_label_ = np.concatenate([length*[pars] for length,pars in zip(par_pred_length, par_pred_label)])
        origin_data['line_label'] = pred_label
        origin_data['par_label'] = par_pred_label_
        # Drop the last column; exactly seven must remain for the renaming
        # below to succeed.
        output = origin_data.iloc[:,:-1]
        output.columns = ['문서번호','문단번호','조항번호','라인번호','내용','문단클래스','라인클래스']
        output.to_excel(output_filename, index=False)
        return output

    output = output_prediction(pred)
    print("Finish Contract Analysis")
    print("Check the Output file ", output_filename)
def portfolio_viz(self):
    """Display the portfolio summary card for the current user.

    Steps, in order:

    1. Re-wire the home button to ``self.RED_start``.
    2. Show the risk-notice image when ``self.user_info[6]`` equals one of
       the first two entries of ``self.know_list``.
    3. Show the sector images that belong to ``self.user_info[5]`` (only
       ``self.sector_list`` entries 0, 5 and 7 have images).
    4. Pick the allocation split from ``self.user_info[7]`` (matched against
       ``self.risk_list``); the bond share is forced to 0 when
       ``self.user_info[1]`` equals one of the first two ``self.term_list``
       entries.
    5. Blend return and volatility of ``self.portfolios1`` (weight
       ``1 - r1``), ``self.portfolios2`` (weight ``r2``) and
       ``self.portfolios3`` (weight ``r1 - r2``).
    6. Save the allocation pie chart, draw three summary lines on the
       template image, paste the chart next to them and display the result.

    Raises:
        ValueError: if ``self.user_info[7]`` matches none of the first five
            ``self.risk_list`` entries. The previous code only failed later
            with an ``UnboundLocalError``.
    """
    image_root = self.path + "/red/interface/image"
    chart_path = image_root + "/portfolio/pie_chart.png"

    def _save_pie_chart(ratio, labels, colors, legend_loc):
        """Render the allocation doughnut chart and save it to chart_path."""
        wedgeprops = {"width": 0.7, "edgecolor": "w", "linewidth": 5}
        # The original also called fm.get_fontconfig_fonts() and discarded
        # the result; that call had no effect and the function no longer
        # exists in current matplotlib, so it is omitted.
        font_name = fm.FontProperties(fname=self.fontpath).get_name()
        matplotlib.rc("font", family=font_name)
        fig = plt.figure(figsize=(7, 7))
        plt.pie(
            ratio,
            labels=labels,
            startangle=90,
            autopct="%.0f%%",
            shadow=True,
            textprops={"fontsize": 20},
            colors=colors,
            wedgeprops=wedgeprops,
        )
        plt.legend(labels, fontsize=13, loc=legend_loc)
        plt.savefig(chart_path)
        plt.close(fig)

    self.to_home_button.on_click(self.RED_start)

    # Risk notice for the first two knowledge levels.
    if (self.user_info[6] == self.know_list[0]) or (self.user_info[6] == self.know_list[1]):
        danger = Image.open(image_root + "/portfolio/위험고지.png")
        display(danger)

    # Sector correlation images for the sector the user is interested in.
    sector_images = (
        (0, ("건설양.png", "건설음.png")),
        (5, ("운수장비음.png",)),
        (7, ("의약음.png",)),
    )
    for sector_index, file_names in sector_images:
        if self.user_info[5] == self.sector_list[sector_index]:
            # Open every image first, then display, as the original did.
            opened = [
                Image.open(image_root + "/industry/" + file_name)
                for file_name in file_names
            ]
            for image in opened:
                display(image)
            break

    # Portfolio ratios: r1 is the non-stock share, r2 the bond-ETF share.
    allocation_by_risk = ((1, 0.67), (0.8, 0.4), (0.6, 0.3), (0.4, 0.1), (0.2, 0))
    for risk_level, (r1, r2) in zip(self.risk_list, allocation_by_risk):
        if self.user_info[7] == risk_level:
            break
    else:
        raise ValueError(
            "unknown risk level in user_info[7]: %r" % (self.user_info[7],))

    if self.user_info[1] == self.term_list[0] or self.user_info[1] == self.term_list[1]:
        r2 = 0

    # Round before converting: int() alone truncates float error, e.g.
    # (1 - 0.8) * 100 == 19.999999999999996 used to be shown as 19 %.
    real_r0 = int(round((1 - r1) * 100))
    real_r1 = int(round((r1 - r2) * 100))
    real_r2 = int(round(r2 * 100))

    # Blend the count-weighted averages of each bucket by its allocation
    # share.
    p_profit = 0
    p_sigma = 0
    buckets = (
        (self.portfolios1, 1 - r1),
        (self.portfolios2, r2),
        (self.portfolios3, r1 - r2),
    )
    for holdings, share in buckets:
        cnt = 0
        profit = 0
        sigma = 0
        for info in holdings.values():
            cnt += info[0]
            profit += info[1][-2] * info[0]
            sigma += info[1][-1] * info[0]
        if cnt > 0:
            profit /= cnt
            sigma /= cnt
        p_profit += profit * share
        p_sigma += sigma * share
    # The ** 12 compounds the blended figure twelve times (presumably
    # monthly -> yearly; confirm against the data source).
    yearly_return = round(((1 + p_profit / 100)**12 - 1) * 100, 2)
    yearly_sigma = round(p_sigma * 100, 2)

    # Pie chart. The 20 % stock case gets its own legend corner; it was
    # previously detected through the truncated value 19.
    if r2 == 0:
        _save_pie_chart(
            [real_r0, real_r1],
            ["주식", "일반 ETF"],
            ["silver", "gold"],
            "lower left" if real_r0 == 20 else "upper left",
        )
    else:
        _save_pie_chart(
            [real_r0, real_r1, real_r2],
            ["주식", "일반 ETF", "채권 ETF"],
            ["silver", "gold", "lightgray"],
            "lower right" if real_r0 == 20 else "lower left",
        )

    # Load the template and the freshly saved chart.
    im_tend = Image.open(image_root + "/portfolio/red.png")
    im_chart = Image.open(chart_path)
    font = ImageFont.truetype(self.fontpath, 22)

    # Text colour.
    b, g, r, a = 0, 0, 0, 0
    fill = (b, g, r, a)

    # Write the three summary lines onto the template.
    draw = ImageDraw.Draw(im_tend)
    draw.text((228, 80.5), "연 " + str(yearly_return) + "% 내외 추구", font=font, fill=fill)
    draw.text((228, 244), "평균 위험률 연 " + str(yearly_sigma) + "%", font=font, fill=fill)
    draw.text((228, 405), "전체 주식 비중 " + str(real_r0) + "% 수준", font=font, fill=fill)

    # Paste the pie chart and show the finished card.
    im_tend.paste(im_chart, (510, 10))
    display(im_tend)
# from xgboost.compat import XGBLabelEncoder # => is IDENTICAL to `sklearn.preprocessing.LabelEncoder` import lime from lime import lime_tabular import pdpbox from pdpbox import pdp, info_plots for _pkg in [np, pd, skl, xgb, mpl, pdpbox]: print(f'{_pkg.__name__:<7} = {_pkg.__version__}') font_dict = { path.split('/')[-1][:-4]: path for path in fm.get_fontconfig_fonts() if 'dejavu' in path.lower().split('/')[-1] } plt.rcParams['font.family'] = sorted(font_dict.keys(), key=len)[0] # os.chdir('../git/xgboost-lime-pdp') fpath = '.' # %% Classes & Functions ----------------------------------------------------- def as_int(string): return np.fromstring(string, dtype=np.int64, sep=',')[0] def as_int_str(string):
def _draw222_figure(groups, suptitle, out_path, labels, label_title,
                    value_name, color_names, notch, fontprop):
    """Draw one figure holding four box plots, save it, and show it.

    ``groups`` is a sequence of four samples. The first two are drawn
    around x tick 0 and the last two around x tick 2; within each pair the
    first box is pink and the second is light green.
    """
    positions = (-0.4, 0.4, 1.6, 2.4)
    face_colors = ('pink', 'lightgreen', 'pink', 'lightgreen')

    fig = plt.figure(figsize=(10, 8))
    fig.suptitle(suptitle, fontsize=35, fontproperties=fontprop)

    bplots = [
        plt.boxplot(sample, widths=0.35, positions=[position],
                    notch=notch, patch_artist=True)
        for sample, position in zip(groups, positions)
    ]
    # boxplot() resets the ticks on every call, so set them afterwards.
    plt.xticks([0, 2], labels, fontsize=15, fontproperties=fontprop)

    for bplot, face_color in zip(bplots, face_colors):
        for patch in bplot['boxes']:
            patch.set_facecolor(face_color)

    plt.grid(True)
    plt.xlabel(label_title, fontsize=20, fontproperties=fontprop)
    plt.ylabel(value_name, fontsize=20, fontproperties=fontprop)
    # One pink and one light-green box are enough to explain the colours.
    plt.legend([bplots[0]["boxes"][0], bplots[1]["boxes"][0]],
               color_names,
               loc='upper right',
               fontsize=15,
               prop=fontprop)
    # savefig() takes no font arguments; fonts are set on the artists above.
    plt.savefig(out_path, dpi=400)
    plt.show()


def draw222(values, variables, notch=False, title="boxplot_result"):
    """Draw two grouped box-plot figures and save them as JPEG files.

    Args:
        values: Indexable holding eight samples. ``values[0]`` to
            ``values[3]`` go to the first figure and ``values[4]`` to
            ``values[7]`` to the second.
        variables: Mapping with the keys ``"factor1"``, ``"factor2"``,
            ``"mask"`` and ``"numeric"``. ``factor1`` supplies the x-axis
            label (``"name"``) and tick labels (``"value"``); ``factor2``
            supplies the figure title (``"name"``) and the per-figure
            suffix (``"value"``); ``mask["value"]`` supplies the legend
            entries; ``numeric["name"]`` is the y-axis label.
        notch: Forwarded to ``plt.boxplot``.
        title: File name stem. The figures are written to
            ``./<title>1.jpg`` and ``./<title>2.jpg``.
    """
    label_title = variables["factor1"]["name"]
    labels = variables["factor1"]["value"]
    top_title = variables["factor2"]["name"]
    titles = variables["factor2"]["value"]
    color_names = variables["mask"]["value"]
    value_name = variables["numeric"]["name"]

    # Pick a font family that can render Korean text on the current OS.
    if platform in ("linux", "linux2"):
        fontprop = fm.FontProperties("NanumGothic")
    elif platform == "darwin":
        fontprop = fm.FontProperties("AppleGothic")
    elif platform == "win32":
        fontprop = fm.FontProperties("Malgun Gothic")
    else:
        print(
            "User platform could not be identified. Korean characters may not be shown correctly when visualizing."
        )
        # Fall back to Matplotlib's default font instead of leaving the
        # name unbound, which used to raise NameError at the first plot call.
        fontprop = fm.FontProperties()

    for figure_index in range(2):
        first = 4 * figure_index
        _draw222_figure(
            [values[first + offset] for offset in range(4)],
            top_title + "(" + titles[figure_index] + ")",
            "./" + title + str(figure_index + 1) + ".jpg",
            labels,
            label_title,
            value_name,
            color_names,
            notch,
            fontprop,
        )
def word2vec_test(file_list, w2v_name):
    """Print a few Word2Vec queries and save a t-SNE plot of frequent words.

    Args:
        file_list: Names of text files under ``./data/``. Each line is
            split on tabs and the first field is split on whitespace into
            words.
        w2v_name: File name of a saved Word2Vec model under ``./data/``.

    The scatter plot of the 100 most common words is written to
    ``./data/3_word2vec_tsne.png``.
    """
    source_dir = './data/'
    fig_file = '3_word2vec_tsne.png'
    font_name = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf'

    # Load the trained Word2Vec model.
    model = models.Word2Vec.load(source_dir + w2v_name)

    # POS-tag the probe words used in the queries below.
    data1 = pre.konlpy_pos_tag('배우')
    data2 = pre.konlpy_pos_tag('엄마')
    data3 = pre.konlpy_pos_tag('여자')
    data4 = pre.konlpy_pos_tag('남자')
    data_list = [data1, data2, data3, data4]

    # All vector look-ups go through ``model.wv``: indexing the model
    # object itself was deprecated and is not supported by gensim 4.
    print(model.wv[data1])
    print(model.wv.doesnt_match(data_list))
    print(model.wv.most_similar(positive=[data1], topn=10))
    print(model.wv.most_similar(positive=[data2, data4], negative=[data3], topn=1))
    print(model.wv.similarity(data1, data2))
    print(model.wv.similarity(data1, data3))

    # Collect every word from the first tab-separated field of each line.
    total_word_list = []
    for file in file_list:
        with open(source_dir + file, 'r', encoding='UTF-8') as f:
            for line in f.read().splitlines():
                total_word_list.extend(line.split('\t')[0].split())

    # Keep the 100 most frequently used words.
    word_list = [word for word, _ in Counter(total_word_list).most_common(100)]
    print(word_list)

    # Print the installed fonts whose path contains 'nanum'.
    # findSystemFonts() replaces get_fontconfig_fonts(), which newer
    # Matplotlib releases no longer provide.
    font_list = font_manager.findSystemFonts()
    print([font for font in font_list if 'nanum' in font])

    # Use the Nanum font so Korean labels render.
    rc('font', family=font_manager.FontProperties(fname=font_name).get_name())

    # Vectors for the selected words, reduced to two dimensions.
    vector_list = model.wv[word_list]
    transformed = TSNE(n_components=2).fit_transform(vector_list)
    print(transformed)

    x_plot = transformed[:, 0]
    y_plot = transformed[:, 1]

    pyplot.figure(figsize=(10, 10))
    pyplot.scatter(x_plot, y_plot)
    for word, x_pos, y_pos in zip(word_list, x_plot, y_plot):
        pyplot.annotate(word, xy=(x_pos, y_pos))

    pyplot.savefig(source_dir + fig_file)
import re from matplotlib import rcParams, pyplot as pp from cycler import cycler POLAR_YLIM_CONST = (-18, -6) POLAR_YLIM_CONST_MEAS = (-22, -10) POLAR_YLIM_CONST_MEAS = (-15, -3) POLAR_YLIM_CONST_ALT = (-32, -6) GAIN_FIXED_YLIM1 = (-20, -0) GAIN_FIXED_YLIM1_REVISED = (-12, -2) GAIN_FIXED_YLIM2 = (-30, -0) GAIN_FIXED_YLIM3 = (-10, 10) GAIN_FIXED_YLIM4 = (-0, 20) GAIN_FIXED_YLIM5 = (-20, 0) fcFontList = FM.get_fontconfig_fonts() # Search only for fonts that have name matches similar to this # note this is ALSO a priority list fontsDesired = ['Times', 'Helvetica', 'Arial'] fontsDesiredRe = re.compile('|'.join(fontsDesired), flags=re.IGNORECASE) # Create a unique set of the fonts selected out of all of the system fonts fontsAvailable = frozenset([FM.FontProperties(fname=fcFont).get_name()\ for fcFont in fcFontList if fontsDesiredRe.search(fcFont) != None]) fontSelected = None for fontSearch in fontsDesired: for fontFound in fontsAvailable: if re.search(fontSearch, fontFound, flags=re.IGNORECASE) != None: fontSelected = fontFound break if fontSelected != None:
def use_kor_font():
    """Set Matplotlib's default font family to NanumBarunGothic Bold.

    The family name is read from the font file at
    ``/Library/Fonts/NanumBarunGothicBold.ttf`` and applied through
    ``plt.rc``.
    """
    # The former bare call to fm.get_fontconfig_fonts() was dropped: its
    # result was discarded, and the function was deprecated in
    # Matplotlib 3.5 and removed in later releases.
    font_location = '/Library/Fonts/NanumBarunGothicBold.ttf'
    font_name = fm.FontProperties(fname=font_location).get_name()
    plt.rc('font', family=font_name)
def make_graph(self):
    """Draw the cause and result term graphs and save each as a PNG.

    Reads ``data/result/<file_name>_final_w2v_result.csv``, keeps rows
    whose ``value`` is positive, and splits them by ``type`` (``'C'`` for
    cause, ``'R'`` for result). For each group a directed graph is built
    with an edge from ``seed_term`` to ``terms``. The graphs are written
    to ``data/result/<file_name>_cause.png`` and
    ``data/result/<file_name>_result.png``.
    """
    w2v_data = pd.read_csv('data/result/' + self.file_name +
                           '_final_w2v_result.csv',
                           encoding='utf-8-sig')
    positive = w2v_data['value'] > 0.0
    cause_df = w2v_data[(w2v_data['type'] == 'C') & positive]
    result_df = w2v_data[(w2v_data['type'] == 'R') & positive]
    print(cause_df)

    # Rescale the cause weights to the range [0, 2000]. When there are no
    # rows, or every weight is equal, the range is zero; use zeros rather
    # than raising or producing NaN.
    raw_weights = cause_df['value'].to_numpy(dtype=float)
    spread = raw_weights.max() - raw_weights.min() if raw_weights.size else 0.0
    if spread > 0:
        scaled = 2000 * (raw_weights - raw_weights.min()) / spread
    else:
        scaled = np.zeros_like(raw_weights)
    cause_list_weight = scaled.tolist()
    print(cause_list_weight)

    df_cause = pd.DataFrame({
        'from': cause_df['seed_term'].to_list(),
        'to': cause_df['terms'].to_list(),
        'weight': cause_list_weight
    })
    df_result = pd.DataFrame({
        'from': result_df['seed_term'].to_list(),
        'to': result_df['terms'].to_list(),
        'weight': result_df['value'].to_list()
    })
    print(df_cause)

    # Resolve the family name of the Korean font used for node labels.
    font_location = 'C:/Windows/Fonts/NanumGothic.ttf'  # For Windows
    font_name = fm.FontProperties(fname=font_location).get_name()
    plt.rc('font', family=font_name)

    G_cause = nx.from_pandas_edgelist(df_cause, 'from', 'to',
                                      create_using=nx.DiGraph())
    G_result = nx.from_pandas_edgelist(df_result, 'from', 'to',
                                       create_using=nx.DiGraph())

    # Cause graph.
    plt.figure(figsize=(16, 12))
    plt.title('원인')
    print(G_cause.nodes)
    # NOTE(review): the colour used to be 'dark', which is not a Matplotlib
    # colour name and makes the draw call fail. 'black' is the nearest
    # reading of the intent; confirm the desired colour.
    nx.draw_networkx(G_cause,
                     pos=nx.spring_layout(G_cause),
                     node_size=1000,
                     node_color='black',
                     alpha=.1,
                     font_family=font_name,
                     with_labels=True)
    plt.savefig('data/result/' + self.file_name + '_cause.png',
                bbox_inches='tight')

    # Result graph. It gets its own figure; it used to be drawn on top of
    # the cause graph, so the saved result image contained both.
    plt.figure(figsize=(16, 12))
    nx.draw(G_result, font_family=font_name, with_labels=True)
    plt.savefig('data/result/' + self.file_name + '_result.png',
                bbox_inches='tight')
def test_get_fontconfig_fonts():
    """Check that get_fontconfig_fonts() reports more than one entry."""
    found_fonts = get_fontconfig_fonts()
    assert len(found_fonts) > 1
def plot_heatmap(res, print_std=False, title=None):
    """Plot the heatmap returned by ``get_heatmap`` with zone outlines.

    Args:
        res: Passed unchanged to ``get_heatmap``.
        print_std: Passed to ``get_heatmap``. When it is exactly ``True``
            the y axis and the zone outlines use the -1.5..1.5 layout;
            otherwise they use the 1.0..4.0 layout.
        title: Plot title. Defaults to ``'2017 KBO S-Zone heatmap'``.
    """
    import os
    from matplotlib import font_manager, rc

    # Only the first array returned by get_heatmap is plotted.
    P, _ = get_heatmap(res, print_std=print_std)
    use_std_layout = print_std is True
    cell = 1 / 12

    lb, rb = -1.5, +1.5  # left / right border
    if use_std_layout:
        bb, tb = -1.5, +1.5  # bottom / top border
    else:
        bb, tb = +1.0, +4.0
    X, Y = np.meshgrid(np.arange(lb, rb, cell), np.arange(bb, tb, cell))

    plt.figure(figsize=(6, 5), dpi=80, facecolor='white')

    # Use the font file when it is present; otherwise select the font by
    # family name. os.name is 'posix' on Linux as well as macOS, and on
    # Linux this macOS path is missing, which used to make
    # FontProperties.get_name() raise.
    font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
    if os.name == 'posix' and os.path.exists(font_location):
        font_name = font_manager.FontProperties(fname=font_location).get_name()
        rc('font', family=font_name)
    else:
        rc('font', family='NanumSquare')
    plt.rcParams['axes.unicode_minus'] = False

    plt.pcolormesh(X, Y, P)
    plt.colorbar(format=ticker.FuncFormatter(fmt))

    # Inner and outer outlines; the outer one is padded by 3/24 on each
    # side.
    ll, rl = -17 / 24, +17 / 24
    oll, orl = -20 / 24, +20 / 24
    if use_std_layout:
        bl, tl = -1.0, +1.0
    else:
        bl, tl = 1.579, 3.325
    obl, otl = bl - 3 / 24, tl + 3 / 24

    for left, right, bottom, top in ((ll, rl, bl, tl), (oll, orl, obl, otl)):
        edges = (
            ([left, left], [bottom, top]),
            ([right, right], [bottom, top]),
            ([left, right], [bottom, bottom]),
            ([left, right], [top, top]),
        )
        for xs, ys in edges:
            plt.plot(xs, ys, color='#ffffff', linestyle='-', lw=1)

    plt.title('2017 KBO S-Zone heatmap' if title is None else title)
    plt.axis([lb + cell, rb - cell, bb + cell, tb - cell])
def _draw_pitch_panel(ax, groups, caption):
    """Scatter pitch groups on *ax*, overlay the zone grid, and caption it.

    ``groups`` is a sequence of ``(rows, color, label)`` tuples. Columns 0
    and 1 of ``rows`` are used as the x and y coordinates.
    """
    lb, rb = -1.5, +1.5  # left / right border
    bb, tb = +1.0, +4.0  # bottom / top border
    ll, rl = -17 / 24, +17 / 24  # left / right zone line
    bl, tl = +1.579, +3.325  # bottom / top zone line
    pad = 1 / 8  # gap between the zone and the outer box
    oll, orl = ll - pad, rl + pad
    obl, otl = bl - pad, tl + pad

    ax.tick_params(axis='x', colors='white')
    for rows, color, label in groups:
        ax.scatter(rows[:, 0], rows[:, 1], color=color, alpha=.5,
                   s=np.pi * 50, label=label)

    # 3x3 grid over the zone: four vertical lines, then four horizontal.
    for x_pos in (ll, ll + (rl - ll) / 3, ll + (rl - ll) * 2 / 3, rl):
        ax.plot([x_pos, x_pos], [bl, tl], color='#f9f9ff', linestyle='-', lw=1)
    for y_pos in (bl, bl + (tl - bl) / 3, bl + (tl - bl) * 2 / 3, tl):
        ax.plot([ll, rl], [y_pos, y_pos], color='#f9f9ff', linestyle='-', lw=1)

    # Thinner outer box around the zone.
    outer_edges = (
        ([oll, oll], [obl, otl]),
        ([orl, orl], [obl, otl]),
        ([oll, orl], [obl, obl]),
        ([oll, orl], [otl, otl]),
    )
    for xs, ys in outer_edges:
        ax.plot(xs, ys, color='#d0cfd3', linestyle='-', lw=0.5)

    ax.axis([lb, rb, bb, tb])
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.axis('off')
    ax.autoscale_view(tight=True)
    # Numeric coordinates: these used to be passed as the strings '0' and
    # '3.8'.
    ax.text(0, 3.8, caption, color='white', fontsize=14,
            horizontalalignment='center', weight='bold')


def plot_match_calls(values, title=None):
    """Show a 2x3 grid of pitch-location scatter plots, one per result type.

    Args:
        values: 2-D NumPy array. Columns 0 and 1 are the plotted x and y
            coordinates and column 2 is the result code: 1 ball, 2 strike,
            3 and 7 whiff, 4 and 6 foul, 5 in play. The code meanings are
            taken from the panel labels and the selection logic.
        title: Optional figure title.
    """
    import os
    from matplotlib import font_manager, rc

    # Use the font file when it is present; otherwise select the font by
    # family name. os.name is 'posix' on Linux as well as macOS, and on
    # Linux this macOS path is missing, which used to make
    # FontProperties.get_name() raise.
    font_location = '/Library/Fonts/NanumSquareOTFRegular.otf'
    if os.name == 'posix' and os.path.exists(font_location):
        font_name = font_manager.FontProperties(fname=font_location).get_name()
        rc('font', family=font_name)
    else:
        rc('font', family='NanumSquare')
    plt.rcParams['axes.unicode_minus'] = False

    result = values[:, 2]
    bvalues = values[result == 1]
    svalues = values[result == 2]
    # Combine the masks with "|". The previous
    # "np.where(a) or np.where(b)" always evaluated to the first operand,
    # so codes 7 and 6 were never selected.
    wvalues = values[(result == 3) | (result == 7)]
    fvalues = values[(result == 4) | (result == 6)]
    ivalues = values[result == 5]

    fig = plt.figure(figsize=(12, 7), dpi=160, facecolor='#898f99')
    if title is not None:
        st = fig.suptitle(title, fontsize=20)
        st.set_color('white')
        st.set_weight('bold')
        st.set_horizontalalignment('center')

    strikes = (svalues, '#ef2926', '스트라이크')
    balls = (bvalues, '#3245ef', '볼')
    panels = (
        (231, '#313133', (strikes, balls), '스트라이크+볼'),
        (232, '#313133', (strikes,), '스트라이크'),
        (233, '#313133', (balls,), '볼'),
        (234, '#313133', ((wvalues, '#1a1b1c', '헛스윙'),), '헛스윙'),
        (235, '#313133', ((fvalues, '#edf72c', '파울'),), '파울'),
        (236, '#d19c49', ((ivalues, '#8348d1', '인플레이'),), '인플레이'),
    )
    for position, facecolor, groups, caption in panels:
        ax = fig.add_subplot(position, facecolor=facecolor)
        _draw_pitch_panel(ax, groups, caption)

    plt.show()
# Basic statistics of num_1.
print(num_1[::-1])
print(num_1.sum())
print(num_1.mean())
print(np.median(num_1))

# Correlation between num_1 and a copy shifted by a constant.
num_2 = num_1 + 5
result = np.corrcoef([num_1, num_2])
print(result)

num_1 = num_1.reshape(6, 5)
print(num_1)

import matplotlib
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt

# The former bare call to fm.get_fontconfig_fonts() was dropped: its result
# was discarded, and newer Matplotlib releases no longer provide it.

# Line chart of the user count per month.
x = list(range(7, 13))
y = [456, 492, 578, 599, 670, 854]
plt.plot(x, y, marker='o', color='orange')
plt.xlabel('month')
plt.ylabel('user')
plt.title('신규사용자')
plt.show()
#!/usr/bin/env python3
#
# Filename: get_fontNames.py
# Author: Zhiguo Wang
# Date: 2/7/2020
#
# Description:
# Retrieve the names of all available system fonts
# Run this script from the command line

from matplotlib import font_manager

# findSystemFonts() lists the font files in the standard system locations,
# plus those known to fontconfig when it is installed. It replaces
# get_fontconfig_fonts(), which newer Matplotlib releases no longer provide.
f_list = font_manager.findSystemFonts()

f_names = []
for font in f_list:
    try:
        f = font_manager.FontProperties(fname=font).get_name()
    except Exception:
        # Best effort: skip font files that cannot be read. Unlike a bare
        # "except:", this still lets Ctrl-C and SystemExit through.
        continue
    f_names.append(f)

print(f_names)