def hash_class_image(graph_file, save=True):
    """Draw one label-vs-hash-bucket count image per hash function.

    The data file name is the part of ``graph_file`` before the first ``-``.
    The last ``-``-separated field is parsed as
    ``<one char><num_hashes>b<num_bits>``.  Hash functions are identified by
    the first ``num_hashes`` lowercase ASCII letters.

    For each hash function a ``(2 ** num_bits, num_labels + 1)`` grid is
    filled so that ``grid[bucket, label]`` counts the documents with that
    label whose binary-string hash equals ``bucket``.

    Args:
        graph_file: Name of the LSH graph file; must contain ``'lsh'``.
        save: If true, each figure is passed to ``util.save_plot`` under the
            name ``graph_file + <hash letter>``; otherwise it is shown.
    """
    assert 'lsh' in graph_file
    data_file = graph_file.split('-')[0]

    # Parse the trailing "<char><num_hashes>b<num_bits>" field once.
    spec = graph_file.split('-')[-1].split('b')
    num_hashes = int(spec[0][1:])
    num_bits = int(spec[1])
    num_labels = util.get_num_labels(data_file)

    # Labels and hashes do not depend on the hash letter, so load them a
    # single time instead of once per hash function.
    doc_labels = util.get_doc_labels(data_file)
    doc_hashes = get_doc_hashes(graph_file)

    for hl in string.ascii_lowercase[:num_hashes]:
        grid = np.zeros((2 ** num_bits, num_labels + 1))
        for doc, label in doc_labels.items():
            # Hashes are stored as base-2 strings; the row is their integer value.
            grid[int(doc_hashes[doc][hl], 2), int(label)] += 1

        plt.imshow(grid, aspect='equal', interpolation='nearest')
        plt.xlabel('Labels')
        plt.ylabel('Hashes (as base10 integers)')
        plt.title('%s: label / hash count for hash %s' % (graph_file, hl))
        plt.colorbar()
        if save:
            util.save_plot(plt, graph_file + hl)
        else:
            plt.show()
        # Start the next hash function on a clean figure.
        plt.clf()
def hash_class_image(graph_file, save=True):
    """Draw one label-vs-hash-bucket count image per hash function.

    The data file name is the part of ``graph_file`` before the first ``-``.
    The last ``-``-separated field is parsed as
    ``<one char><num_hashes>b<num_bits>``.  Hash functions are identified by
    the first ``num_hashes`` lowercase ASCII letters.

    For each hash function a ``(2 ** num_bits, num_labels + 1)`` grid is
    filled so that ``grid[bucket, label]`` counts the documents with that
    label whose binary-string hash equals ``bucket``.

    Args:
        graph_file: Name of the LSH graph file; must contain ``'lsh'``.
        save: If true, each figure is passed to ``util.save_plot`` under the
            name ``graph_file + <hash letter>``; otherwise it is shown.
    """
    assert 'lsh' in graph_file
    data_file = graph_file.split('-')[0]

    # Parse the trailing "<char><num_hashes>b<num_bits>" field once.
    spec = graph_file.split('-')[-1].split('b')
    num_hashes = int(spec[0][1:])
    num_bits = int(spec[1])
    num_labels = util.get_num_labels(data_file)

    # Labels and hashes do not depend on the hash letter, so load them a
    # single time instead of once per hash function.
    doc_labels = util.get_doc_labels(data_file)
    doc_hashes = get_doc_hashes(graph_file)

    for hl in string.ascii_lowercase[:num_hashes]:
        grid = np.zeros((2 ** num_bits, num_labels + 1))
        for doc, label in doc_labels.items():
            # Hashes are stored as base-2 strings; the row is their integer value.
            grid[int(doc_hashes[doc][hl], 2), int(label)] += 1

        plt.imshow(grid, aspect='equal', interpolation='nearest')
        plt.xlabel('Labels')
        plt.ylabel('Hashes (as base10 integers)')
        plt.title('%s: label / hash count for hash %s' % (graph_file, hl))
        plt.colorbar()
        if save:
            util.save_plot(plt, graph_file + hl)
        else:
            plt.show()
        # Start the next hash function on a clean figure.
        plt.clf()
def plot_yearly(df, y='count()', output='finished.html'):
    """Save a bar chart of rows selected by ``is_read & end``, grouped by year.

    Args:
        df: Frame with ``is_read``, ``end``, ``finished_year`` and
            ``is_fiction`` columns.
        y: Altair shorthand for the y channel (defaults to a row count).
        output: File name handed to ``save_plot``.
    """
    selected = df[df.is_read & df.end]
    channels = {
        'x': 'finished_year:O',
        'y': y,
        'color': alt.Color('is_fiction', scale=fiction_scale),
    }
    yearly_bars = alt.Chart(selected).mark_bar().encode(**channels)
    save_plot(yearly_bars, output)
def reading_ease(df):
    """Save two side-by-side scatter plots of ``fre`` against ``fkg`` and ``gfi``.

    Only rows where all three of ``fre``, ``fkg`` and ``gfi`` are present are
    plotted.  Points are colored by ``is_fiction`` and the combined chart is
    written to ``reading_ease.html``.
    """
    has_all_scores = df.fre.notna() & df.fkg.notna() & df.gfi.notna()
    scored = df[has_all_scores]
    fiction_color = alt.Color('is_fiction', scale=fiction_scale)

    def scatter_against_fre(y_field):
        # Semi-transparent points so overlapping points stay visible.
        points = alt.Chart(scored).mark_point(opacity=0.2)
        return points.encode(x='fre', y=y_field, color=fiction_color)

    left = scatter_against_fre('fkg')
    right = scatter_against_fre('gfi')
    save_plot(left | right, 'reading_ease.html')
def number_of_books_per_author(df, output='books_per_author.html'):
    """Save a horizontal bar chart of how many read books each author has.

    Args:
        df: Frame with ``is_read`` and ``author`` columns.
        output: File name handed to ``save_plot``.
    """
    author_counts = df[df.is_read].author.value_counts()
    per_author = pd.DataFrame(
        data={'author': author_counts.index, 'count': author_counts.values}
    ).sort_values('count', ascending=False)

    # sort=None keeps the frame's row order (most books first) on the y axis.
    author_axis = alt.Y('author', sort=None)
    chart = alt.Chart(per_author).mark_bar().encode(y=author_axis, x='count')
    save_plot(chart, output)
def plot_pubdate(df, output='pubdate.html'):
    """Save bar charts of publication year, month and day counts.

    Rows without a ``pubdate`` are ignored.  The year chart gets an orange
    overlay built from the rows where ``is_fiction`` is false, and sits above
    the month and day charts, which are placed side by side.

    Args:
        df: Frame with ``pubdate``, ``pubyear``, ``pubmonth``, ``pubday`` and
            ``is_fiction`` columns.
        output: File name handed to ``save_plot``.
    """
    dated = df[df.pubdate.notna()]

    def count_bars(frame, x_field, y_field, **mark_options):
        return alt.Chart(frame).mark_bar(**mark_options).encode(x=x_field, y=y_field)

    years = count_bars(dated, 'pubyear:O', 'count(year):N')
    years.width = 965
    years_nonfiction = count_bars(
        dated[~dated.is_fiction], 'pubyear:O', 'count(year):N', color='orange'
    )
    months = count_bars(dated, 'pubmonth:O', 'count(pubmonth):N')
    days = count_bars(dated, 'pubday:O', 'count(pubday):N')

    top_row = years + years_nonfiction
    bottom_row = months | days
    save_plot(top_row & bottom_row, output)
def save_graphics(C, ct, Cout, mcon, salto=3600):
    """Save the time-series and spatial plots through ``save_plot``.

    Titles and axis labels come from ``config``; the output directory is read
    from the ``directorio_salida`` name in the enclosing scope.

    Args:
        C: Mapping whose ``'wd'`` entry is a 2-D array; its first column is
            the x axis of the spatial plots.
        ct: Sliceable sequence subsampled as the x axis of the time plots.
        Cout: Mapping with one ``'c_<k>'`` entry per spatial plot key ``k``.
        mcon: Mapping from plot key to a 2-D array; the last column,
            subsampled with ``salto``, is the y series of the time plots.
        salto: Subsampling stride applied to both ``ct`` and ``mcon[k]``.
    """
    # Use the same stride for x and y; a hard-coded 3600 here made the two
    # series differ in length whenever salto was not the default.
    x_data = ct[1::salto]
    plt.figure('tmp')

    # Time plots
    # NOTE(review): the y labels come from Y_LABEL_GRAFICAS_ESPACIO even for
    # the time plots — confirm this is intentional.
    for k, titulo in config.TITULOS_GRAFICAS_PUNTO.items():
        save_plot(plt, titulo, config.X_LABEL_GRAFICAS_TIEMPO,
                  config.Y_LABEL_GRAFICAS_ESPACIO[k],
                  [x_data, mcon[k][1::salto, -1]], directorio_salida)

    c_x = C['wd'][:, 0]

    # Spatial plots
    for k, titulo in config.TITLULOS_GRAFICAS_ESPACIO.items():
        save_plot(plt, titulo, config.X_LABEL_GRAFICAS_ESPACIO,
                  config.Y_LABEL_GRAFICAS_ESPACIO[k],
                  [c_x, Cout['c_{}'.format(k)]], directorio_salida)
def plot_ranges(df, output='ranges.html'):
    """Plot the date ranges during which books were read.

    Three vertically stacked charts are written to ``output``: a faint
    overlay of all ranges split by ``is_fiction``, one bar per book laid out
    on the rows computed by ``allocate_ys``, and the ranges grouped by
    series.  A separate chart grouped by author is written to
    ``by_author.html``.

    When the ``cutoff_date`` global is set, only books that are still open or
    ended on/after it are kept, and open books get ``dummy_end_date`` as
    their end.

    Args:
        df: Frame with ``start``, ``end``, ``title``, ``series``, ``author``
            and ``is_fiction`` columns.
        output: File name handed to ``save_plot`` for the stacked chart.
    """
    if cutoff_date is not None:
        df = df[df.end.isna() | (df.end >= cutoff_date)]
        # fillna returns a new Series; the result was previously discarded,
        # so open-ended books never received an end date.
        # NOTE(review): presumably this belongs to the cutoff branch —
        # confirm whether open ends should also be filled without a cutoff.
        df = df.assign(end=df.end.fillna(dummy_end_date))

    df = df[df.start.notna()]
    df = df.assign(ys=-allocate_ys(df))

    def date_x():
        return alt.X('start', axis=alt.Axis(labelAngle=45, title='Date'))

    bars = alt.Chart(df).mark_bar(clip=True).encode(
        x=date_x(),
        x2='end',
        y=alt.Y('ys:N', axis=None),
        color=alt.Color('is_fiction', scale=fiction_scale, legend=None),
        tooltip='title',
    )
    bars.width = 1600

    overlapped = alt.Chart(df).mark_bar(clip=True, opacity=0.1).encode(
        x=alt.X('start', axis=None),
        x2='end',
        y=alt.Y('is_fiction', axis=None),
        color=alt.Color('is_fiction', scale=fiction_scale, legend=None),
    )
    overlapped.width = bars.width

    # df is already restricted by cutoff date and to rows with a start date,
    # so the per-chart subsets only need their own not-null filter.
    # (Previously the mask was built from df but applied to the subset.)
    series_rows = df[df.series.notna()]
    by_series = alt.Chart(series_rows).mark_bar(clip=True, opacity=0.7).encode(
        x=date_x(),
        x2='end',
        y=alt.Y('series', title='Series'),
        tooltip='title',
    )
    by_series.width = bars.width

    author_rows = df[df.author.notna()]
    # Recompute the row layout for the author subset, as before.
    author_rows = author_rows.assign(ys=-allocate_ys(author_rows))
    by_author = alt.Chart(author_rows).mark_bar(clip=True, opacity=0.7).encode(
        x=date_x(),
        x2='end',
        y=alt.Y('author', title='Author'),
        color='series',
        tooltip='title',
    )
    by_author.width = bars.width

    save_plot(overlapped & bars & by_series, output)
    save_plot(by_author, 'by_author.html')
from util import DataLoader

# Load data.
print('main | Initializing ... ')
digits, noise = DataLoader.load_data()
D = Discriminator()
G = Generator()

# Train the GAN: for every real sample the discriminator is fitted on the
# sample and on a freshly generated one, then the generator is fitted
# against the discriminator.
print('main | Training ... ')
epochs = 7000
dErrors = []
gErrors = []
for epoch in range(epochs):
    d_error1, d_error2, g_error = 0, 0, 0
    for digit in digits:
        d_error1 += D.fit(digit, isDigit=True)
        gOut = G.generate()
        # Accumulate like the other two errors; plain assignment kept only
        # the last sample's value while the average below divides by 14.
        d_error2 += D.fit(gOut)
        g_error += G.fit(gOut, D)
    if (epoch % 100) == 0:
        # Record averaged errors every 100 epochs.
        # NOTE(review): 14 is presumably len(digits) — confirm.
        dErrors.append(((d_error1 + d_error2) / 2) / 14)
        gErrors.append(g_error / 14)

# Show results.
# One x-axis tick per recorded error point; taking the length from the data
# keeps the axes aligned even if epochs is not a multiple of 100.
sprt = list(range(len(gErrors)))
util.save_png(G.generate(), "gen_image_ephocs_" + str(epochs))
util.save_plot(gErrors, dErrors, sprt, "error_plot_ephocs_" + str(epochs))
print('main | Completed.')