示例#1
0
def hash_class_image(graph_file, save=True):
    """Draw one label-vs-hash count image per LSH hash of ``graph_file``.

    The name is split on ``'-'``: the first field is handed to ``util`` as
    the data file, and the last field is parsed as
    ``<one char><num_hashes>b<num_bits>``.  Hashes are addressed by the
    first ``num_hashes`` lowercase letters.  For each of them a
    ``(2 ** num_bits, num_labels + 1)`` grid is filled with the number of
    documents per (hash value, label) pair and rendered with ``imshow``.

    Args:
        graph_file: Graph file name; must contain ``'lsh'``.
        save: If true, each figure is passed to ``util.save_plot`` under
            the name ``graph_file + <hash letter>``; otherwise it is shown.

    Raises:
        AssertionError: If ``'lsh'`` does not occur in ``graph_file``.
    """
    data_file = graph_file.split('-')[0]
    assert 'lsh' in graph_file

    # Parse the trailing '<c><num_hashes>b<num_bits>' field only once.
    hash_spec = graph_file.split('-')[-1].split('b')
    num_hashes = int(hash_spec[0][1:])
    num_bits = int(hash_spec[1])
    num_labels = util.get_num_labels(data_file)

    # These do not depend on the hash letter, so load them once instead of
    # once per plotted hash.
    doc_labels = util.get_doc_labels(data_file)
    doc_hashes = get_doc_hashes(graph_file)

    for hl in string.ascii_lowercase[:num_hashes]:
        grid = np.zeros((2 ** num_bits, num_labels + 1))
        for doc, label in doc_labels.items():
            # Hash values are parsed as base-2 strings to get the row index.
            grid[int(doc_hashes[doc][hl], 2), int(label)] += 1

        plt.imshow(grid, aspect='equal', interpolation='nearest')
        plt.xlabel('Labels')
        plt.ylabel('Hashes (as base10 integers)')
        plt.title('%s: label / hash count for hash %s' % (graph_file, hl))
        plt.colorbar()
        if save:
            util.save_plot(plt, graph_file + hl)
        else:
            plt.show()
        # Reset the figure so the next hash starts from a clean canvas.
        plt.clf()
示例#2
0
def hash_class_image(graph_file, save=True):
    """Plot how document labels are distributed over LSH hash values.

    ``graph_file`` is split on ``'-'``.  Its first field names the data
    file used for the ``util`` label lookups; its last field is read as
    ``<one char><num_hashes>b<num_bits>``.  One image is produced per hash
    letter (``'a'``, ``'b'``, ... up to ``num_hashes`` letters): rows are
    hash values, columns are labels, and each cell counts documents.

    Args:
        graph_file: Graph file name; must contain ``'lsh'``.
        save: When true the figure goes to ``util.save_plot`` named
            ``graph_file + <hash letter>``; when false ``plt.show()`` is
            called instead.

    Raises:
        AssertionError: If ``graph_file`` does not contain ``'lsh'``.
    """
    name_fields = graph_file.split('-')
    data_file = name_fields[0]
    assert 'lsh' in graph_file

    spec_fields = name_fields[-1].split('b')
    num_hashes = int(spec_fields[0][1:])  # drop the single leading char
    num_bits = int(spec_fields[1])
    num_labels = util.get_num_labels(data_file)

    # Both lookups are independent of the hash letter; fetching them here
    # avoids reloading them for every hash that is plotted.
    labels_by_doc = util.get_doc_labels(data_file)
    hashes_by_doc = get_doc_hashes(graph_file)

    def label_hash_pairs(hl):
        """Return ``(label, hash)`` for every labelled doc under hash ``hl``."""
        return [(label, hashes_by_doc[doc][hl])
                for doc, label in labels_by_doc.items()]

    for hl in string.ascii_lowercase[:num_hashes]:
        grid = np.zeros((2 ** num_bits, num_labels + 1))
        for label, hash_bits in label_hash_pairs(hl):
            # The hash is parsed as a base-2 string to obtain its row.
            grid[int(hash_bits, 2), int(label)] += 1

        plt.imshow(grid, aspect='equal', interpolation='nearest')
        plt.xlabel('Labels')
        plt.ylabel('Hashes (as base10 integers)')
        plt.title('%s: label / hash count for hash %s' % (graph_file, hl))
        plt.colorbar()
        if save:
            util.save_plot(plt, graph_file + hl)
        else:
            plt.show()
        plt.clf()  # clear the figure before drawing the next hash
示例#3
0
def plot_yearly(df, y='count()', output='finished.html'):
    """Save a bar chart of the selected rows grouped by ``finished_year``.

    Args:
        df: Frame providing the ``is_read``, ``end``, ``finished_year`` and
            ``is_fiction`` columns.
        y: Altair shorthand for the y encoding (row count by default).
        output: File name passed on to ``save_plot``.
    """
    # NOTE(review): the mask combines ``is_read`` with the raw ``end``
    # column via ``&``; if ``end`` holds dates, ``df.end.notna()`` may have
    # been intended -- confirm against the data before changing.
    selected = df[df.is_read & df.end]

    fiction_color = alt.Color('is_fiction', scale=fiction_scale)
    bars = alt.Chart(selected).mark_bar()
    chart = bars.encode(x='finished_year:O', y=y, color=fiction_color)

    save_plot(chart, output)
示例#4
0
def reading_ease(df):
    """Save two side-by-side scatter plots: ``fre`` vs ``fkg`` and ``gfi``.

    Only rows where ``fre``, ``fkg`` and ``gfi`` are all present are
    plotted.  Points are coloured by ``is_fiction`` and the combined chart
    is written to ``reading_ease.html`` through ``save_plot``.
    """
    scored = df[df[['fre', 'fkg', 'gfi']].notna().all(axis=1)]
    fiction_color = alt.Color('is_fiction', scale=fiction_scale)

    def scatter_against_fre(y_field):
        """Build a translucent point chart of ``fre`` against ``y_field``."""
        points = alt.Chart(scored).mark_point(opacity=0.2)
        return points.encode(x='fre', y=y_field, color=fiction_color)

    fkg_chart = scatter_against_fre('fkg')
    gfi_chart = scatter_against_fre('gfi')
    save_plot(fkg_chart | gfi_chart, 'reading_ease.html')
示例#5
0
def number_of_books_per_author(df, output='books_per_author.html'):
    """Save a horizontal bar chart of how many read books each author has.

    Args:
        df: Frame providing the ``is_read`` and ``author`` columns.
        output: File name passed on to ``save_plot``.
    """
    author_counts = df[df.is_read].author.value_counts()

    per_author = pd.DataFrame({
        'author': author_counts.index,
        'count': author_counts.values,
    }).sort_values('count', ascending=False)

    # sort=None keeps the bars in the frame's row order (highest count
    # first) instead of Altair's default ordering.
    author_axis = alt.Y('author', sort=None)
    chart = alt.Chart(per_author).mark_bar().encode(y=author_axis, x='count')
    save_plot(chart, output)
示例#6
0
def plot_pubdate(df, output='pubdate.html'):
    """Save bar charts of publication year, month and day counts.

    Rows without a ``pubdate`` are dropped.  The year chart has the
    non-fiction subset layered on top in orange; the month and day charts
    sit side by side underneath it.

    Args:
        df: Frame providing ``pubdate``, ``pubyear``, ``pubmonth``,
            ``pubday`` and ``is_fiction``.
        output: File name passed on to ``save_plot``.
    """
    dated = df[df.pubdate.notna()]

    def count_bars(frame, x, y, **mark_options):
        """Return a bar chart of ``frame`` with the given x/y shorthands."""
        return alt.Chart(frame).mark_bar(**mark_options).encode(x=x, y=y)

    by_year = count_bars(dated, 'pubyear:O', 'count(year):N')
    by_year_nonfiction = count_bars(dated[~dated.is_fiction],
                                    'pubyear:O', 'count(year):N',
                                    color='orange')
    by_month = count_bars(dated, 'pubmonth:O', 'count(pubmonth):N')
    by_day = count_bars(dated, 'pubday:O', 'count(pubday):N')

    by_year.width = 965
    top_row = by_year + by_year_nonfiction
    bottom_row = by_month | by_day
    save_plot(top_row & bottom_row, output)
示例#7
0
def save_graphics(C, ct, Cout, mcon, salto=3600):
    """Save the time-series and spatial plots through ``save_plot``.

    Args:
        C: Mapping whose ``'wd'`` entry is indexed as ``[:, 0]`` to obtain
            the x values of the spatial plots.
        ct: Sequence of x values for the time plots; sampled every
            ``salto`` entries starting at index 1.
        Cout: Mapping holding the spatial series under ``'c_<key>'``.
        mcon: Mapping from plot key to a 2-D array; its last column,
            sampled every ``salto`` rows starting at row 1, is plotted.
        salto: Sampling stride applied to both ``ct`` and ``mcon``.
    """
    # Use the same stride for x and y.  The x axis used to be hard-coded to
    # 3600, which only matched the y series when ``salto`` kept its default.
    x_data = ct[1::salto]

    plt.figure('tmp')

    # Time plots.
    # NOTE(review): the y label is looked up in Y_LABEL_GRAFICAS_ESPACIO
    # here as well -- confirm that a time-specific label is not intended.
    for k, titulo in config.TITULOS_GRAFICAS_PUNTO.items():
        save_plot(plt, titulo, config.X_LABEL_GRAFICAS_TIEMPO,
                  config.Y_LABEL_GRAFICAS_ESPACIO[k],
                  [x_data, mcon[k][1::salto, -1]], directorio_salida)

    c_x = C['wd'][:, 0]
    # Spatial plots.
    for k, titulo in config.TITLULOS_GRAFICAS_ESPACIO.items():
        save_plot(plt, titulo, config.X_LABEL_GRAFICAS_ESPACIO,
                  config.Y_LABEL_GRAFICAS_ESPACIO[k],
                  [c_x, Cout['c_{}'.format(k)]], directorio_salida)
示例#8
0
def plot_ranges(df, output='ranges.html'):
    """Save timeline charts of the date ranges in which books were read.

    Three vertically stacked charts are written to ``output``: a faint
    overlap view split by ``is_fiction``, one bar per book placed on the
    row computed by ``allocate_ys``, and one row per series.  A separate
    per-author chart is written to ``by_author.html``.

    When the module-level ``cutoff_date`` is set, books that ended before
    it are dropped.  Missing end dates are replaced by ``dummy_end_date``
    and rows without a start date are excluded.

    Args:
        df: Frame providing ``start``, ``end``, ``title``, ``series``,
            ``author`` and ``is_fiction``.  It is not modified.
        output: File name for the stacked chart.
    """
    if cutoff_date is not None:
        df = df[df.end.isna() | (df.end >= cutoff_date)]
    # ``fillna`` returns a new Series; its result used to be discarded, so
    # open-ended ranges never received the placeholder end date.
    df = df.assign(end=df.end.fillna(dummy_end_date))
    df = df[df.start.notna()]
    df = df.assign(ys=-allocate_ys(df))

    chart_width = 1600
    date_axis = alt.X('start', axis=alt.Axis(labelAngle=45, title='Date'))

    bars = alt.Chart(df) \
        .mark_bar(clip=True) \
        .encode(
            x=date_axis,
            x2='end',
            y=alt.Y('ys:N', axis=None),
            color=alt.Color('is_fiction', scale=fiction_scale, legend=None),
            tooltip='title'
        )
    bars.width = chart_width

    overlapped = alt.Chart(df) \
        .mark_bar(clip=True, opacity=0.1) \
        .encode(
            x=alt.X('start', axis=None),
            x2='end',
            y=alt.Y('is_fiction', axis=None),
            color=alt.Color('is_fiction', scale=fiction_scale, legend=None)
        )
    overlapped.width = chart_width

    # ``df`` is already restricted to started books inside the cutoff
    # window, so the subsets below need no further date filtering.
    in_series = df[df.series.notna()]
    by_series = alt.Chart(in_series) \
        .mark_bar(clip=True, opacity=0.7) \
        .encode(
            x=date_axis,
            x2='end',
            y=alt.Y('series', title='Series'),
            tooltip='title'
        )
    by_series.width = chart_width

    with_author = df[df.author.notna()]
    # Recompute the row allocation for this subset; ``assign`` avoids
    # writing a column onto a slice of ``df``.
    with_author = with_author.assign(ys=-allocate_ys(with_author))
    by_author = alt.Chart(with_author) \
        .mark_bar(clip=True, opacity=0.7) \
        .encode(
            x=date_axis,
            x2='end',
            y=alt.Y('author', title='Author'),
            color='series',
            tooltip='title'
        )
    by_author.width = chart_width

    save_plot(overlapped & bars & by_series, output)
    save_plot(by_author, 'by_author.html')
from util import DataLoader

# Load the training data and create the two networks.
print('main | Initializing ... ')
digits, noise = DataLoader.load_data()
D = Discriminator()
G = Generator()

# Train the GAN.
print('main | Training ... ')
epochs = 7000
log_every = 100  # one averaged error sample is recorded every N epochs
# NOTE(review): 14 is the divisor used to average the per-epoch error
# sums; presumably the number of samples in ``digits`` -- confirm.
samples_per_epoch = 14
dErrors = []
gErrors = []

for epoch in range(epochs):
    d_error1, d_error2, g_error = 0, 0, 0
    for digit in digits:
        d_error1 += D.fit(digit, isDigit=True)
        gOut = G.generate()
        # Accumulate over all samples like the other two error sums; plain
        # assignment kept only the last sample's value.
        d_error2 += D.fit(gOut)
        g_error += G.fit(gOut, D)

    if epoch % log_every == 0:
        dErrors.append(((d_error1 + d_error2) / 2) / samples_per_epoch)
        gErrors.append(g_error / samples_per_epoch)

# Show results.
# One x position per recorded sample, so the axis always matches the error
# lists even when ``epochs`` is not a multiple of ``log_every``.
sprt = list(range(len(gErrors)))
util.save_png(G.generate(), "gen_image_ephocs_" + str(epochs))
util.save_plot(gErrors, dErrors, sprt, "error_plot_ephocs_" + str(epochs))
print('main | Completed.')