Example #1
def train(hidden_size, batch_size):
    batcher = Batcher()
    print('Data:')
    print(batcher.inputs.shape)
    print(batcher.targets.shape)

    model = get_model(hidden_size, batcher.chars_len())

    model.compile(loss={
        'op': 'categorical_crossentropy',
        'char': 'categorical_crossentropy'
    },
                  optimizer='adam',
                  metrics=['accuracy'])

    model.summary()

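    # Effectively an endless training loop: each step trains on one freshly
    # generated chunk and evaluates on a held-out validation chunk.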
    for grad_step in range(int(1e9)):
        (x_train, y_train_1, y_train_2, x_val, y_val_1, y_val_2,
         val_sub_inputs, val_sub_targets) = gen_large_chunk_single_thread(
             batcher, batcher.inputs, batcher.targets, chunk_size=batch_size)
        model.train_on_batch(x=x_train, y=[y_train_1, y_train_2])
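        # Report the named validation metrics for this step.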
        print(
            dict(
                zip(model.metrics_names,
                    model.test_on_batch(x=x_val, y=[y_val_1, y_val_2]))))
        # guess = c_table.decode(preds[0], calc_argmax=False)
        # top_passwords = predict_top_most_likely_passwords_monte_carlo(model, row_x, 100)
        # p = model.predict(row_x, batch_size=32, verbose=0)[0]
        # p.shape (12, 82)
        # [np.random.choice(a=range(82), size=1, p=p[i, :]) for i in range(12)]
        # s = [np.random.choice(a=range(82), size=1, p=p[i, :])[0] for i in range(12)]
        # c_table.decode(s, calc_argmax=False)
        # Could sample 1000 and take the most_common()
        if grad_step % 100 == 0:
            row_x, password_target, password_input = x_val, val_sub_targets, val_sub_inputs
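            # The model has two heads: 'op' predicts the edit operation and
            # 'char' predicts the character involved in that edit.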
            ops, char = model.predict(row_x, verbose=0)
            predicted_chars = list(batcher.decode(char))
            ops = ops.argmax(axis=1)
            op_names = {0: 'insert', 1: 'replace', 2: 'delete'}
            decoded_op = [op_names.get(int(op), 'delete') for op in ops]
            for i, (x, y, pc, po) in enumerate(
                    zip(password_input, password_target, predicted_chars,
                        decoded_op)):
                print('x            :', x)
                print('y            :', y)
                print('predict char :', pc)
                print('predict op   :', po)
                print('---------------------')
                if i >= 100:
                    break
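get_model is defined elsewhere in the repository. The sketch below shows one compatible architecture, assuming two softmax heads named 'op' (three edit operations) and 'char' (one character out of the vocabulary); the layer choices, the input shape, the password_length default, and the get_model_sketch name are assumptions inferred from the compile call and the decoding code above, not the actual implementation.

import tensorflow as tf

def get_model_sketch(hidden_size, vocab_size, password_length=12):
    # One-hot encoded password characters go through a recurrent encoder.
    inputs = tf.keras.layers.Input(shape=(password_length, vocab_size))
    encoded = tf.keras.layers.LSTM(hidden_size)(inputs)
    # Two softmax heads, matching the loss dictionary used in train():
    # 'op' chooses among insert/replace/delete, 'char' chooses a character.
    op = tf.keras.layers.Dense(3, activation='softmax', name='op')(encoded)
    char = tf.keras.layers.Dense(vocab_size, activation='softmax', name='char')(encoded)
    return tf.keras.Model(inputs=inputs, outputs=[op, char])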
Example #2
import numpy as np

def predict_top_most_likely_passwords(sed: Batcher, model, row_x, n):
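    # Sample n candidate passwords: one character per position, drawn from the
    # model's predicted probability distribution for that position.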
    p = model.predict(row_x, batch_size=32, verbose=0)[0]
    most_likely_passwords = []
    for i in range(n):
        # Note: this sampling should ideally also respect the edit-distance constraint.
        pa = np.array([
            np.random.choice(a=range(sed.ENCODING_MAX_SIZE_VOCAB + 2),
                             size=1,
                             p=p[j, :])
            for j in range(sed.ENCODING_MAX_PASSWORD_LENGTH)
        ]).flatten()
        most_likely_passwords.append(sed.decode(pa, calc_argmax=False))
    return most_likely_passwords
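The commented-out code in Example #1 also refers to a predict_top_most_likely_passwords_monte_carlo helper and to sampling many candidates and keeping the most common ones. Below is a minimal sketch of that idea, reusing the sampler above; the signature, the top_k parameter, and the assumption that sed.decode returns a hashable string are mine, not the repository's.

from collections import Counter

def predict_top_most_likely_passwords_monte_carlo(sed: Batcher, model, row_x, n, top_k=10):
    # Draw n candidate passwords, then keep the ones that occur most often
    # across the samples (e.g. sample 1000 and take the most_common()).
    candidates = predict_top_most_likely_passwords(sed, model, row_x, n)
    return Counter(candidates).most_common(top_k)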