Пример #1
0
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    """Helper: Construct a model from hyperparameters.

    Args:
        src_vocab: vocabulary argument given to the source ``Embeddings``.
        tgt_vocab: vocabulary argument given to the target ``Embeddings``
            and to the ``Generator``.
        N: layer count handed to both ``Encoder`` and ``Decoder``.
        d_model: model width shared by every sub-module built here.
        d_ff: second argument of ``PositionwiseFeedForward``.
        h: first argument of ``MultiHeadedAttention``.
        dropout: dropout value forwarded to the feed-forward block, the
            positional encoding and the encoder/decoder layers.

    Returns:
        The assembled ``EncoderDecoder`` whose parameters with more than one
        dimension have been Xavier/Glorot-uniform initialised.
    """
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    # Every consumer receives its own deep copy so no sub-module is shared.
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            # In-place variant; the un-suffixed ``xavier_uniform`` is the
            # deprecated spelling of the same initialiser.
            nn.init.xavier_uniform_(p)
    return model
Пример #2
0
 def load_model_from_package(cls, package):
     """Rebuild a model from a saved ``package`` mapping.

     The encoder and decoder are constructed from the hyperparameters stored
     in ``package``, wrapped with ``cls`` and loaded with
     ``package['state_dict']``.

     Returns:
         A ``(model, LFR_m, LFR_n)`` tuple; the two LFR values are read
         straight from ``package``.
     """
     # Positional constructor arguments, listed in the order they are passed.
     encoder_fields = ('d_input', 'n_layers_enc', 'n_head', 'd_k', 'd_v',
                       'd_model', 'd_inner')
     decoder_fields = ('sos_id', 'eos_id', 'vocab_size', 'd_word_vec',
                       'n_layers_dec', 'n_head', 'd_k', 'd_v', 'd_model',
                       'd_inner')

     encoder = Encoder(
         *[package[field] for field in encoder_fields],
         dropout=package['dropout'],
         pe_maxlen=package['pe_maxlen'])
     decoder = Decoder(
         *[package[field] for field in decoder_fields],
         dropout=package['dropout'],
         tgt_emb_prj_weight_sharing=package['tgt_emb_prj_weight_sharing'],
         pe_maxlen=package['pe_maxlen'])

     model = cls(encoder, decoder)
     model.load_state_dict(package['state_dict'])
     return model, package['LFR_m'], package['LFR_n']
Пример #3
0
def main():
    """Run the interactive quote-matching loop.

    Quotes and their embeddings are loaded from disk when available and
    regenerated otherwise; the loop then answers queries forever.
    """
    # Quotes: cached copy first, scrape as a fallback.
    print("Attempting to load quotes from file")
    quotes = load_quotes()
    if quotes is None:
        print("Scraping the web for new quotes")
        quotes = scrape()

    print("Creating sentence encoder")
    encoder = Encoder()

    # Embeddings: cached copy first, otherwise compute and persist them.
    print("Attempting to load quote embeddings from file")
    quote_embeddings = load_quote_embeddings()
    if quote_embeddings is None:
        print("Generating new quote embeddings")
        quote_embeddings = generate_quote_embeddings(encoder, quotes)
        print("Saving new quote embeddings to {0}".format(embeddings_file))
        save_pickle(quote_embeddings, embeddings_file)

    print("Creating predictor")
    predictor = Predictor(encoder, quote_embeddings)

    # No exit condition: the loop runs until the process is interrupted.
    while True:
        prediction = predictor.predict_output(query_input())
        quote_text, quote_info = prediction[0], prediction[1]
        print("Michael says: \"{0}\" in season {1}, episode {2}".format(
            quote_text, quote_info['season'], quote_info['episode']))
Пример #4
0
    def second_pass(self, file_lines):
        """Encode every instruction line and write it to ``self.hack_file``.

        Non-numeric A-instruction addresses are resolved through
        ``self.symbol_table``; symbols not yet present receive the next free
        address, counting up from ``self.MEM_START_ADDR``. Lines that are
        neither C- nor A-instructions are skipped.
        """
        next_free_address = self.MEM_START_ADDR
        for source_line in file_lines:
            parser = Parser(instruction=source_line)
            encoder = Encoder(instruction_type=parser.instruction_type)

            if parser.instruction_type == InstructionType.c_instruction:
                hack_line = encoder.encode(
                    dest=parser.dest, comp=parser.comp, jump=parser.jump)

            elif parser.instruction_type == InstructionType.a_instruction:
                try:
                    # Literal numeric address.
                    resolved = int(parser.address)
                except ValueError:
                    # Symbolic address: allocate on first sight, then look up.
                    if self.symbol_table.get(parser.address) is None:
                        self.symbol_table[parser.address] = next_free_address
                        next_free_address += 1
                    resolved = self.symbol_table.get(parser.address)

                hack_line = encoder.encode(address=resolved)

            else:
                continue

            self.hack_file.write(hack_line + '\r\n')
Пример #5
0
    def _build(self, Log, Scorer):
        """Attach the logger, scorer, encoder and language model to ``self``.

        Args:
            Log: logger to use; a new ``Logger()`` is created when ``None``.
            Scorer: scorer to use; a new ``Score()`` is created when ``None``.
        """
        # Identity checks: ``== None`` would invoke a custom ``__eq__`` on
        # whatever object the caller passed in.
        if Log is None:
            Log = Logger()
        if Scorer is None:
            Scorer = Score()

        self.Log = Log
        self.Scorer = Scorer
        self.Encoder = Encoder()
        self.GPT = GPT2LanguageModel(model_name=self.model)
Пример #6
0
def run(train_df, test_df):
    """Train the "GBM" model on ``train_df``, predict on ``test_df``, save.

    ``encoder.transform`` is called for its effect on each frame (its return
    value is ignored) before the frame is one-hot encoded with
    ``pd.get_dummies``.
    """
    encoder = Encoder(train_df)
    model = modelDict["GBM"](need_scale=False)

    # Training pass.
    encoder.transform(train_df)
    model.train(pd.get_dummies(train_df))

    # Prediction pass.
    encoder.transform(test_df)
    predictions = model.test(pd.get_dummies(test_df))

    save(test_df, predictions, encoder)
Пример #7
0
    def _build(self, mod, Log):
        """Build the application from the variables provided by the user.

        Args:
            mod: forwarded as the first argument of ``Score``.
            Log: logger to use; a default one is created when ``None``.
        """
        if Log is None:
            # The previous ``Log = Log()`` called ``None`` and always raised
            # TypeError. NOTE(review): ``Logger`` is the default used by the
            # sibling ``_build`` variant; confirm it is importable here.
            Log = Logger()

        if self._seed < 1:
            # Seed below 1: reseed from the clock and draw a fresh value.
            random.seed(time.time())
            self._seed = random.random()

        self.Log = Log
        self.Scorer = Score(mod, self.Log)
        self.Encoder = Encoder(seed=self._seed, probability=self._probability)
        self.GPT = GPT2LanguageModel(model_name=self.model)
Пример #8
0
    def __init__(self, params, embedding_matrix):
        """Assemble the embedding table, encoder, latent projections and decoder.

        Args:
            params: hyperparameter object; the attributes read here are
                ``word_vocab_size``, ``word_embed_size``, ``encoder_rnn_size``
                and ``latent_variable_size``.
            embedding_matrix: NumPy array whose values replace the embedding
                weights (converted to float, gradients disabled).
        """
        super(RVAE_dilated, self).__init__()

        self.params = params

        # Embedding table whose weights are replaced by the supplied matrix.
        self.word_embeddings = nn.Embedding(params.word_vocab_size,
                                            params.word_embed_size)
        fixed_weights = t.from_numpy(embedding_matrix).float()
        self.word_embeddings.weight = Parameter(fixed_weights,
                                                requires_grad=False)

        self.encoder = Encoder(params)

        # Both latent projections read a context of twice the encoder RNN size.
        context_size = params.encoder_rnn_size * 2
        latent_size = params.latent_variable_size
        self.context_to_mu = nn.Linear(context_size, latent_size)
        self.context_to_logvar = nn.Linear(context_size, latent_size)

        self.decoder = Decoder(params)
Пример #9
0
 def __init__(self):
     """Create six ``Encoder`` instances, a ``Pose`` and reset ``lastTime``.

     The encoders are stored as ``leftFEncoder``, ``leftMEncoder``,
     ``leftREncoder`` and the matching ``right*`` attributes.
     """
     for prefix in ('leftF', 'leftM', 'leftR', 'rightF', 'rightM', 'rightR'):
         setattr(self, prefix + 'Encoder', Encoder())
     self.pose = Pose()
     self.lastTime = 0
Пример #10
0
def run_ensemble(train_df):
    """Train the "GBM" model over a sweep of ``n_estimators`` values.

    Returns:
        Three parallel lists ``(scores, labels, estimators)``. Each sweep
        value contributes two entries: its train score labelled ``'train'``
        and its validation score labelled ``'val'``.
    """
    encoder = Encoder(train_df)
    encoder.transform(train_df)

    estimators, scores, labels = [], [], []
    # 1..4, then steps of 5 up to 55, steps of 10 up to 90, steps of 50 up to 450.
    sweep = [
        *range(1, 5),
        *range(5, 60, 5),
        *range(60, 100, 10),
        *range(100, 500, 50),
    ]
    for n_estimators in sweep:
        model = modelDict["GBM"](n_estimators=n_estimators)
        n_train_df = pd.get_dummies(train_df)
        train_score, val_score = model.train(n_train_df)
        scores.extend((train_score, val_score))
        estimators.extend((n_estimators, n_estimators))
        labels.extend(('train', 'val'))
    return scores, labels, estimators
Пример #11
0
def run_tests():
    """Encode a message, corrupt part of the codeword and decode it again.

    The decoder is configured from the encoder's own ``coding_polynomial``,
    ``k``, ``r`` and ``gf.index``. Results are printed, not returned.
    """
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial,
                k=e.k,
                t=e.r,
                gf_index=e.gf.index)

    message = "zaqwsxcderfvbgtyhnmjuik,ol.p;/zaqwsxedcrfvtgbyhnujmzaqwsxcderf"
    codeword = e.encode(message)
    # Decode of the untouched codeword; the result is overwritten below.
    decoded_message = d.decode(codeword, 'basic')

    corrupted_positions = range(2, 28)
    for i in corrupted_positions:
        codeword.elements[i] = codeword.elements[i].multiplicative_inversion()
    # Report the real number of corrupted elements: the previous hard-coded
    # "27" did not match the 26 positions covered by range(2, 28).
    print('{} errors occurred...'.format(len(corrupted_positions)))
    print(codeword)
    decoded_message = d.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:len(message)])
Пример #12
0
def train_model(dpath, ppath, epoch, version):
    """Fit a model on a CSV dataset and save it under ``model/<version>``.

    Args:
        dpath: path of the dataset; must end in ``.csv``.
        ppath: path of the pipeline loaded with ``joblib.load``.
        epoch: number of epochs passed to ``fit``.
        version: name of the sub-directory the model is saved to.

    Raises:
        ValueError: if ``dpath`` does not end in ``.csv``.
    """
    if not dpath.endswith(".csv"):
        raise ValueError("data format is not supported")
    frame = pd.read_csv(dpath)

    pipeline = joblib.load(ppath)
    # Features are every column except the first and the last.
    features = Encoder(pipeline).encode(frame.iloc[:, 1:-1])

    network = create_model([features.shape[1]])
    # The last column is the training target.
    network.fit(features, frame.iloc[:, -1], batch_size=1000, epochs=epoch)
    network.save(f"model/{version}")
Пример #13
0
def test_integration(model_service, xy):
    """End-to-end check: train a pipeline, encode 100 rows, query the service.

    Passes when the prediction endpoint returns exactly 100 predictions.
    """
    p = Path(curdir / "saved_model")  # not referenced again in this test
    assert model_service == "http://127.0.0.1:8501"

    # Train and persist the sklearn pipeline, then load it back.
    pipeline_path = curdir / ".tmp.joblib"
    train_sk_pipe(pipeline_path, xy[0])
    assert os.path.exists(pipeline_path)
    pipeline = load(pipeline_path)

    # Encode the first 100 samples into a JSON-serialisable nested list.
    instances = Encoder(pipeline).encode(xy[0][:100]).tolist()
    payload = json.dumps({"instances": instances})

    res = requests.post(
        model_service + "/v1/models/tp_pred:predict",
        data=payload,
    )

    assert len(res.json()["predictions"]) == 100
Пример #14
0
def run_example_program():
    """Demonstrate an encode/decode round trip, before and after corruption.

    Prints the message, its codeword and the decoded text, then inverts the
    codeword elements at positions 1..27 and decodes again.
    """
    enc = Encoder()
    dec = Decoder(coding_polynomial=enc.coding_polynomial,
                  k=enc.k,
                  t=enc.r,
                  gf_index=enc.gf.index)

    message = "zaqwsxcderfvbgtyhnmjuik,ol.p;/zaqwsxedcrf"
    shown_length = len(message)
    print('Message: ' + message)

    # Clean round trip.
    codeword = enc.encode(message)
    print('Codeword: ' + str(codeword))
    decoded_message = dec.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:shown_length])

    # Corrupt 27 consecutive elements and decode again.
    for position in range(1, 28):
        original_element = codeword.elements[position]
        codeword.elements[position] = original_element.multiplicative_inversion()
    print('27 errors occurred...')
    print(codeword)
    decoded_message = dec.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:shown_length])
Пример #15
0
def test_encoder_fix_errors(ii, k, test_type, message):
    """Corrupt ``k`` codeword elements and check the message is still decoded.

    Args:
        ii: not used in the body.
        k: number of elements to corrupt.
        test_type: ``'multiple'`` picks ``k`` random positions; any other
            value picks ``k`` consecutive positions from a random start.
        message: text to encode.

    One line is written to ``passed`` (with the decode time) on success, or
    to ``failed`` when the decoder raises ``CannotDetectErrorException``.
    """
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial, k=e.k, t=e.r, gf_index=e.gf.index)
    encoded_message = e.encode(message)
    codeword_length = len(encoded_message)

    if test_type == 'multiple':
        random_indexes = random.sample(range(codeword_length), k)
    else:
        random_start = random.randint(0, codeword_length - k - 1)
        random_indexes = list(range(random_start, random_start + k))

    for position in random_indexes:
        element = encoded_message.elements[position]
        encoded_message.elements[position] = element.multiplicative_inversion()

    try:
        start = time.time()
        decoded_message = d.decode(encoded_message, 'basic')
        stop = time.time()
        passed.write("{}, {}, {}, {}, {}\n".format(k, test_type, message, random_indexes, stop-start))
    except CannotDetectErrorException:
        failed.write("{}, {}, {}, {}\n".format(k, test_type, message, random_indexes))
        assert False
    assert message in decoded_message
Пример #16
0
    def test_minimal(self):
        """An ``Encoder`` built with minimal arguments exposes its basics."""
        vocab_size = tag_count = 10
        char_count = 100
        encoder = Encoder(vocab_size, tag_count, num_chars=char_count)

        assert encoder.num_tags == tag_count
        assert isinstance(encoder.word_embedding, nn.Embedding)
Пример #17
0
def encoder():
    """Return a new ``Encoder`` built with its default arguments."""
    return Encoder()
Пример #18
0
def main(args):
    """Build the loaders, the Transformer and its optimizer, then train.

    Args:
        args: parsed options object; every hyperparameter and path used
            below is read from its attributes.
    """
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)

    tr_dataset = AudioDataset('train', args.batch_size)
    cv_dataset = AudioDataset('dev', args.batch_size)

    # Options common to both loaders; only the training loader also gets
    # ``shuffle``.
    shared_loader_options = dict(
        batch_size=1,
        num_workers=args.num_workers,
        feature_dim=args.feature_dim,
        char_list=char_list,
        LFR_m=args.LFR_m,
        LFR_n=args.LFR_n,
    )
    tr_loader = AudioDataLoader(tr_dataset,
                                shuffle=args.shuffle,
                                path_list=tr_dataset.path_lst,
                                label_list=tr_dataset.han_lst,
                                **shared_loader_options)
    cv_loader = AudioDataLoader(cv_dataset,
                                path_list=cv_dataset.path_lst,
                                label_list=cv_dataset.han_lst,
                                **shared_loader_options)

    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    # Positional arguments that the encoder and decoder have in common.
    shared_dims = (args.n_head, args.d_k, args.d_v, args.d_model, args.d_inner)

    encoder = Encoder(args.d_input * args.LFR_m,
                      args.d_low_dim,
                      args.n_layers_enc,
                      *shared_dims,
                      dropout=args.dropout,
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(sos_id,
                      eos_id,
                      vocab_size,
                      args.d_word_vec,
                      args.n_layers_dec,
                      *shared_dims,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)

    model = Transformer(encoder, decoder)
    print(model)
    model.cuda()

    # optimizer
    adam = torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09)
    optimizer = TransformerOptimizer(adam, args.init_lr, args.d_model,
                                     args.warmup_steps)

    # solver
    Solver(data, model, optimizer, args).train()
Пример #19
0
 def __init__(self, hidden_encoder_size, z_dim, hidden_decoder_size, output_size, rnn_type, device):
     """Create the encoder/decoder pair and remember the target device.

     ``z_dim`` is passed to both halves; ``device`` is passed to the decoder
     and also stored on the instance.
     """
     super(GrammarVAE, self).__init__()
     encoder = Encoder(hidden_encoder_size, z_dim)
     decoder = Decoder(z_dim, hidden_decoder_size, output_size, device, rnn_type)
     # Targets are assigned left to right, so the attribute order is kept.
     self.encoder, self.decoder, self.device = encoder, decoder, device
Пример #20
0
from termcolor import colored
from src.encoder import Encoder
from src.universal_function import universalFunction


def present():
    """Print the instructions, the LL(1) grammar and its parse table.

    Each section is a green heading followed by the content of the matching
    text file, read from the current working directory. A blank line
    separates consecutive sections.
    """
    sections = (
        ('For following instructions:', 'instructions.txt'),
        ('We have this LL(1) grammar:', 'grammar.txt'),
        ('Parse table of this grammar is:', 'parse-table.txt'),
    )
    for index, (heading, path) in enumerate(sections):
        if index:
            print()
        print(colored(heading, 'green'))
        # Context manager closes the handle; the bare open().read() left
        # every file open until garbage collection.
        with open(path, "r") as handle:
            print(handle.read())


if __name__ == "__main__":
    present()
    # filePath = 'data/in/book-example.txt'
    filePath = 'data/in/test.txt'
    # Read the program text with a context manager so the handle is closed
    # right away instead of being left to the garbage collector.
    with open(filePath, "r") as inputFile:
        fileString = inputFile.read()
    print(colored('input file:', 'green'))
    encoder = Encoder(fileString)
    instructions = encoder.encodeLines()
    programCode = encoder.calcuateProgramCode()
    inputValues = encoder.getInputVaules()
    # The universal function receives the input values followed by the
    # program code as a single list.
    input_for_universal_program = inputValues + [programCode]
    universalFunction(input_for_universal_program, instructions)
Пример #21
0
    :return:
    """
    parser = ArgumentParser()
    parser.add_argument("path", help="path to .csv data file")
    return parser.parse_args()

# Script entry point: read the data file, encode it and report the
# cross-validated score of a Keras classifier.
if __name__ == '__main__':
    # NOTE: this rebinds the name `args` from the callable to its result.
    args = args()
    DATA_PATH = args.path

    # Fixed seed, reused below as the KFold random_state.
    seed = 42
    np.random.seed(seed)

    harvester = DataHarvester(DATA_PATH)
    harvester.read_file()
    harvester.cut_lines()

    # The encoder is built from harvester.read_data and encodes both the
    # data and the labels.
    encoder = Encoder(harvester.read_data)
    encoder.encode_data()
    encoder.encode_label()

    X = encoder.encoded
    Y = encoder.encoded_label

    model_builder = ModelBuilder(encoder.num_of_label_classes, encoder.num_of_data_classes)
    estimator = KerasClassifier(build_fn=model_builder, epochs=20, batch_size=5, verbose=5)
    # 30 shuffled folds.
    kfold = KFold(n_splits=30, shuffle=True, random_state=seed)

    results = cross_val_score(estimator, X, Y, cv=kfold)
    # Mean and standard deviation of the fold scores, printed as percentages.
    print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
Пример #22
0
# Append the special tokens; each takes the next index after the current size.
for special_token in ('<unk>', '<start>', '<end>'):
    word_map[special_token] = len(word_map) + 1
# Padding is pinned to index 0.
word_map['<pad>'] = 0


checkpoint = '../input/image-copy-2/checkpoint_copy.pt'

decoder = DecoderWithAttention(
    embed_dim=emb_dim,
    decoder_dim=decoder_dim,
    vocab_size=len(word_map),
    dropout=dropout,
)
# Adam only receives the decoder parameters that still require gradients.
decoder_optimizer = torch.optim.Adam(
    params=(param for param in decoder.parameters() if param.requires_grad),
    lr=decoder_lr,
)

encoder = Encoder()


# Move to GPU, if available
decoder = decoder.to(device)
encoder = encoder.to(device)

# Both networks are switched to evaluation mode.
decoder.eval()
encoder.eval()
from scipy.misc import imread, imresize


if checkpoint is not None:
    # map_location remaps the stored tensors onto the selected device, so a
    # checkpoint saved on a GPU still loads on a CPU-only machine.
    checkpoint = torch.load(checkpoint, map_location=device)
    decoder.load_state_dict(checkpoint['decoder_state_dict'])
    decoder_optimizer.load_state_dict(checkpoint['decoder_optimizer_dict'])