def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)  # in-place variant; bare xavier_uniform is deprecated
    return model
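# Hypothetical usage sketch (not in the original source): build a small model
# to sanity-check the wiring. The vocab sizes and N=2 here are made-up values;
# the helper classes come from the surrounding code.
tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
print("parameters:", sum(p.numel() for p in tmp_model.parameters()))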
@classmethod  # implied by the cls parameter; the decorator was lost in extraction
def load_model_from_package(cls, package):
    encoder = Encoder(package['d_input'],
                      package['n_layers_enc'],
                      package['n_head'],
                      package['d_k'],
                      package['d_v'],
                      package['d_model'],
                      package['d_inner'],
                      dropout=package['dropout'],
                      pe_maxlen=package['pe_maxlen'])
    decoder = Decoder(package['sos_id'],
                      package['eos_id'],
                      package['vocab_size'],
                      package['d_word_vec'],
                      package['n_layers_dec'],
                      package['n_head'],
                      package['d_k'],
                      package['d_v'],
                      package['d_model'],
                      package['d_inner'],
                      dropout=package['dropout'],
                      tgt_emb_prj_weight_sharing=package['tgt_emb_prj_weight_sharing'],
                      pe_maxlen=package['pe_maxlen'])
    model = cls(encoder, decoder)
    model.load_state_dict(package['state_dict'])
    LFR_m, LFR_n = package['LFR_m'], package['LFR_n']
    return model, LFR_m, LFR_n
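# Hypothetical usage (the checkpoint path is made up; Transformer is the class
# from the training script further below, assumed to carry this classmethod):
import torch

package = torch.load('exp/final.pth.tar', map_location='cpu')
model, LFR_m, LFR_n = Transformer.load_model_from_package(package)
model.eval()  # LFR_m / LFR_n are needed to frame-stack features the same way at inference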
def main():
    # Load a dictionary mapping Michael's quotes to their season and episode.
    print("Attempting to load quotes from file")
    quotes = load_quotes()
    if quotes is None:
        print("Scraping the web for new quotes")
        quotes = scrape()

    print("Creating sentence encoder")
    encoder = Encoder()

    print("Attempting to load quote embeddings from file")
    quote_embeddings = load_quote_embeddings()
    if quote_embeddings is None:
        print("Generating new quote embeddings")
        quote_embeddings = generate_quote_embeddings(encoder, quotes)
        print("Saving new quote embeddings to {0}".format(embeddings_file))
        save_pickle(quote_embeddings, embeddings_file)

    print("Creating predictor")
    predictor = Predictor(encoder, quote_embeddings)

    while True:
        input_sentence = query_input()
        prediction = predictor.predict_output(input_sentence)
        output_quote = prediction[0]
        output_season = prediction[1]['season']
        output_episode = prediction[1]['episode']
        print("Michael says: \"{0}\" in season {1}, episode {2}".format(
            output_quote, output_season, output_episode))
def second_pass(self, file_lines):
    memory_address = self.MEM_START_ADDR
    for line in file_lines:
        parser = Parser(instruction=line)
        encoder = Encoder(instruction_type=parser.instruction_type)
        if parser.instruction_type == InstructionType.c_instruction:
            hack_line = encoder.encode(dest=parser.dest, comp=parser.comp,
                                       jump=parser.jump)
        elif parser.instruction_type == InstructionType.a_instruction:
            try:
                integer_address = int(parser.address)
            except ValueError:
                # Symbolic address: allocate the next free RAM slot on first use.
                if self.symbol_table.get(parser.address) is None:
                    self.symbol_table[parser.address] = memory_address
                    memory_address += 1
                integer_address = self.symbol_table.get(parser.address)
            hack_line = encoder.encode(address=integer_address)
        else:
            continue
        self.hack_file.write(hack_line + '\r\n')
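# Hypothetical driver; only second_pass above comes from the source. A first
# pass would already have bound (LABEL) symbols to ROM addresses in
# symbol_table; per the Hack spec, MEM_START_ADDR for variables is 16.
asm = Assembler('Prog.asm')      # hypothetical constructor
asm.first_pass(asm.file_lines)   # hypothetical: record (LABEL) -> ROM address
asm.second_pass(asm.file_lines)  # from the source: emit binary, allocating RAM from 16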
def _build(self, Log, Scorer):
    if Log is None:
        Log = Logger()
    if Scorer is None:
        Scorer = Score()
    self.Log = Log
    self.Scorer = Scorer
    self.Encoder = Encoder()
    self.GPT = GPT2LanguageModel(model_name=self.model)
def run(train_df, test_df):
    encoder = Encoder(train_df)
    lr = modelDict["GBM"](need_scale=False)

    encoder.transform(train_df)
    n_train_df = pd.get_dummies(train_df)
    lr.train(n_train_df)

    encoder.transform(test_df)
    n_test_df = pd.get_dummies(test_df)
    y = lr.test(n_test_df)
    save(test_df, y, encoder)
def _build(self, mod, Log):
    '''Builds the application using variables provided by the user.'''
    if Log is None:
        Log = Logger()  # fall back to a default logger (Log() here would call None)
    if self._seed < 1:
        random.seed(time.time())
        self._seed = random.random()
    self.Log = Log
    self.Scorer = Score(mod, self.Log)
    self.Encoder = Encoder(seed=self._seed, probability=self._probability)
    self.GPT = GPT2LanguageModel(model_name=self.model)
def __init__(self, params, embedding_matrix):
    super(RVAE_dilated, self).__init__()
    self.params = params

    # Frozen word embeddings initialized from a pretrained matrix.
    self.word_embeddings = nn.Embedding(params.word_vocab_size, params.word_embed_size)
    self.word_embeddings.weight = Parameter(
        t.from_numpy(embedding_matrix).float(), requires_grad=False)

    self.encoder = Encoder(self.params)
    self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2,
                                   self.params.latent_variable_size)
    self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2,
                                       self.params.latent_variable_size)
    self.decoder = Decoder(self.params)
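# For context (standard VAE math, not the author's exact forward pass): the two
# linear heads above produce mu and log-variance for the reparameterization trick.
def reparameterize(mu, logvar):
    std = t.exp(0.5 * logvar)  # sigma = exp(log(sigma^2) / 2)
    eps = t.randn_like(std)    # eps ~ N(0, I)
    return mu + eps * std      # z = mu + sigma * eps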
def __init__(self):
    # Six wheel encoders; F/M/R presumably front, middle, rear on each side.
    self.leftFEncoder = Encoder()
    self.leftMEncoder = Encoder()
    self.leftREncoder = Encoder()
    self.rightFEncoder = Encoder()
    self.rightMEncoder = Encoder()
    self.rightREncoder = Encoder()
    self.pose = Pose()
    self.lastTime = 0
def run_ensemble(train_df):
    encoder = Encoder(train_df)
    encoder.transform(train_df)

    estimators = []
    scores = []
    labels = []
    # Sweep n_estimators on a coarsening grid: 1-4, 5-55, 60-90, 100-450.
    nums = (list(range(1, 5, 1)) + list(range(5, 60, 5)) +
            list(range(60, 100, 10)) + list(range(100, 500, 50)))
    for n in nums:
        lr = modelDict["GBM"](n_estimators=n)
        n_train_df = pd.get_dummies(train_df)
        train_score, val_score = lr.train(n_train_df)
        scores += [train_score, val_score]
        estimators += [n, n]
        labels += ['train', 'val']
    return scores, labels, estimators
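# Hypothetical follow-up: the parallel scores/labels/estimators lists are shaped
# for a long-format plot; with seaborn (an assumption) that might look like:
import seaborn as sns

scores, labels, estimators = run_ensemble(train_df)
sns.lineplot(x=estimators, y=scores, hue=labels)  # train vs. val curves over n_estimators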
def run_tests():
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial, k=e.k, t=e.r,
                gf_index=e.gf.index)
    message = "zaqwsxcderfvbgtyhnmjuik,ol.p;/zaqwsxedcrfvtgbyhnujmzaqwsxcderf"
    codeword = e.encode(message)
    decoded_message = d.decode(codeword, 'basic')  # sanity decode of the clean codeword

    # Corrupt symbols 2..27 by inverting them in the field (26 errors).
    for i in range(2, 28):
        codeword.elements[i] = codeword.elements[i].multiplicative_inversion()
    print('26 errors occurred...')
    print(codeword)
    decoded_message = d.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:len(message)])
def train_model(dpath, ppath, epoch, version):
    if dpath.endswith(".csv"):
        d = pd.read_csv(dpath)
    else:
        raise ValueError("data format is not supported")
    pipe = joblib.load(ppath)
    encoder = Encoder(pipe)
    # Features are all columns between the first (id) and the last (label).
    x = encoder.encode(d.iloc[:, 1:-1])
    m = create_model([x.shape[1]])
    m.fit(x, d.iloc[:, -1], batch_size=1000, epochs=epoch)
    m.save(f"model/{version}")
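# Hypothetical invocation (paths and hyperparameters are made up); the CSV is
# expected to carry an id-like first column and the label in the last column,
# as implied by the iloc slicing above.
train_model("data/train.csv", "artifacts/pipeline.joblib", epoch=20, version="1")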
def test_integration(model_service, xy):
    assert model_service == "http://127.0.0.1:8501"

    fname = curdir / ".tmp.joblib"
    train_sk_pipe(fname, xy[0])
    assert os.path.exists(fname)

    pipe = load(fname)
    encoder = Encoder(pipe)
    matrices = encoder.encode(xy[0][:100]).tolist()
    res = requests.post(
        model_service + "/v1/models/tp_pred:predict",
        data=json.dumps({"instances": matrices}),
    )
    assert len(res.json()["predictions"]) == 100
def run_example_program():
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial, k=e.k, t=e.r,
                gf_index=e.gf.index)
    message = "zaqwsxcderfvbgtyhnmjuik,ol.p;/zaqwsxedcrf"
    print('Message: ' + message)
    codeword = e.encode(message)
    print('Codeword: ' + str(codeword))
    decoded_message = d.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:len(message)])

    # Corrupt symbols 1..27 by inverting them in the field (27 errors).
    for i in range(1, 28):
        codeword.elements[i] = codeword.elements[i].multiplicative_inversion()
    print('27 errors occurred...')
    print(codeword)
    decoded_message = d.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:len(message)])
def test_encoder_fix_errors(ii, k, test_type, message):
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial, k=e.k, t=e.r,
                gf_index=e.gf.index)
    encoded_message = e.encode(message)

    # Choose k positions to corrupt: scattered ('multiple') or a contiguous burst.
    if test_type == 'multiple':
        random_indexes = random.sample(range(0, len(encoded_message)), k)
    else:
        random_start = random.randint(0, len(encoded_message) - k - 1)
        random_indexes = list(range(random_start, random_start + k))
    for i in random_indexes:
        encoded_message.elements[i] = encoded_message.elements[i].multiplicative_inversion()

    try:
        start = time.time()
        decoded_message = d.decode(encoded_message, 'basic')
        stop = time.time()
        passed.write("{}, {}, {}, {}, {}\n".format(k, test_type, message,
                                                   random_indexes, stop - start))
    except CannotDetectErrorException:
        failed.write("{}, {}, {}, {}\n".format(k, test_type, message, random_indexes))
        assert False
    assert message in decoded_message
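# The test takes its arguments from outside; a sketch of how it might be wired
# up with pytest.mark.parametrize (the value grid is illustrative, not the
# project's actual configuration):
import pytest

@pytest.mark.parametrize("message", ["zaqwsxcderfvbgtyhnmjuik,ol.p;/"])
@pytest.mark.parametrize("test_type", ["multiple", "burst"])
@pytest.mark.parametrize("k", [1, 5, 13])
@pytest.mark.parametrize("ii", range(3))  # repetition index for random trials
def test_fix_errors_grid(ii, k, test_type, message):
    test_encoder_fix_errors(ii, k, test_type, message)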
def test_minimal(self):
    num_words, num_tags, num_chars = 10, 10, 100
    encoder = Encoder(num_words, num_tags, num_chars=num_chars)
    assert encoder.num_tags == num_tags
    assert isinstance(encoder.word_embedding, nn.Embedding)
import pytest

@pytest.fixture  # assumption: treated as a pytest fixture given its name and use
def encoder():
    e = Encoder()
    return e
def main(args):
    # Load the dictionary and generate char_list, sos_id, eos_id.
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)

    tr_dataset = AudioDataset('train', args.batch_size)
    cv_dataset = AudioDataset('dev', args.batch_size)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=tr_dataset.path_lst,
                                label_list=tr_dataset.han_lst,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=cv_dataset.path_lst,
                                label_list=cv_dataset.han_lst,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    encoder = Encoder(args.d_input * args.LFR_m, args.d_low_dim,
                      args.n_layers_enc, args.n_head,
                      args.d_k, args.d_v,
                      args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
    decoder = Decoder(sos_id, eos_id, vocab_size,
                      args.d_word_vec, args.n_layers_dec, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    print(model)
    model.cuda()

    # Optimizer with the Transformer warmup learning-rate schedule.
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.init_lr, args.d_model, args.warmup_steps)

    # Solver handles the train/validation loop.
    solver = Solver(data, model, optimizer, args)
    solver.train()
def __init__(self, hidden_encoder_size, z_dim, hidden_decoder_size,
             output_size, rnn_type, device):
    super(GrammarVAE, self).__init__()
    self.encoder = Encoder(hidden_encoder_size, z_dim)
    self.decoder = Decoder(z_dim, hidden_decoder_size, output_size,
                           device, rnn_type)
    self.device = device
from termcolor import colored
from src.encoder import Encoder
from src.universal_function import universalFunction


def present():
    print(colored('For following instructions:', 'green'))
    print(open('instructions.txt', "r").read())
    print()
    print(colored('We have this LL(1) grammar:', 'green'))
    print(open('grammar.txt', "r").read())
    print()
    print(colored('Parse table of this grammar is:', 'green'))
    print(open('parse-table.txt', "r").read())


if __name__ == "__main__":
    present()
    # filePath = 'data/in/book-example.txt'
    filePath = 'data/in/test.txt'
    fileString = open(filePath, "r").read()
    print(colored('input file:', 'green'))

    encoder = Encoder(fileString)
    instructions = encoder.encodeLines()
    programCode = encoder.calcuateProgramCode()
    inputValues = encoder.getInputVaules()
    input_for_universal_program = inputValues + [programCode]
    universalFunction(input_for_universal_program, instructions)
def args():
    """Parse command-line arguments.

    :return: parsed arguments
    """
    parser = ArgumentParser()
    parser.add_argument("path", help="path to .csv data file")
    return parser.parse_args()


if __name__ == '__main__':
    args = args()
    DATA_PATH = args.path
    seed = 42
    np.random.seed(seed)

    harvester = DataHarvester(DATA_PATH)
    harvester.read_file()
    harvester.cut_lines()

    encoder = Encoder(harvester.read_data)
    encoder.encode_data()
    encoder.encode_label()
    X = encoder.encoded
    Y = encoder.encoded_label

    model_builder = ModelBuilder(encoder.num_of_label_classes,
                                 encoder.num_of_data_classes)
    estimator = KerasClassifier(build_fn=model_builder, epochs=20,
                                batch_size=5, verbose=5)
    kfold = KFold(n_splits=30, shuffle=True, random_state=seed)
    results = cross_val_score(estimator, X, Y, cv=kfold)
    print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100,
                                         results.std() * 100))
# Special tokens; <pad> must map to index 0.
word_map['<unk>'] = len(word_map) + 1
word_map['<start>'] = len(word_map) + 1
word_map['<end>'] = len(word_map) + 1
word_map['<pad>'] = 0

checkpoint = '../input/image-copy-2/checkpoint_copy.pt'
decoder = DecoderWithAttention(embed_dim=emb_dim,
                               decoder_dim=decoder_dim,
                               vocab_size=len(word_map),
                               dropout=dropout)
decoder_optimizer = torch.optim.Adam(
    params=filter(lambda p: p.requires_grad, decoder.parameters()),
    lr=decoder_lr)
encoder = Encoder()

# Move to GPU, if available
decoder = decoder.to(device)
encoder = encoder.to(device)
decoder.eval()
encoder.eval()

# Note: scipy.misc.imread/imresize were removed in SciPy >= 1.3.
from scipy.misc import imread, imresize

if checkpoint is not None:
    checkpoint = torch.load(checkpoint)
    decoder.load_state_dict(checkpoint['decoder_state_dict'])
    decoder_optimizer.load_state_dict(checkpoint['decoder_optimizer_dict'])