def __init__(self):
    self.conf = Configs()
    self.client = self.login()
    self.win = None
    self.list_type = None
    self.playlists_list = None
    self.updating_thread = None
    if 'RESOURCEPATH' in os.environ:
        self.cache = '{}/cache'.format(os.environ['RESOURCEPATH'])
    else:
        self.dir_name = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
        self.cache = os.path.join(self.dir_name, 'cache')
import os

import wx
from wx.adv import Animation, AnimationCtrl
from wx.lib import buttons

# project-local: Configs (import not shown in the original)


class Builders(object):
    def __init__(self):
        self.conf = Configs()
        if 'RESOURCEPATH' in os.environ:
            self.asset_dir = '{}/assets'.format(os.environ['RESOURCEPATH'])
        else:
            self.dir_name = os.path.dirname(
                os.path.dirname(os.path.abspath(__file__)))
            self.asset_dir = os.path.join(self.dir_name, 'assets')

    def button_builder(self, parent, label, name):
        button = wx.Button(parent, label=label, name=name)
        button.SetForegroundColour(self.conf.get_attr("TEXT_COLOR"))
        button.SetBackgroundColour(self.conf.get_attr("BACKGROUND_COLOR"))
        return button

    def input_builder(self, parent, name, size=(250, 20), value=""):
        input_field = wx.TextCtrl(parent, size=size, name=name, value=value)
        input_field.SetBackgroundColour(self.conf.get_attr("BACKGROUND_COLOR"))
        input_field.SetForegroundColour(self.conf.get_attr("TEXT_COLOR"))
        return input_field

    def static_text_builder(self, parent, label):
        text = wx.StaticText(parent, label=label)
        text.SetForegroundColour(self.conf.get_attr("TEXT_COLOR"))
        return text

    def build_bitmap_button(self, btn_dict):
        handler = btn_dict['handler']
        name = btn_dict['name']
        parent = btn_dict['parent']
        size_h = btn_dict['size_h']
        size_w = btn_dict['size_w']
        img = wx.Image(os.path.join(self.asset_dir, btn_dict['bitmap']),
                       wx.BITMAP_TYPE_PNG)
        img = img.Scale(size_w, size_h, wx.IMAGE_QUALITY_HIGH)
        img = wx.Bitmap(img)
        btn = buttons.GenBitmapButton(parent=parent, bitmap=img, name=name)
        btn.Bind(wx.EVT_BUTTON, handler)
        return btn

    def build_playlist_cover(self, playlist_dict):
        cover_name = playlist_dict['cover']
        cover_parent = playlist_dict['parent']
        cover = Animation('{}/{}.gif'.format(self.asset_dir, cover_name))
        cover_ctrl = AnimationCtrl(cover_parent, -1, cover, name=cover_name)
        return cover_ctrl
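# A minimal usage sketch for the builders above, assuming the project-local
# Configs answers get_attr for the TEXT_COLOR / BACKGROUND_COLOR keys the
# builder methods read (illustrative only, not part of the app):
import wx

app = wx.App()
frame = wx.Frame(None, title="builders demo")
b = Builders()
ok_button = b.button_builder(frame, label="OK", name="ok_button")
name_input = b.input_builder(frame, name="name_input", value="type here")
caption = b.static_text_builder(frame, label="A caption")
frame.Show()
app.MainLoop()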
# project-local (imports not shown in the original): Configs, DataLoader


def main(configs: Configs = None, data_loader: DataLoader = None):
    """Data processor: from raw SAP files to database tables consumed by the
    forecast model.

    Usage example:
        $ python spike-challenge/src/make_dataset.py
    """
    if configs is None:
        configs = Configs('default_config.yaml')
    if data_loader is None:
        data_loader = DataLoader()
    data_loader.load_data()
    # parser = Parser(
    #     data=data_loader
    # )
import os

import wx
from wx.lib import buttons

# project-local: Configs, YandexAPI, Builders (imports not shown in the original)


class MainPanel(object):
    def __init__(self, parent):
        self.conf = Configs()
        self.parent = parent
        self.api = YandexAPI()
        self.builders = Builders()
        if 'RESOURCEPATH' in os.environ:
            self.asset_dir = '{}/assets'.format(os.environ['RESOURCEPATH'])
        else:
            self.dir_name = os.path.dirname(
                os.path.dirname(os.path.abspath(__file__)))
            self.asset_dir = os.path.join(self.dir_name, 'assets')
        self.main_pnl = wx.Panel(parent)
        self.song_band = None
        self.song_name = None
        self.playback_slider = None
        self.play_pause_btn = None
        self.next_track = None
        self.prev_track = None

    def toggle_gauge_slider(self, slider_type='slider'):
        if slider_type == 'gauge':
            control = wx.Gauge(
                self.main_pnl,
                range=20,
                size=(self.parent.GetSize()[0], 5),
                style=wx.GA_HORIZONTAL | wx.GA_SMOOTH
            )
            # Pulse() returns None, so it must not be chained onto the
            # constructor when the control itself is needed afterwards.
            control.Pulse()
        else:
            control = wx.Slider(
                self.main_pnl,
                size=(self.parent.GetSize()[0], 5),
                minValue=0,
                maxValue=20
            )
            control.Disable()
        return control

    def make_main_panel(self):
        self.main_pnl.SetBackgroundColour(self.conf.get_attr("BACKGROUND_COLOR"))
        self.playback_slider = self.toggle_gauge_slider()
        main_sizer = wx.BoxSizer(wx.VERTICAL)
        toolbar = self.build_audio_bar()
        main_sizer.Add(self.playback_slider, 0, wx.ALL | wx.EXPAND, 0)
        main_sizer.AddStretchSpacer(1)
        main_sizer.Add(self.playlist_list(), 0)
        main_sizer.AddStretchSpacer(1)
        main_sizer.Add(toolbar, 0)
        self.main_pnl.SetSizer(main_sizer)
        return self.main_pnl

    def build_audio_bar(self):
        """Builds the audio bar controls."""
        audio_bar_sizer = wx.BoxSizer(wx.HORIZONTAL)
        self.prev_track = self.builders.build_bitmap_button({
            'bitmap': 'player_prev.png',
            'handler': self.parent.on_prev,
            'name': 'prev',
            'parent': self.main_pnl,
            'size_h': 30,
            'size_w': 30
        })
        self.song_band = self.builders.static_text_builder(parent=self.main_pnl,
                                                           label="")
        self.song_name = self.builders.static_text_builder(parent=self.main_pnl,
                                                           label="")
        song_sizer = wx.BoxSizer(wx.VERTICAL)
        song_sizer.Add(self.song_band, 0)
        song_sizer.Add(self.song_name, 0)
        audio_bar_sizer.Add(self.prev_track, 0)
        img = wx.Image(os.path.join(self.asset_dir, "player_play.png"),
                       wx.BITMAP_TYPE_ANY)
        img = img.Scale(30, 30, wx.IMAGE_QUALITY_HIGH)
        img = wx.Bitmap(img)
        self.play_pause_btn = buttons.GenBitmapToggleButton(self.main_pnl,
                                                            bitmap=img,
                                                            name="play")
        self.play_pause_btn.Enable(False)
        img = wx.Image(os.path.join(self.asset_dir, "player_pause.png"),
                       wx.BITMAP_TYPE_ANY)
        img = img.Scale(30, 30, wx.IMAGE_QUALITY_HIGH)
        img = wx.Bitmap(img)
        self.play_pause_btn.SetBitmapSelected(img)
        self.play_pause_btn.SetInitialSize()
        self.play_pause_btn.Bind(wx.EVT_BUTTON, self.parent.on_play)
        audio_bar_sizer.Add(self.play_pause_btn, 0)
        self.next_track = self.builders.build_bitmap_button({
            'bitmap': 'player_next.png',
            'handler': self.parent.on_next,
            'name': 'next',
            'parent': self.main_pnl,
            'size_h': 30,
            'size_w': 30
        })
        audio_bar_sizer.Add(self.next_track, 0)
        audio_bar_sizer.AddSpacer(5)
        audio_bar_sizer.Add(song_sizer, 0)
        self.next_track.Disable()
        self.prev_track.Disable()
        return audio_bar_sizer

    def enable_play_button(self):
        self.play_pause_btn.Enable(True)

    def playlist_list(self):
        playlists_sizer = wx.BoxSizer(wx.HORIZONTAL)
        playlists_sizer.AddSpacer(5)
        playlists = [
            {"cover": "playlistOfTheDay", "parent": self.main_pnl},
            {"cover": "neverHeard", "parent": self.main_pnl},
            {"cover": "missedLikes", "parent": self.main_pnl},
            {"cover": "recentTracks", "parent": self.main_pnl}
        ]
        for playlist in playlists:
            item = self.builders.build_playlist_cover(playlist)
            playlists_sizer.Add(item, 0)
            playlists_sizer.AddSpacer(5)
            item.Bind(wx.EVT_ENTER_WINDOW, self.on_hover)
            item.Bind(wx.EVT_LEAVE_WINDOW, self.on_unhover)
            item.Bind(wx.EVT_LEFT_DOWN, self.on_click)
        return playlists_sizer

    def on_click(self, event):
        playlist_type = event.EventObject.GetName()
        self.api.preparation(playlist_type, self.parent)

    def on_unhover(self, event):
        event.EventObject.Stop()

    def on_hover(self, event):
        event.EventObject.Play()
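# MainPanel calls back into its parent (on_prev, on_play, on_next), so the
# parent must be a frame providing those handlers, as the Window class further
# down does. A minimal hypothetical host for the panel (illustrative only):
import wx


class DemoFrame(wx.Frame):
    def __init__(self):
        super(DemoFrame, self).__init__(None, title="panel demo")
        self.panel = MainPanel(self)
        self.panel.make_main_panel()
        self.Show()

    # stub handlers; the real ones live on Window
    def on_prev(self, event):
        pass

    def on_play(self, event):
        pass

    def on_next(self, event):
        pass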
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# project-local helpers (imports not shown in the original): Configs,
# build_dictionary, read_words, read_data, sentence_to_onehot, batchnize,
# seq2seq, EarlyStopper, Monitor


def main(args):
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_path = '%s/examples/model/basic_nmt/model' % ROOT
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # read data
    source_dictionary, source_reverse_dictionary = build_dictionary(
        read_words(source_train_data_path), vocabulary_size)
    source_train_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_train_data_path)
    ]
    target_dictionary, target_reverse_dictionary = build_dictionary(
        read_words(target_train_data_path), vocabulary_size)
    target_train_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_train_data_path)
    ]
    source_valid_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_valid_data_path)
    ]
    target_valid_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_valid_data_path)
    ]
    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholders
    encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='decoder_labels')

    # embedding
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, input_embedding_size], -1.0, 1.0),
        dtype=tf.float32, name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs)

    # encoder
    encoder_units = hidden_units
    encoder_cell = tf.contrib.rnn.LSTMCell(encoder_units)
    _, encoder_final_state = tf.nn.dynamic_rnn(encoder_cell,
                                               encoder_inputs_embedded,
                                               dtype=tf.float32,
                                               time_major=True)

    # decoder
    decoder_units = encoder_units
    decoder_cell = tf.contrib.rnn.LSTMCell(decoder_units)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)
    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels, depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )
    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)
    saver = tf.train.Saver()

    batch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            loss_freq = train_step // 100
            loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(train_step)
            sess.run(tf.global_variables_initializer())
            for i in range(train_step):
                m.monitor(i, loss_suffix)
                source_train_batch, _ = batchnize(source_train_datas,
                                                  batch_size,
                                                  batch_idx['train'])
                target_train_batch, batch_idx['train'] = batchnize(
                    target_train_datas, batch_size, batch_idx['train'])
                batch_data = seq2seq(source_train_batch, target_train_batch,
                                     max_time, vocabulary_size)
                feed_dict = {
                    encoder_inputs: batch_data['encoder_inputs'],
                    decoder_inputs: batch_data['decoder_inputs'],
                    decoder_labels: batch_data['decoder_labels']
                }
                sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                if i % loss_freq == 0:
                    source_valid_batch, _ = batchnize(source_valid_datas,
                                                      batch_size,
                                                      batch_idx['valid'])
                    target_valid_batch, batch_idx['valid'] = batchnize(
                        target_valid_datas, batch_size, batch_idx['valid'])
                    batch_data = seq2seq(source_valid_batch,
                                         target_valid_batch, max_time,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                    loss_log.append(loss_val)
                    loss_suffix = 'loss: %f' % loss_val
                    es_status = es(loss_val)
                    if i > train_step // 2 and es_status:
                        print('early stopping at step: %d' % i)
                        break
            saver.save(sess, model_path)
            print('save at %s' % model_path)
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_loss.png' % model_path)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             batch_idx['test'])
            target_test_batch, batch_idx['test'] = batchnize(
                target_test_datas, batch_size, batch_idx['test'])
            batch_data = seq2seq(source_test_batch, target_test_batch,
                                 max_time, vocabulary_size)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        input_sentences = ''
        predict_sentences = ''
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector]
                for vector in input_vector if not vector == PAD
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector]
                for vector in predict_vector if not vector == PAD
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
                open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)
        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')
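# `batchnize` is a project-local helper whose definition is not shown above.
# From the call sites, it returns a batch plus the index where the next batch
# starts, wrapping to 0 at the end of the data (the loops above use that 0 to
# detect a completed pass). A hypothetical reconstruction, for illustration:
def batchnize(data, batch_size, batch_idx):
    batch = [data[(batch_idx + i) % len(data)] for i in range(batch_size)]
    next_idx = batch_idx + batch_size
    if next_idx >= len(data):
        next_idx = 0  # signals that a full pass over `data` is complete
    return batch, next_idx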
import argparse
import os
import random

import numpy as np
import tensorflow as tf

# project-local helpers (imports not shown in the original): Configs,
# read_word_vocab, read_essays_words, load_word_embedding_dict,
# build_embedd_table, get_scaled_down_scores, pad_hierarchical_text_sequences,
# batch_generator, SharedModelV2, SharedModelEvaluatorV2, full_train_step


def main():
    parser = argparse.ArgumentParser(description="Shared Model")
    parser.add_argument('--test_prompt_id', type=int, default=1,
                        help='prompt id of test essay set')
    parser.add_argument('--seed', type=int, default=12, help='set random seed')
    args = parser.parse_args()
    test_prompt_id = args.test_prompt_id
    seed = args.seed

    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    print("Test prompt id is {} of type {}".format(test_prompt_id,
                                                   type(test_prompt_id)))
    print("Seed: {}".format(seed))

    configs = Configs()
    data_path = configs.DATA_PATH
    train_path = data_path + '/train.tsv'
    dev_path = data_path + '/dev.tsv'
    pretrained_embedding = configs.PRETRAINED_EMBEDDING
    embedding_path = configs.EMBEDDING_PATH
    embedding_dim = configs.EMBEDDING_DIM
    vocab_size = configs.VOCAB_SIZE
    epochs = configs.EPOCHS
    batch_size = configs.BATCH_SIZE

    read_configs = {
        'train_path': train_path,
        'dev_path': dev_path,
        'vocab_size': vocab_size
    }

    word_vocab = read_word_vocab(read_configs)
    print('vocab complete')
    train_data_src, train_data_tgt, dev_data_src, dev_data_tgt = \
        read_essays_words(read_configs, word_vocab, test_prompt_id)

    if pretrained_embedding:
        embedd_dict, embedd_dim, _ = load_word_embedding_dict(embedding_path)
        embedd_matrix = build_embedd_table(word_vocab, embedd_dict, embedd_dim,
                                           caseless=True)
        embed_table = [embedd_matrix]
    else:
        embed_table = None

    max_sentlen = max(train_data_src['max_sentlen'],
                      train_data_tgt['max_sentlen'],
                      dev_data_src['max_sentlen'],
                      dev_data_tgt['max_sentlen'])
    max_sentnum = max(train_data_src['max_sentnum'],
                      train_data_tgt['max_sentnum'],
                      dev_data_src['max_sentnum'],
                      dev_data_tgt['max_sentnum'])
    print('max sent length: {}'.format(max_sentlen))
    print('max sent num: {}'.format(max_sentnum))

    train_data_src['y_scaled'] = get_scaled_down_scores(
        train_data_src['data_y'], train_data_src['prompt_ids'])
    train_data_tgt['y_scaled'] = get_scaled_down_scores(
        train_data_tgt['data_y'], train_data_tgt['prompt_ids'])
    dev_data_src['y_scaled'] = get_scaled_down_scores(
        dev_data_src['data_y'], dev_data_src['prompt_ids'])
    dev_data_tgt['y_scaled'] = get_scaled_down_scores(
        dev_data_tgt['data_y'], dev_data_tgt['prompt_ids'])

    X_train_src = pad_hierarchical_text_sequences(train_data_src['words'],
                                                  max_sentnum, max_sentlen)
    X_train_tgt = pad_hierarchical_text_sequences(train_data_tgt['words'],
                                                  max_sentnum, max_sentlen)
    X_dev_src = pad_hierarchical_text_sequences(dev_data_src['words'],
                                                max_sentnum, max_sentlen)
    X_dev_tgt = pad_hierarchical_text_sequences(dev_data_tgt['words'],
                                                max_sentnum, max_sentlen)

    X_train_src = X_train_src.reshape(
        (X_train_src.shape[0], X_train_src.shape[1] * X_train_src.shape[2]))
    X_train_tgt = X_train_tgt.reshape(
        (X_train_tgt.shape[0], X_train_tgt.shape[1] * X_train_tgt.shape[2]))
    X_dev_src = X_dev_src.reshape(
        (X_dev_src.shape[0], X_dev_src.shape[1] * X_dev_src.shape[2]))
    X_dev_tgt = X_dev_tgt.reshape(
        (X_dev_tgt.shape[0], X_dev_tgt.shape[1] * X_dev_tgt.shape[2]))

    Y_train_src = np.array(train_data_src['y_scaled'])
    Y_train_tgt = np.array(train_data_tgt['y_scaled'])
    Y_dev_src = np.array(dev_data_src['y_scaled'])
    Y_dev_tgt = np.array(dev_data_tgt['y_scaled'])

    train_src_batches = batch_generator([X_train_src, Y_train_src], batch_size)
    train_tgt_batches = batch_generator([X_train_tgt, Y_train_tgt], batch_size)

    disc_loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    score_loss_fn = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

    shared_model = SharedModelV2(len(word_vocab), max_sentnum, max_sentlen,
                                 embedding_dim, embed_table)
    steps = (X_train_src.shape[0] // batch_size) * epochs
    evaluator = SharedModelEvaluatorV2(test_prompt_id, X_dev_src, X_train_tgt,
                                       X_dev_tgt, dev_data_src['prompt_ids'],
                                       train_data_tgt['prompt_ids'],
                                       dev_data_tgt['prompt_ids'], Y_dev_src,
                                       Y_train_tgt, Y_dev_tgt)
    evaluator.evaluate(shared_model, 0, print_info=True)

    for step in range(steps):
        src_label = tf.zeros((batch_size, 1))
        tgt_label = tf.ones((batch_size, 1))
        X_train_src_batch, Y_train_src_batch = next(train_src_batches)
        X_train_tgt_batch, Y_train_tgt_batch = next(train_tgt_batches)
        X_both = tf.concat([X_train_src_batch, X_train_tgt_batch], axis=0)
        label_both = tf.concat([src_label, tgt_label], axis=0)
        fe_loss, score_loss, disc_loss = full_train_step(
            X_train_src_batch, Y_train_src_batch, X_both, label_both,
            shared_model, score_loss_fn, disc_loss_fn, optimizer)
        current_step = step + 1
        if current_step % (steps // epochs) == 0:
            print("fe loss (for one batch) at step %d: %.4f"
                  % (current_step, float(fe_loss)))
            print("score loss (for one batch) at step %d: %.4f"
                  % (current_step, float(score_loss)))
            print("disc loss (for one batch) at step %d: %.4f"
                  % (current_step, float(disc_loss)))
            print('steps', steps)
            print('step', current_step)
            print('epochs', epochs)
            print('batch_size', batch_size)
            print('Evaluating epoch', current_step / (steps // epochs))
            if step == 0:
                evaluator.evaluate(shared_model, 0)
            else:
                evaluator.evaluate(shared_model,
                                   current_step / (steps // epochs))

    evaluator.print_final_info()
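# `batch_generator` is project-local and not shown; the training loop above
# draws from it with next() and unpacks an (X, Y) pair, so a plausible sketch
# is an endless, reshuffling mini-batch generator (illustrative only):
import numpy as np


def batch_generator(arrays, batch_size):
    n = len(arrays[0])
    while True:
        order = np.random.permutation(n)  # new shuffle each pass
        for start in range(0, n - batch_size + 1, batch_size):
            idx = order[start:start + batch_size]
            yield tuple(a[idx] for a in arrays)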
import wx

# project-local (imports not shown in the original): Configs, YandexAPI,
# builders, main_panel, login, events, Player, notify


class Window(wx.Frame):
    def __init__(self, *args, **kw):
        # ensure the parent's __init__ is called
        super(Window, self).__init__(*args, **kw)
        self.conf = Configs()
        self.api = YandexAPI()
        self.builders = builders.Builders()
        self.panel = main_panel.MainPanel(self)
        self.login = login.Login(self)
        self.SetTitle(self.conf.get_attr("APP_TITLE"))
        self.main_pnl = self.panel.make_main_panel()
        self.input = wx.TextCtrl()
        self.gauge = self.panel.playback_slider
        self.playlists_list = None
        self.account_menu = None
        self.playlist_selection = None
        self.playlists = None
        self.player = Player(parent=self.panel, slider=self.gauge)
        self.Bind(events.FIRST_TRACK_APPEAR, self.on_first_track)
        self.Bind(events.PLAYLIST_READY, self.on_playlist_ready)
        self.gauge.Bind(wx.EVT_SLIDER, self.player.on_seek)
        self.make_menu()
        self.Center()
        self.Show()
        if not self.api.is_logged_in() and \
                wx.FindWindowByName("login_popup") is None:
            self.login.create_login_popup()

    def make_menu(self):
        self.account_menu = wx.Menu()
        if self.api.is_logged_in():
            logout = self.account_menu.Append(1, "&Logout\tCtrl-L",
                                              "Logout from account")
            self.Bind(wx.EVT_MENU, self.login.on_logout_menu, logout)
        player_menu = wx.Menu()
        self.playlists_list = wx.Menu(wx.ID_ANY)
        self.playlist_selection = player_menu.Append(wx.ID_ANY, "Playlists",
                                                     self.playlists_list)
        if self.api.is_logged_in():
            self.playlists = self.api.get_play_lists_list()
            for playlist in self.playlists:
                self.Bind(
                    wx.EVT_MENU,
                    self.on_list_select,
                    self.playlists_list.Append(wx.ID_ANY, playlist['name']))
        else:
            self.playlist_selection.Enable(False)
        help_menu = wx.Menu()
        about_item = help_menu.Append(wx.ID_ABOUT)
        menu_bar = wx.MenuBar()
        menu_bar.Append(self.account_menu, "Account")
        menu_bar.Append(player_menu, "Player")
        menu_bar.Append(help_menu, "Help")
        self.SetMenuBar(menu_bar)
        self.Bind(wx.EVT_MENU, self.on_about, about_item)

    def on_first_track(self, event):
        playlist_type = event.playlist_type
        self.gauge.Enable()
        self.panel.enable_play_button()
        self.player.load_playlist(playlist_type)

    def on_playlist_ready(self, event):
        playlist_name = event.playlist_name
        playlist_type = event.playlist_type
        self.player.load_playlist(playlist_type)
        notify(playlist_name, "Playlist is ready", "")

    def on_prev(self, event):
        self.player.on_prev(event)

    def on_next(self, event):
        self.player.on_next(event)

    def on_play(self, event):
        if not event.GetIsDown():
            self.on_pause(event)
            return
        self.player.on_play(event)

    def on_pause(self, event):
        self.player.on_pause(event)

    def on_list_select(self, event):
        playlist_label = event.GetEventObject().GetLabelText(event.GetId())
        playlist_type = None
        for playlist in self.playlists:
            if playlist['name'] == playlist_label:
                playlist_type = playlist['type']
        self.api.preparation(playlist_type, self)

    def on_exit(self, event):
        self.Close()

    def on_about(self, event):
        wx.MessageBox("This is a wxPython Hello World sample",
                      "About Hello World 2", wx.OK | wx.ICON_ASTERISK)
import os

from configs.consts import ProjectPath
from configs.configs import Configs

__all__ = ['ProjectPath', 'cfg']

cfg = Configs()


def _mkdir_dirs(*dir_paths):
    for dir_path in dir_paths:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)


_mkdir_dirs(
    ProjectPath.LOGS_DIR.value,
    ProjectPath.CKPTS_DIR.value,
    ProjectPath.VOC_CKPTS_DIR.value,
    ProjectPath.CONSOLE_LOGS_DIR.value,
    ProjectPath.TB_LOGS_DIR.value,
)
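# Other modules can then rely on the directories existing as soon as the
# package is imported, e.g. (illustrative usage):
from configs import ProjectPath, cfg

print(cfg)                         # the shared Configs instance
print(ProjectPath.LOGS_DIR.value)  # created at import time by _mkdir_dirs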
import os
import pathlib
import pickle
import shutil
import sys

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# project-local helpers (imports not shown in the original): Configs,
# build_dictionary, read_words, read_data, sentence_to_onehot, batchnize,
# seq2seq, EarlyStopper, Monitor, Logger


def main(args):
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    output = c.option.get('output', 'examples/model/buf')
    model_directory = '%s/%s' % (ROOT, output)
    model_path = '%s/model' % model_directory
    dictionary_path = {
        'source': '%s/source_dictionary.pickle' % model_directory,
        'source_reverse': '%s/source_reverse_dictionary.pickle' % model_directory,
        'target': '%s/target_dictionary.pickle' % model_directory,
        'target_reverse': '%s/target_reverse_dictionary.pickle' % model_directory
    }
    PAD = c.const['PAD']
    BOS = c.const['BOS']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # initialize output directory
    if pathlib.Path(model_directory).exists():
        print('Warning: model %s already exists.' % model_directory)
        print('The old model will be overwritten.')
        while True:
            print('Do you want to continue? [yes|no]')
            command = input('> ')
            if command == 'yes':
                shutil.rmtree(model_directory)
                break
            elif command == 'no':
                sys.exit()
            else:
                print('You can only input "yes" or "no".')
    print('Make new model: %s' % model_directory)
    pathlib.Path(model_directory).mkdir()

    # read data
    if args.mode == 'train':
        source_dictionary, source_reverse_dictionary = build_dictionary(
            read_words(source_train_data_path), vocabulary_size)
        source_train_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_train_data_path)
        ]
        target_dictionary, target_reverse_dictionary = build_dictionary(
            read_words(target_train_data_path), vocabulary_size)
        target_train_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_train_data_path)
        ]
        source_valid_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_valid_data_path)
        ]
        target_valid_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_valid_data_path)
        ]
        if args.debug:
            source_train_datas = source_train_datas[:1000]
            target_train_datas = target_train_datas[:1000]
    else:
        with open(dictionary_path['source'], 'rb') as f1, \
                open(dictionary_path['source_reverse'], 'rb') as f2, \
                open(dictionary_path['target'], 'rb') as f3, \
                open(dictionary_path['target_reverse'], 'rb') as f4:
            source_dictionary = pickle.load(f1)
            source_reverse_dictionary = pickle.load(f2)
            target_dictionary = pickle.load(f3)
            target_reverse_dictionary = pickle.load(f4)
    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholders
    encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='decoder_labels')

    # embedding
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, input_embedding_size], -1.0, 1.0),
        dtype=tf.float32, name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs)

    # bidirectional encoder
    encoder_units = hidden_units
    encoder_layers_fw = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell_fw = tf.contrib.rnn.MultiRNNCell(encoder_layers_fw)
    encoder_layers_bw = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell_bw = tf.contrib.rnn.MultiRNNCell(encoder_layers_bw)
    (encoder_output_fw, encoder_output_bw), encoder_state = \
        tf.nn.bidirectional_dynamic_rnn(encoder_cell_fw,
                                        encoder_cell_bw,
                                        encoder_inputs_embedded,
                                        dtype=tf.float32,
                                        time_major=True)
    encoder_outputs = tf.concat((encoder_output_fw, encoder_output_bw), 2)
    encoder_state = tuple(
        tf.contrib.rnn.LSTMStateTuple(
            tf.concat((encoder_state[0][layer].c, encoder_state[1][layer].c), 1),
            tf.concat((encoder_state[0][layer].h, encoder_state[1][layer].h), 1))
        for layer in range(layers))

    # decoder with attention
    decoder_units = encoder_units * 2
    attention_units = decoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)
    sequence_length = tf.cast([max_time] * batch_size, dtype=tf.int32)
    beam_width = 1
    tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
        encoder_outputs, multiplier=beam_width)
    tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
        encoder_state, multiplier=beam_width)
    tiled_sequence_length = tf.contrib.seq2seq.tile_batch(
        sequence_length, multiplier=beam_width)
    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
        num_units=attention_units,
        memory=tiled_encoder_outputs,
        memory_sequence_length=tiled_sequence_length)
    attention_cell = tf.contrib.seq2seq.AttentionWrapper(
        cell, attention_mechanism, attention_layer_size=256)
    decoder_initial_state = attention_cell.zero_state(
        dtype=tf.float32, batch_size=batch_size * beam_width)
    decoder_initial_state = decoder_initial_state.clone(
        cell_state=tiled_encoder_final_state)
    if args.mode == 'train':
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_inputs_embedded,
            sequence_length=tf.cast([max_time] * batch_size, dtype=tf.int32),
            time_major=True)
    elif args.mode == 'eval':
        """
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_inputs_embedded,
            sequence_length=tf.cast([max_time] * batch_size, dtype=tf.int32),
            time_major=True)
        """
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=embeddings,
            start_tokens=tf.tile([BOS], [batch_size]),
            end_token=EOS)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=attention_cell,
        helper=helper,
        initial_state=decoder_initial_state)
    decoder_outputs = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder,
        output_time_major=True,
        impute_finished=False,
        maximum_iterations=max_time)
    decoder_logits = tf.contrib.layers.linear(decoder_outputs[0][0],
                                              vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels, depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )
    loss = tf.reduce_mean(stepwise_cross_entropy)
    regularizer = 0.0 * tf.nn.l2_loss(decoder_outputs[0][0])
    train_op = tf.train.AdamOptimizer().minimize(loss + regularizer)
    saver = tf.train.Saver()

    minibatch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            global_max_step = train_step * (
                len(source_train_datas) // batch_size + 1)
            loss_freq = global_max_step // 100 if global_max_step > 100 else 1
            loss_log = []
            batch_loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(global_max_step)
            log = Logger('%s/log' % model_directory)
            sess.run(tf.global_variables_initializer())
            global_step = 0
            stop_flag = False
            for batch in range(train_step):
                if stop_flag:
                    break
                current_batch_loss_log = []
                while True:  # minibatch process
                    m.monitor(global_step, loss_suffix)
                    source_train_batch, _ = batchnize(source_train_datas,
                                                      batch_size,
                                                      minibatch_idx['train'])
                    target_train_batch, minibatch_idx['train'] = batchnize(
                        target_train_datas, batch_size, minibatch_idx['train'])
                    batch_data = seq2seq(source_train_batch, target_train_batch,
                                         max_time, vocabulary_size,
                                         reverse=True)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                    if global_step % loss_freq == 0:
                        source_valid_batch, _ = batchnize(
                            source_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        target_valid_batch, minibatch_idx['valid'] = batchnize(
                            target_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        batch_data = seq2seq(source_valid_batch,
                                             target_valid_batch, max_time,
                                             vocabulary_size, reverse=True)
                        feed_dict = {
                            encoder_inputs: batch_data['encoder_inputs'],
                            decoder_inputs: batch_data['decoder_inputs'],
                            decoder_labels: batch_data['decoder_labels']
                        }
                        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                        loss_log.append(loss_val)
                        current_batch_loss_log.append(loss_val)
                        loss_suffix = 'loss: %f' % loss_val
                    global_step += 1
                    if minibatch_idx['train'] == 0:
                        batch_loss = np.mean(current_batch_loss_log)
                        batch_loss_log.append(batch_loss)
                        loss_msg = 'Batch: {}/{}, batch loss: {}'.format(
                            batch + 1, train_step, batch_loss)
                        print(loss_msg)
                        log(loss_msg)
                        es_status = es(batch_loss)
                        if batch > train_step // 2 and es_status:
                            print('early stopping at step: %d' % global_step)
                            stop_flag = True
                        break

            # save tf.graph and variables
            saver.save(sess, model_path)
            print('save at %s' % model_path)

            # save plots of loss
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_global_loss.png' % model_path)
            plt.figure()
            plt.plot(np.arange(len(batch_loss_log)), batch_loss_log)
            plt.savefig('%s_batch_loss.png' % model_path)

            # save dictionaries
            with open(dictionary_path['source'], 'wb') as f1, \
                    open(dictionary_path['source_reverse'], 'wb') as f2, \
                    open(dictionary_path['target'], 'wb') as f3, \
                    open(dictionary_path['target_reverse'], 'wb') as f4:
                pickle.dump(source_dictionary, f1)
                pickle.dump(source_reverse_dictionary, f2)
                pickle.dump(target_dictionary, f3)
                pickle.dump(target_reverse_dictionary, f4)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             minibatch_idx['test'])
            target_test_batch, minibatch_idx['test'] = batchnize(
                target_test_datas, batch_size, minibatch_idx['test'])
            batch_data = seq2seq(source_test_batch, target_test_batch,
                                 max_time, vocabulary_size, reverse=True)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        input_sentences = ''
        predict_sentences = ''
        ignore_token = EOS
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector]
                for vector in input_vector if not vector == ignore_token
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector]
                for vector in predict_vector if not vector == ignore_token
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
                open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)
        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')
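# `seq2seq` here is the project's batch-preparation helper, not a library
# call; from the placeholders it feeds, it builds three time-major int32
# arrays, with reverse=True flipping the source sentence (a common trick for
# encoder-decoder models). A hypothetical sketch, assuming token ids for PAD,
# BOS and EOS (the real values come from c.const):
import numpy as np

PAD, BOS, EOS = 0, 1, 2  # assumed ids; the real ones come from c.const


def seq2seq(source_batch, target_batch, max_time, vocabulary_size,
            reverse=False):
    def pad(seq):
        seq = list(seq)[:max_time]
        return seq + [PAD] * (max_time - len(seq))

    enc = [pad(reversed(s)) if reverse else pad(s) for s in source_batch]
    dec_in = [pad([BOS] + list(t)) for t in target_batch]
    dec_out = [pad(list(t) + [EOS]) for t in target_batch]
    # transpose to the (max_time, batch) layout expected by time_major=True
    return {
        'encoder_inputs': np.asarray(enc, dtype=np.int32).T,
        'decoder_inputs': np.asarray(dec_in, dtype=np.int32).T,
        'decoder_labels': np.asarray(dec_out, dtype=np.int32).T,
    }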
import argparse
import os
import random
import time

import numpy as np
import tensorflow as tf

# project-local helpers (imports not shown in the original): Configs,
# read_pos_vocab, read_essays_single_score, get_single_scaled_down_score,
# pad_hierarchical_text_sequences, build_PAES, Evaluator


def main():
    parser = argparse.ArgumentParser(description="PAES_attributes model")
    parser.add_argument('--test_prompt_id', type=int, default=1,
                        help='prompt id of test essay set')
    parser.add_argument('--seed', type=int, default=12, help='set random seed')
    parser.add_argument('--attribute_name', type=str,
                        help='name of the attribute to be trained on')
    args = parser.parse_args()
    test_prompt_id = args.test_prompt_id
    attribute_name = args.attribute_name
    seed = args.seed

    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    print("Test prompt id is {} of type {}".format(test_prompt_id,
                                                   type(test_prompt_id)))
    print("Attribute: {}".format(attribute_name))
    print("Seed: {}".format(seed))

    configs = Configs()
    data_path = configs.DATA_PATH
    train_path = data_path + str(test_prompt_id) + '/train.pk'
    dev_path = data_path + str(test_prompt_id) + '/dev.pk'
    test_path = data_path + str(test_prompt_id) + '/test.pk'
    features_path = configs.FEATURES_PATH
    readability_path = configs.READABILITY_PATH
    vocab_size = configs.VOCAB_SIZE
    epochs = configs.EPOCHS
    batch_size = configs.BATCH_SIZE

    read_configs = {
        'train_path': train_path,
        'dev_path': dev_path,
        'test_path': test_path,
        'features_path': features_path,
        'readability_path': readability_path,
        'vocab_size': vocab_size
    }

    pos_vocab = read_pos_vocab(read_configs)
    train_data, dev_data, test_data = read_essays_single_score(
        read_configs, pos_vocab, attribute_name)

    max_sentlen = max(train_data['max_sentlen'], dev_data['max_sentlen'],
                      test_data['max_sentlen'])
    max_sentnum = max(train_data['max_sentnum'], dev_data['max_sentnum'],
                      test_data['max_sentnum'])
    print('max sent length: {}'.format(max_sentlen))
    print('max sent num: {}'.format(max_sentnum))

    train_data['y_scaled'] = get_single_scaled_down_score(
        train_data['data_y'], train_data['prompt_ids'], attribute_name)
    dev_data['y_scaled'] = get_single_scaled_down_score(
        dev_data['data_y'], dev_data['prompt_ids'], attribute_name)
    test_data['y_scaled'] = get_single_scaled_down_score(
        test_data['data_y'], test_data['prompt_ids'], attribute_name)

    X_train_pos = pad_hierarchical_text_sequences(train_data['pos_x'],
                                                  max_sentnum, max_sentlen)
    X_dev_pos = pad_hierarchical_text_sequences(dev_data['pos_x'],
                                                max_sentnum, max_sentlen)
    X_test_pos = pad_hierarchical_text_sequences(test_data['pos_x'],
                                                 max_sentnum, max_sentlen)

    X_train_pos = X_train_pos.reshape(
        (X_train_pos.shape[0], X_train_pos.shape[1] * X_train_pos.shape[2]))
    X_dev_pos = X_dev_pos.reshape(
        (X_dev_pos.shape[0], X_dev_pos.shape[1] * X_dev_pos.shape[2]))
    X_test_pos = X_test_pos.reshape(
        (X_test_pos.shape[0], X_test_pos.shape[1] * X_test_pos.shape[2]))

    X_train_linguistic_features = np.array(train_data['features_x'])
    X_dev_linguistic_features = np.array(dev_data['features_x'])
    X_test_linguistic_features = np.array(test_data['features_x'])

    X_train_readability = np.array(train_data['readability_x'])
    X_dev_readability = np.array(dev_data['readability_x'])
    X_test_readability = np.array(test_data['readability_x'])

    Y_train = np.array(train_data['y_scaled'])
    Y_dev = np.array(dev_data['y_scaled'])
    Y_test = np.array(test_data['y_scaled'])

    print('================================')
    print('X_train_pos: ', X_train_pos.shape)
    print('X_train_readability: ', X_train_readability.shape)
    print('X_train_ling: ', X_train_linguistic_features.shape)
    print('Y_train: ', Y_train.shape)
    print('================================')
    print('X_dev_pos: ', X_dev_pos.shape)
    print('X_dev_readability: ', X_dev_readability.shape)
    print('X_dev_ling: ', X_dev_linguistic_features.shape)
    print('Y_dev: ', Y_dev.shape)
    print('================================')
    print('X_test_pos: ', X_test_pos.shape)
    print('X_test_readability: ', X_test_readability.shape)
    print('X_test_ling: ', X_test_linguistic_features.shape)
    print('Y_test: ', Y_test.shape)
    print('================================')

    model = build_PAES(len(pos_vocab), max_sentnum, max_sentlen,
                       X_train_readability.shape[1],
                       X_train_linguistic_features.shape[1], configs)

    dev_features_list = [X_dev_pos, X_dev_linguistic_features,
                         X_dev_readability]
    test_features_list = [X_test_pos, X_test_linguistic_features,
                          X_test_readability]

    evaluator = Evaluator(test_prompt_id, dev_data['prompt_ids'],
                          test_data['prompt_ids'], dev_features_list,
                          test_features_list, Y_dev, Y_test, attribute_name)
    evaluator.evaluate(model, -1, print_info=True)

    for ii in range(epochs):
        print('Epoch %s/%s' % (str(ii + 1), epochs))
        start_time = time.time()
        model.fit(
            [X_train_pos, X_train_linguistic_features, X_train_readability],
            Y_train, batch_size=batch_size, epochs=1, verbose=0, shuffle=True)
        tt_time = time.time() - start_time
        print("Training one epoch in %.3f s" % tt_time)
        evaluator.evaluate(model, ii + 1)

    evaluator.print_final_info()
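# `pad_hierarchical_text_sequences` is another project-local helper; from its
# use above it turns ragged [essay][sentence][token id] lists into a dense
# (num_essays, max_sentnum, max_sentlen) array that the script then flattens.
# A hypothetical sketch:
import numpy as np


def pad_hierarchical_text_sequences(index_sequences, max_sentnum, max_sentlen):
    X = np.zeros((len(index_sequences), max_sentnum, max_sentlen),
                 dtype='int32')
    for i, essay in enumerate(index_sequences):
        for j, sentence in enumerate(essay[:max_sentnum]):
            tokens = sentence[:max_sentlen]  # truncate long sentences
            X[i, j, :len(tokens)] = tokens   # zero-pad the rest
    return X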
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# project-local helpers (imports not shown in the original): Configs, through,
# EarlyStopper, Monitor


def main(args):
    tf.reset_default_graph()

    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_path = '%s/examples/model/multi_layer_seq2seq/model' % ROOT
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    datas = []

    # placeholders
    encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32,
                                    name='decoder_labels')

    # embedding
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, input_embedding_size], -1.0, 1.0),
        dtype=tf.float32, name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs)

    # encoder
    encoder_units = hidden_units
    encoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers)
    encoder_output, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_cell, encoder_inputs_embedded, dtype=tf.float32,
        time_major=True)
    del encoder_output  # only the final state is used

    # decoder
    decoder_units = encoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)
    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels, depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )
    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            loss_freq = train_step // 100
            loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(train_step)
            sess.run(tf.global_variables_initializer())
            for i in range(train_step):
                m.monitor(i, loss_suffix)
                batch_data = through(datas, max_time, batch_size,
                                     vocabulary_size)
                feed_dict = {
                    encoder_inputs: batch_data['encoder_inputs'],
                    decoder_inputs: batch_data['decoder_inputs'],
                    decoder_labels: batch_data['decoder_labels']
                }
                sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                if i % loss_freq == 0:
                    batch_data = through(datas, max_time, batch_size,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                    loss_log.append(loss_val)
                    loss_suffix = 'loss: %f' % loss_val
                    es_status = es(loss_val)
                    if i > train_step // 2 and es_status:
                        print('early stopping at step: %d' % i)
                        break
            saver.save(sess, model_path)
            print('save at %s' % model_path)
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_loss.png' % model_path)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            raise ValueError('args.mode should be train or eval')

        # evaluate
        batch_data = through(datas, max_time, batch_size, vocabulary_size)
        feed_dict = {
            encoder_inputs: batch_data['encoder_inputs'],
            decoder_inputs: batch_data['decoder_inputs'],
            decoder_labels: batch_data['decoder_labels']
        }
        pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
        input_ = batch_data['encoder_inputs']
        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
        print('input sequences...\n{}'.format(input_))
        print('predict sequences...\n{}'.format(pred))
        print('loss: %f' % loss_val)

    print('finish.')
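# `EarlyStopper` and `Monitor` are small project-local utilities used by all
# of these training loops: es(loss) reports whether the loss has stopped
# improving, and m.monitor(step, suffix) prints progress. Hypothetical
# reconstructions consistent with those call sites (semantics assumed):
class EarlyStopper(object):
    def __init__(self, max_size=5, edge_threshold=0.1):
        self.max_size = max_size              # window of recent losses kept
        self.edge_threshold = edge_threshold  # minimum improvement required
        self.history = []

    def __call__(self, loss):
        self.history.append(loss)
        if len(self.history) < self.max_size:
            return False
        self.history = self.history[-self.max_size:]
        # True ("stop") when the window shows too little improvement
        return (self.history[0] - min(self.history)) < self.edge_threshold


class Monitor(object):
    def __init__(self, total):
        self.total = total

    def monitor(self, step, suffix=''):
        print('\rstep %d/%d %s' % (step + 1, self.total, suffix), end='')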
import wx

# project-local (imports not shown in the original): Builders, Configs,
# YandexAPI, notify, BadRequest


class Login(object):
    def __init__(self, parent):
        self.builders = Builders()
        self.conf = Configs()
        self.parent = parent
        self.api = YandexAPI()
        self.validation_error = wx.StaticText()
        self.sizer = wx.BoxSizer()
        self.dialog = wx.Dialog()
        self.main_pnl = self.parent.panel

    def create_login_popup(self):
        self.dialog = dialog = wx.Dialog(
            self.parent, wx.ID_ANY, "",
            style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER,
            name="login_popup")
        self.sizer = sizer = wx.BoxSizer(wx.VERTICAL)
        dialog.BackgroundColour = self.conf.get_attr("BACKGROUND_COLOR")
        text = self.builders.static_text_builder(dialog, label="Please login")
        font = text.GetFont()
        font.PointSize += 10
        font = font.Bold()
        text.SetFont(font)
        login_label = self.builders.static_text_builder(dialog, label="Login:")
        login_input = self.builders.input_builder(dialog, name="login_input")
        password_label = self.builders.static_text_builder(dialog,
                                                           label="Password:")
        password_input = self.builders.input_builder(dialog,
                                                     name="password_input")
        login_button = self.builders.button_builder(dialog, "Login",
                                                    "login_button")
        login_button.Bind(wx.EVT_BUTTON, self.on_login)
        self.validation_error = self.builders.static_text_builder(dialog,
                                                                  label="")
        self.validation_error.SetForegroundColour(wx.RED)
        sizer.Add(text, 0,
                  wx.ALIGN_CENTER_HORIZONTAL | wx.LEFT | wx.RIGHT | wx.TOP, 20)
        sizer.Add(login_label, 0,
                  wx.ALIGN_CENTER_HORIZONTAL | wx.LEFT | wx.RIGHT | wx.TOP, 5)
        sizer.Add(login_input, 0,
                  wx.ALIGN_CENTER_HORIZONTAL | wx.LEFT | wx.RIGHT | wx.TOP, 10)
        sizer.Add(password_label, 0,
                  wx.ALIGN_CENTER_HORIZONTAL | wx.LEFT | wx.RIGHT | wx.TOP, 5)
        sizer.Add(password_input, 0,
                  wx.ALIGN_CENTER_HORIZONTAL | wx.LEFT | wx.RIGHT | wx.TOP, 10)
        sizer.Add(login_button, 0,
                  wx.ALIGN_CENTER_HORIZONTAL | wx.LEFT | wx.RIGHT | wx.TOP, 10)
        self.sizer.Add(self.validation_error, 1,
                       wx.ALIGN_CENTER_HORIZONTAL | wx.TOP, 5)
        dialog.SetSizer(sizer)
        dialog.Bind(wx.EVT_CLOSE, self.on_popup_close)
        dialog.Center()
        dialog.Show()

    def on_popup_close(self, event):
        if self.api.is_logged_in() is True:
            event.Skip(True)
        else:
            self.parent.on_exit(event)

    def on_login(self, event):
        login_button = self.parent.FindWindowByName("login_button")
        login_button.Disable()
        login = self.parent.FindWindowByName("login_input").GetValue()
        password = self.parent.FindWindowByName("password_input").GetValue()
        try:
            self.api.login(login=login, password=password)
            popup = self.parent.FindWindowByName("login_popup")
            popup.Destroy()
            notify(subtitle="Hello " + self.api.get_display_name())
            self.parent.playlist_selection.Enable(True)
            self.parent.make_menu()
        except BadRequest as e:
            self.validation_error.SetLabel(str(e))
            size = self.dialog.GetSize()
            self.dialog.SetInitialSize()
            self.dialog.SetSize(size)
            login_button.Enable()
        event.Skip()

    def on_logout_menu(self, event):
        self.api.logout()
        self.create_login_popup()
        self.parent.make_menu()
import datetime
import os

import keras
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint

# project-local helpers (imports not shown in the original): Configs,
# get_data, get_predict_data, get_test_data, get_model


class XHNF(object):
    def __init__(self):
        self.config = None
        self.data = None
        self.model = None
        self.start_date = '20140101'
        self.end_date = '20160501'

    def init_config(self):
        # self.config = Configs(model='default', dataset='default', epochs=10, batch_size=128)
        # self.config = Configs(model='resnet', dataset='cifar10', epochs=10, batch_size=128)
        # self.config = Configs(model='resnet152', dataset='mnist', epochs=10, batch_size=128)
        self.config = Configs(model='default', dataset='stock', epochs=10000,
                              batch_size=128)

    def init_data(self):
        self.data = get_data(self.config.dataset, self.start_date,
                             self.end_date)

    def init_predict_data(self):
        now = datetime.datetime.now()
        end_date = datetime.datetime.strftime(now, "%Y%m%d")
        start = now + datetime.timedelta(days=-7)
        start_date = datetime.datetime.strftime(start, "%Y%m%d")
        self.data = get_predict_data(self.config.dataset, start_date, end_date)

    def init_test_data(self):
        now = datetime.datetime.now()
        end_date = datetime.datetime.strftime(now, "%Y%m%d")
        start = now + datetime.timedelta(days=-10)
        start_date = datetime.datetime.strftime(start, "%Y%m%d")
        self.data = get_test_data(self.config.dataset, start_date, end_date)

    def init_model(self):
        self.model = get_model(self.config.model, self.data.input_shape,
                               self.data.nb_classes)
        self.model.compile(loss=keras.losses.categorical_crossentropy,
                           optimizer=keras.optimizers.Adadelta(),
                           metrics=['accuracy'])

    def init(self):
        self.init_config()
        self.init_data()
        self.init_model()

    def init_predict(self):
        self.init_config()
        self.init_predict_data()
        self.init_model()

    def init_test(self):
        self.init_config()
        self.init_test_data()
        self.init_model()

    def getModelFileName(self):
        records_dir = 'records' \
            + os.path.sep + self.config.get_model() \
            + os.path.sep + self.config.get_dataset()
        weight_name = "Weight_C" + str(self.data.nb_classes)
        weight_name = weight_name + "_I" + str(self.data.input_shape[0]) \
            + "_" + str(self.data.input_shape[1]) \
            + "_" + str(self.data.input_shape[2])
        weight_name = weight_name + ".h5"
        file_name = records_dir + os.path.sep + weight_name
        # print("input_shape", str(self.data.input_shape), "nb_classes", str(self.data.nb_classes))
        if not os.path.exists(records_dir):
            os.makedirs(records_dir)
        return file_name

    def train_network(self):
        filepath = self.getModelFileName()
        checkpoint = ModelCheckpoint(filepath, monitor='val_loss',
                                     save_weights_only=True, verbose=1,
                                     save_best_only=True, period=1)
        early_stopping = EarlyStopping(monitor='val_loss', patience=6,
                                       verbose=0, mode='auto')
        if os.path.exists(filepath):
            self.model.load_weights(filepath)
            print("checkpoint loaded")
        print('start training, start:', self.start_date,
              '; end:', self.end_date)
        if self.data.x_train.shape[0] <= 0:
            print("train shape:", self.data.x_train.shape)
            print("did not read any train data, so skipping to the next loop.")
            return False
        self.model.fit(self.data.x_train,
                       self.data.y_train,
                       batch_size=self.config.batch_size,
                       epochs=self.config.epochs,
                       verbose=1,
                       validation_data=(self.data.x_test, self.data.y_test),
                       callbacks=[checkpoint, early_stopping])
        score = self.model.evaluate(self.data.x_test, self.data.y_test,
                                    verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
        return True

    def train(self):
        print('do train.')
        train_status = True
        for i in range(1000):
            print('train run', i)
            if self.data is None:
                end = datetime.datetime.strptime(self.end_date, '%Y%m%d')
                now = datetime.datetime.now()
                if end < now:
                    end = end + datetime.timedelta(days=20)
                else:
                    print('all done, last train date ', self.end_date)
                    break
                end_date_str = datetime.datetime.strftime(end, "%Y%m%d")
                if train_status:
                    self.start_date = self.end_date
                self.end_date = end_date_str
                self.data = get_data(self.config.dataset, self.start_date,
                                     self.end_date)
                # del self.model
                # self.init_model()
            train_status = self.train_network()
            del self.data
            self.data = None

    def save_predict(self, y):
        now = datetime.datetime.now()
        now_str = datetime.datetime.strftime(now, "%Y%m%d")
        records_dir = 'money' + os.path.sep + now_str
        if not os.path.exists(records_dir):
            os.makedirs(records_dir)
        file_name = records_dir + os.path.sep + now_str + "_predict.csv"
        np.savetxt(file_name, y, fmt='%s', delimiter=',',
                   header='date,code,predict,real')

    def display_predict(self, need_save=False):
        y_predict = self.model.predict(self.data.x_train)
        if self.data.nb_classes == 2:
            y_res = y_predict[:, 1] - y_predict[:, 0]
            y_res = y_res.reshape(y_res.shape[0], 1)
            yt = np.delete(self.data.y_train, (0), axis=1)
            y = np.concatenate((self.data.y_test, y_res), axis=1)
            y = np.concatenate((y, yt), axis=1)
        elif self.data.nb_classes == 4:
            y_res = y_predict[:, 0] - y_predict[:, 1] \
                - y_predict[:, 2] - y_predict[:, 3]
            y_res = y_res.reshape(y_res.shape[0], 1)
            yt = np.delete(self.data.y_train, (1, 2, 3), axis=1)
            y = np.concatenate((self.data.y_test, y_res), axis=1)
            y = np.concatenate((y, yt), axis=1)
        else:
            y = np.concatenate((self.data.y_test, y_predict), axis=1)
            y = np.concatenate((y, self.data.y_train), axis=1)
        print(y)
        if need_save:
            self.save_predict(y)

    def test(self):
        print('do test.')
        filepath = self.getModelFileName()
        if os.path.exists(filepath):
            self.model.load_weights(filepath)
            print("checkpoint loaded")
        if self.data.x_train.shape[0] <= 0:
            print("train shape:", self.data.x_train.shape)
            print("did not read any test data, so exiting.")
            return
        self.display_predict()
        score = self.model.evaluate(self.data.x_train, self.data.y_train,
                                    verbose=1)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

    def predict(self):
        print('do predict.')
        filepath = self.getModelFileName()
        if os.path.exists(filepath):
            self.model.load_weights(filepath)
            print("checkpoint loaded")
        if self.data.x_train.shape[0] <= 0:
            print("train shape:", self.data.x_train.shape)
            print("did not read any predict data, so exiting.")
            return
        self.display_predict(True)

    def do_train(self):
        self.init()
        self.train()

    def do_test(self):
        self.init_test()
        self.test()

    def do_predict(self):
        self.init_predict()
        self.predict()
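# The do_* methods are the intended entry points, so a driver script would
# presumably look like this (illustrative):
if __name__ == '__main__':
    xhnf = XHNF()
    xhnf.do_train()
    # or: xhnf.do_test() / xhnf.do_predict()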
""" if configs is None: configs = Configs('default_config.yaml') if data_loader is None: data_loader = DataLoader() data_loader.load_data() # parser = Parser( # data=data_loader # ) if __name__ == '__main__': START = time.time() # find .env automatically by walking up directories until it's found, then # load up the .env entries as environment variables load_dotenv(find_dotenv()) configs = None if len(sys.argv) > 1: configs = Configs(sys.argv[1]) main(configs) END = time.time() Logger.info('Script completed in', '%i seconds' % int(END - START), __file__)
def main(args): # process config c = Configs(args.config) ROOT = os.environ['TENSOROFLOW'] model_directory = '%s/examples/model/multi_layer_nmt' % ROOT model_path = '%s/model' % model_directory dictionary_path = { 'source': '%s/source_dictionary.pickle' % model_directory, 'source_reverse': '%s/source_reverse_dictionary.pickle' % model_directory, 'target': '%s/target_dictionary.pickle' % model_directory, 'target_reverse': '%s/target_reverse_dictionary.pickle' % model_directory } PAD = c.const['PAD'] EOS = c.const['EOS'] train_step = c.option['train_step'] max_time = c.option['max_time'] batch_size = c.option['batch_size'] vocabulary_size = c.option['vocabulary_size'] input_embedding_size = c.option['embedding_size'] hidden_units = c.option['hidden_units'] layers = c.option['layers'] source_train_data_path = c.data['source_train_data'] target_train_data_path = c.data['target_train_data'] source_valid_data_path = c.data['source_valid_data'] target_valid_data_path = c.data['target_valid_data'] source_test_data_path = c.data['source_test_data'] target_test_data_path = c.data['target_test_data'] # read data if args.mode == 'train': source_dictionary, source_reverse_dictionary = build_dictionary( read_words(source_train_data_path), vocabulary_size) source_train_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_train_data_path) ] target_dictionary, target_reverse_dictionary = build_dictionary( read_words(target_train_data_path), vocabulary_size) target_train_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_train_data_path) ] source_valid_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_valid_data_path) ] target_valid_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_valid_data_path) ] if args.debug: source_train_datas = source_train_datas[:1000] target_train_datas = source_train_datas[:1000] else: with open(dictionary_path['source'], 'rb') as f1, \ open(dictionary_path['source_reverse'], 'rb') as f2, \ open(dictionary_path['target'], 'rb') as f3, \ open(dictionary_path['target_reverse'], 'rb') as f4: source_dictionary = pickle.load(f1) source_reverse_dictionary = pickle.load(f2) target_dictionary = pickle.load(f3) target_reverse_dictionary = pickle.load(f4) source_test_datas = [ sentence_to_onehot(lines, source_dictionary) for lines in read_data(source_test_data_path) ] target_test_datas = [ sentence_to_onehot(lines, target_dictionary) for lines in read_data(target_test_data_path) ] # placeholder encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs') decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_inputs') decoder_labels = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_labels') # embed embeddings = tf.Variable(tf.random_uniform( [vocabulary_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32, name='embeddings') encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs) decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs) # encoder encoder_units = hidden_units encoder_layers = [ tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers ] encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers) encoder_output, encoder_final_state = tf.nn.dynamic_rnn( encoder_cell, encoder_inputs_embedded, dtype=tf.float32, time_major=True) del encoder_output # decoder decoder_units = encoder_units decoder_layers = [ 
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)
    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels, depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )
    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    minibatch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            global_max_step = train_step * (
                len(source_train_datas) // batch_size + 1)
            loss_freq = global_max_step // 100 if global_max_step > 100 else 1
            loss_log = []
            batch_loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(global_max_step)
            sess.run(tf.global_variables_initializer())
            global_step = 0
            stop_flag = False
            for batch in range(train_step):
                if stop_flag:
                    break
                current_batch_loss_log = []
                while True:  # minibatch process
                    m.monitor(global_step, loss_suffix)
                    source_train_batch, _ = batchnize(
                        source_train_datas, batch_size, minibatch_idx['train'])
                    target_train_batch, minibatch_idx['train'] = batchnize(
                        target_train_datas, batch_size, minibatch_idx['train'])
                    batch_data = seq2seq(source_train_batch, target_train_batch,
                                         max_time, vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    sess.run(fetches=[train_op, loss], feed_dict=feed_dict)

                    if global_step % loss_freq == 0:
                        source_valid_batch, _ = batchnize(
                            source_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        target_valid_batch, minibatch_idx['valid'] = batchnize(
                            target_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        batch_data = seq2seq(source_valid_batch,
                                             target_valid_batch,
                                             max_time, vocabulary_size)
                        feed_dict = {
                            encoder_inputs: batch_data['encoder_inputs'],
                            decoder_inputs: batch_data['decoder_inputs'],
                            decoder_labels: batch_data['decoder_labels']
                        }
                        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                        loss_log.append(loss_val)
                        current_batch_loss_log.append(loss_val)
                        loss_suffix = 'loss: %f' % loss_val
                        es_status = es(loss_val)
                        if batch > train_step // 2 and es_status:
                            print('early stopping at step: %d' % global_step)
                            stop_flag = True
                            break
                    global_step += 1
                    if minibatch_idx['train'] == 0:
                        batch_loss = np.mean(current_batch_loss_log)
                        batch_loss_log.append(batch_loss)
                        print('Batch: {}/{}, batch loss: {}'.format(
                            batch + 1, train_step, batch_loss))
                        break

            # save tf.graph and variables
            saver.save(sess, model_path)
            print('save at %s' % model_path)

            # save plot of loss
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_global_loss.png' % model_path)
            plt.figure()
            plt.plot(np.arange(len(batch_loss_log)), batch_loss_log)
            plt.savefig('%s_batch_loss.png' % model_path)

            # save dictionary
            with open(dictionary_path['source'], 'wb') as f1, \
                    open(dictionary_path['source_reverse'], 'wb') as f2, \
                    open(dictionary_path['target'], 'wb') as f3, \
                    open(dictionary_path['target_reverse'], 'wb') as f4:
                pickle.dump(source_dictionary, f1)
                pickle.dump(source_reverse_dictionary, f2)
                pickle.dump(target_dictionary, f3)
                pickle.dump(target_reverse_dictionary, f4)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            # bug fix: a bare `raise` outside an except block is itself an
            # error; fail with an explicit message instead
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             minibatch_idx['test'])
            target_test_batch, minibatch_idx['test'] = batchnize(
                target_test_datas, batch_size, minibatch_idx['test'])
            batch_data = seq2seq(source_test_batch, target_test_batch,
                                 max_time, vocabulary_size)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        input_sentences = ''
        predict_sentences = ''
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector] for vector in input_vector
                if not vector == PAD
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector] for vector in predict_vector
                if not vector == PAD
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
                open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)
        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))
        print('finish.')
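
# main() above reads args.config, args.mode, and args.debug, but the excerpt
# does not include the command-line entry point. The sketch below is a hedged
# reconstruction of what that driver plausibly looks like; the flag names are
# inferred from the attributes accessed on `args`, not copied from the repo.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='multi-layer seq2seq NMT')
    parser.add_argument('--config', required=True,
                        help='path to the Configs file (const/option/data sections)')
    parser.add_argument('--mode', choices=['train', 'eval'], default='train')
    parser.add_argument('--debug', action='store_true',
                        help='truncate the training data to 1000 sentences')
    main(parser.parse_args())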
class YandexAPI(object):
    def __init__(self):
        self.conf = Configs()
        self.client = self.login()
        self.win = None
        self.list_type = None
        self.playlists_list = None
        self.updating_thread = None
        if 'RESOURCEPATH' in os.environ:
            self.cache = '{}/cache'.format(os.environ['RESOURCEPATH'])
        else:
            self.dirName = os.path.dirname(
                os.path.dirname(os.path.abspath(__file__)))
            self.cache = os.path.join(self.dirName, 'cache')

    def __new__(cls, *args, **kw):
        # classic singleton: every construction returns the same instance
        if not hasattr(cls, '_instance'):
            orig = super(YandexAPI, cls)
            cls._instance = orig.__new__(cls)
        return cls._instance

    def login(self, login=None, password=None):
        if self.conf.get_attr("token") is not False:
            client = Client().from_token(self.conf.get_attr("token"))
        elif login is not None and password is not None:
            client = Client().from_credentials(login, password)
            token = client.token
            self.conf.set_attr("token", token)
        else:
            client = Client()
        self.client = client
        return client

    def is_logged_in(self):
        return self.client.account.display_name is not None

    def logout(self):
        self.conf.remove_attr("token")
        self.client = Client()

    def get_display_name(self):
        return str(self.login().account.display_name)

    def get_play_lists_list(self):
        entities = self.client.landing(
            blocks="personalplaylists").blocks[0].entities
        lists = []
        for playlist in entities:
            lists.append({
                "name": playlist.data.data.title,
                "type": playlist.data.type
            })
        self.playlists_list = lists
        return lists

    def preparation(self, list_type, win):
        self.updating_thread = threading.Thread(target=self.update)
        self.list_type = list_type
        self.win = win
        index = {
            "date": date.today().__str__(),
            "last_track_num": 1,
            "tracks": []
        }
        list_dir = '{}/{}'.format(self.cache, list_type)
        index_path = '{}/index.json'.format(list_dir)
        if not os.path.exists(list_dir):
            # bug fix: the directory was created relative to the working
            # directory ('cache/...') while every other path here is rooted
            # at self.cache; create the directory that is actually checked
            os.makedirs(list_dir)
        if not os.path.exists(index_path):
            with open(index_path, 'w+') as file:
                json.dump(index, file, indent=4)
            self.updating_thread.start()
        else:
            if self.is_need_update():
                with open(index_path, 'w+') as file:
                    json.dump(index, file, indent=4)
                self.updating_thread.start()
            else:
                # cache is fresh: post both events immediately instead of
                # re-downloading
                wx.PostEvent(self.win,
                             events.FirstTrackAppear(playlist_type=list_type))
                playlist_title = ""
                for playlist in self.playlists_list:
                    if playlist['type'] == list_type:
                        playlist_title = playlist['name']
                wx.PostEvent(self.win,
                             events.PlaylistReady(playlist_name=playlist_title,
                                                  playlist_type=list_type))
        return True

    def is_need_update(self):
        list_type = self.list_type
        with open('{}/{}/index.json'.format(self.cache, list_type),
                  'r') as file:
            index_date = datetime.strptime(json.load(file)['date'],
                                           '%Y-%m-%d').date()
        return index_date != date.today()

    def update(self):
        print("Starting update")
        list_type = self.list_type
        blocks = self.client.landing(
            blocks="personalplaylists").blocks[0].entities
        playlist = ""
        print("processing blocks")
        for block in blocks:
            if block.data.type == list_type:
                playlist = block.data.data
        tracks = self.client.users_playlists(playlist.kind,
                                             playlist.owner.uid)[0].tracks
        index_path = '{}/{}/index.json'.format(self.cache, list_type)
        with open(index_path, 'r') as file:  # close the handle when done
            index_file = json.load(file)
        index = 1
        print("processing tracks")
        for track in tracks:
            if index == 2:
                wx.PostEvent(
                    self.win,
                    events.FirstTrackAppear(playlist_name=playlist.title,
                                            playlist_type=list_type))
            full_track_info = track.track
            index_file['tracks'].append({
                "id": full_track_info.id,
                "title": full_track_info.title,
                "artist": full_track_info.artists[0]['name'],
                "duration": full_track_info.duration_ms,
                "num": index
            })
            with open(index_path, 'w+') as file:
                json.dump(index_file, file)
            track.track.download_cover('{}/{}/{}.png'.format(
                self.cache, list_type, index))
            track.track.download('{}/{}/{}.mp3'.format(
                self.cache, list_type, index),
                codec="mp3", bitrate_in_kbps=320)
            if index == 3:
                break
            index += 1
        print("finishing updating")
        wx.PostEvent(self.win,
                     events.PlaylistReady(playlist_name=playlist.title,
                                          playlist_type=list_type))
        return True
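
# A brief usage sketch, for illustration only. Because __new__ makes
# YandexAPI a singleton, repeated construction yields the same client;
# preparation() then drives the cache/update flow for one playlist type.
# The credentials and the 'playlistOfTheDay' type string are placeholders,
# and the wx window argument must be a frame bound to the custom events.
api = YandexAPI()
if not api.is_logged_in():
    api.login('user@example.com', 'password')  # placeholder credentials
for pl in api.get_play_lists_list():
    print(pl['name'], pl['type'])
# api.preparation('playlistOfTheDay', main_window)  # hypothetical call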
import collections
import os
from typing import List, Sequence, TypeVar

import numpy as np

from configs.configs import Configs

ROOT = os.environ['TENSOROFLOW']
config_file = '%s/configs/const.ini' % ROOT
print('data.py: config file is %s' % config_file)
c = Configs(config_file)
PAD = c.const['PAD']
EOS = c.const['EOS']
BOS = c.const['BOS']
UNK = c.const['UNK']
END_TOKEN = c.const['END_TOKEN']

A = TypeVar('A')


def read_words(input_file: str) -> List[str]:
    words = []
    with open(input_file) as f:
        words += f.read().split()
    return words


def read_data(input_file: str) -> List[str]:
    # annotation fix: readlines() returns one string per line,
    # not a list of token lists
    with open(input_file) as f:
        lines = f.readlines()
    return lines
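
# build_dictionary and sentence_to_onehot are called by the NMT main() above
# but are not part of this excerpt. Below is a hedged sketch of what
# build_dictionary plausibly does — most-frequent-word indexing with an UNK
# bucket, in the style of the classic TensorFlow word2vec tutorial, which
# would also explain the `collections` and `Sequence` imports in this module.
# It is an assumption, not the repository's verbatim implementation.
def build_dictionary(words: Sequence[str], vocabulary_size: int):
    # reserve index 0 for UNK; keep the (vocabulary_size - 1) most common words
    count = [['UNK', 0]]
    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
    dictionary = {word: idx for idx, (word, _) in enumerate(count)}
    reverse_dictionary = {idx: word for word, idx in dictionary.items()}
    return dictionary, reverse_dictionary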
def main():
    parser = argparse.ArgumentParser(description="AES_aug model")
    parser.add_argument('--test_prompt_id', type=int, default=1,
                        help='prompt id of test essay set')
    parser.add_argument('--seed', type=int, default=12,
                        help='set random seed')
    args = parser.parse_args()
    test_prompt_id = args.test_prompt_id
    seed = args.seed

    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    print("Test prompt id is {} of type {}".format(test_prompt_id,
                                                   type(test_prompt_id)))
    print("Seed: {}".format(seed))

    configs = Configs()

    data_path = configs.DATA_PATH
    train_path = data_path + str(test_prompt_id) + '/train.pk'
    dev_path = data_path + str(test_prompt_id) + '/dev.pk'
    test_path = data_path + str(test_prompt_id) + '/test.pk'
    features_path = configs.FEATURES_PATH
    readability_path = configs.READABILITY_PATH
    vocab_size = configs.VOCAB_SIZE
    epochs = configs.EPOCHS
    batch_size = configs.BATCH_SIZE
    pretrained_embedding = configs.PRETRAINED_EMBEDDING
    embedding_path = configs.EMBEDDING_PATH

    read_configs = {
        'train_path': train_path,
        'dev_path': dev_path,
        'test_path': test_path,
        'features_path': features_path,
        'readability_path': readability_path,
        'vocab_size': vocab_size
    }

    word_vocab = read_word_vocab(read_configs)
    train_data, dev_data, test_data = read_essays_words_flat(
        read_configs, word_vocab)

    if pretrained_embedding:
        embedd_dict, embedd_dim, _ = load_word_embedding_dict(embedding_path)
        embedd_matrix = build_embedd_table(word_vocab, embedd_dict,
                                           embedd_dim, caseless=True)
        embed_table = [embedd_matrix]
    else:
        embed_table = None

    max_essay_len = max(train_data['max_essay_len'],
                        dev_data['max_essay_len'],
                        test_data['max_essay_len'])
    print('max essay length: {}'.format(max_essay_len))

    train_data['y_scaled'] = get_scaled_down_scores(train_data['data_y'],
                                                    train_data['prompt_ids'])
    dev_data['y_scaled'] = get_scaled_down_scores(dev_data['data_y'],
                                                  dev_data['prompt_ids'])
    test_data['y_scaled'] = get_scaled_down_scores(test_data['data_y'],
                                                   test_data['prompt_ids'])

    X_train = pad_flat_text_sequences(train_data['words'], max_essay_len)
    X_dev = pad_flat_text_sequences(dev_data['words'], max_essay_len)
    X_test = pad_flat_text_sequences(test_data['words'], max_essay_len)

    X_train_linguistic_features = np.array(train_data['features_x'])
    X_dev_linguistic_features = np.array(dev_data['features_x'])
    X_test_linguistic_features = np.array(test_data['features_x'])

    X_train_readability = np.array(train_data['readability_x'])
    X_dev_readability = np.array(dev_data['readability_x'])
    X_test_readability = np.array(test_data['readability_x'])

    Y_train = np.array(train_data['y_scaled'])
    Y_dev = np.array(dev_data['y_scaled'])
    Y_test = np.array(test_data['y_scaled'])

    print('================================')
    print('X_train: ', X_train.shape)
    print('X_train_readability: ', X_train_readability.shape)
    print('X_train_ling: ', X_train_linguistic_features.shape)
    print('Y_train: ', Y_train.shape)
    print('================================')
    print('X_dev: ', X_dev.shape)
    print('X_dev_readability: ', X_dev_readability.shape)
    print('X_dev_ling: ', X_dev_linguistic_features.shape)
    print('Y_dev: ', Y_dev.shape)
    print('================================')
    print('X_test: ', X_test.shape)
    print('X_test_readability: ', X_test_readability.shape)
    print('X_test_ling: ', X_test_linguistic_features.shape)
    print('Y_test: ', Y_test.shape)
    print('================================')

    model = build_AES_aug_multitask(len(word_vocab), max_essay_len, configs,
                                    embed_table, Y_train.shape[1])

    dev_features_list = [X_dev]
    test_features_list = [X_test]

    evaluator = AllAttEvaluator(test_prompt_id, dev_data['prompt_ids'],
                                test_data['prompt_ids'], dev_features_list,
                                test_features_list, Y_dev, Y_test)
    evaluator.evaluate(model, -1, print_info=True)
    for ii in range(epochs):
        print('Epoch %s/%s' % (str(ii + 1), epochs))
        start_time = time.time()
        model.fit(X_train, Y_train, batch_size=batch_size, epochs=1,
                  verbose=0, shuffle=True)
        tt_time = time.time() - start_time
        print("Training one epoch in %.3f s" % tt_time)
        evaluator.evaluate(model, ii + 1)
    evaluator.print_final_info()
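
# get_scaled_down_scores (used above) maps raw essay scores into [0, 1] so
# the model can regress them with a bounded output; the evaluator is then
# responsible for rescaling. The helper itself is not in this excerpt, so the
# following per-prompt min-max sketch is hypothetical — names, the
# score_ranges argument, and its shape are all assumptions for illustration.
def scale_down_scores(scores, prompt_ids, score_ranges):
    """score_ranges: {prompt_id: (min_score, max_score)} — assumed shape."""
    scaled = []
    for y, pid in zip(scores, prompt_ids):
        lo, hi = score_ranges[pid]
        scaled.append((y - lo) / (hi - lo))  # min-max normalize per prompt
    return scaled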