def compare():
    """Compare two sentences separated by a semi-colon"""
    # Load the data frame
    train, dev, test = loader.getData()
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True, train_dir=TRAIN_DIR)
        model.batch_size = 64  # We decode one sentence at a time.
        # Load vocabularies.
        en_vocab = get_english_vocab(DATA_DIR, VOCAB_SIZE)
        results = []
        for i, row in train.iterrows():
            try:
                context1 = get_context(sess, model, en_vocab, row["sentence1"])[0]
                context2 = get_context(sess, model, en_vocab, row["sentence2"])[0]
            except TypeError:
                print "Error on line %i" % i
                continue
            cosine_distance = cosine(context1, context2)
            euclid_distance = np.linalg.norm(context1 - context2)
            prediction = euclid_distance < 10
            correctness = prediction == row["label"]
            results.append(correctness)
            print "%i, %i, %.3f" % (row["label"], prediction, euclid_distance)
            # Print the accuracy so far
            if i % 10 == 0:
                print "Correctness:", np.mean(results)
        results = np.array(results)
        print np.mean(results)
def __str__(self):
    if len(self.simgr.active) > 0:
        if self.state is None:
            return get_context(self.proj, self.simgr.active[0])
        return get_context(self.proj, self.inspect_simgr.active[0])
    else:
        return "No active states."
def help():
    """Returns the Help page."""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'help'
    ctx_dict['title'] = 'Ajuda'
    code = """
        <textarea rows="30" class="small-12 large-12 columns">
    """
    code += """
        Ajuda Por Implementar...
    """
    code += """
        </textarea>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
def load_and_evaluate(args):
    """Load the pretrained model and run evaluate."""
    context = utils.get_context(args)
    embedding, model_idx_to_token = get_model(args)
    idx_to_token_set = evaluation.get_tokens_in_evaluation_datasets(args)
    idx_to_token_set.update(model_idx_to_token)
    idx_to_token = list(idx_to_token_set)

    # Compute their word vectors
    token_embedding = embedding.to_token_embedding(idx_to_token,
                                                   ctx=context[0])

    os.makedirs(args.logdir, exist_ok=True)
    results = evaluation.evaluate_similarity(
        args, token_embedding, context[0],
        logfile=os.path.join(args.logdir, 'similarity.tsv'))
    results += evaluation.evaluate_analogy(
        args, token_embedding, context[0],
        logfile=os.path.join(args.logdir, 'analogy.tsv'))
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    # current_session = auth.get_auth().get_user_by_session()
    new_user_object = self.auth.store.user_model.get_by_auth_token(
        current_session['user_id'], current_session['token'])[0]
    username = new_user_object.auth_ids[0]
    old_user_object = models.Account.all().filter(
        'username ='******'_entity']
    if 'user' in self.request.GET:
        new_user_object = self.auth.store.user_model.get_by_auth_id(
            self.request.GET['user'])
        old_user_object = models.Account.all().filter(
            'username ='******'user']).fetch(1)[0].__dict__['_entity']
        username = new_user_object.auth_ids[0]
    context['current_session'] = current_session
    context['new_user_object'] = new_user_object
    context['user'] = username
    context['old_user_object'] = old_user_object
    path = os.path.join(os.path.dirname(__file__), 'templates/auth_test.html')
    self.response.out.write(template.render(path, context))
def get(self):
    context = utils.get_context(self.auth)
    resumes = models.Resume.all().fetch(10000)
    context['resumes'] = resumes
    path = os.path.join(os.path.dirname(__file__), 'templates/view_resumes.html')
    self.response.out.write(template.render(path, context))
def get(self):
    context = utils.get_context(self.auth)
    upload_url = blobstore.create_upload_url('/upload')
    upload_url = upload_url.replace('http://localhost:8080',
                                    self.request.get('host'))
    context['upload_url'] = upload_url
    path = os.path.join(os.path.dirname(__file__), 'templates/apply.html')
    self.response.out.write(template.render(path, context))
def evaluate(args, model, vocab, global_step, eval_analogy=False):
    """Evaluation helper"""
    if 'eval_tokens' not in globals():
        global eval_tokens

        eval_tokens_set = evaluation.get_tokens_in_evaluation_datasets(args)
        if not args.no_eval_analogy:
            eval_tokens_set.update(vocab.idx_to_token)

        # GloVe does not support computing vectors for OOV words
        eval_tokens_set = filter(lambda t: t in vocab, eval_tokens_set)

        eval_tokens = list(eval_tokens_set)

    # Compute their word vectors
    context = get_context(args)
    mx.nd.waitall()

    token_embedding = nlp.embedding.TokenEmbedding(unknown_token=None,
                                                   allow_extend=True)
    token_embedding[eval_tokens] = model[eval_tokens]

    results = evaluation.evaluate_similarity(
        args, token_embedding, context[0],
        logfile=os.path.join(args.logdir, 'similarity.tsv'),
        global_step=global_step)
    if eval_analogy:
        assert not args.no_eval_analogy
        results += evaluation.evaluate_analogy(
            args, token_embedding, context[0],
            logfile=os.path.join(args.logdir, 'analogy.tsv'))

    return results
def about():
    """Returns the About page."""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'about'
    ctx_dict['title'] = 'Sobre'
    code = """
        <div class="small-12 large-12 columns">
        <textarea rows="30" readonly>
    """
    code += """
        Sobre o ERP+

        Versão 1.0 de 2015

        O ERP + é uma plataforma de Gestão sobre a qual qualquer pessoa pode
        desenvolver objectos que suportem o seu negócio ou actividade.

        Bom trabalho

        Contactos:
        Dario Costa
        +238 983 04 90
    """
    code += """
        </textarea>
        </div>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
def evaluate(args, embedding, vocab, global_step, eval_analogy=False):
    """Evaluation helper"""
    if 'eval_tokens' not in globals():
        global eval_tokens

        eval_tokens_set = evaluation.get_tokens_in_evaluation_datasets(args)
        if not args.no_eval_analogy:
            eval_tokens_set.update(vocab.idx_to_token)
        eval_tokens = list(eval_tokens_set)

    os.makedirs(args.logdir, exist_ok=True)

    # Compute their word vectors
    context = get_context(args)
    idx_to_token = eval_tokens
    mx.nd.waitall()

    token_embedding = embedding.to_token_embedding(idx_to_token,
                                                   ctx=context[0])

    results = evaluation.evaluate_similarity(
        args, token_embedding, context[0],
        logfile=os.path.join(args.logdir, 'similarity.tsv'),
        global_step=global_step)
    if eval_analogy:
        assert not args.no_eval_analogy
        results += evaluation.evaluate_analogy(
            args, token_embedding, context[0],
            logfile=os.path.join(args.logdir, 'analogy.tsv'))

    return results
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    resumes = models.Resume.all().fetch(10000)
    context['resumes'] = resumes
    path = os.path.join(os.path.dirname(__file__), 'templates/view_resumes.html')
    self.response.out.write(template.render(path, context))
def get(self):
    #mike_exists = models.Account.all().filter('username ='******'mike')
    #if not mike_exists.count():
    #    account = models.Account(username="******", password="******", given_name="Michael", is_admin=True, is_employee=True, ssn='999999999')
    #    account.save()
    context = utils.get_context(self.auth)
    path = os.path.join(os.path.dirname(__file__), 'templates/home.html')
    self.response.out.write(template.render(path, context))
def get(self):
    context = utils.get_context(self.auth)
    if context['is_employee']:
        employee = models.Account.all().filter(
            'username ='******'username'])[0]
        context['employee'] = employee
    path = os.path.join(os.path.dirname(__file__), 'templates/profile.html')
    self.response.out.write(template.render(path, context))
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    upload_url = blobstore.create_upload_url('/upload')
    upload_url = upload_url.replace('http://localhost:8080',
                                    self.request.get('host'))
    context['upload_url'] = upload_url
    path = os.path.join(os.path.dirname(__file__), 'templates/apply.html')
    self.response.out.write(template.render(path, context))
def Lesk_algorithm(word, sentence_tokens):
    synset = wn.synsets(word)
    best_sense = synset[0]
    max_olp = 0
    # Extract the context of the sentence.
    sentence_context = utils.get_context(sentence_tokens)
    for sense in synset:
        # Take the example plus the gloss; if there is no example, only the
        # gloss is used (this is the sentence that determines the context for
        # that particular sense).
        sense_examples = utils.get_examples(sense)
        # Extract the context from the example and gloss of this sense.
        sense_context = utils.get_context(word_tokenize(sense_examples))
        olp = max_overlap(sentence_context, sense_context)
        if max_olp < olp:
            max_olp = olp
            best_sense = sense
    return best_sense
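# Hypothetical usage sketch for the function above: assumes NLTK and its
# WordNet corpus are installed and that the `utils` helpers it relies on are
# importable; the sentence is a made-up example.
from nltk.tokenize import word_tokenize

tokens = word_tokenize("I went to the bank to deposit my money")
best_sense = Lesk_algorithm("bank", tokens)
print(best_sense, best_sense.definition())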
def get(self):
    context = utils.get_context()
    user = users.get_current_user()
    path = os.path.join(os.path.dirname(__file__), 'templates/welcome.html')
    if user:
        calendars = models.Calendar.query(
            models.Calendar.owner == user.user_id())
        context['records'] = ndb.get_multi(calendars.fetch(keys_only=True))
        path = os.path.join(os.path.dirname(__file__), 'templates/main.html')
    self.response.out.write(template.render(path, context))
def test_get_context():
    inputs = [
        "context",
    ]

    def mock_input(s):
        return inputs.pop(0)

    utils.input = mock_input

    a = utils.get_context()
    if not a == 'context':
        raise AssertionError()
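# A pytest-style variant of the same check, shown only as a sketch; it assumes
# utils.get_context() reads its value through `input` looked up on the utils
# module, as the test above implies.
def test_get_context_monkeypatch(monkeypatch):
    # Inject a fake input() on the utils module (it may not exist yet there).
    monkeypatch.setattr(utils, "input", lambda prompt="": "context", raising=False)
    assert utils.get_context() == "context"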
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    if context['is_employee']:
        employee = models.Account.all().filter(
            'username ='******'username'])[0]
        context['employee'] = employee
    path = os.path.join(os.path.dirname(__file__), 'templates/profile.html')
    self.response.out.write(template.render(path, context))
def get(self):
    context = utils.get_context(self.auth)
    if context['is_admin']:
        employee_query = models.Account.all().filter('is_employee =', True)
        employees = employee_query.fetch(1000)
        context['employees'] = employees
        path = os.path.join(os.path.dirname(__file__), 'templates/directory.html')
        self.response.out.write(template.render(path, context))
    else:
        path = os.path.join(os.path.dirname(__file__),
                            'templates/error_no_permission.html')
        self.response.out.write(template.render(path, context))
def get(self):
    context = utils.get_context(self.auth)
    if context['is_admin']:
        customer_query = models.Account.all().filter('is_customer =', True)
        customers = customer_query.fetch(1000)
        context['customers'] = customers
        path = os.path.join(os.path.dirname(__file__), 'templates/customers.html')
        self.response.out.write(template.render(path, context))
    else:
        path = os.path.join(os.path.dirname(__file__),
                            'templates/error_no_permission.html')
        self.response.out.write(template.render(path, context))
def comparison_task(sess, model=None):
    """Compare the encoder state for two different English sentences

    Cosine similarity is used as the distance metric
    """
    sentences = [
        "There was a man with a red hat",
        "There was a man with a blue hat",
        "A man with a blue hat was there",
        "The sky is blue and the grass is green",
        "Why are you asking about the history of my life"
    ]

    # Load the vocab
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.from" % FLAGS.from_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)

    # Create model and load parameters.
    if model is None:
        model = create_model(sess, True)

    # Persist the original batch size
    original_batch_size = model.batch_size
    model.batch_size = 1  # We decode one sentence at a time.

    # Get all of the context vectors
    context_vectors = []
    for sentence in sentences:
        context_vector = get_context(sess, model, en_vocab, sentence)
        context_vectors.append(context_vector)

    # Calculate the similarity matrix
    similarity = np.zeros((len(sentences), len(sentences)))
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            similarity[i, j] = cosine_similarity(context_vectors[i],
                                                 context_vectors[j])

    # Display the output
    print(80 * "=")
    print("For the following sentences:\n")
    for i, sentence in enumerate(sentences):
        print(i, sentence)
    print("The similarity matrix is:\n")
    print(similarity, "\n")
    print(80 * "=")

    # Reset the model
    model.batch_size = original_batch_size
def get(self):
    #mike_exists = models.Account.all().filter('username ='******'mike')
    #if not mike_exists.count():
    #    account = models.Account(username="******", password="******", given_name="Michael", is_admin=True, is_employee=True, ssn='999999999')
    #    account.save()
    context = utils.get_context(self.request)
    insert_details(context)
    #if 'user' in self.request.GET:
    #    new_user_object = self.auth.store.user_model.get_by_auth_id(self.request.GET['user'])
    #    old_user_object = models.Account.all().filter('username ='******'user']).fetch(1)[0].__dict__['_entity']
    #    username = new_user_object.auth_ids[0]
    path = os.path.join(os.path.dirname(__file__), 'templates/home.html')
    self.response.out.write(template.render(path, context))
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    if context['is_admin']:
        customer_query = models.Account.all().filter('is_customer =', True)
        customers = customer_query.fetch(1000)
        context['customers'] = customers
        path = os.path.join(os.path.dirname(__file__), 'templates/customers.html')
        self.response.out.write(template.render(path, context))
    else:
        path = os.path.join(os.path.dirname(__file__),
                            'templates/error_no_permission.html')
        self.response.out.write(template.render(path, context))
def main():
    """Index function."""
    print('Init do main_route')
    window_id = str(get_window_id())
    print(window_id)
    set_base_context(window_id)
    print('oi')
    ctx_dict = get_context(window_id)
    print(ctx_dict)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'index'
    ctx_dict['title'] = 'ERP +'
    ctx_dict['form'] = ''
    print(ctx_dict)
    set_context(window_id, ctx_dict)
    return ctx_dict
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    if context['is_admin']:
        employee_query = models.Account.all().filter(
            'is_employee =', True)  #.filter('username !=', 'mike')
        employees = employee_query.fetch(1000)
        context['employees'] = employees
        path = os.path.join(os.path.dirname(__file__), 'templates/directory.html')
        self.response.out.write(template.render(path, context))
    else:
        path = os.path.join(os.path.dirname(__file__),
                            'templates/error_no_permission.html')
        self.response.out.write(template.render(path, context))
def login_submit():
    """Validates the login."""
    print('Im on login submit')
    window_id = request.forms.get('window_id')

    # This code deletes the JSON dictionaries that are created over time to
    # store contextual information, once they are older than a day.
    now = time.time()
    path = '/var/www/tmp/'
    for f in os.listdir(path):
        if os.stat(os.path.join(path, f)).st_mtime < now - 86400:
            os.remove(os.path.join(path, f))

    import base64
    from users import Users
    user = request.forms.get('login')
    password = request.forms.get('password')
    print('before db request')
    db_user = Users(where="login = '******'".format(user=user)).get()
    autenticated = False
    print('1', user, db_user)
    if db_user:
        db_user = db_user[0]
        if base64.decodestring(db_user['password'].encode('utf-8')).decode('utf-8')[6:] == password:
            print('o utilizador {user} autenticou-se com sucesso!'.format(user=db_user['nome']))
            request.session['user'] = db_user['id']
            request.session['user_name'] = db_user['nome']
            request.session.save()
            autenticated = True
    print('2')
    if not autenticated:
        return HTTPResponse(status=500, output='Autenticação Inválida!!!')
    else:
        #print('estou autenticado')
        if window_id:
            #print('tenho window_id')
            ctx_dict = get_context(window_id)
            if 'redirect_url' in ctx_dict:
                #print('tenho redirect'+str(ctx_dict['redirect_url']))
                return ctx_dict['redirect_url']
            else:
                return '/'
        else:
            #print('nao tenho window_id')
            return '/'
    print('end')
def parse_folder(path, parsers):
    context = get_context(path)
    parser = None
    album_info = {}  # stays empty when no parser matches, so the return below is safe
    for p in parsers:
        album_string = path.split(os.path.sep)[-1]
        if p.match_store(album_string, source='album'):
            parser = p
            break
    if parser is None:
        # Eventually we'll deal with errors here,
        # allow the user to enter a store manually, etc
        print("Panic! No parser found")
    else:
        album_info['artist'] = parser.get_field(path, 'artist', 'album')
        album_info['album_title'] = parser.get_field(path, 'album_title', 'album')
        album_info['label'] = parser.get_field(path, 'label', 'album')
    return path, parser, context, album_info
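# Hypothetical call of the function above; `parsers` must be a list of parser
# objects exposing the match_store() and get_field() methods it relies on, and
# the folder path is a made-up example.
path, parser, context, album_info = parse_folder(
    "/music/Some Artist - Some Album (2020)", parsers)
if parser is not None:
    print(album_info["artist"], "-", album_info["album_title"])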
def licence():
    """Returns the License page."""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'licence'
    ctx_dict['title'] = 'Licença'
    code = """
        <textarea rows="30" class="small-12 large-12 columns">
    """
    # Read the licence text shipped with the local installation.
    with open('/var/www/core/help/licence.txt', 'r', encoding='utf8') as licence_file:
        code += licence_file.read()
    code += """
        </textarea>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
def secure(*args, **kargs):
    #print ('inicio do secure do verify_form_rights')
    from utils import get_context, set_context
    import objs
    window_id = kargs.get('window_id')
    #print (window_id)
    ctx_dict = get_context(window_id)
    #print ('ctx_dict no verify_form_rights', ctx_dict)
    model_name = ctx_dict.get('model_name')
    model = eval("""objs.{model_name}()""".format(model_name=model_name))
    result = verify_rights(model=model, action=target.__name__)
    #print ('2')
    if result == True:
        return target(*args, **kargs)
    elif isinstance(result, list):
        ctx_dict['rights'] = result
        set_context(window_id, ctx_dict)
        #print ('fim do secure de form, vou carregar o objecto')
        return target(*args, **kargs)
    else:
        return result
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    # current_session = auth.get_auth().get_user_by_session()
    new_user_object = self.auth.store.user_model.get_by_auth_token(
        current_session['user_id'], current_session['token'])[0]
    username = new_user_object.auth_ids[0]
    old_user_object = models.Account.all().filter(
        'username ='******'_entity']
    if 'user' in self.request.GET:
        new_user_object = self.auth.store.user_model.get_by_auth_id(
            self.request.GET['user'])
        old_user_object = models.Account.all().filter(
            'username ='******'user']).fetch(1)[0].__dict__['_entity']
        username = new_user_object.auth_ids[0]
    context['current_session'] = current_session
    context['new_user_object'] = new_user_object
    context['user'] = username
    context['old_user_object'] = old_user_object
    path = os.path.join(os.path.dirname(__file__), 'templates/auth_test.html')
    self.response.out.write(template.render(path, context))
def update():
    """Returns the local installation update page."""
    window_id = str(get_window_id())
    set_base_context(window_id)
    ctx_dict = get_context(window_id)
    ctx_dict['window_id'] = window_id
    ctx_dict['name'] = 'update'
    ctx_dict['title'] = 'Actualização'
    code = """
        <textarea rows="30" class="small-12 large-12 columns">
    """
    code += """
        Actualização Por Implementar...
    """
    code += """
        </textarea>
    """
    ctx_dict['form'] = code
    set_context(window_id, ctx_dict)
    return ctx_dict
# files = os.listdir(path)
num_img = len(files)
maxconf = np.empty([num_img - 1])
conftmp = []
for frame in range(1, num_img):
    sigma = sigma * scale
    window = hamming_window * np.exp(-0.5 / (sigma * sigma) * dist)
    window = hamming_window / window.sum()
    # load image
    img = cv2.imread(path + '\\' + files[frame - 1])
    if img.shape[2] > 1:
        im = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sz = [size_x, size_y]
    contextprior = get_context(im, pos, sz, window)
    # if frame > 1:
    # calculate response of the confidence map at all locations
    confmap = np.fft.ifft2(Hstcf * np.fft.fft2(contextprior))
    confmap = np.real(confmap)
    # target location is at the maximum response
    [row, col] = np.unravel_index(np.argmax(confmap), confmap.shape)
    row = row + 1
    col = col + 1
    pos = [pos[0] - size_x / 2 + row, pos[1] - size_y / 2 + col]
    sz = [size_x, size_y]
    print('frame:', frame)
    # print('pos:', pos)
    # print('row:', row)
    # print('col:', col)
def train(args):
    """Training helper."""
    if not args.model.lower() in ['cbow', 'skipgram']:
        logging.error('Unsupported model %s.', args.model)
        sys.exit(1)

    if args.data.lower() == 'toy':
        data = mx.gluon.data.SimpleDataset(nlp.data.Text8(segment='train')[:2])
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'text8':
        data = nlp.data.Text8(segment='train')
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'fil9':
        data = nlp.data.Fil9(max_sentence_length=10000)
        data, vocab, idx_to_counts = preprocess_dataset(
            data, max_vocab_size=args.max_vocab_size)
    elif args.data.lower() == 'wiki':
        data, vocab, idx_to_counts = wiki(args.wiki_root, args.wiki_date,
                                          args.wiki_language,
                                          args.max_vocab_size)

    if args.ngram_buckets > 0:
        data, batchify_fn, subword_function = transform_data_fasttext(
            data, vocab, idx_to_counts, cbow=args.model.lower() == 'cbow',
            ngram_buckets=args.ngram_buckets, ngrams=args.ngrams,
            batch_size=args.batch_size, window_size=args.window,
            frequent_token_subsampling=args.frequent_token_subsampling)
    else:
        subword_function = None
        data, batchify_fn = transform_data_word2vec(
            data, vocab, idx_to_counts, cbow=args.model.lower() == 'cbow',
            batch_size=args.batch_size, window_size=args.window,
            frequent_token_subsampling=args.frequent_token_subsampling)

    num_tokens = float(sum(idx_to_counts))

    model = CBOW if args.model.lower() == 'cbow' else SG
    embedding = model(token_to_idx=vocab.token_to_idx, output_dim=args.emsize,
                      batch_size=args.batch_size, num_negatives=args.negative,
                      negatives_weights=mx.nd.array(idx_to_counts),
                      subword_function=subword_function)
    context = get_context(args)
    embedding.initialize(ctx=context)
    if not args.no_hybridize:
        embedding.hybridize(static_alloc=True, static_shape=True)

    optimizer_kwargs = dict(learning_rate=args.lr)
    try:
        trainer = mx.gluon.Trainer(embedding.collect_params(), args.optimizer,
                                   optimizer_kwargs)
    except ValueError as e:
        if args.optimizer == 'groupadagrad':
            logging.warning('MXNet <= v1.3 does not contain '
                            'GroupAdaGrad support. Falling back to AdaGrad')
            trainer = mx.gluon.Trainer(embedding.collect_params(), 'adagrad',
                                       optimizer_kwargs)
        else:
            raise e

    try:
        if args.no_prefetch_batch:
            data = data.transform(batchify_fn)
        else:
            from executors import LazyThreadPoolExecutor
            num_cpu = len(os.sched_getaffinity(0))
            ex = LazyThreadPoolExecutor(num_cpu)
    except (ImportError, SyntaxError, AttributeError):
        # Py2 - no async prefetching is supported
        logging.warning(
            'Asynchronous batch prefetching is not supported on Python 2. '
            'Consider upgrading to Python 3 for improved performance.')
        data = data.transform(batchify_fn)

    num_update = 0
    prefetched_iters = []
    for _ in range(min(args.num_prefetch_epoch, args.epochs)):
        prefetched_iters.append(iter(data))
    for epoch in range(args.epochs):
        if epoch + len(prefetched_iters) < args.epochs:
            prefetched_iters.append(iter(data))
        data_iter = prefetched_iters.pop(0)
        try:
            batches = ex.map(batchify_fn, data_iter)
        except NameError:  # Py 2 or batch prefetching disabled
            batches = data_iter

        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        for i, batch in enumerate(batches):
            ctx = context[i % len(context)]
            batch = [array.as_in_context(ctx) for array in batch]
            with mx.autograd.record():
                loss = embedding(*batch)
            loss.backward()

            num_update += loss.shape[0]
            if len(context) == 1 or (i + 1) % len(context) == 0:
                trainer.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean().as_in_context(context[0])
            if (i + 1) % args.log_interval == 0:
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                wps = log_wc / (time.time() - log_start_time)
                # Due to subsampling, the overall number of batches is an
                # upper bound
                num_batches = num_tokens // args.batch_size
                if args.model.lower() == 'skipgram':
                    num_batches = (num_tokens * args.window * 2) // args.batch_size
                else:
                    num_batches = num_tokens // args.batch_size
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_batches, log_avg_loss,
                                 wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, embedding, vocab, num_update)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args, embedding, vocab, num_update,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        embedding.save_parameters(os.path.join(args.logdir, 'embedding.params'))
plt.ion()

from utils import generate_data, get_context

# DEBUGGING
from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
print ">> Generating dataset..."
data = generate_data(1000)  # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data)

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 100

print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryCRBM(n_visible, n_hidden, n_context)
initial_vmap = {rbm.v: T.matrix('v'), rbm.x: T.matrix('x')}
def webhook():
    data = request.get_json()
    log(data)
    messaging_text = None
    audio_link = None
    audio_resp = None
    if data['object'] == 'page':
        for entry in data['entry']:
            for messaging_event in entry['messaging']:
                # ID's
                sender_id = messaging_event['sender']['id']
                recipient_id = messaging_event['recipient']['id']

                if messaging_event.get('message'):
                    print(messaging_event['message'])
                    if 'text' in messaging_event['message']:
                        messaging_text = messaging_event['message']['text']
                    else:
                        # print("No message")
                        messaging_text = None

                    if 'attachments' in messaging_event['message']:
                        # print('attachments')
                        # print(messaging_event['message']['attachments'][0]['type'])
                        if messaging_event['message']['attachments'][0]['type'] == "audio":
                            audio_link = messaging_event['message']['attachments'][0]['payload']['url']
                            r = requests.get(audio_link, allow_redirects=True)
                            filepath = os.path.join(sys.path[0], 'audio.mp4')
                            open(filepath, 'wb').write(r.content)
                            export_filepath = os.path.join(sys.path[0], 'audio.wav')
                            # print(filepath)
                            # print(export_filepath)
                            try:
                                # track = AudioSegment.from_file("filepath")
                                # track.export(os.path.join(sys.path[0],'audio.wav'))
                                if os.path.exists(export_filepath):
                                    os.remove(export_filepath)
                                subprocess.call(['ffmpeg', '-i', filepath, export_filepath])
                                # print("successfully Converted!")
                            except Exception as e:
                                print(e)
                            # print(filepath)
                            # urllib.request.urlretrieve(audio_link,'.\audio.mp4')
                            # print(audio_link)
                            # with open(audio_link,'rb') as f:
                            #     print('inside audio file')
                            audio_resp = ut.get_audio_response(export_filepath)
                            # print(audio_resp)
                            # print(audio_link)
                    else:
                        audio_link = None

                    # ECHO
                    # print(messaging_text)
                    # response = messaging_text
                    if messaging_text is not None or audio_resp is not None:
                        greetings = ut.is_greetings(messaging_text, audio_resp)
                        # print(greetings)
                        if greetings:
                            response = ut.handle_greetings()
                            print(response)
                            bot.send_text_message(sender_id, response)
                        else:
                            emot, emotconf = ut.get_emotion(messaging_text, audio_resp)
                            sent, sentconf = ut.get_sentiment(messaging_text, audio_resp)
                            cont, contconf = ut.get_context(messaging_text, audio_resp)
                            # print(emot,sent,cont)
                            util_resp = ut.handle_response(emot, sent, emotconf, sentconf)
                            action_resp = ut.generate_action(util_resp, cont, contconf)
                            act_list = action_resp
                            share_var = False
                            partner_var = False
                            if 'share' in act_list:
                                share_var = True
                            if 'partner' in act_list:
                                partner_var = True
                            elements = ac.get_element(action_resp)
                            resp_emot = ac.get_emotion_response(util_resp)
                            default_text = "These are my suggestions"
                            response = resp_emot + "\n" + default_text
                            bot.send_text_message(sender_id, response)
                            if elements:
                                bot.send_generic_message(sender_id, elements)
                            if share_var:
                                # print('here')
                                bot.send_text_message(
                                    sender_id,
                                    'You can also share your achievements on your wall!')
                            if partner_var:
                                # print('here')
                                bot.send_text_message(sender_id,
                                                      'You can also call your partner')
                            # bot.send_text_message(sender_id,response)
    return "ok", 200
def get(self):
    context = utils.get_context(self.request)
    insert_details(context)
    path = os.path.join(os.path.dirname(__file__), 'templates/thanks.html')
    self.response.out.write(template.render(path, context))
def enforce_max_size(token_embedding, size):
    if size and len(token_embedding.idx_to_token) > size:
        token_embedding._idx_to_token = token_embedding._idx_to_token[:size]
        token_embedding._idx_to_vec = token_embedding._idx_to_vec[:size]
        token_embedding._token_to_idx = {
            token: idx
            for idx, token in enumerate(token_embedding._idx_to_token)
        }


if __name__ == '__main__':
    logging.basicConfig()
    logging.getLogger().setLevel(logging.INFO)

    args_ = get_args()
    ctx = utils.get_context(args_)[0]
    if not os.path.isdir(args_.logdir):
        os.makedirs(args_.logdir)

    # Load pre-trained embeddings
    if not args_.embedding_path:
        if args_.embedding_name.lower() == 'fasttext':
            token_embedding_ = nlp.embedding.create(
                args_.embedding_name, source=args_.embedding_source,
                load_ngrams=args_.fasttext_load_ngrams, allow_extend=True,
                unknown_autoextend=True)
        else:
            token_embedding_ = nlp.embedding.create(
                args_.embedding_name, source=args_.embedding_source)
def train(args):
    """Training helper."""
    if args.ngram_buckets:  # Fasttext model
        coded_dataset, negatives_sampler, vocab, subword_function, \
            idx_to_subwordidxs = get_train_data(args)
        embedding = nlp.model.train.FasttextEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            subword_function=subword_function,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    else:
        coded_dataset, negatives_sampler, vocab = get_train_data(args)
        embedding = nlp.model.train.SimpleEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    embedding_out = nlp.model.train.SimpleEmbeddingModel(
        token_to_idx=vocab.token_to_idx,
        embedding_size=args.emsize,
        weight_initializer=mx.init.Zero(),
        sparse_grad=not args.no_sparse_grad,
    )
    loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()

    context = get_context(args)
    embedding.initialize(ctx=context)
    embedding_out.initialize(ctx=context)
    if not args.no_hybridize:
        embedding.hybridize(static_alloc=not args.no_static_alloc)
        embedding_out.hybridize(static_alloc=not args.no_static_alloc)

    optimizer_kwargs = dict(learning_rate=args.lr)
    params = list(embedding.embedding.collect_params().values()) + \
        list(embedding_out.collect_params().values())
    trainer = mx.gluon.Trainer(params, args.optimizer, optimizer_kwargs)

    if args.ngram_buckets:  # Fasttext model
        optimizer_subwords_kwargs = dict(learning_rate=args.lr_subwords)
        params_subwords = list(
            embedding.subword_embedding.collect_params().values())
        trainer_subwords = mx.gluon.Trainer(params_subwords,
                                            args.optimizer_subwords,
                                            optimizer_subwords_kwargs)

    num_update = 0
    for epoch in range(args.epochs):
        random.shuffle(coded_dataset)
        context_sampler = nlp.data.ContextSampler(coded=coded_dataset,
                                                  batch_size=args.batch_size,
                                                  window=args.window)
        num_batches = len(context_sampler)

        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        for i, batch in enumerate(context_sampler):
            progress = (epoch * num_batches + i) / (args.epochs * num_batches)

            (center, word_context, word_context_mask) = batch

            negatives_shape = (word_context.shape[0],
                               word_context.shape[1] * args.negative)
            negatives, negatives_mask = remove_accidental_hits(
                negatives_sampler(negatives_shape), word_context,
                word_context_mask)

            if args.ngram_buckets:  # Fasttext model
                if args.model.lower() == 'skipgram':
                    unique, inverse_unique_indices = np.unique(
                        center.asnumpy(), return_inverse=True)
                    unique = mx.nd.array(unique)
                    inverse_unique_indices = mx.nd.array(
                        inverse_unique_indices, ctx=context[0])
                    subwords, subwords_mask = \
                        indices_to_subwordindices_mask(unique,
                                                       idx_to_subwordidxs)
                elif args.model.lower() == 'cbow':
                    unique, inverse_unique_indices = np.unique(
                        word_context.asnumpy(), return_inverse=True)
                    unique = mx.nd.array(unique)
                    inverse_unique_indices = mx.nd.array(
                        inverse_unique_indices, ctx=context[0])
                    subwords, subwords_mask = \
                        indices_to_subwordindices_mask(unique,
                                                       idx_to_subwordidxs)
                else:
                    logging.error('Unsupported model %s.', args.model)
                    sys.exit(1)

            num_update += len(center)

            # To GPU
            center = center.as_in_context(context[0])
            if args.ngram_buckets:  # Fasttext model
                subwords = subwords.as_in_context(context[0])
                subwords_mask = subwords_mask.astype(
                    np.float32).as_in_context(context[0])
            word_context = word_context.as_in_context(context[0])
            word_context_mask = word_context_mask.as_in_context(context[0])
            negatives = negatives.as_in_context(context[0])
            negatives_mask = negatives_mask.as_in_context(context[0])

            with mx.autograd.record():
                # Combine subword level embeddings with word embeddings
                if args.model.lower() == 'skipgram':
                    if args.ngram_buckets:
                        emb_in = embedding(
                            center, subwords, subwordsmask=subwords_mask,
                            words_to_unique_subwords_indices=
                            inverse_unique_indices)
                    else:
                        emb_in = embedding(center)

                    with mx.autograd.pause():
                        word_context_negatives = mx.nd.concat(
                            word_context, negatives, dim=1)
                        word_context_negatives_mask = mx.nd.concat(
                            word_context_mask, negatives_mask, dim=1)

                    emb_out = embedding_out(word_context_negatives,
                                            word_context_negatives_mask)

                    # Compute loss
                    pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                    pred = pred.squeeze() * word_context_negatives_mask
                    label = mx.nd.concat(word_context_mask,
                                         mx.nd.zeros_like(negatives), dim=1)

                elif args.model.lower() == 'cbow':
                    word_context = word_context.reshape((-3, 1))
                    word_context_mask = word_context_mask.reshape((-3, 1))
                    if args.ngram_buckets:
                        emb_in = embedding(word_context, subwords,
                                           word_context_mask, subwords_mask,
                                           inverse_unique_indices)
                    else:
                        emb_in = embedding(word_context, word_context_mask)

                    with mx.autograd.pause():
                        center = center.tile(args.window * 2).reshape((-1, 1))
                        negatives = negatives.reshape((-1, args.negative))
                        center_negatives = mx.nd.concat(center, negatives,
                                                        dim=1)
                        center_negatives_mask = mx.nd.concat(
                            mx.nd.ones_like(center), negatives_mask, dim=1)

                    emb_out = embedding_out(center_negatives,
                                            center_negatives_mask)

                    # Compute loss
                    pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                    pred = pred.squeeze() * word_context_mask
                    label = mx.nd.concat(mx.nd.ones_like(word_context),
                                         mx.nd.zeros_like(negatives), dim=1)

                loss = loss_function(pred, label)

            loss.backward()

            if args.optimizer.lower() != 'adagrad':
                trainer.set_learning_rate(max(0.0001,
                                              args.lr * (1 - progress)))
            if (args.optimizer_subwords.lower() != 'adagrad'
                    and args.ngram_buckets):
                trainer_subwords.set_learning_rate(
                    max(0.0001, args.lr_subwords * (1 - progress)))

            trainer.step(batch_size=1)
            if args.ngram_buckets:
                trainer_subwords.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean()
            if (i + 1) % args.log_interval == 0:
                wps = log_wc / (time.time() - log_start_time)
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_batches, log_avg_loss,
                                 wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, embedding, vocab, num_update)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args, embedding, vocab, num_update,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        save_params(args, embedding, embedding_out)
def main(debug_mode=False):
    file_path = Path('commiter.yml')
    debug('file_path', file_path, debug_mode)

    if file_path.is_file():
        with open(str(file_path), 'r') as stream:
            try:
                config = safe_load(stream)
                debug('convention from file', config['convention'], debug_mode)

                if config['convention'] is not None:
                    convention = str(config['convention']).lower()
                else:
                    convention = 'none'

                if convention == 'none':
                    print('You are not using a convention')
                    commit_message = just_message()
                else:
                    print('You are using the %s convention' % convention)
                    tag, msg = get_text()
                    if convention == 'angular' or convention == 'karma':
                        context = get_context()
                        commit_message = angular_convention(tag, msg, context)
                    elif convention == 'changelog':
                        commit_message = changelog_convention(tag, msg)
                    elif convention == 'symphony':
                        commit_message = symphony_convention(tag, msg)

                commit_message += gen_co_author(args.co_author)
                debug('commit message', commit_message, debug_mode)
                system('git commit -m "%s"' % commit_message)
            except YAMLError as exc:
                print(exc)

    elif args.convention != '':
        convention = str(args.convention)
        debug('convention flag', convention, debug_mode)

        if convention == 'message':
            commit_message = just_message()
            create_file('none', args.no_file)
        else:
            tag, msg = get_text()
            if convention == 'angular' or convention == 'karma':
                context = get_context()
                commit_message = angular_convention(tag, msg, context)
                create_file(convention, args.no_file)
            elif convention == 'changelog':
                commit_message = changelog_convention(tag, msg)
                create_file(convention, args.no_file)
            elif convention == 'symphony':
                commit_message = symphony_convention(tag, msg)
                create_file(convention, args.no_file)

        commit_message += gen_co_author(args.co_author)
        debug('commit message', commit_message, debug_mode)
        system('git commit -m "%s"' % commit_message)
    else:
        debug('parser full return', parser.parse_args(), debug_mode)
        parser.print_help()
import matplotlib.pyplot as plt
plt.ion()

from utils import generate_data, get_context

# DEBUGGING
from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
print ">> Generating dataset..."
data = generate_data(1000)  # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data, N=1)  # keep the number of dimensions low

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 20

print ">> Constructing RBM..."
numpy_rng = np.random.RandomState(123)

initial_W = np.asarray(np.random.uniform(
    low=-4 * np.sqrt(6. / (n_hidden + n_visible + n_context)),
    high=4 * np.sqrt(6. / (n_hidden + n_visible + n_context)),
import matplotlib.pyplot as plt
plt.ion()

from utils import generate_data, get_context

# DEBUGGING
from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
print ">> Generating dataset..."
data = generate_data(1000)  # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data)

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 100

print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryCRBM(n_visible, n_hidden, n_context)
initial_vmap = {rbm.v: T.matrix('v'), rbm.x: T.matrix('x')}

# try to calculate weight updates using CD-1 stats
def get(self):
    context = utils.get_context(self.auth)
    path = os.path.join(os.path.dirname(__file__), 'templates/thanks.html')
    self.response.out.write(template.render(path, context))
plt.ion()

from utils import generate_data, get_context

# DEBUGGING
from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
print ">> Generating dataset..."
data = generate_data(1000)  # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data, N=1)  # keep the number of dimensions low

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 20
n_factors = 50

print ">> Constructing RBM..."
numpy_rng = np.random.RandomState(123)


def initial_W(n, f):
def train(args):
    """Training helper."""
    vocab, row, col, counts = get_train_data(args)
    model = GloVe(token_to_idx=vocab.token_to_idx, output_dim=args.emsize,
                  dropout=args.dropout, x_max=args.x_max, alpha=args.alpha,
                  weight_initializer=mx.init.Uniform(scale=1 / args.emsize))
    context = get_context(args)
    model.initialize(ctx=context)
    if not args.no_hybridize:
        model.hybridize(static_alloc=not args.no_static_alloc)

    optimizer_kwargs = dict(learning_rate=args.lr, eps=args.adagrad_eps)
    params = list(model.collect_params().values())
    try:
        trainer = mx.gluon.Trainer(params, 'groupadagrad', optimizer_kwargs)
    except ValueError:
        logging.warning('MXNet <= v1.3 does not contain '
                        'GroupAdaGrad support. Falling back to AdaGrad')
        trainer = mx.gluon.Trainer(params, 'adagrad', optimizer_kwargs)

    index_dtype = 'int32'
    if counts.shape[0] >= np.iinfo(np.int32).max:
        index_dtype = 'int64'
        logging.info('Co-occurrence matrix is large. '
                     'Using int64 to represent sample indices.')
    indices = mx.nd.arange(counts.shape[0], dtype=index_dtype)
    for epoch in range(args.epochs):
        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        mx.nd.shuffle(indices, indices)  # inplace shuffle
        bs = args.batch_size
        num_batches = indices.shape[0] // bs
        for i in range(num_batches):
            batch_indices = indices[bs * i:bs * (i + 1)]
            ctx = context[i % len(context)]
            batch_row = row[batch_indices].as_in_context(ctx)
            batch_col = col[batch_indices].as_in_context(ctx)
            batch_counts = counts[batch_indices].as_in_context(ctx)
            with mx.autograd.record():
                loss = model(batch_row, batch_col, batch_counts)
                loss.backward()

            if len(context) == 1 or (i + 1) % len(context) == 0:
                trainer.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean().as_in_context(context[0])
            if (i + 1) % args.log_interval == 0:
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                wps = log_wc / (time.time() - log_start_time)
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1, num_batches, log_avg_loss,
                                 wps / 1000, log_wc / 1000))
                log_dict = dict(
                    global_step=epoch * len(indices) + i * args.batch_size,
                    epoch=epoch, batch=i + 1, loss=log_avg_loss,
                    wps=wps / 1000)
                log(args, log_dict)

                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, model, vocab, i + num_batches * epoch)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args, model, vocab, num_batches * args.epochs,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        model.save_parameters(os.path.join(args.logdir, 'glove.params'))
def get_model(args):
    """Load the pretrained model."""
    context = utils.get_context(args)

    assert '.bin' in args.path  # Assume binary fasttext format
    gensim_fasttext = gensim.models.FastText()
    gensim_fasttext.file_name = args.path
    with open(args.path, 'rb') as f:
        gensim_fasttext._load_model_params(f)
        gensim_fasttext._load_dict(f)
        if gensim_fasttext.new_format:
            # quant input
            gensim_fasttext.struct_unpack(f, '@?')
        num_vectors, dim = gensim_fasttext.struct_unpack(f, '@2q')
        assert gensim_fasttext.wv.vector_size == dim
        dtype = np.float32 if struct.calcsize('@f') == 4 else np.float64
        matrix = np.fromfile(f, dtype=dtype, count=num_vectors * dim)
        matrix = matrix.reshape((-1, dim))

        num_words = len(gensim_fasttext.wv.vocab)
        num_subwords = gensim_fasttext.bucket
        assert num_words + num_subwords == num_vectors

    # token_to_idx must be a token -> index mapping, not a set of tuples.
    if args.max_vocab_size:
        idx_to_token = list(
            gensim_fasttext.wv.vocab.keys())[:args.max_vocab_size]
        idx_to_vec = mx.nd.array(matrix[:len(idx_to_token)])
        token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}
    else:
        idx_to_token = list(gensim_fasttext.wv.vocab.keys())
        idx_to_vec = mx.nd.array(matrix[:num_words])
        token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}

    if num_subwords:
        subword_function = nlp.vocab.create_subword_function(
            'NGramHashes', num_subwords=num_subwords)

        embedding = nlp.model.train.FasttextEmbeddingModel(
            token_to_idx=token_to_idx,
            subword_function=subword_function,
            embedding_size=dim,
        )
        embedding.initialize(ctx=context[0])
        embedding.embedding.weight.set_data(idx_to_vec)
        embedding.subword_embedding.embedding.weight.set_data(
            mx.nd.array(matrix[num_words:]))
    else:
        print('Loaded model does not contain subwords.')
        embedding = nlp.model.train.SimpleEmbeddingModel(
            token_to_idx=token_to_idx,
            embedding_size=dim,
        )
        embedding.initialize(ctx=context[0])
        embedding.embedding.weight.set_data(idx_to_vec)

    return embedding, idx_to_token
def main():
    doc = DocxTemplate(f'templates/{settings["template_filename"]}.docx')
    context = get_context(settings, current_period_data)
    doc.render(context)
    doc.save(f'output/{context["output_doc_name"]}.docx')
    print_output_information(context)
def get(self):
    context = utils.get_context()
    path = 'welcome.html'
    self.render_template(path, context)
def train(args):
    """Training helper."""
    if args.ngram_buckets:
        data, negatives_sampler, vocab, subword_function, subword_lookup, \
            num_tokens, idx_to_subwordidxs = get_train_data(args)
        embedding = nlp.model.train.FasttextEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            subword_function=subword_function,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    else:
        data, negatives_sampler, vocab, num_tokens = get_train_data(args)
        embedding = nlp.model.train.SimpleEmbeddingModel(
            token_to_idx=vocab.token_to_idx,
            embedding_size=args.emsize,
            weight_initializer=mx.init.Uniform(scale=1 / args.emsize),
            sparse_grad=not args.no_sparse_grad,
        )
    embedding_out = nlp.model.train.SimpleEmbeddingModel(
        token_to_idx=vocab.token_to_idx,
        embedding_size=args.emsize,
        weight_initializer=mx.init.Zero(),
        sparse_grad=not args.no_sparse_grad,
    )
    loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()

    context = get_context(args)
    embedding.initialize(ctx=context)
    embedding_out.initialize(ctx=context)
    if not args.no_hybridize:
        embedding.hybridize(static_alloc=not args.no_static_alloc)
        embedding_out.hybridize(static_alloc=not args.no_static_alloc)

    optimizer_kwargs = dict(learning_rate=args.lr)
    params = list(embedding.collect_params().values()) + \
        list(embedding_out.collect_params().values())
    trainer = mx.gluon.Trainer(params, args.optimizer, optimizer_kwargs)

    def skipgram_batch(data):
        """Create a batch for Skipgram training objective."""
        centers, word_context, word_context_mask = data
        assert len(centers.shape) == 2
        negatives_shape = (len(word_context), 2 * args.window * args.negative)
        negatives, negatives_mask = remove_accidental_hits(
            negatives_sampler(negatives_shape), word_context)
        context_negatives = mx.nd.concat(word_context, negatives, dim=1)
        masks = mx.nd.concat(word_context_mask, negatives_mask, dim=1)
        labels = mx.nd.concat(word_context_mask, mx.nd.zeros_like(negatives),
                              dim=1)
        if not args.ngram_buckets:
            return (centers.as_in_context(context[0]),
                    context_negatives.as_in_context(context[0]),
                    masks.as_in_context(context[0]),
                    labels.as_in_context(context[0]))
        else:
            unique, inverse_unique_indices = np.unique(centers.asnumpy(),
                                                       return_inverse=True)
            inverse_unique_indices = mx.nd.array(inverse_unique_indices,
                                                 ctx=context[0])
            subwords, subwords_mask = subword_lookup.get(unique.astype(int))
            return (centers.as_in_context(context[0]),
                    context_negatives.as_in_context(context[0]),
                    masks.as_in_context(context[0]),
                    labels.as_in_context(context[0]),
                    mx.nd.array(subwords, ctx=context[0]),
                    mx.nd.array(subwords_mask, ctx=context[0]),
                    inverse_unique_indices)

    def cbow_batch(data):
        """Create a batch for CBOW training objective."""
        centers, word_context, word_context_mask = data
        assert len(centers.shape) == 2
        negatives_shape = (len(centers), args.negative)
        negatives, negatives_mask = remove_accidental_hits(
            negatives_sampler(negatives_shape), centers)
        center_negatives = mx.nd.concat(centers, negatives, dim=1)
        center_negatives_mask = mx.nd.concat(mx.nd.ones_like(centers),
                                             negatives_mask, dim=1)
        labels = mx.nd.concat(mx.nd.ones_like(centers),
                              mx.nd.zeros_like(negatives), dim=1)
        if not args.ngram_buckets:
            return (word_context.as_in_context(context[0]),
                    word_context_mask.as_in_context(context[0]),
                    center_negatives.as_in_context(context[0]),
                    center_negatives_mask.as_in_context(context[0]),
                    labels.as_in_context(context[0]))
        else:
            unique, inverse_unique_indices = np.unique(word_context.asnumpy(),
                                                       return_inverse=True)
            inverse_unique_indices = mx.nd.array(inverse_unique_indices,
                                                 ctx=context[0])
            subwords, subwords_mask = subword_lookup.get(unique.astype(int))
            return (word_context.as_in_context(context[0]),
                    word_context_mask.as_in_context(context[0]),
                    center_negatives.as_in_context(context[0]),
                    center_negatives_mask.as_in_context(context[0]),
                    labels.as_in_context(context[0]),
                    mx.nd.array(subwords, ctx=context[0]),
                    mx.nd.array(subwords_mask, ctx=context[0]),
                    inverse_unique_indices)

    # Helpers for bucketing
    def skipgram_length_fn(data):
        """Return lengths for bucketing."""
        centers, _, _ = data
        lengths = [
            len(idx_to_subwordidxs[i])
            for i in centers.asnumpy().astype(int).flat
        ]
        return lengths

    def cbow_length_fn(data):
        """Return lengths for bucketing."""
        _, word_context, _ = data
        word_context_np = word_context.asnumpy().astype(int)
        lengths = [
            max(len(idx_to_subwordidxs[i]) for i in one_context)
            for one_context in word_context_np
        ]
        return lengths

    def bucketing_batchify_fn(indices, data):
        """Select elements from data batch based on bucket indices."""
        centers, word_context, word_context_mask = data
        return (centers[indices], word_context[indices],
                word_context_mask[indices])

    length_fn = skipgram_length_fn if args.model.lower() == 'skipgram' \
        else cbow_length_fn

    bucketing_split = 16
    batchify = nlp.data.batchify.EmbeddingCenterContextBatchify(
        batch_size=args.batch_size * bucketing_split
        if args.ngram_buckets else args.batch_size,
        window_size=args.window)
    batches = data.transform(batchify)

    num_update = 0
    for epoch in range(args.epochs):
        # Logging variables
        log_wc = 0
        log_start_time = time.time()
        log_avg_loss = 0

        batches = itertools.chain.from_iterable(batches)

        if args.ngram_buckets:
            # For fastText training, create batches such that subwords used in
            # that batch are of similar length
            batches = BucketingStream(batches, bucketing_split, length_fn,
                                      bucketing_batchify_fn)

        for i, batch in enumerate(batches):
            progress = (epoch * num_tokens + i * args.batch_size) / \
                (args.epochs * num_tokens)
            if args.model.lower() == 'skipgram':
                if args.ngram_buckets:
                    (center, context_negatives, mask, label, subwords,
                     subwords_mask,
                     inverse_unique_indices) = skipgram_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(
                            center, subwords, subwordsmask=subwords_mask,
                            words_to_unique_subwords_indices=
                            inverse_unique_indices)
                        emb_out = embedding_out(context_negatives, mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred, label, mask) *
                                mask.shape[1] / mask.sum(axis=1))
                else:
                    (center, context_negatives, mask,
                     label) = skipgram_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(center)
                        emb_out = embedding_out(context_negatives, mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred, label, mask) *
                                mask.shape[1] / mask.sum(axis=1))
            elif args.model.lower() == 'cbow':
                if args.ngram_buckets:
                    (word_context, word_context_mask, center_negatives,
                     center_negatives_mask, label, subwords, subwords_mask,
                     inverse_unique_indices) = cbow_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(
                            word_context, subwords,
                            wordsmask=word_context_mask,
                            subwordsmask=subwords_mask,
                            words_to_unique_subwords_indices=
                            inverse_unique_indices)
                        emb_in = emb_in.mean(axis=1, keepdims=True)
                        emb_out = embedding_out(
                            center_negatives,
                            wordsmask=center_negatives_mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred.squeeze(), label,
                                              center_negatives_mask) *
                                center_negatives_mask.shape[1] /
                                center_negatives_mask.sum(axis=1))
                else:
                    (word_context, word_context_mask, center_negatives,
                     center_negatives_mask, label) = cbow_batch(batch)
                    with mx.autograd.record():
                        emb_in = embedding(word_context,
                                           wordsmask=word_context_mask)
                        emb_in = emb_in.mean(axis=1, keepdims=True)
                        emb_out = embedding_out(
                            center_negatives,
                            wordsmask=center_negatives_mask)
                        pred = mx.nd.batch_dot(emb_in, emb_out.swapaxes(1, 2))
                        loss = (loss_function(pred.squeeze(), label,
                                              center_negatives_mask) *
                                center_negatives_mask.shape[1] /
                                center_negatives_mask.sum(axis=1))
            else:
                logging.error('Unsupported model %s.', args.model)
                sys.exit(1)

            loss.backward()
            num_update += len(label)

            if args.optimizer.lower() != 'adagrad':
                trainer.set_learning_rate(max(0.0001,
                                              args.lr * (1 - progress)))

            trainer.step(batch_size=1)

            # Logging
            log_wc += loss.shape[0]
            log_avg_loss += loss.mean()
            if (i + 1) % args.log_interval == 0:
                # Forces waiting for computation by computing loss value
                log_avg_loss = log_avg_loss.asscalar() / args.log_interval
                wps = log_wc / (time.time() - log_start_time)
                # Due to subsampling, the overall number of batches is an
                # upper bound
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'.format(
                                 epoch, i + 1,
                                 num_tokens // args.batch_size,
                                 log_avg_loss, wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0

            if args.eval_interval and (i + 1) % args.eval_interval == 0:
                with print_time('mx.nd.waitall()'):
                    mx.nd.waitall()
                with print_time('evaluate'):
                    evaluate(args, embedding, vocab, num_update)

    # Evaluate
    with print_time('mx.nd.waitall()'):
        mx.nd.waitall()
    with print_time('evaluate'):
        evaluate(args, embedding, vocab, num_update,
                 eval_analogy=not args.no_eval_analogy)

    # Save params
    with print_time('save parameters'):
        save_parameters(args, embedding, embedding_out)
def api_method(self, context=None, *args, **kwargs):
    if requires_context or context is not None:
        context = get_context(context or kwargs, self)
        return method(self, context, *args, **kwargs)
    return method(self, *args, **kwargs)
return method
def train(cfg):
    date_today = date.today().strftime("%b-%d-%Y")
    summary_writer = SummaryWriter(cfg.log_dir, flush_secs=5,
                                   filename_suffix=date_today)

    train_data = mx.gluon.data.vision.MNIST(
        train=True).transform_first(data_xform)
    train_loader = mx.gluon.data.DataLoader(train_data, shuffle=True,
                                            batch_size=cfg.batch_size)
    image_shape = train_data[0][0].shape

    # No initialization. Custom blocks encapsulate initialization and setting of data.
    net = Glow(image_shape, cfg.K, cfg.L, cfg.affine, cfg.filter_size,
               cfg.temp, cfg.n_bits)
    ctx = get_context(cfg.use_gpu)
    net = set_context(net, ctx)

    trainer = mx.gluon.Trainer(net.collect_params(), 'adam',
                               {'learning_rate': cfg.lr})

    n_samples = len(train_loader)
    update_interval = n_samples // 2  # store the loss with summary writer twice
    loss_buffer = LossBuffer()
    global_step = 1

    for epoch in range(1, cfg.n_epochs + 1):
        for idx, (batch, label) in enumerate(train_loader):
            print(f'Epoch {epoch} - Batch {idx}/{n_samples}', end='\r')
            data = mx.gluon.utils.split_and_load(batch, ctx)

            with mx.autograd.record():
                for X in data:
                    z_list, nll, bpd = net(X)
                    prev_loss = loss_buffer.new_loss(bpd.mean())

            loss_buffer.loss.backward()
            trainer.step(1)

            if prev_loss is not None and global_step % update_interval == 0:
                loss = prev_loss.asscalar()
                summary_writer.add_scalar(tag='bpd', value=loss,
                                          global_step=global_step)
            global_step += 1

        # Sample from latent space to generate random digit and reverse from latent
        if (epoch % cfg.plot_interval) == 0:
            x_generate = net.reverse()[0]
            x_generate = x_generate.reshape(1, *x_generate.shape)
            x_recon = net.reverse(z_list[-1])[0]
            x_recon = x_recon.reshape(1, *x_recon.shape)
            x_real = data[0][0].reshape(1, *data[0][0].shape)

            minim = -0.5
            maxim = 0.5
            x_generate = x_generate.clip(minim, maxim)
            x_generate += -minim
            x_recon = x_recon.clip(minim, maxim)
            x_recon += -minim
            x_real += -minim

            img = mx.nd.concatenate([x_real, x_generate, x_recon],
                                    axis=0).asnumpy()
            summary_writer.add_image(tag='generations', image=img,
                                     global_step=global_step)

    summary_writer.close()