def aggr_and_output_all_word_zeta(model_dir,
                                  param_fpathin_index2vocab,
                                  param_fpathout_aggrd_all_wordrep,
                                  mtype='TopicalWordEmbedding'):
    '''
    Aggregate the topic vector (zeta) of every pivot word over the train set
    (read from TRAIN_SET_PATH) and write the result to a file.
    ====================
    params:
    ----------
    model_dir: saved model dir
    param_fpathin_index2vocab: index-to-vocabulary JSON file
    param_fpathout_aggrd_all_wordrep: output file for the aggregated
        word representations
    mtype: model name
    return:
    ----------
    None
    '''
    # ----------load the index2vocabulary
    fpointerInIndex2Vocabulary = open(param_fpathin_index2vocab,
                                      'rt',
                                      encoding='utf8')
    dictIndex2Vocab = \
        json.load(fpointerInIndex2Vocabulary)
    fpointerInIndex2Vocabulary.close()
    config = DefaultConfig()
    batch_size = config.batch_size
    # ----------Compute the wordrep
    dictIndex2Wordvec = dict()
    for i in range(VOCABULARY_SIZE):
        dictIndex2Wordvec[i] = numpy.zeros(shape=TOPIC_COUNT,
                                           dtype=numpy.float32)
    # determine whether to run on cuda
    if config.on_cuda:
        config.on_cuda = torch.cuda.is_available()
        if not config.on_cuda:
            logger.info('CUDA is unavailable; although the model was '
                        'configured to run on CUDA, it will run on the CPU')
    model_path = '%s/model' % model_dir
    if config.model == 'TopicalWordEmbedding':
        model = topicalWordEmbedding.TopicalWordEmbedding(
            param_on_cuda=config.on_cuda,
            param_half_window_size=HALF_WINDOW_SIZE,
            param_vocabulary_size=VOCABULARY_SIZE,
            param_hidden_layer_size=HIDDEN_LAYER_SIZE,
            param_encoder_pi_size=DIM_ENCODER,
            param_topic_count=TOPIC_COUNT)
    print('Loading trained model')
    if config.on_cuda:
        model.load(model_path)
        model = model.cuda()
    else:
        model.load_cpu_from_gputrained(model_path)
        model = model.cpu()
    train_data_manager = DataManager(batch_size, TRAINING_INSTANCES)
    train_data_manager.load_dataframe_from_file(TRAIN_SET_PATH)
    n_batch = train_data_manager.n_batches()
    batch_index = 0
    for batch_index in range(0, n_batch - 1):
        # this operation is time consuming
        xn, wc = train_data_manager.next_batch()
        idx = numpy.argmax(xn, axis=1)
        if config.on_cuda:
            var_xn = Variable(torch.from_numpy(xn).float()).cuda()
            # print(x.size())
            var_wc = Variable(torch.from_numpy(wc).float(),
                              requires_grad=False).cuda()
        else:
            var_xn = Variable(torch.from_numpy(xn).float()).cpu()
            # print(x.size())
            var_wc = Variable(torch.from_numpy(wc).float(),
                              requires_grad=False).cpu()
        var_zeta = model.forward_obtain_xn_zeta(var_xn, var_wc)
        # var_zeta_softmaxd = softmax(var_zeta, dim=1)
        arr_zeta = var_zeta.data.cpu().numpy()
        for row_idx, pivot_idx in enumerate(idx):
            pivot_zeta = arr_zeta[row_idx]
            dictIndex2Wordvec[pivot_idx] += pivot_zeta  # += pivot_rep
        for pivot_idx in idx:
            dictIndex2Wordvec[pivot_idx] = softmax_np(
                dictIndex2Wordvec[pivot_idx])
        # y = y - 1
        # print(y.size())
    if TRAINING_INSTANCES % batch_size == 0:
        train_data_manager.set_current_cursor_in_dataframe_zero()
    else:
        train_data_manager.set_current_cursor_in_dataframe_zero()
    # ----------Output the dict
    fpointerOutWordRep = open(param_fpathout_aggrd_all_wordrep,
                              'wt',
                              encoding='utf8')
    for an_word_idx in dictIndex2Wordvec:
        arr_word_rep = dictIndex2Wordvec[an_word_idx]
        arr_word_rep = arr_word_rep.astype(dtype=str)
        str_word_rep = ' '.join(arr_word_rep)
        str_vocab = dictIndex2Vocab[str(an_word_idx)]
        str4output = str_vocab + ' ' + str_word_rep + '\n'
        fpointerOutWordRep.write(str4output)
    fpointerOutWordRep.close()
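# softmax_np is used above but defined elsewhere in the repo; the commented
# sketch below only illustrates the assumed contract (a numerically stable
# softmax over a 1-D numpy array) and is not the repo's own implementation.
#
# def softmax_np(param_arr):
#     shifted = param_arr - numpy.max(param_arr)  # shift for stability
#     exp_arr = numpy.exp(shifted)
#     return exp_arr / numpy.sum(exp_arr)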
def subst_compute_originalpivot_rep(
        model_dir,
        fpathIn_instances,
        fpathIn_labels,
        fpathOut_topcandidate,
        param_fpathin_voca2index,
        param_fpathin_subst_voca2index,
        mtype='TopicalWordEmbedding'):
    '''
    For each line in fpathIn_instances, iterate over the candidate dictionary,
    construct a list of substituted instances, and find the candidates whose
    representations are closest to that of the original pivot.
    ====================
    params:
    ----------
    model_dir: saved model dir
    fpathIn_instances: input filepath, parsed instances
    fpathIn_labels: input filepath holding the pivot word of each instance
    fpathOut_topcandidate: output filepath for the top-ranked candidates
    param_fpathin_voca2index: the vocabulary-to-index dictionary
    param_fpathin_subst_voca2index: the dictionary of candidate substitutes
    mtype: model name
    return:
    ----------
    None, the top candidates are written to fpathOut_topcandidate
    '''
    # ----------load the subst_voca2index
    fpointerInSubstVocabulary2Index = open(
        param_fpathin_subst_voca2index, 'rt', encoding='utf8')
    dictSubstVocabulary2Index = \
        json.load(fpointerInSubstVocabulary2Index)
    fpointerInSubstVocabulary2Index.close()
    # ----------load the instance list
    fpointerIn_instances = open(fpathIn_instances, 'rt', encoding='utf8')
    list_instances = list(map(str.strip, fpointerIn_instances.readlines()))
    fpointerIn_instances.close()
    # ----------load the pivot list
    fpointerIn_labels = open(fpathIn_labels, 'rt', encoding='utf8')
    list_pivots = list(map(str.strip, fpointerIn_labels.readlines()))
    for idx, aline_in_pivot in enumerate(list_pivots):
        list_pivots[idx] = aline_in_pivot.split(' ')[0]
    fpointerIn_labels.close()
    # ----------load the trained model
    config = DefaultConfig()
    # config.set_attrs({'batch_size': len(list_pivot)})
    model_path = '%s/model' % model_dir
    model = topicalWordEmbedding.TopicalWordEmbedding(
        param_on_cuda=config.on_cuda,
        param_half_window_size=HALF_WINDOW_SIZE,
        param_vocabulary_size=VOCABULARY_SIZE,
        param_hidden_layer_size=HIDDEN_LAYER_SIZE,
        param_encoder_pi_size=DIM_ENCODER,
        param_topic_count=TOPIC_COUNT)
    print('Loading trained model')
    if config.on_cuda:
        model.load(model_path)
        model = model.cuda()
    else:
        model.load_cpu_from_gputrained(model_path)
        model = model.cpu()
    # ----------iterate over each instance to find the best, and output
    fpointerOut_topcandidate = open(
        fpathOut_topcandidate, 'wt', encoding='utf8')
    for idx_instance, a_candidate_instance in enumerate(list_instances):
        a_candidate_pivot = list_pivots[idx_instance]
        list_possible_instances = [a_candidate_instance]
        list_possible_pivots = [a_candidate_pivot]
        head_start_index = a_candidate_instance.find('<head>') + 6
        head_end_index = a_candidate_instance.find('</head>')
        for a_possible_pivot in dictSubstVocabulary2Index:
            if a_possible_pivot == a_candidate_pivot:
                continue
            a_possible_instance = a_candidate_instance[:head_start_index]\
                + a_possible_pivot + a_candidate_instance[head_end_index:]
            list_possible_instances.append(a_possible_instance)
            list_possible_pivots.append(a_possible_pivot)
        # ----------get a list of (pivot word, xn, wc)
        parsed_list = yelpDoclist2Parsedlist_noTokenize(
            paramDocList=list_possible_instances,
            paramPivotList=list_possible_pivots,
            paramFpathInVocabulary2Index=param_fpathin_voca2index)
        # print(len(parsed_list))
        (list_pivot, list_xn, list_wc) = zip(*parsed_list)
        # print(list_pivot)
        # ----------compute the representation list
        arr_xn = numpy.zeros((len(list_xn), VOCABULARY_SIZE),
                             dtype=numpy.int32)
        for list_xn_linenum, list_xn_vocabindex in enumerate(list_xn):
            arr_xn[list_xn_linenum, list_xn_vocabindex] += 1
        arr_xn = arr_xn.astype(numpy.float32)
        arr_wc = numpy.array(list_wc).astype(numpy.float32)
        if config.on_cuda:
            var_xn = Variable(torch.from_numpy(arr_xn)).cuda()
            var_wc = Variable(torch.from_numpy(arr_wc)).cuda()
        else:
            var_xn = Variable(torch.from_numpy(arr_xn)).cpu()
            var_wc = Variable(torch.from_numpy(arr_wc)).cpu()
        var_rep = model.forward_obtain_xn_rep(var_xn, var_wc)
        # var_zeta = model.forward_obtain_xn_zeta(var_xn, var_wc)
        arr_rep = var_rep.data.cpu().numpy()
        # arr_zeta = var_zeta.data.cpu().numpy()
        best_pivots, best_reps = find_the_best_possible_pivot(
            list_pivot, arr_rep)
        # print(list_possible_instances)
        # break
        # print(list_pivot[0], best_pivots)
        fpointerOut_topcandidate.write(' '.join(best_pivots) + '\n')
    fpointerOut_topcandidate.close()
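# find_the_best_possible_pivot is defined elsewhere in the repo; the commented
# sketch below is only an assumption of its contract: row 0 of arr_rep is the
# original pivot, the remaining rows are candidates, and candidates are ranked
# by cosine similarity to the original representation. The name top_k and the
# exact ranking rule are illustrative, not the repo's implementation.
#
# def find_the_best_possible_pivot(param_list_pivot, param_arr_rep, top_k=10):
#     original_rep = param_arr_rep[0]
#     candidate_reps = param_arr_rep[1:]
#     # cosine similarity between the original pivot and every candidate
#     norms = (numpy.linalg.norm(candidate_reps, axis=1)
#              * numpy.linalg.norm(original_rep) + 1e-12)
#     similarities = candidate_reps.dot(original_rep) / norms
#     ranked = numpy.argsort(-similarities)[:top_k]
#     best_pivots = [param_list_pivot[i + 1] for i in ranked]
#     best_reps = [candidate_reps[i] for i in ranked]
#     return best_pivots, best_reps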
def display_sorted_topic_matrix(model_dir,
                                param_fpathin_index2vocab,
                                mtype='TopicalWordEmbedding'):
    '''
    Get the topic matrix; for each topic, pair each vocabulary index with its
    weight, sort, map the top indices back to words and print them.
    ====================
    params:
    ----------
    model_dir: saved model dir
    param_fpathin_index2vocab: index-to-vocabulary JSON file
    mtype: model name
    return:
    ----------
    None, output to the console
    '''
    # ----------load the index2vocabulary
    fpointerInIndex2Vocabulary = open(param_fpathin_index2vocab,
                                      'rt',
                                      encoding='utf8')
    dictIndex2Vocabulary = \
        json.load(fpointerInIndex2Vocabulary)
    fpointerInIndex2Vocabulary.close()
    # ----------load the trained model
    config = DefaultConfig()
    # config.set_attrs({'batch_size': len(list_pivot)})
    model_path = '%s/model' % model_dir
    model = topicalWordEmbedding.TopicalWordEmbedding(
        param_on_cuda=config.on_cuda,
        param_half_window_size=HALF_WINDOW_SIZE,
        param_vocabulary_size=VOCABULARY_SIZE,
        param_hidden_layer_size=HIDDEN_LAYER_SIZE,
        param_encoder_pi_size=DIM_ENCODER,
        param_topic_count=TOPIC_COUNT)
    print('Loading trained model')
    if config.on_cuda:
        model.load(model_path)
        model = model.cuda()
    else:
        model.load_cpu_from_gputrained(model_path)
        model = model.cpu()
    # ----------get the topic matrix
    var_topic_matrix = model.vae_decoder.MATRIX_decoder_beta
    arr_topic_matrix = var_topic_matrix.data.cpu().numpy()
    itemgetter_1 = operator.itemgetter(1)
    for topic_index in range(TOPIC_COUNT):
        list_voca = list(range(VOCABULARY_SIZE))
        list_topicvoca = arr_topic_matrix[topic_index, :].tolist()
        # pair each vocabulary index with its topic weight
        list_voca_topicvoca = list(zip(list_voca, list_topicvoca))
        list_voca_topicvoca.sort(key=itemgetter_1, reverse=True)
        (list_voca, list_topicvoca) = zip(*list_voca_topicvoca)
        top_list_voca = list_voca[0:50]
        top_list_voca_mapped = [
            dictIndex2Vocabulary[str(i)] for i in top_list_voca
        ]
        print(top_list_voca_mapped)
        list_voca = None
        list_topicvoca = None
        list_voca_topicvoca = None
    return None
def compute_pivot_rep(model_dir,
                      input_doc_list,
                      param_fpathin_voca2index,
                      mtype='TopicalWordEmbedding'):
    '''
    Given a list of documents, turn the documents into instances, enumerate
    the instances and compute the pivot representations.
    ====================
    params:
    ----------
    model_dir: saved model dir
    input_doc_list: input documents, unparsed
    param_fpathin_voca2index: the vocabulary-to-index dictionary
    mtype: model name
    return:
    ----------
    (pivot word list, rep list, topic rep list)
    '''
    # ----------load the voca2index
    # fpointerInVocabulary2Index = open(
    #     param_fpathin_voca2index,
    #     'rt',
    #     encoding='utf8')
    # dictVocabulary2Index = \
    #     json.load(fpointerInVocabulary2Index)
    # fpointerInVocabulary2Index.close()
    # ----------get a list of (pivot word, xn, wc)
    oYelpPreprocessor = YelpPreprocessor()
    parsed_list = oYelpPreprocessor.yelpDoclist2Parsedlist(
        paramDocList=input_doc_list,
        paramFpathInVocabulary2Index=param_fpathin_voca2index)
    (list_pivot, list_xn, list_wc) = zip(*parsed_list)
    # ----------load the trained model
    config = DefaultConfig()
    config.set_attrs({'batch_size': len(list_pivot)})
    model_path = '%s/model' % model_dir
    model = topicalWordEmbedding.TopicalWordEmbedding(
        param_on_cuda=config.on_cuda,
        param_half_window_size=HALF_WINDOW_SIZE,
        param_vocabulary_size=VOCABULARY_SIZE,
        param_hidden_layer_size=HIDDEN_LAYER_SIZE,
        param_encoder_pi_size=DIM_ENCODER,
        param_topic_count=TOPIC_COUNT)
    print('Loading trained model')
    if config.on_cuda:
        model.load(model_path)
        model = model.cuda()
    else:
        model.load_cpu_from_gputrained(model_path)
        model = model.cpu()
    # ----------compute the representation list
    arr_xn = numpy.zeros((len(list_xn), VOCABULARY_SIZE), dtype=numpy.int32)
    for list_xn_linenum, list_xn_vocabindex in enumerate(list_xn):
        arr_xn[list_xn_linenum, list_xn_vocabindex] += 1
    arr_xn = arr_xn.astype(numpy.float32)
    arr_wc = numpy.array(list_wc).astype(numpy.float32)
    if config.on_cuda:
        var_xn = Variable(torch.from_numpy(arr_xn)).cuda()
        var_wc = Variable(torch.from_numpy(arr_wc)).cuda()
    else:
        var_xn = Variable(torch.from_numpy(arr_xn)).cpu()
        var_wc = Variable(torch.from_numpy(arr_wc)).cpu()
    var_rep = model.forward_obtain_xn_rep(var_xn, var_wc)
    var_zeta = model.forward_obtain_xn_zeta(var_xn, var_wc)
    arr_rep = var_rep.data.cpu().numpy()
    arr_zeta = var_zeta.data.cpu().numpy()
    return list_pivot, arr_rep, arr_zeta
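# Hypothetical usage sketch for compute_pivot_rep. The model directory,
# vocabulary path and documents below are placeholders, not files shipped
# with the repo:
#
# example_docs = [
#     'the pizza was great but the service was slow',
#     'this laptop has a bright screen and a loud fan',
# ]
# pivots, reps, zetas = compute_pivot_rep(
#     model_dir='checkpoints/0',
#     input_doc_list=example_docs,
#     param_fpathin_voca2index='data/vocabulary2index.json')
# # reps: one pivot representation per extracted instance
# # zetas: the corresponding topic-distribution (zeta) rows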
def output_sorted_topic_matrix(model_dir,
                               param_fpathin_index2vocab,
                               param_fpathout_topic_matrix,
                               mtype='TopicalWordEmbedding'):
    '''
    Get the topic matrix; for each topic, sort the vocabulary by topic weight,
    filter out noisy tokens, and write the top 10 words to a file.
    ====================
    params:
    ----------
    model_dir: saved model dir
    param_fpathin_index2vocab: index-to-vocabulary JSON file
    mtype: model name
    param_fpathout_topic_matrix: output path for the topic matrix summary
    return:
    ----------
    None, written to param_fpathout_topic_matrix
    '''
    # ----------load the index2vocabulary
    fpointerInIndex2Vocabulary = open(param_fpathin_index2vocab,
                                      'rt',
                                      encoding='utf8')
    dictIndex2Vocabulary = \
        json.load(fpointerInIndex2Vocabulary)
    fpointerInIndex2Vocabulary.close()
    # ----------load the trained model
    config = DefaultConfig()
    # config.set_attrs({'batch_size': len(list_pivot)})
    model_path = '%s/model' % model_dir
    model = topicalWordEmbedding.TopicalWordEmbedding(
        param_on_cuda=config.on_cuda,
        param_half_window_size=HALF_WINDOW_SIZE,
        param_vocabulary_size=VOCABULARY_SIZE,
        param_hidden_layer_size=HIDDEN_LAYER_SIZE,
        param_encoder_pi_size=DIM_ENCODER,
        param_topic_count=TOPIC_COUNT)
    print('Loading trained model')
    if config.on_cuda:
        model.load(model_path)
        model = model.cuda()
    else:
        model.load_cpu_from_gputrained(model_path)
        model = model.cpu()
    # ----------get and output the topic matrix
    fpointerOutTopicMatrix = open(param_fpathout_topic_matrix,
                                  'wt',
                                  encoding='utf8')
    var_topic_matrix = model.vae_decoder.MATRIX_decoder_beta
    arr_topic_matrix = var_topic_matrix.data.cpu().numpy()
    itemgetter_1 = operator.itemgetter(1)
    for topic_index in range(TOPIC_COUNT):
        list_voca = list(range(VOCABULARY_SIZE))
        list_topicvoca = arr_topic_matrix[topic_index, :].tolist()
        # pair each vocabulary index with its topic weight
        list_voca_topicvoca = list(zip(list_voca, list_topicvoca))
        list_voca_topicvoca.sort(key=itemgetter_1, reverse=True)
        (list_voca, list_topicvoca) = zip(*list_voca_topicvoca)
        top_list_voca = list_voca[0:50]
        top_list_voca_mapped = [
            dictIndex2Vocabulary[str(i)] for i in top_list_voca
        ]
        # drop tokens containing digits or punctuation
        chars_to_exclude = '~!@#$%^&*()0123456789-+_=.,/?\\":\';|<>[]'
        top_list_voca_cleaned = [
            vocastr for vocastr in top_list_voca_mapped
            if not any(a_char in vocastr for a_char in chars_to_exclude)
        ]
        top_list_voca_top10 = top_list_voca_cleaned[:10]
        fpointerOutTopicMatrix.write('topic %03d ' % topic_index
                                     + ' '.join(top_list_voca_top10) + '\n')
        list_voca = None
        list_topicvoca = None
        list_voca_topicvoca = None
    fpointerOutTopicMatrix.close()
    return None
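# Hypothetical usage sketch for output_sorted_topic_matrix; every path below
# is a placeholder, not a file shipped with the repo:
#
# output_sorted_topic_matrix(
#     model_dir='checkpoints/0',
#     param_fpathin_index2vocab='data/index2vocabulary.json',
#     param_fpathout_topic_matrix='output/topic_top10_words.txt')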
def train(**kwargs):
    '''
    Begin training the model.
    *args: train(1, 2, 3, 4, 5) => args = (1, 2, 3, 4, 5),
        i.e. *args collects the positional arguments into a tuple
    **kwargs: train(a=1, b=2, c=3, d=4)
        => kwargs['a'] = 1, kwargs['b'] = 2, kwargs['c'] = 3, kwargs['d'] = 4,
        i.e. **kwargs collects the keyword arguments into a dict
    a function taking a normal argument, *args and **kwargs must be
    declared as: def train(arg, *args, **kwargs)
    '''
    saveid = latest_save_num() + 1
    # the save_path is SAVE_DIR/saveid
    save_path = '%s/%d' % (SAVE_DIR, saveid)
    print("logger save path: %s" % (save_path))
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    log_path_each_save = '%s/log.txt' % save_path
    model_path_each_save = '%s/model' % save_path
    logger = get_logger(log_path_each_save)
    config = DefaultConfig()
    # settings here, also whether to run on cuda
    config.set_attrs(kwargs)
    # print(config.get_attrs())
    epochs = config.epochs
    batch_size = config.batch_size
    # determine whether to run on cuda
    if config.on_cuda:
        config.on_cuda = torch.cuda.is_available()
        if not config.on_cuda:
            logger.info('CUDA is unavailable; although the model was '
                        'configured to run on CUDA, it will run on the CPU')
    # 300 in our model
    # 1024 is the Elmo size;
    # the concatenated hidden size is supposed to be the Elmo size,
    # however, any size is OK, it depends on the setting
    # the attention size should be a smoothed representation of character-emb
    if config.model == 'TopicalWordEmbedding':
        model = topicalWordEmbedding.TopicalWordEmbedding(
            param_on_cuda=config.on_cuda,
            param_half_window_size=HALF_WINDOW_SIZE,
            param_vocabulary_size=VOCABULARY_SIZE,
            param_hidden_layer_size=HIDDEN_LAYER_SIZE,
            param_encoder_pi_size=DIM_ENCODER,
            param_topic_count=TOPIC_COUNT)
    if config.on_cuda:
        logger.info('Model run on GPU')
        model = model.cuda()
        logger.info('Model initialized on GPU')
    else:
        logger.info('Model run on CPU')
        model = model.cpu()
        logger.info('Model initialized on CPU')
    # print('logger-setted', file=sys.stderr)
    # output the model name to the log
    logger.info(model.modelname)
    # output the configuration to the log
    logger.info(str(config.get_attrs()))
    # read in the trainset and the trial set
    # Train Set
    train_data_manager = DataManager(batch_size, TRAINING_INSTANCES)
    train_data_manager.load_dataframe_from_file(TRAIN_SET_PATH)
    # set the optimizer parameters,
    # such as the learning rate and weight_decay;
    # Adam is a method for stochastic optimization
    # load the learning rate from config, that is settings.py
    lr = config.learning_rate
    # params_iterator_requires_grad can only be iterated once
    params_iterator_requires_grad = filter(
        lambda trainingParams: trainingParams.requires_grad,
        model.parameters())
    # print(len(list(params_iterator_requires_grad)))  # 25 parameters
    # weight decay is the L2 penalty (L2 regularization),
    # usually added to the cost (loss) function,
    # for example C = C_0 + penalty,
    # to avoid overfitting
    optimizer = torch.optim.Adam(
        params_iterator_requires_grad,
        lr=lr,
        weight_decay=config.weight_decay)
    # By default, the losses are averaged over observations
    # for each minibatch.
    # However, if the field size_average is set to False,
    # the losses are instead summed for each minibatch.
    # The CrossEntropyLoss:
    # my selector in my notebook = loss + selecting strategy
    # (often selecting the least loss)
    # criterion = torch.nn.CrossEntropyLoss(size_average=False)
    # once you have the loss function, you also have to train the
    # parameters in g(x), which will be used for prediction
    # the loss is calculated after the smoothing method,
    # that is the L2 penalty mentioned in torch.optim.Adam
    loss_meter = meter.AverageValueMeter()
    # get the confusion matrix, which looks as follows:
    # confusion_matrix = meter.ConfusionMeter(
    #     CLASS_COUNT)
    '''
                         class1 pred  class2 pred  class3 pred
    class1 ground truth  [[4,          1,           1]
    class2 ground truth   [2,          3,           1]
    class3 ground truth   [1,          2,           9]]
    '''
    model.train()
    # pre_loss = 1e100
    # best_acc = 0
    smallest_loss = 0x7fffffffffffffffffffffffffffffff
    for epoch in range(epochs):
        '''
        an epoch, that is, train on all batches (all the data) once
        '''
        loss_meter.reset()
        # confusion_matrix.reset()
        train_data_manager.reshuffle_dataframe()
        # it was ceiled, so it is "instances/batch_size + 1"
        n_batch = train_data_manager.n_batches()
        batch_index = 0
        for batch_index in range(0, n_batch - 1):
            # this operation is time consuming
            xn, wc = train_data_manager.next_batch()
            # long seems to trigger a cuda error,
            # cuda cannot handle long here
            # a Variable by default has requires_grad=False
            # t = torch.Tensor(1)
            # t.to(torch.float32) <=> t.float()
            # t.to(torch.int64) <=> t.long()
            var_xn = Variable(torch.from_numpy(xn).float())
            # print(x.size())
            var_wc = Variable(torch.from_numpy(wc).float(),
                              requires_grad=False)
            # y = y - 1
            # print(y.size())
            # #########################logger.info('Begin fetching a batch')
            loss = eval_batch(model, var_xn, var_wc, config.on_cuda)
            # #########################logger.info(
            #     'End fetching a batch, begin optimizer')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # #########################logger.info('End optimizer')
            # .data is the tensor, [0] is a Python number;
            # for a 0-dim tensor, .item() gets the Python number,
            # for a 1-dim tensor, .tolist() gets a list
            loss_meter.add(loss.data.item())
            # confusion_matrix.add(scores.data, y.data)
            # every 200 batches, log the running loss of the epoch
            if (batch_index + 1) % 200 == 0:
                # for 2 LongTensors, 17 / 18 = 0
                # accuracy = corrects.float() / config.batch_size
                # .value()[0] is the loss value
                logger.info('TRAIN\tepoch: %d/%d\tbatch: %d/%d\tloss: %f' % (
                    epoch, epochs, batch_index, n_batch,
                    loss_meter.value()[0]))
        # abandon the tail batch when possible, because it would trigger
        # duplicate context windows and thus cause loss == nan
        if TRAINING_INSTANCES % batch_size == 0:
            train_data_manager.set_current_cursor_in_dataframe_zero()
        else:
            # train_data_manager.set_current_cursor_in_dataframe_zero()
            print('!!!!!!!!!!!Enter tail batch')
            # the value of batch_index is inherited from the loop above
            batch_index += 1
            (xn, wc) = train_data_manager.tail_batch_nobatchpadding()
            # long seems to trigger a cuda error
            # t = torch.Tensor(1)
            # t.to(torch.float32) <=> t.float()
            # t.to(torch.int64) <=> t.long()
            var_xn = Variable(torch.from_numpy(xn).float())
            var_wc = Variable(torch.from_numpy(wc).float(),
                              requires_grad=False)
            # y = y - 1
            loss = eval_batch(model, var_xn, var_wc, config.on_cuda)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.data.item())
            # confusion_matrix.add(scores.data, y.data)
            # if this is a reporting batch, log the running loss
            if (batch_index + 1) % 200 == 0:
                # for 2 LongTensors, 17 / 18 = 0
                # accuracy = corrects.float() / config.batch_size
                # print("accuracy = %f, corrects = %d" % (accuracy, corrects))
                # .value()[0] is the loss value
                # y = Variable(
                #     torch.LongTensor(y), requires_grad=False)
                logger.info('TRAIN\tepoch: %d/%d\tbatch: %d/%d\tloss: %f' % (
                    epoch, epochs, batch_index, n_batch,
                    loss_meter.value()[0]))
            # print('!!!!!!!!!!!Exit tail batch')
        # after an epoch the model should be evaluated
        # switch to evaluation mode
        model.eval()
        # if (batch_index + 1) % 25 == 0:
        # every 50 batches peek the accuracy and keep the best accuracy
        # confusion_matrix_value = confusion_matrix.value()
        # acc = 0
        # for i in range(CLASS_COUNT):
        #     # correct prediction count
        #     acc += confusion_matrix_value[i][i]
        # # the overall accuracy in an epoch
        # acc = acc / confusion_matrix_value.sum()
        # loss_meter.value() returns (mean, std); access the mean with [0],
        # i.e. the loss averaged over the batches of this epoch
        the_overall_averaged_loss_in_epoch = loss_meter.value()[0]
        logger.info('epoch: %d/%d\taverage_loss: %f' % (
            epoch, epochs, the_overall_averaged_loss_in_epoch))
        # switch back to training mode
        model.train()
        # if the loss decreased, then save the model and
        # decay the learning rate
        if loss_meter.value()[0] < smallest_loss:
            # save the model
            model.save(model_path_each_save)
            logger.info('model saved to %s' % model_path_each_save)
            # change the learning rate
            if epoch < 4:
                lr = lr * config.lr_decay
            else:
                if epoch < 8:
                    lr = lr * 0.97
                else:
                    lr = lr * 0.99
            logger.info('learning_rate changed to %f' % lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            smallest_loss = loss_meter.value()[0]
        else:
            print('the loss_meter = ', loss_meter.value()[0])
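# Hypothetical usage sketch for train(). The keyword arguments map onto
# DefaultConfig attributes referenced above (epochs, batch_size, on_cuda,
# learning_rate, weight_decay); the values shown are placeholders, not the
# repo's recommended settings:
#
# train(epochs=20,
#       batch_size=256,
#       on_cuda=True,
#       learning_rate=1e-3,
#       weight_decay=1e-5)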