def get_all_train_image(train_dir=train_dir):
    """Load every training image under *train_dir* into one array.

    Expects the Kaggle DSB-style layout ``<train_dir>/<id>/images/<id>.png``
    where each directory name is the image id.

    Args:
        train_dir: root directory of the training set; defaults to the
            module-level ``train_dir`` (bound at definition time).

    Returns:
        The union (via ``array_union``) of all images read by
        ``imread_collection`` as a numpy array.
    """
    train_image_ids = os.listdir(train_dir)
    logger.info('read all train images, total: %d', len(train_image_ids))
    # os.path.join instead of hand-built '\\' separators: identical on
    # Windows, and portable to other platforms.
    train_image_pathes = [
        os.path.join(train_dir, image_id, 'images', image_id + '.png')
        for image_id in train_image_ids
    ]
    return array_union(np.array(imread_collection(train_image_pathes)))
def Net():
    """Build and compile the stacked-LSTM regression model.

    Architecture: two LSTM layers (25 -> 10 units, dropout 0.1 each, the
    first returning full sequences) followed by a 6-unit dense output,
    compiled with Adam and mean-squared-error loss.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    logger.info('net init...')
    model = Sequential()
    stack = [
        LSTM(units=25, input_shape=(None, 25), dropout=0.1,
             return_sequences=True),
        LSTM(units=10, dropout=0.1),
        Dense(6),
    ]
    for layer in stack:
        model.add(layer)
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model
def valid():
    """Validate the epoch-14 saved model against the full validation set.

    Loads the model saved for ``epoch`` (falling back to a freshly
    initialised net if loading fails), predicts on the whole validation
    split, binarises the sigmoid outputs at 0.5 and logs the accuracy
    computed by ``total_accuracy``.
    """
    # =====parameters ===========
    col, tokenizer = db_init()
    use_loaded_model = True
    batch_idx = 0
    # ==========currnt==========
    epoch = 14  # epoch whose checkpoint is evaluated
    # =======net ===============
    net = Net()
    if use_loaded_model:
        try:
            net = load_model(model_dir + model_name + '_' + str(epoch) + '_'
                             + model_ext)
            logger.info('init net from local, current epoch is %d', epoch)
        except Exception:
            # Previously swallowed silently; record why we fell back to a
            # freshly initialised (untrained) net.
            logger.exception(
                'failed to load model for epoch %d, using fresh net', epoch)
    logger.info('validation start')
    valid_data, valid_labels = get_valid_batch_data_labels(
        csv_path=train_data_path, col=col, tokenizer=tokenizer,
        use_batch=False)
    if len(valid_data) > 1:
        logger.info('valid data length is %d', len(valid_data))

        def binarize(label):
            # Threshold each sigmoid output at 0.5 -> hard 0/1 label.
            return [1 if item > 0.5 else 0 for item in label]

        predict_labels = net.predict(valid_data, batch_size=150)
        predict_labels = np.array([binarize(item) for item in predict_labels])
        logger.info("pridict labels shape: %d,%d", predict_labels.shape[0],
                    predict_labels.shape[1])
        accu = total_accuracy(valid_labels, predict_labels)
        logger.info("accuracy: %f", accu)
    else:
        logger.debug('error!,data in batch_index %d len(data) <= 1', batch_idx)
def train():
    """Batch-train the net for epochs 1..n_epoch-1, saving after each epoch.

    Optionally resumes from a locally saved checkpoint for the starting
    ``epoch``. Each epoch iterates batches until the data source returns a
    batch with length <= 1, then saves the model for that epoch.
    """
    # =====parameters ===========
    col, tokenizer = db_init()
    use_loaded_model = True
    batch_size = 64
    batch_idx = 0
    n_epoch = 15
    # ==========currnt==========
    epoch = 1  # starting epoch; checkpoint for this epoch is loaded if present
    # =======net ===============
    net = Net()
    if use_loaded_model:
        try:
            net = load_model(model_dir + model_name + '_' + str(epoch) + '_'
                             + model_ext)
            logger.info('init net from local, current epoch is %d', epoch)
        except Exception:
            # Previously swallowed silently; record why training starts from
            # a freshly initialised net instead.
            logger.exception(
                'failed to load model for epoch %d, training from scratch',
                epoch)
    # =========train ===============
    while epoch < n_epoch:
        logger.info('%s %d / %d epoch start', now_str(), epoch, n_epoch)
        while True:
            if batch_idx % 100 == 0:
                logger.info('%s %d epoch %d batch start', now_str(), epoch,
                            batch_idx)
            data, labels = get_train_batch_data_labels(
                csv_path=train_data_path, col=col, tokenizer=tokenizer,
                batch_index=batch_idx, batch_size=batch_size)
            if len(data) > 1:
                sample_weight = calc_sample_weight(labels)
                net.train_on_batch(data, labels, sample_weight=sample_weight)
            else:
                # A short batch marks end-of-data for this epoch.
                logger.debug('error!,data in batch_index %d len(data) <= 1',
                             batch_idx)
                break
            batch_idx += 1
        # ===save=====
        logger.info('save model...')
        batch_idx = 0
        net.save(model_dir + model_name + '_' + str(epoch) + '_' + model_ext)
        epoch += 1
def predict():
    """Run the saved model over the test set and return labelled rows.

    Loads the checkpoint for the module-level ``epoch``, predicts each test
    line, and returns a DataFrame with the six toxicity label columns.

    Returns:
        pd.DataFrame: one row per test line with columns id, comment_text
        and the six label columns.
    """
    logger.info('start predict')
    # NOTE(review): `epoch` is not defined in this function — this relies on
    # a module-level `epoch` existing at call time (NameError otherwise).
    # Also note the filename comes out as e.g. 'model_14_.h5' (trailing
    # underscore before the extension) — presumably intentional, verify
    # against the files actually saved by train().
    model_path = 'model\\' + 'model_' + str(epoch) + '_' + '.h5'
    logger.info('local net path is %s', model_path)
    net = load_model(model_path)
    assert net is not None
    logger.info('net loaded')
    df = test_df()
    data = df.values
    col, tokenizer = db_init()
    # presumably item[0] is the id and item[1] the comment text — confirm
    # against test_df()'s column order.
    lines = [Line(item[0], item[1], col, tokenizer) for item in data]
    # Two separate passes: first predict every line, then convert all
    # predictions to labels.
    for line in lines:
        line.predict(net)
    for line in lines:
        line.to_labels()
    new_data = [line.to_data_line() for line in lines]
    new_df = pd.DataFrame(data=new_data, columns=[
        "id", "comment_text", "toxic", "severe_toxic", "obscene", "threat",
        "insult", "identity_hate"
    ])
    return new_df
def train():
    """Train the segmentation net from `epoch` to `num_epoches`.

    Optionally resumes from a local checkpoint, trains on batches from
    ``batch_data_generator``, evaluates against the held-out test set after
    every epoch, and saves a checkpoint per epoch.

    NOTE(review): this redefines a `train` defined earlier in the file; the
    later definition shadows the earlier one — confirm which is intended.
    """
    # ======= paramter ==============
    use_local_net = False
    epoch = 20
    num_epoches = 40
    batch_size = 32
    # =======net ================
    model_path = 'model_without_sigmoid\\' + \
        'model' + '_' + str(epoch - 1) + model_ext
    if use_local_net:
        model = load_model(model_path)
        logger.info('use local model from %d epoch', epoch)
    else:
        model = net()
        logger.info('model inited')
    test_cells, test_masks = get_test_data()
    # ===========train =============
    while epoch < num_epoches:
        logger.info('%s epoch %d started', now_str(), epoch)
        generator = batch_data_generator(batch_size=batch_size,
                                         use_shuffle=True)
        batch_idx = 0
        for cell_mask in generator:
            cell_list = np.array([item[0] for item in cell_mask])
            mask_list = np.array([item[1] for item in cell_mask])
            model.train_on_batch(cell_list, mask_list)
            batch_idx += 1
            # BUG FIX: was `if batch_idx % 20:` which logged on every batch
            # EXCEPT multiples of 20; log every 20th batch as intended.
            if batch_idx % 20 == 0:
                logger.info('%s epoch %d batch_index %d', now_str(), epoch,
                            batch_idx)
        # ===========test==================
        predict_masks = model.predict(test_cells, batch_size=64)
        error = test_error(test_masks, predict_masks)
        logger.info('predict error %f in epoch %d', error, epoch)
        model.save('model_without_sigmoid\\' + 'model' + '_' + str(epoch)
                   + model_ext)
        logger.info('%s epoch %d model saved', now_str(), epoch)
        epoch += 1
def save_csv(df, path='./predict.csv', encoding='utf-8'):
    """Write *df* to *path* as CSV and log the destination.

    Args:
        df: the pandas DataFrame to persist.
        path: output file path (default './predict.csv').
        encoding: text encoding passed to ``DataFrame.to_csv``; previously
            hard-coded, now a parameter with the same 'utf-8' default so
            existing callers are unaffected.
    """
    df.to_csv(path, encoding=encoding)
    logger.info('predict result saved to %s', path)