def home():
    if request.method == 'GET':
        return render_template('home.html')
    if request.method == 'POST':
        passed = False  # initialize so the check below is safe when no valid file is uploaded
        image_file = check_image_file(request)
        if image_file:
            try:
                filename = save_image(image_file)
                passed = True
            except Exception:
                passed = False
        if passed:
            img_url = url_for('images', filename=filename)
            args.img_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            result = evaluate(model, args)
            _format = request.args.get('format')
            if _format == 'json':
                return jsonify(str(result))
            else:
                return render_template('predict.html', result=result, img_url=img_url)
        else:
            return redirect(url_for('error'))
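# A minimal sketch of the two upload helpers used above, assuming a standard
# Flask file-upload flow; ALLOWED_EXTENSIONS and the 'file' form field name
# are assumptions, not taken from the source.
from werkzeug.utils import secure_filename

ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}  # hypothetical whitelist

def check_image_file(request):
    # Return the uploaded file object if it looks like an allowed image, else None.
    file = request.files.get('file')
    if file and file.filename and '.' in file.filename and \
            file.filename.rsplit('.', 1)[-1].lower() in ALLOWED_EXTENSIONS:
        return file
    return None

def save_image(image_file):
    # Persist the upload under a sanitized name and return that name.
    filename = secure_filename(image_file.filename)
    image_file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    return filename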
def plot_confusion_matrix():
    # Keep track of correct guesses in a confusion matrix
    confusion = torch.zeros(n_categories, n_categories)
    n_confusion = 10000

    # Go through a bunch of examples and record which are correctly guessed
    for i in range(n_confusion):
        category, line, category_tensor, line_tensor = randomTrainingPair()
        output = evaluate(line_tensor)
        guess, guess_i = categoryFromOutput(output)
        category_i = all_categories.index(category)
        confusion[category_i][guess_i] += 1

    # Normalize by dividing every row by its sum
    for i in range(n_categories):
        confusion[i] = confusion[i] / confusion[i].sum()

    # Set up plot
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(confusion.numpy())
    fig.colorbar(cax)

    # Set up axes
    ax.set_xticklabels([''] + all_categories, rotation=90)
    ax.set_yticklabels([''] + all_categories)

    # Force label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    # sphinx_gallery_thumbnail_number = 2
    plt.show()
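# For context, a minimal sketch of categoryFromOutput as defined in the PyTorch
# character-RNN classification tutorial this snippet follows; treat it as an
# assumption if the surrounding project defines it differently.
def categoryFromOutput(output):
    top_n, top_i = output.topk(1)  # largest log-probability and its index
    category_i = top_i[0].item()   # tensor index -> plain int
    return all_categories[category_i], category_i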
def predict():
    filename = request.form.get('fId')
    df = pd.read_csv(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    res = evaluate(df)
    return render_template(
        'table.html',
        tables=[
            df.to_html(classes="table table-striped table-bordered",
                       max_rows=30,
                       index=False,
                       header=True,
                       border=False)
        ],
        prediction='Sato prediction: ' + str(res),
        showButton=False)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--format', type=str,
                        choices=('json', 'csv'), default='json')
    args = parser.parse_args()

    if args.format == 'json':
        df = load_from_json()
    elif args.format == 'csv':
        df = pd.read_csv(sys.stdin, nrows=100)

    sample_columns = sample(df, 6, 5)
    # sample_columns = sliding_window(df.columns.tolist(), 6)

    sys.stderr.write('Loading model...\n')
    import predict  # deferred import: importing predict loads the model

    sys.stderr.write(f'Extracting features from {len(df.columns)} columns...\n')
    feature_dict, sherlock_features = predict.extract(df)

    sys.stderr.write(f'Running {len(sample_columns)} predictions...\n')
    predictions = []
    for cols in tqdm.tqdm(sample_columns):
        predictions.append(
            predict.evaluate(df, list(cols), feature_dict, sherlock_features))

    # Collect per-column predictions across all sampled column groups.
    col_predictions = collections.defaultdict(list)
    for i, cols in enumerate(sample_columns):
        for j, col in enumerate(cols):
            col_predictions[col].append(predictions[i][j])

    # Report the most common prediction for each column.
    for c, p in col_predictions.items():
        print(c, collections.Counter(p).most_common(1)[0][0])
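# A minimal sketch of load_from_json, assuming it mirrors the CSV branch and
# reads a JSON-serialized table from stdin; the 'records' orientation is an
# assumption.
def load_from_json():
    return pd.read_json(sys.stdin, orient='records')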
def train_without_classifier():
    batch_per_epoch = train_loader.batch_num
    print_every = 3
    save_every = 3
    patience = 10
    early_stopping = None
    best_f1_batch = 0
    best_f1 = 0
    start_time = time.time()

    for epoch in range(1, args.epochs + 1):
        # NOTE: this early-stopping break is commented out in the source, so
        # the early_stopping flag set below currently has no effect.
        # if early_stopping:
        #     logging.info("-----------------------------")
        #     logging.info("early stopping at epoch %d" % epoch)
        #     logging.info("best epoch %d" % best_f1_batch)
        #     break
        pr_train_loss = 0
        for batch_idx, batch in enumerate(train_loader.next_batch()):
            # seq: (batch_size, seq_len)
            # seq_len: (batch,)
            # sub: (batch,)
            # rel: (batch, rel_len)
            # rel_len: (batch,)
            # crel: (batch, crel_len)
            # crel_label: (batch, crel_len)
            # crel_len: (batch,)
            seq, seq_len, sub, rel, rel_len, crel, crel_label, crel_len = batch
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            loss = train_epoch(seq, seq_len, sub, rel, rel_len,
                               crel, crel_label, crel_len, encoder, decoder)
            loss.backward()
            pr_train_loss += loss.data.item()
            encoder_optimizer.step()
            decoder_optimizer.step()

        if epoch % print_every == 0:
            BiLSTM = (args.atten_mode == "BiLSTM")
            acc, recall, precision, f1 = evaluate(
                valid_loader, encoder, decoder, device,
                start=rel_vocab.lookup("<_start>"), biLSTM=BiLSTM)
            logging.info("-----------------------------")
            logging.info("epoch %d" % epoch)
            logging.info("valid accuracy : %f" % acc)
            logging.info("valid recall : %f" % recall)
            logging.info("valid precision : %f" % precision)
            logging.info("valid f1 : %f" % f1)
            logging.info("train loss : %f" % pr_train_loss)
            if f1 > best_f1:
                best_f1 = f1
                best_f1_batch = epoch
                torch.save(encoder, args.save_path + "/best_encoder.%s.pth" % name)
                torch.save(decoder, args.save_path + "/best_decoder.%s.pth" % name)
            elif f1 < best_f1 and epoch - best_f1_batch >= patience:
                early_stopping = True

        if epoch % save_every == 0:
            torch.save(encoder, args.save_path + "/encoder.%s.pth" % name)
            torch.save(decoder, args.save_path + "/decoder.%s.pth" % name)

    return best_f1
from config import config
import tensorflow as tf
import predict
from PIL import Image
from tkinter import *
from tkinter import filedialog

root = Tk()
root.filename = filedialog.askopenfilename(initialdir="E:/Images",
                                           title="Select an image file!")
print(root.filename)
root.withdraw()

image_path = root.filename
result, attention_plot = predict.evaluate(image_path)
print('Prediction Caption:', ' '.join(result))
predict.plot_attention(image_path, result, attention_plot)
def take():
    print("Take starting")
    category, dataset = c.department_class()
    dataset = preprocess.data_clean(dataset)
    dfwater, dfpwd, dfksrtc, dfkseb, dfenv = dataframes.dataframing(dataset)
    water_lemm, pwd_lemm, ksrtc_lemm, kseb_lemm, env_lemm = tokenise.tokenisation(
        dfwater, dfpwd, dfksrtc, dfkseb, dfenv)
    water_freq, pwd_freq, ksrtc_freq, kseb_freq, env_freq = frequency.word_frequency(
        water_lemm, pwd_lemm, ksrtc_lemm, kseb_lemm, env_lemm)
    water_lis, pwd_lis, ksrtc_lis, kseb_lis, env_lis = topwords.most_repeated_keywords(
        dfwater, dfpwd, dfksrtc, dfkseb, dfenv, water_freq, pwd_freq,
        ksrtc_freq, kseb_freq, env_freq, "manual")

    # subject = request.form['subject']
    subject = request.args.get('subject')
    mess = request.args.get('message')
    # message = request.form['message']
    message = subject + " " + mess

    keywords, item = testdata.test(message)
    water_flag, pwd_flag, kseb_flag, ksrtc_flag, env_flag, \
        water_dept, pwd_dept, kseb_dept, ksrtc_dept, env_dept, \
        flag_env, flag_kseb, flag_ksrtc, flag_pwd, flag_water = predict.evaluate(
            keywords, item, water_lis, env_lis, pwd_lis, ksrtc_lis, kseb_lis,
            category, nlp)

    # NOTE: this concatenation is immediately overwritten by the list below,
    # as in the source.
    name = water_dept + pwd_dept + kseb_dept + ksrtc_dept + env_dept
    name = [
        'Water Authority', 'PWD', 'KSEB', 'KSRTC',
        'Environment and climate change'
    ]
    flags = [water_flag, pwd_flag, kseb_flag, ksrtc_flag, env_flag,
             flag_env, flag_kseb, flag_ksrtc, flag_pwd, flag_water]

    # Prediction list: the last five flags map back onto the same department
    # names in reverse order.
    name_for_flag = [name[0], name[1], name[2], name[3], name[4],
                     name[4], name[2], name[3], name[1], name[0]]
    predicted_class = []
    for flag_value, dept in zip(flags, name_for_flag):
        if flag_value == 1:
            predicted_class.append(dept)

    print("Predicted class")
    print(predicted_class)
    for i in predicted_class:
        flash(i)
    print("Working >>>")

    # adding into database
    # taking aadhaar from database
    flag = 0
    predicted_class_length = len(predicted_class)
    print("Predicted class length", predicted_class_length)
    for i in range(0, predicted_class_length):
        id = session['id']
        new_complaint = Complaints(subject=subject,
                                   content=mess,
                                   department=predicted_class[i],
                                   status="Submitted",
                                   user_id=id)
        db.session.add(new_complaint)
        flag = flag + 1
        db.session.commit()
        print('New Complaint submitted')

    # all_data = Complaints.query.all()
    print("Flag Complaints length", flag)

    # Create a notification for the most recently inserted complaint.
    obj = db.session.query(Complaints).order_by(
        Complaints.comp_id.desc()).first()
    last_subject = obj.subject
    last_complaint = obj.content
    last_id = obj.comp_id
    notification = Notifications(comp_id=last_id,
                                 subject=last_subject,
                                 complaint=last_complaint)
    db.session.add(notification)
    db.session.commit()
    print("new notification added")

    # return render_template('Success.html', name=name, flags=flags)
    if subject and message:
        return render_template('Success.html', name=name, flags=flags)
    else:
        return redirect(url_for('log'))
def train(args):
    if args.max_seq_length <= 0:
        args.max_seq_length = np.inf

    # Load training data.
    training_set = GroundedScanDataset(
        args.data_path,
        args.data_directory + args.split + '/',
        split="train",
        target_vocabulary_file=args.target_vocabulary_file,
        k=args.k,
        max_seq_length=args.max_seq_length)
    training_set.read_dataset(
        max_examples=None,  # use the whole dataset
        simple_situation_representation=args.simple_situation_representation)
    training_set.shuffle_data()

    # Load validation data.
    validation_set = GroundedScanDataset(
        args.data_path,
        args.data_directory + args.split + '/',
        split="dev",
        target_vocabulary_file=args.target_vocabulary_file,
        k=args.k,
        max_seq_length=args.max_seq_length)
    validation_set.read_dataset(
        max_examples=None,  # use the whole dataset
        simple_situation_representation=args.simple_situation_representation)
    validation_set.shuffle_data()

    parser = None
    if args.parse_type == 'default':
        grammar = Grammar()
        word2narg = WORD2NARG
    else:
        if args.parse_type == 'constituency':
            parser = ConstituencyParser()
        elif args.parse_type == 'dependency':
            parser = StanfordDependencyParser()
        word2narg = parser.word2narg

    if args.compare_attention:
        compare_list = COMPARE_LIST
    else:
        compare_list = None

    data_iter = training_set.get_data_iterator(
        batch_size=args.training_batch_size)
    input_text_batch, _, situation_batch, situation_representation_batch, \
        target_batch, target_lengths, agent_positions, target_positions = next(data_iter)
    example_feature = situation_batch[0][0]  # first sequence, first observation

    model = SentenceNetwork(words=word2narg,
                            cnn_kernel_size=args.cnn_kernel_size,
                            n_channels=args.cnn_num_channels,
                            example_feature=example_feature,
                            rnn_dim=args.rnn_dim,
                            rnn_depth=args.rnn_depth,
                            attention_dim=args.att_dim,
                            output_dim=args.output_dim,
                            device=args.device,
                            compare_list=compare_list,
                            compare_weight=args.compare_weight,
                            normalize_size=args.normalize_size,
                            no_attention=args.no_attention,
                            parse_type=args.parse_type,
                            pass_state=args.pass_state)

    n_update = 0
    n_validate = 0
    n_checkpoint = 0
    best_match = 0
    if args.resume_from_file != '':
        resume_file = args.model_prefix + args.resume_from_file
        assert os.path.isfile(resume_file), \
            "No checkpoint found at {}".format(resume_file)
        args.logger.info(
            "Loading checkpoint from file at '{}'".format(resume_file))
        model.load_state_dict(torch.load(resume_file)[0])
        n_checkpoint = args.resume_n_update
        n_update = args.checkpoint_range * n_checkpoint
        n_validate = n_update / args.validate_every
    else:
        torch.save([model.state_dict()], args.model_prefix + '/model_0.pkl')
    model.to(args.device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=(args.adam_beta_1, args.adam_beta_2))

    # Training loop.
    training_set.shuffle_data()
    for i in range(args.n_epochs):
        for j, data in enumerate(
                training_set.get_data_iterator(
                    batch_size=args.training_batch_size)):
            model.train()
            input_text_batch, _, situation_batch, situation_representation_batch, \
                target_batch, target_lengths, agent_positions, target_positions = data
            if args.parse_type == 'default':
                arg_tree = grammar.arg_tree(split_str(input_text_batch[0]))
            else:
                arg_tree = parser.parse(input_text_batch[0])
            args.logger.info('train {}, arg tree: {}'.format(
                input_text_batch[0], arg_tree))
            model.update_words(arg_tree)
            target_lengths = torch.tensor(target_lengths,
                                          dtype=torch.long,
                                          device=args.device)
            success, total_loss, word_losses = model.loss(
                situation_batch, target_batch, target_lengths, optimizer)
            if not success:
                continue
            args.logger.info('epoch {}, iter {}, train loss: {}'.format(
                i, j, float(total_loss)))

            # Save checkpoints.
            if n_update % args.checkpoint_range == 0:
                log_model_params(model, args.writer, 'comp_gscan', n_update)
                # Log numbers. TODO: log loss per word.
                args.writer.add_scalar('loss/train_total', float(total_loss),
                                       n_checkpoint)
                model_path = args.model_prefix + '/model_' + str(n_checkpoint) + '.pkl'
                torch.save([model.state_dict()], model_path)
                n_checkpoint += 1

            # Validation.
            if n_update % args.validate_every == 0:
                validation_set.shuffle_data()
                model.eval()

                # Compute validation loss.
                loss = 0
                n_batch = 0
                for k, data in enumerate(
                        validation_set.get_data_iterator(
                            batch_size=args.training_batch_size)):
                    input_text_batch, _, situation_batch, situation_representation_batch, \
                        target_batch, target_lengths, agent_positions, target_positions = data
                    if args.parse_type == 'default':
                        arg_tree = grammar.arg_tree(
                            split_str(input_text_batch[0]))
                    else:
                        arg_tree = parser.parse(input_text_batch[0])
                    model.update_words(arg_tree)
                    with torch.no_grad():
                        target_lengths = torch.tensor(target_lengths,
                                                      dtype=torch.long,
                                                      device=args.device)
                        success, total_loss, word_losses = model.loss(
                            situation_batch, target_batch, target_lengths)
                    loss += float(total_loss)
                    n_batch += 1
                loss = loss / n_batch
                args.logger.info('epoch {}, iter {}, val loss: {}'.format(
                    i, j, float(loss)))
                args.writer.add_scalar('loss/val_total', float(loss),
                                       n_validate)

                # Run evaluation.
                accuracy, exact_match = evaluate(
                    training_set,
                    validation_set.get_data_iterator(batch_size=1),
                    model=model,
                    world=validation_set.dataset._world,
                    max_steps=args.max_steps,
                    vocab=validation_set.target_vocabulary,
                    max_examples_to_evaluate=args.max_testing_examples,
                    device=args.device,
                    parser=parser)
                args.logger.info(
                    " Evaluation Accuracy: %5.2f Exact Match: %5.2f" %
                    (accuracy, exact_match))
                args.writer.add_scalar('accuracy/val_total', accuracy,
                                       n_validate)
                args.writer.add_scalar('exact_match/val_total', exact_match,
                                       n_validate)

                # Save the best model.
                if exact_match > best_match:
                    model_path = args.model_prefix + '/model_best.pkl'
                    torch.save([model.state_dict(), n_update, exact_match],
                               model_path)
                    best_match = exact_match
                    args.logger.info(
                        'save best model at n_update {}'.format(n_update))
                n_validate += 1
            n_update += 1
# The opening of this add_argument call is truncated in the source; the
# "-l"/"--limit" flag names below are inferred from dest="limit".
parser.add_argument(
    "-l", "--limit",
    nargs=1,
    action="store",
    default=[0],  # was default=0 in the source; a list keeps args.limit[0] valid when the flag is absent
    dest="limit",
    help="Limit genes in all datasets; it speeds up data pre-processing development.")
parser.add_argument("-t", "--train",
                    action="store_true",
                    dest="train",
                    help="When the flag is activated, it performs training.")
parser.add_argument("-e", "--evaluate",
                    action="store_true",
                    dest="evaluate",
                    help="When the flag is activated, it predicts test classes.")
args = parser.parse_args()

if args.data_proc:
    dp = DataPreprocess("data", [2, 4, 6, 8, 10, 12, 15, 20, 25, 30],
                        int(args.limit[0]))
if args.train:
    tr = Training([2, 4, 6, 8, 10, 12, 15, 20, 25, 30])
if args.evaluate:
    evaluate()
model_args = {
    # ... (earlier hyperparameter keys are truncated in the source)
    'causal': args.causal,
    'mask_nonlinear': args.mask_nonlinear
}
train_args = {
    'lr': args.lr,
    'batch_size': args.batch_size,
    'epochs': args.epochs
}
model = ConvTasNet(**model_args)

if args.evaluate == 0 and args.separate == 0:
    dataset = AudioDataset(args.data_dir,
                           sr=args.sr,
                           mode='train',
                           seq_len=args.seq_len,
                           verbose=0,
                           voice_only=args.voice_only)
    print('DataLoading Done')
    train(model, dataset, **train_args)
elif args.evaluate == 1:
    model.load_state_dict(torch.load(args.model, map_location='cpu'))
    dataset = AudioDataset(args.data_dir,
                           sr=args.sr,
                           mode='test',
                           seq_len=args.seq_len,
                           verbose=0,
                           voice_only=args.voice_only)
    evaluate(model, dataset, args.batch_size, 0, args.cal_sdr)
else:
    model.load_state_dict(torch.load(args.model, map_location='cpu'))
    dataset = AudioDataset(args.data_dir,
                           sr=args.sr,
                           mode='test',
                           seq_len=args.seq_len,
                           verbose=0,
                           voice_only=args.voice_only)
    separate(model, dataset, args.output_dir, sr=8000)
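# For reference, a hedged sketch of how the truncated portion of model_args is
# presumably filled in. The keys follow the Conv-TasNet paper's hyperparameter
# notation (N, L, B, H, P, X, R, C); both the names and the matching argparse
# attributes are assumptions, not taken from this source.
model_args = {
    'N': args.N,  # number of encoder filters
    'L': args.L,  # encoder filter length in samples
    'B': args.B,  # bottleneck channels
    'H': args.H,  # channels inside each conv block
    'P': args.P,  # conv block kernel size
    'X': args.X,  # conv blocks per repeat
    'R': args.R,  # number of repeats
    'C': args.C,  # number of sources to separate
    'causal': args.causal,
    'mask_nonlinear': args.mask_nonlinear,
}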
# Tail of the training() function; the earlier part of the epoch loop is
# truncated in the source.
        ckpt_manager.save()

        print('Epoch {} Loss {:.6f}'.format(epoch + 1, total_loss / num_steps))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

    return loss_plot

# Checkout: restore the latest checkpoint (if any) and get the start epoch.
start_epoch, ckpt_manager = checkout()

# Training.
loss_plot = training(start_epoch, dataset, num_steps)

# Show the loss plot.
preprocess.pltshow(loss_plot)

# Captions on the validation set: pick one random image.
rid = np.random.randint(0, len(img_name_val))
image = img_name_val[rid]
real_caption = ' '.join(
    [tokenizer.index_word[i] for i in cap_val[rid] if i not in [0]])

result, attention_plot = predict.evaluate(image, tokenizer, max_length,
                                          attention_features_shape,
                                          image_features_extract_model,
                                          encoder, decoder)
print('Real Caption:', real_caption)
print('Prediction Caption:', ' '.join(result))
predict.plot_attention(image, result, attention_plot, real_caption,
                       ' '.join(result))
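# A minimal sketch of what preprocess.pltshow presumably does with the
# per-epoch losses collected above; this implementation is an assumption.
import matplotlib.pyplot as plt

def pltshow(loss_plot):
    plt.plot(loss_plot)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Loss Plot')
    plt.show()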