def load_static(args):
    device, n_gpu = setup_device()
    set_seed_everywhere(args.seed, n_gpu)

    schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir)

    grammar = semQL.Grammar()
    model = IRNet(args, device, grammar)
    model.to(device)

    # load the pre-trained parameters
    model.load_state_dict(
        torch.load(args.model_to_load, map_location=torch.device('cpu')))
    model.eval()
    print("Load pre-trained model from '{}'".format(args.model_to_load))

    nlp = English()
    tokenizer = nlp.Defaults.create_tokenizer(nlp)

    with open(os.path.join(args.conceptNet, 'english_RelatedTo.pkl'), 'rb') as f:
        related_to_concept = pickle.load(f)

    with open(os.path.join(args.conceptNet, 'english_IsA.pkl'), 'rb') as f:
        is_a_concept = pickle.load(f)

    return (args, grammar, model, nlp, tokenizer, related_to_concept,
            is_a_concept, schemas_raw, schemas_dict)
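# A minimal usage sketch (illustrative, not part of the original script),
# assuming the arguments come from read_arguments_manual_inference():
#
#     (args, grammar, model, nlp, tokenizer,
#      related_to_concept, is_a_concept,
#      schemas_raw, schemas_dict) = load_static(read_arguments_manual_inference())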
s = " ".join(s) return s def _find_nums(question): nums = re.findall('\d*\.?\d+', question) return nums if __name__ == '__main__': args = read_arguments_manual_inference() device, n_gpu = setup_device() set_seed_everywhere(args.seed, n_gpu) schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir) grammar = semQL.Grammar() model = IRNet(args, device, grammar) model.to(device) # load the pre-trained parameters model.load_state_dict(torch.load(args.model_to_load)) # to use cpu instead of gpu , uncomment this code # model.load_state_dict(torch.load(args.model_to_load,map_location=torch.device('cpu'))) model.eval() print("Load pre-trained model from '{}'".format(args.model_to_load)) nlp = English() tokenizer = nlp.Defaults.create_tokenizer(nlp)
def handle_request0(request):
    debug = 'debug' in request.form
    base = ""
    try:
        csv_key = 'csv'
        if csv_key not in request.files:
            csv_key = 'csv[]'
        print(request.files)
        if csv_key not in request.files and 'sqlite' not in request.files:
            raise Exception('please include a csv file or sqlite file')
        if 'q' not in request.form:
            raise Exception(
                'please include a q parameter with a question in it')
        csvs = request.files.getlist(csv_key)
        sqlite_file = request.files.get('sqlite')
        q = request.form['q']

        # brute force removal of any old requests
        if not TRIAL_RUN:
            subprocess.run(["bash", "-c", "rm -rf /cache/case_*"])

        key = "case_" + str(uuid.uuid4())
        data_dir = os.path.join('/cache', key)
        os.makedirs(os.path.join(data_dir, 'data'), exist_ok=True)
        os.makedirs(os.path.join(data_dir, 'original', 'database', 'data'),
                    exist_ok=True)
        print("Key", key)

        for csv in csvs:
            print("Working on", csv)
            table_id = os.path.splitext(csv.filename)[0]
            table_id = re.sub(r'\W+', '_', table_id)
            stream = io.StringIO(csv.stream.read().decode("UTF8"),
                                 newline=None)
            add_csv.csv_stream_to_sqlite(
                table_id, stream,
                os.path.join(data_dir, 'data', 'data.sqlite'))
            stream.seek(0)
        if sqlite_file:
            print("Working on", sqlite_file)
            sqlite_file.save(os.path.join(data_dir, 'data', 'data.sqlite'))

        question_file = os.path.join(data_dir, 'question.json')
        tables_file = os.path.join(data_dir, 'tables.json')
        dummy_file = os.path.join(data_dir, 'dummy.json')

        add_question.question_to_json('data', q, question_file)

        row = {
            'question': q,
            'query': 'DUMMY',
            'db_id': args.database,
            'question_toks': _tokenize_question(tokenizer, q)
        }

        print(
            colored(f"question has been tokenized to : {row['question_toks']}",
                    'cyan',
                    attrs=['bold']))

        with open(dummy_file, 'w') as fout:
            fout.write('[]\n')

        subprocess.run([
            "python", "/spider/preprocess/get_tables.py", data_dir,
            tables_file, dummy_file
        ])

        # valuenet expects a different directory layout than irnet
        shutil.copyfile(tables_file,
                        os.path.join(data_dir, 'original', 'tables.json'))
        database_path = os.path.join(data_dir, 'original', 'database', 'data',
                                     'data.sqlite')
        shutil.copyfile(os.path.join(data_dir, 'data', 'data.sqlite'),
                        database_path)

        schemas_raw, schemas_dict = spider_utils.load_schema(data_dir)

        data, table = merge_data_with_schema(schemas_raw, [row])
        pre_processed_data = process_datas(data, related_to_concept,
                                           is_a_concept)
        pre_processed_with_values = _pre_process_values(pre_processed_data[0])

        print(
            f"we found the following potential values in the question: {row['values']}"
        )

        prediction, example = _inference_semql(pre_processed_with_values,
                                               schemas_dict, model)

        print(
            f"Results from schema linking (question token types): {example.src_sent}"
        )
        print(
            f"Results from schema linking (column types): {example.col_hot_type}"
        )
        print(
            colored(f"Predicted SemQL-Tree: {prediction['model_result']}",
                    'magenta',
                    attrs=['bold']))
        print()

        sql = _semql_to_sql(prediction, schemas_dict)
        print(colored(f"Transformed to SQL: {sql}", 'cyan', attrs=['bold']))
        print()

        result = _execute_query(sql, database_path)
        print(f"Executed on the database '{args.database}'. Results: ")
        for row in result:
            print(colored(row, 'green'))

        message = {
            "split": key,
            "result": {
                "sql": sql.strip(),
                "answer": result
            }
        }
        code = 200
    except Exception as e:
        message = {"error": str(e)}
        code = 500

    if debug:
        message['base'] = base
    return jsonify(message), code
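# A client-side sketch (illustrative; the route, host and port are assumptions,
# they are not defined in this excerpt). handle_request0() expects a multipart
# form with the question in 'q' and the data as CSV file(s) under 'csv'/'csv[]'
# or a SQLite file under 'sqlite', and it replies with JSON shaped like
# {"split": ..., "result": {"sql": ..., "answer": ...}}.
#
#     import requests
#     with open('cars.csv', 'rb') as f:
#         resp = requests.post('http://localhost:5000/question',  # hypothetical endpoint
#                              data={'q': 'How many cars cost more than 20000?'},
#                              files={'csv': f})
#     print(resp.json()['result']['sql'])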