def load_static(args):
    device, n_gpu = setup_device()
    set_seed_everywhere(args.seed, n_gpu)

    schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir)

    grammar = semQL.Grammar()
    model = IRNet(args, device, grammar)
    model.to(device)

    # load the pre-trained parameters
    model.load_state_dict(
        torch.load(args.model_to_load, map_location=torch.device('cpu')))
    model.eval()
    print("Load pre-trained model from '{}'".format(args.model_to_load))

    nlp = English()
    tokenizer = nlp.Defaults.create_tokenizer(nlp)

    with open(os.path.join(args.conceptNet, 'english_RelatedTo.pkl'), 'rb') as f:
        related_to_concept = pickle.load(f)

    with open(os.path.join(args.conceptNet, 'english_IsA.pkl'), 'rb') as f:
        is_a_concept = pickle.load(f)

    return (args, grammar, model, nlp, tokenizer, related_to_concept,
            is_a_concept, schemas_raw, schemas_dict)
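# A minimal usage sketch (illustrative, not part of the original script),
# assuming the arguments come from read_arguments_manual_inference():
#
#     (args, grammar, model, nlp, tokenizer,
#      related_to_concept, is_a_concept,
#      schemas_raw, schemas_dict) = load_static(read_arguments_manual_inference())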
s = " ".join(s) return s def _find_nums(question): nums = re.findall('\d*\.?\d+', question) return nums if __name__ == '__main__': args = read_arguments_manual_inference() device, n_gpu = setup_device() set_seed_everywhere(args.seed, n_gpu) schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir) grammar = semQL.Grammar() model = IRNet(args, device, grammar) model.to(device) # load the pre-trained parameters model.load_state_dict(torch.load(args.model_to_load)) # to use cpu instead of gpu , uncomment this code # model.load_state_dict(torch.load(args.model_to_load,map_location=torch.device('cpu'))) model.eval() print("Load pre-trained model from '{}'".format(args.model_to_load)) nlp = English() tokenizer = nlp.Defaults.create_tokenizer(nlp)
def handle_request0(request):
    debug = 'debug' in request.form
    base = ""
    try:
        csv_key = 'csv'
        if csv_key not in request.files:
            csv_key = 'csv[]'
        print(request.files)
        if csv_key not in request.files and 'sqlite' not in request.files:
            raise Exception('please include a csv file or sqlite file')
        if 'q' not in request.form:
            raise Exception(
                'please include a q parameter with a question in it')
        csvs = request.files.getlist(csv_key)
        sqlite_file = request.files.get('sqlite')
        q = request.form['q']

        # brute force removal of any old requests
        if not TRIAL_RUN:
            subprocess.run(["bash", "-c", "rm -rf /cache/case_*"])

        key = "case_" + str(uuid.uuid4())
        data_dir = os.path.join('/cache', key)
        os.makedirs(os.path.join(data_dir, 'data'), exist_ok=True)
        os.makedirs(os.path.join(data_dir, 'original', 'database', 'data'),
                    exist_ok=True)
        print("Key", key)

        for csv in csvs:
            print("Working on", csv)
            table_id = os.path.splitext(csv.filename)[0]
            table_id = re.sub(r'\W+', '_', table_id)
            stream = io.StringIO(csv.stream.read().decode("UTF8"),
                                 newline=None)
            add_csv.csv_stream_to_sqlite(
                table_id, stream,
                os.path.join(data_dir, 'data', 'data.sqlite'))
            stream.seek(0)
        if sqlite_file:
            print("Working on", sqlite_file)
            sqlite_file.save(os.path.join(data_dir, 'data', 'data.sqlite'))

        question_file = os.path.join(data_dir, 'question.json')
        tables_file = os.path.join(data_dir, 'tables.json')
        dummy_file = os.path.join(data_dir, 'dummy.json')

        add_question.question_to_json('data', q, question_file)

        row = {
            'question': q,
            'query': 'DUMMY',
            'db_id': args.database,
            'question_toks': _tokenize_question(tokenizer, q)
        }

        print(
            colored(f"question has been tokenized to : {row['question_toks']}",
                    'cyan',
                    attrs=['bold']))

        with open(dummy_file, 'w') as fout:
            fout.write('[]\n')

        subprocess.run([
            "python", "/spider/preprocess/get_tables.py", data_dir,
            tables_file, dummy_file
        ])

        # valuenet expects a different directory layout than irnet
        shutil.copyfile(tables_file,
                        os.path.join(data_dir, 'original', 'tables.json'))
        database_path = os.path.join(data_dir, 'original', 'database', 'data',
                                     'data.sqlite')
        shutil.copyfile(os.path.join(data_dir, 'data', 'data.sqlite'),
                        database_path)

        schemas_raw, schemas_dict = spider_utils.load_schema(data_dir)

        data, table = merge_data_with_schema(schemas_raw, [row])
        pre_processed_data = process_datas(data, related_to_concept,
                                           is_a_concept)
        pre_processed_with_values = _pre_process_values(pre_processed_data[0])

        print(
            f"we found the following potential values in the question: {row['values']}"
        )

        prediction, example = _inference_semql(pre_processed_with_values,
                                               schemas_dict, model)

        print(
            f"Results from schema linking (question token types): {example.src_sent}"
        )
        print(
            f"Results from schema linking (column types): {example.col_hot_type}"
        )
        print(
            colored(f"Predicted SemQL-Tree: {prediction['model_result']}",
                    'magenta',
                    attrs=['bold']))
        print()

        sql = _semql_to_sql(prediction, schemas_dict)
        print(colored(f"Transformed to SQL: {sql}", 'cyan', attrs=['bold']))
        print()

        result = _execute_query(sql, database_path)
        print(f"Executed on the database '{args.database}'. Results: ")
        for row in result:
            print(colored(row, 'green'))

        message = {
            "split": key,
            "result": {
                "sql": sql.strip(),
                "answer": result
            }
        }
        code = 200
    except Exception as e:
        message = {"error": str(e)}
        code = 500

    if debug:
        message['base'] = base
    return jsonify(message), code
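# A client-side sketch (illustrative; the route, host and port are assumptions,
# they are not defined in this excerpt). handle_request0() expects a multipart
# form with the question in 'q' and the data as CSV file(s) under 'csv'/'csv[]'
# or a SQLite file under 'sqlite', and it replies with JSON shaped like
# {"split": ..., "result": {"sql": ..., "answer": ...}}.
#
#     import requests
#     with open('cars.csv', 'rb') as f:
#         resp = requests.post('http://localhost:5000/question',  # hypothetical endpoint
#                              data={'q': 'How many cars cost more than 20000?'},
#                              files={'csv': f})
#     print(resp.json()['result']['sql'])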