Example #1
from kangqi.util.config import load_configs  # config loader, as used in Example #3


def Main():
    eval_dir = ''   # TODO: set the evaluation run directory
    config_dict = load_configs(eval_dir + '/param_config')
    # TODO: Define parameters (data_size, max_len, n_emb, PN used below)

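    # np_list: numpy arrays of question / schema data; data_size, max_len, n_emb
    # and PN are presumably read from config_dict (left as TODO above), and
    # load_q_schema_data is a project-specific loader not imported in this snippet.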
    np_list = load_q_schema_data(data_size, max_len, n_emb, PN) # TODO: parameters
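    # cumulative train / valid / test boundaries over the question list (WebQuestions sizes hard-coded)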
    Tvt_split_list = [0, 3022, 3778, 5810]  # TODO: Webq
    for idx, Tvt in enumerate(['T', 'v', 't']):
        indices = range(Tvt_split_list[idx], Tvt_split_list[idx+1])
Example #2
    # (fragment: tail of a batching helper that slices one mini-batch from the flattened arrays)
    batch_data['entity'] = \
        data['entity'][idx * PN * rows * cols: (idx + real_batch_size) * PN * rows * cols]
    batch_data['coherence'] = \
        data['coherence'][idx * PN: (idx + real_batch_size) * PN]
    batch_data['context'] = \
        data['context'][idx * PN * rows * cols: (idx + real_batch_size) * PN * rows * cols]
    batch_data['corrupt'] = \
        data['corrupt'][idx: idx + real_batch_size]
    return batch_data


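# requires: import sys; from kangqi.util.config import load_configs (see Example #3)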
if __name__ == '__main__':
    setting_dir = sys.argv[1]
    try_dir = sys.argv[2]
    root_path = 'runnings/tabel/e2e/%s/%s' % (setting_dir, try_dir)
    config_fp = '%s/param_config' % root_path
    config_dict = load_configs(config_fp)

    # data path
    joint_data_fp = config_dict['joint_data_fp']
    # model selection
    model_num = int(config_dict['model'])
    trans = config_dict['translation_setting']
    nonj = config_dict['non_joint_setting']
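    # optional settings fall back to defaults when absent from the config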
    coherence_norm = 0
    if 'coherence_norm' in config_dict:
        coherence_norm = int(config_dict['coherence_norm'])
    # table settings
    candidate_num = 50
    if 'candidate_num' in config_dict:
        candidate_num = int(config_dict['candidate_num'])
    epochs = int(config_dict['epochs'])
Example #3
def main():
    LogInfo.begin_track('[cand_gen] starts ... ')

    import sys
    from kangqi.util.config import load_configs

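    # locate this run's parameter file under <root_path>/runnings/candgen/<try_dir>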
    root_path = '/home/kangqi/workspace/PythonProject'
    try_dir = sys.argv[1]

    config_fp = '%s/runnings/candgen/%s/param_config' % (root_path, try_dir)
    config_dict = load_configs(config_fp)

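    # parser service endpoint; passed to CandidateGenerator below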
    parser_ip = config_dict['parser_ip']
    parser_port = int(config_dict['parser_port'])

    use_sparql_cache = config_dict['use_sparql_cache'] == 'True'
    cache_dir = config_dict['cache_dir']
    check_only = config_dict['check_only'] == 'True'

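    # entity-linking options and candidate-filtering thresholds (S-MART results, surface score, popularity)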
    use_ext_sk = config_dict['use_ext_sk'] == 'True'
    s_mart = config_dict['S-MART'] == 'True'
    min_ratio = float(config_dict['min_ratio'])
    min_surface_score = float(config_dict['min_surface_score'])
    min_pop = int(config_dict['min_pop'])

    # from ..data_prepare.u import load_complex_questions
    # train_qa_list, test_qa_list = load_complex_questions()
    # compq_list = train_qa_list + test_qa_list
    # LogInfo.logs('%d ComplexQuestions loaded.', len(compq_list))
    # surface_list = [qa.q for qa in compq_list]

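    # load WebQuestions utterances (the commented-out block above used ComplexQuestions instead)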
    import json
    webq_fp = '/home/kangqi/Webquestions/Json/webquestions.examples.json'
    with open(webq_fp, 'r') as br:
        webq_data = json.load(br)
        surface_list = [webq['utterance'] for webq in webq_data]
    LogInfo.logs('%d WebQuestions loaded.', len(webq_data))

    #    cand_gen = CandidateGenerator(use_sparql_cache=False, check_only=True)
    #    q_list = [qa.q for qa in train_qa_list]
    #    check_el_quality(cand_gen, q_list)

    cand_gen = CandidateGenerator(use_sparql_cache=use_sparql_cache,
                                  cache_dir=cache_dir,
                                  check_only=check_only,
                                  parser_ip=parser_ip,
                                  parser_port=parser_port,
                                  k_hop=1,
                                  max_hops=2)
    for q_idx, q in enumerate(surface_list):
        LogInfo.begin_track('Checking Q %d / %d: ', q_idx + 1,
                            len(surface_list))
        LogInfo.logs('Surface: %s', q)
        schema_list = cand_gen.run_candgen(q_idx=q_idx,
                                           q=q,
                                           min_surface_score=min_surface_score,
                                           min_pop=min_pop,
                                           use_ext_sk=use_ext_sk,
                                           min_ratio=min_ratio,
                                           s_mart=s_mart,
                                           vb=1)
        LogInfo.logs('Finally: generated %d schemas.', len(schema_list))

        sc_sz = len(schema_list)
        for idx in range(sc_sz):
            LogInfo.begin_track('Showing schema %d / %d: ', idx + 1, sc_sz)
            sc = schema_list[idx]
            sc.display()
            #            sc.display_sparql()
            LogInfo.end_track()

        LogInfo.end_track()


    #    cand_gen = CandidateGenerator(use_sparql=False)
    #    q = 'what language do most people speak in afghanistan?'
    #    cand_gen.linking(q, vb=0)

    LogInfo.end_track()