Пример #1
0
def merge_subsentences_initializer(to_write_file, to_write_storage,
                                   to_read_file, to_read_storage,
                                   sentidx2offset_file):
    """Populate the module globals used by merge_subsentences workers.

    Intended as a multiprocessing initializer: it takes no action beyond
    opening two memory-mapped files and loading one trie, each of which is
    stored in a module-level global.

    Args:
        to_write_file: file to write; memory-mapped into
            ``filt_emb_data_global``
        to_write_storage: mmap storage type (passed as ``dtype``) of
            ``to_write_file``
        to_read_file: file to read; memory-mapped into
            ``full_pred_data_global``
        to_read_storage: mmap storage type (passed as ``dtype``) of
            ``to_read_file``
        sentidx2offset_file: sentence index to offset in mmap data; loaded
            into ``sentidx2offset_marisa_global``

    Returns:
        None. All results are published through module globals.
    """
    global filt_emb_data_global, full_pred_data_global
    global sentidx2offset_marisa_global

    # Both files must already exist: "r+" opens an existing file for
    # reading and writing. Note the read-side file is also opened "r+".
    filt_emb_data_global = np.memmap(
        to_write_file, dtype=to_write_storage, mode="r+")
    full_pred_data_global = np.memmap(
        to_read_file, dtype=to_read_storage, mode="r+")

    sentidx2offset_marisa_global = utils.load_single_item_trie(
        sentidx2offset_file)
Пример #2
0
def write_data_labels_initializer(
    merged_entity_emb_file,
    merged_storage_type,
    sental2embid_file,
    result_alias_offset,
    train_in_candidates,
    max_cands,
    dump_embs,
    trie_candidate_map_folder,
    trie_qid2eid_file,
):
    """Populate the module globals used by write_data_labels workers.

    Multiprocessing initializer. It memory-maps the merged embedding file,
    loads the lookup tries, and copies the remaining arguments verbatim
    into their ``*_global`` counterparts.

    Args:
        merged_entity_emb_file: flattened embedding input file;
            memory-mapped into ``filt_emb_data_global``
        merged_storage_type: mmap storage type (passed as ``dtype``)
        sental2embid_file: sentence, alias -> embedding id mapping; loaded
            into ``sental2embid_global``
        result_alias_offset: alias offset of this batch of results
        train_in_candidates: train in candidates flag
        max_cands: max candidates
        dump_embs: dump embedding flag
        trie_candidate_map_folder: alias trie folder, given to
            ``AliasCandRecordTrie`` as ``load_dir``
        trie_qid2eid_file: qid to eid trie file; loaded into
            ``qid2eid_global``

    Returns:
        None. All results are published through module globals.
    """
    global filt_emb_data_global, sental2embid_global
    global alias_cand_trie_global, qid2eid_global
    global result_alias_offset_global, train_in_candidates_global
    global max_cands_global, dump_embs_global

    # File-backed resources. "r+" requires the embedding file to exist
    # already and maps it for both reading and writing.
    filt_emb_data_global = np.memmap(
        merged_entity_emb_file, dtype=merged_storage_type, mode="r+")
    sental2embid_global = utils.load_single_item_trie(sental2embid_file)
    alias_cand_trie_global = AliasCandRecordTrie(
        load_dir=trie_candidate_map_folder)
    qid2eid_global = utils.load_single_item_trie(trie_qid2eid_file)

    # Plain settings, stored unchanged.
    result_alias_offset_global = result_alias_offset
    train_in_candidates_global = train_in_candidates
    max_cands_global = max_cands
    dump_embs_global = dump_embs
Пример #3
0
def merge_subsentences_initializer(to_write_file, to_write_storage,
                                   to_read_file, to_read_storage,
                                   sent_start_map_file):
    """Populate the module globals for merging subsentences.

    Opens two memory-mapped files and loads one trie, storing each in a
    module-level global.
    NOTE(review): presumably passed as a multiprocessing pool initializer,
    as the name suggests -- confirm against the caller.

    Args:
        to_write_file: file memory-mapped into ``filt_emb_data_global``
        to_write_storage: ``dtype`` used to map ``to_write_file``
        to_read_file: file memory-mapped into ``full_pred_data_global``
        to_read_storage: ``dtype`` used to map ``to_read_file``
        sent_start_map_file: file handed to
            ``utils.load_single_item_trie``; the result is stored in
            ``sent_start_map_marisa_global``

    Returns:
        None. All results are published through module globals.
    """
    global filt_emb_data_global, full_pred_data_global
    global sent_start_map_marisa_global

    # "r+" maps an existing file for reading and writing; both the write
    # target and the read source are opened this way.
    filt_emb_data_global = np.memmap(
        to_write_file, dtype=to_write_storage, mode="r+")
    full_pred_data_global = np.memmap(
        to_read_file, dtype=to_read_storage, mode="r+")

    sent_start_map_marisa_global = utils.load_single_item_trie(
        sent_start_map_file)
Пример #4
0
def write_data_labels_initializer(merged_entity_emb_file, merged_storage_type,
                                  sent_idx_map_file, train_in_candidates,
                                  dump_embs, data_config):
    """Populate the module globals for writing data labels.

    Memory-maps the merged embedding file, loads the sentence index trie,
    builds an ``EntitySymbols`` instance from ``data_config``, and copies
    the two flags verbatim into their ``*_global`` counterparts.
    NOTE(review): presumably passed as a multiprocessing pool initializer,
    as the name suggests -- confirm against the caller.

    Args:
        merged_entity_emb_file: file memory-mapped into
            ``filt_emb_data_global``
        merged_storage_type: ``dtype`` used to map the embedding file
        sent_idx_map_file: file handed to ``utils.load_single_item_trie``;
            the result is stored in ``sent_idx_map_global``
        train_in_candidates: stored as ``train_in_candidates_global``
        dump_embs: stored as ``dump_embs_global``
        data_config: object providing the ``entity_dir``,
            ``entity_map_dir`` and ``alias_cand_map`` attributes

    Returns:
        None. All results are published through module globals.
    """
    global filt_emb_data_global, sent_idx_map_global
    global train_in_candidates_global, dump_embs_global
    global entity_dump_global

    # "r+" maps an existing file for reading and writing.
    filt_emb_data_global = np.memmap(
        merged_entity_emb_file, dtype=merged_storage_type, mode="r+")
    sent_idx_map_global = utils.load_single_item_trie(sent_idx_map_file)

    train_in_candidates_global = train_in_candidates
    dump_embs_global = dump_embs

    # The entity map directory is resolved relative to the entity directory.
    entity_load_dir = os.path.join(data_config.entity_dir,
                                   data_config.entity_map_dir)
    entity_dump_global = EntitySymbols(
        load_dir=entity_load_dir,
        alias_cand_map_file=data_config.alias_cand_map)