def augment_with_pretrained(dictionary, ext_emb_path, words):
    """
    Augment the dictionary with words that have a pretrained embedding.

    If `words` is None, we add every word that has a pretrained embedding
    to the dictionary; otherwise, we only add the words that are given by
    `words` (typically the words in the development and test sets).

    Returns (dictionary, word_to_id, id_to_word).
    """
    print('Loading pretrained embeddings from %s...' % ext_emb_path)
    assert os.path.isfile(ext_emb_path)

    # Load pretrained vocabulary from file (first token of each line).
    # BUG FIX: the original filtered on `len(ext_emb_path) > 0` (always
    # true), so an empty line would crash on the `[0]` index; the filter
    # must skip empty lines instead.
    pretrained = set(
        line.rstrip().split()[0].strip()
        for line in codecs.open(ext_emb_path, 'r', 'utf-8')
        if len(line.rstrip()) > 0
    )

    # We either add every word in the pretrained file,
    # or only words given in the `words` list to which
    # we can assign a pretrained embedding.
    if words is None:
        for word in pretrained:
            if word not in dictionary:
                dictionary[word] = 0
    else:
        for word in words:
            # Accept exact, lowercased, or digit-normalized matches.
            if any(x in pretrained for x in [
                word,
                word.lower(),
                re.sub(r'\d', '0', word.lower())  # raw string: avoid invalid-escape warning
            ]) and word not in dictionary:
                dictionary[word] = 0

    word_to_id, id_to_word = create_mapping(dictionary)
    return dictionary, word_to_id, id_to_word
def char_mapping(sentences): """ Create a dictionary and mapping of characters, sorted by frequency. """ chars = ["".join([w[0] for w in s]) for s in sentences] dico = create_dico(chars) char_to_id, id_to_char = create_mapping(dico) print "Found %i unique characters" % len(dico) return dico, char_to_id, id_to_char
def tag_mapping(sentences): """ Create a dictionary and a mapping of tags, sorted by frequency. """ tags = [[word[-1] for word in s] for s in sentences] dico = create_dico(tags) tag_to_id, id_to_tag = create_mapping(dico) print "Found %i unique named entity tags" % len(dico) return dico, tag_to_id, id_to_tag
def cluster_mapping(sentences):
    """
    Create a dictionary and mapping of clusters, sorted by frequency.

    The cluster label is field 4 of each token.
    Returns (frequency dict, cluster_to_id, id_to_cluster).
    """
    clusters = [[word[4] for word in s] for s in sentences]
    dico = create_dico(clusters)
    cluster_to_id, id_to_cluster = create_mapping(dico)
    # Fixed: Python 2 print statement -> print() call.
    print("Found %i clusters" % len(dico))
    return dico, cluster_to_id, id_to_cluster
def POStag_mapping(sentences):
    """
    Create a dictionary and mapping of POS tags, sorted by frequency.

    The POS tag is field 1 of each token.
    Returns (frequency dict, POStag_to_id, id_to_POStag).
    """
    POStags = [[word[1] for word in s] for s in sentences]
    dico = create_dico(POStags)
    POStag_to_id, id_to_POStag = create_mapping(dico)
    # Fixed: Python 2 print statement -> print() call.
    print("Found %i POS tags" % len(dico))
    return dico, POStag_to_id, id_to_POStag
def parse_sentence(sentence, tokenizer, encoder, use_cuda=True):
    '''Implement the match part of MAMA: extract candidate
    (head, relation, tail) triplets from one sentence using the
    encoder's attention matrices.

    NOTE(review): relies on project helpers (create_mapping,
    process_matrix, compress_attention, build_graph, bfs,
    filter_relation_sets) whose contracts are not visible here.
    Returns the list of triplets surviving filter_relation_sets.
    '''
    # str(tokenizer.__str__) stringifies the bound method; the resulting
    # text still contains the tokenizer class name, checked for 'GPT2' below.
    tokenizer_name = str(tokenizer.__str__)
    inputs, tokenid2word_mapping, token2id, noun_chunks = create_mapping(sentence, return_pt=True, tokenizer=tokenizer)
    with torch.no_grad():
        if use_cuda:
            # Move every input tensor to the GPU before encoding.
            for key in inputs.keys():
                inputs[key] = inputs[key].cuda()
        outputs = encoder(**inputs, output_attentions=True)
    # GPT2-style tokenizers skip the trimming step in process_matrix.
    trim = True
    if 'GPT2' in tokenizer_name:
        trim = False
    '''
    Use average of last layer attention : page 6, section 3.1.2
    '''
    attention = process_matrix(outputs[2], avg_head=True, trim=trim, use_cuda=use_cuda)
    merged_attention = compress_attention(attention, tokenid2word_mapping)
    attn_graph = build_graph(merged_attention)
    # Candidate pairs: all ordered pairs of distinct noun chunks.
    tail_head_pairs = []
    for head in noun_chunks:
        for tail in noun_chunks:
            if head != tail:
                tail_head_pairs.append((token2id[head], token2id[tail]))
    # Noun-chunk ids themselves are excluded as relation tokens.
    black_list_relation = set([token2id[n] for n in noun_chunks])
    all_relation_pairs = []
    id2token = {value: key for key, value in token2id.items()}
    # Search the attention graph for each pair in a worker pool.
    with Pool(10) as pool:
        params = [(pair[0], pair[1], attn_graph, max(tokenid2word_mapping), black_list_relation,) for pair in tail_head_pairs]
        for output in pool.imap_unordered(bfs, params):
            if len(output):
                # Pair each raw path with the id->token table for decoding.
                all_relation_pairs += [(o, id2token) for o in output]
    # Filter/decode the relation candidates, again in parallel.
    triplet_text = []
    with Pool(10) as pool:
        for triplet in pool.imap_unordered(filter_relation_sets, all_relation_pairs):
            if len(triplet) > 0:
                triplet_text.append(triplet)
    return triplet_text
def chunk_mapping(sentences, col=2):
    """
    Create a dictionary and a mapping of chunk tags, sorted by frequency.

    `col` selects which token field holds the chunk tag (default 2).
    Returns (frequency dict, chunk_to_id, id_to_chunk).
    """
    tags = [[word[col] for word in s] for s in sentences]
    dico = create_dico(tags)
    chunk_to_id, id_to_chunk = create_mapping(dico)
    # Fixed: Python 2 print statement -> print() call.
    print("Found %i unique Chunk tags" % len(dico))
    return dico, chunk_to_id, id_to_chunk
def tag_mapping(sentences): """ Create a dictionary and a mapping of tags, sorted by frequency. """ tags = [[word[-1] for word in s] for s in sentences] dico = create_dico(tags) tag_to_id, id_to_tag = create_mapping(dico) print("Found %i unique named entity tags" % (len(dico))) return dico, tag_to_id, id_to_tag
def char_mapping(sentences): """ Create a dictionary and mapping of characters, sorted by frequency. """ chars = ["".join([w[0] for w in s]) for s in sentences] dico = create_dico(chars) char_to_id, id_to_char = create_mapping(dico) print("Found %i unique characters" % len(dico)) return dico, char_to_id, id_to_char
def dep_mapping(sentences): """ Create a dictionary and a mapping of dep tags, sorted by frequency. """ tags = [[word[4] for word in s] for s in sentences] dico = create_dico(tags) print dico tag_to_id, id_to_tag = create_mapping(dico) print "Found %i unique Dependency Role tags" % len(dico) return dico, tag_to_id, id_to_tag
def mor_mapping(sentences): """ Create a dictionary and a mapping of pos, sorted by frequency. """ tags = [[word[2] for word in s] for s in sentences] dico = create_dico(tags) dico['<UNK>'] = 10000000 mor_to_id, id_to_mor = create_mapping(dico) print("Found %i unique causality pos" % len(dico)) return dico, mor_to_id, id_to_mor
def dep_verb_mapping(sentences):
    """
    Create a dictionary and a mapping of tags, sorted by frequency.

    Uses the part of field 4 before the first '|'; <UNK> gets a huge
    count so it sorts first. Returns (dict, pos_to_id, id_to_pos).
    """
    tags = [[word[4].split('|')[0] for word in s] for s in sentences]
    dico = create_dico(tags)
    dico['<UNK>'] = 10000000
    pos_to_id, id_to_pos = create_mapping(dico)
    # Fixed: Python 2 print statement -> print() call.
    print("Found %i unique verb dep words" % len(dico))
    return dico, pos_to_id, id_to_pos
def word_mapping(sentences, lower, vocabulary_size, pre_train=None):
    """
    Create a dictionary and a mapping of words, sorted by frequency.

    If `pre_train` is given, words without a pretrained embedding have
    their count zeroed and the mapping is rebuilt with an <UNK> entry.
    Returns (frequency dict, word_to_id, id_to_word).
    """
    words = [[x[0].lower() if lower else x[0] for x in s] for s in sentences]
    dico = create_dico(words)
    word_to_id, id_to_word = create_mapping(dico, vocabulary_size)
    print("Found %i unique words (%i in total)" % (len(dico), sum(len(x) for x in words)))
    if pre_train:
        emb_dictionary = read_pre_training(pre_train)
        # Fixed: dict.iterkeys() is Python 2 only; plain dict iteration works
        # on both versions (only values are reassigned, no keys inserted).
        for word in dico:
            if word not in emb_dictionary:
                dico[word] = 0
        dico['<UNK>'] = 10000000
        word_to_id, id_to_word = create_mapping(dico, vocabulary_size)
    return dico, word_to_id, id_to_word
def pos_mapping(sentences, position=1):
    """
    Create a dictionary and a mapping of poss, sorted by frequency.

    `position` selects the token field holding the POS tag (default 1).
    Returns (frequency dict, tag_to_id, id_to_tag).
    """
    tags = [[word[position] for word in s] for s in sentences]
    dico = create_dico(tags)
    tag_to_id, id_to_tag = create_mapping(dico)
    # Fixed: Python 2 print statement -> print() call.
    # NOTE(review): the message says "named entity tags" although this maps
    # POS tags — looks like a copy-paste; output kept unchanged.
    print("Found %i unique named entity tags" % len(dico))
    return dico, tag_to_id, id_to_tag
def loc_mapping(sentences): """ Create a dictionary and a mapping of location labels, sorted by frequency. """ loc = [[x[6] for x in s] for s in sentences] dico = create_dico(loc) dico['<UNK>'] = 10000000 loc_to_id, id_to_loc = create_mapping(dico) print("Found %i unique location (%i in total)" % (len(dico), sum(len(x) for x in loc))) return dico, loc_to_id, id_to_loc
def char_mapping(sentences): """ Create a dictionary and a mapping of chars, sorted by frequency. """ chars = [[x[0] for x in s] for s in sentences] dico = create_dico(chars) dico['<UNK>'] = 10000000 char_to_id, id_to_char = create_mapping(dico) print "Found %i unique chars (%i in total)" % (len(dico), sum(len(x) for x in chars)) return dico, char_to_id, id_to_char
def pos_mapping(sentences): """ Create a dictionary and a mapping of pos tags, sorted by frequency. """ pos_tags = [[x[1] for x in s] for s in sentences] dico = create_dico(pos_tags) dico['<UNKPOS>'] = sys.maxint pos_tag_to_id, id_to_pos_tag = create_mapping(dico) print "Found %i unique pos tags (%i in total)" % ( len(dico), sum(len(x) for x in pos_tags)) return dico, pos_tag_to_id, id_to_pos_tag
def pos_mapping(sentences): """ Create a dictionary and a mapping of words, sorted by frequency. """ pos = [[x[1] for x in s] for s in sentences] dico = create_dico(pos) dico['<UNK>'] = 10000000 pos_to_id, id_to_pos = create_mapping(dico) print "Found %i unique words (%i in total)" % (len(dico), sum(len(x) for x in pos)) return dico, pos_to_id, id_to_pos
def char_mapping(sentences): """ Create a dictionary and mapping of characters, sorted by frequency. """ chars = ["".join([w[0] for w in s]) for s in sentences] dico = create_dico(chars) char_to_id, id_to_char = create_mapping(dico) # SWM: replace unseen characters with special symbol (hopefully seen in training) char_to_id = defaultdict(lambda: 0, char_to_id) print "Found %i unique characters" % len(dico) return dico, char_to_id, id_to_char
def semroles_mapping(sentences):
    """
    Create a dictionary and a mapping of semantic roles labels, sorted by frequency.

    Field 4 of each token is the semantic-role label; <UNK> is ranked first.
    """
    semroles = []
    for sentence in sentences:
        semroles.append([token[4] for token in sentence])
    dico = create_dico(semroles)
    dico['<UNK>'] = 10000000
    semroles_to_id, id_to_semroles = create_mapping(dico)
    total = sum(len(labels) for labels in semroles)
    print("Found %i unique semroles (%i in total)" % (len(dico), total))
    return dico, semroles_to_id, id_to_semroles
def depNode_mapping(sentences):
    """
    Create a dictionary and a mapping of dependency node labels, sorted by frequency.

    Field 3 of each token is the dependency-node label; <UNK> is ranked first.
    """
    depNode = [[token[3] for token in sentence] for sentence in sentences]
    dico = create_dico(depNode)
    dico['<UNK>'] = 10000000
    depNode_to_id, id_to_depNode = create_mapping(dico)
    label_total = sum(len(sentence_labels) for sentence_labels in depNode)
    print("Found %i unique depNode (%i in total)" % (len(dico), label_total))
    return dico, depNode_to_id, id_to_depNode
def conNode_mapping(sentences):
    """
    Create a dictionary and a mapping of chunk labels, sorted by frequency.

    Field 2 of each token is the constituency/chunk label; <UNK> is ranked first.
    """
    conNode = [[token[2] for token in sentence] for sentence in sentences]
    dico = create_dico(conNode)
    dico['<UNK>'] = 10000000
    conNode_to_id, id_to_conNode = create_mapping(dico)
    label_total = sum(len(sentence_labels) for sentence_labels in conNode)
    print("Found %i unique conNode (%i in total)" % (len(dico), label_total))
    return dico, conNode_to_id, id_to_conNode
def char_mapping(sentences): """ Create a dictionary and mapping of characters, sorted by frequency. """ chars = [''.join([w[0] for w in sentence]) for sentence in sentences] dico = create_dico(chars) dico['<PAD>'] = 1000000 id_to_char, char_to_id = create_mapping(dico) return dico, char_to_id, id_to_char
def _get_loc_var(self):
    """Populate self.loc_var_map: locationId -> mapping of its parameterIds."""
    self.loc_var_map = {}
    for ts in self.series_list:
        hdr = ts.find(self.default_ns + "header")
        param_id = hdr.findtext(self.default_ns + "parameterId")
        location = hdr.findtext(self.default_ns + "locationId")
        self.loc_var_map[location] = create_mapping(location, param_id, self.loc_var_map)
def _get_var_loc(self):
    """Populate self.var_loc_map: parameterId -> mapping of its locationIds."""
    self.var_loc_map = {}
    for ts in self.series_list:
        hdr = ts.find(self.default_ns + "header")
        param_id = hdr.findtext(self.default_ns + "parameterId")
        location = hdr.findtext(self.default_ns + "locationId")
        self.var_loc_map[param_id] = create_mapping(param_id, location, self.var_loc_map)
def word_mapping(sentences, lower):
    """
    Create a dictionary and a mapping of words, sorted by frequency.

    Words are lowercased when `lower` is truthy; <UNK> is ranked first.
    """
    def normalize(token):
        return token.lower() if lower else token

    words = [[normalize(x[0]) for x in s] for s in sentences]
    dico = create_dico(words)
    dico['<UNK>'] = 10000000
    word_to_id, id_to_word = create_mapping(dico)
    word_total = sum(len(sentence_words) for sentence_words in words)
    print("Found %i unique words (%i in total)" % (len(dico), word_total))
    return dico, word_to_id, id_to_word
def tag_mapping(sentences): """ Create a dictionary and a mapping of tags, sorted by frequency. """ tags = [word[-1] for s in sentences for word in s] dico = dict(Counter(tags)) dico[model.START_TAG] = -1 dico[model.STOP_TAG] = -2 tag_to_id, id_to_tag = create_mapping(dico) print("Found %i unique named entity tags" % len(dico)) return dico, tag_to_id, id_to_tag
def pos_mapping(sentences, col=1):
    """
    Create a dictionary and a mapping of pos tags, sorted by frequency.

    `col` selects which token field holds the POS tag (default 1).
    Returns (frequency dict, pos_to_id, id_to_pos).
    """
    tags = [[word[col] for word in s] for s in sentences]
    dico = create_dico(tags)
    pos_to_id, id_to_pos = create_mapping(dico)
    # Fixed: Python 2 print statement -> print() call.
    print("Found %i unique POS tags" % len(dico))
    return dico, pos_to_id, id_to_pos
def word_mapping(sentences, vocabulary_size, pre_train=None):
    """
    Create a dictionary and a mapping of words, sorted by frequency.

    If `pre_train` is given, words without a pretrained embedding have
    their count zeroed and the mapping is rebuilt with an <UNK> entry.
    Returns (frequency dict, word_to_id, id_to_word).
    """
    words = [[x[0] for x in s] for s in sentences]
    dico = create_dico(words)
    word_to_id, id_to_word = create_mapping(dico, vocabulary_size)
    print("Found %i unique words (%i in total)" % (len(dico), sum(len(x) for x in words)))
    if pre_train:
        emb_dictionary = read_pre_training(pre_train)
        # Fixed: dict.iterkeys() is Python 2 only; plain dict iteration works
        # on both versions (only values are reassigned, no keys inserted).
        for word in dico:
            if word not in emb_dictionary:
                dico[word] = 0
        dico['<UNK>'] = 10000000
        word_to_id, id_to_word = create_mapping(dico, vocabulary_size)
    return dico, word_to_id, id_to_word
def head_mapping(sentences): """ Create a dictionary and a mapping of head tags, sorted by frequency. """ tags = [[word[3] for word in s] for s in sentences] dico = create_dico(tags) dico['MAX'] = 10000000 print dico tag_to_id, id_to_tag = create_mapping(dico) print "Found %i unique Head index tags" % len(dico) return dico, tag_to_id, id_to_tag
def word_mapping(sentences, lower):
    """
    Create a dictionary and a mapping of words, sorted by frequency.

    Words are lowercased when `lower` is truthy; <UNK> is ranked first.
    """
    words = []
    for sentence in sentences:
        if lower:
            words.append([token[0].lower() for token in sentence])
        else:
            words.append([token[0] for token in sentence])
    dico = create_dico(words)
    dico['<UNK>'] = 10000000
    word_to_id, id_to_word = create_mapping(dico)
    total = sum(len(sentence_words) for sentence_words in words)
    print("Found %i unique words (%i in total)" % (
        len(dico), total
    ))
    return dico, word_to_id, id_to_word
def char_mapping(sentences): """ Create a dictionary and mapping of characters, sorted by frequency. """ chars = ''.join([w[0] for s in sentences for w in s]) dico = dict(Counter(chars)) dico['<PAD>'] = 10000001 dico['<UNK>'] = 10000000 char_to_id, id_to_char = create_mapping(dico) print("Found %i unique characters" % len(dico)) return dico, char_to_id, id_to_char
def tag_mapping(data_path, data_type):
    """
    Create a dictionary and a mapping of tags, sorted by frequency.

    Reads space-separated tag lines from <data_path><data_type>_labels.txt,
    dropping the trailing token of each line.
    """
    labels_path = data_path + data_type + "_labels.txt"
    with open(labels_path, "r") as handle:
        tags = [line.split(" ")[:-1] for line in handle.readlines()]
    dico = create_dico(tags)
    dico[model.START_TAG] = -1
    dico[model.STOP_TAG] = -2
    tag_to_id, id_to_tag = create_mapping(dico)
    print("Found %i unique named entity tags" % len(dico))
    return dico, tag_to_id, id_to_tag
def tag_mapping(sentences): """ Create a dictionary and a mapping of tags, sorted by frequency. """ tags = [[word[-1] for word in s] for s in sentences] dico = create_dico(tags) dico[model.START_TAG] = -1 dico[model.STOP_TAG] = -2 id_to_tag, tag_to_id = create_mapping(dico) return dico, tag_to_id, id_to_tag
def tag_mapping(sentences): """ Create a dictionary and a mapping of tags, sorted by frequency. """ tags = [[char[-1] for char in s] for s in sentences] dico = create_dico(tags) dico['[SEP]'] = len(dico) + 1 dico['[CLS]'] = len(dico) + 2 tag_to_id, id_to_tag = create_mapping(dico) print("Found %i unique named entity tags" % len(dico)) return dico, tag_to_id, id_to_tag