Примеры использования PositiveExplicit в Python

Язык программирования: Python

Пространство имен/Пакет: representations.explicit

Класс/Тип: PositiveExplicit

Примеров на hotexamples.com: 8

Python PositiveExplicit — найдено 8 примеров. Это лучшие примеры кода на Python для representations.explicit.PositiveExplicit, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

PositiveExplicit(6)

load(2)

Основные методы

PositiveExplicit (6)

load (2)

Пример #1

Показать файл

def main():
    """Run sparse SVD on a PositiveExplicit matrix and save the results.

    Arguments are parsed by docopt from the usage text below.  The three
    arrays returned by ``sparsesvd`` are saved as ``<output_path>.ut.npy``,
    ``<output_path>.s.npy`` and ``<output_path>.vt.npy``; the ``iw`` and
    ``ic`` vocabularies of the loaded representation are saved alongside.
    The wall-clock time spent in ``sparsesvd`` is printed.
    """
    cli = docopt("""
    Usage:
        pmi2svd.py [options] <pmi_path> <output_path>
    
    Options:
        --dim NUM    Dimensionality of eigenvectors [default: 500]
        --neg NUM    Number of negative samples; subtracts its log from PMI [default: 1]
    """)

    source = cli['<pmi_path>']
    prefix = cli['<output_path>']
    rank = int(cli['--dim'])
    shift = int(cli['--neg'])

    ppmi = PositiveExplicit(source, normalize=False, neg=shift)

    # Only the decomposition itself is timed, not loading or saving.
    began = time.time()
    ut, s, vt = sparsesvd(ppmi.m.tocsc(), rank)
    print("Time elapsed for SVD: %f" % (time.time() - began))

    for suffix, array in (('.ut.npy', ut), ('.s.npy', s), ('.vt.npy', vt)):
        np.save(prefix + suffix, array)

    save_vocabulary(prefix + '.words.vocab', ppmi.iw)
    save_vocabulary(prefix + '.contexts.vocab', ppmi.ic)

Пример #2

Показать файл

Файл: sim_pair.py Проект: cltl/meaning_space

def get_sim_pair_ppmi(corpus, target_word1, target_word2, year, results_dir):
    """Print the similarity of two words for one year and log it to a TSV file.

    The representation is loaded with ``PositiveExplicit.load`` from
    ``corpus + "/" + str(year)`` and queried with ``similarity``.  The line
    ``<word1>-<word2>\\t<year>\\t<similarity>`` is appended to
    ``<results_dir><word1>-<word2>-cosines.tsv`` unless the file already
    contains that exact line.  ``results_dir`` is concatenated directly with
    the file name; no path separator is inserted.
    """
    pair_label = target_word1 + '-' + target_word2
    tsv_name = pair_label + '-cosines.tsv'

    model = PositiveExplicit.load(corpus + "/" + str(year))
    cos = model.similarity(target_word1, target_word2)

    tsv_path = results_dir + tsv_name

    # Lines already logged; stays empty when the file does not exist yet.
    known_lines = []
    if os.path.isfile(tsv_path):
        print('file exists')
        with open(tsv_path) as infile:
            known_lines = infile.read().split('\n')

    with open(tsv_path, 'a') as outfile:
        row = '\t'.join((pair_label, str(year), str(cos))) + '\n'
        if row.strip() not in known_lines:
            outfile.write(row)
        else:
            print('result already there')

    print(cos)

Пример #3

Показать файл

Файл: PPMI.py Проект: grv1207/Exploring-Diachronic-Changes-Medical-Knowledge

    def _counts2PMI(self):
        """Build a PMI matrix from the pair-count file and persist it.

        Each line of ``self.count_pair_file`` is split on whitespace into
        ``count``, ``word`` and ``context``.  Pairs whose word and context are
        both keys of ``self.words`` / ``self.contexts`` are written into a
        sparse count matrix indexed by the sorted vocabularies.  The counts
        are passed to ``self.calc_pmi`` together with ``self.cds``; the result
        and both vocabularies are saved under ``self.pmi_file``.  Finally the
        saved matrix is loaded as ``PositiveExplicit`` into ``self.explicit``
        and handed to ``cf.saveDictionary``.
        """
        word_keys = list(self.words.keys())
        context_keys = list(self.contexts.keys())
        row_labels = sorted(word_keys)
        col_labels = sorted(context_keys)
        row_of = {word: idx for idx, word in enumerate(row_labels)}
        col_of = {ctx: idx for idx, ctx in enumerate(col_labels)}
        shape = (len(row_of), len(col_of))

        total = csr_matrix(shape, dtype=np.float32)
        pending = dok_matrix(shape, dtype=np.float32)
        flush_every = 100000
        with open(self.count_pair_file) as pairs:
            for line_no, line in enumerate(pairs, 1):
                count, word, context = line.strip().split()
                if word in row_of and context in col_of:
                    pending[row_of[word], col_of[context]] = int(count)
                # Fold the buffer into the running total every flush_every
                # lines and start over with an empty buffer.
                if line_no % flush_every == 0:
                    total = total + pending.tocsr()
                    pending = dok_matrix(shape, dtype=np.float32)
        # Add whatever is left in the buffer after the last full batch.
        total = total + pending.tocsr()
        pmi = self.calc_pmi(total, self.cds)

        save_matrix(self.pmi_file, pmi)
        save_vocabulary(self.pmi_file + '.words.vocab', row_labels)
        save_vocabulary(self.pmi_file + '.contexts.vocab', col_labels)
        self.explicit = PositiveExplicit(self.pmi_file, normalize=False, neg=self.neg)

        # Target file name is built from the first two '/'-separated parts of
        # self.dict_name.
        name_parts = self.dict_name.split('/')
        target = name_parts[0] + '/' + name_parts[1] + '_explicit_ppmi.bin'
        cf.saveDictionary(self.explicit, target)

Пример #4

Показать файл

def create_representation(args):
    """Instantiate the representation selected by the parsed arguments.

    Reads ``<representation>``, ``<representation_path>``, ``--neg``,
    ``--w+c`` and ``--eig`` from *args*.  ``--neg`` and ``--eig`` are always
    converted (to ``int`` and ``float``) regardless of the chosen type.

    * ``'PPMI'``  -> ``PositiveExplicit``; raises ``Exception`` when
      ``--w+c`` is set.
    * ``'SVD'``   -> ``SVDEmbedding``, or an ``EnsembleEmbedding`` of two
      ``SVDEmbedding`` objects when ``--w+c`` is set.
    * ``'GLOVE'`` -> ``GLOVEEmbedding``.
    * anything else -> ``Embedding`` on ``path + '.words'``, or an
      ``EnsembleEmbedding`` of the ``.words`` and ``.contexts`` embeddings
      when ``--w+c`` is set.
    """
    kind = args['<representation>']
    location = args['<representation_path>']
    shift = int(args['--neg'])
    combine = args['--w+c']
    eig = float(args['--eig'])

    if kind == 'PPMI':
        if combine:
            raise Exception('w+c is not implemented for PPMI.')
        return PositiveExplicit(location, True, shift)

    if kind == 'SVD':
        if not combine:
            return SVDEmbedding(location, True, eig)
        return EnsembleEmbedding(
            SVDEmbedding(location, False, eig, False),
            SVDEmbedding(location, False, eig, True),
            True,
        )

    if kind == 'GLOVE':
        return GLOVEEmbedding(location, True)

    if not combine:
        return Embedding(location + '.words', True)
    return EnsembleEmbedding(
        Embedding(location + '.words', False),
        Embedding(location + '.contexts', False),
        True,
    )

Пример #5

Показать файл

Файл: pmi2svd.py Проект: cruigo93/hyperwords

def main():
    """Run sparse SVD on the matrix of the selected explicit representation.

    Arguments are parsed by docopt from the usage text below.  ``<repres>``
    picks the representation class: ``"BPMI"`` -> ``BinExplicit``,
    ``"PMI"`` -> ``NoExplicit`` (receives ``--k``), ``"NPMI"`` ->
    ``NegExplicit``; any other value -> ``PositiveExplicit`` (receives
    ``--neg``).  The arrays returned by ``sparsesvd`` and the ``iw`` / ``ic``
    vocabularies are saved with ``<output_path>`` as file-name prefix.
    """
    opts = docopt("""
    Usage:
        pmi2svd.py [options] <repres> <pmi_path> <output_path>
    
    Options:
        --dim NUM    Dimensionality of eigenvectors [default: 500]
        --neg NUM    Number of negative samples; subtracts its log from PMI [default: 1]
        --k NUM [default: 1]
    """)

    variant = opts['<repres>']
    source = opts['<pmi_path>']
    prefix = opts['<output_path>']
    rank = int(opts['--dim'])
    shift = int(opts['--neg'])
    k = int(opts['--k'])

    if variant == "BPMI":
        representation = BinExplicit(source, normalize=False)
    elif variant == "PMI":
        representation = NoExplicit(source, normalize=False, k=k)
    elif variant == "NPMI":
        representation = NegExplicit(source, normalize=False)
    else:
        # Every unrecognised name falls back to the positive (PPMI) variant.
        representation = PositiveExplicit(source, normalize=False, neg=shift)

    ut, s, vt = sparsesvd(representation.m.tocsc(), rank)

    for suffix, array in (('.ut.npy', ut), ('.s.npy', s), ('.vt.npy', vt)):
        np.save(prefix + suffix, array)

    save_vocabulary(prefix + '.words.vocab', representation.iw)
    save_vocabulary(prefix + '.contexts.vocab', representation.ic)

Пример #6

Показать файл

def main():
    """Run sparse SVD on a PositiveExplicit matrix and save the three factors.

    Arguments are parsed by docopt from the usage text below.  The arrays
    returned by ``sparsesvd`` are saved as ``<output>.ut.npy``,
    ``<output>.s.npy`` and ``<output>.vt.npy``.
    """
    opts = docopt("""
    Usage:
        ppmi2svd.py [options] <ppmi> <output>
    
    Options:
        --dim NUM    Dimensionality of eigenvectors [default: 300]
        --neg NUM    Number of negative samples; subtracts its log from PMI [default: 1]
    """)

    source = opts['<ppmi>']
    prefix = opts['<output>']
    rank = int(opts['--dim'])
    shift = int(opts['--neg'])

    representation = PositiveExplicit(source, normalize=False, neg=shift)

    ut, s, vt = sparsesvd(representation.m.tocsc(), rank)

    for suffix, array in (('.ut.npy', ut), ('.s.npy', s), ('.vt.npy', vt)):
        np.save(prefix + suffix, array)

Пример #7

Показать файл

def folder2chi(folder):
    """Return ``similarity_first_order`` of the "chi" representation in *folder*.

    The representation is a ``PositiveExplicit`` built from
    ``join(folder, "chi")`` with default arguments.
    """
    chi_path = join(folder, "chi")
    representation = PositiveExplicit(chi_path)
    return representation.similarity_first_order

Пример #8

Показать файл

def get_sim_neighbors_ppmi(corpus, target_word1, target_word2, year1, year2, n, results_dir):
    """Compare two word/year combinations through their nearest neighbors.

    Exactly one of two modes must apply:

    * two different years and one target word
      (``year1 != year2`` and ``target_word1 == target_word2``), or
    * the same year and two different target words
      (``year1 == year2`` and ``target_word1 != target_word2``).

    For each side the ``n`` nearest neighbors are collected, second-order
    vectors over the union of both neighbor sets are built and their cosine
    is computed.  In the two-year mode the union is first passed through
    ``filter_union``.  If ``represent(word).nnz`` is 0 for either side, the
    cosine and both neighbor lists are reported as ``'OOV'``.

    Side effects:

    * creates ``<results_dir>neighbors`` if it does not exist;
    * overwrites ``<results_dir>neighbors/<w1>-<w2>-<y1>-<y2>.tsv`` with the
      paired neighbor words;
    * appends one result line to the cosine TSV in ``results_dir`` unless the
      same line is already present;
    * prints the cosine.

    Raises:
        ValueError: if the arguments match neither supported mode.
    """
    two_years = (year1 != year2) and (target_word1 == target_word2)
    two_words = (year1 == year2) and (target_word1 != target_word2)
    if not (two_years or two_words):
        # Without this guard the code below would fail later on unbound
        # locals, after having already touched the file system.
        raise ValueError(
            'get_sim_neighbors_ppmi expects either two different years with '
            'one target word or one year with two different target words'
        )

    neighbors_dir = results_dir + 'neighbors'
    if not os.path.isdir(neighbors_dir):
        os.mkdir(neighbors_dir)

    results_words = 'neighbors/'+target_word1+'-'+target_word2+'-'+str(year1)+'-'+str(year2)+'.tsv'

    if two_years:
        results_cosine = 'cosines-'+target_word1+'-n-'+str(n)+'.tsv'
        embedd1 = PositiveExplicit.load(corpus + "/" + str(year1))
        embedd2 = PositiveExplicit.load(corpus + "/" + str(year2))
        # Both words are equal in this mode; use the first one on both sides.
        word_a = word_b = target_word1
    else:
        results_cosine = 'cosines-'+target_word1+'-'+target_word2+'-n-'+str(n)+'.tsv'
        embedd1 = embedd2 = PositiveExplicit.load(corpus + "/" + str(year1))
        word_a, word_b = target_word1, target_word2

    if (embedd1.represent(word_a).nnz != 0) and (embedd2.represent(word_b).nnz != 0):
        neighbors1 = get_nearest_neighbors(embedd1, word_a, n)
        neighbors2 = get_nearest_neighbors(embedd2, word_b, n)

        union = get_union(neighbors1, neighbors2)
        if two_years:
            union = filter_union(union, embedd1, embedd2, target_word1)

        vec1 = get_second_order_vector(embedd1, union, word_a)
        vec2 = get_second_order_vector(embedd2, union, word_b)

        neighbor_words1 = get_nearest_neighbor_words(neighbors1)
        neighbor_words2 = get_nearest_neighbor_words(neighbors2)

        cos = get_cosine(vec1, vec2)
    else:
        print('word out of vocab')
        cos = 'OOV'
        neighbor_words1 = ['OOV']
        neighbor_words2 = ['OOV']

    # Use one path for both the duplicate check and the append so they always
    # refer to the same file, whether or not results_dir ends with a slash.
    cosine_path = results_dir + '/' + results_cosine

    existing_results = []
    if os.path.isfile(cosine_path):
        print('file exists')
        with open(cosine_path) as infile:
            existing_results = infile.read().split('\n')

    with open(results_dir + results_words, 'w') as outfile1:
        for word1, word2 in zip(neighbor_words1, neighbor_words2):
            outfile1.write(word1 + '\t' + word2 + '\n')

    with open(cosine_path, 'a') as outfile2:
        result = target_word1+'-'+target_word2+'\t'+str(year1)+'-'+str(year2)+'\t'+str(cos)+'\n'
        if result.strip() in existing_results:
            print('result already there')
        else:
            outfile2.write(result)
    print(cos)