Example #1
import os

from gensim import corpora

import JsonParser


def run_hicocluster_create_matrix():
    # Number of docs: 1950
    # Number of items: 21826
    texts = JsonParser.get_texts(os.path.join(os.getcwd(), "clusters"))
    new_texts = [text.split() for text in texts]

    # Build the token -> id mapping and persist it for later runs.
    dictionary = corpora.Dictionary(new_texts)
    dictionary.save(os.path.join(os.getcwd(), "dictionary.dict"))

    # Convert each document to a sparse bag-of-words vector and serialize.
    corpus = [dictionary.doc2bow(text) for text in new_texts]
    corpora.MmCorpus.serialize(os.path.join(os.getcwd(), "corpus.mm"), corpus)
    print("number of docs: " + str(dictionary.num_docs))
    print("number of items: " + str(len(dictionary.token2id)))

    # Expand each non-empty sparse vector into a dense term-frequency row.
    features = len(dictionary.token2id)
    set_doc_terms = []
    for doc in corpus:
        if len(doc) > 0:
            doc_terms = [0] * features
            for term_id, term_count in doc:
                doc_terms[term_id] = term_count
            set_doc_terms.append(doc_terms)

    # Write the dense document-term matrix, one document per line.
    with open(os.path.join(os.getcwd(), "matrix.txt"), "w") as matrix:
        for line in set_doc_terms:
            matrix.write(" ".join(str(value) for value in line) + "\n")
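JsonParser is project-specific code that is not shown on this page. As a rough sketch of what a helper with the same shape might look like, the following reads every JSON file in the given directory and returns one text per file; the file layout and the "text" key are assumptions for illustration, not the project's actual format.

import json
import os


def get_texts(directory):
    # Hypothetical stand-in for JsonParser.get_texts: collect the assumed
    # "text" field from every *.json file in the directory.
    texts = []
    for name in sorted(os.listdir(directory)):
        if name.endswith(".json"):
            with open(os.path.join(directory, name), "r", encoding="utf-8") as handle:
                texts.append(json.load(handle)["text"])
    return texts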
Example #2
import os

import numpy

import ExTFIDF
import JsonParser


def get_combination():
    print("run_combination")
    # Google data (alternative source, unused here):
    # parser = GoogleNewsParser.NewsParsers()
    # parser.parse_data_from_tok()
    # tfidf.fit_data(parser.get_texts())

    # Json Google: fit TF-IDF on the texts parsed from the clusters directory.
    tfidf = ExTFIDF.TfIdf()
    tfidf.fit_data(JsonParser.get_texts(os.path.join(os.getcwd(), "clusters")))
    tf_vectors = tfidf.get_data_as_vector()
    print("length of tfidf feature: " + str(len(tf_vectors[0])))

    # load_d2v() returns a pair whose second element holds one doc2vec
    # vector per document.
    pairs = load_d2v()
    single = pairs[1]
    print("length of doc2vec feature: " + str(len(single[0])))

    # Concatenate the two feature sets column-wise: each document's final
    # vector is [tfidf features | doc2vec features].
    final = numpy.hstack((tf_vectors, single))
    print("length of final features: " + str(len(final[0])))
    return final
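load_d2v() and ExTFIDF are defined elsewhere in this project, but the combination step itself is plain numpy. A minimal self-contained illustration of what numpy.hstack does here, using toy two-document arrays in place of the real TF-IDF and doc2vec vectors:

import numpy

# Toy values: 2 documents with 3 TF-IDF features and 2 doc2vec features.
tf_vectors = numpy.array([[0.1, 0.0, 0.5],
                          [0.0, 0.3, 0.2]])
d2v_vectors = numpy.array([[0.9, -0.4],
                           [0.7, 0.1]])

final = numpy.hstack((tf_vectors, d2v_vectors))
print(final.shape)    # (2, 5): each row is [tfidf | doc2vec]
print(len(final[0]))  # 5, matching the "length of final features" print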
Example #3
import os

import Algorithm
import ExTFIDF
import JsonParser


def algorithm_tfidf():
    print("Running TFIDF")
    # Google data (alternative source, unused here):
    # parser = GoogleNewsParser.NewsParsers()
    # parser.parse_data_from_tok()
    # tfidf.fit_data(parser.get_texts())

    # Json Google: fit TF-IDF on the texts parsed from the clusters directory.
    tfidf = ExTFIDF.TfIdf()
    tfidf.fit_data(JsonParser.get_texts(os.path.join(os.getcwd(), "clusters")))

    tf_vectors = tfidf.get_data_as_vector()
    print("length of tfidf: " + str(len(tf_vectors)))

    print("Running algorithm with TFIDF")
    Algorithm.algorithm_Kmean(tf_vectors)
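Algorithm.algorithm_Kmean is also project code that is not shown. A minimal sketch of what such a function could do with scikit-learn's KMeans; the function body, the default cluster count, and the use of scikit-learn are all assumptions for illustration:

from sklearn.cluster import KMeans


def algorithm_Kmean(vectors, n_clusters=20):
    # Hypothetical stand-in: cluster the document vectors with k-means
    # and report each document's assigned cluster.
    model = KMeans(n_clusters=n_clusters, random_state=0)
    labels = model.fit_predict(vectors)
    for doc_index, label in enumerate(labels):
        print("document %d -> cluster %d" % (doc_index, label))
    return labels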