import os
import sys
from collections import Counter

import helpers
import textprocessing

print('starting...')

resources_path = os.path.join(os.getcwd(), 'resources')
data_path = os.path.join(os.getcwd(), 'data')

if not os.path.isdir(resources_path):
    print('ERROR: {} does not exist'.format(resources_path))
    sys.exit(1)

if not os.path.exists(data_path):
    os.mkdir(data_path)

dataset_path = os.path.join(resources_path, 'dataset')
stopwords_file = os.path.join(resources_path, 'stopwords_en.txt')

stopwords = helpers.get_stopwords(stopwords_file)

docs = helpers.get_docs(dataset_path)
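
# get_stopwords and get_docs live in a helpers module that is not shown; a
# plausible minimal version of each (an assumption, not the original code):
#
#   def get_stopwords(path):
#       with open(path) as f:
#           return set(f.read().split())
#
#   def get_docs(path):
#       return [os.path.join(path, name) for name in sorted(os.listdir(path))]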

# Build a bag-of-words representation for every document in the dataset
corpus = []
for doc in docs:
    with open(doc, mode='r') as f:
        text = f.read()
        words = textprocessing.preprocess_text(text, stopwords)
        bag_of_words = Counter(words)
        corpus.append(bag_of_words)

# Turn raw term counts into normalized tf-idf weights
idf = helpers.compute_idf(corpus)
for doc in corpus:
    helpers.compute_weights(idf, doc)
    helpers.normalize(doc)
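
# The helpers module is not shown here. A minimal sketch of the three calls
# above (an assumption, not the original implementation):
#
#   def compute_idf(corpus):
#       # idf = log(N / df) from each term's document frequency
#       df = Counter(word for doc in corpus for word in doc)
#       return {word: math.log(len(corpus) / count) for word, count in df.items()}
#
#   def compute_weights(idf, doc):
#       # In-place tf-idf: idf * (1 + log(tf)), the same scheme the query
#       # weighting in Example 2 below uses
#       for word, tf in doc.items():
#           doc[word] = idf[word] * (1 + math.log(tf))
#
#   def normalize(doc):
#       # Scale the weight vector to unit length for cosine scoring
#       norm = math.sqrt(sum(w * w for w in doc.values()))
#       for word in doc:
#           doc[word] /= norm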

# Example 2

import math
import os
import pickle
from collections import Counter
from datetime import datetime

import numpy as np
from flask import request, render_template
from PIL import Image
from werkzeug.utils import secure_filename

import helpers
import textprocessing
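
# `fe` (a feature extractor), `features` (the matrix of precomputed image
# features), and `img_paths` (paths of the indexed images) are assumed to be
# defined at module level elsewhere in the app, and index() is presumably
# registered on a Flask app, e.g. with @app.route('/', methods=['GET', 'POST']).
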
def index():
    if request.method == 'POST':
        file = request.files['query_data']
        filename = secure_filename(file.filename)
        extension = os.path.splitext(filename)[1].lower()
        if extension == '.jpg':
            # Image query: extract features from the upload and rank the
            # indexed images by distance to the query features
            img = Image.open(file.stream)  # PIL image
            uploaded_img_path = "static/uploaded/" + datetime.now().isoformat() + "_" + filename
            img.save(uploaded_img_path)

            query = fe.extract(img)
            dists = np.linalg.norm(features - query, axis=1)  # L2 distance to each indexed feature vector
            ids = np.argsort(dists)[:30]  # Top 30 results
            scores = [(dists[i], img_paths[i]) for i in ids]
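            # Smaller distance means a more similar image; each entry of
            # `scores` pairs a distance with the path of the matching image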

            return render_template('index.html',
                                   query_path=uploaded_img_path,
                                   scores=scores)
        else:
            # Text query: score documents with tf-idf over the inverted index
            query = file.read().decode("utf-8")
            docs_file = os.path.join(os.getcwd(), 'data', 'docs.pickle')
            inverted_index_file = os.path.join(
                os.getcwd(), 'data', 'inverted_index.pickle')

            stopwords_file = os.path.join(os.getcwd(), 'resources', 'stopwords_en.txt')

            # Deserialize data
            with open(docs_file, 'rb') as f:
                docs = pickle.load(f)
            with open(inverted_index_file, 'rb') as f:
                inverted_index = pickle.load(f)

            stopwords = helpers.get_stopwords(stopwords_file)

            dictionary = set(inverted_index.keys())

            # Preprocess the query and drop terms missing from the index

            query = textprocessing.preprocess_text(query, stopwords)
            query = [word for word in query if word in dictionary]
            query = Counter(query)

            # Weight query terms with the same tf-idf scheme: idf * (1 + log(tf))
            for word, value in query.items():
                query[word] = inverted_index[word]['idf'] * (1 + math.log(value))
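            # Worked example: a term occurring twice (tf = 2) with idf = 1.5
            # gets weight 1.5 * (1 + ln 2) ≈ 2.54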

            helpers.normalize(query)

            # Accumulate dot products between the query vector and every
            # document vector via the postings lists
            scores = [[i, 0] for i in range(len(docs))]
            for word, value in query.items():
                for doc_id, weight in inverted_index[word]['postings_list']:
                    scores[doc_id][1] += value * weight

            scores.sort(key=lambda doc: doc[1], reverse=True)

            # Collect documents with non-zero scores (list is already sorted)
            all_docs = []
            all_scores = []
            for doc_id, score in scores:
                if score == 0:
                    break
                all_docs.append(docs[doc_id])
                all_scores.append(score)
            return render_template('docindex.html',
                                   query_path=filename,
                                   docs=zip(all_docs, all_scores))

    else:
        return render_template('index.html')