Python extractStopTerms 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: utils

메소드/함수: extractStopTerms

hotexamples.com에서의 예제들: 2

Python extractStopTerms - 2개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한, Python의 utils.extractStopTerms에 대한 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: buildIndices.py 프로젝트: kmeurer/Information-Retrieval

import tabulate as tab
import numpy as np
import settings as ENV

sys.path.insert(0, 'src')
import utils as util
from indexing import docProcessor as dp
from indexing import indexing as idx
from indexing import tripleBuilder as tb
from object_definitions import document as d

# Module-level containers, all starting out empty (presumably filled by the
# indexing loop further down -- not visible in this excerpt).
termList, dfList, tripleList, documentList = [], [], [], []

# Whatever util.extractStopTerms() returns (defined in the project's utils
# module, not shown here).
stopTerms = util.extractStopTerms()

# Build all four index types when ENV.BUILD_ALL_INDEXES equals True;
# otherwise only the single type named by ENV.INDEX_TYPE.
# NOTE: the explicit `== True` comparison is kept on purpose -- a plain
# truthiness test would also accept non-boolean truthy settings.
indexTypes = (
    ["INVERTED", "POSITIONAL", "STEM", "PHRASE"]
    if ENV.BUILD_ALL_INDEXES == True
    else [ENV.INDEX_TYPE]
)

# Two empty dictionaries; the names suggest per-run and timing statistics
# -- TODO confirm against the code that fills them.
runStats, timeStats = {}, {}


# Empty our index folder: delete every entry that os.listdir reports for
# ENV.INDEX_LOCATION.
# The path is built with os.path.join rather than string concatenation so
# the right file is targeted whether or not ENV.INDEX_LOCATION ends with a
# path separator (the result is unchanged when it does).
# NOTE: os.remove raises for sub-directories; the folder is assumed to hold
# only plain files -- TODO confirm.
indexFiles = os.listdir(ENV.INDEX_LOCATION)
for f in indexFiles:
    os.remove(os.path.join(ENV.INDEX_LOCATION, f))

# For every index type we wish to create...

예제 #2

파일 보기

파일: processQueries.py 프로젝트: kmeurer/Information-Retrieval

import utils as util
from query import queryProcessor as qp
from query import index as i
from query import vectorSpace as vsm
from query import bm25
from query import languageModel as lang
from query import queryReducer as q_red
from query import queryExpander as q_exp
from indexing import indexing as idx
from object_definitions import document as d
from object_definitions.query import Query


# Timestamp captured at script start-up (naive local time, as returned by
# datetime.datetime.now()); presumably used later to report elapsed time --
# the consumer is not visible in this excerpt.
start_time = datetime.datetime.now()

# Store the result of util.extractStopTerms() on ENV so other modules can
# read it (ENV is presumably the project's settings module; its import is
# not visible in this excerpt).
ENV.STOP_TERMS = util.extractStopTerms()

# We always want to extract phrases for the lexicon, so this flag is set
# to True unconditionally here.
ENV.EXTRACT_PHRASES = True

''' LOAD NECESSARY INDEXES '''
if ENV.QUERY_PROCESSING_METHOD == "STANDARD":
    lexicon_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + "Lexicon.txt"
    doc_list_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + ENV.DOC_FILE_NAME + ".txt"
    posting_list_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + ENV.POSTING_LIST_NAME + ".txt"
    ENV.primary_index = i.Index(lexicon_path, posting_list_path, doc_list_path)
# if the index we use is query dependent
elif ENV.QUERY_PROCESSING_METHOD == "CONDITIONAL":
    ENV.QUERY_PROCESSING_INDEX = "PHRASE"
    lexicon_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + "Lexicon.txt"
    doc_list_path = ENV.INDEX_LOCATION + ENV.QUERY_PROCESSING_INDEX.lower() + ENV.DOC_FILE_NAME + ".txt"