def __init__(self, num_of_homo_feats=10, max_qry_length=1794, max_doc_length=2907, query_path=None, document_path=None, corpus="TDT2"):
    """Load queries and documents (word positions preserved) and build homogeneous features.

    Args:
        num_of_homo_feats: number of homogeneous features generated by __genFeature.
        max_qry_length: maximum query length kept (in tokens).
        max_doc_length: maximum document length kept (in tokens).
        query_path: path to the query file; defaults to the corpus' training-query layout.
        document_path: path to the document file; defaults to the corpus' split-doc layout.
        corpus: corpus name used to build the default paths (e.g. "TDT2").
    """
    res_pos = True  # preserve token positions during preprocessing
    self.num_vocab = 51253  # fixed vocabulary size of the word-ID corpus
    self.max_qry_length = max_qry_length
    self.max_doc_length = max_doc_length
    self.num_of_homo_feats = num_of_homo_feats
    # Fall back to the conventional corpus layout when paths are not supplied.
    # NOTE: use `is None` for singleton comparison (PEP 8), not `== None`.
    if query_path is None:
        query_path = "../Corpus/" + corpus + "/Train/XinTrainQryTDT2/QUERY_WDID_NEW"
    if document_path is None:
        document_path = "../Corpus/" + corpus + "/SPLIT_DOC_WDID_NEW"
    # relevance set (query -> relevant-document mapping used for HMM training)
    self.hmm_training_set = ProcDoc.readRELdict()
    # read documents, reserving position info; 200 is passed straight to docPreproc
    # (presumably a length cutoff — confirm against ProcDoc.docPreproc)
    doc = ProcDoc.readFile(document_path)
    self.doc = ProcDoc.docPreproc(doc, res_pos, 200)
    # read queries, reserving position info, restricted to the relevance set
    qry = ProcDoc.readFile(query_path)
    self.qry = ProcDoc.qryPreproc(qry, self.hmm_training_set, res_pos, 200)
    # generate homogeneous features
    self.homo_feats = self.__genFeature(num_of_homo_feats)
def __init__(self, qry_path=None, rel_path=None, isTraining=True, doc_path=None):
    """Load queries/documents, compute TF-IDF vectors, and prepare numpy arrays.

    Args:
        qry_path: path to the query file; defaults to the TDT2 training queries.
        rel_path: path to the relevance-judgment file; defaults to the TDT2
            HMM outside-train judgments.
        isTraining: whether to read the relevance set in training mode
            (forwarded to ProcDoc.readRELdict and EvaluateModel).
        doc_path: path to the document file; defaults to the TDT2 split docs.
    """
    # Default to the training-step corpus layout when paths are not supplied.
    # NOTE: use `is None` for singleton comparison (PEP 8), not `== None`.
    if qry_path is None:
        qry_path = "../Corpus/TDT2/Train/XinTrainQryTDT2/QUERY_WDID_NEW"
    if doc_path is None:
        doc_path = "../Corpus/TDT2/SPLIT_DOC_WDID_NEW"
    if rel_path is None:
        rel_path = "../Corpus/TDT2/Train/QDRelevanceTDT2_forHMMOutSideTrain"
    self.vocab_size = 51253  # fixed vocabulary size of the word-ID corpus
    # relevance set and matching evaluation model
    self.rel_set = ProcDoc.readRELdict(rel_path, isTraining)
    self.evaluate_model = EvaluateModel(rel_path, isTraining)
    # read documents and accumulate their lengths
    doc = ProcDoc.readFile(doc_path)
    self.doc = ProcDoc.docPreproc(doc)
    self.doc_len = Statistical.compLenAcc(self.doc)
    # read queries (restricted to the relevance set) and accumulate lengths
    qry = ProcDoc.readFile(qry_path)
    self.qry_tf = ProcDoc.qryPreproc(qry, self.rel_set)
    self.qry_len = Statistical.compLenAcc(self.qry_tf)
    # weight raw term frequencies into TF-IDF vectors
    [self.qry, self.doc] = Statistical.TFIDF(self.qry_tf, self.doc, self.qry_len, self.doc_len)
    # convert dict representations to numpy arrays; query TF-IDF reuses the
    # ID ordering of the raw-TF conversion so rows stay aligned
    self.qry_tf, self.qry_tf_IDs = self.__dict2np(self.qry_tf)
    self.qry, self.qry_IDs = self.__dict2np(self.qry, self.qry_tf_IDs)
    self.doc, self.doc_IDs = self.__dict2np(self.doc)
    # precompute L2-normalized document vectors
    self.doc = Statistical.l2Normalize(self.doc)
type_feat = "sparse" # or embeddings query_path = None document_path = None QDrel_file_path = None corpus = "TDT2" # qry and doc if query_path == None: query_path = "../Corpus/" + corpus + "/Train/XinTrainQryTDT2/QUERY_WDID_NEW" if document_path == None: document_path = "../Corpus/" + corpus + "/SPLIT_DOC_WDID_NEW" if QDrel_file_path == None: QDrel_file_path = "../Significant-Words-Language-Models/train-qry-results-0.675969697596.txt" # relevancy set hmm_training_set = ProcDoc.readRELdict() # read document, reserve position doc = ProcDoc.readFile(document_path) doc = ProcDoc.docPreproc(doc, RES_POS) # read query, reserve position qry = ProcDoc.readFile(query_path) qry = ProcDoc.qryPreproc(qry, hmm_training_set, RES_POS) QDrel = RelPrep.readQDRel(QDrel_file_path) print len(qry), len(doc) print len(QDrel) NRMprep.getTrainAndValidation(qry, doc, QDrel, NUM_VOCAB, type_rank, type_feat) # (pointwise or pairwise) and (sparse or embeddings) # prepare data and label