from indexing import create_index

create_index()
import sys
import time
import pickle

import extract_results
import parsing_search_results
import indexing
import clustering
import labeling

if __name__ == "__main__":
    if len(sys.argv) != 1:  ## no command-line arguments; the two inputs below are read interactively
        sys.exit(2)
    ## Expect exactly 2 inputs: the search query and the number of clusters
    query = raw_input("Please enter the search string: ")
    k = raw_input("Please enter the number of clusters: ")
    k = int(k)
    ## query = "jaguar"
    ## k = 7
    start = time.clock()
    interim_path = extract_results.get_search_results(query)  ## extract the search results
    parsing_search_results.parse_file(interim_path)           ## parse the search results
    title_dict = pickle.load(open("title_dict", "rb"))        ## unpickle the document dictionaries
    desc_dict = pickle.load(open("desc_dict", "rb"))
    url_dict = pickle.load(open("url_dict", "rb"))
    indexing.create_index(title_dict, desc_dict, url_dict)    ## create an index
    index = pickle.load(open("index", "rb"))                  ## unpickle the index dictionaries
    np_ind = pickle.load(open("np_ind", "rb"))
    stem_dict = pickle.load(open("stem_dict", "rb"))
    indexing.calc_tf_idf(title_dict, index, np_ind)           ## calculate the tf-idf values for the document vectors
    doc_word_dict = pickle.load(open("doc_word_dict", "rb"))  ## unpickle the tf-idf dictionaries representing document vectors
    doc_np_dict = pickle.load(open("doc_np_dict", "rb"))
    norm_doc_word_dict = clustering.normalize_doc_dict(doc_word_dict)  ## normalize the document vectors
    dij = clustering.calc_eucl_dist(norm_doc_word_dict)       ## calculate the pairwise Euclidean distances
    clustering.get_mediods(k, dij)                            ## use k-medoids to get the clusters
    cluster = pickle.load(open("cluster", "rb"))              ## unpickle the clusters
    label_dict = labeling.label(cluster, doc_np_dict, stem_dict, np_ind, query)  ## label the clusters
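## clustering.get_mediods(k, dij) above is where the actual k-medoids clustering happens.
## As a rough, hedged sketch of that step (not the project's implementation: the helper name,
## the doc_ids argument, and the assumption that dij maps a (doc_i, doc_j) pair to a distance
## are all illustrative guesses), a minimal PAM-style loop over a precomputed distance matrix
## looks like this:
import random

def kmedoids_sketch(k, dij, doc_ids, max_iter=100):
    def dist(a, b):
        ## dij is assumed symmetric and stored under one of the two key orders
        if a == b:
            return 0.0
        return dij[(a, b)] if (a, b) in dij else dij[(b, a)]
    medoids = random.sample(doc_ids, k)  ## start from k random documents as medoids
    for _ in range(max_iter):
        ## assignment step: attach every document to its nearest medoid
        clusters = dict((m, []) for m in medoids)
        for d in doc_ids:
            nearest = min(medoids, key=lambda m: dist(d, m))
            clusters[nearest].append(d)
        ## update step: within each cluster, the member with the smallest
        ## total distance to its co-members becomes the new medoid
        new_medoids = [min(members, key=lambda c: sum(dist(c, o) for o in members))
                       for members in clusters.values()]
        if set(new_medoids) == set(medoids):  ## medoids stopped moving: converged
            break
        medoids = new_medoids
    return clusters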
import sys
import math
import random
import re
from xml.dom.minidom import parse, parseString
import xml.dom.minidom as minidom
from collections import OrderedDict, defaultdict
import pickle
from sets import Set
from operator import itemgetter
import os
import nltk
from nltk.tokenize.regexp import RegexpTokenizer
import time
import Queue
import parsing
import indexing

if __name__ == "__main__":
    if len(sys.argv) != 2:  # Expect exactly 1 argument: the input file
        sys.exit(2)
    path1 = sys.argv[1]
    parsing.parse_file(path1)  ## parse the input file into the document dictionaries
    indexing.create_index()    ## build the index from the parsed documents
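## For context, indexing.create_index() builds the index over the documents produced by
## parsing.parse_file(). As a hedged illustration only (the real module's data layout is not
## shown here; `docs` as a doc_id -> text dict and the \w+ tokenizer are assumptions), a
## minimal inverted index keyed on term frequencies could look like this:
from collections import defaultdict
from nltk.tokenize.regexp import RegexpTokenizer

def build_inverted_index_sketch(docs):
    tokenizer = RegexpTokenizer(r"\w+")
    index = defaultdict(lambda: defaultdict(int))  ## term -> {doc_id: term frequency}
    for doc_id, text in docs.items():
        for token in tokenizer.tokenize(text.lower()):
            index[token][doc_id] += 1
    return index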
#!/usr/bin/python
import sys
import math
import random
import re
from collections import OrderedDict
import os
import pickle
import time
import nltk
from nltk.stem.porter import PorterStemmer
import itertools
import indexing
import train

if __name__ == "__main__":
    if len(sys.argv) != 2:  # Expect exactly 1 argument: the training data file
        sys.exit(2)
    input1 = open(sys.argv[1], "r")
    indexing.create_index(input1)  ## this function will do the basic pre-processing and create the index
    train.train_classifier()       ## this function will calculate the model parameters for the classifier
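## train.train_classifier() is where the model parameters are estimated. The classifier type
## is not visible in this file; purely as a hedged sketch (assuming a multinomial Naive Bayes
## model and an input of (label, token list) pairs, both assumptions rather than the project's
## confirmed design), parameter estimation with Laplace smoothing looks like this:
import math
from collections import defaultdict

def train_naive_bayes_sketch(examples):
    class_counts = defaultdict(int)                      ## label -> number of training documents
    word_counts = defaultdict(lambda: defaultdict(int))  ## label -> {token: count}
    vocab = set()
    total_docs = 0
    for label, tokens in examples:
        total_docs += 1
        class_counts[label] += 1
        for t in tokens:
            word_counts[label][t] += 1
            vocab.add(t)
    priors = {}       ## log P(class)
    likelihoods = {}  ## log P(token | class), Laplace-smoothed
    for label in class_counts:
        priors[label] = math.log(class_counts[label] / float(total_docs))
        denom = sum(word_counts[label].values()) + len(vocab)
        likelihoods[label] = dict((t, math.log((word_counts[label][t] + 1) / float(denom)))
                                  for t in vocab)
    return priors, likelihoods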