def main(category_id):
    """Entry point: run ``inclusionRelation`` for one category.

    Creates the ``internal`` directory through ``file_utils.createDirs``,
    fetches the candidate categories for ``category_id`` from
    ``common.getCandidateCategory`` and passes both to
    ``inclusionRelation``.

    Args:
        category_id: Identifier forwarded unchanged to
            ``common.getCandidateCategory`` and ``inclusionRelation``.
    """
    # Python 2 only: reloading ``sys`` makes ``setdefaultencoding``
    # reachable again so the process-wide default codec can be set to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    output_dirs = ['internal']
    file_utils.createDirs(output_dirs)

    candidates = common.getCandidateCategory(category_id)
    inclusionRelation(category_id, candidates)
def main(category_id):
    """Entry point: run ``clean`` on the data returned by ``readJosn``.

    Steps, in order: load the jieba user dictionary, create the
    ``wiki_search`` directory, fetch the candidate categories, read the
    per-category data with ``readJosn`` and hand everything to ``clean``.

    Args:
        category_id: Identifier forwarded unchanged to the helpers.
    """
    # Python 2 only: reloading ``sys`` makes ``setdefaultencoding``
    # reachable again so the process-wide default codec can be set to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # NOTE(review): relative path, so it resolves against the current
    # working directory of the process, not this file's location.
    user_dict_path = "../../../data/jieba_userdict.txt"
    jieba.load_userdict(user_dict_path)

    file_utils.createDirs(['wiki_search'])

    candidates = common.getCandidateCategory(category_id)
    crawled = readJosn(category_id)
    clean(category_id, crawled, candidates)
Exemplo n.º 3
0
def main(category_id):
    """Drive the ``wiki_search`` cleaning step for a single category.

    Loads the jieba user dictionary, ensures the ``wiki_search`` directory
    exists via ``file_utils.createDirs``, then calls ``clean`` with the
    candidate categories and the value returned by ``readJosn``.

    Args:
        category_id: Identifier passed through to
            ``common.getCandidateCategory``, ``readJosn`` and ``clean``.
    """
    # Python 2 idiom for switching the interpreter's default codec to UTF-8;
    # ``setdefaultencoding`` is only available after ``reload(sys)``.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Path is relative to the working directory the script is started from.
    jieba.load_userdict("../../../data/jieba_userdict.txt")

    work_dirs = ['wiki_search']
    file_utils.createDirs(work_dirs)

    candidate_categories = common.getCandidateCategory(category_id)
    crawl_result = readJosn(category_id)
    clean(category_id, crawl_result, candidate_categories)
Exemplo n.º 4
0
def main(category_path):
    """Entry point: run ``combineFeature`` for one category path.

    ``category_path`` is an underscore-separated string whose first
    component is the integer category id. The id is used to fetch the
    candidate categories; the full, unsplit path is forwarded to
    ``combineFeature``.

    Args:
        category_path: String of the form ``"<id>"`` or ``"<id>_..."``.

    Raises:
        ValueError: If the first underscore-separated component is not an
            integer literal.
    """
    # Python 2 only: reloading ``sys`` makes ``setdefaultencoding``
    # reachable again so the process-wide default codec can be set to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Only the leading id is needed in this step. The previous version also
    # decoded the trailing component into ``query_category`` /
    # ``main_category_list``, but neither value was ever read.
    category_id = int(category_path.split('_')[0])

    file_utils.createDirs(['combine_feature'])
    category_set = common.getCandidateCategory(category_id)
    combineFeature(category_id, category_path, category_set)
def main(category_path):
    """Entry point: run ``extractFeatureFromWikiCategory`` for one path.

    ``category_path`` is split on ``'_'``. The first component is parsed as
    the integer category id; when there are at least two components the
    last one is decoded from UTF-8 and used as the query category,
    otherwise the query category is the empty string.

    Args:
        category_path: Underscore-separated path, e.g. ``"<id>_<name>"``.

    Raises:
        ValueError: If the first component is not an integer literal.
    """
    # Python 2 only: reloading ``sys`` makes ``setdefaultencoding``
    # reachable again so the process-wide default codec can be set to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    jieba.load_userdict(data_path + "jieba_userdict.txt")
    file_utils.createDirs(['wikipedia'])

    parts = category_path.split('_')
    category_id = int(parts[0])
    # A bare "<id>" path carries no query category.
    query_category = parts[-1].decode('utf-8') if len(parts) >= 2 else ""
    main_category_list = [query_category]

    candidates = common.getCandidateCategory(category_id)
    extractFeatureFromWikiCategory(
        category_id, category_path, main_category_list, candidates)
Exemplo n.º 6
0
def main(category_path):
    """Drive the ``wikipedia`` feature-extraction step for a category path.

    The path is underscore-separated: its first piece is the integer
    category id and, if more than one piece is present, its last piece
    (decoded from UTF-8) is the query category. The query category is
    wrapped in a one-element list and passed, together with the candidate
    categories, to ``extractFeatureFromWikiCategory``.

    Args:
        category_path: Underscore-separated path string.

    Raises:
        ValueError: If the leading piece cannot be parsed by ``int``.
    """
    # Python 2 idiom for switching the interpreter's default codec to UTF-8;
    # ``setdefaultencoding`` is only available after ``reload(sys)``.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    user_dict = data_path + "jieba_userdict.txt"
    jieba.load_userdict(user_dict)
    file_utils.createDirs(['wikipedia'])

    pieces = category_path.split('_')
    category_id = int(pieces[0])

    if len(pieces) < 2:
        # Path holds only the id, so there is no query category.
        query_category = ""
    else:
        query_category = pieces[-1].decode('utf-8')
    main_category_list = [query_category]

    candidate_categories = common.getCandidateCategory(category_id)
    extractFeatureFromWikiCategory(category_id, category_path,
                                   main_category_list, candidate_categories)
def main(category_path):
    """Entry point: run ``tf`` for one category path.

    Loads the jieba user dictionary and the two pickled dictionaries
    (``app_tag.dict`` and ``app_category.dict``) from ``data_path``,
    creates the ``tag_tf`` and ``title_tf`` directories, derives the
    category id and query category from ``category_path`` and calls ``tf``.

    ``category_path`` is split on ``'_'``: the first component is the
    integer category id; with two or more components the last one, decoded
    from UTF-8, is the query category, otherwise it is the empty string.

    Args:
        category_path: Underscore-separated path, e.g. ``"<id>_<name>"``.

    Raises:
        ValueError: If the first component is not an integer literal.
        IOError: If either pickle file cannot be opened.
    """
    # Python 2 only: reloading ``sys`` makes ``setdefaultencoding``
    # reachable again so the process-wide default codec can be set to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Parenthesised single argument prints identically under the Python 2
    # print statement.
    print('-loading preparation file')
    jieba.load_userdict(data_path + "jieba_userdict.txt")

    # NOTE(review): unpickling can execute arbitrary code; these files must
    # come from a trusted source.
    # ``with`` closes each handle right after loading instead of leaving it
    # to the garbage collector.
    with open(data_path + 'app_tag.dict', 'rb') as tag_file:
        app_tag_dict = pickle.load(tag_file)
    with open(data_path + 'app_category.dict', 'rb') as category_file:
        app_category_dict = pickle.load(category_file)

    file_utils.createDirs(['tag_tf', 'title_tf'])
    category_path_list = category_path.split('_')
    category_id = int(category_path_list[0])
    query_category = ""
    if len(category_path_list) >= 2:
        query_category = category_path_list[-1].decode('utf-8')
    category_set = common.getCandidateCategory(category_id)
    tf(category_id, category_path, query_category, category_set,
       app_category_dict, app_tag_dict)
Exemplo n.º 8
0
def main(category_path):
    """Drive the term-frequency step (``tf``) for a category path.

    Preparation: load the jieba user dictionary, then unpickle
    ``app_tag.dict`` and ``app_category.dict`` from ``data_path``. After
    creating the ``tag_tf`` and ``title_tf`` directories, the path is split
    on ``'_'`` to obtain the integer category id (first piece) and the
    query category (last piece decoded from UTF-8, or ``""`` when the path
    has a single piece). Everything is then forwarded to ``tf``.

    Args:
        category_path: Underscore-separated path string.

    Raises:
        ValueError: If the leading piece cannot be parsed by ``int``.
        IOError: If either pickle file cannot be opened.
    """
    # Python 2 idiom for switching the interpreter's default codec to UTF-8;
    # ``setdefaultencoding`` is only available after ``reload(sys)``.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Single parenthesised argument: same output as the print statement.
    print('-loading preparation file')
    jieba.load_userdict(data_path + "jieba_userdict.txt")

    # NOTE(review): pickle is only safe on trusted files, because loading
    # can run arbitrary code embedded in the stream.
    # Context managers release the file handles deterministically; the
    # previous ``pickle.load(open(...))`` form never closed them explicitly.
    with open(data_path + 'app_tag.dict', 'rb') as tag_stream:
        app_tag_dict = pickle.load(tag_stream)
    with open(data_path + 'app_category.dict', 'rb') as category_stream:
        app_category_dict = pickle.load(category_stream)

    file_utils.createDirs(['tag_tf', 'title_tf'])
    category_path_list = category_path.split('_')
    category_id = int(category_path_list[0])
    query_category = ""
    if len(category_path_list) >= 2:
        query_category = category_path_list[-1].decode('utf-8')
    category_set = common.getCandidateCategory(category_id)
    tf(category_id, category_path, query_category, category_set,
       app_category_dict, app_tag_dict)