Пример #1
0
def add_info(files_path, core, core_http,
             schema_url='http://localhost:8983/solr/info/schema'):
    """Re-index a whitespace-delimited table file, one document per row.

    The first line of the file is treated as a header: every
    whitespace-separated token on it is passed to ``add_field`` together
    with ``schema_url`` and is then used as a field name.  Each following
    non-blank line becomes one document that maps the header names to the
    tokens of that line and additionally carries:

    * ``path``    -- absolute path of ``files_path``
    * ``content`` -- the complete raw line, decoded as UTF-8
    * ``id``      -- the value returned by ``id_List.Create_id()``

    Before reading the file, ``core.delete`` is called with the absolute
    path and ``id_List.Delete_list_path`` with ``files_path`` (presumably
    to drop what was indexed for this file earlier -- confirm against the
    implementations of those helpers).

    Args:
        files_path: Path of the file to index.  It is opened in binary mode
            and every token is decoded as UTF-8.
        core: Object exposing ``delete(path)`` and ``add(list_of_dicts)``.
        core_http: Forwarded unchanged to ``id_List.Create_list``.
        schema_url: URL handed to ``add_field`` for every header token.
            Defaults to the address that used to be hard-coded here.
    """
    abs_path = os.path.abspath(files_path)
    core.delete(abs_path)
    id_List.Delete_list_path(files_path)

    # abspath() yields a byte string only when it was given one; decoding a
    # value that is already text would fail (or round-trip through ASCII on
    # Python 2), so decode conditionally.
    if isinstance(abs_path, bytes):
        doc_path = abs_path.decode('utf-8')
    else:
        doc_path = abs_path

    docs = []
    with open(files_path, 'rb') as f:
        header = f.readline().strip().split()
        for name in header:
            add_field(schema_url, name)
        field_names = [name.decode('utf-8') for name in header]

        for line in f:
            tokens = line.strip().split()
            if not tokens:
                # A blank line carries no row data; indexing it would only
                # create an empty document.
                continue
            id_num = id_List.Create_id()
            # Header names are paired with the row tokens only.  Surplus
            # tokens are ignored and missing ones simply leave the field
            # unset, so a ragged row can no longer shift path/content/id
            # into the wrong fields.
            doc = dict(zip(field_names,
                           [token.decode('utf-8') for token in tokens]))
            doc[u'path'] = doc_path
            doc[u'content'] = line.decode('utf-8')
            doc[u'id'] = id_num
            docs.append(doc)
            id_List.Create_list(files_path, id_num, core_http)
    core.add(docs)
Пример #2
0
def add_desc(files_path, core, core_http):
    """Index the entire contents of ``files_path`` as a single document.

    Steps, in order: ``id_List.Delete_list_path(files_path)`` is called,
    the file is read in binary mode and decoded as UTF-8, an id is obtained
    from ``id_List.Create_desc_id(files_path)``, one document with the keys
    ``id``, ``path`` (absolute path) and ``context`` (the decoded text) is
    passed to ``core.add``, and the id is finally registered through
    ``id_List.Create_list``.

    Args:
        files_path: Path of the file to index.
        core: Object exposing ``add(list_of_dicts)``.
        core_http: Forwarded unchanged to ``id_List.Create_list``.

    NOTE(review): the text is stored under ``context`` while ``add_info``
    uses ``content`` -- confirm which key the schema expects.
    """
    id_List.Delete_list_path(files_path)
    with open(files_path, 'rb') as source:
        raw = source.read()
        body = raw.decode('utf-8')
        id_num = id_List.Create_desc_id(files_path)
        document = {
            'id': id_num,
            'path': os.path.abspath(files_path),
            'context': body,
        }
        core.add([document])
        id_List.Create_list(files_path, id_num, core_http)
Пример #3
0
def add_txt(files_path, core, core_http):
    """Re-index a transcript-style text file, one document per line.

    The layout is detected from the number of whitespace-separated tokens
    on the first line:

    * 4 tokens -> fields ``wav``, ``start``, ``end``, ``text``
    * 2 tokens -> fields ``wav``, ``text``

    Any other count prints an error message and returns without calling
    ``core.add``.  Every non-blank line becomes a document holding the
    detected fields plus ``path`` (absolute path of the file) and ``id``
    (value returned by ``id_List.Create_id()``).

    ``core.delete`` (with the absolute path) and
    ``id_List.Delete_list_path`` are called before the file is read, so
    they have already run when a format error is reported.

    Args:
        files_path: Path of the file to index.  It is opened in binary mode
            and every token is decoded as UTF-8.
        core: Object exposing ``delete(path)`` and ``add(list_of_dicts)``.
        core_http: Forwarded unchanged to ``id_List.Create_list``.
    """
    abs_path = os.path.abspath(files_path)
    core.delete(abs_path)
    id_List.Delete_list_path(files_path)

    docs = []
    data_fields = ()
    with open(files_path, 'rb') as f:
        for i, line in enumerate(f):
            words = [s.decode('utf-8') for s in line.strip().split()]
            if i == 0:
                if len(words) == 4:
                    data_fields = ('wav', 'start', 'end', 'text')
                elif len(words) == 2:
                    data_fields = ('wav', 'text')
                else:
                    # Single-argument call form: prints the same text under
                    # both the print statement and the print function.
                    print('ERROR! ' + files_path + ' is not the correct format!')
                    return
            elif not words:
                # Blank lines (e.g. trailing newlines) carry no record.
                continue

            id_num = id_List.Create_id()
            # Pair only the data columns with the tokens, then set path and
            # id explicitly.  A line with too few or too many tokens can
            # therefore never push the path into a data field or replace
            # the id with the path.
            doc = dict(zip(data_fields, words))
            doc['path'] = abs_path
            doc['id'] = id_num
            docs.append(doc)
            id_List.Create_list(files_path, id_num, core_http)

    core.add(docs)