def delete_documents(self, doc_set, paths):
    """Delete documents from the index.

    Removes every indexed document that belongs to *doc_set* and whose
    path matches one of *paths*.
    """
    ix = open_dir(self.index_path)
    writer = AsyncWriter(ix)
    # Match documents in this set whose path is any of the given paths.
    path_terms = [Term('path', p) for p in paths]
    writer.delete_by_query(And([Term('set', doc_set), Or(path_terms)]))
    writer.commit()
def delete_documents(self, doc_set, paths):
    """Delete documents from the index."""
    # Open the on-disk index and attach an async writer to it.
    writer = AsyncWriter(open_dir(self.index_path))
    # A document is deleted when it is in the target set AND its path
    # matches at least one of the requested paths.
    matches_any_path = Or([Term('path', p) for p in paths])
    writer.delete_by_query(And([Term('set', doc_set), matches_any_path]))
    writer.commit()
def delPage(self, item):
    """Recursively delete a page: its child pages, attachment folder,
    search-index entry, markdown file, and (when emptied) its directory.

    Arguments:
        item: tree item representing the page to delete.
    """
    # Delete children first (depth-first) so directories are empty
    # by the time we try to remove them.
    index = item.childCount()
    while index > 0:
        index -= 1
        self.dirname = item.child(index).text(0)
        self.delPage(item.child(index))

    # Remove the attachment folder and everything in it.
    attDir = self.itemToAttachmentDir(item)
    for info in QDir(attDir).entryInfoList():
        QDir().remove(info.absoluteFilePath())
    QDir().rmdir(attDir)

    # Remove this page from the whoosh full-text index.
    pagePath = self.itemToPage(item)
    self.ix = open_dir(self.settings.indexdir)
    query = QueryParser('path', self.ix.schema).parse(pagePath)
    writer = AsyncWriter(self.ix)
    writer.delete_by_query(query)
    writer.commit()

    # Remove the markdown file backing this page.
    QDir(self.notePath).remove(self.pageToFile(pagePath))

    # Detach the item from the tree.  FIX: itemToPage(parent) was
    # previously evaluated even when parent was None (top-level items);
    # it is now computed only when a parent actually exists.
    parent = item.parent()
    if parent is not None:
        parentPage = self.itemToPage(parent)
        index = parent.indexOfChild(item)
        parent.takeChild(index)
        if parent.childCount() == 0:  # no children left -> dir not needed
            QDir(self.notePath).rmdir(parentPage)
    else:
        index = self.indexOfTopLevelItem(item)
        self.takeTopLevelItem(index)
    QDir(self.notePath).rmdir(pagePath)
def delPage(self, item):
    """Recursively delete a page: child pages, attachment folder,
    full-text-index entry, markdown file, and emptied directories.

    Arguments:
        item: tree item representing the page to delete.
    """
    # Recurse into children first so parent directories end up empty.
    index = item.childCount()
    while index > 0:
        index -= 1
        self.dirname = item.child(index).text(0)
        self.delPage(item.child(index))

    # Remove attachment folder contents, then the folder itself.
    attDir = self.itemToAttachmentDir(item)
    for info in QtCore.QDir(attDir).entryInfoList():
        QtCore.QDir().remove(info.absoluteFilePath())
    QtCore.QDir().rmdir(attDir)

    # Drop this page from the whoosh index.
    pagePath = self.itemToPage(item)
    self.ix = open_dir(self.settings.indexdir)
    query = QueryParser("path", self.ix.schema).parse(pagePath)
    writer = AsyncWriter(self.ix)
    writer.delete_by_query(query)
    writer.commit()

    # Delete the markdown file on disk.
    QtCore.QDir(self.notePath).remove(self.pageToFile(pagePath))

    # Detach from the tree.  FIX: itemToPage(parent) used to be called
    # unconditionally, even for top-level items where parent is None;
    # it is now only computed inside the parent branch.
    parent = item.parent()
    if parent is not None:
        parentPage = self.itemToPage(parent)
        index = parent.indexOfChild(item)
        parent.takeChild(index)
        if parent.childCount() == 0:  # if no child, dir not needed
            QtCore.QDir(self.notePath).rmdir(parentPage)
    else:
        index = self.indexOfTopLevelItem(item)
        self.takeTopLevelItem(index)
    QtCore.QDir(self.notePath).rmdir(pagePath)
def load_all_dset_metadata(self, dsetname, create_index=False):
    """ Loads into memory the metadata of a dataset.
        The metadata is read from a CSV file, which should have at least two columns:
         - filename: Paths to the images in the dataset, relative to the image data folder.
                     For backward compatibility '#filename' is also accepted
         - file_attributes: JSON string containing information about the file. The most
                     important file attributes are 'caption' and 'keywords'. The 'caption'
                     field should be a short string which will be used as the caption of
                     the image in result lists. The 'keywords' field must contain a
                     comma-separated list of keywords. Each keyword can be used as the
                     source for a search.
        If create_index is True, it builds a search index with the 'keywords' in the
        file_attributes.
        Arguments:
            dsetname: String corresponding to the dataset within the list of supported
                      datasets.
            create_index: Boolean indicating whether or not to build a search index
                      with the metadata
    """
    metaindex = None
    t = time.time()
    try:
        for afile in os.listdir(os.path.join(self.metadata_dir, dsetname)):
            if afile.endswith(".csv"):
                metadata_file = os.path.join(self.metadata_dir, dsetname, afile)
                print('Found metadata file at', metadata_file)
                if create_index:
                    metaindex = open_dir(self.index_dir)
                with open(metadata_file, 'r') as fin:
                    reader = csv.DictReader(fin)
                    for row in reader:
                        # Accept 'filename' or the legacy '#filename' column.
                        id_field = None
                        if 'filename' in row.keys():
                            id_field = 'filename'
                        elif '#filename' in row.keys():
                            id_field = '#filename'
                        if id_field and 'file_attributes' in row.keys():
                            filename = row[id_field]
                            # FIX: was a bare 'except:' which swallowed every
                            # exception; only JSON-parsing failures should map
                            # the file to None metadata.
                            try:
                                self.fname2meta[dsetname][filename] = json.loads(
                                    row['file_attributes'])
                            except (ValueError, TypeError):
                                self.fname2meta[dsetname][filename] = None
                            metadata = self.fname2meta[dsetname][filename]
                            keyword_list = None
                            if metadata and 'keywords' in metadata.keys():
                                keyword_list = metadata['keywords']
                            if keyword_list and create_index:
                                keyword_list_splitted = keyword_list.split(',')
                                writer = AsyncWriter(metaindex)
                                for key in keyword_list_splitted:
                                    key = key.strip()
                                    # delete previous entry if found.
                                    # FIX: previously passed metaindex.searcher(),
                                    # which was never closed (handle leak); let
                                    # the writer manage its own searcher.
                                    query = QueryParser(
                                        'key', metaindex.schema).parse(key)
                                    writer.delete_by_query(query)
                                    # add document
                                    writer.add_document(
                                        key=str(key), dataset=str(dsetname))
                                writer.commit()
                            if keyword_list:
                                # we would like to do this, even if the index is not created
                                # register link keyword-file
                                keyword_list_splitted = keyword_list.split(',')
                                for key in keyword_list_splitted:
                                    key = key.strip()
                                    self.keyword2fname[dsetname].setdefault(
                                        key, []).append(filename)
                        else:
                            raise Exception(
                                '"filename" and/or "file_attributes" columns not found in '
                                + afile +
                                ' (are you missing the column names?). Metadata will not be available!.'
                            )
                print('Finished loading metadata for %s in %s' %
                      (dsetname, str(time.time() - t)))
                self.is_all_metadata_loaded = True
                break
    except Exception as e:
        # Best-effort loader: report and continue without metadata.
        print("load_all_dset_metadata Exception:" + str(e) + '\n')