def setupExperimentDir(self):
    """
    Work out the directory for the current experiment and make sure it exists.

    The path is built by joining ``cp.Corpus.paths.experiments`` with
    ``self.exp["name"]``, normalized, and given a trailing ``os.sep``. It is
    stored under ``self.exp["exp_dir"]`` and then handed to
    ``ensureDirExists``.
    """
    # NOTE(review): an unconditional call to self.setupExperimentDir() used to
    # be the first statement here; it recursed without bound and the code
    # below was never reached. If a parent-class setup step was intended,
    # invoke it explicitly through super() with the concrete class name.
    exp_dir = os.path.normpath(
        os.path.join(cp.Corpus.paths.experiments, self.exp["name"])
    ) + os.sep
    self.exp["exp_dir"] = exp_dir
    ensureDirExists(exp_dir)
def initializeIndexer(self):
    """
    Start the Java VM used by Lucene and make sure the index directory exists.

    The VM is started through ``lucene.initVM`` with ``maxheap="768m"``. The
    directory checked is ``cp.Corpus.paths.fileLuceneIndex`` with ``os.sep``
    appended.
    """
    # Single-argument parenthesised print: same output under Python 2's print
    # statement and Python 3's print function.
    print("Initializing VM...")
    lucene.initVM(maxheap="768m")

    index_root = cp.Corpus.paths.fileLuceneIndex + os.sep
    ensureDirExists(index_root)
def __init__(self, result_storer, cache_dir, res_ids=None, max_results=sys.maxint):
    """
    Initialize the base class, record the cache locations and create them.

    ``result_storer``, ``res_ids`` and ``max_results`` are forwarded to the
    parent initializer. ``cache_dir`` is stored as ``self.cache_dir``, and
    ``self.own_dir`` is the sub-directory of ``cache_dir`` named after
    ``self.result_storer.table_name``. Both directories are passed to
    ``ensureDirExists``, the parent first.
    """
    # NOTE(review): super(self.__class__, self) resolves against the runtime
    # class, so a subclass that reaches this __init__ would call it again
    # endlessly. Naming the defining class explicitly avoids that; the class
    # name is not visible from this block, so it is left as is.
    super(self.__class__, self).__init__(
        result_storer,
        res_ids=res_ids,
        max_results=max_results,
    )

    self.cache_dir = cache_dir
    # self.result_storer is presumably set by the parent initializer above --
    # TODO confirm.
    self.own_dir = os.path.join(cache_dir, self.result_storer.table_name)

    for directory in (cache_dir, self.own_dir):
        ensureDirExists(directory)
def saveCachedJson(self, path, data):
    """
    Serialize ``data`` as JSON (indent of 3) and write it to ``path`` as UTF-8.

    The parent directory of ``path`` is passed to ``ensureDirExists`` before
    writing. I/O and encoding failures while opening or writing the file are
    reported on stdout instead of being raised; errors raised by
    ``json.dumps`` itself propagate to the caller.
    """
    ensureDirExists(os.path.dirname(path))
    # Serialize the ``data`` argument; the undefined name ``bow`` was
    # referenced here before.
    lines = json.dumps(data, indent=3)
    try:
        # The context manager closes the handle even when write() fails.
        with codecs.open(path, "w", "utf-8") as f:
            f.write(lines)
    except (IOError, OSError, UnicodeError):
        # Best-effort save: report and carry on, but no longer swallow
        # KeyboardInterrupt/SystemExit the way a bare except did.
        print("Error saving JSON", path, "Exception in saveCachedJson():", sys.exc_info()[:2])
def loadListOrListAllFiles(self, inputdir, file_mask):
    """
    Return the list of input files, using the saved list when one is available.

    The saved list is read with ``loadFileList`` from "all_input_files.txt"
    under ``cp.Corpus.paths.fileDB``. When that yields nothing (a falsy
    result), the files are listed via ``self.listAllFiles(inputdir,
    file_mask)``, the list is saved to the same file, and the fresh list is
    returned.
    """
    list_path = os.path.join(cp.Corpus.paths.fileDB, "all_input_files.txt")

    known_files = loadFileList(list_path)
    if known_files:
        return known_files

    # Nothing saved yet (or the saved list was empty): build it and persist it.
    print("Listing all files...")
    known_files = self.listAllFiles(inputdir, file_mask)
    ensureDirExists(cp.Corpus.paths.fileDB)
    saveFileList(known_files, list_path)
    return known_files
def createIndexWriter(self, actual_dir, max_field_length=20000000):
    """
    Build and return an ``IndexWriter`` that writes to ``actual_dir``.

    ``actual_dir`` is passed to ``ensureDirExists`` and then opened as a
    ``SimpleFSDirectory``. The writer is configured with a
    ``StandardAnalyzer``, a ``FieldAgnosticSimilarity`` and
    ``OpenMode.CREATE``; ``deleteAll()`` is called on it before it is
    returned.

    ``max_field_length`` is accepted but not read by this body.
    """
    ensureDirExists(actual_dir)
    directory = SimpleFSDirectory(File(actual_dir))

    config = IndexWriterConfig(
        LuceneVersion.LUCENE_CURRENT,
        StandardAnalyzer(LuceneVersion.LUCENE_CURRENT),
    )
    config.setSimilarity(FieldAgnosticSimilarity())
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

    writer = IndexWriter(directory, config)
    writer.deleteAll()
    return writer
def createDefaultDirs(self):
    """
    Make sure every directory registered in ``self.paths`` exists.

    Iterates over ``self.paths``, looks each key up again in ``self.paths``
    and passes the resulting value to ``ensureDirExists``.
    """
    for key in self.paths:
        directory = self.paths[key]
        ensureDirExists(directory)