def handleOnePass(indexpath, workdir, length):
    """Run one ngram-counting pass over every document listed in an index file.

    indexpath -- path to the index file; each line is 'title#textpath'.
    workdir   -- directory holding the ngram sqlite database for this pass.
    length    -- the ngram length being processed (selects the db file name).

    Side effects: updates the ngram database via handleOneDocument and
    commits once at the end of the pass.
    """
    print(indexpath, workdir, length)

    filename = config.getNgramFileName(length)
    filepath = workdir + os.sep + filename

    conn = sqlite3.connect(filepath)
    # FIX: the original leaked the index file and the connection on any
    # exception; 'with'/'finally' guarantee cleanup on every exit path.
    try:
        cur = conn.cursor()

        #begin processing
        with open(indexpath, 'r') as indexfile:
            # iterate lazily instead of materializing readlines()
            for oneline in indexfile:
                #remove trailing line separator
                oneline = oneline.rstrip(os.linesep)

                (title, textpath) = oneline.split('#')
                infile = config.getTextDir() + textpath
                infilesize = utils.get_file_length(
                    infile + config.getSegmentPostfix())
                # tiny documents add noise, not signal -- skip them
                if infilesize < config.getMinimumFileSize():
                    print("Skipping " + title + '#' + textpath)
                    continue

                #process one document
                handleOneDocument(infile, cur, length)

        conn.commit()
    finally:
        conn.close()
def add_variation(self, f, im, name, ops):
    """Add a variation to the asset.

    f    -- file-like object holding the original asset bytes.
    im   -- the original image (PIL-style object with format/is_animated).
    name -- name of the variation.
    ops  -- transform operations applied to produce the variation.

    Returns the stored Variation document.
    """
    from models.accounts import Account

    # Make sure we have access to the associated account frame
    if not isinstance(self.account, Account):
        self.account = Account.one(Q._id == self.account)

    # Transform the original image to generate the variation
    vim = None
    if im.format.lower() == 'gif' and im.is_animated:
        # By-pass transforms for animated gifs
        fmt = {'ext': 'gif', 'fmt': 'gif'}
    else:
        # Transform the image based on the variation
        vim = im.copy()
        vim, fmt = Variation.transform_image(vim, ops)

    # Prepare the variation file for storage.
    # FIX: `vim` is None on the animated-gif by-pass branch, so the original
    # unconditional `vim.save(...)` raised AttributeError and clobbered `f`
    # with an empty buffer.  Only re-encode when a transformed image exists;
    # otherwise keep the caller-supplied file object as-is.
    if vim is not None:
        f = io.BytesIO()
        vim.save(f, **fmt)
    f.seek(0)

    # Add the variation to the asset
    variation = Variation(
        name=name,
        ext=fmt['ext'],
        meta={
            'length': get_file_length(f),
            'image': {
                'mode': (vim or im).mode,
                'size': (vim or im).size
            }
        }
    )

    # Set a version, retrying until it is unique for this variation name
    variation.version = generate_uid(3)
    while self.get_variation(name, variation.version):
        variation.version = generate_uid(3)

    # Store the variation
    variation.store_key = Variation.get_store_key(self, variation)
    backend = self.account.get_backend_instance()
    backend.store(f, variation.store_key)

    # We use the $push operator to store the variation to prevent race
    # conditions if multiple processes attempt to update the assets
    # variations at the same time.
    self.get_collection().update(
        {'_id': self._id},
        {'$push': {'variations': variation._document}})

    return variation
def handleOneIndex(indexpath, subdir, indexname, fast):
    """Generate candidate model files for one index, resumable via status files.

    indexpath -- path to the index file ('title#textpath' per line).
    subdir, indexname -- select the model output directory.
    fast -- when true, build each candidate model on the in-memory file
            system and copy it out to disk once it is full.
    """
    inMemoryFile = "model.db"

    modeldir = os.path.join(config.getModelDir(), subdir, indexname)
    os.path.exists(modeldir) or os.makedirs(modeldir)

    def cleanupInMemoryFile():
        # drop the in-memory model and its report, if present
        modelfile = os.path.join(config.getInMemoryFileSystem(),
                                 inMemoryFile)
        reportfile = modelfile + config.getReportPostfix()
        if os.access(modelfile, os.F_OK):
            os.unlink(modelfile)
        if os.access(reportfile, os.F_OK):
            os.unlink(reportfile)

    def copyoutInMemoryFile(modelfile):
        # copy the in-memory model and report out to their on-disk locations
        inmemoryfile = os.path.join(config.getInMemoryFileSystem(),
                                    inMemoryFile)
        inmemoryreportfile = inmemoryfile + config.getReportPostfix()
        reportfile = modelfile + config.getReportPostfix()
        if os.access(inmemoryfile, os.F_OK):
            utils.copyfile(inmemoryfile, modelfile)
        if os.access(inmemoryreportfile, os.F_OK):
            utils.copyfile(inmemoryreportfile, reportfile)

    def cleanupFiles(modelnum):
        # remove any stale on-disk candidate model/report for this number
        modeldir = os.path.join(config.getModelDir(), subdir, indexname)
        modelfile = os.path.join(
            modeldir, config.getCandidateModelName(modelnum))
        reportfile = modelfile + config.getReportPostfix()
        if os.access(modelfile, os.F_OK):
            os.unlink(modelfile)
        if os.access(reportfile, os.F_OK):
            os.unlink(reportfile)

    def storeModelStatus(modelfile, textnum, nexttextnum):
        #store model info in status file
        modelstatuspath = modelfile + config.getStatusPostfix()
        #create None status
        modelstatus = {}
        modelstatus['GenerateStart'] = textnum
        modelstatus['GenerateEnd'] = nexttextnum
        utils.sign_epoch(modelstatus, 'Generate')
        utils.store_status(modelstatuspath, modelstatus)

    print(indexpath, subdir, indexname)

    indexstatuspath = indexpath + config.getStatusPostfix()
    indexstatus = utils.load_status(indexstatuspath)
    if not utils.check_epoch(indexstatus, 'MergeSequence'):
        raise utils.EpochError('Please mergeseq first.\n')
    if utils.check_epoch(indexstatus, 'Generate'):
        return

    #continue generating from the last recorded position
    textnum, modelnum, aggmodelsize = 0, 0, 0
    if 'GenerateTextEnd' in indexstatus:
        textnum = indexstatus['GenerateTextEnd']
    if 'GenerateModelEnd' in indexstatus:
        modelnum = indexstatus['GenerateModelEnd']

    #clean up previous file
    if fast:
        cleanupInMemoryFile()
    cleanupFiles(modelnum)

    #begin processing
    # NOTE(review): assumes the index file is non-empty -- `i`/`nexttextnum`
    # would be unbound after an empty loop; confirm against callers.
    indexfile = open(indexpath, 'r')
    for i, oneline in enumerate(indexfile.readlines()):
        #continue last generating
        if i < textnum:
            continue

        #remove trailing '\n'
        oneline = oneline.rstrip(os.linesep)
        (title, textpath) = oneline.split('#')
        infile = config.getTextDir() + textpath
        infilesize = utils.get_file_length(
            infile + config.getMergedPostfix())
        if infilesize < config.getMinimumFileSize():
            print("Skipping " + title + '#' + textpath)
            continue

        if fast:
            modelfile = os.path.join(config.getInMemoryFileSystem(),
                                     inMemoryFile)
        else:
            modelfile = os.path.join(
                modeldir, config.getCandidateModelName(modelnum))
        reportfile = modelfile + config.getReportPostfix()

        print("Proccessing " + title + '#' + textpath)
        if generateOneText(infile, modelfile, reportfile):
            aggmodelsize += infilesize
        print("Processed " + title + '#' + textpath)

        if aggmodelsize > config.getCandidateModelSize():
            #copy out in memory file
            if fast:
                modelfile = os.path.join(
                    modeldir, config.getCandidateModelName(modelnum))
                copyoutInMemoryFile(modelfile)
                cleanupInMemoryFile()

            #the model file is in disk now
            nexttextnum = i + 1
            storeModelStatus(modelfile, textnum, nexttextnum)

            #new model candidate
            aggmodelsize = 0
            textnum = nexttextnum
            modelnum += 1

            #clean up next file
            cleanupFiles(modelnum)

            #save current progress in status file
            indexstatus['GenerateTextEnd'] = nexttextnum
            indexstatus['GenerateModelEnd'] = modelnum
            utils.store_status(indexstatuspath, indexstatus)

    #copy out in memory file
    if fast:
        modelfile = os.path.join(
            modeldir, config.getCandidateModelName(modelnum))
        copyoutInMemoryFile(modelfile)
        cleanupInMemoryFile()

    #the model file is in disk now
    nexttextnum = i + 1
    storeModelStatus(modelfile, textnum, nexttextnum)

    indexfile.close()
    #end processing

    #save current progress in status file
    modelnum += 1
    indexstatus['GenerateTextEnd'] = nexttextnum
    indexstatus['GenerateModelEnd'] = modelnum
    utils.sign_epoch(indexstatus, 'Generate')
    utils.store_status(indexstatuspath, indexstatus)
def upload():
    """Upload an asset.

    Reads the uploaded file from `request.files['asset']`, validates the
    form parameters, prepares image metadata when the file is a recognized
    image, stores the original via the account's backend, and returns the
    created asset as JSON.
    """
    # Check a file has been provided
    fs = request.files.get('asset')
    if not fs:
        return fail('No `asset` sent.')

    # Validate the parameters
    form = UploadForm(request.values)
    if not form.validate():
        return fail('Invalid request', issues=form.errors)

    # Prep the asset details from the submitted form data
    form_data = form.data

    # Name
    name = form_data['name']
    if not name:
        name = os.path.splitext(fs.filename)[0]
    name = slugify_name(name)

    # Extension
    ext = os.path.splitext(fs.filename)[1].lower()[1:]

    # If there's no extension associated with the file then see if we can
    # guess it using the imghdr module
    if not ext:
        fs.stream.seek(0)
        ext = imghdr.what(fs.filename, fs.stream.read()) or ''

    # If the file is a recognized image format then attempt to read it as an
    # image otherwise leave it as a file.
    asset_file = fs.stream
    asset_meta = {}
    asset_type = Asset.get_type(ext)
    # FIX: was `asset_type is 'image'` -- identity comparison against a str
    # literal is implementation-defined; use value equality.
    if asset_type == 'image':
        try:
            asset_file, asset_meta = prep_image(asset_file)
        except IOError:
            return fail('File appears to be an image but it cannot be read.')

    # Add basic file information to the asset meta
    asset_meta.update({
        'filename': fs.filename,
        'length': get_file_length(asset_file)
    })

    # Create the asset
    asset = Asset(
        account=g.account._id,
        name=name,
        ext=ext,
        meta=asset_meta,
        type=asset_type,
        variations=[]
    )

    if form_data['expires']:
        asset.expires = form_data['expires']

    # Generate a unique Id for the asset
    asset.uid = generate_uid(6)
    while Asset.count(And(Q.account == g.account, Q.uid == asset.uid)) > 0:
        asset.uid = generate_uid(6)

    # Store the original file
    asset.store_key = Asset.get_store_key(asset)
    backend = g.account.get_backend_instance()
    backend.store(asset_file, asset.store_key)

    # Save the asset
    asset.insert()

    return success(asset.to_json_type())
else: #backup merged model utils.copyfile(mergedmodel, prunedmodel) pruneModel(prunedmodel, args.k, args.CDF) #validate pruned model print('validating') validateModel(prunedmodel) #export textual format print('exporting') exportfile = os.path.join(trydir, 'kmm_pruned.text') exportModel(prunedmodel, exportfile) #convert to interpolation print('converting') kmm_model = exportfile inter_model = os.path.join(trydir, config.getFinalModelFileName()) convertModel(kmm_model, inter_model) modelsize = utils.get_file_length(inter_model) cwdstatus['PruneModelSize'] = modelsize utils.store_status(cwdstatuspath, cwdstatus) print('final model size:', modelsize) #sign status epoch utils.sign_epoch(cwdstatus, 'Prune') utils.store_status(cwdstatuspath, cwdstatus) print('done')