def report(db, policy, path):
    if os.path.exists(path):
        shutil.rmtree(path)
    mkdirp(path)
    with open(os.path.join(path, 'totals.txt'), 'w') as f:
        print >> f, 'Worked: {}'.format(timedelta_to_str(db.worked()))
        print >> f, 'Balance: {}'.format(timedelta_to_str(policy.timesheet_balance(db)))
        if db.adjustments:
            print >> f, 'Adjustments:'
            for a in db.adjustments:
                print >> f, ' ' + a.identifier()
    for year_record in db.records:
        year_path = os.path.join(path, str(year_record.year))
        mkdirp(year_path)
        with open(os.path.join(year_path, 'totals.txt'), 'w') as f:
            print >> f, 'Worked: {}'.format(timedelta_to_str(year_record.worked()))
            print >> f, 'Balance: {}'.format(timedelta_to_str(policy.year_balance(year_record)))
        for month_record in year_record.records:
            month_path = os.path.join(year_path, '{:02d}.txt'.format(month_record.month))
            with open(month_path, 'w') as f:
                print >> f, DAY_HEADER_TEMPLATE
                for day_record in month_record.records:
                    print >> f, day_record_to_report_line(day_record, policy)
                print >> f, 'Worked: {}'.format(timedelta_to_str(month_record.worked()))
                month_balance = timedelta_to_str(policy.month_balance(month_record))
                print >> f, 'Balance: {}'.format(month_balance)
    print 'Balance: {}'.format(timedelta_to_str(policy.timesheet_balance(db)))
def write_day_record(db_path, day_record):
    month_path = os.path.join(
        db_path,
        str(day_record.day.year),
        '{:02d}.csv'.format(day_record.day.month)
    )
    mkdirp(os.path.dirname(month_path))
    if not os.path.exists(month_path):
        with open(month_path, 'w') as f:
            f.write('day,day_type,checkin,checkout\n')
    month_record = parse_month(month_path, year=day_record.day.year)
    if month_record.get_day_record(day_record.day.day):
        return False
    with open(month_path, 'a') as f:
        f.write(
            ','.join([
                str(day_record.day.day),
                day_record.day_type,
                day_record.checkin.strftime('%H:%M'),
                day_record.checkout.strftime('%H:%M'),
            ]) + '\n'
        )
    return True
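# A minimal usage sketch for write_day_record, assuming a hypothetical DayRecord
# with the four fields the function reads (day, day_type, checkin, checkout);
# the db path and day_type value below are illustrative, not the project's real ones.
import datetime
from collections import namedtuple

DayRecord = namedtuple('DayRecord', ['day', 'day_type', 'checkin', 'checkout'])

record = DayRecord(
    day=datetime.date(2020, 3, 14),
    day_type='work',
    checkin=datetime.time(9, 0),
    checkout=datetime.time(17, 30),
)
# Returns False (and writes nothing) if 2020/03.csv already has an entry for day 14.
write_day_record('/tmp/timesheet_db', record)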
def main(args):
    # Determine the proper domain and instance filenames
    if args.domain is None:
        args.domain = pddl_file.extract_domain_name(args.instance)
    domain_name, instance_name = extract_names(args.domain, args.instance)

    # Determine the appropriate output directory for the problem solver, and create it, if necessary
    translation_dir = args.output
    if not translation_dir:
        components = [args.output_base, args.tag, domain_name, instance_name]
        translation_dir = os.path.abspath(os.path.join(*components))
    util.mkdirp(translation_dir)

    print("{0:<30}{1}".format("Problem domain:", domain_name))
    print("{0:<30}{1}".format("Problem instance:", instance_name))
    print("{0:<30}{1}".format("Chosen Planner:", args.planner))
    print("{0:<30}{1}".format("Translation directory:", translation_dir))

    # Parse the task with FD's parser and transform it to our format
    fd_task = parse_pddl_task(args.domain, args.instance)
    fs_task = create_fs_task(fd_task, domain_name, instance_name)

    # Generate the appropriate problem representation from our task, store it, and (if necessary) compile
    # the C++ generated code to obtain a binary tailored to the particular instance
    representation = ProblemRepresentation(fs_task, translation_dir, args.edebug or args.debug)
    representation.generate()
    use_vanilla = not representation.requires_compilation()

    move_files(args.instance_dir, args.instance, args.domain, translation_dir, use_vanilla)
    compile_translation(translation_dir, use_vanilla, args)
    run_solver(translation_dir, args)
def move_files(base_dir, instance, domain, target_dir, use_vanilla):
    """ Moves the domain and instance description files plus additional data files to the translation directory """
    definition_dir = target_dir + '/definition'
    data_dir = target_dir + '/data'

    # Copy the domain and instance file to the subfolder "definition" on the destination dir
    util.mkdirp(definition_dir)
    shutil.copy(instance, definition_dir)
    shutil.copy(domain, definition_dir)

    is_external_defined = os.path.isfile(base_dir + '/external.hxx')

    if is_external_defined and use_vanilla:
        raise RuntimeError("An external definitions file was found at '{}', but the runner script determined "
                           "that no external files were needed. Something is wrong."
                           .format(base_dir + '/external.hxx'))

    if not use_vanilla:
        # The ad-hoc external definitions file - if it does not exist, we use the default.
        if is_external_defined:
            shutil.copy(base_dir + '/external.hxx', target_dir)
            if os.path.isfile(base_dir + '/external.cxx'):  # We also copy a possible cxx implementation file
                shutil.copy(base_dir + '/external.cxx', target_dir)
        else:
            default = tplManager.get('external_default.hxx').substitute()  # No substitutions for the default template
            util.save_file(target_dir + '/external.hxx', default)

    # Copy, if they exist, all data files
    origin_data_dir = base_dir + '/data'
    if os.path.isdir(origin_data_dir):
        util.mkdirp(data_dir)  # the destination directory must exist before copying into it
        for filename in glob.glob(os.path.join(origin_data_dir, '*')):
            if os.path.isfile(filename):
                shutil.copy(filename, data_dir)
def update_sources(self, source_dir, projects, revision, svn='svn'):
    self.report_build_step('update-sources')
    self.halt_on_failure()

    # TODO: This needs to be updated to use the monorepo.
    # Where to check the project out relative to an LLVM checkout.
    checkout_locations = {
        'llvm': '',
        'clang': 'tools/clang',
        'lld': 'tools/lld',
        'compiler-rt': 'projects/compiler-rt',
    }
    # If the project is named differently in svn, put it here.
    svn_locations = {'clang': 'cfe'}
    svn_uri_pattern = 'https://llvm.org/svn/llvm-project/%s/trunk'

    for project in projects:
        # TODO: Fail the build and report an error if we don't know the
        # checkout location.
        path = checkout_locations[project]
        if not path:
            path = source_dir
        elif not os.path.isabs(path):
            path = pjoin(source_dir, path)
        uri = svn_uri_pattern % (svn_locations.get(project, project),)
        util.report("Updating %s to %s at %s from %s" %
                    (project, revision, util.shquote(path), uri))
        if os.path.exists(pjoin(path, '.svn')):
            cmd = [svn, 'up', '-r', revision]
        else:
            util.mkdirp(path)
            cmd = [svn, 'co', '-r', revision, uri, '.']
        util.report_run_cmd(cmd, cwd=path)
def update_sources(self, source_dir, projects, revision, svn='svn'):
    self.report_build_step('update-sources')
    self.halt_on_failure()

    # TODO: This needs to be updated to use the monorepo.
    # Where to check the project out relative to an LLVM checkout.
    checkout_locations = {
        'llvm': '',
        'clang': 'tools/clang',
        'lld': 'tools/lld',
        'compiler-rt': 'projects/compiler-rt',
        'debuginfo-tests': 'projects/debuginfo-tests',
    }
    # If the project is named differently in svn, put it here.
    svn_locations = {'clang': 'cfe'}
    svn_uri_pattern = 'https://llvm.org/svn/llvm-project/%s/trunk'

    for project in projects:
        # TODO: Fail the build and report an error if we don't know the
        # checkout location.
        path = checkout_locations[project]
        if not path:
            path = source_dir
        elif not os.path.isabs(path):
            path = pjoin(source_dir, path)
        uri = svn_uri_pattern % (svn_locations.get(project, project),)
        util.report("Updating %s to %s at %s from %s" %
                    (project, revision, util.shquote(path), uri))
        if os.path.exists(pjoin(path, '.svn')):
            cmd = [svn, 'up', '-r', revision]
        else:
            util.mkdirp(path)
            cmd = [svn, 'co', '-r', revision, uri, '.']
        util.report_run_cmd(cmd, cwd=path)
def save_images(autoencoder, args, test_folds):
    assert args.threshold != -1
    array_files = util.load_array_of_files(args.path, test_folds)

    for fname in array_files:
        print('Processing image', fname)
        img = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)
        img = np.asarray(img)

        rows = img.shape[0]
        cols = img.shape[1]
        if img.shape[0] < args.window or img.shape[1] < args.window:
            new_rows = args.window if img.shape[0] < args.window else img.shape[0]
            new_cols = args.window if img.shape[1] < args.window else img.shape[1]
            img = cv2.resize(img, (new_cols, new_rows), interpolation=cv2.INTER_CUBIC)

        img = np.asarray(img).astype('float32')
        img = 255. - img

        finalImg = np.zeros(img.shape, dtype=bool)

        for (x, y, window) in utilDataGenerator.sliding_window(
                img, stepSize=args.step, windowSize=(args.window, args.window)):
            if window.shape[0] != args.window or window.shape[1] != args.window:
                continue

            roi = img[y:(y + args.window), x:(x + args.window)].copy()
            roi = roi.reshape(1, args.window, args.window, 1)
            roi = roi.astype('float32')  # / 255.

            prediction = autoencoder.predict(roi)
            prediction = (prediction > args.threshold)

            finalImg[y:(y + args.window), x:(x + args.window)] = prediction[0].reshape(
                args.window, args.window)

        finalImg = 1 - finalImg
        finalImg *= 255
        finalImg = finalImg.astype('uint8')

        if finalImg.shape[0] != rows or finalImg.shape[1] != cols:
            finalImg = cv2.resize(finalImg, (cols, rows), interpolation=cv2.INTER_CUBIC)

        outFilename = fname.replace('_GR/', '_PR-' + args.modelpath + '/')
        util.mkdirp(os.path.dirname(outFilename))
        cv2.imwrite(outFilename, finalImg)
def __init__(self, argv, i=1):
    argv = util.preprocess_argv(argv, i)
    version = util.get_version(argv[:-1], i)
    self._version = version
    self._dataset = argv[-1]  # dataset
    self._version_root = util.make_path_in_workdir(self.version)
    self._dataset_root = os.path.join(self._version_root, self.dataset)
    # we need to create a path like that when extracting a thinned dataset
    util.mkdirp(self._dataset_root)
def evaluate_all(paths):
    jiggle_params_dir = os.path.join(paths.runs_dir, 'jiggle_params')
    util.mkdirp(jiggle_params_dir)
    print('Jiggling some parameters to check how the LLHs change on THE maximum likelihood tree', end='')

    raxmlng_params_sets = []
    iqtree_params_sets = []
    for blmin in ('1e-1', '1e-2', '1e-3', '1e-4', '1e-5', '1e-6', '1e-7', '1e-8', '1e-9', '1e-10'):
        raxmlng_params_sets.append((paths.alignment, common.subst_model, blmin, '100', 'nr_safe', '0.1',
                                    paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'blmin_%s' % blmin)))
        iqtree_params_sets.append((paths.alignment, common.subst_model, blmin, '100', True, '0.1',
                                   paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'blmin_%s' % blmin)))
    for blmax in ('1e1', '1e2', '1e3', '1e4', '1e5', '1e6'):
        raxmlng_params_sets.append((paths.alignment, common.subst_model, '1e-6', blmax, 'nr_safe', '0.1',
                                    paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'blmax_%s' % blmax)))
        iqtree_params_sets.append((paths.alignment, common.subst_model, '1e-6', blmax, True, '0.1',
                                   paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'blmax_%s' % blmax)))
    for blopt in ('nr_safe', 'nr_fast', 'nr_oldfast', 'nr_oldsafe'):
        raxmlng_params_sets.append((paths.alignment, common.subst_model, '1e-6', '100', blopt, '0.1',
                                    paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'blopt_%s' % blopt)))
    for safe in (True, False):
        iqtree_params_sets.append((paths.alignment, common.subst_model, '1e-6', '100', safe, '0.1',
                                   paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'blopt_%s' % str(safe))))
    for lh_epsilon in ('10', '1', '0.1', '0.01', '0.001', '0.0001'):
        raxmlng_params_sets.append((paths.alignment, common.subst_model, '1e-6', '100', 'nr_safe', lh_epsilon,
                                    paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'lh_epsilon_%s' % lh_epsilon)))
        if float(lh_epsilon) <= 0.1:  # iqtree supports only <= 0.1
            iqtree_params_sets.append((paths.alignment, common.subst_model, '1e-6', '100', True, lh_epsilon,
                                       paths.raxml_best_tree, os.path.join(jiggle_params_dir, 'lh_epsilon_%s' % lh_epsilon)))

    pool = mp.Pool(common.available_cores)
    raxmlng_llhs = pool.starmap(raxmlng_eval, raxmlng_params_sets)
    iqtree_llhs = pool.starmap(iqtree_eval, iqtree_params_sets)

    with open(paths.raxmlng_param_jiggle_llhs, "w") as writer:
        writer.write('blmin,blmax,blopt,lh_epsilon,llh\n')
        for llh, params in zip(raxmlng_llhs, raxmlng_params_sets):
            if llh is None:
                llh = "NA"
            writer.write('%s,%s,%s,%s,%s\n' % (params[2], params[3], params[4], params[5], llh))
    with open(paths.iqtree_param_jiggle_llhs, "w") as writer:
        writer.write('blmin,blmax,blopt,lh_epsilon,llh\n')
        for llh, params in zip(iqtree_llhs, iqtree_params_sets):
            if llh is None:
                llh = "NA"
            writer.write('%s,%s,%s,%s,%s\n' % (params[2], params[3], params[4], params[5], llh))
    print('done')
def dump_data(self, name, data, ext='data', subdir=None):
    if not isinstance(data, list):
        data = [data]

    basedir = self.translation_dir + '/data'
    if subdir:
        basedir += '/' + subdir
    util.mkdirp(basedir)

    with open(basedir + '/' + name + '.' + ext, "w") as f:
        for l in data:
            f.write(str(l) + '\n')
def cmake(self, stage_name, build_dir, source_dir, cmake='cmake', cmake_args=None):
    self.report_build_step('%s cmake' % (stage_name,))
    self.halt_on_failure()

    cmd = [cmake]
    if cmake_args is not None:
        cmd += cmake_args
    cmd += [util.cmake_pjoin(source_dir, 'llvm')]
    util.mkdirp(build_dir)
    util.report_run_cmd(cmd, cwd=build_dir)
def cmake(self, stage_name, build_dir, source_dir, cmake='cmake', cmake_args=None):
    self.report_build_step('%s cmake' % (stage_name,))
    self.halt_on_failure()

    cmd = [cmake]
    if cmake_args is not None:
        cmd += cmake_args
    cmd += [source_dir]
    util.mkdirp(build_dir)
    util.report_run_cmd(cmd, cwd=build_dir)
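# A self-contained sketch of the mkdirp-then-configure pattern used by the two
# cmake stage methods above, with the standard library standing in for
# util.mkdirp and util.report_run_cmd; paths and flags are illustrative.
import os
import subprocess

def configure_build(build_dir, source_dir, cmake='cmake', cmake_args=None):
    cmd = [cmake] + (cmake_args or []) + [source_dir]
    os.makedirs(build_dir, exist_ok=True)   # ensure the out-of-tree build dir exists
    subprocess.check_call(cmd, cwd=build_dir)  # run the configure step inside it

# e.g. configure_build('build/stage1', 'llvm-project/llvm',
#                      cmake_args=['-GNinja', '-DCMAKE_BUILD_TYPE=Release'])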
def trim_separate_align(input_fasta, mode, runsdir, version_dir):
    util.expect_file_exists(input_fasta)
    util.clean_dir(runsdir)
    util.mkdirp(runsdir)
    cmd = []
    cmd.append(common.preanalysis1)
    cmd.append(input_fasta)
    cmd.append(mode)
    cmd.append(common.scripts_dir)
    cmd.append(common.mafft)
    cmd.append(common.outgroup_spec)
    cmd.append(version_dir)
    cmd.append(str(common.available_cores))
    print(" ".join(cmd))
    subprocess.check_call(cmd, cwd=runsdir)
def build_consensus_tree(input_trees, output_tree, run_dir, mode="MR"):
    util.mkdirp(run_dir)
    prefix = os.path.join(run_dir, "consensus_" + mode)
    cmd = []
    cmd.append(common.raxml)
    cmd.append("--consense")
    cmd.append(mode)
    cmd.append("--tree")
    cmd.append(input_trees)
    cmd.append("--prefix")
    cmd.append(prefix)
    cmd.append("--redo")
    subprocess.check_output(cmd, encoding='utf-8')
    consensus = prefix + ".raxml.consensusTree" + mode
    print("Saving consensus tree to " + output_tree)
    shutil.copy(consensus, output_tree)
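# Illustrative call (output path hypothetical): build a majority-rule consensus
# from the bootstrap trees exported elsewhere in this pipeline.
# build_consensus_tree(paths.raxml_bootstrap_trees,
#                      os.path.join(paths.results_dir, "consensus_MR.newick"),
#                      run_dir, mode="MR")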
def main(counter, duplex, comment, inches, resolution):
    # read configuration from tevs.cfg and set constants for this run
    const.debug = False  # XXX
    config.get("tevs.cfg")
    util.mkdirp(const.root)
    log = config.logger(util.root("scan.log"))

    inches_to_mm = 25.4
    inc = 1
    if duplex:
        inc = 2
    num = next.IncrementingFile(util.root("nexttoscan.txt"), inc)
    try:
        scanner = Scanner(duplex, int(inches * inches_to_mm), resolution)
        while True:
            counter = num.value()
            print "Scanning", counter
            stamp = datetime.now().isoformat()
            for i, img in enumerate(scanner.scan(counter)):
                # get path
                n = counter + i
                p = "%03d" % (n / 1000,)
                f = "%06d" % n
                dir = util.root(const.incoming, p)
                util.mkdirp(dir)
                filename = os.path.join(dir, f + ".jpg")
                img.save(filename)
                print "Saved", filename
                log.info("Saved %s at %s\n%s", filename, stamp, comment)
            num.increment_and_save()
    except ScanningException:
        print "Empty feeder?"
        log.info("Scan aborted due to empty feeder for 20 seconds.")
        sys.exit(2)
    except KeyboardInterrupt:
        log.info("Scan aborted by user")
        sys.exit(1)
def update_sources(self, source_dir, projects, revision, svn='svn'):
    self.report_build_step('update-sources')
    self.halt_on_failure()
    try:
        for (project, path, uri) in projects:
            if path is None:
                path = source_dir
            elif not os.path.isabs(path):
                path = pjoin(source_dir, path)
            util.report("Updating %s to %s at %s from %s" %
                        (project, revision, util.shquote(path), uri))
            if os.path.exists(pjoin(path, '.svn')):
                cmd = [svn, 'up', '-r', revision]
            else:
                util.mkdirp(path)
                cmd = [svn, 'co', '-r', revision, uri, '.']
            util.report_run_cmd(cmd, cwd=path)
    except Exception as e:
        self.report_step_exception(e)
        raise
def prv_export_vc(output_folder, model, X_train, X_test, Y_train, Y_test):
    print('Exporting vector codes to files...')
    print('# Output data to folder:', output_folder)
    print('# model num layers:', len(model.layers))
    util.mkdirp(output_folder)

    n_layer = len(model.layers) - 2
    get_layer_output = K.function(
        [model.layers[0].input, K.learning_phase()],
        [model.layers[n_layer].output])

    X_train_nc = calculate_output_features(get_layer_output, X_train)
    X_test_nc = calculate_output_features(get_layer_output, X_test)

    print('NC Train size:', X_train_nc.shape)
    print('NC Test size:', X_test_nc.shape)

    util.write_data_to_csv_file(output_folder + '/train.txt', X_train_nc, Y_train)
    util.write_data_to_csv_file(output_folder + '/test.txt', X_test_nc, Y_test)
def launch_pargenes(alignment, model, output_dir, seed, rand_trees, pars_trees, bs_trees, cores):
    util.make_path_clean(output_dir)
    debug = False
    alignment_dir = os.path.join(output_dir, "alignments")
    util.mkdirp(alignment_dir)
    alignment_symlink = os.path.join(alignment_dir, common.pargenes_ali_name)
    raxml_options_file = os.path.join(output_dir, "raxml_options.txt")
    with open(raxml_options_file, "w") as writer:
        writer.write("--model " + model + " ")
        writer.write("--blmin " + common.raxml_min_bl + " ")
        writer.write("--precision " + str(common.raxml_precision) + " ")
    relative_symlink(alignment, alignment_symlink)
    prefix = os.path.join(output_dir, "pargenes")
    cmd = []
    cmd.append("python")
    cmd.append(common.pargenes)
    cmd.append("-a")
    cmd.append(alignment_dir)
    cmd.append("-o")
    cmd.append(os.path.join(output_dir, "pargenes_output"))
    cmd.append("-r")
    cmd.append(raxml_options_file)
    cmd.append("--seed")
    cmd.append(str(seed))
    cmd.append("-s")
    cmd.append(str(rand_trees))
    cmd.append("-p")
    cmd.append(str(pars_trees))
    cmd.append("-b")
    cmd.append(str(bs_trees))
    cmd.append("-c")
    cmd.append(str(cores))
    cmd.append("--core-assignment")
    cmd.append("low")
    print(" ".join(cmd))
    launcher.submit(prefix, cmd, cores, debug)
def perform_all_tests(paths):
    iqtree_dir = os.path.join(paths.runs_dir, "iqtree_tests")
    util.mkdirp(iqtree_dir)
    iqtree_ll = iqtree_tests(paths.alignment, common.subst_model,
                             paths.raxml_all_ml_trees, paths.raxml_best_tree,
                             iqtree_dir)
    raxml_ll = float(open(paths.raxml_all_ml_trees_ll).readline().split(" ")[0])
    iqtree_tests_output = os.path.join(iqtree_dir, "iqtree_tests.iqtree")
    accepted_trees = filter_accepted_trees(iqtree_tests_output,
                                           paths.raxml_all_ml_trees)
    print(str(len(accepted_trees)) + " trees passed all IQTree consistency tests")
    with open(paths.raxml_credible_ml_trees, "w") as writer:
        for tree in accepted_trees:
            writer.write(tree)
    with open(paths.raxml_iqtree_ll, "w") as writer:
        writer.write("# this file contains the likelihood of the best tree at the end of the raxml-ng run, "
                     "and an evaluation of the likelihood of the same tree with iqtree (with model optimization)\n")
        writer.write("raxml_ll=" + str(raxml_ll) + "\n")
        writer.write("iqtree_ll=" + str(iqtree_ll) + "\n")
def init_env1():
    dir_path = os.path.join(local_path, 'tmp', 'market_env')
    util.mkdirp(dir_path)
    df = pd.DataFrame(
        data={
            'date': [10000 + i for i in range(1, 21)],
            'open': [1.0 + 0.1 * i for i in range(1, 21)],
            'high': [1.0 + 0.1 * i for i in range(1, 21)],
            'low': [1.0 + 0.1 * i for i in range(1, 21)],
            'close': [1.0 + 0.1 * i for i in range(1, 21)],
            'volume': [1000 + 10 * i for i in range(1, 21)],
        })
    df = df[['date', 'open', 'high', 'low', 'close', 'volume']]
    df.to_csv(os.path.join(dir_path, 'SYM1.csv'), index=False)
    env = MarketEnv(dir_path=dir_path,
                    codes=['SYM1'],
                    target_date_start='10015',
                    target_date_end='10020',
                    scope=4)
    return env
def update_sources(self, source_dir, projects, revision=None, svn='svn'):
    self.report_build_step('update-sources')
    self.halt_on_failure()
    try:
        for (project, path, uri) in projects:
            if path is None:
                path = source_dir
            elif not os.path.isabs(path):
                path = pjoin(source_dir, path)
            util.report("Updating %s at %s from %s" %
                        (project, util.shquote(path), uri))
            if revision is None:
                revision_args = []
            else:
                revision_args = ['-r', revision]
            if os.path.exists(pjoin(path, '.svn')):
                cmd = [svn, 'up'] + revision_args
            else:
                util.mkdirp(path)
                cmd = [svn, 'co'] + revision_args + [uri, '.']
            util.report_run_cmd(cmd, cwd=path)
    except:
        self.report_step_exception()
        raise
def __init__(self, location):
    self.cache = {}
    self.location = location
    util.mkdirp(location)
    self.log = logging.getLogger('')
    # attempt to prepopulate cache
    try:
        for file in os.listdir(location):
            # Mitch 1/11/2011 really want != .xml
            if os.path.splitext(file)[1] == ".jpg":
                continue
            rfile = os.path.join(location, file)
            data = util.readfrom(rfile, "<")  # default to text that will not parse
            try:
                tmpl = BallotTemplate.Template_from_XML(data)
            except ExpatError:
                if data != "<":
                    self.log.exception("Could not parse " + file)
                continue
            fname = os.path.basename(file)
            self.cache[fname] = tmpl
    except OSError:
        self.log.info("No templates found")
def main(counter, duplex, comment, inches, resolution):
    # read configuration from tevs.cfg and set constants for this run
    const.debug = False  # XXX
    config.get("tevs.cfg")
    util.mkdirp(const.root)
    log = config.logger(util.root("scan.log"))

    inches_to_mm = 25.4
    print counter
    inc = 1
    if duplex:
        inc = 2
    num = next.Simple(counter, inc)
    if counter < 0:
        num = next.File(util.root("nexttoscan.txt"), inc)
    try:
        scanner = Scanner(duplex, int(inches * inches_to_mm), resolution)
        for counter in num:
            stamp = datetime.now().isoformat()
            for i, img in enumerate(scanner.scan(counter)):
                # get path
                n = counter + i
                p = "%03d" % (n / 1000,)
                f = "%06d" % n
                dir = util.root(const.incoming, p)
                util.mkdirp(dir)
                file = os.path.join(dir, f + ".jpg")
                img.save(file)
                log.info("Saving %s at %s\n%s", file, stamp, comment)
            num.save()
    except KeyboardInterrupt:
        log.info("Scan aborted by user")
        sys.exit(1)
# Penultimate folder: datasets/mnist/original --> "mnist"
dbname = os.path.basename(os.path.dirname(os.path.normpath(args.path)))
fulldbname = args.path.strip("/").replace("/", "_")

sufix_fold = '_cv' + str(args.cv)
sufix_noise = ''
if args.lnoise > 0 or args.anoise > 0:
    noise_type = '_label_noise' if args.lnoise > 0 else '_attr_noise'
    noise_level = args.lnoise if args.lnoise > 0 else args.anoise
    sufix_noise = noise_type + str(noise_level)

output_folder = 'datasets/' + dbname + '/m' + str(args.model) + sufix_fold + sufix_noise
weights_filename = 'MODELS/model_' + fulldbname + '_m' + str(args.model) + sufix_fold + sufix_noise + '.h5'
util.mkdirp('MODELS')

# Load dataset and prepare data
print('Loading dataset...')
X_train, X_test, Y_train, Y_test = util.load_ABCDE_datasets(
    args.path, args.cv, args.lnoise, args.anoise)

nb_classes = len(np.unique(Y_train))
img_rows = img_cols = int(math.sqrt(X_train.shape[1] / args.channels))
Yc_train = np_utils.to_categorical(Y_train, nb_classes)
Yc_test = np_utils.to_categorical(Y_test, nb_classes)

if args.pre is not None and args.pre != 'None':
    X_train, scaler = util.preprocess_data(args.pre, X_train)
def rootdigger_dir(self):
    res = os.path.join(self.results_dir, "rootdigger_rooting")
    util.mkdirp(res)
    return res
#!/usr/bin/env python3
import os
import sys
sys.path.insert(0, 'scripts')
import common
import support_tree_thinning
import clade_compression_thinning
import max_entropy_thinning
import random_alignment_thinning
import util

paths = common.Paths(sys.argv)
input_tree = paths.raxml_consensus_MRE_tree
util.mkdirp(paths.thinning_dir)
#print(ss_mre_taxa_number)

# support selection method
ss_mre_taxa_number = support_tree_thinning.support_selection_tree_thinning(
    input_tree, paths.ss_mre_thinned_tree)

# maximum entropy method
max_entropy_thinning.max_entropy_thinning(paths, paths.alignment,
                                          ss_mre_taxa_number,
                                          paths.me_thinned_alignment)

#clade_compression_thinning.clade_compression_thinning(paths, paths.raxml_best_tree, paths.alignment, ss_mre_taxa_number, paths.cc_thinned_alignment)
#random_alignment_thinning.thin(paths.alignment, paths.rand_thinned_alignment, ss_mre_taxa_number)
#!/usr/bin/env python
import requests
import string
import os.path
import util
from atomicfile import AtomicFile

util.mkdirp("data/flickr")

# URL template for a Flickr image.
photo_url = string.Template(
    "https://farm${farmid}.staticflickr.com/${serverid}/${id}_${secret}.jpg")

# Page counter.
page = 1

while True:
    # Search for images with the desired tags.
    r = requests.get('https://api.flickr.com/services/rest/', params={
        'method': 'flickr.photos.search',
        'format': 'json',
        'nojsoncallback': 1,
        'api_key': '0b34944b5b61b43ec4fb3dc6389377a6',
        'tags': 'yellowstone,landscape',
        'tag_mode': 'all',
        'page': page,
    })
    json = r.json()

    for photo in json['photos']['photo']:
        file = "data/flickr/" + photo['id'] + '.jpg'
        url = photo_url.substitute(farmid=photo['farm'],
                                   serverid=photo['server'],
                                   id=photo['id'],
                                   secret=photo['secret'])
def main():
    miss_counter = 0

    # get command line arguments
    cfg_file = get_args()

    # read configuration from tevs.cfg and set constants for this run
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(const.logfilename)

    # create initial top level dirs, if they do not exist
    for p in ("%s" % ("templates"),
              "%s%d" % ("template_images", os.getpid()),
              "%s%d" % ("composite_images", os.getpid()),
              "results",
              "proc",
              "errors"):
        util.mkdirp(util.root(p))

    next_ballot = next.File(util.root("nexttoprocess.txt"), const.num_pages)

    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError as e:
        util.fatal("No such ballot type: " + const.layout_brand + ": check " + cfg_file)

    # allow all instances to share a common template location,
    # though need per-pid locs for template_images and composite_images
    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to db and open cursor
    if const.use_db:
        try:
            dbc = db.PostgresDB(const.dbname, const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database")
    else:
        dbc = db.NullDB()

    total_proc, total_unproc = 0, 0
    base = os.path.basename

    # While ballot images exist in the directory specified in tevs.cfg,
    # create ballot from images, get landmarks, get layout code, get votes.
    # Write votes to database and results directory. Repeat.
    #from guppy import hpy;hp=hpy();hp.setref();import gc;gc.disable();gc.collect();hp.setref()
    try:
        for n in next_ballot:
            gc.collect()
            unprocs = [incomingn(n + m) for m in range(const.num_pages)]
            if not os.path.exists(unprocs[0]):
                miss_counter += 1
                log.info(base(unprocs[0]) + " does not exist. No more records to process")
                if miss_counter > 10:
                    break
                continue
            #for i, f in enumerate(unprocs[1:]):
            #    if not os.path.exists(f):
            #        log.info(base(f) + " does not exist. Cannot proceed.")
            #        for j in range(i):
            #            log.info(base(unprocs[j]) + " will NOT be processed")
            #        total_unproc += mark_error(None, *unprocs[:i-1])

            # Processing
            log.info("Processing %s:\n %s" %
                     (n, "\n".join("\t%s" % base(u) for u in unprocs)))
            try:
                ballot = ballotfrom(unprocs, extensions)
                results = ballot.ProcessPages()
            except BallotException as e:
                total_unproc += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                continue

            csv = Ballot.results_to_CSV(results)
            #moz = Ballot.results_to_mosaic(results)

            # Write all data
            # make dirs:
            proc1d = dirn("proc", n)
            resultsd = dirn("results", n)
            resultsfilename = filen(resultsd, n)
            for p in (proc1d, resultsd):
                util.mkdirp(p)
            try:
                results_to_vop_files(results, resultsfilename)
            except Exception as e:
                print e
            # write csv and mosaic
            util.genwriteto(resultsfilename + ".txt", csv)

            # write to the database
            try:
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to remove
                # partial files
                remove_partial(resultsfilename + ".txt")
                remove_partial(resultsfilename + const.filename_extension)
                util.fatal("Could not commit vote information to database")

            # Post-processing
            # move the images from unproc to proc
            procs = [filen(proc1d, n + m) + const.filename_extension
                     for m in range(const.num_pages)]
            for a, b in zip(unprocs, procs):
                try:
                    os.rename(a, b)
                except OSError as e:
                    util.fatal("Could not rename %s", a)
            total_proc += const.num_pages
            log.info("%d images processed", const.num_pages)
            #hp.heap().dump('prof.hpy');hp.setref();gc.collect();hp.setref();hp.heap().dump('prof.hpy')
    finally:
        cache.save_all()
        dbc.close()
        next_ballot.save()

        log.info("%d images processed", total_proc)
        if total_unproc > 0:
            log.warning("%d images NOT processed.", total_unproc)
def export(pargenes_run_dir, paths):
    print("Pargenes run dir: " + pargenes_run_dir)
    pargenes_output = os.path.join(pargenes_run_dir, "pargenes_output")
    ml_run_dir = os.path.join(pargenes_output, "mlsearch_run", "results", "ali_fasta")

    # export best ml tree (with support values if existing)
    src = ""
    if (common.pargenes_bs_trees > 0):
        src = os.path.join(pargenes_output, "supports_run", "results",
                           "ali_fasta.support.raxml.support")
    else:
        src = os.path.join(ml_run_dir, "ali_fasta.raxml.bestTree")
    shutil.copy(src, paths.raxml_best_tree)

    # export best tree with duplicates reattached
    #if ("thinned" not in paths.dataset):
    reattach_duplicates.reattach_duplicates(
        src, paths.raxml_best_tree_with_duplicate, paths.duplicates_json)

    # export best tree with TBE values
    if (common.pargenes_bs_trees > 0):
        src = os.path.join(pargenes_output, "supports_run", "results",
                           "ali_fasta.support.tbe.raxml.support")
        shutil.copy(src, paths.raxml_best_tree_tbe)

    # export best ml model
    src = os.path.join(ml_run_dir, "ali_fasta.raxml.bestModel")
    shutil.copy(src, paths.raxml_best_model)

    # export all ml trees
    src = os.path.join(ml_run_dir, "sorted_ml_trees.newick")
    shutil.copy(src, paths.raxml_all_ml_trees)
    src = os.path.join(ml_run_dir, "sorted_ml_trees_ll.newick")
    shutil.copy(src, paths.raxml_all_ml_trees_ll)

    # export bootstrap trees
    if (common.pargenes_bs_trees > 0):
        src = os.path.join(pargenes_output, "concatenated_bootstraps", "ali_fasta.bs")
        shutil.copy(src, paths.raxml_bootstrap_trees)

    av_pairwise_rf_distance = rf_distance.get_export_pairwise_rf_distance(
        paths.raxml_all_ml_trees, paths.raxml_all_ml_trees_rf_distances)
    toprint0 = "Average pairwise RF distance between all ML trees: " + str(av_pairwise_rf_distance)
    print(av_pairwise_rf_distance)

    # compute RF distance between starting and ML trees for the best run
    print("Computing rf distances between parsimony and ML trees...")
    rf_dir = os.path.join(paths.runs_dir, "rfdistances")
    util.clean_dir(rf_dir)
    util.mkdirp(rf_dir)
    prefix = os.path.join(rf_dir, "ml")
    tree1 = os.path.join(ml_run_dir, "ali_fasta.raxml.bestTree")
    tree2 = os.path.join(ml_run_dir, "ali_fasta.raxml.startTree")
    rf = rf_distance.get_rf_distance(tree1, tree2, prefix)
    toprint1 = "RF distance between the best ML tree and its starting tree: " + str(rf)

    all_runs_dir = os.path.join(ml_run_dir, "multiple_runs")
    sum_rf = 0.0
    deno_rf = 0.0
    for run in os.listdir(all_runs_dir):
        run_dir = os.path.join(all_runs_dir, run)
        prefix = os.path.join(rf_dir, run)
        tree1 = os.path.join(run_dir, "ali_fasta.raxml.bestTree")
        tree2 = os.path.join(run_dir, "ali_fasta.raxml.startTree")
        rf = rf_distance.get_rf_distance(tree1, tree2, prefix)
        sum_rf += rf
        deno_rf += 1.0
    av_rf = sum_rf / deno_rf
    toprint2 = ("Average (over all the " + str(deno_rf) +
                " runs) RF distance between start and ML trees: " + str(av_rf))

    print(toprint0)
    print(toprint1)
    print(toprint2)
    with open(paths.rf_distance_report, "w") as writer:
        writer.write(toprint0 + "\n")
        writer.write(toprint1 + "\n")
        writer.write(toprint2)
def models_dir(self):
    res = os.path.join(self.results_dir, "models")
    util.mkdirp(res)
    return res
U_L = np.array([])
V_L = np.array([])
for file in files:
    path = os.path.join(root, file)
    if not path.endswith(".jpg"):
        continue
    print "Training on", path, "..."
    subsquares, U, V = generateSubsquares(path)

    X = np.concatenate((X, subsquares), axis=0)
    U_L = np.concatenate((U_L, U), axis=0)
    V_L = np.concatenate((V_L, V), axis=0)

util.mkdirp("models/")
if args.a:
    # Train all of the data sets
    model_count = 0
    for c in C_LIST:
        for epsilon in EPSILON_LIST:
            u_path = "models/u_svr" + str(model_count) + ".model"
            v_path = "models/v_svr" + str(model_count) + ".model"
            if not os.path.isfile(u_path) or not os.path.isfile(v_path):
                print "Fitting the model given by C =", c, ", epsilon =", epsilon
                u_svr = SVR(C=c, epsilon=epsilon)
                v_svr = SVR(C=c, epsilon=epsilon)
                u_svr.fit(X, U_L)
                v_svr.fit(X, V_L)
                joblib.dump(u_svr, u_path)
                joblib.dump(v_svr, v_path)
        if total > 0:
            # Diffuse when at least one neighboring cell's oxygen density is below 1.
            diffusion_oxygen = last_distribution.iat[i, j] / 2  # release half of the current cell's oxygen
            total_shared = 0
            for dx in range(-1, 2):
                for dy in range(-1, 2):
                    ti, tj = (i + dx + SIZE) % SIZE, (j + dy + SIZE) % SIZE
                    target_cell = last_distribution.iat[ti, tj]
                    shared_oxygen = diffusion_oxygen * ((1 - min(1, target_cell)) / total)
                    total_shared = total_shared + shared_oxygen
                    current_distribution.iat[ti, tj] = current_distribution.iat[ti, tj] + shared_oxygen
            # To avoid floating-point error, do not simply subtract half of the current
            # cell's oxygen; instead add last generation's concentration minus the
            # oxygen that was actually shared out.
            current_distribution.iat[i, j] = \
                current_distribution.iat[i, j] + last_distribution.iat[i, j] - total_shared
        else:
            current_distribution.iat[i, j] = current_distribution.iat[i, j] + last_distribution.iat[i, j]

    print("process: " + f"{generation}/{MAX_ITERATION}")
    if generation % 1 == 0:
        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)
        show_board(ax1, last_distribution, f"Generation: {generation - 1}")
        show_board(ax2, current_distribution, f"Generation: {generation}")
        fig.tight_layout()
        fig.savefig(OUTPUT_DIR + str(generation))
        plt.close(fig)


if __name__ == "__main__":
    mkdirp(OUTPUT_DIR)
    init()
    iterate()
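# A tiny self-contained check of the toroidal wrap-around used above:
# (i + dx + SIZE) % SIZE maps out-of-range neighbor indices back onto the
# grid, so the diffusion board behaves like a torus. GRID here is illustrative.
GRID = 5
assert (0 - 1 + GRID) % GRID == 4  # left neighbor of column 0 wraps to column 4
assert (4 + 1 + GRID) % GRID == 0  # right neighbor of column 4 wraps to column 0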
def likelihoods_dir(self):
    res = os.path.join(self.results_dir, "likelihoods")
    util.mkdirp(res)
    return res
def thinning_dir(self):
    res = os.path.join(self.results_dir, "tree_thinning")
    util.mkdirp(res)
    return res
def main():
    miss_counter = 0

    # get command line arguments
    cfg_file = get_args()

    # read configuration from tevs.cfg and set constants for this run
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(const.logfilename)
    log.info("Log created.")

    # create initial toplevel directories if they don't exist
    for p in ("%s" % ("templates"),
              "%s" % ("template_images"),
              "%s" % ("composite_images"),
              "results",
              "proc",
              "errors"):
        util.mkdirp(util.root(p))

    # make sure you have code for ballot type spec'd in config file
    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError as e:
        util.fatal("No such ballot type: " + const.layout_brand + ": check " + cfg_file)

    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to db and open cursor
    if const.use_db:
        try:
            dbc = db.PostgresDB(database=const.dbname, user=const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database")
    else:
        dbc = db.NullDB()
    log.info("Database connected.")

    total_images_processed, total_images_left_unprocessed = 0, 0
    base = os.path.basename

    # Each time given a signal to proceed for count_to_process ballots,
    # create ballot from images, get landmarks, get layout code, get votes.
    # Write votes to database and results directory.

    # for profiling
    # from guppy import hpy;hp=hpy();hp.setref();
    # import gc;gc.disable();gc.collect();hp.setref()

    count_to_process = 0
    while True:
        next_ballot_number = int(util.readfrom(util.root("nexttoprocess.txt")))
        if count_to_process == 0:
            # wait here until get_count_to_process returns
            # it will wait on input instruction from stdio
            processing_command = get_processing_command(next_ballot_number)
            if processing_command.startswith("+"):
                next_ballot_number += const.num_pages
                util.writeto(util.root("nexttoprocess.txt"), next_ballot_number)
                count_to_process = 1
            if processing_command.startswith("="):
                next_ballot_number = int(processing_command[1:])
                util.writeto(util.root("nexttoprocess.txt"), next_ballot_number)
                count_to_process = 1
            if processing_command.startswith("S"):
                count_to_process = 1
            if processing_command.startswith("0"):
                count_to_process = 0
            # we're done when we get instructed to process 0
            if count_to_process == 0:
                break
        count_to_process -= 1
        try:
            # get number of next image,
            # clean up, in case...
            gc.collect()
            log.debug("Request for %d" % (next_ballot_number,))
            unprocs = [incomingn(next_ballot_number + m)
                       for m in range(const.num_pages)]
            log.info(unprocs)
            # we need all images for sheet to be available to process it
            for filename in unprocs:
                if not os.path.exists(filename):
                    errmsg = "File %s not present or available!" % (base(filename),)
                    log.info(errmsg)
                    # if a file is not yet available, that's not fatal
                    raise FileNotPresentException(errmsg)

            # Processing
            #log.info("Processing %s:\n %s" %
            #         (n, "\n".join("\t%s" % base(u) for u in unprocs)))
            log.debug("Creating ballot.")
            try:
                ballot = ballotfrom(unprocs, extensions)
                log.debug("Created ballot, processing.")
                results = ballot.ProcessPages()
                log.debug("Processed.")
            except BallotException as e:
                total_images_left_unprocessed += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                continue

            # Write all data
            # make dirs:
            proc1d = dirn("proc", next_ballot_number)
            resultsd = dirn("results", next_ballot_number)
            resultsfilename = filen(resultsd, next_ballot_number)
            for p in (proc1d, resultsd):
                util.mkdirp(p)
            #try:
            #    results_to_vop_files(results,resultsfilename)
            #except Exception as e:
            #    log.info(e)
            #    print e
            # write csv and mosaic
            #log.info("local results_to_CSV")
            #csv = results_to_CSV(results,log)
            #log.info("Back from results_to_CSV")
            #util.genwriteto(resultsfilename + ".csv", csv)

            # write to the database
            try:
                log.debug("Inserting to db")
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to remove
                # partial files
                remove_partial(resultsfilename + ".txt")
                remove_partial(resultsfilename + const.filename_extension)
                log.info("Could not commit to db")
                print "Could not commit to db!"
                util.fatal("Could not commit vote information to database")

            # Post-processing
            # move the images from unproc to proc
            log.debug("Renaming")
            procs = [filen(proc1d, next_ballot_number + m) + const.filename_extension
                     for m in range(const.num_pages)]
            for a, b in zip(unprocs, procs):
                try:
                    os.rename(a, b)
                except OSError as e:
                    log.info("Could not rename %s" % a)
                    util.fatal("Could not rename %s", a)
            total_images_processed += const.num_pages

            # Tell caller you've processed all images of this ballot
            log.debug("Requesting next")
            # update next ballot file with next image number
            util.writeto(util.root("nexttoprocess.txt"),
                         next_ballot_number + const.num_pages)
            log.debug("Done writing nexttoprocess.txt")
            #print "%d extracted. " % (next_ballot_number,)
            log.info("%d images processed", const.num_pages)
            # for profiling
            # hp.heap().dump('prof.hpy');hp.setref();gc.collect();
            # hp.setref();hp.heap().dump('prof.hpy')
        except FileNotPresentException, e:
            print e
            sys.stdout.flush()
def main():
    NextEqualsPrefix = "Next="
    MorePrompt = ":"
    NextToProcessFile = ""
    miss_counter = 0

    # get command line arguments
    cfg_file = get_args()

    # read configuration from tevs.cfg and set constants for this run
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(util.root("extraction.log"))

    # create initial toplevel directories if they don't exist
    for p in ("%s" % ("templates"),
              "%s" % ("template_images"),
              "%s" % ("composite_images"),
              "results",
              "proc",
              "errors"):
        util.mkdirp(util.root(p))

    # make sure you have code for ballot type spec'd in config file
    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError as e:
        util.fatal("No such ballot type: %s check %s !", (const.layout_brand, cfg_file))

    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to db and open cursor
    if const.use_db:
        try:
            dbc = db.PostgresDB(database=const.dbname, user=const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database!")
    else:
        dbc = db.NullDB()
    log.info("Database connected.")

    total_images_processed, total_images_left_unprocessed = 0, 0
    base = os.path.basename

    # Each time given a signal to proceed for count_to_process ballots,
    # create ballot from images, get landmarks, get layout code, get votes.
    # Write votes to database and results directory.

    # for profiling
    # from guppy import hpy;hp=hpy();hp.setref();
    # import gc;gc.disable();gc.collect();hp.setref()

    NextToProcessFile = util.root("nexttoprocess.txt")
    count_to_process = 0
    file_problem = False
    while True:
        log.debug("Top of loop.")
        next_ballot_number = int(util.readfrom(NextToProcessFile))
        log.debug("Read %d from %s" % (next_ballot_number, NextToProcessFile))
        if count_to_process == 0:
            # send prompt to controlling process, "READY:" or "+ for SKIP:"
            if file_problem:
                file_problem = False
                # do not remove space after %06d
                print "Next=%06d , + to SKIP:" % (next_ballot_number,)
            else:
                # do not remove space after %06d
                print "Next=%06d , READY:" % (next_ballot_number,)
            sys.stdout.flush()
            # wait here until get_count_to_process returns
            # it will wait on input instruction from stdio
            try:
                count_to_process = get_count_to_process(next_ballot_number, log)
            except DoIncrementException, e:
                log.debug("Do increment exception")
                util.writeto(NextToProcessFile,
                             next_ballot_number + const.num_pages)
                log.debug("Wrote %d to next_ballot_number, count to process is %d" %
                          (next_ballot_number + const.num_pages, count_to_process))
                count_to_process = 0
                log.debug("Setting count to process to 0.")
                continue
        # we're done when we get instructed to process 0
        if count_to_process == 0:
            break
        count_to_process -= 1
        try:
            # get number of next image,
            # clean up, in case...
            gc.collect()
            log.debug("Request for %d" % (next_ballot_number,))
            unprocs = [incomingn(next_ballot_number + m)
                       for m in range(const.num_pages)]
            log.info(unprocs)
            # we need all images for sheet to be available to process it
            for filename in unprocs:
                log.info("Checking for path.")
                if not os.path.exists(filename):
                    log.info("File not present.")
                    errmsg = "File %s not present or available!!!" % (base(filename),)
                    log.info(errmsg.replace("!!!", ""))
                    print errmsg
                    sys.stdout.flush()
                    raise FileNotPresentException(filename)
                log.info("Path found.")

            # Processing
            log.debug("Creating ballot.")
            try:
                ballot = ballotfrom(unprocs, extensions)
                log.debug("Created ballot, processing.")
                results = ballot.ProcessPages()
                log.debug("Processed.")
            except BallotException as e:
                total_images_left_unprocessed += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                util.writeto(NextToProcessFile,
                             next_ballot_number + const.num_pages)
                continue

            # Write all data
            # make dirs:
            proc1d = dirn("proc", next_ballot_number)
            resultsd = dirn("results", next_ballot_number)
            resultsfilename = filen(resultsd, next_ballot_number)
            for p in (proc1d, resultsd):
                util.mkdirp(p)
            #try:
            #    results_to_vop_files(results,resultsfilename)
            #except Exception as e:
            #    log.info(e)
            #    print e
            # write csv and mosaic
            #log.info("local results_to_CSV")
            #csv = results_to_CSV(results,log)
            #log.info("Back from results_to_CSV")
            #util.genwriteto(resultsfilename + ".csv", csv)

            # write to the database
            try:
                log.debug("Inserting to db")
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to remove
                # partial files
                remove_partial(resultsfilename + ".txt")
                remove_partial(resultsfilename + const.filename_extension)
                log.info("Could not commit to db")
                print "Could not commit to db!"
                util.fatal("Could not commit vote information to database")

            # Post-processing
            # move the images from unproc to proc
            log.debug("Renaming")
            procs = [filen(proc1d, next_ballot_number + m) + const.filename_extension
                     for m in range(const.num_pages)]
            for a, b in zip(unprocs, procs):
                try:
                    os.rename(a, b)
                except OSError as e:
                    log.info("Could not rename %s" % a)
                    util.fatal("Could not rename %s", a)
            total_images_processed += const.num_pages

            # Tell caller you've processed all images of this ballot
            log.debug("Requesting next")
            # update next ballot file with next image number
            util.writeto(NextToProcessFile,
                         next_ballot_number + const.num_pages)
            log.debug("Done writing nexttoprocess.txt")
            #print "%d extracted. " % (next_ballot_number,)
            log.info("%d images processed", const.num_pages)
            # for profiling
            # hp.heap().dump('prof.hpy');hp.setref();gc.collect();
            # hp.setref();hp.heap().dump('prof.hpy')
        except FileNotPresentException, e:
            file_problem = True
            print "FileNotPresentException"
            sys.stdout.flush()
            log.info("FileNotPresentException occurred")
            continue
import os
import shutil
from os.path import join

from config import config
from util import mkdirp, makehardlink

STORAGE_DIR = config.get('STORAGE_DIR') or config['SIM_STORAGE_DIR']

# where the actual files are stored.
# name = sha1("H" + file content), contents = file content
DB_DIR = join(STORAGE_DIR, "files_by_sha1", '')
# where the snapshots are stored as hardlink trees
SNAP_DIR = join(STORAGE_DIR, "sim_snapshots", '')

mkdirp(DB_DIR)
mkdirp(SNAP_DIR)


def raise_wat(p):
    raise Exception('What kind of thing are you?: ' + p)


def dedupe_cp(src, dest):
    """
    both src and dest should be absolute paths
    src must exist
    dest must not exist
    """
    if os.path.isfile(src):
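# dedupe_cp's body is truncated above. A hedged sketch of what its file branch
# plausibly does, following the scheme described in the comments (blob name =
# sha1("H" + file content); snapshots hardlink into the blob store). The
# makehardlink(src, dest) signature is an assumption.
from hashlib import sha1

def dedupe_cp_sketch(src, dest):
    with open(src, 'rb') as f:
        content = f.read()
    blob = join(DB_DIR, sha1(b"H" + content).hexdigest())
    if not os.path.exists(blob):
        shutil.copy(src, blob)   # first time this content is seen: store the blob
    makehardlink(blob, dest)     # dest shares the blob's inode, deduplicating storage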
def species_delimitation_runs_dir(self):
    res = os.path.join(self.runs_dir, "species_delimitation")
    util.mkdirp(res)
    return res
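# The *_dir accessors above (rootdigger_dir, models_dir, likelihoods_dir,
# thinning_dir, species_delimitation_runs_dir) all share one pattern: compute a
# subdirectory path, create it on first access, and return it. A standalone
# sketch of that pattern, with os.makedirs standing in for util.mkdirp and
# illustrative names:
import os

class ResultPaths:
    def __init__(self, results_dir):
        self.results_dir = results_dir

    def _subdir(self, name):
        res = os.path.join(self.results_dir, name)
        os.makedirs(res, exist_ok=True)  # idempotent, like mkdir -p
        return res

    @property
    def models_dir(self):
        return self._subdir("models")

# e.g. ResultPaths('/tmp/results').models_dir  # creates /tmp/results/models on first use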