def main():
    mkdir(DATADIR)
    for i in range(N_DATASETS):
        bias = 10.0 * random()
        # the random_state seed must be an integer, so truncate the random float
        X, y, coef = make_regression(n_samples=900, n_features=20,
                                     n_informative=10, bias=bias, noise=2.0,
                                     coef=True,
                                     random_state=int(round(random() * 1e6)))
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=1.0 / 3.0, random_state=42)
        train = Bunch(X=X_train, y=y_train, true_coef=coef)
        test = Bunch(X=X_test, y=y_test)
        train_filename = os.path.join(DATADIR, 'dataset_%i_train.txt' % (i + 1))
        test_filename = os.path.join(DATADIR, 'dataset_%i_test.txt' % (i + 1))
        with open(train_filename, 'wb') as fid:
            cPickle.dump(train, fid, 2)
        with open(test_filename, 'wb') as fid:
            cPickle.dump(test, fid, 2)
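# Quick check of the generated data: a minimal sketch, assuming DATADIR is the
# 'datasets' directory used by main() above and that the first dataset exists.
# The attribute names (X, y, true_coef) come from the Bunch objects pickled
# there; everything else here is illustrative.
import os
import cPickle

DATADIR = 'datasets'  # example value; must match the generator script
with open(os.path.join(DATADIR, 'dataset_1_train.txt'), 'rb') as fid:
    train = cPickle.load(fid)

print(train.X.shape)        # (600, 20): two thirds of the 900 samples
print(train.y.shape)        # (600,)
print(train.true_coef[:5])  # first few coefficients of the true model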
def move_results(task_dict):
    mkdir(settings.RESULT_DIR)
    subdirs = os.listdir(settings.STAGE_DIR)
    for subdir in subdirs:
        subpath = '%s%s%s' % (settings.STAGE_DIR, os.sep, subdir)
        files = os.listdir(subpath)
        for fname in files:
            fpath = '%s%s%s' % (subpath, os.sep, fname)
            try:
                hsh = int(splitext(basename(fpath))[0])
            except ValueError:
                warning("Couldn't obtain hash from file: %s. Skipping." %
                        basename(fpath))
                continue
            if settings.TYPE == 'RAW':
                dset = 'dataset'
                method = 'method'
            else:
                if settings.TYPE == 'ASSESS':
                    dset = dataset_name(task_dict[hsh]['dataset'])
                elif settings.TYPE == 'CV_TT':
                    dset = dataset_name((task_dict[hsh]['train_dataset'],
                                         task_dict[hsh]['test_dataset']))
                method = task_dict[hsh]['method']
            outdir = '%s%s%s%s%s' % (settings.RESULT_DIR, os.sep, dset, os.sep,
                                     method)
            mkdir(outdir)
            dpath = '%s%s%s' % (outdir, os.sep, fname)
            shutil.move(fpath, dpath)
        clean_empty_dir(subpath)
def write_output(output, hsh):
    scratchdir = get_scratchdir()
    scratch_results = '%s/results' % scratchdir
    mkdir(scratch_results)
    outdir = get_output_dir(scratch_results)
    fname = '%s/%s.txt' % (outdir, hsh)
    with open(fname, 'w') as fid:
        fid.write(output)
def copy_data_file(filepath):
    src = get_data_path(filepath)
    datapath = os.path.join('assets', filepath)
    dest = os.path.join(settings.OUTPUT_DIR, 'html', datapath)
    destdir = os.path.dirname(dest)
    mkdir(destdir)
    shutil.copy(src, dest)
    return datapath
def __init__(self, methods=None, datasets=None, metrics=None, scalars=None):
    self.methods = set()
    self.datasets = set()
    self.metrics = set()
    self.metric_targets = set()
    self.scalars = set()
    self.cache = {}
    self.cachefile = settings.OUTPUT_DIR + os.sep + 'abed_cache.pkl'
    mkdir(settings.OUTPUT_DIR)
def get_results(basepath=None):
    if basepath is None:
        basepath = '{}/releases/current'.format(myfab.project_path)
    zip_path = '{}/bzips'.format(basepath)
    zip_glob = '*.tar.bz2'
    mkdir(settings.ZIP_DIR)
    get_files_from_glob(zip_path, zip_glob, settings.ZIP_DIR)
    log_path = '{}/logs'.format(basepath)
    log_glob = '*'
    mkdir(settings.LOG_DIR)
    get_files_from_glob(log_path, log_glob, settings.LOG_DIR)
def get_table_fname(table, ext, _type):
    if _type == 'html':
        outdir = '%s%s%s' % (settings.OUTPUT_DIR, os.sep, 'html')
    elif _type == 'txt':
        outdir = '%s%s%s' % (settings.OUTPUT_DIR, os.sep, 'txt')
    mkdir(outdir)
    if table.is_metric:
        fname = '%s%sABED_%s_%s_%s%s' % (outdir, os.sep,
                                         clean_str(table.target),
                                         clean_str(table.name),
                                         clean_str(table.type), ext)
    else:
        fname = '%s%sABED_%s_%s%s' % (outdir, os.sep, clean_str(table.target),
                                      clean_str(table.type), ext)
    return fname
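# Illustration of the filename pattern built by get_table_fname: a minimal
# sketch in which SimpleTable and this clean_str stand-in are hypothetical
# (they are not abed's own classes); only the format string mirrors the
# function above.
import os
from collections import namedtuple

SimpleTable = namedtuple('SimpleTable', ['is_metric', 'target', 'name', 'type'])

def clean_str(s):
    # stand-in: lowercase and replace spaces; abed's helper may differ
    return str(s).lower().replace(' ', '_')

outdir = os.path.join('output', 'html')
table = SimpleTable(is_metric=True, target='best', name='NAME_1', type='rank')
fname = '%s%sABED_%s_%s_%s%s' % (outdir, os.sep, clean_str(table.target),
                                 clean_str(table.name), clean_str(table.type),
                                 '.html')
print(fname)  # output/html/ABED_best_name_1_rank.html (on POSIX systems)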
def _unpack_zip(zipfile, all_tasks):
    fpath = '%s%s%s' % (settings.ZIP_DIR, os.sep, zipfile)
    try:
        b = bz2file.BZ2File(fpath)
        tar = tarfile.open(fileobj=b)
    except tarfile.ReadError:
        error("Could not read tarfile: %s" % fpath)
        return
    mkdir(settings.STAGE_DIR)
    tar.extractall(settings.STAGE_DIR)
    tar.close()
    move_results(all_tasks)
    ziplog = settings.ZIP_DIR + os.sep + 'abed_unzipped.txt'
    with open(ziplog, 'a') as fid:
        fid.write(zipfile + '\n')
def get_output_dir(result_dir, quiet=False):
    subdirs = os.listdir(result_dir)
    if not subdirs:
        outdir = '%s/0' % (result_dir)
        mkdir(outdir)
        if not quiet:
            info("Created result output dir %s" % outdir)
        return outdir
    latest = sorted(map(int, subdirs))[-1]
    files = os.listdir(result_dir + '/' + str(latest))
    if len(files) >= settings.MAX_FILES:
        outdir = '%s/%i' % (result_dir, latest + 1)
        mkdir(outdir)
        if not quiet:
            info("Created result output dir %s" % outdir)
    else:
        outdir = '%s/%i' % (result_dir, latest)
    return outdir
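# The rotation rule above fills numbered subdirectories 0, 1, 2, ... with at
# most MAX_FILES results each before starting a new one. A minimal standalone
# sketch of the same rule, using only the stdlib and an example limit of 2;
# it does not call abed's get_output_dir, which depends on settings/mkdir/info.
import os
import tempfile

MAX_FILES = 2  # example value; abed reads this from settings.MAX_FILES

def next_output_dir(result_dir):
    subdirs = os.listdir(result_dir)
    if not subdirs:
        latest = 0
    else:
        latest = sorted(map(int, subdirs))[-1]
        if len(os.listdir(os.path.join(result_dir, str(latest)))) >= MAX_FILES:
            latest += 1
    outdir = os.path.join(result_dir, str(latest))
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    return outdir

result_dir = tempfile.mkdtemp()
for i in range(5):
    open(os.path.join(next_output_dir(result_dir), '%d.txt' % i), 'w').close()
print(sorted(os.listdir(result_dir)))  # ['0', '1', '2']: new dir after every 2 files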
def fab_repull():
    releasepath = '{}/releases'.format(myfab.project_path)
    lstext = myfab.run('ls -1 {}'.format(releasepath))
    special = ['current', 'previous']
    paths = [x for x in lstext.split('\n') if x not in special]
    with open(settings.AUTO_FILE, 'r') as fid:
        lines = fid.readlines()
    auto_jobids = [x.strip() for x in lines]
    to_pull = []
    for path in paths:
        fullpath = '{}/{}'.format(releasepath, path)
        logpath = '{}/{}'.format(fullpath, 'logs')
        jobid = get_jobid_from_logs(logpath)
        if jobid in auto_jobids:
            to_pull.append(fullpath)
    for path in to_pull:
        zip_path = '{}/bzips/'.format(path)
        zip_glob = '*.tar.bz2'
        mkdir(settings.ZIP_DIR)
        get_files_from_glob(zip_path, zip_glob, settings.ZIP_DIR)
def init_config(): txt = """ ############################################################################## # General Settings # ############################################################################## PROJECT_NAME = '' TASK_FILE = './abed_tasks.txt' AUTO_FILE = './abed_auto.txt' RESULT_DIR = '/path/to/local/results' STAGE_DIR = '/path/to/local/stagedir' MAX_FILES = 1000 ZIP_DIR = './zips' LOG_DIR = './logs' OUTPUT_DIR = './output' AUTO_SLEEP = 120 HTML_PORT = 8000 COMPRESSION = 'bzip2' ############################################################################## # Server parameters and settings # ############################################################################## REMOTE_NEEDS_INIT = True REMOTE_USER = '******' REMOTE_HOST = 'address.of.host' REMOTE_DIR = '/home/%s/projects/project_name' % REMOTE_USER REMOTE_PORT = 22 REMOTE_SCRATCH = None REMOTE_SCRATCH_ENV = 'TMPDIR' ############################################################################## # Settings for Master/Worker program # ############################################################################## MW_SENDATONCE = 100 # number of tasks (hashes!) to send at once MW_COPY_SLEEP = 120 ############################################################################## # Experiment type # ############################################################################## # Uncomment the desired type # Model assessment # #TYPE = 'ASSESS' # Cross validation with train and test dataset # #TYPE = 'CV_TT' #CV_BASESEED = 123456 #YTRAIN_LABEL = 'y_train' # Commands defined in a text file # #TYPE = 'RAW' #RAW_CMD_FILE = '/path/to/file.txt' ############################################################################## # Build settings # ############################################################################## NEEDS_BUILD = False # If remote compilation is required BUILD_DIR = 'build' # Relative directory where build takes place BUILD_CMD = 'make all' # Build command ############################################################################## # Experiment parameters and settings # ############################################################################## DATADIR = 'datasets' EXECDIR = 'execs' DATASETS = ['dataset_1', 'dataset_2'] METHODS = ['method_1', 'method_2'] PARAMS = { 'method_1': { 'param_1': [val_1, val_2], 'param_2': [val_3, val_4], 'param_3': [val_5, val_6] }, 'method_2': { 'param_1': [val_1, val_2, val_3], }, } COMMANDS = { 'method_1': ("{execdir}/method_1 {datadir}/{dataset} {param_1} " "{param_2} {param_3}"), 'method_2': "{execdir}/method_2 {datadir}/{dataset} {param_1}" } METRICS = { 'NAME_1': { 'metric': metric_function_1, 'best': max }, 'NAME_2': { 'metric': metric_function_2, 'best': min } } SCALARS = { 'time': { 'best': min }, } RESULT_PRECISION = 4 DATA_DESCRIPTION_CSV = None REFERENCE_METHOD = None SIGNIFICANCE_LEVEL = 0.05 ############################################################################### # PBS Settings # ############################################################################### PBS_NODES = 1 PBS_WALLTIME = 360 # Walltime in minutes PBS_CPUTYPE = None PBS_CORETYPE = None PBS_PPN = None PBS_MODULES = ['mpicopy', 'python/2.7.9'] PBS_EXPORTS = ['PATH=$PATH:/home/%s/.local/bin/abed' % REMOTE_USER] PBS_MPICOPY = ['datasets', EXECDIR, TASK_FILE] PBS_TIME_REDUCE = 600 # Reduction of runtime in seconds """ configfile = './abed_conf.py' with open(configfile, 'w') as fid: fid.write(txt) info("Wrote initial config to %s." 
% configfile) mkdir('datasets') mkdir('execs') info("Created 'datasets' and 'execs' directories") touch('./abed_auto.txt') touch('./abed_tasks.txt') info("Created 'abed_auto.txt' and 'abed_tasks.txt'")
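# The PARAMS grid in the generated config determines how many tasks abed will
# create per method and dataset: one for every combination of parameter values.
# A minimal sketch of that count for the placeholder grid; the concrete numbers
# stand in for val_1 ... val_6 and the loop is illustrative, not abed's own
# task generator.
from itertools import product

PARAMS = {
    'method_1': {
        'param_1': [0.1, 0.2],
        'param_2': [1, 2],
        'param_3': [10, 20],
    },
    'method_2': {
        'param_1': [0.1, 0.2, 0.3],
    },
}
DATASETS = ['dataset_1', 'dataset_2']

for method, grid in sorted(PARAMS.items()):
    names = sorted(grid)
    combos = list(product(*(grid[n] for n in names)))
    print('%s: %d combinations x %d datasets = %d tasks'
          % (method, len(combos), len(DATASETS), len(combos) * len(DATASETS)))
# method_1: 8 combinations x 2 datasets = 16 tasks
# method_2: 3 combinations x 2 datasets = 6 tasks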