def _gen(self):
    mylist = []
    mylen = 0
    while True:
        if not self.dirty and self.refresh is REFRESH.EVERY_LOOP:
            self.dirty = self.dirty_check(mylist)
        if self.dirty:
            # breakpoint()
            cn = mylist[0].__class__.__name__ if mylist else ""
            info(f"refreshing dirty {cn} generator")
            mylist = self.snapshot_list()
            mylen = len(mylist)
            if mylen == 0:
                err('source empty')
            self.dirty = False
        i = -1
        if self.shuffle is SHUFFLE.EVERY_LOOP:
            random.shuffle(mylist)
        while i < (mylen - 1):
            if not self.dirty and self.refresh is REFRESH.EVERY_ITER:
                self.dirty = self.dirty_check(mylist)
            # will also force a shuffle even if SHUFFLE.EVERY_LOOP.
            # therefore, a source that's constantly dirty when REFRESH is EVERY_ITER
            # always de facto causes SHUFFLE.EVERY_ITER
            if self.dirty:
                break
            if self.shuffle is SHUFFLE.EVERY_ITER:
                random.shuffle(mylist)
            if mylen == 0:
                return  # should never happen; raising StopIteration inside a generator is a RuntimeError since PEP 479
            i += 1
            yield mylist[i]
            if self.dirty:
                break  # might be set externally
def reduced_label(label):
    if label == 'dog' or (label.startswith('S') and len(label) in [2, 3, 4]):
        return 0
    elif label == 'cat' or (label.startswith('N') and len(label) in [3, 4, 5]):
        return 1
    else:
        err('do not know reduced label for ' + label)
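# Hedged usage sketch (the example label strings below are hypothetical, not from the dataset):
# 'dog' and short 'S*' codes collapse to class 0, 'cat' and 'N*' codes to class 1.
assert reduced_label('dog') == 0
assert reduced_label('S12') == 0
assert reduced_label('cat') == 1
assert reduced_label('N123') == 1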
def preprocess(self, im):
    log('starting preprocess')
    file = None
    if is_file(im):
        file = im
        im = im.load()
    if len(im.shape) == 2:
        im = np.reshape(im, tuple(list(im.shape) + [1]))
    if im.shape[2] == 1:
        im = np.repeat(im, 3, axis=2)
    assert self.data_format == 'channels_last'
    assert self.channel_axis == 3
    assert self.nchan == 3
    log('starting preprocess ops')
    if len(im.shape) == 2:
        im = np.stack((im, im, im), axis=2)
        return self._preprocess_im(im, file)
    elif len(im.shape) == 3:
        return self._preprocess_im(im, file)
    elif len(im.shape) == 4:
        err('maybe this is the problem?')
        return arr([self._preprocess_im(i, file) for i in im])
    else:
        err('or this?')
def build_net(self, FLAGS):
    import tensorflow as tf
    LRN, PoolHelper = gnet_layer_classes()
    if self.file.ext == '.h5':
        self.net = tf.keras.models.load_model(
            self.file.abspath,
            custom_objects={
                'PoolHelper': PoolHelper,
                'LRN': LRN
            })
    elif self.file.ext == 'onnx':
        if ismac():
            onnx_tf = f'{HOME}/miniconda3/envs/dnn/bin/onnx-tf'
        else:
            onnx_tf = 'matt/miniconda3/envs/dnn/bin/onnx-tf'
        out = self.file.res_pre_ext("pb")
        eshell(f'{onnx_tf} convert -i {self.file.abspath} -o {out.abspath}')
        # onnx-tf convert -i /path/to/input.onnx -o /path/to/output.pb
    else:
        err(f'cannot build net from a {self.file.ext} file')
    if len(self.net.outputs) > 1:
        found = False
        for i, o in enum(self.net.outputs):
            if 'prob' in o.name:
                assert not found
                self.OUTPUT_IDX = i
                found = True
        assert found
def manage():
    cloud_files = weval(wl.CloudObjects(wlexpr('$CloudRootDirectory')))
    for wcf in listmap(File, cloud_files):
        if wcf.abspath in _REGISTERED_SUBROOTS:
            pass
        elif wcf.abspath == _MAIN_SUBROOT:
            @log_invokation(with_args=True, stack=True)
            def recurse_cloud_file(sub_wcf):
                # f = File(f'{sub_wcf.abspath}')
                if not sub_wcf.exists:
                    recurse_cloud_file.my_stacker.done = True
                    if boolinput(f'{sub_wcf} is not mirrored locally, delete cloud file?'):
                        sub_wcf.wc.delete()
                if sub_wcf.wc.isdir:
                    if Folder(sub_wcf)['.CLOUD_FILES.txt'].exists:
                        Folder(sub_wcf)['.CLOUD_FILES.txt'].write('\n'.join(listmap(
                            lambda e: e.abspath.replace(f'{Folder(sub_wcf).abspath}/', ''),
                            sub_wcf.wc.files)))
                    else:
                        [recurse_cloud_file(c) for c in sub_wcf.wc.files]
            recurse_cloud_file(wcf)
            recurse_cloud_file.my_stacker.done = True
        else:
            err(f'{wcf.abspath} is not a registered Wolfram Cloud subroot')
def data_format(self):
    if self.CA == 1:
        return 'channels_first'
    elif self.CA == 3:
        return None
    else:
        err('bad CA')
def validate(self):
    if self.refresh and not callable(self.source):
        err("refreshing only works on callable sources")
    if self.dirty_check and self.refresh is None:
        err("dirty checks are pointless without refreshing")
    if self.dirty_check is None and self.refresh is not None:
        self.dirty_check = lambda old: old != self.source()  # might be slow!
def YesOrNo(q):
    response = answer_request('', q, None, False)
    if response == 'y':
        return True
    elif response == 'n':
        return False
    else:
        err(f'invalid response: {response}')
def check(self):
    from mlib.proj.struct import GIT_DIR, GIT_IGNORE
    if not self.file.rel.startswith('_') and self.file.parent.name != 'data':
        err('PermaDicts should be private (start with _)')
    if GIT_DIR.exists and (not GIT_IGNORE.exists or '/_*' not in GIT_IGNORE.read()):
        err(f'{self.file} needs to be ignored')
    if not self.file.exists:
        self.file.save({})
def __init__(self, mpath, lpath):
    self.mpath = self.abspath
    err('used to have line: self.lpath = mlib.file.abspath')
    from mlib.boot.lang import ismac, islinux
    if ismac():
        thispath = mpath
    else:
        assert islinux()
        thispath = lpath
    super(SyncedDataFolder, self).__init__(thispath)
def isreal(n):
    if isstr(n):
        if 'inf' in n:
            return False
        else:
            err('strings cant be tested for real unless inf')
    # try:
    if n is None or abs(n) == np.inf:
        return False
    return np.isreal(n)
def paths(self):
    if not self.isdir:
        log(f'{self}:{self.isdir=}')
        err('not dir!')
    # assert self.isdir
    a = li([self.join(name) for name in sort(os.listdir(self.abspath))])
    if self.DELETE_DS_STORE:
        Folder(self)['.DS_Store'].deleteIfExists(silent=True)
    if self.IGNORE_DS_STORE:
        a = a.filtered(lambda n: File(n).name != '.DS_Store')
    return a
def eval(self, s):
    if self.session is None:
        kwargs = {} if ismac() else dict(kernel='/home/matt/WOLFRAM/Executables/WolframKernel')
        self._start_session(kwargs)
    ev = self.session.evaluate_wrap_future(wl.UsingFrontEnd(s))
    ev = ev.result()
    if ev.messages is not None:
        for m in ev.messages:
            err(m)
    return ev.result
def _load_weights(self):
    try:
        self.net.load_weights(self.weightsf())
    except Exception:  # a bare except would also swallow KeyboardInterrupt
        import traceback
        print(traceback.format_exc())
        ww = File(self.weightsf()).load()  # DEBUG
        for k in listkeys(ww):
            for kk in listkeys(ww[k]):
                print(f'{kk}: {ww[k][kk].shape}')
        err('could not load weights')
def moveinto(self, new, overwrite=False):
    import shutil
    File(new).mkdirs()
    assert File(new).isdir
    if File(new)[self.name].exists:
        if overwrite:
            File(new)[self.name].delete(silent=True)
        else:
            err(f'{File(new)[self.name].abspath} already exists. Set overwrite=True to avoid this')
    shutil.move(self.abspath, File(new).abspath)
def smallify():
    err('dev')
    files = glob.glob(sys.argv[1] + "/**/*.png", recursive=True)
    i = 0
    log('found ' + str(len(files)) + ' images')
    with Progress(len(files)) as prog:
        for f in files:
            p = shell(['convert', f, '-resize', '20x20', f], silent=True)
            p.interact()
            i = i + 1
            prog.tick()
    log('resized ' + str(i) + ' images')
    sys.exit()
def nanmean(lll):
    if ndims(lll) > 2:
        err('not ready')
    elif ndims(lll) == 2:
        rrr = arr()
        for i in range(0, lll.shape[1]):
            colu = list(filter(lambda x: not isnan(x), lll[:, i]))
            rrr += safemean(colu)
    else:  # 1-d
        lll = list(filter(lambda x: not isnan(x), lll))
        rrr = safemean(lll)
    # noinspection PyUnboundLocalVariable
    return rrr
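# Hedged sketch, not part of mlib: assuming arr/isnan/safemean follow numpy semantics,
# the 2-D branch above computes a column-wise mean that skips NaN entries, which for
# plain numeric arrays matches numpy's built-in nanmean along axis 0.
import numpy as np

_demo = np.array([[1.0, 2.0], [3.0, np.nan], [5.0, 4.0]])
# column 0 averages 1, 3, 5 -> 3.0; column 1 drops the NaN and averages 2, 4 -> 3.0
assert np.allclose(np.nanmean(_demo, axis=0), [3.0, 3.0])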
def finish_process(self, p, SW):
    # if SW is None or SW.interact:
    import mlib.err
    p.expect('LETS_TRY_TO_HAVE_A_CLEAN_CONSOLE')  # hopefully flush out my python command so I dont see it in my console!
    progress('python script must have started')

    def kill_on_finish_str(s, p):
        log(s, silent=True)  # LOGPIE
        if ContainerBashScript.FINISH_STR in s or mlib.err.ERROR_EXIT_STR in s:
            p.close()

    stopper = p.log_to_stdout(
        # fun=kill_on_finish_str,
        # just_fun=True,
        o=p,
        print_not_log=True,
        stop_on=[ContainerBashScript.FINISH_STR, mlib.err.ERROR_EXIT_STR])
    # p.log_to_stdout(o=p, print_not_log=True)
    import mdb
    # noinspection PyUnresolvedReferences
    i = p.expect([
        ContainerBashScript.FINISH_STR, p.PROMPT, pexpect.EOF, '(pdb)',
        mdb.INTERACT_STR, mlib.err.ERROR_EXIT_STR
    ])
    if i == 0:
        p.close()
    elif i in [1, 3, 4]:
        stopper.stop()
        p.interact()
        err("PDB")
    elif i == 2:
        p.close()
        err('EOF')
    elif i == 5:
        raise RelayException('got error in OM')
    # while p.alive():
    #     inp = input_while(lambda: p.alive())
    #     p.sendline(inp)
    #     p.interact()
    # else:  # old
    #     p.pipe_and_close_on(ContainerBashScript.FINISH_STR, close_fun=lambda p: p.sendline('exit'))
    if self.host.n is not None:
        self.host.tick_job_finish()
def binary_results(y_true, y_pred):
    y_true = arr(y_true)
    y_pred = arr(y_pred)
    if any(arr(y_true) > 1) or any(arr(y_pred) > 1):
        err('binary results cannot be done when there are more than two classes')
    neg = 0
    pos = 1
    P = count_nonzero(y_true == pos)
    N = count_nonzero(y_true == neg)
    TP = count_nonzero(bitwise_and(y_pred == pos, y_true == pos))
    FP = count_nonzero(bitwise_and(y_pred == pos, y_true == neg))
    TN = count_nonzero(bitwise_and(y_pred == neg, y_true == neg))
    FN = count_nonzero(bitwise_and(y_pred == neg, y_true == pos))
    return TP, FP, TN, FN, P, N
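# Hedged usage sketch (assumes arr/count_nonzero/bitwise_and are the numpy functions):
# with 0/1 labels the returned counts combine into the usual binary metrics, e.g.
#     TP, FP, TN, FN, P, N = binary_results([1, 0, 1, 1, 0], [1, 0, 0, 1, 1])
#     accuracy  = (TP + TN) / (P + N)   # (2 + 1) / 5
#     precision = TP / (TP + FP)        # 2 / 3
#     recall    = TP / P                # 2 / 3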
def get_or_set_default(self, default, *keys):
    current_v = self
    for i, k in enum(keys):
        islast = i == len(keys) - 1
        if k in listkeys(current_v):
            current_v = current_v[k]
        elif islast:
            current_v[k] = default
            if not self.just_sync_at_end:
                self.push()
            return default
        else:
            err(f'need to set root default first: {k}')
    return current_v
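# Hedged usage sketch (hypothetical keys, not from the source): the keys are walked like
# nested dict lookups; only a missing *final* key is created with the default, while a
# missing intermediate key errs.
#     proj.get_or_set_default(1, 'next_exp_id')        # returns the stored value, or stores 1
#     proj.get_or_set_default(16, 'train', 'batch')    # errs unless 'train' already exists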
def __new__(mcs, name, bases, attrs):
    cls = super().__new__(mcs, name, bases, attrs)

    def replacement(self):  # property getters receive the instance
        raise NotImplementedError

    # for k, v in listitems(asdict(cls.STATIC)):
    #     cls.__setattr__(k, v)
    for k, v in listitems(cls.__dict__):
        if isinstance(v, Abstract):
            setattr(cls, k, property(replacement))
            if ABC not in bases:
                err('bad')
    if ABC not in bases:
        cls.__meta_post_init__()
    return cls
def processGroup(group, rep, indent=0):
    for ke in listkeys(group):
        rep += '\t' * indent
        rep += ke
        item = group[ke]
        if 'Dataset' in cn(item):
            # c = 'Dataset'
            rep += f'\t\t{item.shape} {item.dtype}\n'
        elif 'Group' in cn(item):
            # c = 'Group'
            rep += '\n'
            rep = processGroup(item, rep, indent + 1)
            # sub = f'{item.shape} {item.dtype}'
        else:
            err(f'what is this: {cn(item)}')
    return rep
def recurse_h5(ff):
    # o = f
    # subd = d
    # for k in keypath:
    #     o = o[k]
    #     subd = subd[k]
    if type(ff) == h5py.File or type(ff) == h5py._hl.group.Group:
        ks = listkeys(ff)
        subd = {}
        for k in ks:
            subd[k] = recurse_h5(ff[k])
    elif type(ff) == h5py._hl.dataset.Dataset:
        subd = np.array(ff)
    else:
        err(f'do not know what to do with {ff.__class__}')
    return subd
def caller_lines():
    # inner_fun = caller_fun(1)
    # outer_fun = caller_fun(2)
    outer_file = caller_file(2)
    start = official_caller_line(2)
    end = start + 1
    lines = listmap(lambda l: l.strip(), File(outer_file).readlines())
    while True:
        if end - start > 40:
            err('really???')
        e = '\n'.join(lines[start - 1:end - 1]) + '\n'  # MUST END WITH NEWLINE IN EVAL OR SINGLE MODE
        try:
            compile(e, '<string>', 'exec')
            break
        except SyntaxError:
            end += 1
    return list(range(start, end))
def experiments_from_cfg(cfg, advance_id):
    experiments = []
    for i in range(cfg.REPEAT_ALL):
        for exp in cfg.EXPS:
            for ntrain in cfg.NTRAIN:
                if advance_id:
                    exp_id = str(Project.STATE["next_exp_id"])
                    Project.STATE["next_exp_id"] = int(exp_id) + 1
                else:
                    exp_id = None
                    err('not ready to handle this')
                experiments += [DNN_Experiment(
                    arch=exp.arch,
                    ntrain=ntrain,
                    expid=exp_id,
                    gpus=exp.gpus,
                    folder=None)]
    return arr(experiments)
def __init__(self, *command, silent=False, timeout=None, logfile_read=None, cwd=None, env=None, verbose=False):
    from mlib.file import File
    self.cwd = File(cwd).abspath if cwd is not None else None
    self.env = env
    super().__init__(*command, silent=silent)
    self.p.timeout = timeout
    self.p.logfile_read = logfile_read
    self._logging_to_stdout = False
    if verbose:
        if logfile_read is not None:
            err('set logfile read and verbose?')
        else:
            self.log_to_stdout()
def mcc_multi(y_true, y_pred):
    rrr, TP, FP, TN, FN, P, N = basics(y_true, y_pred, mcc_multi)
    if TP is not None:
        err('this should never happen if i have >2 classes')
        rrr = mcc_basic(TP, FP, TN, FN)
    elif rrr == _NON_BINARY:
        y_true, y_pred = prep_ys(y_true, y_pred)
        import sklearn.metrics
        # https://github.com/scikit-learn/scikit-learn/issues/16924
        go = True
        for i in range(nnstate.num_pred_classes()):
            if count_nonzero(y_pred == i) == 0:
                go = False
                break
        if go:
            rrr = sklearn.metrics.matthews_corrcoef(list(map(int, y_true)), y_pred.tolist())
        else:
            rrr = -6
    return update_met_log(mcc_multi, rrr, inc=True)
def save(self, data, silent=None):
    import mlib.JsonSerializable as JsonSerializable
    for ext in save_extensions:
        new_data, was_converted = ext(data)
        if was_converted:
            data = new_data
    if isinstsafe(data, JsonSerializable.JsonSerializable):
        import json
        data = json.loads(data.to_json())
    elif isinstance(data, JsonSerializable.obj):
        data = data.toDict()
    if not silent and not self.default_quiet or (silent is False):
        log('saving ' + self.abspath)
    if self.ext in ['yml', 'yaml']:
        import yaml
        self.mkparents()
        self.write(yaml.dump(data, sort_keys=False))
    elif self.ext in JSON_EXTS:
        self.mkparents()
        import json
        self.write(json.dumps(data, indent=4))
    elif self.ext == 'mat':
        self.mkparents()
        from scipy.io import savemat
        savemat(self.abspath, data)
    elif self.ext in PICKLE_EXTS:
        self.mkparents()
        with open(self.abspath, 'wb') as f:
            import pickle
            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
    elif self.ext == 'png':
        self.mkparents()
        im_data = np.vectorize(np.uint8)(data)
        import imageio
        imageio.imwrite(self.abspath, im_data)
    else:
        err(f'saving does not yet support .{self.ext} files')
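# Hedged usage sketch (hypothetical paths, not from the source): the file extension
# selects the serializer, so the same call works for the formats handled above.
#     File('_cache/results.json').save({'acc': 0.93})   # written as indented JSON
#     File('_cache/results.yml').save({'acc': 0.93})    # written via yaml.dump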
def delete(self, silent=False):
    # os.remove() removes a file.
    # os.rmdir() removes an empty directory.
    # shutil.rmtree() deletes a directory and all its contents.
    if self.isdir:
        if isempty(self.paths):
            os.rmdir(self.abspath)
            if not silent:
                log(f"deleted folder {self}")
        else:
            import shutil
            shutil.rmtree(self.abspath)
            if not silent:
                log(f"deleted non-empty folder {self}")
    else:
        try:
            os.remove(self.abspath)
        except Exception:
            if self.name != '.DS_Store':  # this file is wild. It appears then disappears
                import traceback
                traceback.print_exc()
                err('bad')
        if not silent or LOG_LEVEL.value >= LogLevel.DEBUG.value:
            log(f"deleted {self}")
def human_exp(cfg):
    from human_exps.mc_wait_pilot.mc_wait_pilot import MC_Wait_Pilot
    from human_exps.time_pilot.time_pilot import Time_Pilot
    from human_exps.contour_pilot.contour_pilot import Contour_Pilot
    exp = {
        'time_pilot': Time_Pilot,
        'mc_wait_pilot': MC_Wait_Pilot,
        'contour_pilot': Contour_Pilot,
    }[cfg.FLAGS[0]](_DEV=boolinput('dev'))
    command = strinput(f'what to do with {cfg.FLAGS[0]}', ['build', 'analyze'])
    if command == 'build':
        if boolinput('offline mode'):
            API.offline_mode = True
            Database.offline_mode = True
        if False:
            exp.DATABASE_IDS._hard_reset()
            exp.DATABASE_DATA._hard_reset()
        exp.build(
            _UPLOAD_RESOURCES=boolinput('upload resources'),
            _LOCAL_ONLY=boolinput('local only'))
    elif command == 'analyze':
        exp.analyze()
    else:
        err(f'unknown command: {command}')