def experiments_from_folder(f):
    r = []
    for exp_folder in Folder(f).files:
        if exp_folder.name == MD_FILE:
            continue
        exp_folder = Folder(exp_folder)
        r += [DNN_Experiment(
            expid=exp_folder.name,
            arch=exp_folder.md_file['arch'],
            ntrain=exp_folder.md_file['ntrainim'],
            gpus=None,
            folder=Folder(exp_folder)
        )]
    return arr(r)
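# Hedged usage sketch (added, not in the original source): assuming each
# experiment subfolder is named by its experiment id and carries an md_file
# with 'arch' and 'ntrainim' keys, a call might look like:
#
#   exps = experiments_from_folder('_figs/figs_dnn/1-myexp')
#   for e in exps:
#       print(e.expid, e.arch, e.ntrain)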
def temp_map_filenames(self):
    indexs = []
    log('loading ims...')
    old_ims = [f.load() for f in Folder('_ImageNetTesting_old')]
    new_ims = [f.load() for f in Folder('_ImageNetTesting/unknown')]
    for oi, new_im in enum(new_ims):
        log(f'checking new im {oi}...')
        for i, old_im in enum(old_ims):
            if np.all(old_im == new_im):
                log(f'\tfound! @ {i}')
                indexs += [i]
                break
        assert len(indexs) == oi + 1
    File('image_net_map.p').save(indexs)
    return None
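# Hedged illustration (added, not in the original): the loop above builds
# `indexs` such that new_ims[oi] == old_ims[indexs[oi]] for every oi. A
# minimal self-contained demonstration of the same matching logic:
def _demo_image_index_mapping():
    import numpy as np
    old = [np.array([1, 2]), np.array([3, 4]), np.array([5, 6])]
    new = [np.array([3, 4]), np.array([1, 2])]  # same images, reordered
    mapping = [next(i for i, o in enumerate(old) if np.all(o == n)) for n in new]
    assert mapping == [1, 0]
    return mapping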
def recurse_cloud_file(sub_wcf):
    # f = File(f'{sub_wcf.abspath}')
    if not sub_wcf.exists:
        recurse_cloud_file.my_stacker.done = True
        if boolinput(f'{sub_wcf} is not mirrored locally, delete cloud file?'):
            sub_wcf.wc.delete()
    if sub_wcf.wc.isdir:
        if Folder(sub_wcf)['.CLOUD_FILES.txt'].exists:
            Folder(sub_wcf)['.CLOUD_FILES.txt'].write('\n'.join(listmap(
                lambda e: e.abspath.replace(f'{Folder(sub_wcf).abspath}/', ''),
                sub_wcf.wc.files
            )))
        else:
            [recurse_cloud_file(c) for c in sub_wcf.wc.files]
def get(self, *files, destination=None, merge=None, overwrite=False, verbose=False):
    if len(files) > 0:
        self.host.get(*files, project_name=self.name, verbose=verbose)
    # files = (File(File(f).name) for f in files)  # gen expression! YOU CAN ONLY ITERATE IT ONCE!
    files = [File(File(f).name) for f in files]
    all_mb_files = []
    for f in files:
        mb_files = [
            (fi, si) for fi, si in f.files_recursive().map(lambda fi: (fi, fi.size()))
            if si.mb() >= 1
        ]
        all_mb_files.extend(mb_files)
        size = f.size(recursive=True)
        progress(f'Downloaded file {f.name} is {size}')
    the_len = len(all_mb_files)
    progress(f'{the_len} files were 1 MB or larger' + (':' if the_len else ''))
    for mb_file, size in sorted(all_mb_files, key=lambda t: t[1].num_bytes):
        progress(f'\t{size}\t{mb_file.rel_to(pwd())}')
    if destination is not None and Folder(destination).abspath != Folder(pwd()).abspath:
        Folder(destination).mkdirs()
        for f in files:
            f.moveinto(destination, overwrite=overwrite)
    if merge is not None:
        for f in files:
            f.mergeinto(Folder(merge), overwrite=overwrite)
            f.deleteIfExists()
def __init__(self, parentFolder, data_database, id_database, *args, dev=False, **kwargs):
    dev_s = '_dev' if dev else ''
    apiFile = Folder(parentFolder)[f'databin{dev_s}.wl']
    self.data_database = data_database
    self.id_database = id_database
    super().__init__(apiFile, *args, **kwargs)
def take_om_logs(OMP):
    manager = get_manager()
    from mlib.boot.lang import pwd
    from mlib.boot.stream import listitems
    from mlib.file import File, Folder
    with PipelineSection('downloading pipeline section data', log=True):
        OMP.get(
            File(PIPELINE_SECTION_FILE).rel_to(pwd()),
            destination=Folder('_data'),
            overwrite=True
        )
    for seclabel, secdata in listitems(File(PIPELINE_SECTION_FILE).load(silent=True)):
        while seclabel in manager.PIPELINE_SECTIONS:
            seclabel = next_int_suffix(seclabel)
        manager.PIPELINE_SECTIONS[seclabel] = secdata
def count():
    log('count here 1')
    data = {
        'train'     : count_split("train"),
        'validation': count_split("validation"),
    }
    real_data = {}
    for k, v in listitems(data['train']):
        real_data[k] = {'train': v}
    for k, v in listitems(data['validation']):
        real_data[k]['validation'] = v
    real_data = json.dumps(real_data, indent=2)
    log(f'data sample: {real_data[:20]}')
    Folder('_data').mkdir()
    File('_data/imagenet_count.json').write(real_data)
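# Illustration (added, values hypothetical): the written JSON maps each class
# key to its per-split counts, e.g.:
#
#   {
#     "n01440764": {"train": 1300, "validation": 50},
#     ...
#   }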
from mlib.file import Folder

SINGULARITY_DATA_FOLDER = Folder('/matt/data')
OM_DATA_FOLDER = Folder('/om2/user/mjgroth/data')

DATA_FOLDER = OM_DATA_FOLDER
class AssembledModel(ModelWrapper, ABC):
    # @dataclass
    # class STATIC_ATTS(ModelWrapper.STATIC_ATTS):
    ARCH_LABEL = Abstract(str)
    HEIGHT_WIDTH = Abstract(int)
    WEIGHTS = Abstract(Optional[str])
    FLIPPED_CONV_WEIGHTS = False
    OUTPUT_IDX = None
    PP = Abstract(str)
    # IS_PRETRAINED: bool = field(init=False)

    @classmethod
    def __meta_post_init__(cls):
        # def __post_init__(self):
        super().__meta_post_init__()
        cls.IS_PRETRAINED = cls.WEIGHTS is not None

    WEIGHTS_PATH = Folder('_weights')
    ONNX_WEIGHTS_PATH = WEIGHTS_PATH['matlab']

    def weightsf(self):
        return self.WEIGHTS_PATH[self.WEIGHTS].abspath

    def oweightsf(self):
        return self.ONNX_WEIGHTS_PATH[f'{self.ARCH_LABEL}.onnx'].abspath

    @log_invokation(with_class=True)
    def build_net(self, FLAGS):
        dims = [self.HEIGHT_WIDTH, self.HEIGHT_WIDTH, self.HEIGHT_WIDTH]
        dims[self.CI] = 3
        from tensorflow.python.keras import Input
        self.inputs = Input(tuple(dims))
        self.net = self.tf.python.keras.models.Model(
            inputs=self.inputs,
            outputs=self.assemble_layers(),
            name=self.FULL_NAME.replace(' ', '_')
        )
        if self.WEIGHTS is not None and FLAGS.TRANSFER_LEARNING:
            # transfer learning
            self._load_weights()
            self.write_weight_reports()
            if self.FLIPPED_CONV_WEIGHTS:
                self._flip_conv_weights()
        elif (self.WEIGHTS is not None) and (not FLAGS.TRANSFER_LEARNING):
            log('not loading weights because TRANSFER_LEARNING is disabled')
        self._compile(net_mets.METS_TO_USE())

    @log_invokation
    def test_record(self, ei):
        nnstate.CURRENT_PRED_MAP = self.train_data.class_label_map
        nnstate.CURRENT_TRUE_MAP = self.test_data.class_label_map
        ds = self.test_data.dataset(self.HEIGHT_WIDTH)
        steps = self.test_data.num_steps
        log('Recording(1)... (ims=$,steps=$)', len(self.test_data), steps)
        net_mets.cmat = zeros(
            len(listkeys(nnstate.CURRENT_PRED_MAP)),
            len(listkeys(nnstate.CURRENT_TRUE_MAP))
        )
        inter_lay_name = self.net.layers[self.INTER_LAY].name
        inter_output_model = self.tf.python.keras.models.Model(
            self.net.input,
            self.net.get_layer(index=self.INTER_LAY).output
        )
        y_pred = arr(self.net.predict(
            ds,
            steps=steps,
            verbose=Verbose.PROGRESS_BAR,
            use_multiprocessing=True,
            workers=16,
        ))
        log('done recording(1)')
        if len(y_pred.shape) == 3:  # GNET has 3 outputs, all identical I guess but not sure
            y_pred = y_pred[2]
        log('Recording(2)... (ims=$,steps=$)', len(self.test_data), steps)
        inter_activations = arr(inter_output_model.predict(
            ds,
            steps=steps,
            verbose=Verbose.PROGRESS_BAR,
            use_multiprocessing=True,
            workers=16
        ))
        log('done recording(2)')
        x, _ = self.test_data.x(self)
        y = self.test_data.y(self)
        y_true = arr(y).flatten()
        raw_images = x
        raw_images2 = []
        if len(x.shape) == 5:
            for batch in raw_images:
                for im in batch:
                    raw_images2.append(im)
        else:
            raw_images2 = raw_images
        raw_images = arr(raw_images2)
        raw_images2 = []
        for i in itr(raw_images):
            raw_images2.append(raw_images[i].flatten())
        raw_images = arr(raw_images2)
        inter_shape = inter_activations.shape
        inter_activations = np.reshape(inter_activations, (inter_shape[0], -1))
        BLOCK_LEN = 10  # I'm writing this bc I think it was always 10 back when I ran this code
        TEST_CLASS_MAP = nnstate.CURRENT_TRUE_MAP
        clas_set = ClassSet([Class(name=k, index=v) for k, v in TEST_CLASS_MAP.items()])

        def run_and_save_rsa(nam, mat1, layer_name=None, layer_i=None):
            index_to_cn = {v: k for k, v in TEST_CLASS_MAP.items()}
            feature_matrix = FeatureMatrix(
                mat1,
                clas_set,
                [Class(index_to_cn[iii], iii) for iii, yt in enum(y_true)]
            )
            feature_matrix.sort_by_class_name()
            fd = feature_matrix.compare(rsa_norm).image_plot()
            tit = f'L2-{nam}'
            fd.title = f'{tit} ({nnstate.FLAGS.arch}{nnstate.FLAGS.ntrain}E{ei + 1})'
            if nam == 'Inter':
                fd.title = f'{fd.title}(Layer{layer_i}:{layer_name})'
            save_dnn_data(fd, tit, f'CM{ei + 1}', 'mfig')

        run_and_save_rsa('Output', y_pred, layer_name='Output', layer_i='-1')
        run_and_save_rsa('Inter', inter_activations, layer_name=inter_lay_name, layer_i=self.INTER_LAY)
        run_and_save_rsa('Raw', raw_images)

        for met in net_mets.METS_TO_USE():
            met(y_true, y_pred)
        log('done recording.')

    @abstractmethod
    def assemble_layers(self):
        pass

    def _load_weights(self):
        try:
            self.net.load_weights(self.weightsf())
        except Exception:
            import traceback
            print(traceback.format_exc())
            ww = File(self.weightsf()).load()  # DEBUG
            for k in listkeys(ww):
                for kk in listkeys(ww[k]):
                    print(f'{kk}: {ww[k][kk].shape}')
            err('could not load weights')

    def write_weight_reports(self):
        import h5py
        weights_file = h5py.File(self.weightsf(), "r")
        weights_report_file = self.arch_summary_folder[f'{self.ARCH_LABEL}_weights.txt']
        o_weights_report_file = self.arch_summary_folder[f'{self.ARCH_LABEL}_weights_matlab.txt']
        weights_report_file.write('')

        def processGroup(group, rep, indent=0):
            for ke in listkeys(group):
                rep += '\t' * indent
                rep += ke
                item = group[ke]
                if 'Dataset' in cn(item):
                    rep += f'\t\t{item.shape} {item.dtype}\n'
                elif 'Group' in cn(item):
                    rep += '\n'
                    rep = processGroup(item, rep, indent + 1)
                else:
                    err(f'what is this: {cn(item)}')
            return rep

        report = processGroup(weights_file, '')
        log('writing weights report...')
        weights_report_file.write(report)
        log('finished writing weights report')
        log('writing matlab weight report...')
        warn('THERE ARE 2 VERSIONS OF THE ONNX FILES IN _weights/matlab AND I DONT KNOW THE DIFFERENCE')
        import onnx
        o_model = onnx.load(self.oweightsf())
        o_weights_report_file.write(repr(o_model.graph.node))
        log('finished writing matlab weight report...')

    def _flip_conv_weights(self):
        # Theano -> Tensorflow: just flips the weight arrays in the first 2 dims. Doesn't change shape.
        for layer in self.net.layers:
            if layer.__class__.__name__ == 'Conv2D':
                from tensorflow.keras import backend
                original_w = backend.eval(layer.kernel)
                from tensorflow.python.keras.utils.conv_utils import convert_kernel
                converted_w = convert_kernel(original_w)
                layer.kernel.assign(converted_w)
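# Hedged sketch (added, not part of the original): per the comment in
# _flip_conv_weights, Theano<->TensorFlow conversion of a Conv2D kernel
# reverses the two spatial axes while leaving the shape alone. The equivalent
# pure-numpy operation, for reference:
def _demo_flip_conv_kernel():
    import numpy as np
    w = np.arange(3 * 3 * 2 * 4).reshape(3, 3, 2, 4)  # (h, w, in_ch, out_ch)
    flipped = w[::-1, ::-1, :, :]  # flip first 2 dims; shape is unchanged
    assert flipped.shape == w.shape
    return flipped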
def after_build(self, FLAGS, tf_net: ModelWrapper):
    if tf_net.pretrained and 'SANITY' in FLAGS.pipeline:
        IN_files = tf_net.IMAGE_NET_FOLD['unknown'].files
        r = {
            'files': IN_files.map(__.name),
            'ml'   : {},
            'tf'   : {}
            # 'ml2tf': {}
        }
        # ml2tf_net = tf_net.from_ML_vers().build()
        for pp_name, pp in listitems(preprocessors(tf_net.hw)):
            # r['ml2tf'][pp_name] =
            if SANITY_SET != SanitySet.Set100:
                import tensorflow as tf
                root = DATA_FOLDER.resolve('ImageNet/output')  # result was previously unassigned, leaving `root` undefined below
                # root = Folder('/matt/data/ImageNet/output_tf')
                filenames = root.glob('validation*').map(lambda f: f.abspath).tolist()
                r['tf']['y_true'] = [None] * SANITY_SET.num
                ds = tf.data.TFRecordDataset(filenames)
                image_feature_description = {
                    'image/height'      : tf.io.FixedLenFeature([], tf.int64),
                    'image/width'       : tf.io.FixedLenFeature([], tf.int64),
                    'image/colorspace'  : tf.io.FixedLenFeature([], tf.string),
                    'image/channels'    : tf.io.FixedLenFeature([], tf.int64),
                    'image/class/label' : tf.io.FixedLenFeature([], tf.int64),
                    'image/class/synset': tf.io.FixedLenFeature([], tf.string),
                    'image/class/text'  : tf.io.FixedLenFeature([], tf.string),
                    # 'image/object/bbox/xmin' : tf.io.FixedLenFeature([], tf.float32),
                    # 'image/object/bbox/xmax' : tf.io.FixedLenFeature([], tf.float32),
                    # 'image/object/bbox/ymin' : tf.io.FixedLenFeature([], tf.float32),
                    # 'image/object/bbox/ymax' : tf.io.FixedLenFeature([], tf.float32),
                    # 'image/object/bbox/label': tf.io.FixedLenFeature([], tf.int64),
                    'image/format'      : tf.io.FixedLenFeature([], tf.string),
                    'image/filename'    : tf.io.FixedLenFeature([], tf.string),
                    'image/encoded'     : tf.io.FixedLenFeature([], tf.string),
                }
                imap = {}

                def input_gen():
                    for i, raw_record in enum(ds):
                        example = tf.io.parse_single_example(raw_record, image_feature_description)
                        r['tf']['y_true'][i] = example['image/class/label'].numpy()
                        rrr = tf.image.decode_jpeg(example['image/encoded'], channels=3).numpy()
                        imap[i] = rrr
                        yield rrr

                igen = input_gen()

                def get_input(index):
                    if index not in imap:
                        next(igen)
                        return get_input(index)
                    else:
                        rr = imap[index]
                        for k in list(imap.keys()):
                            if k < index:
                                del imap[k]
                        return rr

                IN_files = get_input
                r['tf'][pp_name] = simple_predict(
                    tf_net,  # ml2tf_net
                    pp,
                    IN_files,
                    length=SANITY_SET.num,  # length=50000
                )
            # else:
            #     y_pred = V_Stacker()
            #     # root = Folder('/xboix/data/ImageNet/raw-data/validation')
            #     root = Folder('/matt/data/ImageNet/output')
            #     filenames = root.glob('validation*').tolist()
            #     ds = tf.data.TFRecordDataset(filenames)
            #     for subroot in root:
            #         for imgfile in subroot:
            #             y_pred += tf_net.net.predict(dset, verbose=1)
            #     r['tf'][pp_name] = y_pred
            # if tf_net.OUTPUT_IDX is not None:
            #     r['tf'][pp_name] = r['tf'][pp_name][tf_net.OUTPUT_IDX]
        for pp_name in ['none', 'divstd_demean', 'unit_scale', 'demean_imagenet', 'DIIL']:
            r['ml'][pp_name] = Folder('_data/sanity')[tf_net.label][
                f'ImageNetActivations_Darius_{pp_name}.mat'
            ].load()['scoreList']
            # this was for before, when darius was using the old order of activations:
            # [File('image_net_map.p').load(), :]
        save_dnn_data(
            data=r,
            domain='sanity',
            nam='sanity',
            ext='pickle'
        )
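# Hedged sketch (added, not original): get_input above advances a generator
# until the requested index appears in a dict cache, then evicts everything
# older to bound memory. The same pattern in isolation, with a plain generator
# standing in for the TFRecord stream:
def _demo_lazy_indexed_reader():
    cache = {}
    gen = ((i, i * i) for i in range(10))  # stand-in for the record stream

    def read(index):
        while index not in cache:
            i, value = next(gen)
            cache[i] = value
        result = cache[index]
        for k in list(cache):
            if k < index:
                del cache[k]  # evict older entries to bound memory
        return result

    assert read(3) == 9
    return read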
def from_folder(f):
    if len(f.name.split('-')) != 2:
        err(f'{f} is not named right: {f.name.split("-")}')
    return DNN_ExperimentGroup(
        index=f.name.split('-')[0],
        name=f.name.split('-')[1],
        folder=Folder(f)
    )
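# Usage note (added, hypothetical example): expects folders named
# '<index>-<name>'. Assuming from_folder is exposed on DNN_ExperimentGroup:
#
#   eg = DNN_ExperimentGroup.from_folder(Folder('_figs/figs_dnn/12-pilot'))
#   eg.index  # '12'
#   eg.name   # 'pilot'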
def __init__(self):
    assert FigData.FOLDER is None
    self.fold = Folder('temp/afolder', quiet=True).mkdirs().clear(silent=True)
    FigData.FOLDER = self.fold
# Prepare a python environment with the required packages.
# (TODO) set up a proper requirements.json or share the conda environment folder.
# For now, please use /om2/user/mjgroth/miniconda3/envs/dnn39/bin/python3
#
# Go into a new, empty directory, then:
#   clone https://github.com/mgroth0/dnn.git
#   clone https://github.com/mgroth0/mlib
#   cd dnn
# Using the python environment from above:
#   PYTHONPATH=.:../mlib <python3> main/rsa.py

# set this to where data files are located
# necessary files: jet.mat and activations
mlib.val.DATA_FOLDER = Folder('/om2/user/mjgroth/data')
# set this to the FULL path where you want figures to go
FigData.FOLDER = Folder('/om2/user/mjgroth/temp/dnn/figs')

if __name__ == '__main__':
    for pat in SYM_CLASS_SET_PATTERNS.keys():
        RSAImagePlot(
            tags=['PATTERN', pat],
            comp_mat=rsa_pattern(pat, 10),
            net=f'PATTERN-{pat}',
            layer='PATTERN',
            pattern=pat,
            arch='PATTERN'
        ).save().build()
    rsa_comp.main(
        N_PER_CLASS=5,
        ACT_SIZE=10,
        INCLUDE_DARIUS=True,
from dataclasses import dataclass
from typing import Optional

import numpy as np

from mlib.boot.lang import esorted, ismac, listvalues
from mlib.datamodel.DataModelBase import Class, ClassSet
from mlib.file import File, Folder

OM_IM_DATA_ROOT = Folder('/om2/user/mjgroth/data/im')

_RAW = 'raw'
_TRAIN = 'train'
_TEST = 'test'
_EVAL = 'eval'

class ImageDataset:
    def __init__(self, name):
        self.folder = OM_IM_DATA_ROOT[name]
        self.metadata_file = self.folder['metadata.json']
        self.trans = {_RAW: ImageDatasetTransformation(self, _RAW)}
        self.splits = {n: ImageDatasetSplit(self, n) for n in [_TRAIN, _TEST, _EVAL]}
        if not ismac():
            self.classes = ClassSet([
                ImageDatasetClass(name=File(class_folder).name, index=i, dataset=self)
                for i, class_folder in esorted(listvalues(self.splits)[0].folder(
def dnn(cfg):
    mode = cfg.MODE
    log(f'MODE IS {mode}')
    Project.DNN_FIGS_FIGS_FOLDER.mkdirs()
    TEMP_FIGS_FOLDER = Folder(cfg.root)
    last_eg = get_last_exp_group()
    new_eg = None
    new_fig_folder = None
    muscle = Muscle(local=cfg.MUSCLE == 'local')
    if cfg.CLEAR_EG_DATA and islinux():
        Project.DNN_FIGS_FIGS_FOLDER.clear()
    if 'JUSTRUN' in mode and cfg.SAVE_DATA:
        TEMP_FIGS_FOLDER.mkdir().clear()
    if 'next_exp_id' not in Project.STATE:
        Project.STATE['next_exp_id'] = 1
    if 'last_submitted_exp_group_name' not in Project.STATE:
        Project.STATE['last_submitted_exp_group_name'] = ''

    def check(a):
        Project.STATE["last_submitted_exp_group_name"] = a
        figs_folder, message = get_figs_folder(a)
        return figs_folder is not None, figs_folder if figs_folder is not None else message

    if cfg.EXPERIMENT_NAME is None:
        new_fig_folder = answer_request.answer_request(
            Project.STATE["last_submitted_exp_group_name"],
            "Experiment Name:",
            check,
            gui=cfg.GUI
        )
    else:
        new_fig_folder = check(cfg.EXPERIMENT_NAME)[1]
    new_fig_folder = File(new_fig_folder)
    log(f'{new_fig_folder=}')
    if 'JUSTRUN' in mode or 'PUSH' in mode:
        if cfg.MUSCLE != 'local':
            SyncedFolder(pwd()).sync(config='mitili', lpath='mitili')
        if 'JUSTRUN' in mode:
            cfg.tic = str(mlog.TIC)
            experiments = experiments_from_cfg(cfg, advance_id=True)
            jobs = make_jobs(cfg, muscle=muscle, experiments=experiments)
            assert not cfg.GUI
            muscle.run_all_jobs_main(jobs, serial=cfg.RUN_EXPS_IN_SERIAL, gui=cfg.GUI)
            temp_eg = DNN_ExperimentGroup.temp(TEMP_FIGS_FOLDER)
            temp_eg.save_md(cfg)
            if cfg.SAVE_DATA:
                new_eg = muscle.pull_data(TEMP_FIGS_FOLDER, cfg, new_fig_folder)
    exp_group = new_eg or last_eg
    log(f'MODE IS {mode}')
    if 'COMPILE_TEST_ALL' in mode:
        log('in CTA!')
        analyze_exp_group(exp_group, cfg)
        # the stuff below is only temporarily commented out
        makefigs(exp_group.compile_folder, cfg.fig_backend, overwrite=True)
class DNN(Project):
    MODES = ['JUSTRUN', 'PUSH', 'COMPILE_TEST_ALL', 'MAKEREPORT']
    HUMAN_EXPS_FOLDER = Folder('human_exps')
    if HUMAN_EXPS_FOLDER.exists:
        _human_exp_flags = listmap(__.name, HUMAN_EXPS_FOLDER.folders)
    else:
        _human_exp_flags = []
    extra_flags = _human_exp_flags + MODES + [
        'IMAGENET_COUNT', 'RSA_MAIN', 'RSA_NEW', 'ASD'
    ]

    def run(self, cfg):
        # keep modular
        assert not (cfg.REGEN_DATA and cfg.OVERWRITE_NORMS)  # btw, both imply killing worker before exp
        from lib.dnn_lib import dnn
        from mlib.web.api import API
        from mlib.web.database import Database
        shadow.SHOW_INDEX = False
        if len(cfg.FLAGS) == 1 and cfg.FLAGS[0] == 'IMAGENET_COUNT':
            import count_imagenet_data
            count_imagenet_data.count()
            return None
        elif len(cfg.FLAGS) == 1 and cfg.FLAGS[0] == 'RSA_MAIN':
            # print('here1, doing Darius-RSA')
            # import rsa_for_darius
            import rsa_comp
            rsa_comp.main()
            finish_dnn_remote()
            return None
        elif len(cfg.FLAGS) == 1 and cfg.FLAGS[0] in self._human_exp_flags:
            from lib.human_exp import human_exp
            human_exp(cfg)
        else:
            flag_mode = ''.join(arr(cfg.FLAGS).filtered(lambda s: s in self.MODES))
            if not isblank(flag_mode):
                cfg.MODE = flag_mode
            if isblank(cfg.MODE):
                cfg.MODE = ''.join(self.MODES)  # unnecessary?
            if cfg.offline:
                API.offline_mode = True
                Database.offline_mode = True
                from lib import makereport
                makereport.MAKEREPORT_ONLINE = False
            from mlib.km import kmscript  # keep modular
            if ismac():
                kmscript('activate run tool window')
            dnn(cfg)
            finish_dnn_remote()

    instructions = '''Generate some images, train/test a model, run analyses, and generate plots. Tested on Mac, but not yet on Linux/Windows.

- `./dnn -cfg=gen_images --INTERACT=0`
- `./dnn -cfg=test_one --INTERACT=0`

The second command will fail with a Mathematica-related error, but your results will be saved in `_figs`.

TODO: have to also consider running and developing other executables here: human_exp_1 and human_analyze'''

    configuration = '''-MODE: (default = FULL) is a string that can contain any combination of the following (example: "CLEAN JUSTRUN")

- CLEAN
- JUSTRUN
- GETANDMAKE
- MAKEREPORT

Edit [cfg.yml]() to save configuration options. Feel free to push these. If there is anything hardcoded that you'd like to be configurable, please submit an issue.'''

    credits = 'Darius, Xavier, Pawan\n\nheuritech, raghakot, joel'
from mlib.file import Folder, File

if __name__ == '__main__':
    for shape in Folder('_images_human_copy_3/Contour/30').files:
        for f in shape.files:
            f = File(f)
            n = f.name_pre_ext.split(shape.name)[1]
            f.rename('contour' + str(n) + '.png')
def MITILI_FOLDER():
    if ismac():
        return File(pwd())
    else:
        return File('/home/matt/mitili')

GIT_IGNORE = File('.gitignore')
GIT_DIR = Folder('.git')

@log_invokation()
def push_docs():
    shell('git reset').interact()
    shell('git add docs').interact()
    shell('git commit docs -m "auto-gen docs"').interact()
    shell('git push').interact()

log('imported Project module')

def pingChecker():
    f = File('_logs/local/pingchecker.log', w='')
    p = shell('ping www.google.com')
    while True:
        line = p.readline()
def apifile_for(database_file):
    return Folder(database_file.parent)[f'{database_file.name_pre_ext}.wl']
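# Illustration (added, hypothetical paths): maps a database file to a sibling
# Wolfram Language API file with the same stem, e.g. 'exp/data.json' -> 'exp/data.wl'.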
class Project(SuperRunner, ABC):
    INPUT_FILE = File('_input.txt')
    REQS_FILE = File('reqs.json')
    STATE = PermaDict('data/_metastate.json')

    # noinspection PyMethodMayBeStatic,PyMethodParameters
    def _default_config():
        proto = {'placeholder1': None}
        alll = {'placeholder2': None}
        return {
            'profiles': {
                'proto'  : proto,
                'default': proto
            },
            'configs' : {  # was 'config', which _get_cfg below never reads
                'all'    : alll,
                'default': alll
            }
        }

    CFG = File('cfg.yml', default=_default_config(), quiet=True)
    DOCS_FOLDER = Folder('docs')
    # LOCAL_DOCS_FOLDER = Folder('_docs')
    RESOURCES_FOLDER = DOCS_FOLDER['resources']
    SHADOW_RESOURCES = Folder('_Shadow_Resources')
    FIGS_FOLDER = RESOURCES_FOLDER['figs']
    DNN_FIGS_FOLDER = Folder('_figs')
    DNN_WEB_FOLDER = Folder('_web')
    DNN_FIGS_FIGS_FOLDER = DNN_FIGS_FOLDER['figs_dnn']
    GITHUB_LFS_IMAGE_ROOT = os.path.join(
        'https://media.githubusercontent.com/media/mgroth0/',
        pwdf().name,
        'master'
    )
    PYCALL_FILE = RESOURCES_FOLDER['pycallgraph.png']
    PYDEPS_OUTPUT = None
    LOG_FILE = None
    if main_mod_file() is not None:
        PYDEPS_OUTPUT = RESOURCES_FOLDER[f'{main_mod_file().name_pre_ext}.svg']
        EXECUTABLE = main_mod_file().name_pre_ext
    if File('.git').exists:
        GIT = Repo(pwd())
    mbuild = False
    extra_flags = []
    clear_clear_cache_flags = [
        'clear_cell_cache', 'cell_clear_cache', 'ccc'
    ]

    def registered_flags(self):
        return [
            'readme',
            'build',
            'cell',
        ] + self.extra_flags + self.clear_clear_cache_flags + listkeys(self.fun_registry())

    instructions = ''
    configuration = ''
    credits = ''
    cfg = None

    def _super_run(self, cfg_overrides=None):
        if cfg_overrides is None:
            cfg_overrides = {}
        from mlib.web.html import HTMLObject
        from mlib.web import shadow
        from mlib.proj.stat import py_deps, class_model_report
        self.prep_log_file(None)
        cfg = self._get_cfg()
        for k, v in listitems(cfg_overrides):
            setattr(cfg, k, v)
        self.cfg = cfg
        if ismac():
            # why was this so important again?
            # self.daily(self.write_reqs)
            # and this?
            # self.daily(enable_py_call_graph, Project.PYCALL_FILE)
            pass
        with WOLFRAM:
            if 'build' in cfg.FLAGS and self.mbuild:
                assert len(cfg.FLAGS) == 1
                err('anything that depends on mlib has to push that too')
                build()
                write_README(self)
                self.push()
            elif 'readme' in cfg.FLAGS:
                assert len(cfg.FLAGS) == 1
                write_README(self)
            elif any(x in cfg.FLAGS for x in self.clear_clear_cache_flags):
                assert len(cfg.FLAGS) == 1
                clear_cell_cache()
            elif 'cell' in cfg.FLAGS:
                assert len(cfg.FLAGS) == 3
                analysisFlag = cfg.FLAGS[1]
                cellName = cfg.FLAGS[2]
                analysisO = arr(ANALYSES(AnalysisMode.CELL)).first(
                    lambda o: cn(o) == analysisFlag or mn(o).split('.')[-1] == analysisFlag
                )
                cell = getattr(analysisO, cellName)
                if cell.inputs[0] is not None:
                    inputs = cell.load_cached_input(analysisO)
                    cell(*inputs)
                else:
                    cell()
            else:
                if ismac():
                    # need to have dailyOrFlag
                    # its asking me to delete nap online. not sure if I can do this,
                    # so I'm commenting this out for now.
                    # self.daily(wolf_manager.manage)
                    run_in_daemon(pingChecker)  # this line wasn't nested in ismac before, but got an error in openmind last time I ran
                log('about to run with cfg')
                self.run(cfg)
        self.daily(class_model_report, HTMLObject)
        if ismac():
            self.daily(
                # atexit.register,
                py_deps,
                main_mod_file(),
                Project.PYDEPS_OUTPUT
            )
            # atexit.register(
        if ismac() and shadow.enabled:
            # not doing this on openmind yet because it erases docs_local/results.html,
            # which I am using. need to fix this though
            shadow.build_docs()
        # )
        if ismac():
            reloadIdeaFilesFromDisk()

    def push(self):
        if self.GIT.is_dirty():
            log(f'A diff between the index and the commit’s tree your HEAD points to: {self.GIT.index.diff(self.GIT.head.commit)}')
            log(f'A list of untracked files: {self.GIT.untracked_files}')
            inp = input('Ok to add, commit and push? [y/n] >')
            inp = inp in ['y', 'Y']
            if inp:
                self.GIT.index.add('--all')
                inp = input("Commit Message: ")
                self.GIT.index.commit(inp.strip())
                self.GIT.remotes[0].push()
        else:
            log('repo is not dirty')

    @log_invokation
    def _get_cfg(self):
        assert len(self.registered_flags()) == len(set(self.registered_flags()))
        freecfg = File('freecfg.json').load()
        prof = 'default'
        cfg = 'default'
        changes = {}
        flags = []
        cell = False
        for idx, a in enum(sys.argv):
            if idx == 0:
                continue
            elif a.startswith('--'):
                k, v = tuple(a.replace('--', '').split('='))
                if k == 'tic':
                    continue
                changes[k] = v
            elif a.startswith('-'):
                k, v = tuple(a.replace('-', '').split('='))
                if k == 'prof':
                    prof = v
                elif k == 'cfg':
                    cfg = v
                else:
                    err('arguments with one dash (-) need to be prof= or cfg=')
            elif cell or a in self.registered_flags():
                if a == 'cell':
                    cell = True
                flags += [a]
            else:
                err(f'invalid argument:{a} please see README')
        prof = Project.CFG['profiles'][prof]
        cfg = Project.CFG['configs'][cfg]
        for k in listkeys(prof):
            if k in listkeys(cfg):
                prof_ntrain = prof[k]
                for i, n in enum(cfg[k]):
                    if isstr(n) and n[0] == 'i':
                        cfg[k][i] = prof_ntrain[int(n[1])]
        cfg = {**prof, **cfg, 'FLAGS': flags}
        for k, v in listitems(changes):
            if k not in listkeys(cfg):
                err(f'invalid -- arguments: {k}, please see {Project.CFG.name} for configuration options')
            if isinstance(cfg[k], bool):
                v = bool(int(v))
            cfg[k] = v
        # hello from freecomp
        for k, v in listitems(freecfg):
            log(f'freecfg: {k}:{v}')
            cfg[k] = v
        # cfg['EPOCHS'] = freecfg['EPOCHS']
        return obj(cfg)

    def fun_registry(self):
        if 'daily' not in listkeys(self.STATE):
            self.STATE['daily'] = {}
        return SubDictProxy(self.STATE, 'daily', makeObjs=False)

    def daily(self, fun, *args):
        self._daily(fun, fun.__name__, *args)

    def daily_reminder(self, ss):
        self._daily(lambda: input(ss), ss)

    def _daily(self, fun, key, *args):
        n = key
        if n in self.cfg.FLAGS:
            logy(f'running daily function FROM FLAG: {n}')
            fun(*args)
        elif n not in listkeys(self.fun_registry()):
            logy(f'running daily function: {n}')
            fun(*args)
            self.fun_registry().update({n: time()})
        elif self.fun_registry()[n] < time() - (3600 * 24):
            logy(f'running daily function: {n}')
            fun(*args)
            self.fun_registry().update({n: time()})
        else:
            nex = self.fun_registry()[n] + (3600 * 24)
            logc(f'{n} will run next in {nex - time()} seconds')

    @staticmethod
    def prep_log_file(filename, new=False):
        if filename is None:
            filename = os.path.basename(sys.argv[0]).replace('.py', '')
        if ismac():
            filename = f'_logs/local/{filename}.log'
        else:
            filename = f'_logs/remote/{filename}.log'
        from mlib.file import Folder
        filename = Folder(pwd())[filename]
        if new:
            filename = getNextIncrementalFile(filename)
        if Project.LOG_FILE is None:
            Project.LOG_FILE = File(filename)
        Project.LOG_FILE.deleteIfExists()
        Project.LOG_FILE.write('')
        mlog.LOG_FILE = Project.LOG_FILE
        if not mlog.QUIET:
            log(f'Initialized log file: {File(Project.LOG_FILE).relpath}')

    @classmethod
    def write_reqs(cls):
        File('environment.yml').write(shell('conda env export').all_output())
        reqs_conda = spshell(
            f'{HOME}/miniconda3/bin/conda list -n {pwdf().name} -e'
        ).readlines_and_raise_if_err().filtered(
            lambda l: 'pypi' not in l and (not l.strip().startswith("#"))
        )
        File('reqs_conda.txt').write('\n'.join(reqs_conda))
        conda_prune(just_cache=True)
        good2go = conda_prune()
        return reqs_conda, good2go
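# Hedged sketch (added, not original): _daily above is a run-at-most-once-per-day
# memo keyed by function name, persisted via STATE['daily']. The core logic in
# isolation, with a plain dict standing in for the PermaDict registry:
def _demo_daily(registry, key, fun, now):
    DAY = 3600 * 24
    if key not in registry or registry[key] < now - DAY:
        fun()  # due: run and record the timestamp
        registry[key] = now
    # otherwise: skip until a full day has elapsed since the recorded run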
class ModelWrapper(AbstractAttributes, ABC):
    IMAGE_NET_FOLD = Folder('_ImageNetTesting')

    # @dataclass
    # class STATIC_ATTS(STATIC_ATTS):
    FULL_NAME = Abstract(str)
    CREDITS = Abstract(str)
    INTER_LAY = -2
    CHANNEL_AXIS = 3

    @classmethod
    def __meta_post_init__(cls):
        # ROW_AXIS: int = field(init=False)
        # COL_AXIS: int = field(init=False)
        # ROW_INDEX: int = field(init=False)
        # COL_INDEX: int = field(init=False)
        # CHANNEL_INDEX: int = field(init=False)
        # def __post_init__(self):
        assert cls.CHANNEL_AXIS in [1, 3]
        cls.ROW_AXIS = 1 if cls.CHANNEL_AXIS == 3 else 2
        cls.COL_AXIS = 2 if cls.CHANNEL_AXIS == 3 else 3
        cls.ROW_INDEX = cls.ROW_AXIS - 1
        cls.COL_INDEX = cls.COL_AXIS - 1
        cls.CHANNEL_INDEX = cls.CHANNEL_AXIS - 1

    def __init__(self, *args, **kwargs):
        super().__init__()
        import tensorflow as tf
        self.tf = tf
        self.net = None
        self.train_data: Optional[PreDataset] = None
        self.val_data: Optional[PreDataset] = None
        self.test_data: Optional[PreDataset] = None

    @abstractmethod
    def build_net(self, FLAGS):
        pass

    _OPTIMIZER = 'ADAM'
    _LOSS = 'sparse_categorical_crossentropy'

    @log_invokation
    def _compile(self, mets=None):
        if mets is not None:
            self.net.compile(optimizer=self._OPTIMIZER, loss=self._LOSS, metrics=mets)
        else:
            self.net.compile(optimizer=self._OPTIMIZER, loss=self._LOSS)

    @log_invokation
    def build(self, FLAGS):
        self.build_net(FLAGS)
        assert isinstance(self.net, self.tf.keras.Model)
        self.write_arch_summary()
        if _PLOT_MODEL:
            self.plot_model()
        if _SAVE_MODEL:
            self._save(pretrained=self.pretrained)
        self.net.run_eagerly = True
        return self

    @property
    def CI(self):
        return self.CHANNEL_INDEX

    @property
    def CA(self):
        return self.CHANNEL_AXIS

    def data_format(self):
        if self.CA == 1:
            return 'channels_first'
        elif self.CA == 3:
            return None
        else:
            err('bad CA')

    arch_summary_folder = Folder('_arch')
    arch_summary_folder.mkdirs()

    def write_arch_summary(self):
        arch_summary_file = self.arch_summary_folder[f'{self.ARCH_LABEL}.txt']
        log('writing summary')
        with open(arch_summary_file, 'w') as fh:
            self.net.summary(print_fn=lambda x: fh.write(x + '\n'))

    @log_invokation()
    def plot_model(self):
        arch_summary_im = self.arch_summary_folder[f'{self.ARCH_LABEL}.png']
        try:
            self.tf.keras.utils.plot_model(
                self.net,
                to_file=arch_summary_im.abspath,
                show_shapes=True,
                show_layer_names=True,
                rankdir="TB",
                expand_nested=True,
                dpi=96,
            )
        except AssertionError as e:
            # I think there are sometimes problems creating InceptionResNetV2's plot.
            # This makes sense considering that it is huge. I think AssertionError is
            # thrown when it's too big, but I'm not sure.
            arch_summary_im.deleteIfExists()
            arch_summary_im.res_pre_ext('_sorry').resrepext('txt').write(f'{repr(e)}')

    @log_invokation()
    def _save(self, pretrained=False):
        model_save_file = f'_arch/{self.ARCH_LABEL}'
        if pretrained:
            model_save_file = f'{model_save_file}_pretrained'
        try:
            self.net.save(model_save_file)
            self.net.save(f'{model_save_file}.h5')
            log('saved model')
        except TypeError:
            warn('could not save model due to tf bug')
            File(model_save_file).deleteIfExists()
            File(f'{model_save_file}.h5').deleteIfExists()

    @property
    def pretrained(self):
        return self.IS_PRETRAINED

    @property
    def hw(self):
        return self.HEIGHT_WIDTH

    @property
    def label(self):
        return self.ARCH_LABEL

    def predict(self, inputs, verbose=1, **kwargs) -> np.array:
        import tensorflow as tf
        if not isinstance(inputs, types.GeneratorType) and not isinstance(inputs, tf.keras.utils.Sequence):
            if len(inputs.shape) == 3:
                inputs = np.expand_dims(inputs, axis=0)
        y_pred = self.net.predict(inputs, verbose=verbose, **kwargs)
        if self.OUTPUT_IDX is not None:
            y_pred = y_pred[self.OUTPUT_IDX]
        return y_pred

    def from_ML_vers(self):
        from arch.loaded_model import LoadedModel
        return LoadedModel(
            self.label.replace('_ORIG', ''),
            f'_data/darius_pretrained/{self.label.replace("_ORIG", "")}_pretrained.onnx',
            self.hw,
            is_pretrained=True
        )

    # why was this in assembled model and not here?
    VERBOSE_MODE = Verbose.PRINT_LINE_PER_EPOCH

    def train(self):
        log('training network...')
        nnstate.CURRENT_PRED_MAP = self.train_data.class_label_map
        nnstate.CURRENT_TRUE_MAP = self.train_data.class_label_map
        ds = self.train_data.dataset(self.HEIGHT_WIDTH)
        steps = self.train_data.num_steps
        log('Training... (ims=$,steps=$)', len(self.train_data), steps)
        net_mets.cmat = zeros(
            len(listkeys(nnstate.CURRENT_PRED_MAP)),
            len(listkeys(nnstate.CURRENT_TRUE_MAP))
        )
        history = self.net.fit(
            ds,
            epochs=1,
            verbose=self.VERBOSE_MODE,
            use_multiprocessing=True,
            workers=16,
            steps_per_epoch=steps,
            shuffle=False
        )
        return history

    def val_eval(self):
        nnstate.CURRENT_TRUE_MAP = self.val_data.class_label_map
        ds = self.val_data.dataset(self.HEIGHT_WIDTH)
        steps = self.val_data.num_steps
        log('Testing... (ims=$,steps=$)', len(self.val_data), steps)
        net_mets.cmat = zeros(
            len(listkeys(nnstate.CURRENT_PRED_MAP)),
            len(listkeys(nnstate.CURRENT_TRUE_MAP))
        )
        nnstate.TEST_STEPS = steps
        return self.net.evaluate(
            ds,
            verbose=self.VERBOSE_MODE,
            steps=steps,
            use_multiprocessing=True,
            workers=16,
        )
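# Illustration (added): CHANNEL_AXIS selects the tensor layout. With
# CHANNEL_AXIS == 3 (the default here), a batch has shape (N, H, W, C) and
# data_format() returns None (TensorFlow's channels_last default); with
# CHANNEL_AXIS == 1 a batch is (N, C, H, W) and data_format() returns
# 'channels_first'. CI/ROW_INDEX/COL_INDEX are the same axes shifted by -1,
# i.e. for a single unbatched image.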
class OnlineHumanExperiment(ABC):
    def __init__(self, RESOURCES_ROOT: Folder, _DEV: bool = None):
        assert _DEV is not None
        self._DEV = _DEV
        self.RESOURCES_ROOT = Folder(RESOURCES_ROOT)
        self.EXP_FOLDER = File(inspect.getfile(self.__class__)).parent
        self.FIG_FOLDER = Folder(self.EXP_FOLDER['figs'])
        self.changelist = self.EXP_FOLDER['changelist.yml']
        self.VERSIONS = self.changelist
        self.THIS_VERSION = listitems(self.VERSIONS.load())[-1]
        self.ROOT = self.EXP_FOLDER['build/site']

    def setup_databases_and_apis(self):
        if self._DEV:
            self.DATABASE_IDS = self._setup_database_and_api('ids_dev', hidden=True)
            self.DATABASE_DATA = self._setup_database_and_api('data_dev', hidden=False)
        else:
            self.DATABASE_IDS = self._setup_database_and_api('ids', hidden=True)
            self.DATABASE_DATA = self._setup_database_and_api('data', hidden=False)
        self.EXP_API = ExperimentDataBinAPI(
            self.EXP_FOLDER,
            self.DATABASE_DATA,
            self.DATABASE_IDS,
            dev=self._DEV
        )

    def retrieve_api_info(self, name, hidden):
        database_folder = {
            False: self.EXP_FOLDER,
            True : self.EXP_FOLDER.hidden_version(pwd())
        }[hidden]
        database_file = database_folder[f'{name}.json']
        password_file = database_file.parent[f'._{database_file.name_pre_ext}_password.txt']
        return database_file, password_file

    def _setup_database_and_api(self, name, hidden):
        database_file, password_file = self.retrieve_api_info(name, hidden)
        database = Database(database_file)
        password = password_file.read() if password_file.exists else gen_password()
        password_file.write(password)
        SimpleAdminAPI(database, allow_set=False, password=password)
        return database

    @abstractmethod
    def pre_build(self):
        pass

    @log_invokation(with_result=True)
    def build(self, _UPLOAD_RESOURCES, _LOCAL_ONLY):
        _DEV = self._DEV
        if not _LOCAL_ONLY:
            input(f'make sure to change version ( {self.changelist.abspath} )')
            input('make sure to test all supported OSs and browsers')
        self.pre_build()
        self.setup_databases_and_apis()
        html = self.html_body_children()
        if not isitr(html):
            html = [html]
        htmlDoc = HTMLPage(
            'index',
            self.EXP_API.apiElements(),
            JScript(JS(compile_coffeescript(self.EXP_API.cs()), onload=False)),
            JScript(JS(self.EXP_FOLDER[f'{self.EXP_FOLDER.name}.coffee'])),
            *html,
            arg_tags(
                VERSION=self.THIS_VERSION[0],
                RESOURCES_ROOT=self.RESOURCES_ROOT.wcurl,
                RESOURCES_ROOT_REL=self.RESOURCES_ROOT.rel_to(self.ROOT),
                IS_DEV=_DEV
            ),
            style=self.css()
        )
        write_index_webpage(
            htmlDoc=htmlDoc,
            root=self.ROOT,
            resource_root_file=self.RESOURCES_ROOT,
            upload_resources=_UPLOAD_RESOURCES,
            WOLFRAM=not _LOCAL_ONLY,
            DEV=_DEV
        )

    @abstractmethod
    def css(self):
        pass

    @abstractmethod
    def html_body_children(self):
        pass

    @abstractmethod
    def analyze(self):
        pass
        if not cls._created_shadow:
            Shadow(
                inspect.getmodule(self).__file__,
                show=self.SHOW_SHADOW,
                analysis=self,
                style=PIXEL_CSS
            )
            cls._created_shadow = True

def cell(_func=None, inputs=None):
    def actual_dec(ff):
        return Cell(ff, inputs)
    return actual_dec if _func is None else actual_dec(_func)

CELL_CACHE = Folder('_cache')['cell']

class CellInput(Enum):
    CACHE = 1

@dataclass
class CellShadowOptions:
    ftype = None

class Cell(Decorator):
    def __str__(self):
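# Hedged usage sketch (added): given the decorator factory above, an analysis
# method could plausibly be wrapped either bare or with inputs, e.g.:
#
#   @cell
#   def my_plot(self): ...
#
#   @cell(inputs=CellInput.CACHE)
#   def my_table(self, cached): ...
#
# (the Cell class body is truncated above, so the exact semantics of `inputs`
# are an assumption)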
from mlib.file import Folder

SALIENCE_RESULT_FOLDER = Folder('_data/result/salience').mkdirs()