def _load_weights(self):
    """Load saved weights into self.net, dumping diagnostics on failure.

    On a load error this prints the traceback, then prints the shape of every
    array stored in the weights file (debug aid for shape mismatches), and
    finally aborts via err().
    """
    try:
        self.net.load_weights(self.weightsf())
    except Exception:  # was a bare `except:` — don't swallow KeyboardInterrupt/SystemExit
        import traceback
        print(traceback.format_exc())
        ww = File(self.weightsf()).load()  # DEBUG
        for k in listkeys(ww):
            for kk in listkeys(ww[k]):
                print(f'{kk}: {ww[k][kk].shape}')
        err('could not load weights')
def saveTestValResults(ARCH, nam, ds, ei):
    """Save this epoch's confusion matrix as an 'mfig' figure file."""
    from lib.nn import net_mets
    from lib.nn import nnstate
    # Class labels ordered by their integer index in the prediction/truth maps.
    pred_labels = sorted(listkeys(nnstate.CURRENT_PRED_MAP),
                         key=lambda x: nnstate.CURRENT_PRED_MAP[x])
    true_labels = sorted(listkeys(nnstate.CURRENT_TRUE_MAP),
                         key=lambda x: nnstate.CURRENT_TRUE_MAP[x])
    labeled_mat = add_headers_to_mat(net_mets.cmat,
                                     pred_labels,
                                     true_labels,
                                     alphabetize=True)
    cm = ConfusionMatrix(
        data=labeled_mat.T,
        title=f'{ARCH} E{ei + 1}',
        confuse_max=len(ds) / nnstate.num_pred_classes(),
        headers_included=True,
    )
    save_dnn_data(cm, f'{nam}', f'CM{ei + 1}', 'mfig')
def separate_comp_mat_by_classes_compared(
        normalized1: ComparisonMatrix,
        net: str,
        arch: str,
        method_name: str,
        sim_string: str,
        pattern: str
) -> StatisticalArrays:
    """Split a class-pair comparison matrix into similarity groups defined by *pattern*.

    Each (class, class) sub-matrix of *normalized1* is flattened, NaN-filtered,
    normalized by the grand mean, and appended to the group its class pair maps
    to; pairs whose two classes fall in different groups go into 'AC'
    (across-group comparisons).
    """
    simsets = deepcopy(SYM_CLASS_SET_PATTERN_GROUPINGS[pattern])
    # NOTE(review): the first grouping key is deliberately skipped — presumably
    # it is a placeholder/'AC' slot; confirm against SYM_CLASS_SET_PATTERN_GROUPINGS.
    simkeys = listkeys(SYM_CLASS_SET_PATTERN_GROUPINGS[pattern])[1:]
    for k in simsets:
        simsets[k] = arr()  # rather than list
    average = np.nanmean(normalized1)
    for i, c in enum(RSA_CLASSES):
        for ii, cc in enum(RSA_CLASSES):
            # Only the lower triangle: visit each unordered class pair once.
            if ii > i: continue
            comp_mat = normalized1[c, cc]
            # Flatten, drop NaNs, then normalize by the overall mean score.
            normalized2 = arr([num for num in flatten(comp_mat).tolist() if not isnan(num)]) / average
            groupname1 = simkeys[SYM_CLASS_SET_PATTERNS[pattern][i]]
            groupname2 = simkeys[SYM_CLASS_SET_PATTERNS[pattern][ii]]
            if groupname1 == groupname2:
                simsets[groupname1] += flatten(normalized2)
            else:
                simsets['AC'] += flatten(normalized2)
    return StatisticalArrays(
        ylabel=f'{sim_string} Score ({method_strings[method_name]}) ({pattern_strings[pattern]})',
        xlabel='Class Comparison Group',
        data={k: v for k, v in simsets.items()},
        title_suffix=f'{net}:{RSA_LAYERS[arch]} ({method_name}) ({pattern})'
    )
def compile_eg(self, eg: DNN_ExperimentGroup):
    """Merge per-experiment sanity.pickle results of an experiment group into one dict.

    Result keys are backend names (taken from a representative experiment's
    pickle); each maps arch -> backend data. Only the first experiment seen per
    architecture contributes.
    """
    experiments = experiments_from_folder(eg.folder)
    random_exp = experiments[0]  # any experiment works: only used for the key set
    finished_archs = []
    pname = 'sanity.pickle'
    data = {
        k: {} for k in listkeys(random_exp.folder[f'sanity/{pname}'].load())
    }
    data['dest'] = eg.compile_exp_res_folder[pname].abspath
    for exp in eg.experiments:
        if exp.arch in finished_archs: continue  # one result per architecture
        mat = exp.folder['sanity'][pname].load()
        for backendkey, bedata in listitems(mat):
            data[backendkey][exp.arch] = bedata
            if 'y_true' in bedata:
                # assumes y_true is identical across archs for a backend — TODO confirm
                data[backendkey]['y_true'] = bedata['y_true']
        finished_archs += [exp.arch]
    # NOTE(review): collapses 'files' to the last processed arch's entry; the
    # original indentation was lost — confirm this runs after the loop, not inside it.
    data['files'] = data['files'][exp.arch]
    return data
def set_to_next_id(self, d):
    """Store *d* under the next sequential numeric string key ("0", "1", ...)."""
    existing = listkeys(self.non_proxy_snapshot())
    if not existing:
        next_key = "0"
    else:
        next_key = str(1 + max(int(k) for k in existing))
    self[next_key] = d
def val_eval(self):
    """Evaluate self.net on the validation set, resetting the confusion matrix first."""
    val = self.val_data
    nnstate.CURRENT_TRUE_MAP = val.class_label_map
    val_ds = val.dataset(self.HEIGHT_WIDTH)
    n_steps = val.num_steps
    log('Testing... (ims=$,steps=$)', len(val), n_steps)
    n_pred = len(listkeys(nnstate.CURRENT_PRED_MAP))
    n_true = len(listkeys(nnstate.CURRENT_TRUE_MAP))
    net_mets.cmat = zeros(n_pred, n_true)
    nnstate.TEST_STEPS = n_steps
    return self.net.evaluate(
        val_ds,
        verbose=self.VERBOSE_MODE,
        steps=n_steps,
        use_multiprocessing=True,
        workers=16,
    )
def _help_part(self) -> str:
    """Build a comma-separated short-help string for every command method of this class."""
    seen = set()
    for klass in self.__class__.mro():
        seen.update(listkeys(klass.__dict__))
    commands = [getattr(self, name) for name in seen if self._is_command(name)]
    return ",".join(self._short_help(m) for m in commands)
def non_proxy_snapshot(self, with_ids=False) -> DictExtension:
    """Copy every entry without proxying; optionally tag each value with its key as 'id'."""
    snapshot = {}
    for key in listkeys(self):
        snapshot[key] = self.get_without_proxying(key)
        if with_ids:
            snapshot[key]['id'] = key
        if self.obj_snapshots:
            snapshot[key] = obj(snapshot[key])
    return DictExtension(snapshot)
def build_job(experiment, cfg, muscle, gpus):
    """Build a DNN_Job from the config.

    When *experiment* is None this constructs the special bootstrap job that
    clears remote files and (re)generates the dataset; otherwise it runs one
    architecture at one training-set size.
    """
    return DNN_Job(
        job_args={
            'tic': cfg.tic,
            'expid': '0' if experiment is None else experiment.expid,
            'arch': 'JUST_CLEAR_FILES_AND_GEN_DATASET' if experiment is None else experiment.arch,
            'ntrain': 0 if experiment is None else experiment.ntrain,
            'proto_model': cfg.proto_model,
            'pipeline': '' if experiment is None else cfg.PIPELINE.replace(' ', ''),
            'epochs': cfg.EPOCHS,
            'batchsize': 0 if experiment is None else cfg.BATCH_SIZE,
            'verbose': cfg.VERBOSE,
            'normtrainims': False if experiment is None else cfg.NORM_TRAIN_IMS,
            'salience': cfg.salience,
            'TRANSFER_LEARNING': cfg.TRANSFER_LEARNING,
            'REGEN_NTRAIN': cfg.REGEN_NTRAIN,
            'PRED_SIZE': cfg.PRED_SIZE,
            # Norm/data regeneration only happens in the bootstrap job.
            'deletenorms': cfg.OVERWRITE_NORMS if experiment is None else False,
            'gen': cfg.REGEN_DATA if experiment is None else False  # implies kill
        },
        job_cfg_arg=obj({
            # Dataset-generation config only exists for the bootstrap job.
            'gen_cfg': {
                'num_gpus': max(len(listkeys(muscle.GPU_IN_USE)), 2),
                'TRAINING_SET_SIZES': cfg.NTRAIN,
                'EVAL_SIZE': cfg.eval_nperc,
                'RSA_SIZE_PER_CLASS': cfg.rsa_nperc,
            } if experiment is None else None,
            'root': cfg.root,
            'full_cfg': deepcopy(cfg).toDict(
            )  # REDUNDANT! also needs deepcopy because stupid toDict is in place
        }),
        gpus=gpus,  # [0,1,2,3] if RUN_EXPS_IN_SERIAL else, if empty is actually set to use all 4 in muscle
        interact=cfg.INTERACT,
        remote=not muscle.local,
        # Bootstrap job on a remote muscle also wipes stale state first.
        commands=[
            "rm -rf " + cfg.root,
            "find . -name \"*.pyc\" -exec rm -f {} \\;",
            "pkill -f miniconda3",
            "pkill -f MATLAB"
        ] if (experiment is None and not muscle.local) else [],
    )
def _common(self):
    """Shared keyword arguments used by every figure built from this data."""
    full_title = f'{self.ylabel} by {self.xlabel}'
    if self.title_suffix:
        full_title += f' ({self.title_suffix})'
    # One shade of blue per data series, evenly spaced from black to pure blue.
    blues = np.linspace(0, 1, len(self.data))
    return dict(
        x=listkeys(self.data),
        item_color=[[0, 0, shade] for shade in blues],
        title=full_title,
        xlabel=self.xlabel,
        ylabel=self.ylabel,
        bar_sideways_labels=False,
        title_size=20.0,
    )
def acc_table(self, data):
    """Build an HTML Div with per-backend accuracy tables.

    *data* is the merged sanity dict keyed by backend ('tf', 'ml', ...), then
    by architecture, then by preprocessing variant with 'acc'/'acc5' entries.
    Cells show "<top-1>\\n<top-5>" as integer percentages; a literature table
    and citations are appended at the end.
    """
    titles = {
        'tf': f'Tensorflow ({100 if SANITY_SET == SanitySet.Set100 else SANITY_SET.num})',
        # 'ml2tf': 'MATLAB model imported into Tensorflow',
        'ml': 'MATLAB (100)'
    }
    sanity_report_figdata = []
    for be_key in listkeys(titles):
        be_data = data[be_key]
        # NOTE(review): titles only has 'tf'/'ml' keys, so this guard can never
        # fire here (and it runs after data[be_key]) — presumably vestigial.
        if be_key in ['files', 'dest']:
            continue  # , 'y_true'
        arch_rows = []
        for akey, adata in listitems(be_data):
            if akey in ['y_true']:
                continue  # , 'y_true'
            # NOTE(review): top_row is rebuilt for every arch; the last arch's
            # header wins below — assumes all archs share the same variants.
            top_row = ['Arch']
            ar = [akey]
            for ppkey, ppdata in listitems(adata):
                if ppkey in ['y_true']:
                    continue
                top_row += [ppkey]
                ar += [str(int(ppdata['acc'] * 100)) + '\n' + str(int(ppdata['acc5'] * 100))]
            arch_rows += [ar]
        table = [top_row] + arch_rows
        sanity_report_figdata += [H3(titles[be_key])]
        sanity_report_figdata += [HTML_Pre(str(TextTableWrapper(
            data=table,
            col_align='c' * len(table[0]),
            col_valign='m' * len(table[0])
        )))]
        if be_key == 'ml2tf':
            sanity_report_figdata += ['* Darius has uploaded new models that have not yet been tested']
    sanity_report_figdata += [H3('ImageNet Results from Literature')]
    sanity_report_figdata += [HTML_Pre(str(TextTableWrapper(
        data=[
            ['Arch', 'lit'],
            ['ALEX', f'?\n{int(0.847 * 100)}'],
            ['GNET', f'?\n{int(0.99333 * 100)}'],
            ['INC', f'80.4\n95.3']
        ],
        col_align='c' * 2,
        col_valign='m' * 2
    )))]
    sanity_report_figdata += [HTML_Pre('''
    Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet classification with deep convolutional neural networks." Advances in neural information processing systems. 2012.

    Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of the IEEE conference on computer vision and pattern recognition. 2015.

    Improving Inception and Image Classification in TensorFlow.” Google AI Blog, 31 Aug. 2016, ai.googleblog.com/2016/08/improving-inception-and-image.html.
    ''')]
    return Div(*sanity_report_figdata)
def train(self):
    """Fit self.net for exactly one epoch over the training dataset."""
    log('training network...')
    train = self.train_data
    nnstate.CURRENT_PRED_MAP = train.class_label_map
    nnstate.CURRENT_TRUE_MAP = train.class_label_map
    train_ds = train.dataset(self.HEIGHT_WIDTH)
    n_steps = train.num_steps
    log('Training... (ims=$,steps=$)', len(train), n_steps)
    n_pred = len(listkeys(nnstate.CURRENT_PRED_MAP))
    n_true = len(listkeys(nnstate.CURRENT_TRUE_MAP))
    net_mets.cmat = zeros(n_pred, n_true)
    return self.net.fit(
        train_ds,
        epochs=1,
        verbose=self.VERBOSE_MODE,
        use_multiprocessing=True,
        workers=16,
        steps_per_epoch=n_steps,
        shuffle=False,
    )
def get_or_set_default(self, default, *keys):
    """Walk nested *keys*, returning the stored value.

    If only the leaf key is missing it is created with *default* (optionally
    pushing immediately); a missing intermediate key is a hard error.
    """
    node = self
    last_index = len(keys) - 1
    for depth, key in enum(keys):
        if key in listkeys(node):
            node = node[key]
        elif depth == last_index:
            # Leaf is absent: install the default and sync unless deferred.
            node[key] = default
            if not self.just_sync_at_end:
                self.push()
            return default
        else:
            err(f'need to set root default first: {key}')
    return node
def _daily(self, fun, key, *args):
    """Run *fun* at most once per 24 hours, keyed by *key* in the persistent registry.

    A matching entry in cfg.FLAGS forces the run. NOTE(review): the forced path
    does not update the registry timestamp — confirm that is intended.
    """
    n = key
    if n in self.cfg.FLAGS:
        logy(f'running daily function FROM FLAG: {n}')
        fun(*args)
    elif n not in listkeys(self.fun_registry()):
        # Never ran before: run and record the timestamp.
        logy(f'running daily function: {n}')
        fun(*args)
        self.fun_registry().update({n: time()})
    elif self.fun_registry()[n] < time() - (3600 * 24):
        # Last run more than 24 hours ago.
        logy(f'running daily function: {n}')
        fun(*args)
        self.fun_registry().update({n: time()})
    else:
        nex = self.fun_registry()[n] + (3600 * 24)
        logc(f'{n} will run next in {nex - time()} seconds')
def processGroup(group, rep, indent=0):
    """Append a tab-indented textual outline of an HDF5 group tree to *rep* and return it.

    Datasets are listed with shape and dtype; sub-groups recurse one indent
    level deeper; anything else is a hard error.
    """
    for key in listkeys(group):
        rep += ('\t' * indent) + key
        child = group[key]
        kind = cn(child)
        if 'Dataset' in kind:
            rep += f'\t\t{child.shape} {child.dtype}\n'
        elif 'Group' in kind:
            rep += '\n'
            rep = processGroup(child, rep, indent + 1)
        else:
            err(f'what is this: {kind}')
    return rep
def recurse_h5(ff):
    """Recursively convert an h5py tree into plain Python objects.

    Files and groups become dicts keyed by member name; datasets become numpy
    arrays. Anything else is a hard error (err() aborts).
    """
    # Use isinstance with the public h5py aliases (h5py.Group, h5py.Dataset)
    # instead of comparing type() against private h5py._hl.* classes: this
    # also accepts subclasses and survives h5py's internal reorganizations.
    if isinstance(ff, (h5py.File, h5py.Group)):
        subd = {}
        for k in listkeys(ff):
            subd[k] = recurse_h5(ff[k])
    elif isinstance(ff, h5py.Dataset):
        subd = np.array(ff)
    else:
        err(f'do not know what to do with {ff.__class__}')
    return subd
def build(self):
    """Render the table of correlation coefficients (patterns x networks)."""
    top = [''] + listkeys(self.net_coefs)
    orig_top = deepcopy(top)  # keep full names for dict lookups below
    if self.EXCLUDE_DARIUS_SMALLER_TRAIN_SIZES:
        # Strip the '_<train size>' suffix from net names for display.
        top[1:] = [s.split('_')[0] for s in top[1:]]
    full = [top]
    # Row labels come from the first network's pattern keys; assumes every
    # network has the same patterns in the same order — TODO confirm.
    first = self.net_coefs[orig_top[1]]
    left = [pattern_strings[k] for k in first.keys()]
    cols = [left]
    for coefs in self.net_coefs.values():
        col = [sigfig(v, 3) for v in coefs.values()]
        cols.append(col)
    # Transpose the per-network columns into table rows.
    for i in itr(left):
        row = [col[i] for col in cols]
        full.append(row)
    TableData(
        data=full,
        title=f"Correlation Coefficients Between {method_strings[self.method_name]} of model activations and Perfect Classifier Patterns",
        title_size=70
    ).draw(builder=self, tags=self.tags + ['table', 'CorrCoefTable'])
def get_report(md,
               resources_root,
               exp_name,
               index_url,
               database: Database = None,
               api: SimpleAdminAPI = None,
               editable=False,
               web_resources_root=None,
               show=False):
    """Assemble the full HTML report page for one experiment.

    Gathers analysis figures, per-arch confusion-matrix figures for every
    training-set size, and MCC figures; each figure is paired with a caption
    stored in (and defaulted into) *database*.
    """
    if exp_name not in listkeys(database):
        database[exp_name] = {}
    # Figure data contributed by each pipeline analysis.
    analysis_figdata = flat1([
        a.get_report_figdata(exp_name, resources_root, database)
        for a in ANALYSES(mode=AnalysisMode.PIPELINE)
    ])
    all_arch_figs = []
    ntrain_figs = md.ntrainims
    nepochs = md.nepochs
    for n in ntrain_figs:
        for suffix in [
            f'__val_CM{nepochs}',
            f'__L2-Output_CM{nepochs}',
            f'__L2-Inter_CM{nepochs}',
            f'__L2-Raw_CM{nepochs}'
        ]:
            arch_figs = listmap(__.label, md.archs)
            # Pair each figure file with its (possibly empty) caption.
            arch_figs = listmap(
                lambda a: (resources_root[f'{a}_{n}{suffix}.png'],
                           database.get_or_set_default(
                               '', exp_name, f'{a}_{n}{suffix}')),
                arch_figs)
            all_arch_figs.extend(arch_figs)
    mcc_name = f'Matthews_Correlation_Coefficient'
    ntrain_figs = listmap(
        lambda n: (resources_root[f'{mcc_name}_{n}.png'],
                   database.get_or_set_default('', exp_name, f'{mcc_name}_{n}')),
        ntrain_figs)
    doc = HTMLPage(
        f'index',
        f'Symmetry Detection Report by Matt Groth',
        f'',
        f'{len(md.archs)} Architectures: ',
        *listmap(__.label, md.archs),
        f'Experiments: {md.nrepeats} per architecture',
        f'Epochs: {md.nepochs} per experiment',
        f'Batch Size: {md.batchsize}',
        f'Training Images: {md.ntrainims}',
        f'Normalized Individual Images: {md.normalized}',
        FigureTable(
            *analysis_figdata,
            (resources_root['Final_Train_MCC.png'],
             database.get_or_set_default('', exp_name, 'Final_Train_MCC')),
            *ntrain_figs,
            (pwdf()["/_figs/figs_misc/RSA_patterns/RSA_patterns.001.jpeg"],
             database.get_or_set_default('', exp_name, 'RSA_patterns'))
            if INCLUDE_RSA_SLIDE else None,
            *all_arch_figs,
            resources_root=web_resources_root,
            exp_id=exp_name,
            editable=editable),
        Hyperlink('back to index', index_url),
        api.apiElements(),
        js=api.cs(),
        style=DNN_REPORT_CSS,
        show=show)
    return doc
def test_record(self, ei):
    """Record test-set predictions and activations for epoch *ei*.

    Runs the net (and an intermediate-layer sub-model) over the test set,
    saves L2/RSA comparison figures for the output layer, an intermediate
    layer, and the raw images, then computes all configured metrics.
    """
    nnstate.CURRENT_PRED_MAP = self.train_data.class_label_map
    nnstate.CURRENT_TRUE_MAP = self.test_data.class_label_map
    ds = self.test_data.dataset(self.HEIGHT_WIDTH)
    steps = self.test_data.num_steps
    log('Recording(1)... (ims=$,steps=$)', len(self.test_data), steps)
    net_mets.cmat = zeros(
        len(listkeys(nnstate.CURRENT_PRED_MAP)),
        len(listkeys(nnstate.CURRENT_TRUE_MAP)))
    inter_lay_name = self.net.layers[self.INTER_LAY].name
    # Secondary model exposing the INTER_LAY layer's activations.
    inter_output_model = self.tf.python.keras.models.Model(
        self.net.input,
        self.net.get_layer(index=self.INTER_LAY).output)
    y_pred = arr(
        self.net.predict(
            ds,
            steps=steps,
            verbose=Verbose.PROGRESS_BAR,
            use_multiprocessing=True,
            workers=16,
        ))
    log('done recording(1)')
    if len(
            y_pred.shape
    ) == 3:  # GNET has 3 outputs, all identical I guess but not sure
        y_pred = y_pred[2]
    log('Recording(2)... (ims=$,steps=$)', len(self.test_data), steps)
    inter_activations = arr(
        inter_output_model.predict(ds,
                                   steps=steps,
                                   verbose=Verbose.PROGRESS_BAR,
                                   use_multiprocessing=True,
                                   workers=16))
    log('done recording(2)')
    x, _ = self.test_data.x(self)
    y = self.test_data.y(self)
    y_true = arr(y).flatten()
    raw_images = x
    raw_images2 = []
    # Un-batch if images arrive batched (5 dims) — assumed (batches, batch, H, W, C); TODO confirm.
    if len(x.shape) == 5:
        for batch in raw_images:
            for im in batch:
                raw_images2.append(im)
    else:
        raw_images2 = raw_images
    raw_images = arr(raw_images2)
    # Flatten each image to a 1-D feature vector.
    raw_images2 = []
    for i in itr(raw_images):
        raw_images2.append(raw_images[i].flatten())
    raw_images = arr(raw_images2)
    # Flatten intermediate activations to (num_images, features).
    inter_shape = inter_activations.shape
    inter_activations = np.reshape(inter_activations, (inter_shape[0], -1))
    BLOCK_LEN = 10  # I'm writing this bc I think it was always 10 back when I ran this code
    TEST_CLASS_MAP = nnstate.CURRENT_TRUE_MAP
    clas_set = ClassSet(
        [Class(name=k, index=v) for k, v in TEST_CLASS_MAP.items()])

    def run_and_save_rsa(nam, mat1, layer_name=None, layer_i=None):
        # Build a FeatureMatrix from *mat1*, compare across classes, and save
        # the resulting image plot as an 'mfig'.
        index_to_cn = {v: k for k, v in TEST_CLASS_MAP.items()}
        feature_matrix = FeatureMatrix(
            mat1, clas_set,
            [Class(index_to_cn[iii], iii) for iii, yt in enum(y_true)])
        feature_matrix.sort_by_class_name()
        fd = feature_matrix.compare(rsa_norm).image_plot()
        tit = f'L2-{nam}'
        fd.title = f'{tit} ({nnstate.FLAGS.arch}{nnstate.FLAGS.ntrain}E{ei + 1})'
        if nam == 'Inter':
            fd.title = f'{fd.title}(Layer{layer_i}:{layer_name})'
        save_dnn_data(fd, tit, f'CM{ei + 1}', 'mfig')

    run_and_save_rsa('Output', y_pred, layer_name='Output', layer_i='-1')
    run_and_save_rsa('Inter', inter_activations, layer_name=inter_lay_name,
                     layer_i=self.INTER_LAY)
    run_and_save_rsa('Raw', raw_images)
    for met in net_mets.METS_TO_USE():
        met(y_true, y_pred)
    log('done recording.')
def _get_cfg(self):
    """Parse sys.argv plus config files into a single cfg object.

    Precedence (lowest to highest): profile < config < '--key=value' CLI
    overrides < freecfg.json entries. '-prof=' / '-cfg=' select which profile
    and config to use; bare arguments must be registered flags.
    """
    # Registered flag names must be unique.
    assert len(self.registered_flags()) == len(set(self.registered_flags()))
    freecfg = File('freecfg.json').load()
    prof = 'default'
    cfg = 'default'
    changes = {}
    flags = []
    cell = False
    for idx, a in enum(sys.argv):
        if idx == 0: continue  # skip program name
        elif a.startswith('--'):
            k, v = tuple(a.replace('--', '').split('='))
            if k == 'tic': continue  # timing arg handled elsewhere
            changes[k] = v
        elif a.startswith('-'):
            k, v = tuple(a.replace('-', '').split('='))
            if k == 'prof':
                prof = v
            elif k == 'cfg':
                cfg = v
            else:
                err('arguments with one dash (-) need to be prof= or cfg=')
        elif cell or a in self.registered_flags():
            # Once 'cell' appears, every following bare argument is accepted as a flag.
            if a == 'cell':
                cell = True
            flags += [a]
        else:
            err(f'invalid argument:{a} please see README')
    prof = Project.CFG['profiles'][prof]
    cfg = Project.CFG['configs'][cfg]
    # Resolve 'iN' placeholders in list-valued config entries against the profile list.
    for k in listkeys(prof):
        if k in listkeys(cfg):
            prof_ntrain = prof[k]
            for i, n in enum(cfg[k]):
                if isstr(n) and n[0] == 'i':
                    cfg[k][i] = prof_ntrain[int(n[1])]
    cfg = {**prof, **cfg, 'FLAGS': flags}
    for k, v in listitems(changes):
        if k not in listkeys(cfg):
            err(f'invalid -- arguments: {k}, please see {Project.CFG.name} for configuration options')
        # Booleans arrive as '0'/'1' strings on the command line.
        if isinstance(cfg[k], bool):
            v = bool(int(v))
        cfg[k] = v
    # hello from freecomp
    for k, v in listitems(freecfg):
        log(f'freecfg: {k}:{v}')
        cfg[k] = v
    # cfg['EPOCHS'] = freecfg['EPOCHS']
    return obj(cfg)
def fun_registry(self):
    """Return a proxy over the persistent 'daily' registry, creating it on first use."""
    state = self.STATE
    if 'daily' not in listkeys(state):
        state['daily'] = {}
    return SubDictProxy(state, 'daily', makeObjs=False)
def registered_flags(self):
    """All CLI flag names this project accepts, including persisted daily-function keys."""
    return [
        'readme',
        'build',
        'cell',
    ] + self.extra_flags + self.clear_clear_cache_flags + listkeys(
        self.fun_registry()
    )
# NOTE(review): original indentation was lost — presumably a class-level
# default that subclasses override with usage text; confirm placement.
instructions = ''
def getCode(self, resource_root, resource_root_rel, force_fix_to_abs=False): ml = '<!DOCTYPE html>' # with Temp('temp.css') as f: # f.write(self.style) head_objs = [ '''<meta content="text/html;charset=utf-8" http-equiv="Content-Type"> <meta content="utf-8" http-equiv="encoding">''', '<META HTTP-EQUIV="CACHE-CONTROL" CONTENT="NO-CACHE">' ] + ([HTMLCSSLink(href='style.css')] if self.style_dot_css else []) + [ StyleTag(self.style) # StyleTag(lesscpy.compile(f.abspath, minify=True)) ] if self.jQuery: head_objs.extend([ ExternalScript( src= "https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js" ), ExternalScript( src= "https://ajax.googleapis.com/ajax/libs/jqueryui/1.12.1/jquery-ui.min.js" ), HTMLCSSLink( href= "https://ajax.googleapis.com/ajax/libs/jqueryui/1.12.1/themes/smoothness/jquery-ui.css" ) ]) for cdn in self.CDNs: head_objs.append(ExternalScript(cdn)) # head_objs.append(ExternalScript( # Blocked loading mixed active content # src='http://cdn.jsdelivr.net/gh/bestiejs/platform.js/platform.js', # )) head_objs.extend( listmap(lambda x: ExternalScript(src=x.replace('.coffee', '.js')), self.javascript_files) + ([JScript(JS(self.js, onload=self.wrap_js))] if self. js is not None else [])) columnChildren = [] for c in self.children: columnChildren.append(c) if isstr(c) or 'hidden' not in listkeys(c.attributes): columnChildren.append(Br) if columnChildren and columnChildren[-1] == Br: columnChildren = columnChildren[:-1] ml += HTMLRoot( HTMLHead(*head_objs), HTMLBody(*columnChildren, style=self.bodyStyle, **self.bodyAttributes, identified=self.identified).getCode( resource_root, resource_root_rel, force_fix_to_abs)).getCode(resource_root, resource_root_rel, force_fix_to_abs) return ml
def nnet_main(FLAGS):
    """Main DNN pipeline entry point.

    Optionally (re)generates image data, resolves the train/test/RSA data
    folders (either the dogs/cats salience set or the per-GPU generated set),
    builds the network, and hands off to trainTestRecord.
    """
    FLAGS.epochs = int(FLAGS.epochs)
    _IMAGES_FOLDER = pwdf()['_images'].mkdirs(mker=True)
    HUMAN_IMAGE_FOLDER = pwdf()['_images_human'].mkdirs(mker=True)
    if FLAGS.gen:
        gen_main(FLAGS, _IMAGES_FOLDER, HUMAN_IMAGE_FOLDER)
    if FLAGS.salience:
        # Salience mode: build fresh ntrain/ntest folders from the kaggle
        # dogs-vs-cats set by random sampling.
        class_map = {'dog': 0, 'cat': 1}
        dogcatfolder = DATA_FOLDER.resolve(
            'tf_bug1/dogscats')  # thousands, downloaded from kaggle
        ntrain_folder = dogcatfolder['ntrain']
        dummy_folder = dogcatfolder['dummy'].mkdir()
        ntrain_folder.deleteIfExists().mkdir()
        for k, v in listitems(class_map):
            log('getting files')
            files = dogcatfolder['Training'][k].files.tolist()
            random.shuffle(files)
            log('looping files')
            for im in files[0:FLAGS.ntrain]:
                im.copyinto(ntrain_folder[k])
        # NTEST = 100
        NTEST = int(FLAGS.batchsize / len(listitems(class_map)))
        ntest_folder = dogcatfolder['ntest']
        ntest_folder.deleteIfExists().mkdir()
        for k, v in listitems(class_map):
            log('getting files')
            files = dogcatfolder['Testing'][k].files.tolist()
            random.shuffle(files)
            log('looping files')
            for im in files[0:NTEST]:
                im.copyinto(ntest_folder[k])
        GPU_TRAIN_FOLDER = NN_Data_Dir(ntrain_folder.abspath)
        GPU_TEST_FOLDER = NN_Data_Dir(ntest_folder.abspath)
        GPU_RSA_FOLDER = NN_Data_Dir(dummy_folder.abspath)
    else:
        # Normal mode: per-GPU pre-generated image folders.
        GPU_IMAGES_FOLDER = _IMAGES_FOLDER[f'gpu{FLAGS.mygpufordata}']
        GPU_TRAIN_FOLDER = NN_Data_Dir(
            GPU_IMAGES_FOLDER[f'Training/{FLAGS.ntrain}'])
        GPU_TEST_FOLDER = NN_Data_Dir(GPU_IMAGES_FOLDER[f'Testing'])
        GPU_RSA_FOLDER = NN_Data_Dir(GPU_IMAGES_FOLDER[f'RSA'])
    if FLAGS.deletenorms:
        GPU_TRAIN_FOLDER.delete_norm_dir()
        GPU_TEST_FOLDER.delete_norm_dir()
        GPU_RSA_FOLDER.delete_norm_dir()
        # NOTE(review): original indentation lost — presumably this signal runs
        # only in the deletenorms branch; confirm.
        nn_init_fun.NRC_IS_FINISHED(
        )  # must be invoked this way since value of function changes
    if FLAGS.normtrainims:
        err('im doing this?')
    nnstate.use_reduced_map = len(GPU_TRAIN_FOLDER.files) != len(
        GPU_TEST_FOLDER.files)
    datasetTrain, _ = load_and_preprocess_ims(
        TRAIN_TEST_SPLIT=1,
        data_dir=GPU_TRAIN_FOLDER,
        normalize_single_images=FLAGS.normtrainims)
    _, datasetVal = load_and_preprocess_ims(
        TRAIN_TEST_SPLIT=0,
        data_dir=GPU_TEST_FOLDER,
        normalize_single_images=FLAGS.normtrainims)
    _, datasetTest = load_and_preprocess_ims(
        TRAIN_TEST_SPLIT=0,
        data_dir=GPU_RSA_FOLDER,
        normalize_single_images=FLAGS.normtrainims)
    if FLAGS.proto_model:
        net = PROTO()
    else:
        net = ARCH_MAP[FLAGS.arch](
            max_num_classes=len(listkeys(datasetTest.class_label_map)))
    net.build(FLAGS)
    [a.after_build(FLAGS, net) for a in ANALYSES(mode=AnalysisMode.PIPELINE)]
    net.train_data = datasetTrain.prep(net.HEIGHT_WIDTH, net.PP)
    net.val_data = datasetVal.prep(net.HEIGHT_WIDTH, net.PP)
    net.test_data = datasetTest.prep(net.HEIGHT_WIDTH, net.PP)
    return trainTestRecord(net, '', FLAGS.epochs)
def _darius_and_shobhita_acts( N_PER_CLASS, ACT_SIZE, SHOBHITA=True, EXCLUDE_DARIUS_SMALLER_TRAIN_SIZES=True ): T_SIZES = [ 25, 50, 100, 150, 200 ] # 6 epochs for all, and only 70% for training if EXCLUDE_DARIUS_SMALLER_TRAIN_SIZES: T_SIZES = [T_SIZES[-1]] NETS = listkeys(RSA_LAYERS) if SHOBHITA: # noinspection PyRedeclaration T_SIZES = [100000] # num epochs? NETS = ["LSTM"] else: NETS.remove("LSTM") ACTIVATIONS_FOLDER = OM_DATA_FOLDER['activations'] if not SHOBHITA: imgActivations = ACTIVATIONS_FOLDER.resolve('imgActivationsForRSA') activations = {} for net_folder in imgActivations.folders: modelname = net_folder.name if modelname not in activations: activations[modelname] = {} for activations_mat in net_folder.files.filtered( lambda x: x.ext == 'mat' ): # breakpoint() activations[modelname][activations_mat.name_pre_ext] = activations_mat else: folder = ACTIVATIONS_FOLDER['rsa_activations_shobhita2'] files = {f.name.split('Cat')[1].split('_')[0]: f for f in folder.files} activations = { 'LSTM': {c.name: folder[files[c.name].name] for c in RSA_CLASSES} } for arch in NETS: arch_rand_perm = None for size in T_SIZES: net = arch with section(f'preparing {net} activations'): if not SHOBHITA: net = f'{net}_{size}' acts_for_rsa = None for c in [cls.name for cls in RSA_CLASSES]: acts = activations[net][c].load(silent=True) if not SHOBHITA: acts = acts['imageActivations'] if arch_rand_perm is None: progress(f"activation size of {net}: {len(acts[0])}") arch_rand_perm = randperm(range(len(acts[0]))) acts = [a[arch_rand_perm][:ACT_SIZE] for a in acts[0:N_PER_CLASS]] if acts_for_rsa is None: acts_for_rsa = acts else: acts_for_rsa = concat( acts_for_rsa, acts, axis=0 ) yield arch, net, FeatureMatrix( data=acts_for_rsa, ground_truth=np.repeat(RSA_CLASSES, int(len(acts_for_rsa) / len(RSA_CLASSES))).tolist(), class_set=RSA_CLASSES )