def __init__(
    self,
    data_path,
    id_path,
    csv_dir=None,
    infer_path=None,
    seed=23,
    train_test_split=0.9,
    train_test="train",
    **kwargs,
):
    """Build the clinical-data split and fit preprocessing on train only.

    Reads the clinical CSV, derives a seeded train/test split from the
    files under ``id_path``, fits the one-hot encoder and scaler on the
    training patients, and prepares ``self.df`` for the requested split
    (or for an external inference CSV when ``infer_path`` is given).
    """
    self.data_path = Path(data_path).expanduser()
    self.id_path = Path(id_path).expanduser()
    self.seed = seed
    self.train_test_split = train_test_split
    self.train_test = train_test.lower()
    assert self.train_test in ["train", "test"]

    all_paths = sorted(glob(str(self.id_path / "*.*")))
    n_samples = len(all_paths)
    # Seeded sampling so the same patients land in test on every run.
    random.seed(self.seed)
    test_idx = random.sample(
        range(n_samples), int(n_samples * (1 - self.train_test_split)))
    train_idx = list(set(range(n_samples)) - set(test_idx))
    selected = train_idx if self.train_test == 'train' else test_idx

    self.ids = [int(get_id_from_path(all_paths[i])) for i in selected]
    clinical_df = pd.read_csv(self.data_path).drop(columns=DROP_COLS)
    self.paths = sorted([all_paths[i] for i in selected])
    self.__len = len(self.ids)

    # Fit encoder/scaler on the training patients only (no leakage).
    train_ids = [int(get_id_from_path(all_paths[i])) for i in train_idx]
    train_input = clinical_df[clinical_df.PatientID.isin(train_ids)]
    self.encoder = PandasOneHotEncoder(CAT_COLS).fit(train_input)
    self.input_scaler = PandasScaler(REG_COLS).fit(train_input)

    if infer_path is None:
        if self.train_test == 'test':
            clinical_df = clinical_df[clinical_df.PatientID.isin(self.ids)]
        else:
            clinical_df = train_input
        clinical_df = self.encoder.transform(clinical_df)
        clinical_df = self.input_scaler.transform(clinical_df)
        self.df = clinical_df.copy()
    else:
        # Inference mode: preprocess an external CSV with the
        # train-fitted encoder/scaler instead of the split data.
        infer_path = Path(infer_path).expanduser()
        infer_df = pd.read_csv(infer_path)
        infer_df = self.encoder.transform(infer_df)
        infer_df = self.input_scaler.transform(infer_df)
        self.df = infer_df.copy()

    self.csv_dir = csv_dir
    if csv_dir:
        # Ground-truth survival times: fit the target scaler on the
        # training subset, then merge labels for the requested split.
        self.csv_dir = Path(csv_dir).expanduser()
        ground_truth = pd.read_csv(self.csv_dir)
        train_truth = ground_truth[ground_truth.PatientID.isin(train_ids)]
        self.scaler = StandardScaler().fit(
            train_truth.SurvivalTime.values.reshape(-1, 1))
        if self.train_test == 'test':
            ground_truth = ground_truth[ground_truth.PatientID.isin(self.ids)]
        else:
            ground_truth = train_truth
        self.df = self.df.merge(ground_truth)
def main_memory(args):
    """Run the listener-memory experiment grid and log per-round scores."""
    path = '../data/model_output/listener_lesions.csv'
    writer = EfficiencyWriter(args, path)

    # init listener model
    listener = AdaptiveAgent(args)
    listener.reduction_history_window = 'complete'
    grid = construct_context_grid(args)

    for ctx in grid:
        print("\n------gameid: {}, sample_num: {}, loss: {}, handle-oov: {}"
              .format(ctx['gameid'], ctx['sample_num'], ctx['loss'],
                      ctx['handleOOV']))

        # reset speaker & listener to pretrained settings
        listener.reset_to_initialization(ctx['dirs'])

        # setting context
        listener.loss = ctx['loss']
        listener.dataset_type = ctx['ds_type']
        listener.history = []

        for datum in ctx['speaker_data']:
            rep_num = datum['repNum']
            trial_num = datum['trialNum']
            target = utils.get_img_path(datum['targetImg'])
            raw_cap = datum['msg']
            listener.trial_num = trial_num
            listener.sample_num = ctx['sample_num']

            # Set up for new round
            print('\nround {}, target {}, msg {}'.format(
                rep_num, utils.get_id_from_path(target), raw_cap
            ))
            listener.set_image(target)
            if ctx['handleOOV']:
                id_cap = listener.process_human_caption(raw_cap)
            else:
                id_cap = utils.words_to_ids(raw_cap, listener.vocab)
            scores = listener.L0_score(np.expand_dims(id_cap, axis=0),
                                       ctx['dirs'])
            cap_score = listener.S0_score(utils.load_image(target).to(device),
                                          torch.tensor([id_cap]).to(device),
                                          len(id_cap))

            # Write out
            scores = scores.data.cpu().numpy()[0]
            target_idx = listener.context.index(listener.raw_image)
            target_score = scores[target_idx]
            score_list = list(scores)
            best = score_list.index(max(score_list))
            correct = best == target_idx
            if args.debug:
                print([np.exp(s) for s in scores])
                print('{}, model says: {}, target actually: {}'.format(
                    correct, best, target_idx))
                print('accuracy in real game: {}'.format(datum['correct']))

            writer.writerow(ctx, datum, raw_cap, scores, len(raw_cap),
                            target_score, cap_score, correct)

            # Update models as relevant
            if ctx['loss'] != 'fixed':
                listener.update_model(trial_num, raw_cap)
def __init__(
    self,
    images_dir,
    csv_dir=None,
    seed=23,
    train_test_split=0.9,
    train_test="train",
    mode='concatenate',
    flip_proba=0.5,
    noise_magnitude=None,
    **kwargs,
):
    """Build the image-dataset split and augmentation pipeline.

    Derives a seeded train/test split from the files in ``images_dir``,
    optionally loads survival ground truth from ``csv_dir`` (fitting the
    target scaler on the training subset only), and — for non-concatenate
    modes — builds a torchvision transform with random flips and, for
    'scan' mode, additive Gaussian noise scaled by ``noise_magnitude``.
    """
    self.images_dir = Path(images_dir).expanduser()
    self.seed = seed
    self.train_test_split = train_test_split
    self.train_test = train_test.lower()
    assert self.train_test in ["train", "test"]
    self.mode = mode.lower()
    assert self.mode in ["concatenate", "scan", "mask"]
    self.flip_proba = flip_proba
    assert flip_proba <= 1.
    self.noise_magnitude = noise_magnitude

    paths = sorted(glob(str(self.images_dir / "*.*")))
    size = len(paths)
    # Seeded sampling so the same files land in test on every run.
    random.seed(self.seed)
    test_idx = random.sample(range(size),
                             int(size * (1 - self.train_test_split)))
    train_idx = list(set(range(size)) - set(test_idx))
    iterator = train_idx if self.train_test == 'train' else test_idx
    self.paths = sorted([paths[i] for i in iterator])
    self.__len = len(self.paths)

    self.csv_dir = csv_dir
    if csv_dir:
        self.csv_dir = Path(csv_dir).expanduser()
        self.ground_truth = pd.read_csv(self.csv_dir)
        # Fit the target scaler on training patients only (no leakage).
        train_ids = [int(get_id_from_path(paths[i])) for i in train_idx]
        train_survival_time = self.ground_truth.loc[
            self.ground_truth.PatientID.isin(train_ids), 'SurvivalTime']
        self.scaler = StandardScaler().fit(
            train_survival_time.values.reshape(-1, 1))

    if self.mode != "concatenate":
        transform = [
            Convert(),
            T.RandomHorizontalFlip(self.flip_proba),
            T.RandomVerticalFlip(self.flip_proba),
            T.ToTensor(),
        ]
        if self.mode == 'scan' and self.noise_magnitude is not None:
            # BUG FIX: scale the noise by the requested magnitude.
            # Previously noise_magnitude only acted as an on/off flag and
            # unit-variance noise was added regardless of its value.
            transform.append(T.Lambda(
                lambda x: x + self.noise_magnitude * torch.randn_like(x)))
        self.transform = T.Compose(transform)
def __getitem__(self, idx):
    """Return the clinical feature vector (and target when labels exist)
    for the patient at position ``idx``.

    Returns a dict with:
      - 'clinical': torch.Tensor of scaled regression + one-hot columns
      - 'y' (only when csv_dir was given): scaled survival time scalar
      - 'info': np.array([patient_id, event_indicator])
    """
    patient_id = int(get_id_from_path(self.paths[idx]))
    output = dict()
    patient_info = self.df.loc[self.df.PatientID == patient_id]
    output['clinical'] = torch.Tensor(
        patient_info[REG_COLS + self.encoder.cols].values[0])
    if self.csv_dir:
        output['y'] = self.scaler.transform(
            patient_info.SurvivalTime.values.reshape(1, 1))[0][0]
        output['info'] = np.array(
            [patient_id, int(patient_info.Event.values[0])])
    else:
        # CONSISTENCY FIX: return np.array like the labelled branch above
        # (and like the image dataset's __getitem__), not a plain list.
        output['info'] = np.array([patient_id, 1.])
    return output
def main(args):
    """Run the adaptive-speaker experiment grid and log generated captions."""
    path = '../data/model_output/speaker_lesions.csv'
    writer = EfficiencyWriter(args, path)
    speaker = AdaptiveAgent(args)
    grid = construct_context_grid(args)

    for ctx in grid:
        print("\n------gameid: {}, sample_num: {}, loss: {}, ds_type: {}, speaker_model: {}, cost_weight: {}"
              .format(ctx['gameid'], ctx['sample_num'], ctx['loss'],
                      ctx['ds_type'], ctx['speaker_model'],
                      ctx['cost_weight']))

        # Configure the speaker for this grid cell.
        speaker.loss = ctx['loss']
        speaker.reset_to_initialization(ctx['dirs'])
        speaker.dataset_type = ctx['ds_type']
        speaker.context_type = ctx['context_type']
        speaker.cost_weight = ctx['cost_weight']

        # simulate round-robin style by looping through targets in random order
        for datum in ctx['speaker_data']:
            rep_num = datum['repNum']
            trial_num = datum['trialNum']
            target = utils.get_img_path(datum['targetImg'])
            print(target)
            speaker.trial_num = trial_num
            speaker.sample_num = ctx['sample_num']
            speaker.set_image(target)

            # Generate, then round-trip through the vocab so OOV words
            # are normalized the same way the listener would see them.
            cap = speaker.generate_utterance(ctx['speaker_model'],
                                             as_string=True)
            cap = utils.ids_to_words(
                utils.words_to_ids(cap, speaker.vocab), speaker.vocab)
            if cap.startswith('<start>'):
                # strip the '<start> ' prefix and ' <end>' suffix
                cap = cap[8:-6]

            print('\nround {}, target {}, msg {}'.format(
                rep_num, utils.get_id_from_path(target), cap
            ))

            if datum['correct'] == True:
                print('training')
                speaker.update_model(trial_num, cap)

            writer.writerow(ctx, datum, trial_num, target, cap, len(cap))
def response(self, path, content):
    """Handle a PUT on a port: merge whitelisted fields into the stored port.

    ``port`` is a direct reference into the module-level ``ports`` mapping,
    so update_field_if_present mutates the stored entry in place — no
    write-back assignment is needed (the previously commented-out
    ``ports[port_id] = port`` was dead code and has been removed).
    """
    content_json = json.loads(content)
    received_port = content_json['port']
    port_id = get_id_from_path(path)
    port = ports[port_id]
    # Only copy the relevant keys; presumably update_field_if_present
    # fails when one is missing (per the original comment) — TODO confirm.
    for field in ('name', 'network_id', 'device_id', 'mac_address',
                  'device_owner', 'admin_state_up', 'binding:host_id'):
        update_field_if_present(port, received_port, field)
    # Single-argument parenthesized print is valid on both Python 2 and 3
    # (the original bare `print` statement was Python-2-only).
    print("PUT PORT:" + str(port))
    return json.dumps({'port': port})
def response(self, path, content=None):
    """Handle DELETE of a subnet; unknown ids are silently ignored."""
    subnet_id = utils.get_id_from_path(path)
    # pop with a default removes the entry if present, no-op otherwise —
    # same effect as the check-then-del idiom.
    subnets.pop(subnet_id, None)
def response(self, path):
    """Handle GET of a single port: serialize the entry whose id is in the URL."""
    port_id = utils.get_id_from_path(path)
    return json.dumps({'port': ports[port_id]})
def response(self, path):
    """Handle GET of a single network: serialize the entry whose id is in the URL."""
    network_id = utils.get_id_from_path(path)
    return json.dumps({'network': networks[network_id]})
def response(self, path):
    """Handle GET of a single subnet: serialize the entry whose id is in the URL."""
    subnet_id = utils.get_id_from_path(path)
    return json.dumps({'subnet': subnets[subnet_id]})
def __getitem__(self, idx, seed=None):
    """Return the (optionally augmented) image tensor(s) and, when ground
    truth is available, the scaled survival target for sample ``idx``.

    Train mode applies random flips (and, in 'concatenate' mode, additive
    Gaussian noise to the scan channel); test mode converts arrays to
    tensors without augmentation. ``seed`` (default None) seeds the flip
    draws so augmentation can be made reproducible per call.
    """
    path = self.paths[idx]
    scanner = np.load(path)
    scan, mask = scanner["scan"], scanner["mask"]

    if self.train_test == "train":
        random.seed(seed)
        # NOTE(review): the names look swapped — v_flip triggers hflip and
        # h_flip triggers vflip below. Behavior is symmetric (same proba
        # for both draws) so this is kept as-is; confirm intent.
        v_flip = random.random() < self.flip_proba
        h_flip = random.random() < self.flip_proba
        if self.mode == 'concatenate':
            scans = []
            masks = []
            to_pil = Convert()
            to_tens = T.ToTensor()
            # Same flip decision applied to every slice so the scan and
            # mask volumes stay aligned.
            for img, msk in zip(scan, mask):
                img, msk = to_pil(img), to_pil(np.uint8(msk))
                if v_flip:
                    img = TF.hflip(img)
                    msk = TF.hflip(msk)
                if h_flip:
                    img = TF.vflip(img)
                    msk = TF.vflip(msk)
                img, msk = to_tens(img).unsqueeze(0).float(), to_tens(
                    msk).unsqueeze(0).float()
                if self.noise_magnitude is not None:
                    # BUG FIX: scale the noise by the configured magnitude.
                    # Previously noise_magnitude was only a None-check and
                    # unit-variance noise was added regardless of its value.
                    img += self.noise_magnitude * torch.randn_like(img)
                scans.append(img)
                masks.append(msk)
            scans = torch.cat(scans, dim=1)
            masks = torch.cat(masks, dim=1)
            output = torch.cat([scans, masks], dim=0)
        elif self.mode == 'mask':
            output = np.expand_dims(mask, axis=1)
            output = torch.cat([self.transform(x) for x in output], dim=0)
        elif self.mode == 'scan':
            output = np.expand_dims(scan, axis=1)
            output = torch.cat([self.transform(x) for x in output], dim=0)
    else:
        if self.mode == 'mask':
            output = torch.Tensor(mask).unsqueeze(1).float()
        elif self.mode == 'scan':
            output = torch.Tensor(scan).unsqueeze(1).float()
        elif self.mode == 'concatenate':
            scan = torch.Tensor(scan).unsqueeze(0).float()
            mask = torch.Tensor(mask).unsqueeze(0).float()
            output = torch.cat([scan, mask], dim=0)

    output = {"images": output}
    patient_id = int(get_id_from_path(path))
    if self.csv_dir:
        patient_info = self.ground_truth.loc[self.ground_truth.PatientID ==
                                             patient_id]
        # NOTE(review): positional access — iloc[0, 1] is presumably
        # SurvivalTime and iloc[0, 2] the event indicator; confirm against
        # the ground-truth CSV schema.
        output['y'] = self.scaler.transform([[patient_info.iloc[0, 1]]])[0]
        output['info'] = np.array(
            [patient_id, int(patient_info.iloc[0, 2])])
    else:
        output['info'] = np.array([patient_id, 1.])
    return output
def main(args):
    """Run the catastrophic-forgetting listener experiment: adapt to one
    human ('train'), then probe first-round comprehension on held-out
    human contexts without resetting the adapted weights."""
    path = '../data/model_output/listener_cat_forgetting.csv'
    writer = EfficiencyWriter(args, path)

    # init listener model
    listener = AdaptiveAgent(args)
    listener.reduction_history_window = 'complete'
    grid = construct_context_grid(args)

    for ctx in grid:
        print("\n------train gameid: {}, sample_num: {}, loss: {}".format(
            ctx['train_context_info']['gameid'], ctx['sample_num'],
            ctx['loss']))

        # train: reduce with human
        # reset speaker and listener to pretrained setting
        listener.reset_to_initialization(ctx['train_dirs'])

        # setting context
        listener.loss = ctx['loss']
        listener.dataset_type = ctx['ds_type']
        listener.history = []  # TODO: redundant ?
        train_target = None

        for datum in ctx['train_context_info']['speaker_data']:
            rep_num = datum['repNum']
            trial_num = datum['trialNum']
            target = utils.get_img_path(datum['targetImg'])
            raw_cap = datum['msg']
            listener.trial_num = trial_num
            listener.sample_num = ctx['sample_num']

            # Set up for new round
            print('\nround {}, target {}, msg {}'.format(
                rep_num, utils.get_id_from_path(target), raw_cap))
            listener.set_image(target)
            if ctx['handleOOV']:
                id_cap = listener.process_human_caption(raw_cap)
            else:
                id_cap = utils.words_to_ids(raw_cap, listener.vocab)
            scores = listener.L0_score(np.expand_dims(id_cap, axis=0),
                                       ctx['train_dirs'])
            cap_score = listener.S0_score(
                utils.load_image(target).to(device),
                torch.tensor([id_cap]).to(device), len(id_cap))

            # Write out
            scores = scores.data.cpu().numpy()[0]
            target_idx = listener.context.index(listener.raw_image)
            target_score = scores[target_idx]
            score_list = list(scores)
            best = score_list.index(max(score_list))
            correct = best == target_idx
            if args.debug:
                print([np.exp(s) for s in scores])
                print('{}, model says: {}, target actually: {}'.format(
                    correct, best, target_idx))
                print('accuracy in real game: {}'.format(datum['correct']))

            # Update models as relevant
            if ctx['loss'] != 'fixed':
                listener.update_model(trial_num, raw_cap)

            # Only the final repetition of the training phase is logged.
            if rep_num == 5:
                writer.writerow(ctx, datum, raw_cap, scores, len(raw_cap),
                                target_score, cap_score, correct,
                                {'gameid': None, 'context_id': None})

        # test on new human
        if args.debug:
            print("\nTESTING!")
        for j, test_context_info in enumerate(ctx['test_context_infos']):
            print("\ntest context: {}".format(j))
            # set context to test dirs, BUT don't reset weights
            listener.set_context(ctx['test_dirs'][j])
            listener.history = []  # TODO: should we reset vocab?
            for datum in test_context_info['speaker_data']:
                rep_num = datum['repNum']
                # only probe first-round utterances in the test context
                if rep_num > 0:
                    break
                trial_num = datum['trialNum']
                target = utils.get_img_path(datum['targetImg'])
                raw_cap = datum['msg']
                listener.trial_num = trial_num
                listener.sample_num = ctx['sample_num']

                # Set up for new round
                print('\nround {}, target {}, msg {}'.format(
                    rep_num, utils.get_id_from_path(target), raw_cap))
                listener.set_image(target)
                if ctx['handleOOV']:
                    id_cap = listener.process_human_caption(raw_cap)
                else:
                    id_cap = utils.words_to_ids(raw_cap, listener.vocab)
                scores = listener.L0_score(np.expand_dims(id_cap, axis=0),
                                           ctx['train_dirs'])
                cap_score = listener.S0_score(
                    utils.load_image(target).to(device),
                    torch.tensor([id_cap]).to(device), len(id_cap))

                # Write out
                scores = scores.data.cpu().numpy()[0]
                target_idx = listener.context.index(listener.raw_image)
                target_score = scores[target_idx]
                score_list = list(scores)
                best = score_list.index(max(score_list))
                correct = best == target_idx
                if args.debug:
                    print([np.exp(s) for s in scores])
                    print('{}, model says: {}, target actually: {}'.format(
                        correct, best, target_idx))
                    print('accuracy in real game: {}'.format(
                        datum['correct']))

                writer.writerow(ctx, datum, raw_cap, scores, len(raw_cap),
                                target_score, cap_score, correct,
                                test_context_info)
def response(self, path, content=None):
    """Handle DELETE of a port; unknown ids are silently ignored."""
    port_id = utils.get_id_from_path(path)
    # pop with a default removes the entry if present, no-op otherwise —
    # same effect as the check-then-del idiom.
    ports.pop(port_id, None)
def response(self, path, content=None):
    """Handle DELETE of a network; unknown ids are silently ignored."""
    network_id = utils.get_id_from_path(path)
    # pop with a default removes the entry if present, no-op otherwise —
    # same effect as the check-then-del idiom.
    networks.pop(network_id, None)