def add_word(request, kind):
    if request.method == 'POST':
        try:
            form = get_class(kind, 'Form')(request.POST)
        except AttributeError:
            raise Http404("No such form type found!")
        if form.is_valid():
            word = form.save()
            word.save()
            if kind == 'Noun':
                ExNNS.make_new(word)
                ExAAS.make_new(word)
                ExLNS.make_new(word)
            elif kind == 'Verb':
                ExIIV.make_new(word)
                ExKKV.make_new(word)
                ExPPV.make_new(word)
                ExFFV.make_new(word)
            return redirect('add')
    else:
        czech_word = request.session['word']['czech']
        word = Word.get_czech_word_type_and_wiki(
            czech_word, get_wikitext(czech_word, 'cz'))
        word_type = get_class(kind)
        word_json = word_type.make_czech_json(word)
        word_json['de'] = word_type.make_german_json(word_json['german'])
        form = get_class(kind, 'Form')(initial=word_json)
        return render(request, 'addwords/new_{}.html'.format(kind.lower()),
                      {'form': form})
def is_pair(self, a, b, x, y):
    x = get_class(x)
    y = get_class(y)
    if isinstance(a, x) and isinstance(b, y):
        return (a, b)
    if isinstance(a, y) and isinstance(b, x):
        return (b, a)
    return False
def _get_ranker_pair(self, class_name, ranker_args, feature_pair,
                     feature_count, ties):
    return (get_class(class_name)(
                ranker_args,
                self._get_weight_vector(feature_pair[0], feature_count),
                ties=ties),
            get_class(class_name)(
                ranker_args,
                self._get_weight_vector(feature_pair[1], feature_count),
                ties=ties))
def __init__(self, feature_count, d, arg_str):
    # parse arguments
    parser = argparse.ArgumentParser(
        description="Initialize retrieval "
        "system with the specified feedback and learning mechanism.",
        prog="ListwiseLearningSystem")
    parser.add_argument("-w", "--init_weights", help="Initialization "
                        "method for weights (random, zero).", required=True)
    parser.add_argument("--sample_weights", default="sample_unit_sphere")
    parser.add_argument("-c", "--comparison", required=True)
    parser.add_argument("-f", "--comparison_args", nargs="*")
    parser.add_argument("-r", "--ranker", required=True)
    parser.add_argument("-s", "--ranker_args", nargs="*")
    parser.add_argument("-t", "--ranker_tie", default="random")
    parser.add_argument("-d", "--delta", required=True, type=str)
    parser.add_argument("-a", "--alpha", required=True, type=str)
    parser.add_argument("--anneal", type=int, default=0)
    parser.add_argument("--normalize", default="False")
    args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])

    self.ranker_class = get_class(args["ranker"])
    self.ranker_args = args["ranker_args"]
    self.ranker_tie = args["ranker_tie"]
    self.sample_weights = args["sample_weights"]
    self.init_weights = args["init_weights"]
    self.feature_count = feature_count
    self.ranker = self.ranker_class(d,
                                    self.ranker_args,
                                    self.ranker_tie,
                                    self.feature_count,
                                    sample=self.sample_weights,
                                    init=self.init_weights)

    if "," in args["delta"]:
        self.delta = array([float(x) for x in args["delta"].split(",")])
    else:
        self.delta = float(args["delta"])
    if "," in args["alpha"]:
        self.alpha = array([float(x) for x in args["alpha"].split(",")])
    else:
        self.alpha = float(args["alpha"])
    self.anneal = args["anneal"]

    self.comparison_class = get_class(args["comparison"])
    if "comparison_args" in args and args["comparison_args"] is not None:
        self.comparison_args = " ".join(args["comparison_args"])
        self.comparison_args = self.comparison_args.strip("\"")
    else:
        self.comparison_args = None
    self.comparison = self.comparison_class(self.comparison_args)
    self.query_count = 0
def _render_region(self, region):
    if region not in self.code_regions:
        raise RuntimeError("Region not scanned")
    lang = region.options.get('lang')
    renderer_name = self.config.renderers[lang]
    if not renderer_name:
        raise RuntimeError("No language specified for code block")
    renderer_cls = utils.get_class(renderer_name)
    if not renderer_cls:
        raise RuntimeError("Invalid renderer: {}".format(renderer_name or
                                                         '(none)'))
    format = Config(
        utils.load_json(os.path.join('languages', lang + '.json'), True))
    extra_format = self.formats.get(lang)
    if extra_format:
        format += extra_format
    renderer = renderer_cls(self.config, format)
    text = renderer.render(region)
    self._text = (self._text[:region.start] + text +
                  self._text[region.start + region.length:])
    self.code_regions.remove(region)
    new_region = Region(self, region.start, len(text))
    self._update_regions(new_region.start, new_region.length - region.length)
def forward_pass(self, input_data, m=0, convert_to_class=False):
    """
    Get output of ensemble of the last m networks where m <= n_snapshots.

    Args:
        input_data: Numpy matrix to make the predictions on
        m: the m most recent models from the ensemble to give outputs.
           Default to get output from all models.
        convert_to_class: return class predictions from ensemble
    """
    if m < 0 or m > len(self.networks):
        print('Select the m most recent models to get output from. '
              'Setting m to 0 (default to all models)\n')
        m = 0
    prediction_collection = []
    for net in self.networks[-m:]:
        prediction_collection += [net.forward_pass(input_data=input_data,
                                                   convert_to_class=False)]
    prediction_collection = np.array(prediction_collection)
    raw_prediction = np.mean(prediction_collection, axis=0, dtype='float32')
    if convert_to_class:
        return get_class(raw_prediction)
    else:
        return raw_prediction
def __init__(self, feature_count, arg_str):
    self.feature_count = feature_count
    # parse arguments
    parser = argparse.ArgumentParser(
        description="Initialize retrieval "
        "system with the specified feedback and learning mechanism.",
        prog="PairwiseLearningSystem")
    parser.add_argument("-w", "--init_weights", help="Initialization "
                        "method for weights (random, zero, fixed).",
                        required=True)
    parser.add_argument("-e", "--epsilon", required=True, type=float)
    parser.add_argument("-f", "--eta", required=True, type=float)
    parser.add_argument("-l", "--lamb", type=float, default=0.0)
    parser.add_argument("-r", "--ranker", required=True)
    parser.add_argument("-s", "--ranker_args", nargs="*")
    parser.add_argument("-t", "--ranker_tie", default="random")
    args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])

    # initialize weights, comparison method, and learner
    w = self.initialize_weights(args["init_weights"], self.feature_count)
    self.ranker_class = get_class(args["ranker"])
    if "ranker_args" in args and args["ranker_args"] is not None:
        self.ranker_args = " ".join(args["ranker_args"])
        self.ranker_args = self.ranker_args.strip("\"")
    else:
        self.ranker_args = None
    self.ranker_tie = args["ranker_tie"]
    self.ranker = self.ranker_class(self.ranker_args, w, self.ranker_tie)
    self.epsilon = args["epsilon"]
    self.eta = args["eta"]
    self.lamb = args["lamb"]
def cal(self, mod_rootdir=None, model_dirpaths=None,
        example_dirname='clean_example_data', n_samples=100):
    """
    Implements calibration
    :param mod_rootdir: directory containing a bunch of model directories to
        be used for calibration. Either this or model_dirpaths should be set.
    :param model_dirpaths: list of model directories to be used for
        calibration. Either this or mod_rootdir should be set.
    :param example_dirname: name of the (clean) example data directory in
        each model directory
    :param n_samples: number of noisy samples of each data point
    :return: numpy array of calibrated probabilities, ordered like the
        model_dirpaths (or sorted directories in mod_rootdir)
    """
    assert (mod_rootdir is not None) != (model_dirpaths is not None), \
        "set either mod_rootdir or model_dirpaths"
    if model_dirpaths is None:
        print("deprecation warning: using mod_rootdir is deprecated in "
              "favor of explicitly setting model_dirpaths")
        model_dirpaths = utils.get_modeldirs(mod_rootdir)

    y = np.array([utils.get_class(os.path.join(pth, 'config.json'),
                                  classtype='binary', file=True)
                  for pth in model_dirpaths])
    y = y.reshape(-1, 1) * np.ones([1, n_samples])
    y = y.reshape(-1)

    x = np.zeros([len(y), len(self.components)])
    order = [i for i in range(len(self.components))]
    random.shuffle(order)
    for i in order:
        print('starting calibration for ensemble component', i)
        component = self.components[i]
        # forward the calibration settings to each ensemble component
        pcal = component.cal(model_dirpaths=model_dirpaths,
                             example_dirname=example_dirname,
                             n_samples=n_samples)
        x[:, i] = pcal
    pcal = self.ens_cal(x, y)
    return pcal
def build_context(self, json):
    logger.info("Constructing context.")
    context = {}
    components = json["Workflow"]
    for module, slots in sorted(components.items()):
        for slot_name, slot_details in slots.items():
            if slot_name not in context:
                slot = {slot_name: {"content": None}}
                if slot_details:
                    uri = slot_details["uri"]
                    slot[slot_name]["uri"] = uri
                context.update(slot)
            else:
                if slot_details:
                    # slot is already present; just refresh its uri
                    uri = slot_details["uri"]
                    context[slot_name]["uri"] = uri

    # add global configuration here
    if "global_config" in json.keys():
        if isinstance(json["global_config"], dict):
            context["global_config"] = json["global_config"]
        elif isinstance(json["global_config"], str):
            # trying to build a dict from configuration code...
            import utils
            global_config = utils.get_class(json["global_config"])
            context["global_config"] = global_config
    return context
def get(self, id, props=None):
    if props:
        props.append('_clazz')
        props.append('id')
    retrieved_dict = self._get_column_family().get(id, props)
    clazz_name = retrieved_dict['_clazz']
    clazz = get_class(clazz_name)
    return object_from_key_jsonvalue_dict(clazz, retrieved_dict)
def get_exercise(request, kind):
    """ Get new exercise via REST framework """
    ex = get_class(kind)
    if request.method == 'GET':
        exercise = ex.random()
        serializer_class = get_class(kind, 'Serializer')
        serializer = serializer_class(exercise)
        return JsonResponse(serializer.data)
    elif request.method == 'POST':
        ex_id = request.COOKIES.get('id')
        body = json.loads(request.body.decode("utf-8").replace("'", '"'))
        database = get_object_or_404(ex, id=ex_id)
        db_czech = database.czech[2:-2].split("', '")
        status = body['answer'] in db_czech
        return JsonResponse({'status': status, 'correct_answer': db_czech})
def __init__(self, training_queries, test_queries, feature_count, log_fh,
             args):
    """Initialize an experiment using the provided arguments."""
    self.log_fh = log_fh
    self.training_queries = training_queries
    self.test_queries = test_queries
    self.feature_count = feature_count
    # construct system according to provided arguments
    self.num_queries = args["num_queries"]
    self.query_sampling_method = args["query_sampling_method"]
    self.um_class = get_class(args["user_model"])
    self.um_args = args["user_model_args"]
    self.um = self.um_class(self.um_args)
    self.system_class = get_class(args["system"])
    self.system_args = args["system_args"]
    self.system = self.system_class(self.feature_count, self.system_args)
    # if isinstance(self.system, AbstractOracleSystem):
    #     self.system.set_test_queries(self.test_queries)
    self.evaluations = {}
    for evaluation in args["evaluation"]:
        self.evaluation_class = get_class(evaluation)
        self.evaluations[evaluation] = self.evaluation_class()
def exercise(request, kind):
    try:
        form = get_class(kind, 'Form')
    except AttributeError:
        raise Http404("No such exercise type found!")
    return render(
        request,
        'quiz/home.html',
        context={
            'form': form,
            'kind': kind
        },
    )
def cal(out_fn, base_folder='data/round3models',
        example_folder_name='clean_example_data'):
    """
    :param out_fn: name under which the calibration data and fitted model
        are stored
    :param base_folder: folder containing the model directories
    :param example_folder_name: name of the clean example data directory in
        each model directory
    :return: sorted model directories and their calibrated probabilities
    """
    from sklearn.isotonic import IsotonicRegression
    from sklearn.metrics import log_loss, roc_auc_score
    import os

    calpath = 'calibration/data/' + out_fn + '_caldata.p'
    if os.path.exists(calpath):
        with open(calpath, 'rb') as f:
            ldirs, pcal = pickle.load(f)
        return ldirs, pcal

    acc_drops = []
    y = []
    dirs = os.listdir(path=base_folder)
    for dir in dirs:
        example_path = os.path.join(base_folder, dir, example_folder_name)
        model_path = os.path.join(base_folder, dir, 'model.pt')
        acc_drop = get_accdrop(model_path, example_path)
        truth_fn = os.path.join(base_folder, dir, 'config.json')
        cls = utils.get_class(truth_fn, classtype='binary', file=True)
        acc_drops.append(acc_drop)
        y.append(cls)

    ir_model = IsotonicRegression(out_of_bounds='clip')
    pcal = ir_model.fit_transform(acc_drops, y)
    kld = log_loss(y, pcal)
    # print(kld)
    roc1 = roc_auc_score(y, np.array(pcal))
    print(out_fn, 'AUC:', roc1, 'KLD:', kld)

    # dump(ir_model, 'data/classifiers/blur' + '_ir.joblib')
    dump(ir_model, 'calibration/fitted/' + out_fn)

    pcal = pcal[np.argsort(dirs)]
    dirs.sort()
    with open(calpath, 'wb') as f:
        pickle.dump([dirs, pcal], f)
    return dirs, pcal
def register_actions(self, action_config_file):
    """
    Register actions. The config file maps command names to command classes.
    :param action_config_file:
    :return:
    """
    with open(action_config_file) as fp:
        action_data = json.load(fp)
    if "actions" in action_data:
        for action_config in action_data["actions"]:
            cls = utils.get_class(action_config["class_name"])
            if cls:
                self.action_classes[action_config["name"]] = cls
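# Illustrative sketch only: one possible shape of the JSON config consumed by
# register_actions above. The action names and class paths are assumptions for
# demonstration, not taken from the original project.
example_action_config = {
    "actions": [
        {"name": "greet", "class_name": "actions.greet.GreetAction"},
        {"name": "quit", "class_name": "actions.quit.QuitAction"},
    ]
}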
def create_object(full_class_name=None):
    """Instantiate an object just by a string representation of its class.
    The object must not have required arguments to the __init__ function.
    Makes use of the get_class function of the utils module.
    If full_class_name is not specified (None) then a base class XMLObject
    will be provided"""
    if full_class_name != None:
        class_obj = get_class(full_class_name)
        try:
            return class_obj()
        except TypeError, e:
            raise Exception("Failed to instantiate %s: %s"
                            % (str(class_obj), str(e)))
def __init__(self, state):
    x_slice_s = state['x_slice']
    y_slice_s = state['y_slice']
    data_transforms = None
    if "data_transforms" in state:
        data_transforms = {}
        for key, val in state["data_transforms"].items():
            if len(val.keys()) != 1:
                raise ValueError()
            data_transforms[key] = get_class(
                "data.data_utils.%s" % list(val.keys())[0][2:-2])(
                    **list(val.values())[0])
    super().__init__(x_slice=slice(x_slice_s['start'], x_slice_s['stop'],
                                   x_slice_s['step']),
                     y_slice=slice(y_slice_s['start'], y_slice_s['stop'],
                                   y_slice_s['step']),
                     name=state['name'],
                     normalize=state["normalize"],
                     uniqueness_threshold=state['uniqueness_threshold'],
                     data_transforms=data_transforms)
    class_name = state['name'] if state['name'].find('_') < 0 \
        else state['name'][:state['name'].find('_')]
    self.__class__ = get_class("data.%s" % class_name)
def forward_pass(self, input_data, convert_to_class=False):
    """
    Allow the implementer to quickly get outputs from the network.

    Args:
        input_data: Numpy matrix to make the predictions on
        convert_to_class: If the output should return the class
                          with highest probability

    Returns: Numpy matrix with the output probabilities
             for each class unless otherwise specified.
    """
    if convert_to_class:
        return get_class(self.output(input_data))
    else:
        return self.output(input_data)
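# Hedged sketch of the class-conversion helper assumed by forward_pass above:
# get_class is taken here to mean a row-wise argmax over the network's output
# probabilities. The helper in the original library may differ in detail.
import numpy as np


def probabilities_to_classes(raw_prediction):
    """Map an (n_samples, n_classes) probability matrix to class indices."""
    return np.argmax(raw_prediction, axis=1)


# Example: probabilities_to_classes(np.array([[0.1, 0.9], [0.7, 0.3]]))
# returns array([1, 0]).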
def cal(self, mod_rootdir=None, model_dirpaths=None,
        example_dirname='clean_example_data', n_samples=100):
    """
    Implements calibration
    :param mod_rootdir: directory containing a bunch of model directories to
        be used for calibration. Either this or model_dirpaths should be set.
    :param model_dirpaths: list of model directories to be used for
        calibration. Either this or mod_rootdir should be set.
    :param example_dirname: name of the (clean) example data directory in
        each model directory
    :param n_samples: number of noisy samples of each data point
    :return: numpy array of calibrated probabilities, ordered like the
        model_dirpaths (or sorted directories in mod_rootdir)
    """
    assert (mod_rootdir is not None) != (model_dirpaths is not None), \
        "set either mod_rootdir or model_dirpaths"
    if model_dirpaths is None:
        print("deprecation warning: using mod_rootdir is deprecated in "
              "favor of explicitly setting model_dirpaths")
        model_dirpaths = utils.get_modeldirs(mod_rootdir)

    # get the data for calibration
    mags = self.get_cal_data(model_dirpaths, example_dirname,
                             n_samples=n_samples)
    mags = mags.reshape(-1)
    y = np.array([utils.get_class(os.path.join(pth, 'config.json'),
                                  classtype='binary', file=True)
                  for pth in model_dirpaths])
    if n_samples is not None:
        y = y.reshape(-1, 1) * np.ones([1, n_samples])
        y = y.reshape(-1)

    # check for saved model
    irpath = self.get_irpath()
    if os.path.exists(irpath) and not self.overwrite:
        ir_model = joblib.load(irpath)
    else:
        # run the calibration & save model
        ir_model = IsotonicRegression(out_of_bounds='clip')
        clippedmags = np.clip(mags, np.percentile(mags, 10),
                              np.percentile(mags, 90))
        # clippedmags = np.clip(mags, np.percentile(mags, 25),
        #                       np.percentile(mags, 75))
        ir_model.fit(clippedmags, y)
        joblib.dump(ir_model, irpath)

    # get & return the calibrated probabilities
    pcal = ir_model.transform(mags)
    return pcal
def recognize(self, image):
    """
    Recognize a face in a list of known faces

    Argument:
        - image: np.ndarray in RGB order

    Return:
        None if image has no faces or unknown faces.
        Otherwise, return label
    """
    faces = self.detector.detect_faces(image)
    if not faces:
        return None
    face = faces[0]
    emb = self.extractor.get_embeddings(face)
    distances, indices = self.index.search(emb, self.knn)
    indices = indices[distances < self.max_distance]
    if indices.shape[0] == 0:
        return None
    classes = self.df.loc[indices, 'class']
    return get_class(classes)
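# Hedged sketch of the label-vote helper assumed by recognize() above: pick
# the most frequent class among the k nearest neighbours. The helper in the
# original project may break ties or apply thresholds differently.
def majority_class(classes):
    """Return the most common label in a pandas Series of neighbour labels."""
    return classes.mode().iloc[0]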
def ask_obj(sentence):
    '''
    retrieve sentences in clips matching the given sentence.
    Can use variables.
    '''
    clps, templs = get_instances(sentence)
    sens = []
    if clps:
        if isinstance(sentence, Thing):
            for ins in clps:
                sens.append(Namable.from_clips(ins))
        elif isinstance(sentence, Fact):
            for ins in clps:
                i = clips.FindInstance(ins)
                if issubclass(utils.get_class(str(i.Class.Name)), Fact):
                    sens.append(Fact.from_clips(ins))
    return sens
def __init__(self, ranker_arg_str, ties, feature_count, init=None,
             sample=None):
    self.feature_count = feature_count
    ranking_model_str = "ranker.model.Linear"
    for arg in ranker_arg_str:
        if arg.startswith("ranker.model"):
            ranking_model_str = arg
        else:
            self.ranker_type = float(arg)
    self.ranking_model = get_class(ranking_model_str)(feature_count)
    self.sample = getattr(__import__("utils"), sample)
    self.ties = ties
    self.w = self.ranking_model.initialize_weights(init)
def register_questions_types(*tuples):
    """
    Take a list of tuples and return a list of ``dicts``.

    Each ``dict`` contains the following keys:

    * ``pretty_name`` - Human-readable name of question type
    * ``slug`` - Url-safe name of question type
    * ``class`` - class definition
    """
    types = list(itertools.chain(*tuples))
    question_types = []
    for t in types:
        try:
            class_ = get_class(t, QuestionTypeRegisterError)
        except QuestionTypeRegisterError:
            print 'Failed to register %s' % t
            continue
        question_types.append({
            'pretty_name': class_.pretty_name(),
            'slug': class_._meta.module_name,
            'class': class_
        })
    return question_types
def uploaded_file(filename):
    img_name = 'static/img/' + filename + '.png'
    filename = os.path.join('static/pdf', filename)
    x, exit_status = from_pdf_to_vector(filename, tfidf)
    if exit_status != 0:
        flash('Sorry, it seems that something went wrong. Please try again.')
        return redirect('/')
    else:
        # Get the classes that best suit the article and the corresponding
        # probabilities
        prob, clss = get_class(x, logr)
        # Find the first 10 similar articles to the one uploaded in the
        # database
        similar_pos = find_similar(database['X'], x, 9)[0]
        # Pie chart, where the slices will be ordered and plotted
        # counter-clockwise:
        fig, ax = plt.subplots()
        ax.pie(prob, labels=clss, autopct='%1.1f%%', shadow=True,
               startangle=160,
               wedgeprops={"edgecolor": "0", 'linewidth': 1})
        ax.axis('equal')  # Equal aspect ratio ensures a circular pie.
        img_name = 'static/img/plot' + str(randint(0, 500)) + '.png'
        plt.savefig(img_name, bbox_inches='tight', pad_inches=0)
        return render_template("classification.html",
                               filename=os.path.join('/', filename),
                               img=os.path.join('/', img_name),
                               prob=prob, clss=clss,
                               similar=database['links'][similar_pos])
def __init__(self, queries, feature_count, log_fh, args):
    """Initialize an experiment using the provided arguments."""
    self.log_fh = log_fh
    self.queries = queries
    self.feature_count = feature_count
    self.ties = "first"
    # construct experiment according to provided arguments
    self.result_length = args["result_length"]
    self.num_queries = args["num_queries"]
    self.query_sampling_method = args["query_sampling_method"]
    self.um_class = get_class(args["user_model"])
    self.um_args = args["user_model_args"]
    self.um = self.um_class(self.um_args)
    # set up methods to compare
    parser = argparse.ArgumentParser(description="parse arguments of an "
                                     "evaluation method.",
                                     prog="evaluation method configuration")
    parser.add_argument("-c", "--class_name")
    parser.add_argument("-r", "--ranker")
    parser.add_argument("-a", "--ranker_args")
    parser.add_argument("-i", "--interleave_method")
    self.rankers = {}
    self.live_methods = {}
    self.hist_methods = {}
    self.ndcg = evaluation.NdcgEval()
    # init live methods
    if "live_evaluation_methods" in args:
        for method_id, method in enumerate(
                args["live_evaluation_methods"]):
            self.live_methods[method] = {}
            method_args_str = \
                args["live_evaluation_methods_args"][method_id]
            method_args = vars(parser.parse_known_args(
                method_args_str.split())[0])
            class_name = method_args["class_name"]
            self.live_methods[method]["instance"] = \
                get_class(class_name)(method_args_str)
            ranker = method_args["ranker"]
            ranker_args = method_args["ranker_args"]
            self.live_methods[method]["ranker"] = ranker
            self.live_methods[method]["ranker_args"] = ranker_args
            if not ranker in self.rankers:
                self.rankers[ranker] = {}
            if not ranker_args in self.rankers[ranker]:
                self.rankers[ranker][ranker_args] = {}
    # init hist methods
    if "hist_evaluation_methods" in args:
        for method_id, method in enumerate(
                args["hist_evaluation_methods"]):
            self.hist_methods[method] = {}
            method_args_str = \
                args["hist_evaluation_methods_args"][method_id]
            method_args = vars(parser.parse_known_args(
                method_args_str.split())[0])
            class_name = method_args["class_name"]
            self.hist_methods[method]["instance"] = \
                get_class(class_name)(method_args_str)
            ranker = method_args["ranker"]
            ranker_args = method_args["ranker_args"]
            self.hist_methods[method]["ranker"] = method_args["ranker"]
            self.hist_methods[method]["ranker_args"] = \
                method_args["ranker_args"]
            if not ranker in self.rankers:
                self.rankers[ranker] = {}
            if not ranker_args in self.rankers[ranker]:
                self.rankers[ranker][ranker_args] = {}
            self.hist_methods[method]["interleave_method"] = \
                get_class(method_args["interleave_method"])()
    # sample source and target ranker pair, create deterministic and
    # probabilistic ranker pairs
    self.source_pair = [0, 0]
    self.source_pair[0] = self._sample_ranker_without_replacement(
        self.feature_count, [])
    self.source_pair[1] = self._sample_ranker_without_replacement(
        self.feature_count, [self.source_pair[0]])
    self.target_pair = [0, 0]
    self.target_pair[0] = self._sample_ranker_without_replacement(
        self.feature_count, self.source_pair)
    self.target_pair[1] = self._sample_ranker_without_replacement(
        self.feature_count, [self.target_pair[0], self.source_pair[0],
                             self.source_pair[1]])
    # init rankers needed by live and/or hist methods
    for ranker in self.rankers:
        for ranker_args in self.rankers[ranker]:
            self.rankers[ranker][ranker_args]["source"] = \
                self._get_ranker_pair(ranker, ranker_args, self.source_pair,
                                      self.feature_count, self.ties)
            self.rankers[ranker][ranker_args]["target"] = \
                self._get_ranker_pair(ranker, ranker_args, self.target_pair,
                                      self.feature_count, self.ties)
def train(source, target):
    scaled_logits, src_acc, trgt_acc, grad = build_graph(source, target)
    init = tf.global_variables_initializer()
    summaries = tf.merge_all_summaries()

    if not path.isdir(savedir):
        print('No models found. Start training.')
        covnet_model.train()
        create_directories()

    if raw_input('Do you want to use your own weights? [y/N] ') == 'y':
        fname = raw_input('Enter saved model name > ')
        weights = path.join(savedir, fname)
    else:
        weights = path.join(savedir, 'default')

    with tf.Session() as sess:
        sess.run(init)
        covnet_model.saver.restore(sess, weights)
        print('Weights restored.')

        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        writer = tf.train.SummaryWriter(logdir, graph=sess.graph)

        src_images, src_labels = get_class(source, mnist.test.images,
                                           mnist.test.labels)

        # pick an image that is correctly classified by the CNN
        k = 0
        while True:
            original = src_images[np.newaxis, k]
            label = src_labels[np.newaxis, k]
            image = np.copy(original)
            l = scaled_logits.eval(
                feed_dict={
                    covnet_model.x: original,
                    covnet_model.y: label,
                    covnet_model.keep_prob: 1.
                })
            if np.argmax(l) == source:  # correctly classified
                break
            k += 1  # try the next image

        print('Generating Adversarial Image...')
        print('Open tensorboard to visualize.')

        # train loop
        i = 0
        target_acc = 0.
        start_acc = []
        while target_acc < .99:  # fool to 99% acc
            source_acc, target_acc, dimg, summ = sess.run(
                [src_acc, trgt_acc, grad, summaries],
                feed_dict={
                    covnet_model.x: image,
                    covnet_model.y: label,
                    covnet_model.keep_prob: 1.
                })
            if i == 0:
                start_acc.extend([source_acc, target_acc])
            writer.add_summary(summ, global_step=i)
            image = image + learning_rate * dimg.reshape(1, 28 * 28)
            diff = np.abs(original - image)
            print("%d source_acc %.5f, target_acc %.5f, sum: %.5f" %
                  (i, source_acc, target_acc, np.sum(diff)))
            i += 1

        print('Adversarial example generated.')

        # Show the example
        fig = plt.figure(figsize=(30, 10))
        plt.subplot(131)
        plt.imshow(original.reshape(28, 28), cmap='gray')
        plt.axis('off')
        plt.title('Original. source: (%f), target: (%f)' % tuple(start_acc))
        plt.subplot(132)
        plt.imshow(diff.reshape(28, 28), cmap='gray')
        plt.title('Delta (%f)' % np.sum(diff))
        plt.axis('off')
        plt.subplot(133)
        plt.imshow(image.reshape(28, 28), cmap='gray')
        plt.axis('off')
        plt.title('Adversarial source: (%f), target: (%f)' %
                  (source_acc, target_acc))
        plt.show()

        # ask to save
        while True:
            prompt = raw_input('Do you want to save this example? [y/N] ')
            if prompt == 'y':
                fname = raw_input(
                    'Enter name of npy file without extension > ')
                np.savez(path.join(exampledir, fname),
                         source=original,
                         delta=diff,
                         target=image,
                         source_acc=source_acc,
                         target_acc=target_acc)
                break
            elif prompt == 'N':
                break

    covnet_model.train_sess.close()
def __init__(self, args_str=None):
    # parse arguments
    parser = argparse.ArgumentParser(description="""
        Construct and run a learning experiment. Provide either the name of
        a config file from which the experiment configuration is read, or
        provide all arguments listed under Command line. If both are
        provided the config file is ignored.""",
        prog=self.__class__.__name__)

    # option 1: use a config file
    file_group = parser.add_argument_group("FILE")
    file_group.add_argument("-f", "--file", help="Filename of the config "
                            "file from which the experiment details"
                            " should be read.")

    # option 2: specify all experiment details as arguments
    detail_group = parser.add_argument_group("DETAILS")
    detail_group.add_argument("-i", "--training_queries",
        help="File from which to load the training queries (svmlight "
        "format).")
    detail_group.add_argument("-j", "--test_queries",
        help="File from which to load the test queries (svmlight format).")
    detail_group.add_argument("-c", "--feature_count", type=int,
        help="The number of features included in the data.")
    detail_group.add_argument("-r", "--num_runs", type=int,
        help="Number of runs (how many times to repeat the experiment).")
    detail_group.add_argument("-q", "--num_queries", type=int,
        help="Number of queries in each run.")
    detail_group.add_argument("-u", "--user_model",
        help="Class implementing a user model.")
    detail_group.add_argument("-v", "--user_model_args",
        help="Arguments for initializing the user model.")
    # the retrieval system maintains ranking functions, accepts queries and
    # generates result lists, and in return receives user clicks to learn
    # from
    detail_group.add_argument("-s", "--system",
        help="Which system to use (e.g., pairwise, listwise).")
    detail_group.add_argument("-a", "--system_args", help="Arguments for "
        "the system (comparison method, learning "
        "algorithm and parameters...).")
    detail_group.add_argument("-o", "--output_dir",
        help="(Empty) directory for storing output generated by this"
        " experiment. Subdirectory for different folds will be generated"
        " automatically.")
    detail_group.add_argument("--output_dir_overwrite", default="False")
    detail_group.add_argument("-p", "--output_prefix",
        help="Prefix to be added to output filenames, e.g., the name of "
        "the data set, fold, etc. Output files will be stored as "
        "OUTPUT_DIR/PREFIX-RUN_ID.txt.gz")
    detail_group.add_argument("-e", "--experimenter",
        help="Experimenter type.")

    # run the parser
    if args_str:
        args = parser.parse_known_args(args_str.split())[0]
    else:
        args = parser.parse_known_args()[0]

    # determine whether to use config file or detailed args
    self.experiment_args = None
    if args.file:
        config_file = open(args.file)
        self.experiment_args = yaml.load(config_file)
        config_file.close()
        # overwrite with command-line options if given
        for arg, value in vars(args).items():
            if value:
                self.experiment_args[arg] = value
    else:
        self.experiment_args = vars(args)

    # workaround - check if we have all the arguments needed
    if not ("training_queries" in self.experiment_args and
            "test_queries" in self.experiment_args and
            "feature_count" in self.experiment_args and
            "num_runs" in self.experiment_args and
            "num_queries" in self.experiment_args and
            "user_model" in self.experiment_args and
            "user_model_args" in self.experiment_args and
            "system" in self.experiment_args and
            "system_args" in self.experiment_args and
            "output_dir" in self.experiment_args):
        parser.print_help()
        sys.exit("Missing required arguments, please check the program"
                 " arguments or configuration file.\n%s"
                 % self.experiment_args)

    # set default values for optional arguments
    if not "query_sampling_method" in self.experiment_args:
        self.experiment_args["query_sampling_method"] = "random"
    if not "output_dir_overwrite" in self.experiment_args:
        self.experiment_args["output_dir_overwrite"] = False
    if not "experimenter" in self.experiment_args:
        self.experiment_args["experimenter"] = \
            "experiment.LearningExperiment"
    if not "evaluation" in self.experiment_args:
        self.experiment_args["evaluation"] = "evaluation.NdcgEval"
    if not "processes" in self.experiment_args:
        self.experiment_args["processes"] = 0

    # locate or create directory for the current fold
    if not os.path.exists(self.experiment_args["output_dir"]):
        os.makedirs(self.experiment_args["output_dir"])
    elif not(self.experiment_args["output_dir_overwrite"]) and \
            os.listdir(self.experiment_args["output_dir"]):
        # make sure the output directory is empty
        raise Exception("Output dir %s is not an empty directory. "
            "Please use a different directory, or move contents out "
            "of the way." % self.experiment_args["output_dir"])

    logging.basicConfig(format='%(asctime)s %(module)s: %(message)s',
                        level=logging.INFO)
    logging.info("Arguments: %s" % self.experiment_args)
    for k, v in sorted(self.experiment_args.iteritems()):
        logging.info("\t%s: %s" % (k, v))
    config_bk = os.path.join(self.experiment_args["output_dir"],
                             "config_bk.yml")
    logging.info("Backing up configuration to: %s" % config_bk)
    config_bk_file = open(config_bk, "w")
    yaml.dump(self.experiment_args, config_bk_file,
              default_flow_style=False)
    config_bk_file.close()

    # load training and test queries
    training_file = self.experiment_args["training_queries"]
    test_file = self.experiment_args["test_queries"]
    self.feature_count = self.experiment_args["feature_count"]
    logging.info("Loading training data: %s " % training_file)
    self.training_queries = load_queries(training_file, self.feature_count)
    logging.info("... found %d queries." %
                 self.training_queries.get_size())
    logging.info("Loading test data: %s " % test_file)
    self.test_queries = load_queries(test_file, self.feature_count)
    logging.info("... found %d queries." % self.test_queries.get_size())

    # initialize and run the experiment num_run times
    self.num_runs = self.experiment_args["num_runs"]
    self.output_dir = self.experiment_args["output_dir"]
    self.output_prefix = self.experiment_args["output_prefix"]
    self.experimenter = get_class(self.experiment_args["experimenter"])
def from_state(state):
    kls = state["kls"]
    model_kls = state["model_kls"]
    return get_class(kls)(get_class(model_kls)(**state['model_kwargs']))
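# Illustrative usage sketch for from_state: the wrapper path and state layout
# below are assumptions for demonstration, not names from the original code.
example_state = {
    "kls": "wrappers.SklearnWrapper",            # hypothetical wrapper class
    "model_kls": "sklearn.linear_model.Ridge",   # model class to instantiate
    "model_kwargs": {"alpha": 1.0},
}
# from_state(example_state) would resolve both dotted paths with get_class
# and return SklearnWrapper(Ridge(alpha=1.0)).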
#
# Lerot is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Lerot is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Lerot. If not, see <http://www.gnu.org/licenses/>.

from include import *
import argparse

from utils import get_class

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="""Analysis""",
                                     usage="%(prog)s ")
    parser.add_argument('--basedir', required=True)
    parser.add_argument('--analysis', nargs="+", required=True)
    args = parser.parse_known_args()[0]

    for analyse in args.analysis:
        aclass = get_class(analyse)
        a = aclass(args.basedir)
        a.update()
        print a.finish()
parser.add_argument("-t", "--test_file", default="test.txt.gz",
    help="In each fold of the test directory, the name of the test file.")
parser.add_argument("-f", "--feature_count", required=True, type=int,
    help="Number of features (has to match test queries and weight files).")
parser.add_argument("-e", "--experiment_dirs", nargs="+", required=True,
    help="List of directories that contain experiments (one per experiment). "
    "Results per experiment will be averaged over all folds and runs.")
parser.add_argument("-s", "--file_ext", default="txt.gz",
    help="File extension of the files in which run results are stored.")
args = parser.parse_args()

cutoffs = [1, 3, 10, -1]
metrics = []
scores = {}
for metric in "evaluation.NdcgEval", "evaluation.LetorNdcgEval":
    eval_class = get_class(metric)
    eval_metric = eval_class()
    metrics.append(eval_metric)
    scores[eval_metric.__class__.__name__] = {}
    for cutoff in cutoffs:
        scores[eval_metric.__class__.__name__][cutoff] = []

# load all queries
test_queries = {}
for fold in range(1, 6):
    test_file = "".join((args.test_dir, str(fold)))
    test_file = os.path.join(test_file, args.test_file)
    qs = load_queries(test_file, args.feature_count)
    test_queries[fold] = qs

# process all experiments for all metrics
def get_class(self, clazz_path):
    import utils
    return utils.get_class(clazz_path)
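# A minimal, self-contained sketch of what a dotted-path resolver like
# utils.get_class is assumed to do throughout these snippets; the real
# implementations may add suffix handling or custom error types.
import importlib


def resolve_class(dotted_path):
    """Return the class object named by 'package.module.ClassName'."""
    module_name, _, class_name = dotted_path.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, class_name)


# Example: resolve_class('collections.OrderedDict') is collections.OrderedDict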
parser.add_argument('--top_k', action='store', type=int, default=3,
                    help='how many most probable classes to print out')
parser.add_argument('--category_names', action='store',
                    help='file which maps classes to names')
parser.add_argument('--gpu', action='store_true',
                    help='use gpu to infer classes')
args = parser.parse_args()

if args.gpu:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = "cpu"

model = Model.load_model(args.checkpoint, args.gpu)
img = utils.process_image(args.input).to(device)
probs, classes = Model.predict(img, model, args.top_k)

if args.category_names is not None:
    classes = utils.get_class(classes, args.checkpoint, args.category_names)
else:
    classes = utils.get_class(classes, args.checkpoint, None)

utils.resultdisplay(img.to('cpu'), probs, classes, args.top_k)
utils.show_classes(probs, classes, args.top_k)
def __init__(self, log_fh, args):
    """Initialize an experiment using the provided arguments."""
    self.log_fh = log_fh
    self.ties = args["ties"] if "ties" in args else "first"
    # additional configuration: number of relevant documents
    # (number or "random")
    self.length = args["result_length"]
    self.num_relevant = args["num_relevant"]
    self.num_queries = args["num_queries"]
    self.um_class = get_class(args["user_model"])
    self.um_args = args["user_model_args"]
    self.um = self.um_class(self.um_args)
    self.pareto_um_class = get_class("environment.FederatedClickModel")
    self.pareto_um = self.pareto_um_class(None)
    # initialize interleaved comparison methods according to configuration
    parser = argparse.ArgumentParser(description="parse arguments of an "
                                     "evaluation method.",
                                     prog="evaluation method configuration")
    parser.add_argument("-c", "--class_name")
    parser.add_argument("-r", "--ranker", help="can be 'det' or 'prob'")
    parser.add_argument("-a", "--ranker_args")
    parser.add_argument("-i", "--interleave_method")
    self.rankers = {}
    self.methods = {}
    # init live methods
    if "evaluation_methods" in args:
        for method_id, method in enumerate(args["evaluation_methods"]):
            self.methods[method] = {}
            method_args_str = args["evaluation_methods_args"][method_id]
            method_args = vars(parser.parse_known_args(
                method_args_str.split())[0])
            class_name = method_args["class_name"]
            self.methods[method]["instance"] = \
                get_class(class_name)(method_args_str)
            ranker = method_args["ranker"]
            ranker_args = method_args["ranker_args"]
            self.methods[method]["ranker"] = ranker
            self.methods[method]["ranker_args"] = ranker_args
            if not ranker in self.rankers:
                self.rankers[ranker] = {}
            if not ranker_args in self.rankers[ranker]:
                self.rankers[ranker][ranker_args] = {}
    # init rankers needed by the comparison methods. rankers can be
    # deterministic (det) or probabilistic (prob), and can have different
    # arguments
    for ranker in self.rankers:
        for ranker_args in self.rankers[ranker]:
            if ranker == "det":
                self.rankers[ranker][ranker_args] = \
                    (SyntheticDeterministicRankingFunction(ranker_args,  # A
                                                           self.ties),
                     SyntheticDeterministicRankingFunction(  # B
                         ranker_args, self.ties))
            elif ranker == "prob":
                self.rankers[ranker][ranker_args] = \
                    (SyntheticProbabilisticRankingFunction(ranker_args,  # A
                                                           self.ties),
                     SyntheticProbabilisticRankingFunction(  # B
                         ranker_args, self.ties))
            else:
                raise ValueError("Unknown ranker: " + ranker)
    # generate synthetic better and worse rankers
    (self.docids, self.labels) = self._generate_synthetic_documents(
        self.length, self.num_relevant)
    (self.better, self.worse, self.labels) = \
        self._generate_synthetic_rankings_randomly(
            self.docids, self.labels, self.length,
            posmethod=args["vertical_posmethod"],
            docmethod=args["vertical_docmethod"],
            vertrel=args["vertical_vertrel"],
            blocksize=args["vertical_blocksize"],
            independentplacement=args["vertical_independentplacement"])
def __init__(self):
    # parse arguments
    parser = argparse.ArgumentParser(description="""Meta experiment""")
    file_group = parser.add_argument_group("FILE")
    file_group.add_argument("-f", "--file", help="Filename of the config "
                            "file from which the experiment details"
                            " should be read.")
    # option 2: specify all experiment details as arguments
    detail_group = parser.add_argument_group("DETAILS")
    detail_group.add_argument("-p", "--platform", help="Specify "
                              "'local' or 'celery'")
    detail_group.add_argument("--data", help="Data in the following "
                              "format: trainfile,testfile,d,r such that "
                              "a data file can be found in "
                              "datadir/trainfile/Fold1/train.txt",
                              type=str, nargs="+")
    detail_group.add_argument("--um", nargs="+")
    detail_group.add_argument("--uma", help="", type=str, nargs="+")
    detail_group.add_argument("--analysis", nargs="*")
    detail_group.add_argument("--data_dir")
    detail_group.add_argument("--output_base")
    detail_group.add_argument("--experiment_name")
    detail_group.add_argument("-r", "--rerun", action="store_true",
                              help="Rerun last experiment.", default=False)
    detail_group.add_argument("--queue_name", type=str)
    args = parser.parse_known_args()[0]

    logging.basicConfig(format="%(asctime)s %(module)s: %(message)s",
                        level=logging.INFO)

    # determine whether to use config file or detailed args
    self.experiment_args = None
    if args.file:
        config_file = open(args.file)
        self.experiment_args = yaml.load(config_file, Loader=Loader)
        config_file.close()
        try:
            self.meta_args = vars(parser.parse_known_args(
                self.experiment_args["meta"].split())[0])
        except:
            parser.error("Please make sure there is a 'meta' section "
                         "present in the config file")
        # overwrite with command-line options if given
        for arg, value in vars(args).items():
            if value:
                self.meta_args[arg] = value
    else:
        self.meta_args = vars(args)

    for k in self.meta_args.keys() + ["meta"]:
        if k in self.experiment_args:
            del self.experiment_args[k]

    if self.meta_args["platform"] == "local":
        self.run = self.run_local
    elif self.meta_args["platform"] == "celery":
        self.experiment_args["processes"] = 0
        self.run = self.run_celery
    elif self.meta_args["platform"] == "conf":
        self.run = self.run_conf
    else:
        parser.error("Please specify a valid platform.")

    usermodels = {}
    for umstr in self.meta_args["uma"]:
        parts = umstr.split(",")
        um, car = parts[:2]
        car = int(car)
        if len(parts) != car * 2 + 2:
            parser.error("Error in uma")
        p_click = ", ".join(parts[2:2 + car])
        p_stop = ", ".join(parts[2 + car:])
        if not um in usermodels:
            usermodels[um] = {}
        usermodels[um][car] = "--p_click %s --p_stop %s" % (p_click, p_stop)

    basedir = os.path.join(os.path.abspath(self.meta_args["output_base"]),
                           self.meta_args["experiment_name"])
    i = 0
    while os.path.exists(os.path.join(basedir, "v%03d" % i)):
        i += 1
    if i > 0 and self.meta_args["rerun"]:
        i -= 1
    logging.info("Running experiment v%03d" % i)
    basedir = os.path.join(basedir, "v%03d" % i)
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    logging.info("Results appear in %s" % basedir)

    config_bk = os.path.join(basedir, "meta_config_bk.yml")
    config_bk_file = open(config_bk, "w")
    yaml.dump(self.meta_args, config_bk_file, default_flow_style=False,
              Dumper=Dumper)
    config_bk_file.close()

    skip = 0
    self.configurations = []
    # for run_id in range(self.experiment_args["num_runs"]):
    for um in self.meta_args["um"]:
        for dstr in self.meta_args["data"]:
            data, d, r = dstr.split(",")
            d, r = int(d), int(r)
            user_model_args = usermodels[um][r]
            folds = glob.glob(os.path.join(
                os.path.abspath(self.meta_args["data_dir"]), data, "Fold*"))
            for fold in folds:
                args = self.experiment_args.copy()
                args["data_dir"] = self.meta_args["data_dir"]
                args["fold_dir"] = fold
                # args["run_id"] = run_id
                args["feature_count"] = d
                args["user_model_args"] = user_model_args
                args["output_dir"] = os.path.join(
                    basedir, "output", um, data, os.path.basename(fold))
                args["output_prefix"] = os.path.basename(fold)
                if self.meta_args["rerun"]:
                    if not os.path.exists(os.path.join(
                            args["output_dir"],
                            "%s-%d.txt.gz" % (args["output_prefix"],
                                              run_id))):
                        self.configurations.append(args)
                    else:
                        skip += 1
                else:
                    self.configurations.append(args)
    logging.info("Created %d configurations (and %d skipped)"
                 % (len(self.configurations), skip))

    self.analytics = []
    for analyse in self.meta_args["analysis"]:
        aclass = get_class(analyse)
        a = aclass(basedir)
        self.analytics.append(a)
def run_test(network, test_x, test_y, figure_path='figures', plot=True):
    """
    Will conduct the test suite to determine model strength.

    Args:
        test_x: data the model has not yet seen to predict
        test_y: corresponding truth vectors
        figure_path: string, folder to place images in.
        plot: bool, determines if graphs should be plotted when ran.
    """
    if network.num_classes is None or network.num_classes == 0:
        raise ValueError('There\'s no classification layer')

    if test_y.shape[1] > 1:
        test_y = get_class(test_y)  # Y is in one hot representation

    raw_prediction = network.forward_pass(input_data=test_x,
                                          convert_to_class=False)
    class_prediction = get_class(raw_prediction)

    confusion_matrix = get_confusion_matrix(prediction=class_prediction,
                                            truth=test_y)

    tp = np.diagonal(confusion_matrix).astype('float32')
    tn = (np.array([np.sum(confusion_matrix)] * confusion_matrix.shape[0]) -
          confusion_matrix.sum(axis=0) -
          confusion_matrix.sum(axis=1) + tp).astype('float32')
    # sum each column and remove diagonal
    fp = (confusion_matrix.sum(axis=0) - tp).astype('float32')
    # sum each row and remove diagonal
    fn = (confusion_matrix.sum(axis=1) - tp).astype('float32')

    sens = np.nan_to_num(tp / (tp + fn))  # recall
    spec = np.nan_to_num(tn / (tn + fp))
    sens_macro = np.nan_to_num(sum(tp) / (sum(tp) + sum(fn)))
    spec_macro = np.nan_to_num(sum(tn) / (sum(tn) + sum(fp)))
    dice = 2 * tp / (2 * tp + fp + fn)
    ppv = np.nan_to_num(tp / (tp + fp))  # precision
    ppv_macro = np.nan_to_num(sum(tp) / (sum(tp) + sum(fp)))
    npv = np.nan_to_num(tn / (tn + fn))
    npv_macro = np.nan_to_num(sum(tn) / (sum(tn) + sum(fn)))
    accuracy = np.sum(tp) / np.sum(confusion_matrix)
    f1 = np.nan_to_num(2 * (ppv * sens) / (ppv + sens))
    f1_macro = np.average(np.nan_to_num(2 * sens * ppv / (sens + ppv)))

    print('{} test\'s results'.format(network.name))

    print('TP:'), print(tp)
    print('FP:'), print(fp)
    print('TN:'), print(tn)
    print('FN:'), print(fn)

    print('\nAccuracy: {}'.format(accuracy))

    print('Sensitivity:'), print(round_list(sens, decimals=3))
    print('\tMacro Sensitivity: {:.4f}'.format(sens_macro))

    print('Specificity:'), print(round_list(spec, decimals=3))
    print('\tMacro Specificity: {:.4f}'.format(spec_macro))

    print('DICE:'), print(round_list(dice, decimals=3))
    print('\tAvg. DICE: {:.4f}'.format(np.average(dice)))

    print('Positive Predictive Value:'), print(round_list(ppv, decimals=3))
    print('\tMacro Positive Predictive Value: {:.4f}'.format(ppv_macro))

    print('Negative Predictive Value:'), print(round_list(npv, decimals=3))
    print('\tMacro Negative Predictive Value: {:.4f}'.format(npv_macro))

    print('f1-score:'), print(round_list(f1, decimals=3))
    print('\tMacro f1-score: {:.4f}'.format(f1_macro))
    print('')

    if not os.path.exists(figure_path):
        print('Creating figures folder')
        os.makedirs(figure_path)
    if not os.path.exists('{}/{}{}'.format(figure_path, network.timestamp,
                                           network.name)):
        print('Creating {}/{}{} folder'.format(figure_path,
                                               network.timestamp,
                                               network.name))
        os.makedirs('{}/{}{}'.format(figure_path,
                                     network.timestamp,
                                     network.name))
    print('Saving ROC figures to folder: {}/{}{}'.format(
        figure_path, network.timestamp, network.name))

    plt.figure()
    plt.title("Confusion matrix for {}".format(network.name))
    plt.xticks(range(confusion_matrix.shape[0]))
    plt.yticks(range(confusion_matrix.shape[0]))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.imshow(confusion_matrix, origin='lower', cmap='hot',
               interpolation='nearest')
    plt.colorbar()
    plt.savefig('{}/{}{}/confusion_matrix.png'.format(figure_path,
                                                      network.timestamp,
                                                      network.name))
    if not plot:
        plt.close()

    fig = plt.figure()
    all_class_auc = []
    for i in range(network.num_classes):
        if network.num_classes == 1:
            fpr, tpr, thresholds = metrics.roc_curve(test_y,
                                                     raw_prediction,
                                                     pos_label=1)
        else:
            fpr, tpr, thresholds = metrics.roc_curve(test_y,
                                                     raw_prediction[:, i],
                                                     pos_label=i)
        auc = metrics.auc(fpr, tpr)
        all_class_auc += [auc]
        # print('AUC: {:.4f}'.format(auc))
        # print('\tGenerating ROC {}/{}{}/{}.png ...'.format(
        #     figure_path, network.timestamp, network.name, i))
        plt.clf()
        plt.plot(fpr, tpr, label=("AUC: {:.4f}".format(auc)))
        plt.title("ROC Curve for {}_{}".format(network.name, i))
        plt.xlabel('1 - Specificity')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.ylim(0.0, 1.0)
        plt.xlim(0.0, 1.0)
        plt.savefig('{}/{}{}/{}.png'.format(figure_path,
                                            network.timestamp,
                                            network.name, i))
        if plot:
            plt.show(False)
    if not plot:
        plt.close(fig.number)

    print('Average AUC: {:.4f}'.format(np.average(all_class_auc)))
    return {
        'accuracy': accuracy,
        'macro_sensitivity': sens_macro,
        'macro_specificity': spec_macro,
        'avg_dice': np.average(dice),
        'macro_ppv': ppv_macro,
        'macro_npv': npv_macro,
        'macro_f1': f1_macro,
        'macro_auc': np.average(all_class_auc)
    }