Example #1
def add_word(request, kind):
    if request.method == 'POST':
        try:
            form = get_class(kind, 'Form')(request.POST)
        except AttributeError:
            raise Http404("No such form type found!")
        if form.is_valid():
            word = form.save()
            word.save()
            if kind == 'Noun':
                ExNNS.make_new(word)
                ExAAS.make_new(word)
                ExLNS.make_new(word)
            elif kind == 'Verb':
                ExIIV.make_new(word)
                ExKKV.make_new(word)
                ExPPV.make_new(word)
                ExFFV.make_new(word)
            return redirect('add')
    else:
        czech_word = request.session['word']['czech']
        word = Word.get_czech_word_type_and_wiki(
            czech_word, get_wikitext(czech_word, 'cz'))
        word_type = get_class(kind)
        word_json = word_type.make_czech_json(word)
        word_json['de'] = word_type.make_german_json(word_json['german'])
        form = get_class(kind, 'Form')(initial=word_json)
        return render(request, 'addwords/new_{}.html'.format(kind.lower()),
                      {'form': form})
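Every snippet on this page relies on a get_class utility defined in the surrounding project. For the Django views (Example #1 above, and #14 and #16 below) a minimal sketch of such a helper could look like the following; the module paths and the getattr-based lookup are assumptions for illustration, not the projects' actual layout.

import importlib

def get_class(kind, suffix=''):
    # Hypothetical sketch: resolve e.g. get_class('Noun') or get_class('Noun', 'Form')
    # from an assumed module; a missing attribute raises AttributeError,
    # which Example #1 turns into an Http404.
    module_path = 'addwords.forms' if suffix else 'addwords.models'  # assumed paths
    module = importlib.import_module(module_path)
    return getattr(module, kind + suffix)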
Example #2
    def is_pair(self, a, b, x, y):
        x = get_class(x)
        y = get_class(y)

        if isinstance(a, x) and isinstance(b, y):
            return (a, b)
        if isinstance(a, y) and isinstance(b, x):
            return (b, a)

        return False
Example #3
    def is_pair(self, a, b, x, y):
        x = get_class(x)
        y = get_class(y)

        if (isinstance(a, x) and isinstance(b, y)):
            return (a, b)
        if (isinstance(a, y) and isinstance(b, x)):
            return (b, a)

        return False
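A brief usage sketch for Examples #2 and #3: is_pair normalizes the order of two objects against two class names so the caller can unpack a predictable tuple. The object and class names below are illustrative only.

# Hypothetical call; 'Ball' and 'Paddle' stand in for whatever classes the project defines.
pair = self.is_pair(obj_a, obj_b, 'Ball', 'Paddle')
if pair:
    ball, paddle = pair  # the first element is always the 'Ball' instance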
Example #4
 def _get_ranker_pair(self, class_name, ranker_args, feature_pair,
                      feature_count, ties):
     return (get_class(class_name)(
         ranker_args,
         self._get_weight_vector(feature_pair[0], feature_count),
         ties=ties), get_class(class_name)(ranker_args,
                                           self._get_weight_vector(
                                               feature_pair[1],
                                               feature_count),
                                           ties=ties))
Example #5
    def __init__(self, feature_count, d, arg_str):
        # parse arguments
        parser = argparse.ArgumentParser(
            description="Initialize retrieval "
            "system with the specified feedback and learning mechanism.",
            prog="ListwiseLearningSystem")
        parser.add_argument("-w",
                            "--init_weights",
                            help="Initialization "
                            "method for weights (random, zero).",
                            required=True)
        parser.add_argument("--sample_weights", default="sample_unit_sphere")
        parser.add_argument("-c", "--comparison", required=True)
        parser.add_argument("-f", "--comparison_args", nargs="*")
        parser.add_argument("-r", "--ranker", required=True)
        parser.add_argument("-s", "--ranker_args", nargs="*")
        parser.add_argument("-t", "--ranker_tie", default="random")
        parser.add_argument("-d", "--delta", required=True, type=str)
        parser.add_argument("-a", "--alpha", required=True, type=str)
        parser.add_argument("--anneal", type=int, default=0)
        parser.add_argument("--normalize", default="False")
        args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])

        self.ranker_class = get_class(args["ranker"])
        self.ranker_args = args["ranker_args"]
        self.ranker_tie = args["ranker_tie"]
        self.sample_weights = args["sample_weights"]
        self.init_weights = args["init_weights"]
        self.feature_count = feature_count
        self.ranker = self.ranker_class(d,
                                        self.ranker_args,
                                        self.ranker_tie,
                                        self.feature_count,
                                        sample=self.sample_weights,
                                        init=self.init_weights)

        if "," in args["delta"]:
            self.delta = array([float(x) for x in args["delta"].split(",")])
        else:
            self.delta = float(args["delta"])
        if "," in args["alpha"]:
            self.alpha = array([float(x) for x in args["alpha"].split(",")])
        else:
            self.alpha = float(args["alpha"])

        self.anneal = args["anneal"]

        self.comparison_class = get_class(args["comparison"])
        if "comparison_args" in args and args["comparison_args"] != None:
            self.comparison_args = " ".join(args["comparison_args"])
            self.comparison_args = self.comparison_args.strip("\"")
        else:
            self.comparison_args = None
        self.comparison = self.comparison_class(self.comparison_args)
        self.query_count = 0
Example #6
    def __init__(self, feature_count, d, arg_str):
        # parse arguments
        parser = argparse.ArgumentParser(description="Initialize retrieval "
            "system with the specified feedback and learning mechanism.",
            prog="ListwiseLearningSystem")
        parser.add_argument("-w", "--init_weights", help="Initialization "
            "method for weights (random, zero).", required=True)
        parser.add_argument("--sample_weights", default="sample_unit_sphere")
        parser.add_argument("-c", "--comparison", required=True)
        parser.add_argument("-f", "--comparison_args", nargs="*")
        parser.add_argument("-r", "--ranker", required=True)
        parser.add_argument("-s", "--ranker_args", nargs="*")
        parser.add_argument("-t", "--ranker_tie", default="random")
        parser.add_argument("-d", "--delta", required=True, type=str)
        parser.add_argument("-a", "--alpha", required=True, type=str)
        parser.add_argument("--anneal", type=int, default=0)
        parser.add_argument("--normalize", default="False")
        args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])

        self.ranker_class = get_class(args["ranker"])
        self.ranker_args = args["ranker_args"]
        self.ranker_tie = args["ranker_tie"]
        self.sample_weights = args["sample_weights"]
        self.init_weights = args["init_weights"]
        self.feature_count = feature_count
        self.ranker = self.ranker_class(d,self.ranker_args,
                                        self.ranker_tie,
                                        self.feature_count,
                                        sample=self.sample_weights,
                                        init=self.init_weights)

        if "," in args["delta"]:
            self.delta = array([float(x) for x in args["delta"].split(",")])
        else:
            self.delta = float(args["delta"])
        if "," in args["alpha"]:
            self.alpha = array([float(x) for x in args["alpha"].split(",")])
        else:
            self.alpha = float(args["alpha"])

        self.anneal = args["anneal"]

        self.comparison_class = get_class(args["comparison"])
        if "comparison_args" in args and args["comparison_args"] != None:
            self.comparison_args = " ".join(args["comparison_args"])
            self.comparison_args = self.comparison_args.strip("\"")
        else:
            self.comparison_args = None
        self.comparison = self.comparison_class(self.comparison_args)
        self.query_count = 0
Example #7
    def _render_region(self, region):
        if region not in self.code_regions:
            raise RuntimeError("Region not scanned")
        lang = region.options.get('lang')
        renderer_name = self.config.renderers[lang]
        if not renderer_name:
            raise RuntimeError("No language specified for code block")
        renderer_cls = utils.get_class(renderer_name)
        if not renderer_cls:
            raise RuntimeError("Invalid renderer: {}".format(renderer_name
                                                             or '(none)'))

        format = Config(
            utils.load_json(os.path.join('languages', lang + '.json'), True))
        extra_format = self.formats.get(lang)
        if extra_format:
            format += extra_format
        renderer = renderer_cls(self.config, format)
        text = renderer.render(region)
        self._text = (self._text[:region.start] + text +
                      self._text[region.start + region.length:])
        self.code_regions.remove(region)
        new_region = Region(self, region.start, len(text))
        self._update_regions(new_region.start,
                             new_region.length - region.length)
Example #8
    def forward_pass(self, input_data, m=0, convert_to_class=False):
        """
        Get output of ensemble of the last m networks where m <= n_snapshots.

        Args:
            input_data: Numpy matrix to make the predictions on
            m: use the m most recent models from the ensemble;
               defaults to 0, which uses all models.
            convert_to_class: return class predictions from ensemble
        """
        if m < 0 or m > len(self.networks):
            print('Select the m most recent models to get output from. '
                  'Setting m to 0 (default to all models)\n')
            m = 0

        prediction_collection = []
        for net in self.networks[-m:]:
            prediction_collection += [net.forward_pass(input_data=input_data,
                                                       convert_to_class=False)]
        prediction_collection = np.array(prediction_collection)
        raw_prediction = np.mean(prediction_collection, axis=0,
                                 dtype='float32')
        if convert_to_class:
            return get_class(raw_prediction)
        else:
            return raw_prediction
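Note that the get_class in Examples #8 and #22 is a different utility from the class loader used elsewhere on this page: it converts raw network outputs into class predictions. A minimal sketch under that assumption:

import numpy as np

def get_class(raw_prediction):
    # Assumed behaviour: pick the most probable class per row of the output matrix;
    # the real utility may instead return one-hot vectors.
    return np.argmax(raw_prediction, axis=1)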
Example #9
 def __init__(self, feature_count, arg_str):
     self.feature_count = feature_count
     # parse arguments
     parser = argparse.ArgumentParser(description="Initialize retrieval "
         "system with the specified feedback and learning mechanism.",
         prog="PairwiseLearningSystem")
     parser.add_argument("-w", "--init_weights", help="Initialization "
         "method for weights (random, zero, fixed).", required=True)
     parser.add_argument("-e", "--epsilon", required=True, type=float)
     parser.add_argument("-f", "--eta", required=True, type=float)
     parser.add_argument("-l", "--lamb", type=float, default=0.0)
     parser.add_argument("-r", "--ranker", required=True)
     parser.add_argument("-s", "--ranker_args", nargs="*")
     parser.add_argument("-t", "--ranker_tie", default="random")
     args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])
     # initialize weights, comparison method, and learner
     w = self.initialize_weights(args["init_weights"], self.feature_count)
     self.ranker_class = get_class(args["ranker"])
     if "ranker_args" in args and args["ranker_args"] != None:
         self.ranker_args = " ".join(args["ranker_args"])
         self.ranker_args = self.ranker_args.strip("\"")
     else:
         self.ranker_args = None
     self.ranker_tie = args["ranker_tie"]
     self.ranker = self.ranker_class(self.ranker_args, w, self.ranker_tie)
     self.epsilon = args["epsilon"]
     self.eta = args["eta"]
     self.lamb = args["lamb"]
Example #10
    def cal(self, mod_rootdir=None, model_dirpaths=None, example_dirname='clean_example_data', n_samples=100):
        """
        Implements calibration
        :param mod_rootdir: directory containing a bunch of model directories to be used for calibration. Either this or
         model_dirpaths should be set.
        :param model_dirpaths: list of model directories to be used for calibration. Either this or mod_rootdir should
        be set.
        :param example_dirname: name of the (clean) example data directory in each model directory
        :param n_samples: number of noisy samples of each data point
        :return: numpy array of calibrated probabilities, ordered like the model_dirpaths (or sorted directories in
        mod_rootdir)
        """

        assert (mod_rootdir is not None) != (model_dirpaths is not None), "set either mod_rootdir or model_dirpaths"
        if model_dirpaths is None:
            print("deprecation warning: using mod_rootdir is deprecated in favor of explicitly setting model_dirpaths")
            model_dirpaths = utils.get_modeldirs(mod_rootdir)

        y = np.array([utils.get_class(os.path.join(pth, 'config.json'), classtype='binary', file=True) for pth in model_dirpaths])
        y = y.reshape(-1, 1) * np.ones([1, n_samples])
        y = y.reshape(-1)

        x = np.zeros([len(y), len(self.components)])
        order = [i for i in range(len(self.components))]
        random.shuffle(order)
        for i in order:
            print('starting calibration for ensemble component', i)
            component = self.components[i]
            pcal = component.cal(model_dirpaths=model_dirpaths,
                                 example_dirname=example_dirname, n_samples=n_samples)
            x[:, i] = pcal

        pcal = self.ens_cal(x, y)
        return pcal
Example #11
    def build_context(self, json):
        logger.info("Constructing context.")

        context = {}
        components = json["Workflow"]

        for module, slots in sorted(components.items()):
            for slot_name, slot_details in slots.items():
                if not slot_name in context:
                    slot = {slot_name: {"content": None}}
                    if slot_details:
                        uri = slot_details["uri"]
                        slot[slot_name]["uri"] = uri
                    context.update(slot)
                else:
                    # the slot already exists; just refresh its uri if one is given
                    if slot_details:
                        context[slot_name]["uri"] = slot_details["uri"]

        # add global configuration here
        if "global_config" in json.keys():
            if isinstance(json["global_config"], dict):
                context["global_config"] = json["global_config"]
            elif isinstance(json["global_config"], str):
                # trying to build a dict from configuration code...
                import utils
                global_config = utils.get_class(json["global_config"])
                context["global_config"] = global_config

        return context
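For reference, an assumed shape of the JSON that build_context walks; the module and slot names are purely illustrative.

# Hypothetical workflow description for Example #11.
workflow = {
    "Workflow": {
        "reader": {"raw_text": {"uri": "file:///tmp/input.txt"}},
        "tokenizer": {"tokens": None},
    },
    "global_config": {"language": "en"},
}
# context = builder.build_context(workflow)  # 'builder' is an assumed instance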
Example #12
 def __init__(self, feature_count, arg_str):
     self.feature_count = feature_count
     # parse arguments
     parser = argparse.ArgumentParser(
         description="Initialize retrieval "
         "system with the specified feedback and learning mechanism.",
         prog="PairwiseLearningSystem")
     parser.add_argument("-w",
                         "--init_weights",
                         help="Initialization "
                         "method for weights (random, zero, fixed).",
                         required=True)
     parser.add_argument("-e", "--epsilon", required=True, type=float)
     parser.add_argument("-f", "--eta", required=True, type=float)
     parser.add_argument("-l", "--lamb", type=float, default=0.0)
     parser.add_argument("-r", "--ranker", required=True)
     parser.add_argument("-s", "--ranker_args", nargs="*")
     parser.add_argument("-t", "--ranker_tie", default="random")
     args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])
     # initialize weights, comparison method, and learner
     w = self.initialize_weights(args["init_weights"], self.feature_count)
     self.ranker_class = get_class(args["ranker"])
     if "ranker_args" in args and args["ranker_args"] != None:
         self.ranker_args = " ".join(args["ranker_args"])
         self.ranker_args = self.ranker_args.strip("\"")
     else:
         self.ranker_args = None
     self.ranker_tie = args["ranker_tie"]
     self.ranker = self.ranker_class(self.ranker_args, w, self.ranker_tie)
     self.epsilon = args["epsilon"]
     self.eta = args["eta"]
     self.lamb = args["lamb"]
Example #13
 def get(self, id, props = None):    
     if props:
         props.append('_clazz')
         props.append('id')
     retrieved_dict = self._get_column_family().get(id, props)
     clazz_name = retrieved_dict['_clazz']
     clazz = get_class(clazz_name)
     return object_from_key_jsonvalue_dict(clazz, retrieved_dict)
Example #14
def get_exercise(request, kind):
    """
    Get new exercise via REST framework
    """
    ex = get_class(kind)
    if request.method == 'GET':
        exercise = ex.random()
        serializer_class = get_class(kind, 'Serializer')
        serializer = serializer_class(exercise)
        return JsonResponse(serializer.data)

    elif request.method == 'POST':
        ex_id = request.COOKIES.get('id')
        body = json.loads(request.body.decode("utf-8").replace("'", '"'))
        database = get_object_or_404(ex, id=ex_id)
        db_czech = database.czech[2:-2].split("', '")
        status = body['answer'] in db_czech
        return JsonResponse({'status': status, 'correct_answer': db_czech})
Example #15
 def __init__(self, training_queries, test_queries, feature_count, log_fh,
              args):
     """Initialize an experiment using the provided arguments."""
     self.log_fh = log_fh
     self.training_queries = training_queries
     self.test_queries = test_queries
     self.feature_count = feature_count
     # construct system according to provided arguments
     self.num_queries = args["num_queries"]
     self.query_sampling_method = args["query_sampling_method"]
     self.um_class = get_class(args["user_model"])
     self.um_args = args["user_model_args"]
     self.um = self.um_class(self.um_args)
     self.system_class = get_class(args["system"])
     self.system_args = args["system_args"]
     self.system = self.system_class(self.feature_count, self.system_args)
     #if isinstance(self.system, AbstractOracleSystem):
     #    self.system.set_test_queries(self.test_queries)
     self.evaluations = {}
     for evaluation in args["evaluation"]:
         self.evaluation_class = get_class(evaluation)
         self.evaluations[evaluation] = self.evaluation_class()
Example #16
def exercise(request, kind):
    try:
        form = get_class(kind, 'Form')
    except AttributeError:
        raise Http404("No such exercise type found!")
    return render(
        request,
        'quiz/home.html',
        context={
            'form': form,
            'kind': kind
        },
    )
Example #17
 def __init__(self, training_queries, test_queries, feature_count, log_fh,
         args):
     """Initialize an experiment using the provided arguments."""
     self.log_fh = log_fh
     self.training_queries = training_queries
     self.test_queries = test_queries
     self.feature_count = feature_count
     # construct system according to provided arguments
     self.num_queries = args["num_queries"]
     self.query_sampling_method = args["query_sampling_method"]
     self.um_class = get_class(args["user_model"])
     self.um_args = args["user_model_args"]
     self.um = self.um_class(self.um_args)
     self.system_class = get_class(args["system"])
     self.system_args = args["system_args"]
     self.system = self.system_class(self.feature_count, self.system_args)
     #if isinstance(self.system, AbstractOracleSystem):
     #    self.system.set_test_queries(self.test_queries)
     self.evaluations = {}
     for evaluation in args["evaluation"]:
         self.evaluation_class = get_class(evaluation)
         self.evaluations[evaluation] = self.evaluation_class()
Example #18
def cal(out_fn,
        base_folder='data/round3models',
        example_folder_name='clean_example_data'):
    """
    :param refn:
    :param out_fn:
    :param base_folder:
    :return:
    """

    from sklearn.isotonic import IsotonicRegression
    from sklearn.metrics import log_loss, roc_auc_score
    import os

    calpath = 'calibration/data/' + out_fn + '_caldata.p'

    if os.path.exists(calpath):
        with open(calpath, 'rb') as f:
            ldirs, pcal = pickle.load(f)
        return ldirs, pcal

    acc_drops = []
    y = []

    dirs = os.listdir(path=base_folder)
    for dir in dirs:
        example_path = os.path.join(base_folder, dir, example_folder_name)
        model_path = os.path.join(base_folder, dir, 'model.pt')
        acc_drop = get_accdrop(model_path, example_path)
        truth_fn = os.path.join(base_folder, dir, 'config.json')
        cls = utils.get_class(truth_fn, classtype='binary', file=True)
        acc_drops.append(acc_drop)
        y.append(cls)

    ir_model = IsotonicRegression(out_of_bounds='clip')
    pcal = ir_model.fit_transform(acc_drops, y)
    kld = log_loss(y, pcal)
    # print(kld)
    roc1 = roc_auc_score(y, np.array(pcal))
    print(out_fn, 'AUC:', roc1, 'KLD:', kld)

    # dump(ir_model, 'data/classifiers/blur' + '_ir.joblib')
    dump(ir_model, 'calibration/fitted/' + out_fn)
    pcal = pcal[np.argsort(dirs)]
    dirs.sort()
    with open(calpath, 'wb') as f:
        pickle.dump([dirs, pcal], f)

    return dirs, pcal
Example #19
    def register_actions(self, action_config_file):
        """
        注册动作。通过配置文件来配置命令名到命令类的映射
        :param action_config_file:
        :return:
        """
        with open(action_config_file) as fp:
            action_data = json.load(fp)

        if "actions" in action_data:
            for action_config in action_data["actions"]:
                cls = utils.get_class(action_config["class_name"])
                if cls:
                    self.action_classes[action_config["name"]] = cls
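An assumed layout for the configuration file read by register_actions; the action names and class paths are invented for illustration.

import json

sample_actions = {
    "actions": [
        {"name": "deploy", "class_name": "actions.deploy.DeployAction"},
        {"name": "rollback", "class_name": "actions.rollback.RollbackAction"},
    ]
}
with open("actions.json", "w") as fp:
    json.dump(sample_actions, fp, indent=2)
# dispatcher.register_actions("actions.json")  # 'dispatcher' is an assumed instance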
Example #20
def create_object(full_class_name = None):
    """Instantiate an object by just by a string representation of its class.
    The object must not have required arguments to the __init__ function.
    Makes use of the get_class function of the utils module.
    
    If full_class_name is not specified (None) then a base class XMLObject
    will be provided"""

    if full_class_name != None:
        class_obj = get_class(full_class_name)
        try:
            return class_obj()
        except TypeError, e:
            raise Exception("Failed to instantiate %s: %s" % (str(class_obj),
                                                              str(e)))
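A one-line usage sketch for Example #20; the dotted class path is illustrative and the target __init__ must take no required arguments.

obj = create_object("myproject.xmlobjects.XMLObject")  # assumed path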
Example #21
    def __init__(self, state):
        x_slice_s = state['x_slice']
        y_slice_s = state['y_slice']
        data_transforms = None
        if "data_transforms" in state:
            data_transforms = {}
            for key, val in state["data_transforms"].items():
                if len(val.keys()) != 1:
                    raise ValueError()
                data_transforms[key] = get_class(
                    "data.data_utils.%s" %
                    list(val.keys())[0][2:-2])(**list(val.values())[0])

        super().__init__(x_slice=slice(x_slice_s['start'], x_slice_s['stop'],
                                       x_slice_s['step']),
                         y_slice=slice(y_slice_s['start'], y_slice_s['stop'],
                                       y_slice_s['step']),
                         name=state['name'],
                         normalize=state["normalize"],
                         uniqueness_threshold=state['uniqueness_threshold'],
                         data_transforms=data_transforms)
        class_name = state['name'] if state['name'].find(
            '_') < 0 else state['name'][:state['name'].find('_')]
        self.__class__ = get_class("data.%s" % class_name)
Example #22
    def forward_pass(self, input_data, convert_to_class=False):
        """
        Allow the implementer to quickly get outputs from the network.

        Args:
            input_data: Numpy matrix to make the predictions on
            convert_to_class: if True, return the class with the
                              highest probability instead of raw outputs

        Returns: Numpy matrix with the output probabilities
                 for each class, unless convert_to_class is set.
        """
        if convert_to_class:
            return get_class(self.output(input_data))
        else:
            return self.output(input_data)
Example #23
    def cal(self, mod_rootdir=None, model_dirpaths=None, example_dirname='clean_example_data', n_samples=100):
        """
        Implements calibration
        :param mod_rootdir: directory containing a bunch of model directories to be used for calibration. Either this or
         model_dirpaths should be set.
        :param model_dirpaths: list of model directories to be used for calibration. Either this or mod_rootdir should
        be set.
        :param example_dirname: name of the (clean) example data directory in each model directory
        :param n_samples: number of noisy samples of each data point
        :return: numpy array of calibrated probabilities, ordered like the model_dirpaths (or sorted directories in
        mod_rootdir)
        """

        assert (mod_rootdir is not None) != (model_dirpaths is not None), "set either mod_rootdir or model_dirpaths"
        if model_dirpaths is None:
            print("deprecation warning: using mod_rootdir is deprecated in favor of explicitly setting model_dirpaths")
            model_dirpaths = utils.get_modeldirs(mod_rootdir)

        # get the data for calibration
        mags = self.get_cal_data(model_dirpaths, example_dirname, n_samples=n_samples)
        mags = mags.reshape(-1)
        y = np.array([utils.get_class(os.path.join(pth, 'config.json'), classtype='binary', file=True) for pth in
                      model_dirpaths])
        if n_samples is not None:
            y = y.reshape(-1, 1) * np.ones([1, n_samples])
            y = y.reshape(-1)

        # check for saved model
        irpath = self.get_irpath()
        if os.path.exists(irpath) and not self.overwrite:
            ir_model = joblib.load(irpath)
        else:
            # run the calibration & save model
            ir_model = IsotonicRegression(out_of_bounds='clip')
            clippedmags = np.clip(mags, np.percentile(mags, 10), np.percentile(mags, 90))
            # clippedmags = np.clip(mags, np.percentile(mags, 25), np.percentile(mags, 75))

            ir_model.fit(clippedmags, y)
            joblib.dump(ir_model, irpath)

        # get & return the calibrated probabilities
        pcal = ir_model.transform(mags)
        return pcal
Example #24
 def recognize(self, image):
     """
          Recognize a face against the list of known faces.
          Argument:
              - image: np.ndarray in RGB order
          Return: None if the image has no face or only unknown faces; otherwise, the predicted label
     """
     faces = self.detector.detect_faces(image)
     if not faces:
         return None
     face = faces[0]
     emb = self.extractor.get_embeddings(face)
     distances, indices = self.index.search(emb, self.knn)
     indices = indices[distances < self.max_distance]
     if indices.shape[0] == 0:
         return None
     classes = self.df.loc[indices, 'class']
     return get_class(classes)
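The get_class in Example #24 appears to aggregate the labels of the nearest neighbours rather than load a class object. A sketch under that assumption, using a simple majority vote:

from collections import Counter

def get_class(classes):
    # 'classes' is a pandas Series of neighbour labels; ties are broken arbitrarily.
    return Counter(classes).most_common(1)[0][0]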
Example #25
def ask_obj(sentence):
    '''
    retrieve sentences in clips
    matching the given sentence.
    Can use variables.
    '''
    clps, templs = get_instances(sentence)
    sens = []
    if clps:
        if isinstance(sentence, Thing):
            for ins in clps:
                sens.append(Namable.from_clips(ins))
        elif isinstance(sentence, Fact):
            for ins in clps:
                i = clips.FindInstance(ins)
                if issubclass(utils.get_class(str(i.Class.Name)), Fact):
                    sens.append(Fact.from_clips(ins))
    return sens
Example #26
    def __init__(self,
                 ranker_arg_str,
                 ties,
                 feature_count,
                 init=None,
                 sample=None):

        self.feature_count = feature_count
        ranking_model_str = "ranker.model.Linear"
        for arg in ranker_arg_str:
            if arg.startswith("ranker.model"):
                ranking_model_str = arg
            else:
                self.ranker_type = float(arg)
        self.ranking_model = get_class(ranking_model_str)(feature_count)

        self.sample = getattr(__import__("utils"), sample)

        self.ties = ties
        self.w = self.ranking_model.initialize_weights(init)
Example #27
    def __init__(self,
                 ranker_arg_str,
                 ties,
                 feature_count,
                 init=None,
                 sample=None):

        self.feature_count = feature_count
        ranking_model_str = "ranker.model.Linear"
        for arg in ranker_arg_str:
            if arg.startswith("ranker.model"):
                ranking_model_str = arg
            else:
                self.ranker_type = float(arg)
        self.ranking_model = get_class(ranking_model_str)(feature_count)

        self.sample = getattr(__import__("utils"), sample)

        self.ties = ties
        self.w = self.ranking_model.initialize_weights(init)
Example #28
    def _render_region(self, region):
        if region not in self.code_regions:
            raise RuntimeError("Region not scanned")
        lang = region.options.get("lang")
        renderer_name = self.config.renderers[lang]
        if not renderer_name:
            raise RuntimeError("No language specified for code block")
        renderer_cls = utils.get_class(renderer_name)
        if not renderer_cls:
            raise RuntimeError("Invalid renderer: {}".format(renderer_name or "(none)"))

        format = Config(utils.load_json(os.path.join("languages", lang + ".json"), True))
        extra_format = self.formats.get(lang)
        if extra_format:
            format += extra_format
        renderer = renderer_cls(self.config, format)
        text = renderer.render(region)
        self._text = self._text[: region.start] + text + self._text[region.start + region.length :]
        self.code_regions.remove(region)
        new_region = Region(self, region.start, len(text))
        self._update_regions(new_region.start, new_region.length - region.length)
Example #29
def register_questions_types(*tuples):
    """
    Take a list of tuples and return a list of ``dicts``.  Each ``dict``
    contains the following keys:

        * ``pretty_name`` - Human-readable name of question type
        * ``slug``        - Url-safe name of question type
        * ``class``       - class definition
    """
    types = list(itertools.chain(*tuples))
    question_types = []
    for t in types:
        try:
            class_ = get_class(t, QuestionTypeRegisterError)
        except QuestionTypeRegisterError:
            print 'Failed to register %s' % t
            continue
        question_types.append({
            'pretty_name': class_.pretty_name(),
            'slug': class_._meta.module_name,
            'class': class_
        })
    return question_types
Example #30
def register_questions_types(*tuples):
    """
    Take a list of tuples and return a list of ``dicts``.  Each ``dict``
    contains the following keys:

        * ``pretty_name`` - Human-readable name of question type
        * ``slug``        - Url-safe name of question type
        * ``class``       - class definition
    """
    types = list(itertools.chain(*tuples))
    question_types = []
    for t in types:
        try:
            class_ = get_class(t, QuestionTypeRegisterError)
        except QuestionTypeRegisterError:
            print 'Failed to register %s' % t
            continue
        question_types.append({
            'pretty_name': class_.pretty_name(),
            'slug': class_._meta.module_name,
            'class': class_
        })
    return question_types
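A hedged calling sketch for Examples #29 and #30; the dotted paths are invented and assume get_class resolves them to question-type classes exposing pretty_name() and _meta.

question_types = register_questions_types(
    ("quiz.types.MultipleChoiceQuestion", "quiz.types.TrueFalseQuestion"),
    ("quiz.types.EssayQuestion",),
)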
Example #31
def uploaded_file(filename):
    img_name = 'static/img/' + filename + '.png'
    filename = os.path.join('static/pdf', filename)
    x, exit_status = from_pdf_to_vector(filename, tfidf)
    if exit_status != 0:
        flash('Sorry, it seems that something went wrong. Please try again.')
        return redirect('/')
    else:
        #Get the classes that best suit the article and the corresponding probabilities
        prob, clss = get_class(x, logr)
        #Find the first 10 similar articles to the one uploaded in the database
        similar_pos = find_similar(database['X'], x, 9)[0]

        # Pie chart, where the slices will be ordered and plotted counter-clockwise:

        fig, ax = plt.subplots()
        ax.pie(prob,
               labels=clss,
               autopct='%1.1f%%',
               shadow=True,
               startangle=160,
               wedgeprops={
                   "edgecolor": "0",
                   'linewidth': 1
               })
        ax.axis('equal'
                )  # Equal aspect ratio ensures that pie is drawn as a circle.
        img_name = 'static/img/plot' + str(randint(0, 500)) + '.png'
        plt.savefig(img_name, bbox_inches='tight', pad_inches=0)

        return render_template("classification.html",
                               filename=os.path.join('/', filename),
                               img=os.path.join('/', img_name),
                               prob=prob,
                               clss=clss,
                               similar=database['links'][similar_pos])
Example #32
 def __init__(self, queries, feature_count, log_fh, args):
     """Initialize an experiment using the provided arguments."""
     self.log_fh = log_fh
     self.queries = queries
     self.feature_count = feature_count
     self.ties = "first"
     # construct experiment according to provided arguments
     self.result_length = args["result_length"]
     self.num_queries = args["num_queries"]
     self.query_sampling_method = args["query_sampling_method"]
     self.um_class = get_class(args["user_model"])
     self.um_args = args["user_model_args"]
     self.um = self.um_class(self.um_args)
     # set up methods to compare
     parser = argparse.ArgumentParser(description="parse arguments of an "
         "evaluation method.", prog="evaluation method configuration")
     parser.add_argument("-c", "--class_name")
     parser.add_argument("-r", "--ranker")
     parser.add_argument("-a", "--ranker_args")
     parser.add_argument("-i", "--interleave_method")
     self.rankers = {}
     self.live_methods = {}
     self.hist_methods = {}
     self.ndcg = evaluation.NdcgEval()
     # init live methods
     if "live_evaluation_methods" in args:
         for method_id, method in enumerate(
                 args["live_evaluation_methods"]):
             self.live_methods[method] = {}
             method_args_str = \
                 args["live_evaluation_methods_args"][method_id]
             method_args = vars(parser.parse_known_args(
                 method_args_str.split())[0])
             class_name = method_args["class_name"]
             self.live_methods[method]["instance"] = \
                 get_class(class_name)(method_args_str)
             ranker = method_args["ranker"]
             ranker_args = method_args["ranker_args"]
             self.live_methods[method]["ranker"] = ranker
             self.live_methods[method]["ranker_args"] = ranker_args
             if not ranker in self.rankers:
                 self.rankers[ranker] = {}
             if not ranker_args in self.rankers[ranker]:
                 self.rankers[ranker][ranker_args] = {}
     # init hist methods
     if "hist_evaluation_methods" in args:
         for method_id, method in enumerate(
                 args["hist_evaluation_methods"]):
             self.hist_methods[method] = {}
             method_args_str = \
                 args["hist_evaluation_methods_args"][method_id]
             method_args = vars(parser.parse_known_args(
                 method_args_str.split())[0])
             class_name = method_args["class_name"]
             self.hist_methods[method]["instance"] = \
                 get_class(class_name)(method_args_str)
             ranker = method_args["ranker"]
             ranker_args = method_args["ranker_args"]
             self.hist_methods[method]["ranker"] = method_args["ranker"]
             self.hist_methods[method]["ranker_args"] = \
                 method_args["ranker_args"]
             if not ranker in self.rankers:
                 self.rankers[ranker] = {}
             if not ranker_args in self.rankers[ranker]:
                 self.rankers[ranker][ranker_args] = {}
             self.hist_methods[method]["interleave_method"] = \
             get_class(method_args["interleave_method"])()
     # sample source and target ranker pair, create deterministic and
     # probabilistic ranker pairs
     self.source_pair = [0, 0]
     self.source_pair[0] = self._sample_ranker_without_replacement(
         self.feature_count, [])
     self.source_pair[1] = self._sample_ranker_without_replacement(
         self.feature_count, [self.source_pair[0]])
     self.target_pair = [0, 0]
     self.target_pair[0] = self._sample_ranker_without_replacement(
         self.feature_count, self.source_pair)
     self.target_pair[1] = self._sample_ranker_without_replacement(
         self.feature_count, [self.target_pair[0], self.source_pair[0],
         self.source_pair[1]])
     # init rankers needed by live and/or hist methods
     for ranker in self.rankers:
         for ranker_args in self.rankers[ranker]:
             self.rankers[ranker][ranker_args]["source"] = \
                 self._get_ranker_pair(ranker, ranker_args,
                 self.source_pair, self.feature_count, self.ties)
             self.rankers[ranker][ranker_args]["target"] = \
                 self._get_ranker_pair(ranker, ranker_args,
                 self.target_pair, self.feature_count, self.ties)
Example #33
def train(source, target):

    scaled_logits, src_acc, trgt_acc, grad = build_graph(source, target)

    init = tf.global_variables_initializer()
    summaries = tf.merge_all_summaries()

    if not path.isdir(savedir):
        print('No models found. Start training.')
        covnet_model.train()

    create_directories()

    if raw_input('Do you want to use your own weights? [y/N] ') == 'y':
        fname = raw_input('Enter saved model name > ')
        weights = path.join(savedir, fname)
    else:
        weights = path.join(savedir, 'default')

    with tf.Session() as sess:
        sess.run(init)
        covnet_model.saver.restore(sess, weights)
        print('Weights restored.')

        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

        writer = tf.train.SummaryWriter(logdir, graph=sess.graph)

        src_images, src_labels = get_class(source, mnist.test.images,
                                           mnist.test.labels)

        # pick a random image that is correctly classified by CNN
        k = 0
        while True:
            original = src_images[np.newaxis, k]
            label = src_labels[np.newaxis, k]
            image = np.copy(original)

            l = scaled_logits.eval(
                feed_dict={
                    covnet_model.x: original,
                    covnet_model.y: label,
                    covnet_model.keep_prob: 1.
                })

            if np.argmax(l) == source:
                # correctly classified, use this image
                break
            k += 1  # otherwise move on to the next candidate image

        print('Generating Adversarial Image...')
        print('Open tensorboard to visualize.')

        # train loop
        i = 0
        target_acc = 0.
        start_acc = []

        while target_acc < .99:  # fool to 99% acc
            source_acc, target_acc, dimg, summ = sess.run(
                [src_acc, trgt_acc, grad, summaries],
                feed_dict={
                    covnet_model.x: image,
                    covnet_model.y: label,
                    covnet_model.keep_prob: 1.
                })

            if i == 0:
                start_acc.extend([source_acc, target_acc])

            writer.add_summary(summ, global_step=i)

            image = image + learning_rate * dimg.reshape(1, 28 * 28)

            diff = np.abs(original - image)

            print("%d  source_acc %.5f, target_acc %.5f, sum: %.5f" %
                  (i, source_acc, target_acc, np.sum(diff)))

            i += 1

        print('Adversarial example generated.')

        # Show the example
        fig = plt.figure(figsize=(30, 10))

        plt.subplot(131)
        plt.imshow(original.reshape(28, 28), cmap='gray')
        plt.axis('off')
        plt.title('Original. source: (%f), target: (%f)' % tuple(start_acc))

        plt.subplot(132)
        plt.imshow(diff.reshape(28, 28), cmap='gray')
        plt.title('Delta (%f)' % np.sum(diff))
        plt.axis('off')

        plt.subplot(133)
        plt.imshow(image.reshape(28, 28), cmap='gray')
        plt.axis('off')
        plt.title('Adversarial source: (%f), target: (%f)' %
                  (source_acc, target_acc))

        plt.show()

        # ask to save
        while True:
            prompt = raw_input('Do you want to save this example? [y/N] ')

            if prompt == 'y':
                fname = raw_input(
                    'Enter name of npy file without extension > ')
                np.savez(path.join(exampledir, fname),
                         source=original,
                         delta=diff,
                         target=image,
                         source_acc=source_acc,
                         target_acc=target_acc)
                break
            elif prompt == 'N':
                break

        covnet_model.train_sess.close()
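In Example #33, get_class filters the MNIST test set down to a single digit class. A minimal sketch under that assumption:

import numpy as np

def get_class(digit, images, labels):
    # Assumed behaviour: keep only the images whose one-hot label matches `digit`.
    mask = np.argmax(labels, axis=1) == digit
    return images[mask], labels[mask]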
Example #34
    def __init__(self, args_str=None):
        # parse arguments
        parser = argparse.ArgumentParser(description="""
            Construct and run a learning experiment. Provide either the name
            of a config file from which the experiment configuration is
            read, or provide all arguments listed under Command line. If
            both are provided the  config file is ignored.""",
            prog=self.__class__.__name__)

        # option 1: use a config file
        file_group = parser.add_argument_group("FILE")
        file_group.add_argument("-f", "--file", help="Filename of the config "
                                "file from which the experiment details"
                                " should be read.")

        # option 2: specify all experiment details as arguments
        detail_group = parser.add_argument_group("DETAILS")
        detail_group.add_argument("-i", "--training_queries",
            help="File from which to load the training queries (svmlight "
            "format).")
        detail_group.add_argument("-j", "--test_queries",
            help="File from which to load the test queries (svmlight format).")
        detail_group.add_argument("-c", "--feature_count", type=int,
            help="The number of features included in the data.")
        detail_group.add_argument("-r", "--num_runs", type=int,
            help="Number of runs (how many times to repeat the experiment).")
        detail_group.add_argument("-q", "--num_queries", type=int,
            help="Number of queries in each run.")
        detail_group.add_argument("-u", "--user_model",
            help="Class implementing a user model.")
        detail_group.add_argument("-v", "--user_model_args",
            help="Arguments for initializing the user model.")
        # the retrieval system maintains ranking functions, accepts queries and
        # generates result lists, and in return receives user clicks to learn
        # from
        detail_group.add_argument("-s", "--system",
            help="Which system to use (e.g., pairwise, listwise).")
        detail_group.add_argument("-a", "--system_args", help="Arguments for "
                                  "the system (comparison method, learning "
                                  "algorithm and parameters...).")
        detail_group.add_argument("-o", "--output_dir",
            help="(Empty) directory for storing output generated by this"
            " experiment. Subdirectory for different folds will be generated"
            "automatically.")
        detail_group.add_argument("--output_dir_overwrite", default="False")
        detail_group.add_argument("-p", "--output_prefix",
            help="Prefix to be added to output filenames, e.g., the name of "
            "the data set, fold, etc. Output files will be stored as "
            "OUTPUT_DIR/PREFIX-RUN_ID.txt.gz")
        detail_group.add_argument("-e", "--experimenter",
            help="Experimenter type.")
        # run the parser
        if args_str:
            args = parser.parse_known_args(args_str.split())[0]
        else:
            args = parser.parse_known_args()[0]

        # determine whether to use config file or detailed args
        self.experiment_args = None
        if args.file:
            config_file = open(args.file)
            self.experiment_args = yaml.load(config_file)
            config_file.close()
            # overwrite with command-line options if given
            for arg, value in vars(args).items():
                if value:
                    self.experiment_args[arg] = value
        else:
            self.experiment_args = vars(args)

        # workaround - check if we have all the arguments needed
        if not ("training_queries" in self.experiment_args and
                "test_queries" in self.experiment_args and
                "feature_count" in self.experiment_args and
                "num_runs" in self.experiment_args and
                "num_queries" in self.experiment_args and
                "user_model" in self.experiment_args and
                "user_model_args" in self.experiment_args and
                "system" in self.experiment_args and
                "system_args" in self.experiment_args and
                "output_dir" in self.experiment_args):
            parser.print_help()
            sys.exit("Missing required arguments, please check the program"
                     " arguments or configuration file. %s" %
                     self.experiment_args)

        # set default values for optional arguments
        if not "query_sampling_method" in self.experiment_args:
            self.experiment_args["query_sampling_method"] = "random"
        if not "output_dir_overwrite" in self.experiment_args:
            self.experiment_args["output_dir_overwrite"] = False
        if not "experimenter" in self.experiment_args:
            self.experiment_args["experimenter"] = "experiment.LearningExperiment"
        if not "evaluation" in self.experiment_args:
            self.experiment_args["evaluation"] = "evaluation.NdcgEval"
        if not "processes" in self.experiment_args:
            self.experiment_args["processes"] = 0

        # locate or create directory for the current fold
        if not os.path.exists(self.experiment_args["output_dir"]):
            os.makedirs(self.experiment_args["output_dir"])
        elif not(self.experiment_args["output_dir_overwrite"]) and \
                            os.listdir(self.experiment_args["output_dir"]):
            # make sure the output directory is empty
            raise Exception("Output dir %s is not an empty directory. "
            "Please use a different directory, or move contents out "
            "of the way." %
             self.experiment_args["output_dir"])

        logging.basicConfig(format='%(asctime)s %(module)s: %(message)s',
                        level=logging.INFO)

        logging.info("Arguments: %s" % self.experiment_args)
        for k, v in sorted(self.experiment_args.iteritems()):
            logging.info("\t%s: %s" % (k, v))
        config_bk = os.path.join(self.experiment_args["output_dir"],
                                 "config_bk.yml")
        logging.info("Backing up configuration to: %s" % config_bk)
        config_bk_file = open(config_bk, "w")
        yaml.dump(self.experiment_args,
                  config_bk_file,
                  default_flow_style=False)
        config_bk_file.close()

        # load training and test queries
        training_file = self.experiment_args["training_queries"]
        test_file = self.experiment_args["test_queries"]
        self.feature_count = self.experiment_args["feature_count"]
        logging.info("Loading training data: %s " % training_file)
        self.training_queries = load_queries(training_file, self.feature_count)
        logging.info("... found %d queries." %
            self.training_queries.get_size())
        logging.info("Loading test data: %s " % test_file)
        self.test_queries = load_queries(test_file, self.feature_count)
        logging.info("... found %d queries." % self.test_queries.get_size())

        # initialize and run the experiment num_run times
        self.num_runs = self.experiment_args["num_runs"]
        self.output_dir = self.experiment_args["output_dir"]
        self.output_prefix = self.experiment_args["output_prefix"]
        self.experimenter = get_class(self.experiment_args["experimenter"])
Example #35
 def _get_ranker_pair(self, class_name, ranker_args, feature_pair,
     feature_count, ties):
     return (get_class(class_name)(ranker_args, self._get_weight_vector(
             feature_pair[0], feature_count), ties=ties),
         get_class(class_name)(ranker_args, self._get_weight_vector(
             feature_pair[1], feature_count), ties=ties))
Example #36
 def from_state(state):
     kls = state["kls"]
     model_kls = state["model_kls"]
     return get_class(kls)(get_class(model_kls)(**state['model_kwargs']))
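An assumed state dictionary for Example #36; the class paths mirror Lerot-style names used elsewhere on this page but are not taken from the snippet itself.

state = {
    "kls": "ranker.ProbabilisticRankingFunction",  # assumed wrapper class path
    "model_kls": "ranker.model.Linear",            # model path seen in Examples #26-27
    "model_kwargs": {"feature_count": 64},
}
# ranker = SomeRankerClass.from_state(state)  # the defining class is not shown in the snippet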
Example #37
#
# Lerot is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Lerot is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Lerot.  If not, see <http://www.gnu.org/licenses/>.

from include import *
import argparse
from utils import get_class

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="""Analysis""",
                                     usage="%(prog)s ")
    parser.add_argument('--basedir', required=True)
    parser.add_argument('--analysis', nargs="+", required=True)
    args = parser.parse_known_args()[0]

    for analyse in args.analysis:
        aclass = get_class(analyse)
        a = aclass(args.basedir)
        a.update()
        print a.finish()
Example #38
parser.add_argument("-t", "--test_file", default="test.txt.gz",
    help="In each fold of the test directory, the name of the test file.")
parser.add_argument("-f", "--feature_count", required=True, type=int,
    help="Number of features (has to match test queries and weight files).")
parser.add_argument("-e", "--experiment_dirs", nargs="+", required=True,
    help="List of directories that contain experiments (one per experiment). "
    "Results per experiment will be averaged over all folds and runs.")
parser.add_argument("-s", "--file_ext", default="txt.gz",
    help="File extension of the files in which run results are stored.")
args = parser.parse_args()

cutoffs = [1, 3, 10, -1]
metrics = []
scores = {}
for metric in ("evaluation.NdcgEval", "evaluation.LetorNdcgEval"):
    eval_class = get_class(metric)
    eval_metric = eval_class()
    metrics.append(eval_metric)
    scores[eval_metric.__class__.__name__] = {}
    for cutoff in cutoffs:
        scores[eval_metric.__class__.__name__][cutoff] = []

# load all queries
test_queries = {}
for fold in range(1, 6):
    test_file = "".join((args.test_dir, str(fold)))
    test_file = os.path.join(test_file, args.test_file)
    qs = load_queries(test_file, args.feature_count)
    test_queries[fold] = qs

# process all experiments for all metrics
Example #39
 def get_class(self, clazz_path):
     import utils
     return utils.get_class(clazz_path)
Example #40
parser.add_argument('--top_k', action='store', type=int, default=3, help='how many most probable classes to print out')
parser.add_argument('--category_names', action='store', help='file which maps classes to names')
parser.add_argument('--gpu', action='store_true', help='use gpu to infer classes')
args=parser.parse_args()

if args.gpu:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else: device = "cpu"
    
model = Model.load_model(args.checkpoint, args.gpu)

img = utils.process_image(args.input).to(device) 

probs, classes = Model.predict(img, model, args.top_k)

if(args.category_names != None): 
    classes = utils.get_class(classes, args.checkpoint, args.category_names)
else:
    classes=utils.get_class(classes, args.checkpoint, None)
    

utils.resultdisplay(img.to('cpu'), probs, classes, args.top_k)

utils.show_classes(probs, classes, args.top_k)
Example #41
 def __init__(self, log_fh, args):
     """Initialize an experiment using the provided arguments."""
     self.log_fh = log_fh
     self.ties = args["ties"] if "ties" in args else "first"
     # additional configuration: number of relevant documents
     # (number or "random")
     self.length = args["result_length"]
     self.num_relevant = args["num_relevant"]
     self.num_queries = args["num_queries"]
     self.um_class = get_class(args["user_model"])
     self.um_args = args["user_model_args"]
     self.um = self.um_class(self.um_args)
     self.pareto_um_class = get_class("environment.FederatedClickModel")
     self.pareto_um = self.pareto_um_class(None)
     # initialize interleaved comparison methods according to configuration
     parser = argparse.ArgumentParser(description="parse arguments of an "
         "evaluation method.", prog="evaluation method configuration")
     parser.add_argument("-c", "--class_name")
     parser.add_argument("-r", "--ranker", help="can be 'det' or 'prob'")
     parser.add_argument("-a", "--ranker_args")
     parser.add_argument("-i", "--interleave_method")
     self.rankers = {}
     self.methods = {}
     # init live methods
     if "evaluation_methods" in args:
         for method_id, method in enumerate(
                 args["evaluation_methods"]):
             self.methods[method] = {}
             method_args_str = \
                 args["evaluation_methods_args"][method_id]
             method_args = vars(parser.parse_known_args(
                 method_args_str.split())[0])
             class_name = method_args["class_name"]
             self.methods[method]["instance"] = \
                 get_class(class_name)(method_args_str)
             ranker = method_args["ranker"]
             ranker_args = method_args["ranker_args"]
             self.methods[method]["ranker"] = ranker
             self.methods[method]["ranker_args"] = ranker_args
             if not ranker in self.rankers:
                 self.rankers[ranker] = {}
             if not ranker_args in self.rankers[ranker]:
                 self.rankers[ranker][ranker_args] = {}
     # init rankers needed by the comparison methods. rankers can be
     # deterministic (det) or probabilistic (prob), and can have different
     # arguments
     for ranker in self.rankers:
         for ranker_args in self.rankers[ranker]:
             if ranker == "det":
                 self.rankers[ranker][ranker_args] = \
                     (SyntheticDeterministicRankingFunction(ranker_args, # A
                     self.ties), SyntheticDeterministicRankingFunction(  # B
                     ranker_args, self.ties))
             elif ranker == "prob":
                 self.rankers[ranker][ranker_args] = \
                     (SyntheticProbabilisticRankingFunction(ranker_args, # A
                     self.ties), SyntheticProbabilisticRankingFunction(  # B
                     ranker_args, self.ties))
             else:
                 raise ValueError("Unknown ranker: " + ranker)
     # generate synthetic better and worse rankers
     (self.docids, self.labels) = self._generate_synthetic_documents(
         self.length, self.num_relevant)
     (self.better, self.worse, self.labels) = self._generate_synthetic_rankings_randomly(
         self.docids, self.labels, self.length,
         posmethod=args["vertical_posmethod"],
         docmethod=args["vertical_docmethod"],
         vertrel=args["vertical_vertrel"],
         blocksize=args["vertical_blocksize"],
         independentplacement=args["vertical_independentplacement"])
Example #42
#
# Lerot is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Lerot is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Lerot.  If not, see <http://www.gnu.org/licenses/>.

from include import *
import argparse
from utils import get_class

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="""Analysis""",
                                     usage="%(prog)s ")
    parser.add_argument('--basedir', required=True)
    parser.add_argument('--analysis', nargs="+", required=True)
    args = parser.parse_known_args()[0]
    
    for analyse in args.analysis:
        aclass = get_class(analyse)
        a = aclass(args.basedir)
        a.update()
        print(a.finish())
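
The script above only assumes that each class named via --analysis takes the base directory in its constructor and exposes update() and finish(). A minimal sketch of a conforming analysis class (hypothetical name and behaviour):

class DummySummary(object):
    # Hypothetical analysis class satisfying the interface used above.
    def __init__(self, basedir):
        self.basedir = basedir
        self.passes = 0

    def update(self):
        # A real implementation would scan experiment output under basedir.
        self.passes += 1

    def finish(self):
        return "processed %s in %d pass(es)" % (self.basedir, self.passes)
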
Example #43
0
    def __init__(self, args_str=None):
        # parse arguments
        parser = argparse.ArgumentParser(description="""
            Construct and run a learning experiment. Provide either the name
            of a config file from which the experiment configuration is
            read, or provide all arguments listed under DETAILS. If both are
            provided, values given on the command line override those read
            from the config file.""",
                                         prog=self.__class__.__name__)

        # option 1: use a config file
        file_group = parser.add_argument_group("FILE")
        file_group.add_argument("-f",
                                "--file",
                                help="Filename of the config "
                                "file from which the experiment details"
                                " should be read.")

        # option 2: specify all experiment details as arguments
        detail_group = parser.add_argument_group("DETAILS")
        detail_group.add_argument(
            "-i",
            "--training_queries",
            help="File from which to load the training queries (svmlight "
            "format).")
        detail_group.add_argument(
            "-j",
            "--test_queries",
            help="File from which to load the test queries (svmlight format).")
        detail_group.add_argument(
            "-c",
            "--feature_count",
            type=int,
            help="The number of features included in the data.")
        detail_group.add_argument(
            "-r",
            "--num_runs",
            type=int,
            help="Number of runs (how many times to repeat the experiment).")
        detail_group.add_argument("-q",
                                  "--num_queries",
                                  type=int,
                                  help="Number of queries in each run.")
        detail_group.add_argument("-u",
                                  "--user_model",
                                  help="Class implementing a user model.")
        detail_group.add_argument(
            "-v",
            "--user_model_args",
            help="Arguments for initializing the user model.")
        # the retrieval system maintains ranking functions, accepts queries and
        # generates result lists, and in return receives user clicks to learn
        # from
        detail_group.add_argument(
            "-s",
            "--system",
            help="Which system to use (e.g., pairwise, listwise).")
        detail_group.add_argument("-a",
                                  "--system_args",
                                  help="Arguments for "
                                  "the system (comparison method, learning "
                                  "algorithm and parameters...).")
        detail_group.add_argument(
            "-o",
            "--output_dir",
            help="(Empty) directory for storing output generated by this"
            " experiment. Subdirectory for different folds will be generated"
            "automatically.")
        detail_group.add_argument("--output_dir_overwrite", default="False")
        detail_group.add_argument(
            "-p",
            "--output_prefix",
            help="Prefix to be added to output filenames, e.g., the name of "
            "the data set, fold, etc. Output files will be stored as "
            "OUTPUT_DIR/PREFIX-RUN_ID.txt.gz")
        detail_group.add_argument("-e",
                                  "--experimenter",
                                  help="Experimenter type.")
        # run the parser
        if args_str:
            args = parser.parse_known_args(args_str.split())[0]
        else:
            args = parser.parse_known_args()[0]

        # determine whether to use config file or detailed args
        self.experiment_args = None
        if args.file:
            config_file = open(args.file)
            # safe_load parses the plain-scalar config without arbitrary object construction
            self.experiment_args = yaml.safe_load(config_file)
            config_file.close()
            # overwrite with command-line options if given
            for arg, value in vars(args).items():
                if value:
                    self.experiment_args[arg] = value
        else:
            self.experiment_args = vars(args)

        # workaround - check if we have all the arguments needed
        if not ("training_queries" in self.experiment_args
                and "test_queries" in self.experiment_args and "feature_count"
                in self.experiment_args and "num_runs" in self.experiment_args
                and "num_queries" in self.experiment_args
                and "user_model" in self.experiment_args and "user_model_args"
                in self.experiment_args and "system" in self.experiment_args
                and "system_args" in self.experiment_args
                and "output_dir" in self.experiment_args):
            parser.print_help()
            sys.exit("Missing required arguments, please check the program"
                     " arguments or configuration file. %s" %
                     self.experiment_args)

        # set default values for optional arguments
        if not "query_sampling_method" in self.experiment_args:
            self.experiment_args["query_sampling_method"] = "random"
        if not "output_dir_overwrite" in self.experiment_args:
            self.experiment_args["output_dir_overwrite"] = False
        if not "experimenter" in self.experiment_args:
            self.experiment_args[
                "experimenter"] = "experiment.LearningExperiment"
        if not "evaluation" in self.experiment_args:
            self.experiment_args["evaluation"] = "evaluation.NdcgEval"
        if not "processes" in self.experiment_args:
            self.experiment_args["processes"] = 0

        # locate or create directory for the current fold
        if not os.path.exists(self.experiment_args["output_dir"]):
            os.makedirs(self.experiment_args["output_dir"])
        elif self.experiment_args["output_dir_overwrite"] not in (True, "True") \
                and os.listdir(self.experiment_args["output_dir"]):
            # make sure the output directory is empty
            raise Exception(
                "Output dir %s is not an empty directory. "
                "Please use a different directory, or move contents out "
                "of the way." % self.experiment_args["output_dir"])

        logging.basicConfig(format='%(asctime)s %(module)s: %(message)s',
                            level=logging.INFO)

        logging.info("Arguments: %s" % self.experiment_args)
        for k, v in sorted(self.experiment_args.items()):
            logging.info("\t%s: %s" % (k, v))
        config_bk = os.path.join(self.experiment_args["output_dir"],
                                 "config_bk.yml")
        logging.info("Backing up configuration to: %s" % config_bk)
        config_bk_file = open(config_bk, "w")
        yaml.dump(self.experiment_args,
                  config_bk_file,
                  default_flow_style=False)
        config_bk_file.close()

        # load training and test queries
        training_file = self.experiment_args["training_queries"]
        test_file = self.experiment_args["test_queries"]
        self.feature_count = self.experiment_args["feature_count"]
        logging.info("Loading training data: %s " % training_file)
        self.training_queries = load_queries(training_file, self.feature_count)
        logging.info("... found %d queries." %
                     self.training_queries.get_size())
        logging.info("Loading test data: %s " % test_file)
        self.test_queries = load_queries(test_file, self.feature_count)
        logging.info("... found %d queries." % self.test_queries.get_size())

        # initialize and run the experiment num_run times
        self.num_runs = self.experiment_args["num_runs"]
        self.output_dir = self.experiment_args["output_dir"]
        self.output_prefix = self.experiment_args["output_prefix"]
        self.experimenter = get_class(self.experiment_args["experimenter"])
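
The constructor above accepts the same settings either on the command line or from a YAML file passed with -f. A minimal sketch of writing such a config with the argument names used above; every value below is made up and the class names are only illustrative:

import yaml

experiment_args = {
    "training_queries": "data/Fold1/train.txt",
    "test_queries": "data/Fold1/test.txt",
    "feature_count": 64,
    "num_runs": 5,
    "num_queries": 1000,
    "user_model": "environment.CascadeUserModel",
    "user_model_args": "--p_click 0.0, 1.0 --p_stop 0.0, 0.0",
    "system": "retrieval_system.ListwiseLearningSystem",
    "system_args": "-w random",  # remaining flags depend on the chosen system
    "output_dir": "outdir",
    "output_prefix": "Fold1",
}

with open("config.yml", "w") as config_file:
    yaml.dump(experiment_args, config_file, default_flow_style=False)
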
    nargs="+",
    required=True,
    help="List of directories that contain experiments (one per experiment). "
    "Results per experiment will be averaged over all folds and runs.")
parser.add_argument(
    "-s",
    "--file_ext",
    default="txt.gz",
    help="File extension of the files in which run results are stored.")
args = parser.parse_args()

cutoffs = [1, 3, 10, -1]
metrics = []
scores = {}
for metric in "evaluation.NdcgEval", "evaluation.LetorNdcgEval":
    eval_class = get_class(metric)
    eval_metric = eval_class()
    metrics.append(eval_metric)
    scores[eval_metric.__class__.__name__] = {}
    for cutoff in cutoffs:
        scores[eval_metric.__class__.__name__][cutoff] = []

# load all queries
test_queries = {}
for fold in range(1, 6):
    test_file = "".join((args.test_dir, str(fold)))
    test_file = os.path.join(test_file, args.test_file)
    qs = load_queries(test_file, args.feature_count)
    test_queries[fold] = qs

# process all experiments for all metrics
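
The comment above marks where the (truncated) processing loop would fill scores[metric_name][cutoff] with one value per run. Once those lists are populated, averaging them over folds and runs is straightforward; a sketch, assuming the lists have already been filled:

import numpy as np

def summarize(scores):
    # scores: {metric_name: {cutoff: [score, ...]}} as built above.
    for name, per_cutoff in sorted(scores.items()):
        for cutoff, values in sorted(per_cutoff.items()):
            if values:
                print("%s@%s: %.4f (+/- %.4f, n=%d)" % (
                    name, cutoff, np.mean(values), np.std(values), len(values)))
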
Example #45
0
    def __init__(self):
        # parse arguments
        parser = argparse.ArgumentParser(description="""Meta experiment""")

        file_group = parser.add_argument_group("FILE")
        file_group.add_argument(
            "-f", "--file", help="Filename of the config " "file from which the experiment details" " should be read."
        )
        # option 2: specify all experiment details as arguments
        detail_group = parser.add_argument_group("DETAILS")
        detail_group.add_argument("-p", "--platform", help="Specify " "'local' or 'celery'")
        detail_group.add_argument(
            "--data",
            help="Data in the following"
            "format: trainfile,testfile,d,r such that "
            "a data file can be found in "
            "datadir/trainfile/Fold1/train.txt",
            type=str,
            nargs="+",
        )
        detail_group.add_argument("--um", nargs="+")
        detail_group.add_argument("--uma", help="", type=str, nargs="+")
        detail_group.add_argument("--analysis", nargs="*")
        detail_group.add_argument("--data_dir")
        detail_group.add_argument("--output_base")
        detail_group.add_argument("--experiment_name")
        detail_group.add_argument("-r", "--rerun", action="store_true", help="Rerun last experiment.", default=False)
        detail_group.add_argument("--queue_name", type=str)

        args = parser.parse_known_args()[0]

        logging.basicConfig(format="%(asctime)s %(module)s: %(message)s", level=logging.INFO)

        # determine whether to use config file or detailed args
        self.experiment_args = None
        if args.file:
            config_file = open(args.file)
            self.experiment_args = yaml.load(config_file, Loader=Loader)
            config_file.close()
            try:
                self.meta_args = vars(parser.parse_known_args(self.experiment_args["meta"].split())[0])
            except KeyError:
                parser.error("Please make sure there is a 'meta' section "
                             "present in the config file")
            # overwrite with command-line options if given
            for arg, value in vars(args).items():
                if value:
                    self.meta_args[arg] = value
        else:
            self.meta_args = vars(args)
            # without a config file there are no separate experiment arguments
            self.experiment_args = {}

        for k in list(self.meta_args.keys()) + ["meta"]:
            if k in self.experiment_args:
                del self.experiment_args[k]

        if self.meta_args["platform"] == "local":
            self.run = self.run_local
        elif self.meta_args["platform"] == "celery":
            self.experiment_args["processes"] = 0
            self.run = self.run_celery
        elif self.meta_args["platform"] == "conf":
            self.run = self.run_conf
        else:
            parser.error("Please specify a valid platform.")

        usermodels = {}
        for umstr in self.meta_args["uma"]:
            parts = umstr.split(",")
            um, car = parts[:2]
            car = int(car)
            if len(parts) != car * 2 + 2:
                parser.error("Error in uma")
            p_click = ", ".join(parts[2 : 2 + car])
            p_stop = ", ".join(parts[2 + car :])
            if um not in usermodels:
                usermodels[um] = {}
            usermodels[um][car] = "--p_click %s --p_stop %s" % (p_click, p_stop)

        basedir = os.path.join(os.path.abspath(self.meta_args["output_base"]), self.meta_args["experiment_name"])

        i = 0
        while os.path.exists(os.path.join(basedir, "v%03d" % i)):
            i += 1
        if i > 0 and self.meta_args["rerun"]:
            i -= 1
        logging.info("Running experiment v%03d" % i)
        basedir = os.path.join(basedir, "v%03d" % i)
        if not os.path.exists(basedir):
            os.makedirs(basedir)
        logging.info("Results appear in %s" % basedir)

        config_bk = os.path.join(basedir, "meta_config_bk.yml")
        config_bk_file = open(config_bk, "w")
        yaml.dump(self.meta_args, config_bk_file, default_flow_style=False, Dumper=Dumper)
        config_bk_file.close()

        skip = 0
        self.configurations = []
        #        for run_id in range(self.experiment_args["num_runs"]):
        for um in self.meta_args["um"]:
            for dstr in self.meta_args["data"]:
                data, d, r = dstr.split(",")
                d, r = int(d), int(r)
                user_model_args = usermodels[um][r]
                folds = glob.glob(os.path.join(os.path.abspath(self.meta_args["data_dir"]), data, "Fold*"))
                for fold in folds:
                    args = self.experiment_args.copy()
                    args["data_dir"] = self.meta_args["data_dir"]
                    args["fold_dir"] = fold
                    #            args["run_id"] = run_id
                    args["feature_count"] = d
                    args["user_model_args"] = user_model_args
                    args["output_dir"] = os.path.join(basedir, "output", um, data, os.path.basename(fold))
                    args["output_prefix"] = os.path.basename(fold)
                    if self.meta_args["rerun"]:
                        if not os.path.exists(
                            os.path.join(args["output_dir"], "%s-%d.txt.gz" % (args["output_prefix"], run_id))
                        ):
                            self.configurations.append(args)
                        else:
                            skip += 1
                    else:
                        self.configurations.append(args)
        logging.info("Created %d configurations (and %d skipped)" % (len(self.configurations), skip))
        self.analytics = []
        for analyse in self.meta_args["analysis"]:
            aclass = get_class(analyse)
            a = aclass(basedir)
            self.analytics.append(a)
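
The --uma strings consumed above pack a user model identifier, the number of relevance grades (car), and car click plus car stop probabilities into one comma-separated value. A self-contained sketch of the same parsing, with a hypothetical model name and probabilities:

def parse_uma(umstr):
    # Mirrors the parsing above: "um,car,p_click_1,..,p_click_car,p_stop_1,..,p_stop_car"
    parts = umstr.split(",")
    um, car = parts[0], int(parts[1])
    assert len(parts) == car * 2 + 2
    p_click = ", ".join(parts[2:2 + car])
    p_stop = ", ".join(parts[2 + car:])
    return um, car, "--p_click %s --p_stop %s" % (p_click, p_stop)

print(parse_uma("environment.CascadeUserModel,2,0.0,1.0,0.0,0.0"))
# ('environment.CascadeUserModel', 2, '--p_click 0.0, 1.0 --p_stop 0.0, 0.0')
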
Example #46
0
def run_test(network, test_x, test_y, figure_path='figures', plot=True):
    """
    Will conduct the test suite to determine model strength.

    Args:
        test_x: data the model has not yet seen to predict
        test_y: corresponding truth vectors
        figure_path: string, folder to place images in.
        plot: bool, determines if graphs should be plotted when ran.
    """
    if network.num_classes is None or network.num_classes == 0:
        raise ValueError('There\'s no classification layer')

    if test_y.shape[1] > 1:
        test_y = get_class(test_y)  # Y is in one hot representation

    raw_prediction = network.forward_pass(input_data=test_x,
                                          convert_to_class=False)
    class_prediction = get_class(raw_prediction)

    confusion_matrix = get_confusion_matrix(prediction=class_prediction,
                                            truth=test_y)

    tp = np.diagonal(confusion_matrix).astype('float32')
    tn = (np.array([np.sum(confusion_matrix)] * confusion_matrix.shape[0]) -
          confusion_matrix.sum(axis=0) - confusion_matrix.sum(axis=1) +
          tp).astype('float32')
    # sum each column and remove diagonal
    fp = (confusion_matrix.sum(axis=0) - tp).astype('float32')
    # sum each row and remove diagonal
    fn = (confusion_matrix.sum(axis=1) - tp).astype('float32')

    sens = np.nan_to_num(tp / (tp + fn))  # recall
    spec = np.nan_to_num(tn / (tn + fp))
    sens_macro = np.nan_to_num(sum(tp) / (sum(tp) + sum(fn)))
    spec_macro = np.nan_to_num(sum(tn) / (sum(tn) + sum(fp)))
    dice = 2 * tp / (2 * tp + fp + fn)
    ppv = np.nan_to_num(tp / (tp + fp))  # precision
    ppv_macro = np.nan_to_num(sum(tp) / (sum(tp) + sum(fp)))
    npv = np.nan_to_num(tn / (tn + fn))
    npv_macro = np.nan_to_num(sum(tn) / (sum(tn) + sum(fn)))
    accuracy = np.sum(tp) / np.sum(confusion_matrix)
    f1 = np.nan_to_num(2 * (ppv * sens) / (ppv + sens))
    f1_macro = np.average(np.nan_to_num(2 * sens * ppv / (sens + ppv)))

    print('{} test\'s results'.format(network.name))

    print('TP:', tp)
    print('FP:', fp)
    print('TN:', tn)
    print('FN:', fn)

    print('\nAccuracy: {}'.format(accuracy))

    print('Sensitivity:', round_list(sens, decimals=3))
    print('\tMacro Sensitivity: {:.4f}'.format(sens_macro))

    print('Specificity:', round_list(spec, decimals=3))
    print('\tMacro Specificity: {:.4f}'.format(spec_macro))

    print('DICE:', round_list(dice, decimals=3))
    print('\tAvg. DICE: {:.4f}'.format(np.average(dice)))

    print('Positive Predictive Value:', round_list(ppv, decimals=3))
    print('\tMacro Positive Predictive Value: {:.4f}'.format(ppv_macro))

    print('Negative Predictive Value:', round_list(npv, decimals=3))
    print('\tMacro Negative Predictive Value: {:.4f}'.format(npv_macro))

    print('f1-score:', round_list(f1, decimals=3))
    print('\tMacro f1-score: {:.4f}'.format(f1_macro))
    print('')

    if not os.path.exists(figure_path):
        print('Creating figures folder')
        os.makedirs(figure_path)

    if not os.path.exists('{}/{}{}'.format(figure_path, network.timestamp,
                                           network.name)):
        print('Creating {}/{}{} folder'.format(figure_path, network.timestamp,
                                               network.name))
        os.makedirs('{}/{}{}'.format(figure_path, network.timestamp,
                                     network.name))
    print('Saving ROC figures to folder: {}/{}{}'.format(
        figure_path, network.timestamp, network.name))

    plt.figure()
    plt.title("Confusion matrix for {}".format(network.name))
    plt.xticks(range(confusion_matrix.shape[0]))
    plt.yticks(range(confusion_matrix.shape[0]))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.imshow(confusion_matrix,
               origin='lower',
               cmap='hot',
               interpolation='nearest')
    plt.colorbar()

    plt.savefig('{}/{}{}/confusion_matrix.png'.format(figure_path,
                                                      network.timestamp,
                                                      network.name))
    if not plot:
        plt.close()

    fig = plt.figure()
    all_class_auc = []
    for i in range(network.num_classes):
        if network.num_classes == 1:
            fpr, tpr, thresholds = metrics.roc_curve(test_y,
                                                     raw_prediction,
                                                     pos_label=1)
        else:
            fpr, tpr, thresholds = metrics.roc_curve(test_y,
                                                     raw_prediction[:, i],
                                                     pos_label=i)

        auc = metrics.auc(fpr, tpr)
        all_class_auc += [auc]
        # print ('AUC: {:.4f}'.format(auc))
        # print ('\tGenerating ROC {}/{}{}/{}.png ...'.format(figure_path,
        #                                                     network.timestamp,
        #                                                     network.name, i))
        plt.clf()
        plt.plot(fpr, tpr, label=("AUC: {:.4f}".format(auc)))
        plt.title("ROC Curve for {}_{}".format(network.name, i))
        plt.xlabel('1 - Specificity')
        plt.ylabel('Sensitivity')
        plt.legend(loc='lower right')
        plt.ylim(0.0, 1.0)
        plt.xlim(0.0, 1.0)

        plt.savefig('{}/{}{}/{}.png'.format(figure_path, network.timestamp,
                                            network.name, i))
        if plot:
            plt.show(block=False)

    if not plot:
        plt.close(fig.number)
    print('Average AUC: {:.4f}'.format(np.average(all_class_auc)))
    return {
        'accuracy': accuracy,
        'macro_sensitivity': sens_macro,
        'macro_specificity': spec_macro,
        'avg_dice': np.average(dice),
        'macro_ppv': ppv_macro,
        'macro_npv': npv_macro,
        'macro_f1': f1_macro,
        'macro_auc': np.average(all_class_auc)
    }
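
The per-class counts computed in run_test follow directly from the confusion matrix; a small worked example with made-up numbers, applying the same formulas:

import numpy as np

# Hypothetical 3-class confusion matrix (rows = truth, columns = prediction).
confusion_matrix = np.array([[5, 1, 0],
                             [2, 6, 1],
                             [0, 1, 4]], dtype='float32')
tp = np.diagonal(confusion_matrix)                 # [5. 6. 4.]
fp = confusion_matrix.sum(axis=0) - tp             # [2. 2. 1.]
fn = confusion_matrix.sum(axis=1) - tp             # [1. 3. 1.]
tn = confusion_matrix.sum() - tp - fp - fn         # [12.  9. 14.]
accuracy = tp.sum() / confusion_matrix.sum()       # 15 / 20 = 0.75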