示例#1
0
def main(verbose=False,
         beta=0.05,
         prior_probability=0.5,
         epsilon=0.05,
         num_features=8,
         feature_cardinality=5,
         num_examples=100,
         visualization_interval=100,
         biased_feature_proportion=0.2,
         biased_feature_effect_length=10**100,
         directory="/tmp/adpredictor/",
         extension="png"):
    """Configure and run one AdPredictor simulation.

    Seeds NumPy's global random generator with 1, configures logging, packs
    the arguments into a ``SimulationRunner.Simulation`` and runs a
    ``SimulationRunner`` over it.

    Args:
        verbose: When truthy, logging is configured at DEBUG level; otherwise
            at INFO level.
        beta, prior_probability, epsilon, num_features: Forwarded by keyword
            to ``AdPredictor.Config``.
        feature_cardinality, num_examples, visualization_interval,
        biased_feature_proportion, biased_feature_effect_length, directory,
        extension: Forwarded by keyword to ``SimulationRunner.Simulation``.
    """
    # Process-wide setup: fixed RNG seed and logging verbosity.
    np.random.seed(1)
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    # Assemble the predictor configuration, then the full simulation settings.
    predictor_config = AdPredictor.Config(
        beta=beta,
        prior_probability=prior_probability,
        epsilon=epsilon,
        num_features=num_features,
    )
    settings = SimulationRunner.Simulation(
        predictor_config=predictor_config,
        feature_cardinality=feature_cardinality,
        num_examples=num_examples,
        directory=directory,
        biased_feature_proportion=biased_feature_proportion,
        biased_feature_effect_length=biased_feature_effect_length,
        visualization_interval=visualization_interval,
        extension=extension,
    )

    # Hand the settings to the runner and execute it.
    SimulationRunner(settings).run()
def libsvm_file_process(config):
    """Train on the training files, save the model, then score the test files.

    Args:
        config: Object providing ``beta``, ``epsilon``, ``feature_num``,
            ``train_files``, ``test_files`` and ``model_file``.

    Returns:
        A 5-tuple ``(train_pred, train_label, test_pred, test_label, config)``.
        The prediction lists hold the values returned by
        ``AdPredictor.train`` / ``AdPredictor.predict``; the label lists hold
        1 for labels greater than zero and 0 otherwise.
    """
    model = AdPredictor(AdPredictor.Config(config.beta, config.epsilon), config.feature_num)

    def binarize(raw_label):
        # Map any label > 0 to 1 and everything else to 0.
        return 1 if raw_label > 0 else 0

    # Training pass: each sample is fed to train(), whose return value is
    # recorded as the prediction for that sample.
    train_pred = []
    train_label = []
    for path in config.train_files:
        sampler = FileSampler(path)
        for features, label in sampler.generate_samples():
            # NOTE(review): the meaning of the third positional argument
            # (True) is defined by AdPredictor.train - confirm there.
            train_pred.append(model.train(features, label, True))
            train_label.append(binarize(label))

    model.save_model(config.model_file)

    # Scoring pass over the test files using predict().
    test_pred = []
    test_label = []
    for path in config.test_files:
        sampler = FileSampler(path)
        for features, label in sampler.generate_samples():
            test_pred.append(model.predict(features))
            test_label.append(binarize(label))

    return train_pred, train_label, test_pred, test_label, config
def libsvm_file_process(config):
    """Fit an AdPredictor on ``config.train_files`` and score ``config.test_files``.

    The model is saved to ``config.model_file`` after the training pass and
    before any test file is read.

    Args:
        config: Object providing ``beta``, ``epsilon``, ``feature_num``,
            ``train_files``, ``test_files`` and ``model_file``.

    Returns:
        ``(train_pred, train_label, test_pred, test_label, config)`` where the
        ``*_pred`` lists contain what ``train`` / ``predict`` returned for
        each sample and the ``*_label`` lists contain 1 when the sample's
        label is greater than zero, else 0.
    """
    predictor_config = AdPredictor.Config(config.beta, config.epsilon)
    predictor = AdPredictor(predictor_config, config.feature_num)

    # Lazily flatten all training files into a single stream of samples;
    # files are still opened one after another, in order.
    training_samples = (
        sample
        for train_file in config.train_files
        for sample in FileSampler(train_file).generate_samples()
    )
    train_pred, train_label = [], []
    for features, label in training_samples:
        # NOTE(review): third positional argument (True) is interpreted by
        # AdPredictor.train - confirm its meaning there.
        train_pred.append(predictor.train(features, label, True))
        train_label.append(1 if label > 0 else 0)

    predictor.save_model(config.model_file)

    # Same flattening for the test files, created only after the model
    # has been saved.
    testing_samples = (
        sample
        for test_file in config.test_files
        for sample in FileSampler(test_file).generate_samples()
    )
    test_pred, test_label = [], []
    for features, label in testing_samples:
        test_pred.append(predictor.predict(features))
        test_label.append(1 if label > 0 else 0)

    return train_pred, train_label, test_pred, test_label, config
示例#4
0
 def __init__(self, simulation):
     """Store ``simulation`` and build the predictor and sampler from it.

     Args:
         simulation: Settings object; its ``predictor_config`` attribute is
             passed to ``AdPredictor`` and the whole object to ``Sampler``.
     """
     self._simulation = simulation
     predictor_config = simulation.predictor_config
     self._predictor = AdPredictor(predictor_config)
     self._sampler = Sampler(simulation)
示例#5
0
class SimulationRunner(object):
    """Train an ``AdPredictor`` on sampled examples and save weight plots.

    The runner draws examples from a ``Sampler``, feeds them to the
    predictor one at a time and, every ``visualization_interval``
    iterations, saves a scatter plot of the current weight means and
    variances into ``directory``.
    """

    # Settings for one run. ``predictor_config`` is handed to AdPredictor;
    # the complete tuple is handed to Sampler; ``num_examples``,
    # ``visualization_interval``, ``directory`` and ``extension`` are read
    # by this class.
    Simulation = namedtuple('Simulation', [
        'predictor_config', 'feature_cardinality', 'num_examples',
        'biased_feature_proportion', 'directory', 'extension',
        'visualization_interval', 'biased_feature_effect_length'
    ])

    # Eight-colour "Set2" qualitative palette, cycled over feature groups.
    COLORS = brewer2mpl.get_map('Set2', 'qualitative', 8).mpl_colors

    def __init__(self, simulation):
        """Store ``simulation`` and build the predictor and sampler from it.

        Args:
            simulation: A ``Simulation`` tuple (or compatible object).
        """
        self._simulation = simulation
        self._predictor = AdPredictor(simulation.predictor_config)
        self._sampler = Sampler(simulation)

    def _current_weights_by_feature(self):
        """Yield ``(feature, [(key, mean, variance), ...])`` per feature.

        ``self._predictor.weights`` is iterated as ``(key, weight)`` pairs
        where ``key`` exposes ``feature`` and ``value`` and ``weight``
        exposes ``mean`` and ``variance``.
        """
        by_feature = lambda kv: kv[0].feature
        by_feature_value = lambda kv: (kv[0].feature, kv[0].value)
        # groupby only merges adjacent items, so sort first; ordering by
        # (feature, value) keeps every feature's entries contiguous.
        weights = sorted(self._predictor.weights, key=by_feature_value)
        for feature, group in itertools.groupby(weights, key=by_feature):
            yield feature, [(f, w.mean, w.variance) for (f, w) in group]

    def _plot_weights(self):
        """Scatter-plot (mean, variance) of every weight, one colour per feature."""
        # Builtin zip instead of itertools.izip: izip does not exist on
        # Python 3, while zip stops at the shortest input on both Python 2
        # and 3, so pairing with the endless colour cycle is safe.
        for color, (feature, weights) in zip(
                itertools.cycle(self.COLORS),
                self._current_weights_by_feature()):
            _, means, variances = zip(*weights)
            logging.debug("Feature %s, Weights: %s", feature, weights)

            # Feature 0 is labelled as the bias term in the legend.
            label = "F{}".format(feature) if feature != 0 else "Bias"
            plt.scatter(means,
                        variances,
                        label=label,
                        color=color,
                        alpha=0.8,
                        s=40)

    def _annotate_biased_weights(self):
        """Mark weights the sampler reports a bias for with '+' or '-'."""
        for _, weights in self._current_weights_by_feature():
            # ``feature`` here is the per-weight key (first element of each
            # triple), not the group-level feature that is discarded above.
            for (feature, mean, variance) in weights:
                bias_weight = self._sampler.get_bias_for_feature(feature)
                # None means "no bias"; any other value is annotated with
                # '+' when truthy and '-' when falsy.
                if bias_weight is not None:
                    plt.annotate('+' if bias_weight else '-', (mean, variance),
                                 size=40)

    def _visualize(self, num_examples):
        """Render the current weights and save the figure to disk.

        Args:
            num_examples: Integer used in the plot title and, zero-padded to
                at least three digits, as the output file's base name.
        """
        plt.clf()

        self._plot_weights()
        self._annotate_biased_weights()

        plt.title(u"(μ, σ²) after {} examples".format(num_examples))
        plt.xlabel(u"μ")
        plt.ylabel(u"σ²")
        plt.legend(loc='best')
        plt.xlim(-4, 4)
        plt.ylim(-0.1, 1.1)

        filename = "{:03d}.{}".format(num_examples, self._simulation.extension)
        logger.info("Saving graph to %s", filename)
        plt.savefig(os.path.join(self._simulation.directory, filename),
                    dpi=300)

    def run(self):
        """Train on ``num_examples`` samples, plotting at regular intervals."""
        samples = itertools.islice(self._sampler,
                                   self._simulation.num_examples)
        for iteration, (features, label) in enumerate(samples):
            self._predictor.train(features, label)
            # ``iteration`` is zero-based, so the first plot is written
            # right after the first example and is numbered 0.
            if iteration % self._simulation.visualization_interval == 0:
                self._visualize(iteration)
示例#6
0
 def _create_predictor(beta=0.05, prior=0.3, epsilon=0.01, num_features=10):
     """Return an ``AdPredictor`` built from the given settings.

     The four arguments are passed positionally, in this order, to
     ``AdPredictor.Config``.
     """
     return AdPredictor(
         AdPredictor.Config(beta, prior, epsilon, num_features))
示例#7
0
 def __init__(self, simulation):
     """Keep the simulation settings and create the objects driven by them.

     Args:
         simulation: Settings object exposing ``predictor_config``; the
             object itself is also passed to ``Sampler``.
     """
     self._simulation = simulation
     config_for_predictor = simulation.predictor_config
     self._predictor = AdPredictor(config_for_predictor)
     self._sampler = Sampler(simulation)
示例#8
0
class SimulationRunner(object):
    """Drive an ``AdPredictor`` training run and save periodic weight plots.

    Examples come from a ``Sampler``; each one is used to train the
    predictor, and every ``visualization_interval`` iterations a scatter
    plot of weight means against variances is written to ``directory``.
    """

    # Settings for one run. ``predictor_config`` goes to AdPredictor, the
    # whole tuple goes to Sampler, and ``num_examples``,
    # ``visualization_interval``, ``directory`` and ``extension`` are read
    # directly by this class.
    Simulation = namedtuple(
        'Simulation',
        ['predictor_config', 'feature_cardinality', 'num_examples',
         'biased_feature_proportion', 'directory', 'extension',
         'visualization_interval', 'biased_feature_effect_length'])

    # Eight-colour "Set2" qualitative palette, cycled over feature groups.
    COLORS = brewer2mpl.get_map('Set2', 'qualitative', 8).mpl_colors

    def __init__(self, simulation):
        """Store ``simulation`` and build the predictor and sampler from it.

        Args:
            simulation: A ``Simulation`` tuple (or compatible object).
        """
        self._simulation = simulation
        self._predictor = AdPredictor(simulation.predictor_config)
        self._sampler = Sampler(simulation)

    def _current_weights_by_feature(self):
        """Yield ``(feature, [(key, mean, variance), ...])`` per feature.

        ``self._predictor.weights`` is iterated as ``(key, weight)`` pairs
        where ``key`` exposes ``feature`` and ``value`` and ``weight``
        exposes ``mean`` and ``variance``.
        """
        by_feature = lambda kv: kv[0].feature
        by_feature_value = lambda kv: (kv[0].feature, kv[0].value)
        # Sorting by (feature, value) makes each feature's entries adjacent,
        # which itertools.groupby requires to form one group per feature.
        weights = sorted(self._predictor.weights, key=by_feature_value)
        for feature, group in itertools.groupby(weights, key=by_feature):
            yield feature, [(f, w.mean, w.variance) for (f, w) in group]

    def _plot_weights(self):
        """Scatter-plot (mean, variance) of every weight, one colour per feature."""
        # itertools.izip was removed in Python 3. The builtin zip ends at
        # the shortest input on both Python 2 and 3, so combining it with
        # the infinite colour cycle terminates with the weight groups.
        for color, (feature, weights) in zip(
                itertools.cycle(self.COLORS),
                self._current_weights_by_feature()):
            _, means, variances = zip(*weights)
            logging.debug("Feature %s, Weights: %s", feature, weights)

            # Feature 0 is labelled as the bias term in the legend.
            label = "F{}".format(feature) if feature != 0 else "Bias"
            plt.scatter(means, variances,
                        label=label, color=color, alpha=0.8, s=40)

    def _annotate_biased_weights(self):
        """Mark weights the sampler reports a bias for with '+' or '-'."""
        for _, weights in self._current_weights_by_feature():
            # ``feature`` here is the per-weight key (first element of each
            # triple), not the group-level feature that is discarded above.
            for (feature, mean, variance) in weights:
                bias_weight = self._sampler.get_bias_for_feature(feature)
                # None means "no bias"; otherwise a truthy value is drawn
                # as '+' and a falsy one as '-'.
                if bias_weight is not None:
                    plt.annotate('+' if bias_weight else '-', (mean, variance),
                                 size=40)

    def _visualize(self, num_examples):
        """Render the current weights and save the figure to disk.

        Args:
            num_examples: Integer used in the plot title and, zero-padded to
                at least three digits, as the output file's base name.
        """
        plt.clf()

        self._plot_weights()
        self._annotate_biased_weights()

        plt.title(u"(μ, σ²) after {} examples".format(num_examples))
        plt.xlabel(u"μ")
        plt.ylabel(u"σ²")
        plt.legend(loc='best')
        plt.xlim(-4, 4)
        plt.ylim(-0.1, 1.1)

        filename = "{:03d}.{}".format(num_examples, self._simulation.extension)
        logger.info("Saving graph to %s", filename)
        plt.savefig(os.path.join(self._simulation.directory, filename),
                    dpi=300)

    def run(self):
        """Train on ``num_examples`` samples, plotting at regular intervals."""
        samples = itertools.islice(self._sampler,
                                   self._simulation.num_examples)
        for iteration, (features, label) in enumerate(samples):
            self._predictor.train(features, label)
            # ``iteration`` starts at 0, so a plot numbered 0 is produced
            # immediately after the first example has been trained on.
            if iteration % self._simulation.visualization_interval == 0:
                self._visualize(iteration)