示例#1
0
 def get_reults(self, bellwethers=None, n_reps=12):
     """Run each transfer learner over every project and rank the outcomes.

     For every project returned by ``get_all_projects`` for ``./data``, each
     ordered (source, target) pair of distinct entries in ``project.files()``
     is passed ``n_reps`` times to ``transfer.learner`` for each of
     ``Pooyan``, ``Baseline`` and ``Waterloo``. The transfer whose
     ``__doc__`` equals ``"baseline"`` is only evaluated for sources listed
     in ``bellwethers[project.name]``. One row per transfer (upper-cased
     ``__doc__`` label followed by the collected values) is handed to
     ``rdivDemo``.

     Args:
         bellwethers: Mapping from project name to a container of source
             names that the baseline transfer may use. Although the default
             is ``None``, a mapping is required as soon as the baseline
             transfer reaches a (source, target) pair.
         n_reps: Number of learner invocations per (source, target) pair.

     Raises:
         TypeError: If ``bellwethers`` is ``None`` when the baseline
             transfer needs it.
     """
     data_path = os.path.realpath("./data")
     for project in get_all_projects(data_path):
         print(project.name.upper())
         files = project.files()
         results_0 = []
         for transfer in [Pooyan, Baseline, Waterloo]:
             # The first element of each row is its label.
             results_1 = [transfer.__doc__.upper()]
             # Invariant for this transfer, so test it once.
             is_baseline = transfer.__doc__ == "baseline"
             for source_name, source_conf in files.iteritems():
                 for target_name, target_conf in files.iteritems():
                     if source_name == target_name:
                         continue
                     if is_baseline:
                         # Fail with an actionable message instead of the
                         # opaque "'NoneType' is not subscriptable".
                         if bellwethers is None:
                             raise TypeError(
                                 "bellwethers must map project names to "
                                 "bellwether source names; got None")
                         if source_name not in bellwethers[project.name]:
                             continue
                     results_1.extend([
                         transfer.learner(source_conf, target_conf)
                         for _ in xrange(n_reps)
                     ])
             results_0.append(results_1)
         rdivDemo(results_0)
         print("")
示例#2
0
文件: Waterloo.py 项目: vcvvc/BEETLE
def main(n_reps=30):
    """Evaluate the two-stage (predict, then transfer) model on every file pair.

    For each project returned by ``get_all_projects`` for ``./data`` and each
    ordered (source, target) pair of distinct entries in ``project.files()``,
    the following is repeated ``n_reps`` times:

    1. fit a prediction model on the source (``train_prediction_model``,
       ``T=5``);
    2. sample 10 rows from the inner merge of source and target and fit a
       transfer model on the sampled last-column values of each;
    3. drop the rows labelled ``some.index`` from the target, predict the
       remainder with the prediction model, map the predictions through the
       transfer model and record ``rank_diff`` against the actual values.

    The median of the recorded values, converted with ``int()``, is stored
    per (source, target) pair; one ``DataFrame`` per project is kept in a
    local ``results`` dict. The function ends at a ``set_trace()`` breakpoint
    and returns nothing.

    Args:
        n_reps: Number of repetitions per (source, target) pair.
    """
    data_path = os.path.realpath("./data")
    projects = get_all_projects(data_path)
    results = dict()
    for project in projects:
        files = project.files()
        results_0 = dict()
        for source_name, source_conf in files.iteritems():
            results_0.update({source_name: {}})
            for target_name, target_conf in files.iteritems():
                if source_name == target_name:
                    continue

                r_diff = []
                for _ in xrange(n_reps):
                    # Construct a prediction model using source.
                    predict_model = train_prediction_model(source_conf,
                                                           T=5)

                    # Find common configs between source and target.
                    common = pd.merge(source_conf,
                                      target_conf,
                                      how="inner")

                    # Pick 10 random samples to train the transfer model.
                    some = common.sample(n=10)

                    # Dependent variables used to fit the transfer model.
                    p_src = some[source_conf.columns[-1]]
                    p_tgt = some[target_conf.columns[-1]]

                    # Train a transfer model.
                    transfer_model = train_transfer_model(p_src=p_src,
                                                          p_tgt=p_tgt)

                    # Remove elements used to train the transfer model from
                    # the target. The result is kept in a separate name:
                    # rebinding ``target_conf`` here would make every later
                    # repetition run on an ever-shrinking target.
                    # NOTE(review): ``some.index`` comes from the merged
                    # frame, whose index is not taken from ``target_conf``;
                    # confirm these labels identify the sampled target rows.
                    target_rest = target_conf.drop(some.index,
                                                   errors="ignore")

                    # Perform transfer.
                    target_indep = target_rest[target_rest.columns[:-1]]
                    target_actual = target_rest[target_rest.columns[-1]]
                    predicted_raw = predict_model.predict(
                        target_indep).reshape(-1, 1)
                    target_predicted = transfer_model.predict(
                        predicted_raw).reshape(1, -1)[0]

                    # Get rank difference.
                    r_diff.append(
                        rank_diff(actual=target_actual,
                                  predicted=target_predicted))

                results_0[source_name].update(
                    {target_name: int(np.median(r_diff))})

        results.update({project.name: pd.DataFrame(results_0)})
    # -------------------- DEBUG -------------------- #
    set_trace()
示例#3
0
    def compare(self, data_pairs):
        """Compare the designated best and worst sources of every project.

        ``data_pairs[project.name]`` is unpacked into a (best, worst) pair of
        file names. Every other entry of ``project.files()`` is used as a
        target: ``Pooyan.learner`` is called once with the best source and
        once with the worst source, and the two labelled rows are handed to
        ``rdivDemo``. Execution stops at a ``set_trace()`` breakpoint after
        each project.

        Args:
            data_pairs: Mapping from project name to a two-element
                (best, worst) collection of file names.
        """
        root = os.path.realpath("./data")
        for project in get_all_projects(root):
            print(project.name.upper())
            files = project.files()
            pair = data_pairs[project.name]
            best, worst = pair

            # Targets are all files that are neither the best nor the worst.
            rest = []
            for fname, dframe in files.iteritems():
                if fname in pair:
                    continue
                rest.append(dframe)

            best_results = ["best"]
            worst_results = ["worst"]
            for tgt in rest:
                # One evaluation per source for each target, best first.
                best_results.append(Pooyan.learner(files[best], tgt))
                worst_results.append(Pooyan.learner(files[worst], tgt))

            rdivDemo([best_results, worst_results])

            set_trace()
示例#4
0
def main(n_reps=30):
    """Evaluate the Gaussian-process transfer model on every file pair.

    For each project returned by ``get_all_projects`` for ``./data`` and each
    ordered (source, target) pair of distinct entries in ``project.files()``,
    a model is built ``n_reps`` times with
    ``train_gaussproc_model(source_conf, target_conf)``. Each model predicts
    from all but the last column of the target, and ``rank_diff`` compares
    the prediction with the target's last column.

    The median of the recorded values, converted with ``int()``, is stored
    per (source, target) pair; one ``DataFrame`` per project is kept in a
    local ``results`` dict. The function ends at a ``set_trace()`` breakpoint
    and returns nothing.

    Args:
        n_reps: Number of repetitions per (source, target) pair.
    """
    data_path = os.path.realpath("./data")
    projects = get_all_projects(data_path)
    results = dict()
    for project in projects:
        files = project.files()
        results_0 = dict()
        for source_name, source_conf in files.iteritems():
            results_0.update({source_name: {}})
            for target_name, target_conf in files.iteritems():
                if source_name == target_name:
                    continue

                # The target split does not change between repetitions, so
                # it is computed once per pair.
                target_indep = target_conf[target_conf.columns[:-1]]
                target_actual = target_conf[target_conf.columns[-1]]

                r_diff = []
                for _ in xrange(n_reps):
                    # Construct a gaussian process model using source.
                    predict_model = train_gaussproc_model(
                        source_conf, target_conf)

                    # Perform transfer.
                    target_predicted = predict_model.predict(
                        target_indep).reshape(-1, 1)

                    # Get rank difference.
                    r_diff.append(
                        rank_diff(actual=target_actual,
                                  predicted=target_predicted))

                results_0[source_name].update(
                    {target_name: int(np.median(r_diff))})

        results.update({project.name: pd.DataFrame(results_0)})
    # -------------------- DEBUG -------------------- #
    set_trace()