Exemplo n.º 1
0
def test_db():
    """Return the database handle stored on ``TestDatabase.db``.

    When no handle is set (the attribute is falsy), an error is logged
    through ``error`` and ``None`` is returned instead.
    """
    handle = TestDatabase.db
    if handle:
        return handle

    error(
        "You can configure experiment database for tests by calling for_tests=True flag in setup_experiment_database"
    )
    return None
Exemplo n.º 2
0
    def run(self, data, knobs):
        """Report the value solved by ``tt_ind_solve_power`` for the collected samples.

        The parent ``run`` is invoked first; if it reports failure an error
        is logged and ``None`` is returned.  Otherwise the length of
        ``self.y1`` is used as ``nobs1`` and the solver result is stored
        under the ``"power"`` key.

        Returns:
            dict with the keys ``effect_size``, ``sample_size``, ``alpha``
            and ``power``, or ``None`` when the parent step failed.
        """
        # NOTE(review): super(self.__class__, self) re-enters this same method
        # if a subclass ever inherits it; naming the class explicitly avoids that.
        parent_ok = super(self.__class__, self).run(data, knobs)
        if not parent_ok:
            error("Aborting analysis.")
            return

        observations = len(self.y1)

        solved_power = tt_ind_solve_power(
            effect_size=self.effect_size,
            nobs1=observations,
            alpha=self.alpha,
            alternative=self.alternative,
        )

        return {
            "effect_size": self.effect_size,
            "sample_size": observations,
            "alpha": self.alpha,
            "power": solved_power,
        }
Exemplo n.º 3
0
def user_db():
    """Return the handle stored on ``UserDatabase.db``.

    Logs an error through ``error`` and returns ``None`` when the attribute
    is falsy (no user database has been set).
    """
    configured = UserDatabase.db
    if configured:
        return configured

    error(
        "You can setup the user database using experiment_db_config.json file"
    )
    return None
Exemplo n.º 4
0
    def run(self, data, knobs):
        """Solve for the sample size via ``tt_ind_solve_power``.

        The effect size is taken from ``self.effect_size`` when it is truthy;
        otherwise it is derived as ``self.mean_diff`` divided by the square
        root of the mean of the two sample variances.

        Returns:
            dict with the keys ``effect_size``, ``sample_size`` (floored),
            ``alpha`` and ``power``, or ``None`` when the parent ``run``
            reports failure.

        Raises:
            Exception: when both ``self.effect_size`` and ``self.mean_diff``
                are falsy.
        """
        # NOTE(review): super(self.__class__, self) re-enters this same method
        # if a subclass ever inherits it; naming the class explicitly avoids that.
        if not super(self.__class__, self).run(data, knobs):
            error("Aborting analysis.")
            return

        if self.effect_size:
            effect_size = self.effect_size
        elif self.mean_diff:
            # Derive the effect size from the requested mean difference.
            pooled_std = sqrt((var(self.y1) + var(self.y2)) / 2)
            effect_size = self.mean_diff / pooled_std
        else:
            raise Exception(
                "You cannot leave both mean_diff and effect_size paramaters empty"
            )

        # nobs1 is left as None so that it is the quantity being solved for.
        required_size = tt_ind_solve_power(
            effect_size=effect_size,
            nobs1=None,
            alpha=self.alpha,
            power=self.power,
            alternative=self.alternative,
        )

        return {
            "effect_size": effect_size,
            "sample_size": floor(required_size),
            "alpha": self.alpha,
            "power": self.power,
        }
Exemplo n.º 5
0
def db():
    """Return the handle stored on ``ExperimentDatabase.DB``.

    Logs an error through ``error`` and returns ``None`` when the attribute
    is falsy (no experiment database has been set).
    """
    instance = ExperimentDatabase.DB
    if instance:
        return instance

    error(
        "You can configure experiment database using Configuration section in the dashboard."
    )
    return None
Exemplo n.º 6
0
 def __init__(self, stage_ids, y_key, alpha=0.05):
     """Initialise the test and store a supported significance level.

     ``stage_ids`` and ``y_key`` are forwarded to the parent initialiser.
     ``alpha`` must be one of 0.15, 0.10, 0.05, 0.02 or 0.01; any other
     value is reported through ``error`` and replaced by 0.05.
     """
     # NOTE(review): super(self.__class__, self) re-enters this same method
     # if a subclass ever inherits it; naming the class explicitly avoids that.
     super(self.__class__, self).__init__(stage_ids, y_key)
     supported_levels = (0.15, 0.10, 0.05, 0.02, 0.01)
     if alpha in supported_levels:
         self.alpha = alpha
     else:
         error("For Anderson-Darling test, please select as alpha one of 0.15, 0.10, 0.05, 0.02, or 0.01. "
               "Falling back to default value of alpha = 0.05")
         self.alpha = 0.05
Exemplo n.º 7
0
    def run(self, data, knobs):
        """Copy the per-stage samples into ``self.y``.

        One list is built for every entry of ``self.stage_ids``, taken from
        ``data`` at the same position.  ``knobs`` is not used here.

        Returns:
            True when the samples were stored, False (after logging an
            error) when ``data`` holds fewer than two samples.
        """
        if len(data) >= 2:
            self.y = [list(data[idx]) for idx in range(len(self.stage_ids))]
            return True

        error("Cannot run " + self.name + " on less than two samples.")
        return False
Exemplo n.º 8
0
    def run(self, data, knobs):
        """Fit an OLS model over all stages and return its ANOVA tables.

        The response column (``self.y_key``) is the concatenation of the
        values in ``data[0] .. data[stages_count - 1]``.  Every knob key
        becomes one column in which the knob value of a stage is repeated
        once per data point of that stage.

        Assumption carried over from the original code: all stages belong to
        the same experiment, so ``knobs[0]`` has the same keys as every other
        entry of ``knobs``.

        Returns:
            A tuple ``(aov_table, aov_table_sqr)``: the table produced by
            ``anova_lm`` and a deep copy of it that was passed through
            ``self.eta_squared`` / ``self.omega_squared`` and then stripped
            of the ``sum_sq``, ``df``, ``F`` and ``PR(>F)`` columns.
            ``None`` when fewer than two knob keys are available.
        """
        separator = "------------------"
        stage_indices = range(self.stages_count)

        # The y values already arrive in their final form, one sequence per stage.
        frame_columns = {
            self.y_key: [value for idx in stage_indices for value in data[idx]]
        }

        # Fall back to the knob names of the first stage when none were given.
        if not self.knob_keys:
            self.knob_keys = knobs[0].keys()

        if len(self.knob_keys) < 2:
            error("Cannot run " + self.name + " on one factor.")
            error("Aborting analysis")
            return

        for knob_key in self.knob_keys:
            # Repeat each stage's knob value once per observation of that stage.
            frame_columns[knob_key] = [
                knobs[idx][knob_key]
                for idx in range(self.stages_count)
                for _ in data[idx]
            ]

        df = pd.DataFrame(frame_columns)
        print(df)
        print(separator)

        formula = self.create_formula()
        print(formula)
        print(separator)

        fitted_model = ols(formula, data=frame_columns).fit()
        print(fitted_model.summary())
        print(separator)

        aov_table = anova_lm(fitted_model, typ=2)

        # Work on a copy so the plain ANOVA table is returned untouched.
        aov_table_sqr = deepcopy(aov_table)
        self.eta_squared(aov_table_sqr)
        self.omega_squared(aov_table_sqr)
        # TODO: filtering by a minimum omega_sq effect size could be added here.

        # Drop the columns that are already present in aov_table.
        aov_table_sqr.drop(['sum_sq', 'df', 'F', 'PR(>F)'], axis=1, inplace=True)
        return aov_table, aov_table_sqr
Exemplo n.º 9
0
    def run(self, data, knobs):
        """Store the first two samples of ``data`` as ``self.y1`` and ``self.y2``.

        Fewer than two samples is an error; more than two only triggers
        warnings and the extra samples are ignored.  ``knobs`` is not used.

        Returns:
            True when both samples were stored, False when ``data`` holds
            fewer than two samples.
        """
        sample_count = len(data)

        if sample_count < 2:
            error("Cannot run " + self.name + " on less than two samples.")
            return False

        if sample_count > 2:
            warn("Cannot run " + self.name + " on more than two samples.")
            warn("Comparing only the first two samples.")

        self.y1 = list(data[0])
        self.y2 = list(data[1])
        return True
Exemplo n.º 10
0
    def run(self, data, knobs):
        """Run the equal-variance test on the collected samples.

        The parent ``run`` is invoked first; on failure an error is logged
        and ``None`` is returned.  Otherwise
        ``self.get_statistic_and_pvalue`` is applied to ``self.y``.

        Returns:
            dict with ``statistic``, ``pvalue``, ``alpha`` and
            ``not_equal_variance`` (True when ``pvalue <= alpha``).
        """
        parent_ok = super(EqualVarianceTest, self).run(data, knobs)
        if not parent_ok:
            error("Aborting analysis.")
            return

        stat, p_val = self.get_statistic_and_pvalue(self.y)
        variances_differ = bool(p_val <= self.alpha)

        return {
            "statistic": stat,
            "pvalue": p_val,
            "alpha": self.alpha,
            "not_equal_variance": variances_differ,
        }
Exemplo n.º 11
0
    def run(self, data, knobs):
        """Run the different-distributions test on the collected samples.

        The parent ``run`` is invoked first; on failure an error is logged
        and ``None`` is returned.  Otherwise
        ``self.get_statistic_and_pvalue`` is applied to ``self.y``.

        Returns:
            dict with ``statistic``, ``pvalue``, ``alpha`` and
            ``different_distributions`` (True when ``pvalue <= alpha``).
        """
        parent_ok = super(DifferentDistributionsTest, self).run(data, knobs)
        if not parent_ok:
            error("Aborting analysis.")
            return

        stat, p_val = self.get_statistic_and_pvalue(self.y)
        distributions_differ = bool(p_val <= self.alpha)

        return {
            "statistic": stat,
            "pvalue": p_val,
            "alpha": self.alpha,
            "different_distributions": distributions_differ,
        }
Exemplo n.º 12
0
def setup_user_database():
    """Create the user database from ``./oeda/databases/user_db_config.json``.

    The parsed configuration supplies ``db_type``, ``host`` and ``port``;
    together with the full configuration they are handed to
    ``create_instance_for_users`` and the result is stored on
    ``UserDatabase.DB``.

    A ``ValueError`` or ``KeyError`` raised while loading the file or
    creating the instance is reported through ``error`` and followed by
    ``exit(0)``.
    """
    with open('./oeda/databases/user_db_config.json') as config_file:
        try:
            settings = load(config_file)
            db_type = settings['db_type']
            host = settings['host']
            port = settings['port']
            UserDatabase.DB = create_instance_for_users(
                db_type, host, port, settings)
        except ValueError:
            error(
                "> You need to specify the user database configuration in databases/user_db_config.json"
            )
            exit(0)
        except KeyError:
            error(
                "> You need to specify 'db_type', 'host', 'port' values in databases/user_db_config.json properly"
            )
            exit(0)
Exemplo n.º 13
0
def setup_experiment_database(db_type, host, port):
    """Create the experiment database and store it on ``ExperimentDatabase.DB``.

    The configuration is read from
    ``./oeda/databases/experiment_db_config.json`` and passed, together with
    the given connection parameters, to ``create_instance_for_experiments``.

    Args:
        db_type: database type forwarded to the factory.
        host: database host forwarded to the factory.
        port: database port forwarded to the factory.

    A ``ValueError`` or ``KeyError`` raised while loading the file or
    creating the instance is reported through ``error`` and followed by
    ``exit(0)``.  Every other exception (connection errors included)
    propagates to the caller unchanged.
    """
    with open('./oeda/databases/experiment_db_config.json') as json_data_file:
        try:
            config_data = load(json_data_file)
            ExperimentDatabase.DB = create_instance_for_experiments(
                db_type, host, port, config_data)
        except ValueError:
            # The message names the experiment database: this function reads
            # experiment_db_config.json, not the user database configuration.
            error(
                "> You need to specify the experiment database configuration in databases/experiment_db_config.json"
            )
            exit(0)
        except KeyError:
            error(
                "> You need to specify 'db_type', 'host', 'port' values in databases/experiment_db_config.json properly"
            )
            exit(0)
        # No handler for connection errors: catching one only to re-raise it
        # is equivalent to letting it propagate.
Exemplo n.º 14
0
def setup_experiment_database(db_type, host, port, for_tests=False):
    """Create the experiment database (or its test variant) from the config file.

    The configuration is read from ``databases/experiment_db_config.json``,
    located relative to the parent of this module's directory.

    Args:
        db_type: database type forwarded to the factory.
        host: database host forwarded to the factory.
        port: database port forwarded to the factory.
        for_tests: when true, ``"_test"`` is appended to the configured
            index name and the instance is stored on ``TestDatabase.db``;
            otherwise it is stored on ``ExperimentDatabase.db``.

    Raises:
        IOError / OSError: when the configuration file cannot be opened
            (logged first, then re-raised).
        Any exception other than ``ValueError`` / ``KeyError`` raised while
        creating the database instance propagates unchanged; those two are
        reported through ``error`` and followed by ``exit(0)``.
    """
    current_directory = os.path.dirname(__file__)
    parent_directory = os.path.split(current_directory)[0]
    file_path = os.path.join(parent_directory, 'databases',
                             'experiment_db_config.json')

    # Only the open() call is guarded by the "missing file" handler, so that
    # failures while creating the database (e.g. connection errors) are not
    # misreported as a missing configuration file.
    try:
        json_data_file = open(file_path)
    except (IOError, OSError):
        error(
            "> You need to specify a 'experiment_db_config.json' file in the databases folder. You can copy the template in that folder!"
        )
        raise

    with json_data_file:
        try:
            config_data = load(json_data_file)
            if for_tests:
                # Use a separate index so tests do not touch the real data.
                config_data["index"]["name"] += str("_test")
                TestDatabase.db = create_db_instance_for_experiments(
                    db_type, host, port, config_data)
            else:
                ExperimentDatabase.db = create_db_instance_for_experiments(
                    db_type, host, port, config_data)
        except ValueError as ve:
            print(ve)
            # The message names the experiment database: this function reads
            # experiment_db_config.json, not the user database configuration.
            error(
                "> You need to specify the experiment database configuration in databases/experiment_db_config.json"
            )
            exit(0)
        except KeyError:
            error(
                "> You need to specify 'db_type', 'host', 'port' values in databases/experiment_db_config.json properly"
            )
            exit(0)
        # Connection errors are intentionally not handled here; they
        # propagate to the caller unchanged.
Exemplo n.º 15
0
def setup_user_database():
    """Create the user database from ``databases/user_db_config.json``.

    The file is located relative to the parent of this module's directory.
    Its parsed content is passed to ``create_db_instance_for_users`` and the
    result is stored on ``UserDatabase.db``.

    Every failure is reported through ``error`` and followed by ``exit(0)``:
    ``ValueError`` and ``KeyError`` get their own messages, any other
    ``Exception`` (for example the file being absent) gets the
    "missing file" message.
    """
    current_directory = os.path.dirname(__file__)
    parent_directory = os.path.split(current_directory)[0]
    file_path = os.path.join(parent_directory, 'databases',
                             'user_db_config.json')
    try:
        with open(file_path) as json_data_file:
            try:
                config_data = load(json_data_file)
                user_db = create_db_instance_for_users(config_data)
                UserDatabase.db = user_db
            except ValueError:
                error(
                    "> You need to specify the user database configuration in databases/user_db_config.json"
                )
                exit(0)
            except KeyError:
                error(
                    "> You need to specify 'db_type', 'host', 'port' values in databases/user_db_config.json properly"
                )
                exit(0)
    # ``except Exception`` instead of a bare ``except``: the SystemExit raised
    # by the exit(0) calls above must not be caught here, otherwise a second,
    # misleading "missing file" error is logged. KeyboardInterrupt is likewise
    # no longer swallowed.
    except Exception:
        error(
            "> You need to specify a 'user_db_config.json' file in the databases folder. You can copy the templates in that folder!"
        )
        # NOTE(review): this exits with status 0 although it is an error path;
        # kept as-is because callers may depend on it.
        exit(0)
Exemplo n.º 16
0
    def run(self, data, knobs):
        """Compare the two collected samples with ``ttest_ind`` (``equal_var=False``).

        The parent ``run`` is invoked first; on failure an error is logged
        and ``None`` is returned.

        Returns:
            dict with ``statistic``, ``pvalue``, ``alpha``,
            ``different_averages`` (True when ``pvalue <= alpha``),
            ``mean_diff`` (mean of ``y1`` minus mean of ``y2``) and
            ``effect_size`` (``mean_diff`` divided by the square root of the
            mean of the two sample variances).
        """
        # NOTE(review): super(self.__class__, self) re-enters this same method
        # if a subclass ever inherits it; naming the class explicitly avoids that.
        parent_ok = super(self.__class__, self).run(data, knobs)
        if not parent_ok:
            error("Aborting analysis.")
            return

        stat, p_val = ttest_ind(self.y1, self.y2, equal_var=False)
        averages_differ = bool(p_val <= self.alpha)

        mean_difference = mean(self.y1) - mean(self.y2)

        # Cohen's d: mean difference scaled by the pooled standard deviation.
        pooled_std = sqrt((var(self.y1) + var(self.y2)) / 2)

        return {
            "statistic": stat,
            "pvalue": p_val,
            "alpha": self.alpha,
            "different_averages": averages_differ,
            "mean_diff": mean_difference,
            "effect_size": mean_difference / pooled_std,
        }