Example #1
 def setUp(self):
     self.method = getConfig().eval(self.__class__.__name__, "method")
     self.corr_threshold = getConfig().eval(self.__class__.__name__,
                                            "corr_threshold")
     self.walk = getConfig().eval(self.__class__.__name__, "random_walks")
     self.continue_from_log = getConfig().eval(self.__class__.__name__,
                                               "continue_from_log")
Example #2
 def __init__(self, db):
     AbstractController.__init__(self, db)
     self.db = db
     self.continue_from_log = getConfig().eval(self.__class__.__name__, "continue_from_log")
     self.csv_data_path = getConfig().eval(self.__class__.__name__, "csv_data_path")
     self.corr_method = getConfig().eval(self.__class__.__name__, "corr_function")
     self.corr_threshold = getConfig().eval(self.__class__.__name__, "corr_threshold")
     self.data_files = [f for f in listdir(self.csv_data_path) if isfile(join(self.csv_data_path, f))]
     self.corr_mat = {}
     self.targets = {"dataset_name": [], "target_feature": []}
Example #3
    def setup(self):
        self.randomWalk_length = getConfig().eval(self.__class__.__name__,
                                                  'random_walk_length')
        self.random_walk_num = getConfig().eval(self.__class__.__name__,
                                                'random_walk_number')
        self.subGraphs_directory_path = getConfig().eval(
            self.__class__.__name__, 'subGraphs_directory_path')
        self.random_walk_directory_path_output = getConfig().eval(
            self.__class__.__name__, 'random_walk_directory_path_output')
        self.classifier_files_directory = getConfig().eval(
            self.__class__.__name__, 'classifier_files_directory')
        self.statistics_output_path = getConfig().eval(
            self.__class__.__name__, 'statistics_output_path')

        self.subGraphs_list = []
        self.rw_list_of_graphs_train = []
        self.rw_list_of_graphs_train_positive = []
        self.rw_list_of_graphs_train_negative = []
        self.rw_list_of_graphs_test = []

        self.rw_args = json.loads(getConfig().eval(self.__class__.__name__,
                                                   'randomwalk_args'))
        self.rw_extensions = getConfig().eval(self.__class__.__name__,
                                              'rw_extensions').split(",")
        self.doc2vec_args = json.loads(getConfig().eval(
            self.__class__.__name__, 'doc2vec_args'))
Example #4
 def __init__(self, db):
     AbstractController.__init__(self, db)
     self.target = getConfig().eval(self.__class__.__name__, "target_attr")
     self.append_new_graphs = getConfig().eval(self.__class__.__name__,
                                               "append_new_graphs")
     self.fields = [
         'graph_name', 'dataset_name', 'connected', 'density', 'Avg_CC',
         'Median_deg', 'Variance_deg', 'Avg_degree', 'Median_wights',
         'Variance_wights', 'Avg_weight', 'Avg_weight_abs', 'edges',
         'nodes', 'self_loops', 'edge_to_node_ratio', 'Num_of_zero_weights',
         'negative_edges', 'min_vc', 'target'
     ]
     self.db = db
Example #5
 def __init__(self, db):
     AbstractController.__init__(self, db)
     self.target = getConfig().eval(self.__class__.__name__, "target_attr")
     self.append_new_graphs = getConfig().eval(self.__class__.__name__, "append_new_graphs")
     self.fields = ['graph_name', 'dataset_name', 'global_avg_betweenness', 'global_var_betweenness',
                    'global_average_edge_weight', 'global_var_average_edge_weight',
                    'global_avg_degree', 'global_var_degree',
                    'global_avg_authority', 'global_var_authority',
                    'global_avg_hub', 'global_var_hub',
                    'Avg_degree', 'Median_wights', 'Variance_wights',
                    'Avg_weight', 'Avg_weight_abs', 'edges',
                    'nodes', 'edge_to_node_ratio', 'Num_of_zero_weights',
                    'negative_edges', 'target']
     self.db = db
Example #6
 def setUp(self):
     print('setting up test_module')
     self.dataset = getConfig().eval(self.__class__.__name__, "dataset")
     self.target = getConfig().eval(self.__class__.__name__, "target")
     self.xgb_model_loaded = pickle.load(
         open('data/RF_regression_model.dat', "rb"))
     population = pd.read_csv(self.dataset).columns
     self.source_inds = list(population)
     self.population = list()
     for ind in population:
         l = list()
         for ind2 in population:
             n = int(len(population) / 10)
             l.append(random.sample(self.source_inds, n))
         self.population.append(l)
Example #7
    def setUp(self):
        # configInst = getConfig()
        self._date = getConfig().eval(self.__class__.__name__, "start_date")
        # self._pathToEngine = configInst.get(self.__class__.__name__, "DB_path") + \
        #                      configInst.get(self.__class__.__name__, "DB_name_prefix") + \
        #                      configInst.get(self.__class__.__name__, "DB_name_suffix")

        # if configInst.eval(self.__class__.__name__, "remove_on_setup"):
        #     self.deleteDB()
        #
        # self.engine = create_engine("sqlite:///" + self._pathToEngine, echo=False)
        # self.Session = sessionmaker()
        # self.Session.configure(bind=self.engine)
        #
        # self.session = self.Session()

        # @event.listens_for(self.engine, "connect")
        # def connect(dbapi_connection, connection_rec):
        #    dbapi_connection.enable_load_extension(True)
        #    dbapi_connection.execute(
        #        'SELECT load_extension("{0}{1}")'.format(configInst.get("DB", "DB_path_to_extension"), '.dll'))
        #
        #     dbapi_connection.enable_load_extension(False)
        #
        # if getConfig().eval(self.__class__.__name__, "dropall_on_setup"):
        #     Base.metadata.drop_all(self.engine)
        #
        # Base.metadata.create_all(self.engine)
        pass
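
The commented-out body above sketches an SQLite/SQLAlchemy test fixture: create an engine, optionally load an SQLite extension on connect, build the schema, and open a session. A compact runnable version of that wiring, assuming SQLAlchemy 1.4+ and a declarative Base like the one the comments reference (paths and arguments are illustrative, not the project's actual config values):

from sqlalchemy import create_engine, event
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()  # stand-in for the project's declarative Base


def build_test_session(db_path='test.sqlite', extension_path=None):
    # Mirrors the commented-out wiring above: engine, optional SQLite
    # extension loading on connect, schema creation, then a session.
    engine = create_engine('sqlite:///' + db_path, echo=False)

    if extension_path is not None:
        @event.listens_for(engine, 'connect')
        def load_extension(dbapi_connection, connection_record):
            dbapi_connection.enable_load_extension(True)
            dbapi_connection.execute(
                'SELECT load_extension("{0}")'.format(extension_path))
            dbapi_connection.enable_load_extension(False)

    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    return Session()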
Example #8
 def setUp(self):
     self.dataset = getConfig().eval(self.__class__.__name__, "dataset")
     self.exclude_log = getConfig().eval(self.__class__.__name__, "exclude_log")
     self.exclude_table_list = getConfig().eval(self.__class__.__name__, "exclude_table_list")
     self.labels = []
     self.datasets_dfs = dict()
     for file in listdir(join('data', 'dataset_in')):
         print('loading ', file)
         with open('data/dataset_out/target_features.csv', newline='') as csv_file:
             csv_reader = csv.reader(csv_file, delimiter=',')
             for row in csv_reader:
                 if row[0] == str(file.split('.')[0])+"_corr_graph":
                     target_feature = row[1]
         df = pd.read_csv(join('data', 'dataset_in', file))
         # Encode non-numeric targets as integer category codes.
         if df[target_feature].dtype.name not in ('float64', 'int64'):
             df[target_feature] = df[target_feature].astype('category').cat.codes
         self.datasets_dfs[file.split('.')[0]] = (df, target_feature)
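
The final branch above turns non-numeric target columns into integer codes. The same pandas idiom on a toy frame (column names and values are made up):

import pandas as pd

toy = pd.DataFrame({'colour': ['red', 'blue', 'red'],
                    'price': [1.0, 2.5, 3.0]})
target = 'colour'
if toy[target].dtype.name not in ('float64', 'int64'):
    # Map each distinct label to a stable integer code.
    toy[target] = toy[target].astype('category').cat.codes
print(toy[target].tolist())  # [1, 0, 1]: categories are sorted alphabetically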
Example #9
 def setUp(self):
     self.max_depth = getConfig().eval(self.__class__.__name__, "max_depth")
     self.eta = getConfig().eval(self.__class__.__name__, "eta")
     self.silent = getConfig().eval(self.__class__.__name__, "silent")
     self.objective_multi = getConfig().eval(self.__class__.__name__,
                                             "objective_multi")
     self.objective_binary = getConfig().eval(self.__class__.__name__,
                                              "objective_binary")
     self.nthread = getConfig().eval(self.__class__.__name__, "nthread")
     self.epochs = getConfig().eval(self.__class__.__name__, "epochs")
     self.dataset = getConfig().eval(self.__class__.__name__, "dataset")
     self.exclude_table_list = getConfig().eval(self.__class__.__name__,
                                                "exclude_table_list")
     self.labels = []
Example #10
 def setUp(self):
     self.corr_threshold = getConfig().eval(self.__class__.__name__,
                                            "corr_threshold")
     self.target = getConfig().eval(self.__class__.__name__, "target")
     self.early_stop = getConfig().eval(self.__class__.__name__,
                                        "early_stop")
     self.dataset = getConfig().eval(self.__class__.__name__, "dataset")
     self.corr_method = getConfig().eval(self.__class__.__name__,
                                         "corr_method")
     self.model_path = getConfig().eval(self.__class__.__name__,
                                        "model_path")
     self.target_att = getConfig().eval(self.__class__.__name__,
                                        "target_att")
     self.corr_method = getattr(corr_calc, self.corr_method)
     df = pd.read_csv(self.dataset)
     self.data = df.copy()
     if self.target_att in df.columns:
         features_df = df.drop(self.target_att, axis=1)
     else:
         features_df = df.drop(df.columns[-1], axis=1)
         self.target_att = df.columns[-1]
     self.corr_mat = self.corr_method(features_df)
     self.features = list(features_df.columns)
     self.model = pickle.load(open(self.model_path, 'rb'))
     features_df = self.corr_mat.set_index(self.corr_mat.columns)
     self.full_graph = nx.from_pandas_adjacency(features_df)
     print('calculating invalid edges...')
     edges_to_remove = [
         (u, v) for u, v in self.full_graph.edges
         if abs(self.full_graph[u][v]['weight']) > self.corr_threshold
     ]
     print('removing edges...')
     self.full_graph.remove_edges_from(edges_to_remove)
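
The graph construction above builds a weighted graph from a correlation matrix with nx.from_pandas_adjacency and then prunes edges whose absolute weight exceeds corr_threshold. A toy version of those steps, with illustrative data and threshold:

import networkx as nx
import pandas as pd

frame = pd.DataFrame({'a': [1, 2, 3, 4],
                      'b': [2, 4, 6, 8],
                      'c': [4, 3, 1, 1]})
corr_mat = frame.corr(method='pearson')     # square feature-by-feature matrix
graph = nx.from_pandas_adjacency(corr_mat)  # weighted; diagonal becomes self-loops
threshold = 0.9
to_remove = [(u, v) for u, v, w in graph.edges(data='weight')
             if abs(w) > threshold]         # same filtering rule as above
graph.remove_edges_from(to_remove)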
Example #11
 def setUp(self):
     self.dataset = getConfig().eval(self.__class__.__name__, "dataset")
     self.feature_set_in = getConfig().eval(self.__class__.__name__,
                                            "feature_set_in")
     self.results_out = getConfig().eval(self.__class__.__name__,
                                         "results_out")
     self.model_path = getConfig().eval(self.__class__.__name__,
                                        "model_path")
     self.path_to_truth = getConfig().eval(self.__class__.__name__,
                                           "path_to_truth")
     self.eval_only = getConfig().eval(self.__class__.__name__, "eval_only")
     self.corr_threshold = getConfig().eval(self.__class__.__name__,
                                            "corr_threshold")
Example #12
 def randomWalk(self):
     self.main_graph = nx.read_gml(
         self.getPath(relative_path=getConfig().eval(
             self.__class__.__name__, 'main_graph_path')))
     random_walk_object = rw.RandomWalk(
         threshold=self.randomWalk_length,
         number_of_graphs=self.random_walk_num,
         args=self.rw_args,
         extensions=self.rw_extensions,
         main_graph=self.main_graph)
     for k in self.subGraphs_list:
         g = k[1]
         g.graph["name"] = k[0]
         if g.graph['type'] == "trainset":
             self.rw_list_of_graphs_train = random_walk_object.insertGraphToSet(
                 list_of_graphs=self.rw_list_of_graphs_train, graph=g)
Example #13
 def __init__(self, db):
     AbstractController.__init__(self, db)
     self.db = db
     self.iterations = getConfig().eval(self.__class__.__name__,
                                        "iterations")
     self.dimensions = getConfig().eval(self.__class__.__name__,
                                        "dimensions")
     self.windowSize = getConfig().eval(self.__class__.__name__,
                                        "windowSize")
     self.dm = getConfig().eval(self.__class__.__name__, "dm")
     self.walkLength = getConfig().eval(self.__class__.__name__,
                                        "walkLength")
     self.embedding_type = getConfig().eval(self.__class__.__name__,
                                            "embedding_type")
     self.att = getConfig().eval(self.__class__.__name__, "attribute")
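
The options read here (dimensions, windowSize, dm, iterations, walkLength) look like parameters for a Doc2Vec-style embedding over random walks. A hedged sketch of how such values could feed gensim's Doc2Vec; the mapping of config names to keyword arguments is an assumption, not the project's confirmed usage:

from gensim.models.doc2vec import Doc2Vec, TaggedDocument


def train_embedding(walks, dimensions, window_size, dm, iterations):
    # Each random walk becomes one tagged "document" of node tokens.
    documents = [TaggedDocument(words=[str(node) for node in walk], tags=[str(i)])
                 for i, walk in enumerate(walks)]
    return Doc2Vec(documents,
                   vector_size=dimensions,  # assumed mapping from "dimensions"
                   window=window_size,      # assumed mapping from "windowSize"
                   dm=dm,
                   epochs=iterations,       # assumed mapping from "iterations"
                   min_count=1)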
Example #14
 def setUp(self):
     self.continue_from_log = getConfig().eval(self.__class__.__name__,
                                               "continue_from_log")
     self.clear_existing_subgraphs = getConfig().eval(
         self.__class__.__name__, "clear_existing_subgraphs")
     self.save_path = getConfig().eval(self.__class__.__name__, "save_path")
     self.corr_threshold = getConfig().eval(self.__class__.__name__,
                                            "corr_threshold")
     self.input_folder = getConfig().eval(self.__class__.__name__,
                                          "input_folder")
     self.output_folder = getConfig().eval(self.__class__.__name__,
                                           "output_folder")
Example #15
 def __init__(self, db):
     self.db = db
     self.data_path = getConfig().eval(self.__class__.__name__, "data")
     self.out_path = getConfig().eval(self.__class__.__name__, "out")
Example #16
 def setUp(self):
     self.dataset = getConfig().eval(self.__class__.__name__, "dataset")
     self.target_att = getConfig().eval(self.__class__.__name__, "target_att")
     self.out = getConfig().eval(self.__class__.__name__, "out")
     self.test_att = getConfig().eval(self.__class__.__name__, "test_att")
Example #17
 def setUp(self):
     self.data_path = getConfig().eval(self.__class__.__name__, "data")
     self.model = getConfig().eval(self.__class__.__name__, "model")
Example #18
 def setUp(self):
     self.corr_threshold = getConfig().eval(self.__class__.__name__,
                                            "corr_threshold")
     pass
Example #19
 def setUp(self):
     self.path = getConfig().eval(self.__class__.__name__, "path")
Example #20
 def __init__(self):
     self.data_path = getConfig().eval(self.__class__.__name__, "csv_path")
     self.pysqldf = lambda q: sqldf(q, self.data_path)
     self.is_csv = getConfig().eval(self.__class__.__name__, "is_csv")
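
The sqldf here presumably comes from pandasql, whose second argument is a namespace mapping table names to DataFrames rather than a file path, so the CSV would typically be loaded into a frame first. A minimal sketch of that common pattern; the file name and variable names are illustrative, and the project's own sqldf wrapper may behave differently:

import pandas as pd
from pandasql import sqldf

data = pd.read_csv('some.csv')                # illustrative path
pysqldf = lambda q: sqldf(q, {'data': data})  # env maps table names to DataFrames
top_rows = pysqldf('SELECT * FROM data LIMIT 5')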
Example #21
modules_dict['random_walk'] = random_walk
modules_dict['structural_feature_extraction'] = structural_feature_extraction
modules_dict['xgboost_generator'] = xgboost_generator
modules_dict['Decision_Tree'] = Decision_Tree
modules_dict['global_local_fs'] = global_local_fs
modules_dict['full_graph_fs'] = full_graph_fs
modules_dict['sub2vec'] = sub2vec
modules_dict['RandomForestReg'] = RandomForestReg
modules_dict['XgboostRegression'] = XgboostRegression
modules_dict['GA_Feature_Selection'] = GA_Feature_Selection
modules_dict['test_dataset_cross_validation'] = test_dataset_cross_validation
modules_dict['challenge_prediction'] = challenge_prediction
modules_dict['simulated_annealing_feature_selection'] = simulated_annealing_feature_selection
modules_dict['benchmark'] = benchmark

window_start = getConfig().eval("DEFAULT", "start_date")
disable_prints = getConfig().eval("DEFAULT", "disable_prints")
if disable_prints:
    sys.stdout = open(os.devnull, 'w')
newbmrk = os.path.isfile("benchmark.csv")
bmrk_file = open("benchmark.csv", 'a', newline='')
bmrk_results = csv.DictWriter(bmrk_file,
                              ["time", "jobnumber", "config", "window_size", "window_start", "dones", "posts",
                               "authors"] + list(modules_dict.keys()),
                              dialect="excel", lineterminator="\n")
if not newbmrk:
    bmrk_results.writeheader()

modules_dict["DB"] = lambda x: x
pipeline = []
for module in getConfig().sections():
Example #22
 def setUp(self):
     self.dataset_table = getConfig().eval(self.__class__.__name__,
                                           "dataset_table")