def __init__(self,
              dev_name,
              model,
              is_model_pkl=False,
              use_cols=None,
              y_col=None,
              train=None,
              is_train_csv=False):
     """Configure the classifier and optionally train it right away.

     Args:
         dev_name: Stored unchanged on ``self.dev_name``.
         model: Stored on ``self.model``; when ``is_model_pkl`` is true it
             is handed to ``self.load_model_from_pkl`` instead.
         is_model_pkl: Selects between the two ``model`` handling paths.
         use_cols: Column selection. When ``None``, the contents of
             ``data/use_cols.csv`` are read and stored instead.
         y_col: Label column name; falls back to ``'device_category'``
             when falsy.
         train: Optional training data. When ``is_train_csv`` is true it
             is passed to ``utils.load_data_from_csv``; otherwise it is
             indexed with ``use_cols`` (if given) before training.
         is_train_csv: Whether ``train`` must be loaded through
             ``utils.load_data_from_csv`` first.
     """
     self.dev_name = dev_name
     if is_model_pkl:
         self.load_model_from_pkl(model)
     else:
         self.model = model
     if use_cols is not None:
         self.use_cols = use_cols
     else:
         self.use_cols = pd.read_csv(os.path.abspath('data/use_cols.csv'))
     self.y_col = y_col if y_col else 'device_category'
     # Compare against None explicitly: evaluating a pandas DataFrame in a
     # boolean context raises ValueError, so ``if train:`` would crash for
     # in-memory training data.
     if train is not None:
         if is_train_csv:
             train = utils.load_data_from_csv(train, use_cols)
         elif use_cols is not None and len(use_cols) > 0:
             # len() keeps the "skip when empty" behaviour of a truthiness
             # test while staying safe for array-like column selections.
             train = train[use_cols]
         self.train(train)
Exemplo n.º 2
0
 def get_node2vec_data(self):
   """Placeholder for page-embedding (node2vec) features.

   Work in progress: the relation data is loaded through
   ``utils.load_data_from_csv`` but not used yet, and an empty string
   is returned in place of real embeddings.
   """
   loaded = utils.load_data_from_csv(dtype="relation")
   # Unpack to keep the two-item contract of the loader visible.
   _relation, _output = loaded
   return ""
Exemplo n.º 3
0
def run_multi_dev_experiments(datasets, dev_model_csv, pred_methods):
    """Evaluate every device/model combination on every dataset.

    For each name in ``datasets`` the file ``data/<name>.csv`` is loaded,
    then for each row of ``dev_model_csv`` and each entry of
    ``pred_methods`` a ``MultipleDeviceClassifier`` is built and passed to
    ``eval_classifier``.

    NOTE(review): ``metrics_dir`` is neither a parameter nor a local of
    this function; it has to resolve from an outer scope -- confirm that
    a module-level ``metrics_dir`` exists.
    """
    label_col = 'device_category'
    selected_cols = pd.read_csv(os.path.abspath('data/use_cols.csv'))
    headers = list(pd.read_csv(os.path.abspath('data/metrics_headers.csv')))
    combos = pd.read_csv(dev_model_csv)

    for dataset_name in datasets:
        print('@', dataset_name)
        csv_path = 'data/{}.csv'.format(dataset_name)
        dataset = utils.load_data_from_csv(csv_path, use_cols=selected_cols)

        for _, combo in combos.iterrows():
            print('@@', combo)
            for pred_method in pred_methods:
                classifier = MultipleDeviceClassifier(
                    combo,
                    is_model_pkl=True,
                    pred_method=pred_method,
                    use_cols=selected_cols,
                    y_col=label_col)

                # The row's dict form is reported as model name and as both
                # sequence-length fields; a fresh dict is built for each.
                eval_kwargs = dict(
                    classifier=classifier,
                    dataset=dataset,
                    model_name=combo.to_dict(),
                    dataset_name=dataset_name,
                    classification_method='multi_dev',
                    seq_len=combo.to_dict(),
                    opt_seq_len=combo.to_dict(),
                    metrics_headers=headers,
                    metrics_dir=metrics_dir)
                eval_classifier(**eval_kwargs)
 def eval_on_dataset(self, dataset, is_dataset_csv=False):
     """Predict on ``dataset`` and return the evaluation of the predictions.

     Args:
         dataset: The data to evaluate on, or -- when ``is_dataset_csv``
             is true -- the value passed to ``utils.load_data_from_csv``
             together with ``self.use_cols``.
         is_dataset_csv: Whether ``dataset`` must be loaded first.

     Returns:
         Whatever ``utils.eval_predictions`` returns for the true labels
         and the predictions.
     """
     data = dataset
     if is_dataset_csv:
         data = utils.load_data_from_csv(data, self.use_cols)
     # Separate the label column (self.y_col) from the features.
     features, labels = utils.split_data(data, self.y_col)
     predictions = self.predict(features)
     return utils.eval_predictions(labels, predictions)
Exemplo n.º 5
0
  def get_relation_data(self):
    """Load relation data, transform it and merge it with the output frame.

    The relation frame is passed through ``utils.get_transformed_relation``
    with ``min_likes=5``, outer-merged with the output frame on ``userid``,
    and missing values are filled with the per-column mean.

    Returns:
        The merged frame with NaNs in numeric columns replaced by that
        column's mean.
    """
    df_relation, df_output = utils.load_data_from_csv(dtype="relation")

    # Matrix built from page likes (per the original author's note).
    df_relation_matrix = utils.get_transformed_relation(df_relation, min_likes=5)

    df_relation_matrix = pd.merge(df_relation_matrix, df_output,
                                  left_on="userid",
                                  right_on="userid",
                                  how="outer")

    # Users without any of the selected page likes get NaNs from the outer
    # merge; fill them with column means. numeric_only=True is required on
    # pandas >= 2.0, where mean() raises TypeError on non-numeric columns
    # instead of silently skipping them.
    column_means = df_relation_matrix.mean(numeric_only=True)
    df_relation_matrix.fillna(column_means, inplace=True)
    return df_relation_matrix
Exemplo n.º 6
0
def run_experiment_with_datasets_devices(exp, datasets, devices, models_dir, metrics_dir):
    """Invoke ``exp`` for every dataset, device and stored model file.

    Args:
        exp: Callable invoked once per (dataset, device, model file) with
            the label column, column selection, metrics headers, both
            directories, dataset name and data, device name, model file
            name and the file name without extension.
        datasets: Names; each is loaded from ``data/<name>.csv``.
        devices: Sub-directory names of ``models_dir`` to scan.
        models_dir: Directory holding one sub-directory per device.
        metrics_dir: Forwarded to ``exp`` unchanged.
    """
    label_col = 'device_category'
    selected_cols = pd.read_csv(os.path.abspath('data/use_cols.csv'))
    headers = list(pd.read_csv(os.path.abspath('data/metrics_headers.csv')))

    for dataset_name in datasets:
        print('@', dataset_name)
        csv_path = 'data/{}.csv'.format(dataset_name)
        dataset = utils.load_data_from_csv(csv_path, use_cols=selected_cols)

        for dev_name in devices:
            print('@@', dev_name)
            device_models_dir = os.path.join(models_dir, dev_name)
            for model_pkl in os.listdir(device_models_dir):
                model_name, _extension = os.path.splitext(model_pkl)
                print('@@@', model_name)

                exp(label_col, selected_cols, headers, models_dir,
                    metrics_dir, dataset_name, dataset, dev_name,
                    model_pkl, model_name)
Exemplo n.º 7
0
 def __init__(self,
              dev_name,
              model,
              is_model_pkl=False,
              opt_seq_len=1,
              use_cols=None,
              y_col=None,
              train=None,
              is_train_csv=False,
              validation=None,
              is_validation_csv=False):
     """Initialise the base classifier, then settle the sequence length.

     All arguments except ``opt_seq_len``, ``validation`` and
     ``is_validation_csv`` are forwarded to the parent ``__init__``.

     Args:
         opt_seq_len: Stored on ``self.opt_seq_len`` unless both training
             and validation data are supplied.
         validation: Optional validation data. When given together with
             ``train`` it is passed to ``self.find_opt_seq_len``.
         is_validation_csv: Whether ``validation`` must first be loaded
             through ``utils.load_data_from_csv``.
     """
     super().__init__(dev_name=dev_name,
                      model=model,
                      is_model_pkl=is_model_pkl,
                      use_cols=use_cols,
                      y_col=y_col, train=train,
                      is_train_csv=is_train_csv)
     # Explicit None checks: a pandas DataFrame raises ValueError when used
     # in a boolean context, so ``train and validation`` would crash for
     # in-memory data.
     if train is not None and validation is not None:
         if is_validation_csv:
             validation = utils.load_data_from_csv(validation, use_cols)
         self.find_opt_seq_len(validation)
     else:
         self.opt_seq_len = opt_seq_len
Exemplo n.º 8
0
 def get_text_data(self, target):
   """Load the text data and split it for ``target``.

   Returns:
       The two values produced by ``utils.extract_data`` for the text
       frame: the extracted frame and ``y``.
   """
   text_frame, _ = utils.load_data_from_csv(dtype="text")
   # NOTE: the preprocess(..., dtype="text") step is currently disabled.
   extracted = utils.extract_data(text_frame, target, type="text")
   text_frame, y = extracted
   return text_frame, y
Exemplo n.º 9
0
 def get_face_data(self, target):
   """Load the face data and split it for ``target``.

   Returns:
       The two values produced by ``utils.extract_data`` for the face
       frame: the extracted frame and ``y``.
   """
   face_frame, _ = utils.load_data_from_csv(dtype="face")
   # NOTE: the preprocess(..., dtype="face") step is currently disabled.
   extracted = utils.extract_data(face_frame, target, type="face")
   face_frame, y = extracted
   return face_frame, y
Exemplo n.º 10
0
 def fetch_node2vec_data(self):
     """Placeholder: load the relation data and return an empty string.

     The loaded frames are not used yet; no embeddings are computed.
     """
     loaded = utils.load_data_from_csv(dtype="relation")
     _relation, _output = loaded
     return ""
Exemplo n.º 11
0
 def fetch_text_data(self):
     """Return the text frame after running it through ``preprocess``."""
     raw_text, _ = utils.load_data_from_csv(dtype="text")
     return preprocess(raw_text, dtype="text")
Exemplo n.º 12
0
 def fetch_face_data(self):
     """Return the face frame after running it through ``preprocess``."""
     raw_face, _ = utils.load_data_from_csv(dtype="face")
     return preprocess(raw_face, dtype="face")
 def fetch_text_data(self):
   """Return the first of the two frames loaded for ``dtype="text"``."""
   loaded = utils.load_data_from_csv(dtype="text")
   text_frame, _output = loaded
   return text_frame
 def fetch_face_data(self):
   """Return the first of the two frames loaded for ``dtype="face"``."""
   loaded = utils.load_data_from_csv(dtype="face")
   face_frame, _output = loaded
   return face_frame