def fin_cols(col_l, col_r, id_l, id_r):
    """Return the prefixed column names for a left/right table pair.

    The list starts with the two id columns (``'ltable.' + id_l`` and
    ``'rtable.' + id_r``). It is followed by every name returned by
    ``mg.diff(col_l, [id_l])`` prefixed with ``'ltable.'``, and then every
    name returned by ``mg.diff(col_r, [id_r])`` prefixed with ``'rtable.'``.

    Args:
        col_l: Column names of the left table.
        col_r: Column names of the right table.
        id_l: Name of the left table's id column.
        id_r: Name of the right table's id column.

    Returns:
        A new list of prefixed column names.
    """
    # The two id columns always lead the output.
    prefixed = ['ltable.' + id_l, 'rtable.' + id_r]
    sides = (
        ('ltable.', col_l, id_l),
        ('rtable.', col_r, id_r),
    )
    for prefix, columns, id_col in sides:
        # The id column is already in the output, so take it out here.
        remaining = mg.diff(columns, [id_col])
        prefixed.extend([prefix + name for name in remaining])
    return prefixed
def predict_ex_attrs(self, table, exclude_attrs):
    """Predict on all columns of *table* except the excluded ones.

    Args:
        table: Object exposing ``to_dataframe()``; the converted frame's
            ``columns`` determine which attributes are available.
        exclude_attrs: A list of column names to leave out. Any non-list
            value is treated as a single name and wrapped in a list.

    Returns:
        Whatever ``self.predict_sklearn`` returns for the projected frame.
    """
    excluded = exclude_attrs if isinstance(exclude_attrs, list) else [exclude_attrs]
    frame = table.to_dataframe()
    # Project onto the columns of the converted frame that are not excluded.
    feature_cols = mg.diff(frame.columns, excluded)
    features = frame[feature_cols]
    predictions = self.predict_sklearn(features, check_rem=False)
    return predictions
def get_xy_data_ex(table, exclude_attrs, target_attr):
    """Split *table* into feature values and flattened target values.

    Args:
        table: Object exposing ``columns`` and ``to_dataframe()``.
        exclude_attrs: A list of column names to leave out of the features.
            Any non-list value is treated as a single name and wrapped in
            a list.
        target_attr: Column selector for the target values.

    Returns:
        A tuple ``(x, y)``: ``x`` is ``.values`` of the projected feature
        columns, ``y`` is ``.values`` of the target column(s) after
        ``ravel()``.
    """
    excluded = exclude_attrs if isinstance(exclude_attrs, list) else [exclude_attrs]
    # Feature columns are computed from the table's own columns, before
    # the table is converted to a dataframe.
    feature_cols = mg.diff(table.columns, excluded)
    frame = table.to_dataframe()
    features = frame[feature_cols].values
    # Flatten the target to mute warnings from svm and cross validation.
    target = frame[target_attr].values.ravel()
    return features, target
def get_data_in_xy_format_given_ex_attrs(table, exclude_attrs, target_attr):
    """Return feature and target values, dropping key columns as well.

    Besides the caller-supplied exclusions, the table's key and the values
    of its ``'foreign_key_ltable'`` and ``'foreign_key_rtable'`` properties
    are always removed from the feature columns.

    Args:
        table: Object exposing ``get_key()``, ``get_property(name)``,
            ``columns`` and column selection via ``table[...]``.
        exclude_attrs: A list of column names to leave out. Any non-list
            value is treated as a single name and wrapped in a list.
        target_attr: Column selector for the target values.

    Returns:
        A tuple ``(x, y)`` holding ``.values`` of the projected feature
        columns and ``.values`` of the target selection.
    """
    if not isinstance(exclude_attrs, list):
        exclude_attrs = [exclude_attrs]

    # Key and foreign-key attributes are never used as features.
    key_attrs = {
        table.get_key(),
        table.get_property('foreign_key_ltable'),
        table.get_property('foreign_key_rtable'),
    }
    all_excluded = list(set(exclude_attrs) | key_attrs)

    feature_cols = diff(table.columns, all_excluded)
    return table[feature_cols].values, table[target_attr].values
def fit_ex_attrs(self, table, exclude_attrs, target_attr):
    """Fit on all columns of *table* except the excluded ones.

    Assumes the excluded attributes and the target attribute are present
    in the table.

    Args:
        table: Object exposing ``columns`` and ``to_dataframe()``.
        exclude_attrs: A list of column names to leave out of the features.
            Any non-list value is treated as a single name and wrapped in
            a list.
        target_attr: Column selector for the target passed to the fit.

    Returns:
        None. The result of ``self.fit_sklearn`` is not propagated.
    """
    excluded = exclude_attrs if isinstance(exclude_attrs, list) else [exclude_attrs]
    # Feature columns are computed from the table's own columns, before
    # the table is converted to a dataframe.
    feature_cols = mg.diff(table.columns, excluded)
    frame = table.to_dataframe()
    features = frame[feature_cols]
    target = frame[target_attr]
    self.fit_sklearn(features, target, check_rem=False)
def get_data_in_xy_format_given_ex_attrs(table, exclude_attrs, target_attr):
    """Extract ``(x, y)`` value arrays from *table*, skipping key columns.

    The feature columns are ``table.columns`` minus the union of
    *exclude_attrs*, ``table.get_key()``, and the table's
    ``'foreign_key_ltable'`` / ``'foreign_key_rtable'`` property values.

    Args:
        table: Object exposing ``get_key()``, ``get_property(name)``,
            ``columns`` and column selection via ``table[...]``.
        exclude_attrs: A list of column names to leave out. Any non-list
            value is treated as a single name and wrapped in a list.
        target_attr: Column selector for the target values.

    Returns:
        A tuple ``(x, y)``: ``.values`` of the feature columns and
        ``.values`` of the target selection.
    """
    requested = exclude_attrs if isinstance(exclude_attrs, list) else [exclude_attrs]

    # Collect the key and both foreign keys so they are dropped as well.
    fk_props = ('foreign_key_ltable', 'foreign_key_rtable')
    key_attrs = set(
        [table.get_key()] + [table.get_property(prop) for prop in fk_props]
    )
    to_drop = list(set(requested).union(key_attrs))

    kept_cols = diff(table.columns, to_drop)
    x = table[kept_cols].values
    y = table[target_attr].values
    return x, y