Example #1
class XGB(BaseModel):
    def __init__(self):
        self.clf = XGBClassifier(
            n_estimators=200,
            max_depth=20,
            learning_rate=0.1,
            random_state=0,
            booster="gbtree",
            use_label_encoder=False,
        )

    def train(self, X_train, Y_train):
        X_train, Y_train = do_rebalance(X_train, Y_train)
        self.clf.fit(X_train, Y_train)

    def test(self, X_test, Y_test):
        Y_prob = self.clf.predict_proba(X_test)
        auc = metrics.roc_auc_score(Y_test, Y_prob[:, 1])
        return auc

    def predict(self, X):
        Y_prob = self.clf.predict_proba(X)
        return Y_prob

    def load_model(self, model_path):
        self.clf.load_model(model_path)
        # with open(model_path, "rb+") as file:
        #     self.clf = pickle.load(file)

    def save_model(self, model_path):
        self.clf.save_model(model_path)
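A minimal round-trip sketch for the wrapper above, assuming `BaseModel`, `do_rebalance`, and `metrics` are importable from the surrounding project; the synthetic data and file name are illustrative only:

import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, n_features=10, random_state=0)

model = XGB()
model.train(X, y)             # rebalances, then fits
model.save_model("xgb.json")  # XGBoost's native JSON format

restored = XGB()
restored.load_model("xgb.json")
assert np.allclose(model.predict(X), restored.predict(X))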
Example #2
class WrappedXGBClassifier(WrappedModel):
    def base_init_finished(self):
        self.reset()

    def fit(self, X, y):
        self._value.fit(X, y, **self._fit_kwargs)
        return self

    def reset(self):
        from xgboost import XGBClassifier
        self._value = XGBClassifier(**self._init_kwargs)

    def predict(self, X):
        return self._value.predict(X)

    def predict_proba(self, X):
        if self._pos_index is None:
            raise Exception('predict_proba requires pos_index to be set')
        return self._value.predict_proba(X)[:, self._pos_index]

    def dump(self, dirpath, name):
        self._value.save_model(pathjoin(dirpath, name + '.bin'))
        return self

    def load(self, dirpath, name):
        self._value.load_model(pathjoin(dirpath, name + '.bin'))
        return self
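`_pos_index` is assumed to be set elsewhere in `WrappedModel`; a hypothetical helper showing one common way to derive it, by looking up the positive label in the fitted estimator's `classes_`:

def find_pos_index(fitted_clf, pos_label=1):
    # predict_proba columns follow the order of fitted_clf.classes_
    return list(fitted_clf.classes_).index(pos_label)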
Example #3
class XGBoost_Ranker():
    def __init__(self, timestamp, load=True):
        self.model = XGBClassifier()
        if load:  # the load flag was previously ignored
            self.model.load_model(timestamp + '.file')
        self.factor = 1.0

    def set_factor(self, factor):
        self.factor = factor

    def rank_features(self, features):
        _features = np.copy(features)
        for f in _features:
            f[1] *= self.factor
            f[4] *= self.factor
            f[5] *= self.factor
        # return np.array([0, 1, 2, 3, 4])

        test_x = []
        for i in range(len(_features)):
            for j in range(len(_features)):
                if i == j:
                    continue
                test_x.append(
                    np.concatenate((_features[i], _features[j]), axis=0))

        test_x = np.array(test_x)
        print(test_x.shape)
        y = self.model.predict(test_x).reshape(len(_features),
                                               len(_features) - 1)
        y = np.sum(y, axis=1)
        # print(y)
        return np.argsort(y)[::-1]
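The nested loop builds every ordered pair (i, j) with i != j and concatenates the two feature vectors. A vectorized sketch of the same construction, assuming `features` is a NumPy array; `np.where` yields indices in row-major order, so the pair ordering matches the loop:

import numpy as np

def build_pairs(features):
    n = len(features)
    i_idx, j_idx = np.where(~np.eye(n, dtype=bool))  # all (i, j) with i != j
    return np.concatenate([features[i_idx], features[j_idx]], axis=1)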
Example #4
class StabilityClassifier():
    def __init__(self, modelfile='spock.json'):
        pwd = os.path.dirname(__file__)
        self.model = XGBClassifier()
        self.model.load_model(pwd + '/models/'+modelfile)

    def check_errors(self, sim):
        if sim.N_real < 4:
            raise AttributeError("SPOCK Error: SPOCK only applicable to systems with 3 or more planets") 
        
    def predict_stable(self, sim):
        triofeatures, stable = self.generate_features(sim)
        if not stable:
            return 0
       
        trioprobs = self.predict_from_features(triofeatures)
        return trioprobs.min()          # minimum prob among all trios tested

    def generate_features(self, sim):
        sim = sim.copy()
        init_sim_parameters(sim)
        self.check_errors(sim)
        
        trios = [[i,i+1,i+2] for i in range(1,sim.N_real-2)] # list of adjacent trios   
        featureargs = [10000, 80, trios]
        triofeatures, stable = features(sim, featureargs)
        
        return triofeatures, stable

    def predict_from_features(self, triofeatures):
        # The xgboost model expects a 2D array of shape (Npred, Nfeatures),
        # where Npred is the number of samples and Nfeatures the number of features per sample.
        featurevals = np.array([[val for val in features.values()] for features in triofeatures])
        return self.model.predict_proba(featurevals)[:,1] # take 2nd column for probability it belongs to stable class
Example #5
def load_modele(path):
    '''Return the model as an object, given its path.'''
    if 'GradientBoosting' in str(path):
        # print('Loading XGBoost model')
        model = XGBClassifier()
        model.load_model(path)
        return model
    else:
        # print('Loading pickled model')
        return pickle.load(open(path, 'rb'))
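Dispatching on a substring of the file name is brittle. A sketch of the same loader keyed on the file extension instead, under the assumption that native XGBoost dumps are saved with a .json suffix:

import pickle
from pathlib import Path
from xgboost import XGBClassifier

def load_model_by_suffix(path):
    '''Return the model object stored at path.'''
    if Path(path).suffix == '.json':  # assumed convention for native XGBoost dumps
        model = XGBClassifier()
        model.load_model(path)
        return model
    with open(path, 'rb') as f:
        return pickle.load(f)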
Example #6
def export_model(amnt_data, client):
    model = XGBClassifier()
    model.load_model('boa.model')
    with open('model.pb', 'wb') as output:
        pickle.dump([model, amnt_data], output)
    bucket_test = client.get_bucket('traina-data')
    blob_test = bucket_test.blob('model.pb')
    blob_test.upload_from_filename(filename='model.pb')
    os.remove('model.pb')
    print(Fore.GREEN + 'Exported Model Successfully')
    return
Example #7
def get_model():

    param_path = os.path.join(STORAGE, "params.json")
    with open(param_path, "r") as f:
        json_data = f.read()
    params = json.loads(json_data)  # json.loads already returns a dict

    model = XGBClassifier(**params)
    model_path = os.path.join(STORAGE, "model.xgb")
    model.load_model(model_path)

    return model
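A sketch of the saving-side counterpart that would produce the two files `get_model` expects, reusing the assumed STORAGE constant; `get_xgb_params` is the standard accessor for an estimator's hyperparameters:

import json
import os

def save_model(model):
    # Persist hyperparameters and booster separately, mirroring get_model().
    # (Values may need filtering if any are not JSON-serializable.)
    with open(os.path.join(STORAGE, "params.json"), "w") as f:
        json.dump(model.get_xgb_params(), f)
    model.save_model(os.path.join(STORAGE, "model.xgb"))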
Example #8
class ProcessPlugin(WorkerPlugin):
    def __init__(self,
                 cfg_path=os.environ.get("MODEL_CONFIG"),
                 weights_path=os.environ.get("PP_WEIGHTS_PTH"),
                 classes_path=os.environ.get("CLASSES_PTH")):
        self.cfg_path = cfg_path
        postprocess_weights_pth = weights_path
        self.postprocess_model = XGBClassifier()
        self.postprocess_model.load_model(postprocess_weights_pth)
        self.classes_pth = classes_path
        with open(self.classes_pth) as stream:
            self.classes = yaml.safe_load(stream)["CLASSES"]  # safe_load: yaml.load without a Loader is unsafe/deprecated
Example #9
def train_lazy():
    # Load the dataset
    X, y = load_data()
    # Split the data
    X_train, X_val, y_train, y_val = split_dataset(X, y)
    # Normalize
    X_train = normalize(X_train)
    X_val = normalize(X_val)

    # uncomment to check the performance of the 25 models
    # clf = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
    # # fit
    # scores,_ = clf.fit(X_train, X_val, y_train, y_val)
    # # print
    # print(scores)

    # Final model
    # check if model exist
    if os.path.isfile(config.MODEL_PATH):
        model = XGBClassifier()
        model.load_model(config.MODEL_PATH)
    else:
        model = XGBClassifier()
        model.fit(X_train,
                  y_train,
                  eval_metric="error",
                  eval_set=[(X_train, y_train), (X_val, y_val)],
                  verbose=True)
        # save model
        model.save_model(config.MODEL_PATH)
    # performance on train set
    y_pred = model.predict(X_train)
    # evaluate predictions
    print_performance(y_train, y_pred, 'train')

    # performance on val set
    y_pred = model.predict(X_val)
    # evaluate predictions
    print_performance(y_val, y_pred, 'val')

    # Load the test dataset
    X_test, y_test = load_test_data()
    # Normalize
    X_test = normalize(X_test)
    # get prediction
    y_pred = model.predict(X_test)
    # evaluate predictions
    print_performance(y_test, y_pred, 'test')
    # print
    plot_performance(model)
Example #10
def predict_probability_of_winning(gold_diff_at_10, exp_diff_at_10, team):

    dirname = os.path.dirname(__file__)
    model = XGBClassifier()

    model.load_model(os.path.join(dirname, f'./models/{team}_model.json'))

    df = pd.DataFrame({
        team + 'GoldDiff': [gold_diff_at_10],
        team + 'ExperienceDiff': [exp_diff_at_10]
    })
    # values are wrapped in lists because the pandas constructor doesn't accept bare scalars
    predicts = model.predict_proba(df)

    for i, col in enumerate(['redWin', 'blueWin']):
        df[col] = predicts[:, i]
    return df
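Hard-coding the column order as ['redWin', 'blueWin'] is an assumption about how the model was trained; the authoritative order comes from the fitted model's `classes_`, as in this sketch (which assumes the training labels were those strings):

for i, cls in enumerate(model.classes_):
    df[str(cls)] = predicts[:, i]  # predict_proba columns follow classes_ order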
Example #11
def generate_shap_html(feature, user_bin, user_id):
    xgb_clf = XGBClassifier()
    xgb_clf.load_model(os.path.join(MODEL_DIRECTORY, "xgb.model"))
    explainer = shap.TreeExplainer(xgb_clf)
    values = explainer.shap_values(feature)
    shap.initjs()
    fp = shap.force_plot(explainer.expected_value[user_bin - 1],
                         values[user_bin - 1][0],
                         feature,
                         show=False)

    shap.save_html(os.path.join(MODEL_DIRECTORY, f"User_{user_id}.html"), fp)
    with open(os.path.join(MODEL_DIRECTORY, f"User_{user_id}.html"),
              "r",
              encoding='utf-8') as f:
        html = f.read()
    os.remove(os.path.join(MODEL_DIRECTORY, f"User_{user_id}.html"))
    return str(html), values
Example #12
def load_model_and_generate_evaluation_images(*, model_filename, input_path,
                                              output_path, feature_names):
    model = XGBClassifier()
    model.load_model(model_filename)

    frame_folders = sorted(get_frame_folders(input_path))

    for frame_folder in frame_folders:
        frame_path = os.path.join(input_path, frame_folder)
        segment_names = [
            name for name in os.listdir(frame_path) if name[1].isdigit()
        ]

        if len(segment_names) != 0:
            continue

        for camera_name in ["60", "180", "300"]:
            image_name = "camera" + camera_name + ".png"
            print(frame_path + "/" + image_name)
            image_bgr = cv.imread(os.path.join(frame_path, image_name))
            features, shape = create_features(image_bgr=image_bgr,
                                              flatten=True)

            X = pd.DataFrame(features)[feature_names]
            y = model.predict(X)

            segments = y.reshape(shape)

            segments_bgr = [class2bgr(idx) for idx in segments.flatten()]
            segments_bgr = np.array(segments_bgr).reshape(*shape, 3)

            path = os.path.join(output_path, frame_folder)
            if not os.path.exists(path):
                os.makedirs(path)

            image_and_segments_bgr = np.concatenate([image_bgr, segments_bgr],
                                                    axis=1)
            # cv.imwrite(filename=os.path.join(path, image_name),
            #            img=image_bgr)
            segments_filename = "camera" + camera_name + "_segments" + ".png"
            cv.imwrite(
                filename=os.path.join(path, segments_filename),
                img=image_and_segments_bgr,
            )
Example #13
def load_model_and_generate_evaluation_images(
    model_filename,
    input_path: pathlib.Path,
    output_path: pathlib.Path,
    feature_names,
):
    model = XGBClassifier()
    model.load_model(model_filename)

    for frame_folder in sorted(get_subdirectories(input_path)):
        segment_names = [
            f.name for f in frame_folder.iterdir()
            if f.is_file() and f.name[1].isdigit()
        ]

        if len(segment_names) != 0:
            continue

        for camera_name in ["60", "180", "300"]:
            image_name = "camera" + camera_name + ".png"
            print(frame_folder / image_name)
            image_bgr = cv.imread(str(frame_folder / image_name))
            features, shape = create_features(image_bgr=image_bgr,
                                              flatten=True)

            X = pd.DataFrame(features)[feature_names]
            y = model.predict(X)

            segments = y.reshape(shape)

            segments_bgr = [class2bgr(idx) for idx in segments.flatten()]
            segments_bgr = np.array(segments_bgr).reshape(*shape, 3)

            path = output_path / frame_folder.name
            if not path.exists():
                path.mkdir(parents=True)

            image_and_segments_bgr = np.concatenate([image_bgr, segments_bgr],
                                                    axis=1)
            segments_filename = "camera" + camera_name + "_segments" + ".png"
            cv.imwrite(
                filename=str(path / segments_filename),
                img=image_and_segments_bgr,
            )
Example #14
def predict(inputs):

    main_category, category, goal, country, currency, today = inputs

    """ Encode inputs """
    encoder = load(ENCODER_PATH)
    inputs2enc = np.array([category, main_category, currency, country]).reshape(1, -1)
    inputs_encoded = encoder.transform(inputs2enc)

    """ Stack """
    numericals = np.array([goal, today.day, today.month]).reshape(1, -1)
    final_inputs = np.hstack([numericals, inputs_encoded]).astype(np.float32)

    """ Load model and predict """
    model = XGBClassifier(seed=42)
    model.load_model(MODEL_PATH)
    result = model.predict_proba(final_inputs)

    return result
Example #15
def predict(age,
            count,
            diagnosis, 
            fpath='static/model/HFEA_model_{}',
            nmodels=5):
    """
    Loads and predicts from the models.

    Parameters:
    -----------
    age : int,
        Age of the patient in years.

    count : int, 
        The number of Oocytes (eggs) collected following the treatment.

    diagnosis : str, 
        The patient's infertility diagnosis; must be one of Ovulatory disorder, 
        Male factor, Endometriosis or Unexplained.

    fpath : str (default='static/model/HFEA_model_{}'),
        Path to the models.

    nmodels : int (default=5),
        The number of models (i.e., the number of folds used in the
        cross-validation procedure during training).
        
    Returns:
    --------
    pred : float,
        The predicted probability, averaged over the nmodels models.
    """
    age_group = map_age_to_age_group(age)
    # add the four infertility diagnosis features
    infertility = create_infertility_feature(diagnosis)
    X = np.r_[[age_group, count], infertility]
    pred = 0
    for i in range(nmodels):
        clf = XGBClassifier()
        clf.load_model(fpath.format(i))
        pred += clf.predict_proba(X.reshape((1, -1)))[:, 1][0] / nmodels
    return pred
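A hypothetical call, assuming the five model files and the helper functions exist:

prob = predict(age=34, count=8, diagnosis='Male factor')
print(f'Predicted probability: {prob:.3f}')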
Example #16
def predict(name, match_analysis_num, api_key):
    # Load model and data
    ss = joblib.load("model/standard_scaler.pkl")
    xgb = XGBClassifier()
    xgb.load_model("model/LOL_predict_xgb.bst")
    match_df, player_stat, game_minute, win_lable = datapipe.collect_predict_data_by_name(
        name, match_analysis_num, api_key)
    del player_stat
    gc.collect()
    if (win_lable[0] == -1):
        return -1, -1
    elif (win_lable[0] == -404):
        return -404, -404
    else:
        # Predict win rate
        match_scaled = ss.transform(match_df)  # use the already-fitted scaler; fit_transform would refit on prediction data
        win_rate = xgb.predict_proba(match_scaled)
        real_win_rate = win_lable.mean()
        predict_win_rate = win_rate[:, 1].mean()
        return real_win_rate, predict_win_rate
Example #17
def predict_xgb(data):
    """Perform prediction using trained model."""
    model = XGBClassifier()
    data = normalizator(prepare_data(data))
    try:
        model.load_model(
            "cotopaxi/identification_models/proto_XGB_20201112.model")
    except ValueError as exc:
        raise CotopaxiException(
            "[!] Cannot load machine learning classifier!"
            " This may be caused by an incompatible version of tensorflow"
            " (please install tensorflow version 2.2.0)!") from exc
    result = model.predict(data)
    unique, counts = numpy.unique(result, return_counts=True)
    devices = list(unique)
    result_dict = dict(zip(devices, counts))
    result_dict = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)
    result_class = result_dict[0][0]
    return result_class, result_dict, counts.sum()
Example #18
    def run(self):
        model_folder_path = '../model'
        model_path = os.path.join(model_folder_path, 'xgb_final.pkl')
        trained_model = XGBClassifier()
        trained_model.load_model(model_path)

        # read the processed features file
        df_test = pd.read_csv(self.input().path)

        # predict churn
        prediction = trained_model.predict(df_test)

        # putting prediction in a dataframe along with index ids (this is
        # the submission format)
        submission = pd.DataFrame(data=prediction, columns=['churn'])
        submission['churn'] = submission['churn'].map({1: 'yes', 0: 'no'})
        submission.reset_index(inplace=True)
        submission.rename(columns={'index': 'id'}, inplace=True)
        submission['id'] = submission['id'] + 1
        # write submission to file
        submission.to_csv(self.output().path, index=False)
Example #19
def stroke_predict(gender, age, hypertension, heart_disease, ever_married,
                   work_type, Residence_type, avg_glucose_level, bmi,
                   smoking_status):
    # creating the pandas dataframe and replicating the training-time encoding
    # (categorical fields are kept as raw strings so the mappings below apply)
    a_dict = {
        'gender': [gender],
        'age': [int(age)],
        'hypertension': [int(hypertension)],
        'heart_disease': [int(heart_disease)],
        'ever_married': [ever_married],
        'work_type': [work_type],
        'Residence_type': [Residence_type],
        'avg_glucose_level': [float(avg_glucose_level)],
        'bmi': [float(bmi)],
        'smoking_status': [smoking_status]
    }
    data = pd.DataFrame(a_dict)
    data['gender'] = 1 if gender == 'male' else 0
    data['ever_married'] = 1 if ever_married == 'Yes' else 0
    work_mapping = {
        'Self_employed': 3,
        'Private': 2,
        'children': 1,
        'Govt_job': 0
    }
    data['work_type'] = data['work_type'].map(work_mapping)
    data['Residence_type'] = 1 if Residence_type == 'Urban' else 0
    smoke_mapping = {
        'Unknown': 0,
        'formerly smoked': 1,
        'never_smoked': 2,
        'smokes': 3
    }
    data['smoking_status'] = data['smoking_status'].map(smoke_mapping)
    # after data has been replicated
    xgb = XGBClassifier()
    xgb.load_model("weights/stroke.model")
    return xgb.predict(data)[0]
Example #20
def start_pre(val_img_list, val_tar_list, type_class=minor_type_class):
    real_class_pair_list = cut_class_pair

    model_base_path = 'outs/'
    result_list = [list() for i in range(len(val_img_list))]
    config = Config()

    result_max_item_list = [(0, 0) for i in range(len(val_img_list))]
    for ci, class_pair in enumerate(type_class):
        model_path = model_base_path + 'xgboost_model_per_class' + str(
            class_pair) + '.pkl'
        print('part ', ci, ' of ', len(real_class_pair_list))

        clr = XGBClassifier()
        clr.load_model(model_path)
        y_p_x = clr.predict_proba(val_img_list)

        pre_for_f1 = []
        t_for_f1 = []
        for i_ys, ys in enumerate(y_p_x):
            if len(val_tar_list) > 0:
                tail = ''
                mid = ''
                if class_pair in val_tar_list[i_ys]:
                    tail = '-----------'
                if ys[1] >= 0.5:
                    mid = '||||||||'
                print('ci ', ci, ' i_ys ', i_ys, ' pre ', ys, mid, ' c ',
                      class_pair, ' t ', val_tar_list[i_ys], tail)
            else:
                print('ci ', ci, ' i_ys ', i_ys, ' pre ', ys, ' c ',
                      class_pair)

            sub_result = result_list[i_ys]
            if ys[1] >= 0.5:
                sub_result.append(class_pair)
                pre_for_f1.append(1)
            else:
                pre_for_f1.append(0)

            result_list[i_ys] = sub_result
            max_item_idx, max_item_f = result_max_item_list[i_ys]
            if ys[1] > max_item_f:
                result_max_item_list[i_ys] = (class_pair, ys[1])
        if len(val_tar_list) > 0:
            for tar in val_tar_list:
                if class_pair in tar:
                    t_for_f1.append(1)
                else:
                    t_for_f1.append(0)
            print('c ', class_pair, '---------f1 ',
                  f1_score(t_for_f1, pre_for_f1, average="macro"))
    #  print('sub ', ci, ' r:', sub_result)

    pre_list = []
    for this_sub_i, sub_result in enumerate(result_list):
        print('this_sub_i ', this_sub_i, ' sub_result ', sub_result)
        result_i = np.zeros(28)
        for i_s, s in enumerate(sub_result):
            result_i[s] += 1

    #  print('result_i ', result_i)
        result = []
        for i, r_i in enumerate(result_i):
            if r_i == 1 and (i in type_class):
                #     print('i ', i,  ' r_i ', r_i)
                result.append(i)
        if len(val_tar_list) > 0:
            print('pre ', result, ' t ', val_tar_list[this_sub_i])
        pre_list.append(result)
    return pre_list, result_max_item_list
Example #21
class FeatureClassifier():
    def __init__(self, modelfile='featureclassifier.json'):
        pwd = os.path.dirname(__file__)
        self.model = XGBClassifier()
        self.model.load_model(pwd + '/models/'+modelfile)

    def check_errors(self, sim):
        if sim.N_real < 4:
            raise AttributeError("SPOCK Error: SPOCK only applicable to systems with 3 or more planets") 
        
    def predict_stable(self, sim, n_jobs=-1):
        """
        Predict whether passed simulation will be stable over 10^9 orbits of the innermost planet.

        Parameters:

        sim (rebound.Simulation): Orbital configuration to test
        n_jobs (int):               Number of cores to use for calculation (only if passing more than one simulation). Default: Use all available cores. 

        Returns:

        float:  Estimated probability of stability. Will return exactly zero if configuration goes 
                unstable within first 10^4 orbits.

        """
        res = self.generate_features(sim, n_jobs=n_jobs)

        try:
            stable = np.array([r[1] for r in res]) 
            features = [r[0] for r in res]
            Nsims = len(sim)
        except Exception:  # a single simulation was passed, so res is one (features, stable) pair
            stable = np.array([res[1]])
            features = [res[0]]
            Nsims = 1

        # We take the negligible hit of evaluating XGBoost for all systems, and overwrite prob=0 for ones that went unstable in the short integration at the end
        # array of Ntrios x 10 features to evaluate with XGboost (Nsims*Ntriospersim x 10 features)
        featurevals = np.array([[val for val in trio.values()] for system in features for trio in system]) 
        probs = self.model.predict_proba(featurevals)[:,1] # take 2nd column for probability it belongs to stable class
        # XGBoost evaluated a flattened list of all trios, reshape so that trios in same sim grouped
        trios_per_sim = int(len(probs)/Nsims)
        probs = probs.reshape((Nsims, trios_per_sim))
        # Take the minimum probability of stability within the trios for each simulation
        probs = np.min(probs, axis=1)
        # Set probabilities for systems that went unstable within the short integration to exactly zero
        probs[stable == 0] = 0

        if Nsims == 1:
            return probs[0]
        else:
            return probs

    def generate_features(self, sim, n_jobs=-1):
        """
        Generates the set of summary features used by the feature classifier for prediction. 

        Parameters:

        sim (rebound.Simulation): Orbital configuration to test
        n_jobs (int):               Number of cores to use for calculation (only if passing more than one simulation). Default: Use all available cores. 

        Returns:

        List of OrderedDicts:   A list of sets of features for each adjacent trio of planets in system.
                                Each set of features is an ordered dictionary of 10 summary features. See paper.
       
        stable (int):           An integer for whether the N-body integration survived the 10^4 orbits (1) or 
                                went unstable (0).
        """
        if isinstance(sim, rebound.Simulation):
            sim = [sim]
        
        args = []
        if len(set([s.N_real for s in sim])) != 1:
            raise ValueError("If running over many sims at once, they must have the same number of particles!")
        for s in sim:
            s = s.copy()
            init_sim_parameters(s)
            minP = np.min([p.P for p in s.particles[1:s.N_real]])
            self.check_errors(s)
            trios = [[j,j+1,j+2] for j in range(1,s.N_real-2)] # list of adjacent trios   
            featureargs = [10000, 80, trios]
            args.append([s, featureargs])

        def run(params):
            sim, featureargs = params
            triofeatures, stable = features(sim, featureargs)
            return triofeatures, stable

        if len(args) == 1: # single sim
            res = run(args[0])    # stable will be 0 if an orbit is hyperbolic
        else:

            if n_jobs == -1:
                n_jobs = cpu_count()
            #pool = ThreadPool(n_jobs)
            res = map(run, args)
       
        return list(res)
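A small numeric sketch of the reshape-then-min step in `predict_stable`, with made-up probabilities for two simulations of two trios each:

import numpy as np

probs = np.array([0.9, 0.7, 0.2, 0.8])  # flattened: sim0's trios, then sim1's
probs = probs.reshape((2, 2))           # (Nsims, trios_per_sim)
print(probs.min(axis=1))                # [0.7 0.2]: the weakest trio bounds each system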
Example #22
File: estimate.py  Project: daniekie/aml
    'model__colsample_bylevel': (0.01, 1.0, 'uniform'),
    'model__learning_rate': (0.01, 1.0, 'log-uniform'),
    'model__n_estimators': Integer(60, 400),
    'model__max_depth': Integer(3, 12),
    # 'model__scale_pos_weight': Real(1, 1000, 'log-uniform'), only binary
    'model__min_child_weight': Integer(1, 15),
    'model__gamma': Real(0.1, 3),
    'model__alpha': Real(0, 1),
    'model__lambda': Real(0, 1),
    'model__subsample': Real(0.3, 1),
    'model__colsample_bytree': Real(0, 1),
    'model__colsample_bynode': Real(0, 1)
}

xg = XGBClassifier()
xg.load_model('XGBoost_model.json')
xgb_search_prev = {
    'model': [xg],
    # 'model__learning_rate': (0.01, 1.0, 'log-uniform'),
    # 'model__min_child_weight': (0, 10),
    # 'model__max_delta_step': Integer(0, 20),
    # 'model__colsample_bytree': (0.01, 1.0, 'uniform'),
    # 'model__colsample_bylevel': (0.01, 1.0, 'uniform'),

    # 'model__n_estimators': Integer(100, 200),
    # 'model__scale_pos_weight': Real(1, 1000, 'log-uniform'),
    # 'model__min_child_weight': Integer(1, 10),
    # 'model__gamma': Integer(1, 5),
    # 'model__subsample': Real(0.3, 1),
    # 'model__colsample_bytree': Real(0.1, 1),
    # 'model__max_depth': Integer(6, 12)
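The `model__*` keys follow scikit-learn's pipeline convention for nested parameters, and `Integer`/`Real` come from scikit-optimize. A minimal sketch of how such a space is typically consumed, assuming the search space above is bound to a name such as `xgb_search` and that training data exists:

from skopt import BayesSearchCV
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier

pipe = Pipeline([('model', XGBClassifier())])
search = BayesSearchCV(pipe, search_spaces=xgb_search, n_iter=32, cv=3)
# search.fit(X_train, y_train)  # hypothetical training data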
Example #23
    RED = '\u001b[31m'
    GREEN = '\u001b[32m'
    BLUE = '\u001b[34m'
    RESET = '\033[0m'


xgb_mod = XGBClassifier(booster='dart',
                        tree_method="gpu_hist",
                        n_estimators=300,
                        learning_rate=0.05,
                        predictor='gpu_predictor',
                        eval_metric='logloss',
                        max_depth=3,
                        gpu_id=0)

xgb_mod.load_model('CVD_mod')

cvd_df = pd.read_csv('cardio_train.csv', sep=';', index_col=0)

cvd_df['age'] = cvd_df['age'] / 365.24
cvd_df['gender'] = cvd_df['gender'] - 1

cvd_df = cvd_df[(cvd_df['ap_lo'] <= 370) & (cvd_df['ap_lo'] > 0)]
cvd_df = cvd_df[(cvd_df['ap_hi'] <= 370) & (cvd_df['ap_hi'] > 0)]
cvd_df = cvd_df[cvd_df['ap_hi'] >= cvd_df['ap_lo']]

cvd_df.reset_index(drop=True, inplace=True)

X_train, X_test, y_train, y_test = train_test_split(cvd_df.drop(['cardio'],
                                                                axis=1),
                                                    cvd_df['cardio'],
Example #24
class XGBoost(BaseAlgorithm):
    def __init__(self, algorithm_settings, problem_type):
        super().__init__(algorithm_settings)
        self.problem_type = problem_type

    def build(self):
        if self.problem_type == SupervisedTask.regression:
            self.build_regression_model()

        elif self.problem_type == SupervisedTask.classification:
            self.build_classification_model()

        else:
            raise TypeError('Unknown problem_type')

    def build_regression_model(self):
        from xgboost import XGBRegressor
        self.model = XGBRegressor(
            max_depth=self.algorithm_settings.max_depth,
            learning_rate=self.algorithm_settings.learning_rate,
            n_estimators=self.algorithm_settings.n_estimators,
            objective=self.algorithm_settings.objective,
            booster=self.algorithm_settings.booster,
            n_jobs=self.algorithm_settings.n_jobs,
            gamma=self.algorithm_settings.gamma,
            min_child_weight=self.algorithm_settings.min_child_weight,
            max_delta_step=self.algorithm_settings.max_delta_step,
            subsample=self.algorithm_settings.subsample,
            reg_alpha=self.algorithm_settings.reg_alpha,
            reg_lambda=self.algorithm_settings.reg_lambda,
            random_state=self.algorithm_settings.random_state)

    def build_classification_model(self):
        from xgboost import XGBClassifier
        self.model = XGBClassifier(
            max_depth=self.algorithm_settings.max_depth,
            learning_rate=self.algorithm_settings.learning_rate,
            n_estimators=self.algorithm_settings.n_estimators,
            objective=self.algorithm_settings.objective,
            booster=self.algorithm_settings.booster,
            n_jobs=self.algorithm_settings.n_jobs,
            gamma=self.algorithm_settings.gamma,
            min_child_weight=self.algorithm_settings.min_child_weight,
            max_delta_step=self.algorithm_settings.max_delta_step,
            subsample=self.algorithm_settings.subsample,
            reg_alpha=self.algorithm_settings.reg_alpha,
            reg_lambda=self.algorithm_settings.reg_lambda,
            random_state=self.algorithm_settings.random_state)

    def train(self, train_x, train_y, settings):
        self.model.fit(train_x,
                       train_y,
                       eval_metric=self.algorithm_settings.eval_metric)
        self.save(settings)

    def evaluate(self, test_x):
        prediction = self.model.predict(test_x)
        prediction = prediction.reshape(-1, 1)
        return prediction

    def load(self, model_path):
        self.model.load_model(fname=model_path)

    def save(self, settings):
        model_save_dir = os.path.join(settings.models_path, 'xgboost_models')
        os.makedirs(model_save_dir, exist_ok=True)
        model_name = self.get_model_name(settings)
        save_path = os.path.join(model_save_dir, model_name)
        self.model.save_model(fname=save_path)
        print(f"Model saved to: {save_path}")

    def get_model_name(self, settings):
        if settings.problem_type == SupervisedTask.regression:
            return 'regression_model.xgb'

        else:
            return 'classification_model.xgb'
Example #25
def get_model():
    xgboost_quora_model = BASE_URL + "/xgboost_xcfl_quora_model.model"
    x_cfl = XGBClassifier()
    x_cfl.load_model(xgboost_quora_model)
    return x_cfl
Example #26
# rmse, mae, logloss, error (error is the complement of accuracy), auc (a close companion of accuracy)

results = model.evals_result()
# print("eval's results : ", results)

# print("r2 Score : %.2f%%:" %(r2*100.0))

y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)  # (y_true, y_pred) argument order
print("acc : ", acc)

#####################################################################################################
# import pickle   # provided by the Python standard library

# from joblib import dump, load
# import joblib
# pickle.dump(model, open("./model/xgb_save/cancer.pickle.dat", "wb")) # save in binary-write ("wb") mode
# joblib.dump(model, "./model/xgb_save/cancer.joblib.dat")
model.save_model("./model/xgb_save/cancer.xgb.model")
print("저장됬다.")

# model2 = pickle.load(open("./model/xgb_save/cancer.pickle.dat", "rb"))
# model2 = joblib.load("./model/xgb_save/cancer.joblib.dat")
model2 = XGBClassifier()
model2.load_model("./model/xgb_save/cancer.xgb.model")
print('Loaded.')

y_pred = model2.predict(x_test)
acc = accuracy_score(y_test, y_pred)
print("acc : ", acc)
Example #27
    provides recommendations for portfolio action, backtests, and incrementally trains
    the given XGBoost Classifier Model'''

from xgboost import XGBClassifier
import cpdb
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer
from sklearn.metrics import zero_one_loss
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

btc_model = XGBClassifier()
eth_model = XGBClassifier()
# load_model loads in place and returns None, so don't rebind the variable
btc_model.load_model('btc_model.bin')
eth_model.load_model('eth_model.bin')

model = {
    'btc': btc_model,
    'eth': eth_model,
}
'''Delivers a recommendation based on model classification'''


def get_recommendation(coinName, features):
    coin_model = model.get(coinName)
    recommendation = coin_model.predict(features)

    return recommendation
Example #28
def is_spam(data, mode=2, classifier='manual'):

    if (classifier == 'manual'):
        message_body = data

        if (mode != 2):
            message_body = get_email(data, mode)

        clean_message = clean_message_no_html(message_body,
                                              stop_words=set(
                                                  stopwords.words('english')))

        word_columns_df = pd.DataFrame.from_records([clean_message])
        word_columns_df.index.name = 'DOC_ID'

        word_index = pd.Index(vocab.VOCAB_WORD)

        sparse_matrix = make_sparse_matrix(word_columns_df,
                                           word_index).groupby([
                                               'DOC_ID', 'WORD_ID'
                                           ]).sum().reset_index().to_numpy()

        full_matrix = make_full_matrix(sparse_matrix,
                                       vocab.shape[0]).to_numpy()

        spam_email_prob = PROB_SPAM
        ham_email_prob = 1 - PROB_SPAM
        prev_spam, prev_ham = spam_email_prob, ham_email_prob  # fallbacks if a product underflows to 0
        # denominator = 1

        for j in range(full_matrix.shape[1]):

            if full_matrix[0, j] > 0:

                if prob_token_spam[j] > 0:
                    spam_email_prob = spam_email_prob * \
                        (prob_token_spam[j]**full_matrix[0, j])
                    if spam_email_prob == 0:
                        spam_email_prob = prev_spam
                        ham_email_prob = prev_ham
                        break

                if prob_token_ham[j] > 0:
                    ham_email_prob = ham_email_prob * \
                        (prob_token_ham[j]**full_matrix[0, j])
                    if ham_email_prob == 0:
                        spam_email_prob = prev_spam
                        ham_email_prob = prev_ham
                        break

                prev_spam = spam_email_prob
                prev_ham = ham_email_prob

                # denominator = denominator * prob_all_tokens[j]

        # print(spam_email_prob/denominator > ham_email_prob/denominator)
        print(spam_email_prob > ham_email_prob)

        # joint_log_spam = full_matrix.dot(
        #     np.log(prob_token_spam+0.000000000000001) - np.log(prob_all_tokens+0.000000000000001)) + np.log(PROB_SPAM)
        # print(joint_log_spam)

        # joint_log_ham = full_matrix.dot(
        #     np.log(prob_token_ham+0.000000000000001) - np.log(prob_all_tokens+0.000000000000001)) + np.log(1 - PROB_SPAM)
    # print(joint_log_ham)

    elif (classifier == 'xgb'):
        xgb_classifier = XGBClassifier()
        xgb_classifier.load_model('./XGB.model')

        data_list = []
        data_list.append(data)

        doc_term_matrix = vectorizer.transform(data_list)
        print(xgb_classifier.predict(doc_term_matrix)[0] == 1)
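The `prev_spam`/`prev_ham` bookkeeping above guards against floating-point underflow when many token probabilities are multiplied together; the commented-out `joint_log_spam` lines hint at the standard fix of computing in log space, sketched here with the same variable names:

import numpy as np

eps = 1e-15  # avoid log(0)
joint_log_spam = full_matrix[0].dot(np.log(prob_token_spam + eps)) + np.log(PROB_SPAM)
joint_log_ham = full_matrix[0].dot(np.log(prob_token_ham + eps)) + np.log(1 - PROB_SPAM)
print(joint_log_spam > joint_log_ham)  # same decision rule, no underflow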
Example #29
class Classifier:

    # for initializing train and test sets, classifier and accuracy score
    # Change tree_method to 'gpu_hist' if you want xgboost to run on a GPU
    def __init__(self, params=None):
        # avoid a mutable default argument
        if params is None:
            params = {'objective': 'reg:squarederror', 'verbosity': 0}
        self.X_train = []
        self.X_labels = []
        self.test = []
        self.test_labels = []
        self.model = XGBClassifier(**params)
        self.prediction = 0
        self.error = 0

    def size(self):
        if isinstance(self.X_train, np.ndarray):
            return self.X_train.size
        return len(self.X_train)

    # adding the data points
    def input_train(self, features, feature):
        if isinstance(self.X_train, np.ndarray) and self.X_train.size > 0:
            self.X_train = self.X_train.tolist()
            self.X_labels = self.X_labels.tolist()
        self.X_train.append(features)
        self.X_labels.append(feature)

    # train the data
    def train(self):
        self.X_train = np.asarray(self.X_train)
        self.X_labels = np.asarray(self.X_labels)
        self.model.fit(self.X_train, self.X_labels)

    def train_eval(self, metric='error'):
        self.X_train = np.asarray(self.X_train)
        self.X_labels = np.asarray(self.X_labels)
        X_train, X_test, y_train, y_test = train_test_split(self.X_train,
                                                            self.X_labels,
                                                            test_size=0.33)
        self.model.fit(X_train,
                       y_train,
                       eval_set=[(X_train, y_train), (X_test, y_test)],
                       eval_metric=metric)
        evals_result = self.model.evals_result()
        if metric == 'error':
            validations = []
            for val in evals_result.values():
                lst = val.get("error")
                validations.append(sum(lst) / len(lst))
            return 1 - (sum(validations) / len(validations))
        else:
            validations = []
            for val in evals_result.values():
                lst = val.get(metric)
                validations.append(lst[-1])
            return validations

    # input test labels if you want to check accuracy
    def label(self, label):
        self.test_labels.append(label)

    def input_test(self, features):
        if isinstance(self.test, np.ndarray) and self.test.size > 0:
            self.test = self.test.tolist()
        self.test.append(features)

    # test data
    def predict(self):
        if not isinstance(self.test, np.ndarray):
            self.test = np.asarray(self.test)
        self.prediction = self.model.predict(self.test)
        return self.prediction

    def predict_proba(self):
        if not isinstance(self.test, np.ndarray):
            self.test = np.asarray(self.test)
        self.prediction = self.model.predict_proba(self.test)
        return self.prediction

    # if you have the test labels you can check the error rate (you want error close to 0)
    def check_error(self):
        self.test_labels = np.asarray(self.test_labels)
        self.error = metrics.mean_absolute_error(self.test_labels,
                                                 self.prediction)
        return self.error

    # save classifier
    def save_classifier(self, file):
        self.model.save_model(file)

    # open saved classifier
    def open_classifier(self, file):
        self.model.load_model(file)

    # removes all training data
    def clean_train(self):
        self.X_train = []
        self.X_labels = []

    # removes all testing data
    def clean_test(self):
        self.test = []
        self.test_labels = []
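A minimal usage sketch of the Classifier above on synthetic data, assuming `XGBClassifier`, `train_test_split`, and `metrics` are imported in the surrounding module as the class requires:

import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=5, random_state=0)

clf = Classifier(params={'objective': 'binary:logistic', 'verbosity': 0})
for row, label in zip(X, y):
    clf.input_train(list(row), label)
clf.train()

for row in X[:10]:
    clf.input_test(list(row))
print(clf.predict())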
Example #30
def predict_sent(vecs, xgb_model_analyze):
    xgb = XGBC()
    xgb.load_model(xgb_model_analyze)
    pred = predict(vecs, w2v_model, xgb, 300)
    df = pd.DataFrame(pred, columns=['sent'])
    return df