"make a random prediction)") args = parser.parse_args() #============================================================================== # Generate classification data #============================================================================== SEED = 2018 # Load training data LS = load_from_csv(args.ls) # Load test data TS = load_from_csv(args.ts) with measure_time("Creating fingerprint"): X_train = create_fingerprints(LS["SMILES"].values) y_train = LS["ACTIVE"].values TS = load_from_csv(args.ts) X_test = create_fingerprints(TS["SMILES"].values) #============================================================================== # Define Base (level 0) and Stacking (level 1) estimators #============================================================================== base_clf = [ RandomForestClassifier(n_estimators=3100, bootstrap=True, max_depth=None, class_weight='balanced_subsample'), MLPClassifier(random_state=42, alpha=1e-5,
help="Use a decision tree classifier (by default, " "make a random prediction)") args = parser.parse_args() # Load training data LS = load_from_csv(args.ls) # Load test data TS = load_from_csv(args.ts) # -------------------------- Model --------------------------- # # LEARNING # Create fingerprint features and output with measure_time("Creating fingerprint"): X_LS = create_fingerprints(LS["SMILES"].values) y_LS = LS["ACTIVE"].values # Set the parameters by cross-validation tuned_parameters = [{ # every hyper-parameter can be tested }] scores = ['roc_auc'] for score in scores: # Chercher GridSearchCV dans documentation clf = GridSearchCV(KNeighborsClassifier(n_neighbors=53, algorithm='auto', weights='distance'), tuned_parameters, cv=2, scoring='%s' % score, n_jobs=-1, verbose=10) clf.fit(X_LS, y_LS)
minority = LS[LS.ACTIVE == 1] # Upsample minority class minority_upsampled = resample( minority, replace=True, # sample with replacement n_samples=len(majority), # to match majority class random_state=0) # reproducible results # Combine majority class with upsampled minority class LS_upsampled = pd.concat([majority, minority_upsampled]) # -------------------------- Model --------------------------- # # LEARNING # Create fingerprint features and output with measure_time("Creating fingerprint"): X_LS = create_fingerprints(LS_upsampled["SMILES"].values) y_LS = LS_upsampled["ACTIVE"].values # Set the parameters by cross-validation tuned_parameters = [{ # every hyper-parameter can be tested }] scores = ['roc_auc'] for score in scores: # Chercher GridSearchCV dans documentation clf = GridSearchCV(RandomForestClassifier( n_estimators=3100, bootstrap=True, max_depth=None, class_weight='balanced_subsample',