Exemplo n.º 1
0
		),),
#('scaler'    ,StandardScaler(copy=True,with_mean=False,with_std=True)),
('scaler'    , MaxAbsScaler(copy=True)),
('classifier', SGDClassifier(verbose=0, class_weight='balanced',
		random_state=randomSeeds['randForClassifier']) ),
] )
# Hyperparameter grid handed to the tuning helper's grid search.
# Every key maps to a single-value list, so exactly one configuration
# is evaluated (the grid machinery is used, but nothing is swept).
parameters = {
    'vectorizer__ngram_range': [(1, 2)],
    'vectorizer__min_df': [2],
    'vectorizer__max_df': [0.98],
    'classifier__alpha': [1],
    'classifier__learning_rate': ['invscaling'],
    'classifier__eta0': [0.01],
    'classifier__loss': ['hinge'],
    'classifier__penalty': ['l2'],
}
# Wire the pipeline and parameter grid into the tuning helper, fit it on
# the training data, and print the resulting reports.  All run-time
# settings come from the parsed command-line arguments (args).
p = tl.TextPipelineTuningHelper(pipeline, parameters,
		    trainingDataDir=args.trainingData,
		    testSplit=args.testSplit,
		    gridSearchBeta=args.gridSearchBeta,
		    gridSearchCV=args.gridSearchCV,
		    indexOfYes=args.indexOfYes,
		    randomSeeds=randomSeeds,
		    ).fit()
# print() with a single argument is valid under both Python 2 and 3;
# the bare `print x` statement form is Python-2-only and inconsistent
# with the print(...) call form used elsewhere in these scripts.
print(p.getReports(wIndex=args.wIndex,
		    tuningIndexFile=args.tuningIndexFile,
		    wPredictions=args.wPredictions,
		    predFilePrefix=args.predFilePrefix,
		    compareBeta=args.compareBeta,
		    verbose=args.verbose,
		    ))
Exemplo n.º 2
0
            lowercase=False,  # done in preprocessing
            stop_words='english',
            # token_pattern=r'\b([a-z_]\w+)\b', # use default for now
        ),
    ),
    ('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)),
    #('scaler'    , MaxAbsScaler(copy=True)),
    ('classifier',
     SGDClassifier(verbose=0, random_state=randomSeeds['randForClassifier'])),
])
# Single-candidate hyperparameter grid: each entry lists exactly one
# value, so the search evaluates just this one configuration.
# Keys are valid identifiers, so the dict() keyword form is equivalent
# to the literal and keeps the same insertion order.
parameters = dict(
    vectorizer__ngram_range=[(1, 2)],
    vectorizer__min_df=[0.1],
    vectorizer__max_df=[0.7],
    # vectorizer__max_features=[2000],  # disabled: no feature cap
    classifier__loss=['hinge'],
    classifier__penalty=['l2'],
    classifier__alpha=[5],
    classifier__learning_rate=['optimal'],
    classifier__class_weight=['balanced'],
    # NOTE(review): eta0 is presumably ignored by SGD when
    # learning_rate='optimal' — confirm against sklearn docs.
    classifier__eta0=[0.01],
)
# Build the tuning helper from the pipeline and grid above, fit it, and
# print its reports.
# NOTE(review): beta and cv are presumably forwarded to the grid
# search's F-beta scorer and cross-validation fold count — confirm in
# TextPipelineTuningHelper.
p = tl.TextPipelineTuningHelper(
    pipeline,
    parameters,
    beta=4,
    cv=2,
    randomSeeds=randomSeeds,
).fit()
# print() with a single argument works under both Python 2 and 3; the
# bare `print x` statement was Python-2-only and inconsistent with the
# print(...) call form used elsewhere in these scripts.
print(p.getReports())
Exemplo n.º 3
0
         verbose=1,
         random_state=randomSeeds['randForClassifier'],
         learning_rate=0.05,
         n_estimators=1600,
         max_depth=3,
         min_samples_split=600,
         min_samples_leaf=150,
         max_features=0.7,
         subsample=0.85,
     )),
])
# Empty grid: tuning is finished, so the helper just fits the pipeline
# with its hard-coded settings.  Ranges explored in earlier tuning runs
# are kept below for reference:
#   classifier__learning_rate:   [0.025]
#   classifier__n_estimators:    [3200]
#   classifier__max_depth:       [3, 6, 9]
#   classifier__min_samples_split: [525, 550, 575, 600, 625]
#   classifier__min_samples_leaf:  [100, 125, 150, 175]
#   classifier__max_features:    [0.6, 0.65, 0.7, 0.75, 0.8, 0.9, None]
#   classifier__subsample:       [0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]
parameters = {}
# Free-text note recorded with this tuning run (trailing newline kept).
# Joining a one-element list and appending '\n' reduces to this literal.
note = "blessed GB.\n"
# Assemble the tuning helper from the pipeline and (empty) grid, fit it
# on the training data, and print the run's reports.
helper = tl.TextPipelineTuningHelper(
    pipeline, parameters, randomSeeds=randomSeeds, note=note)
p = helper.fit()
print(p.getReports())