def bde(ctx):
    delete_cached_model()
    click.echo("Decision engine cache deleted")
    delete_previous_analysis_reports()
    click.echo("Analysis reports deleted")
    # Rebuild the decision engine from freshly loaded ML data.
    get_decision_engine(get_ml_data())
    click.echo("Decision engine created")


def dea(ctx):
    delete_previous_analysis_reports()
    click.echo("Analysis reports deleted")
    ml_data = get_ml_data()
    decision_engine = get_decision_engine(ml_data)
    analyzer = DecisionEngineAnalyzer(decision_engine, ml_data)
    analyzer.create_analysis_reports()
    click.echo("Reports created in directory %s" % ANALYSIS_RESULTS_DIR)
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelBinarizer

from data_processing.ml_data_prepairer import get_ml_data
from models.preprocess_pipeline import CongestiveHeartFailurePreprocessor

data = get_ml_data()

# Binarize the treatment labels so they can be used as classification targets.
treatment_label_binarizer = LabelBinarizer()

# Fit the project's preprocessing pipeline and transform the raw feature data.
preprocessor = CongestiveHeartFailurePreprocessor(False)
preprocessor.fit(data)
transformed_data = preprocessor.transform(data)

y = treatment_label_binarizer.fit_transform(data.treatment.values)

X_train, X_test, y_train, y_test = train_test_split(transformed_data, y, test_size=0.3)

pipeline = Pipeline([
    ('rf', RandomForestClassifier())
])

param_grid = [{
    'rf__max_depth': list(range(9, 20)),
    'rf__n_estimators': list(range(45, 70, 5)),
    'rf__criterion': ['gini', 'entropy'],
    # 'sqrt' replaces the deprecated 'auto', which meant sqrt(n_features)
    # for classifiers; None uses all features.
    'rf__max_features': ['sqrt', None]
}]
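
# The imported GridSearchCV is never exercised in this excerpt. A minimal
# sketch of how it could tie the pipeline and param_grid together follows;
# the cv and n_jobs settings are assumptions, not values from the original.
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Held-out score:", grid_search.score(X_test, y_test))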
def pd(ctx):
    # Building a new dataset invalidates the cached model and analysis
    # reports, so clear everything first.
    _all_clean()
    get_ml_data()
    click.echo("New dataset built")
import logging

from data_processing.ml_data_prepairer import get_ml_data
from models.build_decision_engine import get_decision_engine
from models.analysis.decision_engine_analyzer import DecisionEngineAnalyzer

logger = logging.getLogger()
logger.setLevel(logging.INFO)

congestive_heart_failure_data = get_ml_data()

decision_engine = get_decision_engine(congestive_heart_failure_data)

decision_engine_analyzer = DecisionEngineAnalyzer(decision_engine, congestive_heart_failure_data)

print("Important Features for outcome prediction")
print(decision_engine.get_outcome_feature_importance().sort_values('importance', ascending=False))

print("Important Features for actual treatment prediction")
print(decision_engine.get_actual_treatment_feature_importance().sort_values('importance', ascending=False))

recommended_treatment_overview = decision_engine_analyzer.get_recommended_treatment_overview()
print('Recommended treatment overview')
print(recommended_treatment_overview)

outcome_changes = decision_engine_analyzer.get_outcome_change_by_recommended_and_actual_treatment()
print('Recommended treatment counts per actual treatment')
print(outcome_changes)

# Keep only treatment combinations seen in more than 20 cases and with a
# survival_rate_improvement greater than 0.025.
top_treatment_improvements = outcome_changes[
    (outcome_changes.counts > 20) &
    (outcome_changes.survival_rate_improvement > 0.025)]
print("Top opportunities for treatment improvements")
print(top_treatment_improvements)