Example No. 1
# =====

import functions  # project-local helper module (a stub is sketched below)

money: float = 0.0

while True:
    # Get user selection
    selection = functions.user_selection()
    if selection == "":
        continue

    # Act on the user's selection
    elif selection == 3:
        functions.switch_off()

    elif selection == 4:
        functions.print_report(money)

    else:
        # Check sufficient ingredients to make the drink
        sufficient = functions.check_ingredients(selection)
        if not sufficient:
            print("Maintenance required")
        else:
            # Sufficient ingredients to make the drink
            # Ask for payment
            result = functions.get_payment(selection, money)
            paid = result[0]
            money = result[1]

            if paid:
                # Payment accepted; the original snippet is truncated here.
                # make_drink is a hypothetical helper (see the stub below).
                functions.make_drink(selection)
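
The loop relies on a project-local `functions` module that the snippet never shows. A minimal stub that makes it runnable; every name and behavior below is hypothetical, inferred only from the call sites above:

# functions.py (hypothetical stub)
import sys

MENU = {1: ("espresso", 1.50), 2: ("latte", 2.50)}

def user_selection():
    # Return "" to make the caller re-prompt, otherwise an int menu choice
    raw = input("Select (1=espresso, 2=latte, 3=off, 4=report): ").strip()
    return int(raw) if raw else ""

def switch_off():
    print("Shutting down.")
    sys.exit(0)

def print_report(money):
    print(f"Money collected: ${money:.2f}")

def check_ingredients(selection):
    # Placeholder: always report sufficient stock
    return True

def get_payment(selection, money):
    # Returns (paid, new_money_total)
    price = MENU[selection][1]
    inserted = float(input(f"Insert ${price:.2f}: "))
    if inserted >= price:
        return True, money + price
    print("Payment refused; refunding.")
    return False, money

def make_drink(selection):
    print(f"Here is your {MENU[selection][0]}. Enjoy!")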
Example No. 2
# build_data_path, print_report, LABEL_COLS and BAD_WORDS are project-local
# definitions assumed to be in scope
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline, make_union

# ETHNIC_SLURS = set(load_ethnic_slurs())

training_data_path = build_data_path('augmented_train.csv')

df = pd.read_csv(training_data_path)

X = df['comment_text']
y = df[LABEL_COLS]

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33)

clf = OneVsRestClassifier(MultinomialNB())

tfidf = TfidfVectorizer(strip_accents='ascii',
                        stop_words='english',
                        ngram_range=(1, 1),
                        norm='l2')
bad_word_counter = CountVectorizer(vocabulary=BAD_WORDS)
#slur_counter = CountVectorizer(vocabulary=ETHNIC_SLURS)

#union = make_union(tfidf, bad_word_counter, slur_counter)
union = make_union(tfidf, bad_word_counter)
pipeline = make_pipeline(union, clf)
# "optimizer" is a leftover name from an earlier hyper-parameter search (note
# the commented-out best_estimator_ below); here it is simply the pipeline
optimizer = pipeline

optimizer.fit(X_train, y_train)
y_predictions = optimizer.predict(X_valid)

# best_estimator_ = optimizer.best_estimator_
print_report(y_valid, y_predictions)
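
make_union concatenates its transformers' outputs column-wise, so the classifier sees the TF-IDF features and the bad-word counts side by side. A self-contained sketch of that behavior (the toy corpus and word list are illustrative only):

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.pipeline import make_union

docs = ["you are an idiot", "have a lovely day"]
demo_union = make_union(TfidfVectorizer(), CountVectorizer(vocabulary=["idiot"]))
features = demo_union.fit_transform(docs)
# width = TF-IDF vocabulary size + one count column per listed bad word
print(features.shape)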
Example No. 3
# Imports for this snippet; df and LABEL_COLS are loaded/defined as in
# Example No. 2
import numpy as np
from collections import defaultdict
from random import random

from sklearn import metrics
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.utils.class_weight import compute_sample_weight

X = df['comment_text']
y = df[LABEL_COLS]

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33)

label_counts = y_train.sum()

total = label_counts.sum()

label_freqs = defaultdict(lambda: 0)
for key in label_counts.index:
    label_freqs[key] = label_counts[key] / total
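# The loop above is equivalent to one vectorized pandas expression; a plain
# dict suffices here, since every key looked up later is in label_counts.index:
# label_freqs = (label_counts / total).to_dict()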


def predict(X_values):
    # Random baseline: draw each label independently with probability equal
    # to its empirical frequency in the training split
    predictions = []
    for example in X_values:
        prediction = []
        for key in LABEL_COLS:
            rand_value = random()
            frequency = label_freqs[key]
            prediction.append(1 if rand_value < frequency else 0)
        predictions.append(prediction)
    return np.array(predictions)


random_predictions = predict(X_valid)

print('Baseline Data')
print_report(y_valid, random_predictions)
print()

# Truncated in the original: only the tail of the search call survives.
# Plausible reconstruction; pipeline is assumed to end in a
# RandomForestClassifier step, as the fit_params key below implies.
optimizer = GridSearchCV(pipeline,
                         param_grid={},
                         scoring='f1_weighted',
                         verbose=3)
fit_params = {
    # RandomForestClassifier's fit keyword is sample_weight (singular)
    'randomforestclassifier__sample_weight':
    compute_sample_weight('balanced', y_train)
}
# pass the weights through; otherwise they are silently unused
optimizer.fit(X_train, y_train, **fit_params)

y_predictions = optimizer.predict(X_valid)
print(y_predictions.shape, y_valid.shape)
# best_estimator_ = optimizer.best_estimator_

print(metrics.roc_auc_score(y_valid, y_predictions))
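
# AUC is usually computed on scores rather than hard 0/1 predictions. A
# sketch of that variant, assuming the fitted search wraps a multi-output
# RandomForestClassifier (whose predict_proba returns one array per label):
proba_per_label = optimizer.predict_proba(X_valid)
y_scores = np.column_stack([p[:, 1] for p in proba_per_label])
print(metrics.roc_auc_score(y_valid, y_scores, average='macro'))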

test_data = build_data_path('test.csv')
data_df = pd.read_csv(test_data)
test_labels = build_data_path('test_labels.csv')
label_df = pd.read_csv(test_labels)
test_df = data_df.set_index('id').join(label_df.set_index('id'))
CONDITIONS = [f'{label} != -1' for label in LABEL_COLS]
QUERY_STRING = ' & '.join(CONDITIONS)
test_df = test_df.query(QUERY_STRING)
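# Rows labelled -1 in test_labels.csv were never scored in the Jigsaw
# competition, so the query keeps only fully labelled rows. With the standard
# Jigsaw label set (an assumption; LABEL_COLS is defined elsewhere in the
# project), QUERY_STRING expands to:
# 'toxic != -1 & severe_toxic != -1 & obscene != -1 & threat != -1
#  & insult != -1 & identity_hate != -1'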
X_test = test_df['comment_text']
y_test = test_df[LABEL_COLS]
y_predictions = optimizer.predict(X_test)
print_report(y_test, y_predictions, data_type='TESTING')

# The inspiration for this code was taken from
# https://github.com/SaltyQuetzals/Toxicroak