def update_answers(user_label, problem_label):
    train_problems = data_files.get_train_problems()
    train_users = data_files.get_train_users()
    submissions = data_files.get_submissions()
    test_problems = data_files.get_test_problems()
    test_users = data_files.get_test_users()

    tests_df = pandas.read_csv('test/test.csv')

    known_problems = set(train_problems.keys())
    known_users = set(train_users.keys())

    queries = []
    for test in tests_df.values:
        user_id = test[1]
        problem_id = test[2]
        u = 1 if user_id in known_users else 0
        p = 1 if problem_id in known_problems else 0
        if u == user_label and p == problem_label:
            queries.append({'index': test[0], 'user_id': user_id,
                            'problem_id': problem_id})
    # Dispatch table indexed by [user_label][problem_label]; the mixed cases
    # (unseen user or unseen problem) reuse the u0p0 baseline solution.
    solutions = [[u0p0.solution, u0p0.solution],
                 [u0p0.solution, u1p1.solution]]
    answers = solutions[user_label][problem_label](queries, train_problems,
                                                   train_users, test_problems,
                                                   test_users, submissions)
    result = pandas.read_csv(ANSWER_FILE)
    result = result.solved_status.tolist()
    for index, answer in enumerate(answers):
        query_id = queries[index]['index']
        result[query_id] = answer
    utils.save_answer(result, ANSWER_FILE)
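Example #2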
def run():
    train = train_data.values
    test = test_data.values

    X_train = train[:, 1:]
    y_train = train[:, 0]

    X_test = test[:, 1:]
    y_test = test[:, 0]

    perceptron = train_perceptron(X_train, y_train)
    predictions = perceptron.predict(X_test)

    default_ac = accuracy_score(y_test, predictions)

    print('Default accuracy:', default_ac)

    scaler = StandardScaler()

    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    perceptron_scaled = train_perceptron(X_train_scaled, y_train)
    predictions_scaled = perceptron_scaled.predict(X_test_scaled)

    scaled_ac = accuracy_score(y_test, predictions_scaled)

    print('Scaled accuracy:', scaled_ac)

    diff = scaled_ac - default_ac

    print('Difference between default and scaled is:', diff)
    save_answer(os.path.join(BASE_DIR, 'answer.txt'), round(diff, 3))
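The helpers train_perceptron and save_answer are not shown here; a minimal sketch of what they might look like, assuming train_perceptron wraps sklearn's Perceptron and save_answer simply writes the value to a text file:

# Hypothetical implementations of the helpers used above (not from the original source).
from sklearn.linear_model import Perceptron


def train_perceptron(X, y):
    # Assumption: a plain Perceptron with a fixed random_state for reproducibility.
    clf = Perceptron(random_state=241)
    clf.fit(X, y)
    return clf


def save_answer(path, value):
    # Assumption: each answer is written as plain text to its own file.
    with open(path, 'w') as f:
        f.write(str(value))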
Example #3
def calc_max_roc_auc_score(y_true):
    calc_scores_list = list()
    calc_scores = dict()
    for score_key in SCORE_HEADERS:
        s = roc_auc_score(y_true, scores[score_key])
        calc_scores_list.append(s)
        calc_scores[s] = score_key

    print('Max AUC-ROC on table:', calc_scores[max(calc_scores_list)])
    save_answer(os.path.join(BASE_DIR, '3.txt'),
                calc_scores[max(calc_scores_list)])
Example #4
def calc_max_precision_on_recall_lt70(y_true):
    score_key_by_max_p = dict()
    max_p_list = list()
    for score_key in SCORE_HEADERS:
        precision, recall, thresholds = precision_recall_curve(
            y_true, scores[score_key])
        ps = list()
        for i, p in enumerate(precision):
            if recall[i] >= 0.7:
                ps.append(p)
        max_p = max(ps)
        score_key_by_max_p[max_p] = score_key
        max_p_list.append(max_p)
    max_p = max(max_p_list)
    print('Max P is:', max_p, 'Metric is:', score_key_by_max_p[max_p])
    save_answer(os.path.join(BASE_DIR, '4.txt'), score_key_by_max_p[max_p])
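Both metric examples above read the classifier scores from module-level scores and SCORE_HEADERS defined elsewhere; a minimal sketch of how they might be set up, assuming the scores live in a CSV with one column of true labels and one column per classifier (the file and column names are placeholders):

# Hypothetical setup for the globals used above (file and column names are assumptions).
import pandas as pd

scores_df = pd.read_csv('scores.csv')      # e.g. columns: true, score_logreg, score_svm, ...
SCORE_HEADERS = [c for c in scores_df.columns if c != 'true']
scores = {key: scores_df[key] for key in SCORE_HEADERS}
y_true = scores_df['true']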
Example #5
def end(update, context):
    query = update.callback_query
    finished = True
    # Close all of the conversation's polls:
    for poll_id in context.user_data["polls"]:
        required = context.bot_data[poll_id]["required"]
        if ("has_answer" in context.bot_data[poll_id]
                and context.bot_data[poll_id]["has_answer"]) or not required:
            if context.bot_data[poll_id]["open"]:
                context.bot_data[poll_id]["open"] = False
                context.bot.stop_poll(
                    context.bot_data[poll_id]["chat_id"],
                    context.bot_data[poll_id]["message_id"],
                )
                if "has_answer" in context.bot_data[poll_id]:
                    print(context.bot_data[poll_id]["answer_string"])
                    user_id = query.message.chat.id
                    utils.save_answer(
                        context.user_data["bot_name"],
                        user_id,
                        context.bot_data[poll_id]["question_id"],
                        context.bot_data[poll_id]["answer_string"],
                    )
        else:
            finished = False
            query.answer("Campos obrigatórios não preenchidos!")
    if finished:
        query.answer()
        query.edit_message_reply_markup(None)

        for _, values in context.user_data["regular_answers"].items():
            question_id, answer_string = values[0], values[1]
            print(question_id, answer_string)
            user_id = query.message.chat.id
            utils.save_answer(
                context.user_data["bot_name"],
                user_id,
                question_id,
                answer_string,
            )

        context.bot.send_message(
            update.effective_user.id,
            "Agradeço por ter disponibilizado seu tempo em responder o questionário!"
        )
        return ConversationHandler.END
Example #6
def run():
    preprocess_data()

    train_weights, test_weights = find_tfidf_weights()
    train_cat, test_cat = get_one_hot_tags()

    X_train = hstack([train_cat, train_weights])
    X_test = hstack([test_cat, test_weights])

    clf = Ridge(random_state=241, alpha=1)
    clf.fit(X_train, train['SalaryNormalized'])

    predictions = clf.predict(X_test)

    print('Predicted salaries are:', predictions)

    save_answer(os.path.join(BASE_DIR, 'answer.txt'), ' '.join([str(round(v, 2)) for v in predictions]))
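The feature helpers are not shown in this example; a rough sketch of what find_tfidf_weights and get_one_hot_tags might do, assuming the job descriptions are vectorized with TfidfVectorizer and the categorical columns are one-hot encoded with DictVectorizer (the column names used below are assumptions):

# Hypothetical sketch of the feature helpers (column names are assumptions).
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer


def find_tfidf_weights():
    # Fit TF-IDF on the training descriptions and reuse the vocabulary for the test set.
    vectorizer = TfidfVectorizer(min_df=5)
    train_weights = vectorizer.fit_transform(train['FullDescription'])
    test_weights = vectorizer.transform(test['FullDescription'])
    return train_weights, test_weights


def get_one_hot_tags():
    # One-hot encode the categorical columns as sparse matrices.
    vectorizer = DictVectorizer()
    train_cat = vectorizer.fit_transform(
        train[['LocationNormalized', 'ContractTime']].to_dict('records'))
    test_cat = vectorizer.transform(
        test[['LocationNormalized', 'ContractTime']].to_dict('records'))
    return train_cat, test_cat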
Example #7
def calc_four_metrics(y_true, y_pred):
    metric_scores = list()

    accuracy = accuracy_score(y_true, y_pred)
    print('Accuracy score:', accuracy)
    metric_scores.append(accuracy)

    precision = precision_score(y_true, y_pred)
    print('Precision score:', precision)
    metric_scores.append(precision)

    recall = recall_score(y_true, y_pred)
    print('Recall score:', recall)
    metric_scores.append(recall)

    f1 = f1_score(y_true, y_pred)
    print('F1 score:', f1)
    metric_scores.append(f1)

    save_answer(os.path.join(BASE_DIR, '2.txt'),
                ' '.join([str(round(v, 2)) for v in metric_scores]))
Example #8
def run():
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(newsgroups.data)
    y = newsgroups.target
    clf = SVC(kernel='linear', random_state=241)
    clf.fit(X, y)
    names = vectorizer.get_feature_names()
    arr = clf.coef_.toarray()
    arr[0] = [abs(v) for v in arr[0]]
    sorted_weights = arr[::, arr[0, :].argsort()[::-1]]
    top_10_weights = sorted_weights[0, :10]
    words = list()

    for w in top_10_weights:
        index = np.where(arr == w)
        word_index = index[1][0]
        words.append(names[word_index])

    words.sort()
    print('Most weight words are:', words)
    save_answer(os.path.join(BASE_DIR, 'answer.txt'), ','.join(words))
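Example #9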
def run():
    clf = PCA(n_components=10)
    clf.fit(train.values[:, 1:])

    total_dispersion = 0.0
    dispersion_components_gt_90_enough = 0
    for r in clf.explained_variance_ratio_:
        total_dispersion += r
        dispersion_components_gt_90_enough += 1
        if total_dispersion >= 0.9:
            break
    print('Components enough for 90% dispersion:',
          dispersion_components_gt_90_enough)
    save_answer(os.path.join(BASE_DIR, '1.txt'),
                str(dispersion_components_gt_90_enough))

    transformed_train = clf.transform(train.values[:, 1:])
    X = transformed_train[:, 0]
    corr_coef = np.corrcoef(X, test['^DJI'])[0, 1]
    print('Pearson correlation coefficient:', corr_coef)
    save_answer(os.path.join(BASE_DIR, '2.txt'), str(round(corr_coef, 2)))

    first_component_list = list(clf.components_[0])
    company_max_weight_index = first_component_list.index(
        max(first_component_list))
    company_max_weight = list(train.columns)[company_max_weight_index + 1]
    print('Company with max weight on first component:', company_max_weight)
    save_answer(os.path.join(BASE_DIR, '3.txt'), company_max_weight)
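This example reads from module-level train and test frames; a minimal sketch of the assumed setup, where train holds a date column plus one closing-price column per company and test holds the Dow Jones index (the file names are placeholders):

# Hypothetical data loading for the globals used above (file names are assumptions).
import pandas as pd

train = pd.read_csv('close_prices.csv')   # date + one closing-price column per company
test = pd.read_csv('djia_index.csv')      # date + the '^DJI' index column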
Example #10
def run():
    data['Sex'] = data['Sex'].apply(lambda s: -1
                                    if s == 'F' else (0 if s == 'I' else 1))
    classes_names = data.columns[:-1]
    X = data[classes_names]
    y = data['Rings']

    r2_min_score_gt_52_n_estimators = None

    k_fold = KFold(random_state=1, n_splits=5, shuffle=True)

    for i in range(1, 51):
        max_score = get_r2_score_for_rfr(X, y, cv=k_fold, n_estimators=i)
        print('Max score for n_estimators =', i, 'is:', max_score)
        if r2_min_score_gt_52_n_estimators is None and max_score > 0.52:
            r2_min_score_gt_52_n_estimators = i
            break

    # Due to changes in the KFold implementation and floating-point effects, the answer may differ by ±1.
    save_answer(os.path.join(BASE_DIR, '1.txt'),
                str(r2_min_score_gt_52_n_estimators - 1))
    save_answer(os.path.join(BASE_DIR, '2.txt'),
                str(r2_min_score_gt_52_n_estimators))

    # correct answer on MacOS, scikit-learn==0.20.3, python 3.6.5
    save_answer(os.path.join(BASE_DIR, '3.txt'),
                str(r2_min_score_gt_52_n_estimators + 1))
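get_r2_score_for_rfr is not shown; a minimal sketch, assuming it cross-validates a RandomForestRegressor and returns the mean R² across the folds:

# Hypothetical implementation of the cross-validation helper (not from the original source).
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score


def get_r2_score_for_rfr(X, y, cv, n_estimators):
    # Assumption: score each fold with R^2 and return the fold mean.
    rfr = RandomForestRegressor(n_estimators=n_estimators, random_state=1)
    return cross_val_score(rfr, X, y, cv=cv, scoring='r2').mean()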
Example #11
"""
Problem:

Given a set of (x, y) points in the plane, sampled in the vicinity of a circle,
estimate the circle's radius and center point.

How to get started:
    
Run this script. It will read the input points and plot them. Then, add your 
code below (see comments).

"""

from utils import read_pnts, plot_points, save_answer

pnts = read_pnts('./training_data_full.npy')
plot_points(pnts)
#
# Here you should complete an algorithm to estimate the circle radius and center
#
estimated_radius, estimated_center = 1.0, (2.0, 3.0)
#
# In the end, save the estimated circle coordinates to a file
#
save_answer('./estimated_circle_coordinates_full.json', estimated_radius,
            estimated_center)
Example #12
def include_answer(class_name, answer):
    answers = load_answer()
    answers[class_name] = answer
    save_answer(answers)
Example #13
def calc_confusion_matrix(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    matrix = np.array([[tp, fp], [fn, tn]])
    print('Confusion matrix: ', matrix)
    save_answer(os.path.join(BASE_DIR, '1.txt'),
                ' '.join([str(v) for v in matrix.flatten()]))