def find_automatic_alignment_stats(ave_stats_writer):
    # open input files
    hits_input = csv.reader(open(sys.argv[5], 'rU'), delimiter = ",")
    
    hits_num = 0
    ave_stats = []
    for row in hits_input:
        try:
            # skip first line
            if row[0] == "source":
                continue
            
            print(row[0])
            print(hits_num + 1)
            
            # convert all alignment results to sets
            sure_sub = set(row[4].split())
            sure_ans = set(row[8].split())
            
            # calculate stats of alignment results
            stats_list = [precision(sure_sub, sure_ans), recall(sure_sub, sure_ans)]
            stats_list.append(f1(stats_list[0], stats_list[1]))
            
            # if answer key has two possible alignments, take one with the higher f1
            if row[12]:
                print("two answers exists")
                sure_ans_2 = set(row[12].split())
                stats_list_2 = [precision(sure_sub, sure_ans_2), recall(sure_sub, sure_ans_2)]
                stats_list_2.append(f1(stats_list_2[0], stats_list_2[1]))
                print("1st F1 value: " + str(stats_list[2]))
                print("2nd F1 value: " + str(stats_list_2[2]))
            
                if stats_list_2[2] > stats_list[2]:
                    print("selected answer B")
                    stats_list = stats_list_2
                else:
                    print("selected answer A")
                
            # add values to the average calculation
            # the case where this is the first HIT in the list
            if hits_num == 0:
                hits_num += 1
                ave_stats = stats_list
            
            # the case where this is not the first HIT in the list
            else:
                hits_num += 1
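                # incremental running mean: new_avg = old_avg + (x - old_avg) / n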
                for i in range(0, 3):
                    ave_stats[i] = float(ave_stats[i]) + ((stats_list[i] - ave_stats[i]) / float(hits_num))
            
            print(ave_stats)
            print("")
            
        except:
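            # rows that are malformed or too short are silently skipped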
            pass
    
    # print averages
    ave_stats_writer.writerow(["automatic_alignments", hits_num] + ave_stats)
    
    return
def scan_results(qual_type, results, worker_dict):
    for row in results:
        try:
            workerId = row[15]
            
            # skip first line
            if row[0] == "HITId":
                continue
            print(row[0])
            print("WORKERID: " + workerId)
            
            # if an "unchanged" value is encountered, replace with the proper value
            if row[50] == "unchanged": #sureAlignments
                row[50] = row[31]
            
            # convert all alignment results to sets
            sure_sub = set(row[50].split())
            sure_ans = set(row[35].split())
             
            # calculate stats of worker and store in stats_list
            # stats_list[0] = precision, stats_list[1] = recall, stats_list[2] = f1
            stats_list = [precision(sure_sub, sure_ans), recall(sure_sub, sure_ans)]
            stats_list.append(f1(stats_list[0], stats_list[1]))

            # if answer key has two possible alignments, take one with the higher f1
            if row[39]:
                sure_ans_2 = set(row[39].split())
                stats_list_2 = [precision(sure_sub, sure_ans_2), recall(sure_sub, sure_ans_2)]
                stats_list_2.append(f1(stats_list_2[0], stats_list_2[1]))
                print("1st F1 value: " + str(stats_list[2]))
                print("2nd F1 value: " + str(stats_list_2[2]))
            
                if stats_list_2[2] > stats_list[2]:
                    print("selected answer 2")
                    stats_list = stats_list_2
                else:
                    print("selected answer 1")
                
            # add worker to worker_dict
            # the case where worker does not exist in worker_dict
            if workerId not in worker_dict:
                worker_list = [1, qual_type]
                worker_list = worker_list + stats_list
                worker_dict[workerId] = worker_list
            
            # the case where worker already exists in worker_dict
            else:
                worker_list = worker_dict[workerId]
                worker_list[0] = worker_list[0] + 1
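                # update running means of precision, recall and f1 (indices 2-4) for this worker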
                worker_list[2] = float(worker_list[2]) + ((stats_list[0] - worker_list[2]) / float(worker_list[0]))
                worker_list[3] = float(worker_list[3]) + ((stats_list[1] - worker_list[3]) / float(worker_list[0]))
                worker_list[4] = float(worker_list[4]) + ((stats_list[2] - worker_list[4]) / float(worker_list[0]))
            
        except:
            pass
        
        print("")

    return
def find_automatic_alignment_stats(ave_stats_writer):
    # open input files
    hits_input = csv.reader(open(sys.argv[5], 'rU'), delimiter = ",")
    
    hits_num = 0
    ave_stats = []
    for row in hits_input:
        try:
            # skip first line
            if row[0] == "source":
                continue
            
            print(row[0])
            
            # if answer key has two possible alignments, choose one at random
            if row[8] and row[12]:
                print("selecting random answer")
                ans_string = random.choice([row[8], row[12]])
            else:
                ans_string = row[8]
                
            if ans_string == row[8]:
                print("selected answer 1")
            else:
                print("selected answer 2")
            
            # convert all alignment results to sets
            sure_sub = set(row[4].split())
            sure_ans = set(ans_string.split())
            
            print(sure_sub)
            print(sure_ans)
            
            # calculate stats of alignment results
            stats_list = [precision(sure_sub, sure_ans), recall(sure_sub, sure_ans)]
            stats_list.append(f1(stats_list[0], stats_list[1]))
            print("selected F1 value: " + str(stats_list[2]))
                
            # add values to the average calculation
            # the case where this is the first HIT in the list
            if hits_num == 0:
                hits_num += 1
                ave_stats = stats_list
            
            # the case where this is not the first HIT in the list
            else:
                hits_num += 1
                for i in range(0, 3):
                    ave_stats[i] = float(ave_stats[i]) + ((stats_list[i] - ave_stats[i]) / float(hits_num))
            
            print("")
            
        except:
            pass
    
    # print averages
    ave_stats_writer.writerow(["automatic_alignments", hits_num] + ave_stats)
    
    return
def main():
    # open input and writer CSV files
    train_input = csv.reader(open(sys.argv[1], 'rU'), delimiter = ",")
    output_writer = csv.writer(open(sys.argv[2], 'wb'), delimiter = ",")
    
    output_writer.writerow(["hitId", "inputSureAlignments", "answerSureAlignments", "precision", "recall", "f1"])
    
    # set initial values for average precision, recall and f1
    prec_ave = -1
    rec_ave = -1
    f1_ave = -1
    
    num_hits = 0
    
    for row in train_input:
        try:
            
            if row[0] == "instructions":
                continue

            num_hits += 1
            
            # print the precision, recall and f1 performance on the current HIT
            sure_in = set(row[4].split())
            sure_ans = set(row[8].split())
            prec = precision(sure_in, sure_ans)
            rec = recall(sure_in, sure_ans)
            if prec == 0 and rec == 0:
                f1 = 0
            else:
                f1 = float(2 * prec * rec) / float(prec + rec)
            output_writer.writerow([row[0], row[4], row[8], prec, rec, f1])
            
            # set values as averages if this is the first row
            if prec_ave == -1:
                prec_ave = prec
                rec_ave = rec
                f1_ave = f1
            
            # update averages if this is not the first row
            else:
                prec_ave = float(prec_ave) + ((prec - prec_ave) / float(num_hits))
                rec_ave = float(rec_ave) + ((rec - rec_ave) / float(num_hits))
                f1_ave = float(f1_ave) + ((f1 - f1_ave) / float(num_hits))
                
        except:
            pass
    
    # write average values
    output_writer.writerow(["average", None, None, prec_ave, rec_ave, f1_ave])
Example #5
    def predict(self):
        print 'Predicting'

        if self.classifier == 'knn':
            train_score = self.model.score(self.x_train, self.y_train)
            print 'KNN train score:', train_score

            if self.do_validation():
                val_score = self.model.score(self.x_validate, self.y_validate)
                print 'KNN val score: ', val_score
        else:
            import pprint

            self.predict_train = self.model.predict_proba(self.x_train)
            print 'Training Score logloss: ', functions.logloss(self.predict_train, self.y_train), ',',
            print 'Training Score precision: ', functions.precision(self.predict_train, self.y_train), ',',

            if self.do_validation():
                self.predict_validate = self.model.predict_proba(self.x_validate)
                precision = functions.logloss(self.predict_validate, self.y_validate)
                print 'Validation Score: ', precision, ',',
                precision = functions.precision(self.predict_validate, self.y_validate)
                print 'Validation Score precision: ', precision, ',',
Example #6
 def print_matrix(matrix, classes):
     p = precision(matrix)
     r = recall(matrix)
     f = f1(p, r)
     table = BeautifulTable()
     table.column_headers = [
         "Class Name", "Precision", "Recall", "F1 Score"
     ]
     for i in range(len(classes)):
         table.append_row([classes[i][0], p[i], r[i], f[i]])
     print(table)
     total_f1 = np.sum(f) / len(f)
     print("Total F1 score: " + str(total_f1))
     return total_f1
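print_matrix above expects precision(matrix) and recall(matrix) to return one value per class. A minimal sketch, under the assumption that rows of the confusion matrix are true labels and columns are predictions (the real helpers are defined elsewhere):

import numpy as np

def precision(matrix):
    # per-class precision: diagonal / column sums (all predictions of that class)
    matrix = np.asarray(matrix, dtype=float)
    return np.diag(matrix) / np.maximum(matrix.sum(axis=0), 1e-12)

def recall(matrix):
    # per-class recall: diagonal / row sums (all true members of that class)
    matrix = np.asarray(matrix, dtype=float)
    return np.diag(matrix) / np.maximum(matrix.sum(axis=1), 1e-12)

def f1(p, r):
    # element-wise harmonic mean of per-class precision and recall
    p, r = np.asarray(p, dtype=float), np.asarray(r, dtype=float)
    return 2.0 * p * r / np.maximum(p + r, 1e-12)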
Example #7
    def print_score(matrix, classes, points=8):
        def to_str(num):
            return ("%." + str(points) + "f") % round(float(num), points)

        p = precision(matrix)
        r = recall(matrix)
        f = f1(p, r)
        table = BeautifulTable()
        table.column_headers = [
            "Class Name", "Precision", "Recall", "F1 Score"
        ]
        for i in range(len(classes)):
            table.append_row(
                [classes[i][0],
                 to_str(p[i]),
                 to_str(r[i]),
                 to_str(f[i])])
        print(table)
        total_f1 = np.sum(f) / len(f)
        print("Total F1 score: " + to_str(total_f1))
        return total_f1
Example #8
        #threshold_plot = func.threshold_plot(X_train, X_test, y_train, y_test, gamma, thresholds)

        # Calculating ytilde and the model of logistic regression
        z = X_test @ betas_train  # choosing best beta here?
        model = func.logistic_function(z)
        model = func.IndicatorFunc(model, threshold=0.44)

        # Get AUC score and predict_proba_scikit. Used for plots and terminal print
        acc_scikit, TPR_scikit, precision_scikit, f1_score_scikit, AUC_scikit, predict_proba_scikit \
        = func.scikit(X_train, X_test, y_train, y_test, model)

        # Calculating the different metrics:
        print('\n-------------------------------------------')
        print('The accuracy is  : %.3f' % func.accuracy(model, y_test))
        print('The F1 score is  : %.3f' % func.F1_score(y_test, model))
        print('The precision is : %.3f' % func.precision(y_test, model))
        print('The recall is    : %.3f' % func.recall(y_test, model))
        print('The AUC is       : %.3f' % AUC_scikit)
        print('-------------------------------------------')

        # Make Cumulative gain and ROC plot
        P.Cumulative_gain_plot(y_test, model)
        P.ROC_plot(y_test, predict_proba_scikit)

        # Creating a Confusion matrix using pandas and pandas dataframe
        P.Confusion_matrix(y_test, model)

    elif arg == "NN":

        X_train_sc = X_train
        X_test_sc = X_test
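The exoplanet snippets call func.precision(y_true, y_pred) and func.recall(y_true, y_pred) on 0/1 label arrays. A minimal sketch of such binary metrics (an assumption; func's actual implementation is not shown here):

import numpy as np

def precision(y_true, y_pred):
    # true positives over all positive predictions
    y_true, y_pred = np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()
    tp = np.sum((y_pred == 1) & (y_true == 1))
    fp = np.sum((y_pred == 1) & (y_true == 0))
    return tp / float(tp + fp) if (tp + fp) > 0 else 0.0

def recall(y_true, y_pred):
    # true positives over all actual positives
    y_true, y_pred = np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()
    tp = np.sum((y_pred == 1) & (y_true == 1))
    fn = np.sum((y_pred == 0) & (y_true == 1))
    return tp / float(tp + fn) if (tp + fn) > 0 else 0.0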
Example #9
def print_accuracy(predicted, y):
    print "logloss: ", functions.logloss(predicted, y)
    print "precision: ", functions.precision(predicted, y)
Example #10
def train_reg(reg, clazz, X, X_val, two_class_y, two_class_y_val):
    print 'Training clazz', clazz, 'with C=', reg
    model = LogisticRegression(penalty='l1', dual=False, C=reg)
    model.fit(X, two_class_y)
    precision = functions.precision(model.predict_proba(X_val), two_class_y_val)
    return model, precision
Example #11
    from sklearn import datasets
    from sklearn.cross_validation import StratifiedShuffleSplit

    digits = datasets.load_digits()
#    x, y = digits.data[:200], digits.target[:200]
    x, y = digits.data, digits.target
    x_train, x_val, y_train, y_val = None,None,None,None


    print y.shape
    skf = StratifiedShuffleSplit(y,test_size= 0.3, n_iterations=2)
    for train_index, test_index in skf:
        print 'in skf'
        x_train, x_val = x[train_index], x[test_index]
        y_train, y_val = y[train_index], y[test_index]
        print x_train.shape
        print x_val.shape

#    model.train_cross_validation(x_train, x_val, y_train, y_val, [1,2,50,x_train.shape[0]+1])

    from sklearn.svm import SVC
    model = SVC(C=1000000, kernel='poly', degree=3, probability=True)
    model.fit(x_train, y_train)
    proba_train = model.predict_proba(x_train)
    print 'train: ', functions.precision(proba_train, y_train)
    proba_val = model.predict_proba(x_val)
    print 'validate:', functions.precision(proba_val, y_val)


def scan_results(qual_type, filename, worker_dict):
    print("****************************************************")
    print(qual_type)
    print(filename)
    print("****************************************************")
    results = csv.reader(open(filename, 'rU'), delimiter = ",")
    
    for row in results:
        try:
            print("")
            hit_id = row[0]
            worker_id = row[15]
            
            # skip the first line and a particular user who completed both "all" and "participated" HITs
            if hit_id == "HITId" or (worker_id == "AURYD2FH3FUOQ" and qual_type == "all"):
                continue
            
            print("")
            print(row)
            print("HITID: " + str(hit_id))
            print("WORKERID: " + str(worker_id))
            print("QUAL TYPE: " + qual_type)
            
            # make corrections to fields labeled "unchanged" or "{}"
            if row[46] == "unchanged": 
                row[46] = row[31]
            if row[43] == "unchanged":
                row[43] = row[32]
            for i in [35, 36, 43, 46]:
                if row[i] == "{}":
                    row[i] = ""
            
            sure_submission = row[46]
            poss_submission = row[43]
            sure_control = row[35]
            poss_control = row[36]
            print("defined rows")
                
            # convert all alignments to sets
            sure_submission_set = set(sure_submission.split())
            print("SURE_SUBMISSION_STRING: " + str(sure_submission))
            print("SURE_SUBMISSION_SET: " + str(sure_submission_set))
            all_submission_set = set(poss_submission.split()) | sure_submission_set
            print("ALL_SUBMISSION_SET: " + str(all_submission_set))
            sure_control_set = set(sure_control.split())
            print("SURE_CONTROL_SET: " + str(sure_control_set))
            all_control_set = set(poss_control.split()) | sure_control_set
            print("ALL_CONTROL_SET: " + str(all_control_set))
            
            # create a list of accuracy stats for the current HIT
            # stats_list[0] = precision, stats_list[1] = recall, stats_list[2] = f1
            stats_list = [precision(sure_submission_set, all_control_set), recall(all_submission_set, sure_control_set)]
            print("STATS_LIST_INITIAL: " + str(stats_list))
            stats_list.append(f1(stats_list[0], stats_list[1]))
            print("STATS_LIST_FINAL: " + str(stats_list))
            
            # add worker to worker_dict
            # the case where worker does not exist in worker_dict
            if worker_id not in worker_dict:
                worker_list = [1, qual_type]
                worker_list = worker_list + stats_list
                worker_dict[worker_id] = worker_list
            
            # the case where worker already exists in worker_dict
            else: 
                worker_list = worker_dict[worker_id]
                worker_list[0] = worker_list[0] + 1
                for i in range(0, 3):
                    worker_list[i + 2] = float(worker_list[i + 2]) + ((stats_list[i] - worker_list[i + 2]) / float(worker_list[0]))
            
        except:
            pass
Example #13
def Random_Forest(X_train, X_test, y_train, y_test, candidates, GoldiLock,	\
      feature_list, header_names, seed=0, threshold=0.5, 		\
      plot_confuse_matrix=False, plot_feauture_importance=False, Goldilock_zone=False):
    """
	Ha en input oversikt...?

	threshold		| 0.5 == RF.predict
					  0.7 --> 	Need 70$%$ probability to be an exoplanet, to
					  			be calssified as an exoplanet
	"""
    print("Exoplanet threshold = %g" % threshold)

    # Print best parameters, this takes time! Parameters set in Best_params()
    #Best_params(seed, X_train, y_train)

    # Plot error against number of trees?

    RF = RandomForestClassifier(
        n_estimators=300,
        max_features='auto',
        max_depth=8,
        min_samples_leaf=1,
        random_state=seed,
        criterion='gini',  # 'entropy'
        bootstrap=True)
    RF.fit(X_train, y_train)

    # https://github.com/erykml/medium_articles/blob/master/Machine%20Learning/feature_importance.ipynb

    header_names = np.load('feature_names.npy', allow_pickle=True)

    # function for creating a feature importance dataframe
    def feature_importance(column_names, importances):
        df = pd.DataFrame({'feature': column_names,'feature_importance': importances}) \
        .sort_values('feature_importance', ascending = False) \
        .reset_index(drop = True)
        return df

    # plotting a feature importance dataframe (horizontal barchart)
    def feature_importance_plot(feature_importances, title):
        feature_importances.columns = ['feature', 'feature_importance']
        sns.barplot(x = 'feature_importance', y = 'feature', data = feature_importances, orient = 'h') \
        .set_title(title, fontsize = 15)
        plt.ylabel('Feature', fontsize=15)
        plt.xlabel('Feature importance', fontsize=15)
        plt.show()

    if plot_feauture_importance == True:
        feature_imp = feature_importance(header_names[1:],
                                         RF.feature_importances_)
        feature_importance_plot(feature_imp[:11],
                                "Feature Importance (Random Forest)")

    # Calculating different metrics
    predict = RF.predict(X_test)
    accuracy = RF.score(X_test, y_test)
    precision = func.precision(y_test, predict)
    recall = func.recall(y_test, predict)
    F1_score = func.F1_score(y_test, predict)

    # Calculate the absolute errors
    errors = abs(predict - y_test)

    # Printing the different metrics:
    func.Print_parameters(accuracy,
                          F1_score,
                          precision,
                          recall,
                          errors,
                          name='Random Forest')

    if plot_confuse_matrix == True:

        func.ConfusionMatrix_Plot(y_test, predict,
                                  'Random Forest (Candidates)', threshold)

    #print(RF.decision_path(X_test))

    # Pull out one tree from the forest
    tree_nr = 5
    tree = RF.estimators_[tree_nr]

    func.PlotOneTree(tree, feature_list)  # header_names?

    predict_candidates = np.array(RF.predict_proba(candidates))

    # Prediction with threshold
    predict_candidates[:, 0] = (predict_candidates[:, 0] <
                                threshold).astype('int')
    predict_candidates[:, 1] = (predict_candidates[:, 1] >=
                                threshold).astype('int')

    predicted_false_positive = (predict_candidates[:, 1] == 0).sum()
    predicted_exoplanets = (predict_candidates[:, 1] == 1).sum()

    # Information print to terminal
    print('\nThe Random Forest Classifier predicted')
    print('--------------------------------------')
    print('%-5g exoplanets      of %g candidates' %
          (predicted_exoplanets, len(predict_candidates)))
    print('%-5g false positives of %g candidates' %
          (predicted_false_positive, len(predict_candidates)))

    # Plotting a bar plot of candidates predicted as confirmed and false positives
    func.Histogram2(predict_candidates[:, 1], 'Random Forest (Candidates)',
                    threshold)

    #func.Histogram2(g=df.loc[:, (df.columns == 'koi_disposition')].values)

    if Goldilock_zone:

        print("")
        print("Goldilock zone calculations")

        predict_goldilocks = np.array(RF.predict_proba(GoldiLock))

        predict_goldilocks[:, 0] = (predict_goldilocks[:, 0] <
                                    threshold).astype('int')
        predict_goldilocks[:, 1] = (predict_goldilocks[:, 1] >=
                                    threshold).astype('int')

        predicted_false_positive_goldilocs = (
            predict_goldilocks[:, 1] == 0).sum()
        predicted_exoplanets_goldilocks = (predict_goldilocks[:, 1] == 1).sum()

        # Information print to terminal
        print('\nThe Random Forest Classifier predicted')
        print('--------------------------------------')
        print('%-3g exoplanets      of %g GL candidates' %
              (predicted_exoplanets_goldilocks, len(predict_goldilocks)))
        print('%-3g false positives of %g GL candidates' %
              (predicted_false_positive_goldilocs, len(predict_goldilocks)))

        # Plotting a bar plot of candidates predicted as confirmed and false positives
        func.Histogram2(predict_goldilocks[:, 1], 'Random Forest (Goldilock)',
                        threshold)

        GL.GoldilocksZone(predict_goldilocks[:, 1], 'RandomForest', threshold)
    '''
	feature_importance = RF.feature_importances_
	print(feature_importance)
	print(len(feature_importance))


	#for i in range(len(feature_importance)):
		# Check the i in feature_importance
		# assign corresponding header name


	plt.hist(feature_importance, align='left', histtype='bar', orientation='horizontal', rwidth=0.3)
	plt.title('Feature Importance')
	plt.xlabel('--')
	plt.ylabel('--')
	#plt.xlim([lb-width/2, ub-width/2])
	plt.show()
	'''
def scan_csv(results, worker_dict, hits_result_writer):
    # Dictionary indexes hits by hitId, each hitId maps to a list 
    # where l[0] = # hits completed and l[1] = # hits correct
    hits_dict = {}
    
    num_rows = 0
    for row in results:
        print(num_rows)
        num_rows += 1
        try:
            hitId = row[0]
            workerId = row[15]
            
            print("hitId: "  + hitId)
            print("workerId: "  + workerId)
            
            # skip first line
            if hitId == "HITId":
                continue    
            
            ### skip HITs 311HQEI8RS1Q91M7H2OGRN5V4US7ZI and 37Y5RYYI0PQNN46K4NY6PTLGJI8SXE
            ### This is because those HITs have strange answer values
            if hitId == "311HQEI8RS1Q91M7H2OGRN5V4US7ZI" or hitId == "37Y5RYYI0PQNN46K4NY6PTLGJI8SXE":
                continue
            
            # if an "unchanged" value is encountered, replace with the proper value
            if row[48] == "unchanged": #sureAlignments
                row[48] = row[31]
            if row[42] == "unchanged": #possAlignments
                row[42] = "{}"
            if row[44] == "unchanged": #sourceHighlights
                row[44] = "{}"
            if row[50] == "unchanged": #targetHighlights
                row[50] = "{}"
            
            # convert all alignment results to sets
            sure_sub_f = set(row[48].split())
            sure_sub_i = set(row[49].split())
            sure_ans = set(row[35].split())
            pos_sub_f = set(row[42].split())
            pos_sub_i = set(row[43].split())
            pos_ans = set(row[36].split())
            src_sub_f = set(row[44].split())
            src_sub_i = set(row[45].split())
            src_ans = set(row[37].split())
            tgt_sub_f = set(row[50].split())
            tgt_sub_i = set(row[51].split())
            tgt_ans = set(row[38].split())
            
            prec_i = precision(sure_sub_i, sure_ans)
            print("prec_i" + str(prec_i))
            rec_i = recall(sure_sub_i, sure_ans)
            print("rec_i" + str(rec_i))
            f1_i = f1(prec_i, rec_i)
            print("f1_i" + str(f1_i))
            prec_f = precision(sure_sub_f, sure_ans)
            print("prec_f" + str(prec_f))
            rec_f = recall(sure_sub_f, sure_ans)
            print("rec_f" + str(rec_f))
            f1_f = f1(prec_f, rec_f)
            print("f1_f" + str(f1_f))
                        
            # create dictionary of HITs data
            if hitId not in hits_dict:
                hits_list = [1, 0]
                hits_dict[hitId] = hits_list
            else:
                hits_list = hits_dict[hitId]
                hits_list[0] = hits_list[0] + 1
            
            # the case where the worker does not exist in worker_dict
            if workerId not in worker_dict:
                # initialize initial worker_list
                completed_HITs = [hitId]
                worker_list = [1, 0, 0, prec_i, rec_i, f1_i, prec_f, rec_f, f1_f, completed_HITs]
                worker_dict[workerId] = worker_list
                
                # check if user's final submission is correct
                if sure_sub_f == sure_ans and pos_sub_f == pos_ans and src_sub_f == src_ans and tgt_sub_f == tgt_ans:
                    print("Correct submission")
                    worker_list[1] = worker_list[1] + 1
                    hits_list[1] = hits_list[1] + 1 ### DELETE THIS LATER, DEBUGGING ONLY
                    
                # print out errors if encountered
                if sure_sub_f != sure_ans:
                    print("Sure alignments incorrect, expected " + str(sure_ans) + ", got " + str(sure_sub_f))
                
                if pos_sub_f != pos_ans:
                    print("Possible alignments incorrect, expected " + str(pos_ans) + ", got " + str(pos_sub_f))
                    
                if src_sub_f != src_ans:
                    print("Source highlights incorrect, expected " + str(src_ans) + ", got " + str(src_sub_f))
                    
                if tgt_sub_f != tgt_ans:
                    print("Target highlights incorrect, expected " + str(tgt_ans) + ", got " + str(tgt_sub_f))
                
                #change correctness rate
                worker_list[2] = worker_list[1]
                    
            # the case where the worker already exists in work_dict
            else:
                
                worker_list = worker_dict[workerId]
                
                # if user has already completed this HIT, skip
                if hitId in worker_list[9]:
                    continue
            
                # mark the worker as having completed this HIT
                worker_list[9].append(hitId)
                
                
                # increase worker's completed HIT count   
                worker_list[0] = worker_list[0] + 1
                
                # check if user's final submission is correct
                if sure_sub_f == sure_ans and pos_sub_f == pos_ans and src_sub_f == src_ans and tgt_sub_f == tgt_ans:
                    print("Correct submission")
                    worker_list[1] = worker_list[1] + 1
                    hits_list[1] = hits_list[1] + 1 
                    
                ### print out errors if encountered
                if sure_sub_f != sure_ans:
                    print("Sure alignments incorrect, expected " + str(sure_ans) + ", got " + str(sure_sub_f))
                
                if pos_sub_f != pos_ans:
                    print("Possible alignments incorrect, expected " + str(pos_ans) + ", got " + str(pos_sub_f))
                    
                if src_sub_f != src_ans:
                    print("Source highlights incorrect, expected " + str(src_ans) + ", got " + str(src_sub_f))
                    
                if tgt_sub_f != tgt_ans:
                    print("Target highlights incorrect, expected " + str(tgt_ans) + ", got " + str(tgt_sub_f))

                    
                # update correctness rate
                worker_list[2] = float(worker_list[1]) / float(worker_list[0])
                
                # update precision, recall and f1 for initial guesses
                worker_list[3] = float(worker_list[3]) + ((prec_i - worker_list[3]) / float(worker_list[0]))
                
                worker_list[4] = float(worker_list[4]) + ((rec_i - worker_list[4]) / float(worker_list[0]))
                
                worker_list[5] = float(worker_list[5]) + ((f1_i - worker_list[5]) / float(worker_list[0]))
                
                # update precision, recall and f1 after seeing answer key
                worker_list[6] = float(worker_list[6]) + ((prec_f - worker_list[6]) / float(worker_list[0]))
                
                worker_list[7] = float(worker_list[7]) + ((rec_f - worker_list[7]) / float(worker_list[0]))
                
                worker_list[8] = float(worker_list[8]) + ((f1_f - worker_list[8]) / float(worker_list[0]))
            
            print("")
            
        except:
            pass
        
    # write HITs statistics 
    for hitId in hits_dict:
        hits_list = hits_dict[hitId]
        hits_result_writer.writerow([hitId, hits_list[0], hits_list[1], float(hits_list[1]) / float(hits_list[0])])
        print (str(hitId) + ":  [" + str(hits_list[0]) + ", " + str(hits_list[1]) + ", " + str((float(hits_list[1]) / float(hits_list[0]))) + "]")
    print("")
        
    return
    K = np.matrix(K.toPandas())
    K = K.reshape(num_points, num_points)

    ctrain, gamma = fun.dual_classification(la, ytr, Atr, nu, option, Atr)
    ctest, gamma = fun.dual_classification(la, ytr, Atr, nu, option, Ate)
    if option == 2:
        w = fun.dual_w(la, ytr, Atr, nu)

if option != 3:
    ctest = fun.primal_classification(Ate, w, yte, gamma)
'''
Print the results to the file resultados.txt
'''
if option == 3:
    fun.print_to_txt(gamma=gamma,
                     acc1=fun.precision(ytr, ctrain),
                     acc2=fun.precision(yte, ctest),
                     option=option)
else:
    fun.print_to_txt(w=w,
                     gamma=gamma,
                     acc1=fun.precision(ytr, ctrain),
                     acc2=fun.precision(yte, ctest),
                     option=option)

#os.remove('./ampl_data.dat')
# Remove the .dat file passed to AMPL and keep only the generation one.
# To keep the .dat file, comment out this line.

# Input format error
'''