def calc_markov_length_end(raw_data, P, Q, actual):
    """Describe every predicted ORF by length, Markov score and validity.

    The ORFs come from ``gene_predictor.predict_by_threshold(raw_data)``;
    the first two groups it returns are concatenated and processed in order.

    Args:
        raw_data: Sequence the ORF coordinates index into.
        P: First model argument forwarded to ``gene_predictor.markov_score``.
        Q: Second model argument forwarded to ``gene_predictor.markov_score``.
        actual: Container tested for membership of each ORF's end position.

    Returns:
        A list of ``(length, markov, is_valid)`` tuples, one per ORF.
    """
    groups = gene_predictor.predict_by_threshold(raw_data)
    candidates = groups[0] + groups[1]

    def describe(orf):
        first, last = orf[0], orf[1]
        # Coordinates look 1-based and inclusive (hence the ``first - 1``)
        # -- TODO confirm against gene_predictor.
        segment = raw_data[first - 1:last]
        return (
            len(segment),
            gene_predictor.markov_score(P, Q, segment),
            last in actual,
        )

    return [describe(orf) for orf in candidates]
def generate_roc_length(actual, raw_data):
    """Build ROC points by sweeping the threshold of ``predict_by_threshold``.

    For each threshold in ``range(50, 1410, 10)`` the second group returned
    by ``gene_predictor.predict_by_threshold`` is scored against ``actual``
    with ``gene_predictor.check_predictions``.

    Args:
        actual: Reference collection passed to ``check_predictions``; its
            length is used as the number of positives.
        raw_data: Sequence handed to ``predict_by_threshold``.

    Returns:
        A ``(tp, fp)`` pair of lists with one entry per threshold: the
        true-positive rates and the false-positive rates.

    Raises:
        ZeroDivisionError: If ``actual`` is empty, or if the number of ORFs
            returned equals ``len(actual)``.
    """
    tp = []
    fp = []
    for threshold in range(50, 1410, 10):
        preds = gene_predictor.predict_by_threshold(raw_data, threshold)
        # Only preds[1] is scored (presumably the ORFs that pass the
        # threshold -- confirm in gene_predictor).
        true_positives, false_positives, _ = gene_predictor.check_predictions(preds[1], actual)

        # Negatives: every ORF returned (both groups) minus the known positives.
        negatives = len(preds[0]) + len(preds[1]) - len(actual)

        # float() keeps these true ratios even where ``/`` on two ints
        # would truncate (Python 2 semantics).
        tp.append(float(true_positives) / len(actual))
        fp.append(float(false_positives) / negatives)
    return tp, fp
def generate_roc_markov(actual, raw_data, P, Q):
    """Build ROC points by sweeping a cutoff on the Markov score of each ORF.

    All ORFs from ``gene_predictor.predict_by_threshold(raw_data)`` (first
    two groups concatenated) are scored with ``gene_predictor.markov_score``.
    For each of 100 cutoffs (0.0, 0.2, ..., 19.8) the ORFs scoring strictly
    above the cutoff are checked against ``actual``.

    Args:
        actual: Reference collection passed to ``check_predictions``; its
            length is used as the number of positives.
        raw_data: Sequence the ORF coordinates index into.
        P: First model argument forwarded to ``markov_score``.
        Q: Second model argument forwarded to ``markov_score``.

    Returns:
        A ``(tp, fp)`` pair of lists with one entry per cutoff: the
        true-positive rates and the false-positive rates.

    Raises:
        ZeroDivisionError: If ``actual`` is empty, or if the number of ORFs
            equals ``len(actual)``.
    """
    groups = gene_predictor.predict_by_threshold(raw_data)
    all_orfs = groups[0] + groups[1]

    # An ORF's score does not depend on the cutoff, so compute it once per
    # ORF instead of once per (cutoff, ORF) pair. This assumes markov_score
    # is deterministic and side-effect free.
    scored = [
        (orf, gene_predictor.markov_score(P, Q, raw_data[orf[0] - 1:orf[1]]))
        for orf in all_orfs
    ]
    total = len(all_orfs)

    tp = []
    fp = []
    for step in range(100):
        cutoff = float(step) / 5
        preds = [orf for orf, score in scored if score > cutoff]
        true_positives, false_positives, _ = gene_predictor.check_predictions(preds, actual)

        # float() keeps these true ratios even where ``/`` on two ints
        # would truncate (Python 2 semantics).
        tp.append(float(true_positives) / len(actual))
        fp.append(float(false_positives) / (total - len(actual)))
    return tp, fp
                f.write(str(tp_m[i]) + "," + str(fp_m[i]) + '\n')
        with open("length_roc_curve.csv", 'w') as f:
            for i in range(len(tp_l)):
                f.write(str(tp_l[i]) + "," + str(fp_l[i]) + '\n')

    if write_markov_roc_csv:
        graphVals(valid_orfs, invalid_orfs, unk_orfs, actual, raw_data, P, Q)

    if uh:
        v = calc_markov_length_end(raw_data, P, Q, actual)
        calc_line(v)

if write_length_csv:
    # Sweep the threshold passed to predict_by_threshold and record, per
    # threshold: the threshold, tru_pos / (tru_pos + false_neg), the
    # true-positive count and the false-positive count.
    res = []
    for i in range(50, 1500, 10):
        pred = gene_predictor.predict_by_threshold(raw_data, i)
        tru_pos, false_pos, false_neg = gene_predictor.check_predictions(pred[1], actual)
        res.append((str(i), str(float(tru_pos)/(tru_pos+false_neg)), str(tru_pos), str(false_pos)))

    # The context manager closes the file even if a write fails.
    with open('length.csv', 'w') as f:
        for point in res:
            f.write(', '.join(point) + '\n')

if write_markov_csv:
    res = []
    for markov_score in range(100):
        all_orfs = gene_predictor.predict_by_threshold(raw_data)
        all_orfs = all_orfs[0] + all_orfs[1]
        pred = []