Exemplo n.º 1
0
def mnb():
    """Fit a multinomial naive Bayes classifier on the training CSV.

    The training features/labels and the test features are loaded through
    ``process.read_data``; the labels are transposed before fitting.

    Returns:
        A ``(model, x_test)`` tuple: the fitted ``MultinomialNB`` instance and
        the test features exactly as ``process.read_data`` returned them.
    """
    train_features, train_labels = process.read_data("train.csv", 1, 1)
    # read_data also returns a second value for the test file; it is not
    # needed for training, so it is discarded.
    test_features, _ = process.read_data("test.csv", 1, 0)
    transposed_labels = train_labels.T
    classifier = MultinomialNB(alpha=19.6, fit_prior=False)
    classifier.fit(train_features, transposed_labels)
    return classifier, test_features
Exemplo n.º 2
0
def lr():
    """Train a multinomial logistic-regression model on the training CSV.

    Features are standardised before fitting. The scaling statistics are
    estimated on the training features only and then applied unchanged to the
    test features, so both sets live in the same feature space as the model.

    Returns:
        A ``(model, x_test_preprocessed)`` tuple: the fitted
        ``LogisticRegression`` instance and the standardised test features.
    """
    x, y = process.read_data("train.csv", 1, 1)
    # The second value returned for the test file is not used here.
    x_test, _ = process.read_data("test.csv", 1, 0)

    # Previously the test set was passed through preprocessing.scale() on its
    # own, i.e. standardised with its *own* mean/std. That shifts test samples
    # relative to what the model saw during fit; reuse the training statistics.
    # NOTE(review): assumes `preprocessing` is sklearn.preprocessing, as the
    # existing preprocessing.scale() call suggests.
    scaler = preprocessing.StandardScaler().fit(x)
    x_train_preprocessed = scaler.transform(x)
    x_test_preprocessed = scaler.transform(x_test)

    y_train = y.T
    lr = LogisticRegression(C=0.05,
                            random_state=1,
                            solver='saga',
                            multi_class='multinomial',
                            max_iter=800)
    lr.fit(x_train_preprocessed, y_train)
    return lr, x_test_preprocessed
Exemplo n.º 3
0
    def marker_map(self):
        """Build the marker-map HTML page.

        The page template from ``self.gen_html_body()`` is filled with the map
        points produced by ``Map.create_map_points``, a fixed map centre, and
        the colour and style entries taken from ``self.gen_html_dom()``.

        Returns:
            The formatted HTML body.
        """
        # The results of these two calls are not used below. The calls are
        # retained because it cannot be told from here whether they have side
        # effects that later steps rely on.
        all_data()
        read_data()

        dom = self.gen_html_dom()
        body_template = self.gen_html_body()

        # Fixed map centre (41.7943 N, 87.5907 W) rather than the mean of the
        # data's Latitude/Longitude columns.
        centre_lat = 41.7943
        centre_lon = -87.5907
        print(centre_lat, centre_lon)

        marker_colors = dom["marker_map_colors"]

        rows = Map.process_data()
        marker_points = Map.create_map_points(dom, rows)

        return body_template.format(
            map_points=marker_points,
            center_lat=centre_lat,
            center_lon=centre_lon,
            colors=marker_colors,
            style=dom["style"],
        )
Exemplo n.º 4
0
    def process_data():
        """Load the map rows and return them sorted by their second column.

        Each row yielded by ``read_data().iterrows()`` is converted to a list
        of strings. Rows must have exactly four cells (unpacked below as
        street, number, lat, lon) and the second cell must parse as an
        integer.

        Returns:
            A new list of the string rows, ordered by the second cell.

        Raises:
            ValueError: if a row does not have exactly four cells or its
                second cell is not an integer literal.
        """
        df = read_data()
        map_data = [[str(cell) for cell in row] for _, row in df.iterrows()]

        # The earlier per-street "largest number" dictionaries were built here
        # but never read, so they are gone. The unpacking and int() conversion
        # are kept so malformed rows still fail fast exactly as before.
        for _street, number, _lat, _lon in map_data:
            int(number)

        # NOTE(review): the sort key is the *string* form of the number, so
        # the order is lexicographic ("10" sorts before "9"). Left unchanged
        # because consumers may depend on the current order; confirm whether
        # a numeric sort was intended.
        return sorted(map_data, key=lambda row: row[1])
Exemplo n.º 5
0
def svm_train_cv():
    """Search ``coef0`` for a sigmoid-kernel SVC using 3-fold cross-validation.

    Training features are standardised, then each candidate ``coef0`` is
    scored with ``cross_val_score``. The per-candidate mean score and the best
    candidate are printed; nothing is returned.
    """
    x, y = process.read_data("train.csv", 1, 1)
    sscaler = StandardScaler()
    sscaler.fit(x)
    x_train_preprocessed = sscaler.transform(x)
    y_train = y.T

    best_score = 0.0
    # Bound up front: the loop only assigns it when a candidate beats
    # best_score, so a run where every mean score is 0.0 or NaN used to end
    # in UnboundLocalError at the final print.
    best_parameters = None
    for num, coef0 in enumerate([0.1, 0.12, 0.14, 0.16], start=1):
        candidate = svm.SVC(C=0.90,
                            cache_size=500,
                            kernel='sigmoid',
                            gamma='scale',
                            coef0=coef0)
        scores = cross_val_score(candidate, x_train_preprocessed, y_train, cv=3)
        score = scores.mean()
        print("Iteration time:{}th".format(num))
        print("Current score on validation set:{:.9f}".format(score))
        print("Current parameters:{:.2f}".format(coef0))
        if score > best_score:
            best_score = score
            best_parameters = {"coef0": coef0}
    print("Best score on validation set:{:.9f}".format(best_score))
    print("Best parameters:{}".format(best_parameters))
Exemplo n.º 6
0
def main():
    """Count collected entries per source, year and day of year.

    For every record from ``read_data()`` the source is resolved to either a
    synchrotron listed in ``OUTPUT_SYN`` or ``'home'``; records that resolve
    to nothing, have no parsable collection date, or fall outside
    ``OUTPUT_YEARS`` are skipped. A per-source summary is written to stderr
    and the full table is handed to ``print_data``.
    """
    # Map the first element of each selected SYNCHROTRONS entry back to its key.
    symbol_to_key = {SYNCHROTRONS[key][0]: key for key in OUTPUT_SYN}

    # data[source][year] is a list with one counter per day of that year.
    data = {}
    for source in ALL_OUTPUT:
        data[source] = {year: [0] * days_in_year(year)
                        for year in OUTPUT_YEARS}

    for items in read_data():
        record = items[0]
        facility = SYNCHROTRONS.get(record[11])
        if record[8] == 'ELECTRON MICROSCOPY':
            # No stats for CryoEM. In most of entries _em_imaging.date is null.
            continue
        if facility:
            name = symbol_to_key.get(facility[0])
        elif record[9] in ('SEALED TUBE', 'ROTATING ANODE'):
            name = 'home'
        else:
            name = None
        if not name:
            continue

        collected = parse_date(record[19])
        if not collected or collected.year not in OUTPUT_YEARS:
            continue
        # tm_yday is 1-based; the per-year lists are 0-based.
        day_index = collected.timetuple().tm_yday - 1
        data[name][collected.year][day_index] += 1

    # summary to stderr
    for source in ALL_OUTPUT:
        per_year = [sum(data[source][year]) for year in OUTPUT_YEARS]
        line = '%-10.10s %-17s total: %4d' % (source, per_year, sum(per_year))
        print(line, file=sys.stderr)

    # JSON to stdout
    print_data(data)
Exemplo n.º 7
0
def svm_train():
    """Train a sigmoid-kernel SVC on standardised training features.

    The scaler is fitted on the training features only and then applied to
    both the training and the test features. The raw training features and
    labels are printed before fitting.

    Returns:
        A ``(model, x_test_preprocessed)`` tuple: the fitted ``svm.SVC`` and
        the test features transformed with the training scaler.
    """
    train_features, train_labels = process.read_data("train.csv", 1, 1)
    print(train_features)
    print(train_labels)
    # The second value returned for the test file is not used here.
    test_features, _ = process.read_data("test.csv", 1, 0)

    scaler = StandardScaler()
    scaler.fit(train_features)
    scaled_train = scaler.transform(train_features)
    scaled_test = scaler.transform(test_features)

    classifier = svm.SVC(
        C=0.90,
        cache_size=500,
        kernel='sigmoid',
        gamma='auto',
        coef0=0.1,
    )
    classifier.fit(scaled_train, train_labels.T)
    return classifier, scaled_test
Exemplo n.º 8
0
        def read_text(f):
            """
            Return the text to work on.

            When ``Globals.override`` is truthy, ``f`` is ignored and whatever
            ``process.read_data()`` returns is passed through unchanged.
            Otherwise the file at path ``f`` is read in text mode and its
            contents are returned lower-cased.

            f<string> -> string
            """
            if not Globals.override:
                with open(f, 'r') as source:
                    contents = source.read()
                return contents.lower()

            # Imported lazily: only needed on the override path.
            from process import read_data
            return read_data()
Exemplo n.º 9
0
def mlp_train():
    """Cross-validate an MLP classifier on standardised training features.

    Runs 3-fold ``cross_val_score`` and prints the per-fold scores followed by
    their mean and twice their standard deviation. Nothing is returned.
    """
    features, labels = process.read_data("train.csv", 1, 1)

    scaler = StandardScaler()
    scaler.fit(features)
    scaled_features = scaler.transform(features)
    targets = labels.T

    network = MLPClassifier(
        activation='logistic',
        solver='sgd',
        alpha=1e-5,
        batch_size=200,
        hidden_layer_sizes=(50, 50),
        random_state=1,
        learning_rate='adaptive',
        max_iter=600,
    )

    scores_mlp = cross_val_score(network, scaled_features, targets, cv=3)
    print(scores_mlp)
    mean_score = scores_mlp.mean()
    spread = scores_mlp.std() * 2
    print("scores_mlp Accuracy: %0.9f (+/- %0.9f)" % (mean_score, spread))
Exemplo n.º 10
0
def cnb_cv():
    """Search ``alpha`` for a ComplementNB classifier with 4-fold CV.

    The features are used exactly as returned by ``process.read_data`` (no
    scaling is applied). The per-candidate mean score and the best candidate
    are printed; nothing is returned.
    """
    x, y = process.read_data("train.csv", 1, 1)
    y_train = y.T

    best_score = 0.0
    # Bound up front: the loop only assigns it when a candidate beats
    # best_score, so a run where every mean score is 0.0 or NaN used to end
    # in UnboundLocalError at the final print.
    best_parameters = None
    for num, alpha in enumerate([0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
                                start=1):
        cnb = ComplementNB(alpha=alpha, fit_prior=False)
        scores = cross_val_score(cnb, x, y_train, cv=4)
        score = scores.mean()
        print("Iteration time:{}th".format(num))
        print("Current score on validation set:{:.9f}".format(score))
        # General format instead of two fixed decimals: the smallest
        # candidates (1e-5, 1e-4, 1e-3) were all displayed as "0.00".
        print("Current parameters:{:g}".format(alpha))
        if score > best_score:
            best_score = score
            best_parameters = {"alpha": alpha}
    print("Best score on validation set:{:.9f}".format(best_score))
    print("Best parameters:{}".format(best_parameters))
Exemplo n.º 11
0
def lr_cv():
    """Search ``C`` for multinomial logistic regression with 3-fold CV.

    Training features are standardised with ``preprocessing.scale`` before
    cross-validation. The per-candidate mean score and the best candidate are
    printed; nothing is returned.
    """
    x, y = process.read_data("train.csv", 1, 1)
    x_train_preprocessed = preprocessing.scale(x)
    y_train = y.T

    best_score = 0.0
    # Bound up front: the loop only assigns it when a candidate beats
    # best_score, so a run where every mean score is 0.0 or NaN used to end
    # in UnboundLocalError at the final print.
    best_parameters = None
    for num, C in enumerate([0.03, 0.05, 0.07], start=1):
        lr = LogisticRegression(C=C,
                                random_state=1,
                                solver='saga',
                                multi_class='multinomial',
                                max_iter=1000,
                                penalty='l2')
        scores = cross_val_score(lr, x_train_preprocessed, y_train, cv=3)
        score = scores.mean()
        print("Iteration time:{}th".format(num))
        print("Current score on validation set:{:.9f}".format(score))
        print("Current parameters:{:.4f}".format(C))
        if score > best_score:
            best_score = score
            best_parameters = {"C": C}
    print("Best score on validation set:{:.9f}".format(best_score))
    print("Best parameters:{}".format(best_parameters))