Example #1
def loss(flow, predictions):
    flow = flow * 0.05

    losses = []
    INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(
        flow.shape[2].value)

    # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
    predict_flow6 = predictions['predict_flow6']
    size = [predict_flow6.shape[1], predict_flow6.shape[2]]
    downsampled_flow6 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))

    # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
    predict_flow5 = predictions['predict_flow5']
    size = [predict_flow5.shape[1], predict_flow5.shape[2]]
    downsampled_flow5 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))

    # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
    predict_flow4 = predictions['predict_flow4']
    size = [predict_flow4.shape[1], predict_flow4.shape[2]]
    downsampled_flow4 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))

    # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
    predict_flow3 = predictions['predict_flow3']
    size = [predict_flow3.shape[1], predict_flow3.shape[2]]
    downsampled_flow3 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))

    # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
    predict_flow2 = predictions['predict_flow2']
    size = [predict_flow2.shape[1], predict_flow2.shape[2]]
    downsampled_flow2 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))

    loss = tf.losses.compute_weighted_loss(losses,
                                           [0.32, 0.08, 0.02, 0.01, 0.005])

    # Return the 'total' loss: loss fns + regularization terms defined in the model
    return tf.losses.get_total_loss()
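`average_endpoint_error` is not defined in this snippet; a minimal sketch consistent with how it is called here (ground-truth flow first, prediction second, both of shape [batch, height, width, 2], assuming `import tensorflow as tf` as in the example) might look like the following. This is an assumption about the helper, not its actual implementation:

def average_endpoint_error(labels, predictions):
    # Per-pixel Euclidean distance between the two flow fields,
    # averaged over all pixels and batch entries (hypothetical sketch).
    diff = predictions - labels
    epe = tf.sqrt(tf.reduce_sum(tf.square(diff), axis=3) + 1e-8)
    return tf.reduce_mean(epe)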
Example #2
def test_001_t(self):
    # Decimate by 2: every other input sample should appear at the output.
    src_data = (1, 2, 3, 4, 5, 6, 7, 8)
    expected_result = (1, 3, 5, 7)
    src = blocks.vector_source_f(src_data)
    dsamp = downsample(2)
    snk = blocks.vector_sink_f()
    self.tb.connect(src, dsamp)
    self.tb.connect(dsamp, snk)
    self.tb.run()
    result_data = snk.data()
    self.assertFloatTuplesAlmostEqual(expected_result, result_data, 6)
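The test above relies on a `self.tb` flowgraph built by the surrounding test case; a typical fixture following the usual gr_unittest layout (an assumption, since the class body is not shown) would be:

from gnuradio import gr, gr_unittest, blocks

class qa_downsample(gr_unittest.TestCase):
    def setUp(self):
        # Fresh top block for each test.
        self.tb = gr.top_block()

    def tearDown(self):
        self.tb = None

if __name__ == '__main__':
    gr_unittest.run(qa_downsample)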
Example #3
def loss(flow, predictions):
    losses = []
    # L2 loss between predict_disp6, blob23 (weighted w/ 0.32)
    predict_disp6 = predictions['disp6']
    size = [predict_disp6.shape[1], predict_disp6.shape[2]]
    downsampled_disp6 = downsample.downsample(flow, size)
    losses.append(
        tf.losses.mean_squared_error(downsampled_disp6, predict_disp6))

    # L2 loss between predict_disp5, blob28 (weighted w/ 0.16)
    predict_disp5 = predictions['disp5']
    size = [predict_disp5.shape[1], predict_disp5.shape[2]]
    downsampled_disp5 = downsample.downsample(flow, size)
    losses.append(
        tf.losses.mean_squared_error(downsampled_disp5, predict_disp5))

    # L2 loss between predict_disp4, blob33 (weighted w/ 0.08)
    predict_disp4 = predictions['disp4']
    size = [predict_disp4.shape[1], predict_disp4.shape[2]]
    downsampled_disp4 = downsample.downsample(flow, size)
    losses.append(
        tf.losses.mean_squared_error(downsampled_disp4, predict_disp4))

    # L2 loss between predict_disp3, blob38 (weighted w/ 0.04)
    predict_disp3 = predictions['disp3']
    size = [predict_disp3.shape[1], predict_disp3.shape[2]]
    downsampled_disp3 = downsample.downsample(flow, size)
    losses.append(
        tf.losses.mean_squared_error(downsampled_disp3, predict_disp3))

    # L2 loss between predict_disp2, blob43 (weighted w/ 0.02)
    predict_disp2 = predictions['disp2']
    size = [predict_disp2.shape[1], predict_disp2.shape[2]]
    downsampled_disp2 = downsample.downsample(flow, size)
    losses.append(
        tf.losses.mean_squared_error(downsampled_disp2, predict_disp2))

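    # L2 loss between predict_disp1 (weighted w/ 0.01)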
    predict_disp1 = predictions['disp1']
    size = [predict_disp1.shape[1], predict_disp1.shape[2]]
    downsampled_disp1 = downsample.downsample(flow, size)
    losses.append(
        tf.losses.mean_squared_error(downsampled_disp1, predict_disp1))

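    # L2 loss between predict_disp0 (weighted w/ 0.005)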
    predict_disp0 = predictions['disp0']
    size = [predict_disp0.shape[1], predict_disp0.shape[2]]
    downsampled_disp0 = downsample.downsample(flow, size)
    losses.append(
        tf.losses.mean_squared_error(downsampled_disp0, predict_disp0))

    #loss = tf.losses.compute_weighted_loss(losses, [0.005, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32])
    # Return the 'total' loss: loss fns + regularization terms defined in the model
    return (losses[0] * 0.32 + losses[1] * 0.16 + losses[2] * 0.08 +
            losses[3] * 0.04 + losses[4] * 0.02 + losses[5] * 0.01 +
            losses[6] * 0.005)
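For reference, the commented-out weighted-loss call above can reproduce this sum if the weights are listed in the same order as `losses` (disp6 down to disp0) and a plain-sum reduction is used; with the default reduction, TF 1.x divides by the number of non-zero weights. A sketch:

weights = [0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005]
total_loss = tf.losses.compute_weighted_loss(
    losses, weights, reduction=tf.losses.Reduction.SUM)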
Example #4
def clean(folder_name):
    print('-----------------')
    print('Digesting: {}'.format(folder_name))

    print('Merging all CSV files in one...')
    os.system('cat {}/emojis_raw/{}/* > {}/clean_emojis/{}_unified.csv'.format(base_path, folder_name, base_path, folder_name))
    print('Removing headers...')
    remove = '"username","date","retweets","favorites","text","geo","mentions","hashtags","id","permalink","emoji"'
    os.system("awk '!/{}/' {}/clean_emojis/{}_unified.csv > temp && mv temp {}/clean_emojis/{}_no_header.csv".format(remove, base_path, folder_name, base_path, folder_name))

    date_dict = {}

    for date in date_array:
        date_dict[date] = 0

    print('Digesting...')
    with open('{}/clean_emojis/{}_no_header.csv'.format(base_path, folder_name)) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in tqdm(csv_reader):
            try:
                datetime_object = datetime.datetime.strptime(row[1],'%Y-%m-%d %H:%M').replace(hour=0,minute=0)
                date_dict[datetime_object] += 1
            except (ValueError, KeyError):
                pass

            line_count +=1
    print('Deleting intermediate files...')
    os.system('rm {}/clean_emojis/{}_unified.csv'.format(base_path, folder_name))
    print("Writing CSV...")
    with open('{}/emojis_3600/{}.csv'.format(base_path, folder_name), mode='w') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['day', 'usage'])
        for date in date_array:
            writer.writerow([date.strftime("%Y-%m-%d"), date_dict[date]])
    
    downsample('{}/emojis_3600/{}.csv'.format(base_path, folder_name), '{}/emojis_50/{}.csv'.format(base_path, folder_name), downsample_factor)
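The `downsample(in_csv, out_csv, factor)` helper called on the last line is not shown; a hypothetical pandas sketch of what it appears to do (aggregate the daily counts into groups of `factor` days), offered only as an assumption:

import pandas as pd

def downsample(in_csv, out_csv, factor):
    # Hypothetical sketch: sum usage over consecutive groups of `factor` days,
    # labelling each group with its first day.
    df = pd.read_csv(in_csv)
    grouped = df.groupby(df.index // factor).agg({'day': 'first', 'usage': 'sum'})
    grouped.to_csv(out_csv, index=False)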
Example #5
def test_balance_set():
    """

    Given a set of labels of a training set
    balance_set will determine how balanced the
    set is and then downsample to a specfied proportion

    Input
    -----
    fraction_1s: float
       fraction of ones we want
    fraction_0s: float
       fraction of zeros we want

    Output
    ------
    labels: ls
       index of labels that have the proper portion to
       downsample
    """
    config = sett.SetContainer(test_run_config, model_config)
    #data = get_feature_table(config.tablename)
    data = pd.read_csv(test_csv).set_index('block_year')
    break_window = '2Year'
    pX_train, Y_train, pX_valid, Y_valid, pX_test, Y_test, date_dic = train_valid_test_split(
        data,
        break_window,
        config.static_features,
        config.cv_cuts['thirty_seventy'],
        config.past,
        config.future,
        past_yr=4)

    ls_balance = [[0.3, 0.7], [0.2, 0.8], [0.1, 0.9], [0.5, 0.5]]
    for balance in ls_balance:
        break_bal, nobreak_bal = balance
        X_bal, Y_bal = downsample(pX_train,
                                  Y_train,
                                  downsample_balance=balance,
                                  Verbose=True)

        print('Y_train: ', np.sum(Y_train == 1))
        print(check_balance(Y_train))
        balance_after = check_balance(Y_bal)
        assert np.isclose(break_bal, balance_after['break'],
                          atol=1e-4), '{} {}'.format(break_bal,
                                                     balance_after['break'])
        assert np.isclose(nobreak_bal, balance_after['no_break'], atol=1e-4)
        print(balance_after)
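`check_balance` is used but not defined in the test; a minimal sketch consistent with how its return value is indexed (an assumption about the real helper):

import numpy as np

def check_balance(labels):
    # Hypothetical sketch: fraction of positive ('break') and negative
    # ('no_break') examples in a 0/1 label array.
    labels = np.asarray(labels)
    frac_break = float(np.mean(labels == 1))
    return {'break': frac_break, 'no_break': 1.0 - frac_break}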
Example #6
for fname in os.listdir(directory):
    # print fname
    t1, v1 = read_scope.read_scope(directory + fname)

    # Shaping filter
    v2 = lpf.lpfFirstOrder(v1, TAU, 10)  # shaping, 10GSPS
    t2 = t1

    # Simulate antialiasing filter (bessel)
    b, a = scipy.signal.bessel(NFO, FBK / (10000. / 2.), 'low')
    # v2 = scipy.signal.filtfilt(b, a, v1)
    v2 = scipy.signal.filtfilt(b, a, v2)
    t2 = t1

    # Downsample
    t3, v3 = downsample.downsample(t2, v2, 10. / FGSPS)  # 250MSPS

    # Discriminator
    found, tddc = ddc.disc_neg(t3[10:], v3[10:])
    if (found):
        plt.plot(t1, v1)  # Full BW, 10GSPS

        # plt.plot(t2,v2,'.-') # LPF to simulate front end

        # Downsample
        t3, v3 = downsample.downsample(t2, v2, 10. / FGSPS)  # 250MSPS
        plt.plot(t3, v3, '.-')  # LPF to simulate front end

        # Boxcar
        t4, v4 = boxcar.boxcar(t3, v3, NAVG1)
        plt.plot(t4, v4, '.-')
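`downsample.downsample(t, v, dsf)` is a project-local helper; a plausible sketch, assuming it simply decimates both arrays by the (integer) factor `dsf`:

def downsample(t, v, dsf):
    # Hypothetical sketch: keep every dsf-th sample of the time and voltage arrays.
    step = int(round(dsf))
    return t[::step], v[::step]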
Example #7
    ymin = np.min(y)
    yscale = [yi / ymin for yi in y]
    if (i == 0):
        yavg = np.zeros(len(y))
        xavg = copy.copy(x)
    yavg = [(yavgi * i + yscalei) / (i + 1)
            for yavgi, yscalei in zip(yavg, yscale)]
    # print len(xavg),len(yavg)

    # 1.) Low pass filter
    y1 = lpf.lpfFirstOrder(y, TAU, SCOPE_FSPS)
    peak_1.append(np.min(y1))

    # 2.) Downsample
    x2, y2 = downsample.downsample(x, y1, dsf=SCOPE_FSPS / DIG_FSPS)
    pk = np.min(y2)
    peak_2.append(pk)
    q = trap_int.trap_int(x2, y2, 0, len(x2) - 1)
    charge_2.append(q / (50. * 1.6 * 10**-19))

    # 3.) CFD with no interpolation
    xprev = 0.
    yprev = 0.
    for x2i, y2i in zip(x2, y2):
        if y2i < 0.5 * pk:
            time_cfd.append(x2i)
            # tinterp = xprev + ((x2i-xprev)/(y2i-yprev))*0.5*pk
            # time_cfd_interp.append(tinterp)
            xprev = x2i
            yprev = y2i
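`trap_int.trap_int(x, y, i0, i1)` appears to integrate the sampled waveform between two indices; a minimal numpy sketch of that idea (an assumption, not the project's module):

import numpy as np

def trap_int(x, y, i0, i1):
    # Hypothetical sketch: trapezoidal integral of y(x) over samples i0..i1 inclusive.
    return np.trapz(y[i0:i1 + 1], x[i0:i1 + 1])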
Example #8
def least_squares_fit(filename, variable1, variable2):

    with tables.open_file(filename, 'r') as data:
        # fetch values variable 1 and 2
        variable_1 = data.root.correlation.table.col('variable1')
        variable_2 = data.root.correlation.table.col('variable2')

    y_axis = query_yes_no("Do you want to plot %s on the y-axis?" % variable1[0][0])

    if len(variable_1.shape) != 1:
        print('There are %d plates with an individual %s value.' % (variable_1.shape[1], variable1[0][0]))
        plate_number1 = int(question.digit_plate("Enter the plate number that you want to you use in your correlation analysis ( e.g. '1' ): ", variable_1.shape[1]))
        variable_1 = variable_1[:, plate_number1 - 1]

    if len(variable_2.shape) != 1:
        print('There are %d plates with an individual %s value.' % (variable_2.shape[1], variable2[0][0]))
        plate_number2 = int(question.digit_plate("Enter the plate number that you want to you use in your correlation analysis ( e.g. '1' ): ", variable_2.shape[1]))
        variable_2 = variable_2[:, plate_number2 - 1]

    if y_axis == True:
        y = variable_1 # e.g. 'event_rates'
        x = variable_2 # e.g. 'barometric pressure'
        x, y = lose_nans(x, y)

    elif y_axis == False:
        x = variable_1 # e.g. 'event_rates'
        y = variable_2 # e.g. 'barometric pressure'
    else:
        print('weird')
    del variable_1, variable_2


    # Apply a linear least square fit:
    # a line, ``y = mx + c``, through the data-points:

    # We can rewrite the line equation as ``y = Ap``, where ``A = [[x 1]]``
    # and ``p = [[m], [c]]``.  Now use `lstsq` to solve for `p`:

    A = np.vstack([x, np.ones(len(x))]).T

    a, b = np.linalg.lstsq(A, y)[0]
    del A

    if y_axis == True:
        print('')
        print("The equation for the linear fit line is: ( y = a * x + b )   y = " + str(a) + " * x + " + str(b))
        print('')
        print("or     '" + variable1[0][0] + "' = " + str(a) + " * '" + variable2[0][0] + "' + " + str(b))
    elif y_axis == False:
        print('')
        print("The equation for the linear fit line is: ( y = a * x + b )   y = " + str(a) + " * x + " + str(b))
        print('')
        print("or     '" + variable2[0][0] + "' = " + str(a) + " * '" + variable1[0][0] + "' + " + str(b))

    # Calculate sample pearson correlation coefficient
    cor_coef = np.corrcoef([x, y])[0, 1]

    absolute_cor_coef = abs(cor_coef)
    print('')
    pearson_text = "The Pearson correlation coefficient between '%s' and '%s' is: %s" % (variable1[0][0], variable2[0][0], str(cor_coef))
    print(pearson_text)
    print('')

    if absolute_cor_coef < 0.1:
        correlation = 'NO'
    elif 0.1 <= absolute_cor_coef <= 0.3:
        correlation = 'a SMALL'
    elif 0.3 <= absolute_cor_coef <= 0.5:
        correlation = 'a MEDIUM'
    elif 0.5 <= absolute_cor_coef <= 1:
        correlation = 'a STRONG'
    else:
        correlation = ''

    if cor_coef >= 0.1:
        pos_neg = ' POSITIVE'
    elif cor_coef <= -0.1:
        pos_neg = ' NEGATIVE'
    else:
        pos_neg = ''

    conclusion = "For this sample you have found %s%s correlation between '%s' and '%s'." % (correlation, pos_neg, variable1[0][0], variable2[0][0])
    print(conclusion)

    """
    # calculate chi squared
    list_exp = array([a*i + b for i in x])

    begin3 = datetime.now()
    chi2, p = chisquare(y,list_exp)
    end3 = datetime.now()
    print end3 - begin3

    combo = zip(y,list_exp)

    begin = datetime.now()

    ch2 = 0

    for i in combo:
        ch2 = ch2 + (i[0]-i[1]-0.5)**2/i[1]

    print 'chi squared is ', ch2
    end = datetime.now()
    print end - begin



    print ''
    print 'chi squared:', chi2
    print 'associated p-value: ', p
    print ''

    degrees_of_freedom = (len(x) - 1)

    print 'chi squared divided by the number of measurements: ', chi2/degrees_of_freedom

    chi2_prob = chisqprob(chi2,degrees_of_freedom) # probability value associated with the provided chi-square value and degrees of freedom

    print 'probability value associated with the provided chi-square value and degrees of freedom:', chi2_prob
    """

    # Plot the data along with the fitted line:

    if len(x) > 500000:
        x, y = downsample(x, y)

    plt.plot(x, y, 'o', label='Original data', markersize=1)
    plt.plot(x, a * x + b, 'r', label='Fitted line')

    if y_axis == True:
        plt.ylabel(variable1[0][0] + ' (' + units[variable1[0][0]] + ')')
        plt.xlabel(variable2[0][0] + ' (' + units[variable2[0][0]] + ')')
    elif y_axis == False:
        plt.ylabel(variable2[0][0] + ' (' + units[variable2[0][0]] + ')')
        plt.xlabel(variable1[0][0] + ' (' + units[variable1[0][0]] + ')')

    tit = "Fit line: ( y = ax + b )   y = " + str(a) + " * x + " + str(b)

    plt.legend()
    plt.title(tit)

    start_date_interval, stop_date_interval = get_date_interval_from_file_names(variable1, variable2)
    inter_filename = filename.replace('.h5', '')
    fname = inter_filename + ' ' + start_date_interval + '_' + stop_date_interval

    plt.savefig(fname + ".png")
    plt.show()

    fit_info = open(fname + '.txt', 'w')
    fit_info.write(tit)
    fit_info.write("%s\n" % (''))
    fit_info.write(str(pearson_text))
    fit_info.write("%s\n" % (''))
    fit_info.write(str(conclusion))
    fit_info.close()

    """
    # calculate mean y value
    mean_y = sum(y) / len(y)
    print 'mean_y = ', mean_y

    relative_deviation_from_mean_y_list = []
    #relative deviation of the cosmic ray intensity (deltaI/I) from the mean intensity.

    for i in range(len(y)):
        deviation_of_mean_y = y[i] - mean_y
        relative_deviation_from_mean_y = deviation_of_mean_y/mean_y
        relative_deviation_from_mean_y_list.append(relative_deviation_from_mean_y)

    plt.plot(x,relative_deviation_from_mean_y_list,'o',markersize=1)

    plt.ylabel('deltaMPV_p/<MPV_p>')
    plt.xlabel('Outside temperature (degrees Celsius)')

    tit = "Correlation between the Relative deviation of the MPV of the pulseheight (3h intervals) from the mean MPV value with the outside temperature."
    plt.title(tit)

    fname = 'Correlation between relative deviation of the MPV of the pulseheights (3h intervals) from the mean MPV value with T_out'
    plt.savefig(fname +".png")

    plt.show()
    """
    """
Example #9
def verify(gsmall, gbig, atlas):
    gbig = downsample(gbig, atlas=atlas)  # gbig has been downsampled
    assert gbig.get_adjacency() == gsmall.get_adjacency(), "Adjacency matrices unequal!"
    assert gbig.es["weight"] == gsmall.es["weight"], "Adjacency matrix weights unequal!"
Example #10
# Fri Dec 21 16:46:58 EST 2018

import sys
sys.path.append('../')
import numpy as np
import matplotlib.pyplot as plt
import downsample

for i in range(8010):
    fin = open('/media/tyler/Seagate Expansion Drive/20181220_watchman_spe_filter/l3/%05d.txt' % i)
    print(i)
    x = []
    y = []
    for line in fin: 
        x.append(float(line.split(',')[0]))
        y.append(float(line.split(',')[1]))
    fin.close()

    # 1.) Downsample
    x1, y1 = downsample.downsample(x, y, dsf=40)

    plt.plot(x, y)
    plt.plot(x1, y1, 'o')
    plt.ylim(-0.0250, 0.005)
    plt.show()

    
Example #11
def verify(gsmall, gbig, atlas):
    gbig = downsample(gbig, atlas=atlas)  # gbig has been downsampled
    assert gbig.get_adjacency() == gsmall.get_adjacency(), "Adjacency matrices unequal!"
    assert gbig.es["weight"] == gsmall.es["weight"], "Adjacency matrix weights unequal!"
Example #12
def run_pipeline(run_config):
    """
    Main function for running throug the pipeline

    #read the model config and run yaml file here.


    The run_pipeline does the following;

    - Load the configuration file
    - Load the data from the features table
    - Loops through break windows and past_yrs
    - In the loop implement a cross_validation strategy that
      is either "seventy_thirty' or 'no_overlap'
    - In the loop run through models and parameters.

    **If the debug flag is True then there will be no output
    to the DB.**

    **The writeToDB must be True to write to the DB**

    Input
    -----
    run_config: yaml object
        run configutaion for doing a run.

    """
    config = sett.SetContainer(run_config, model_config)
    data = get_feature_table(config.tablename)
    for break_window in config.break_windows:
        print(break_window)
        for _past_yr in config.past_years:
            print('past_yr', _past_yr)

            if config.cross_valname == 'seventy_thirty':
                pX_train, _Y_train, pX_valid, Y_valid, pX_test, Y_test, dic_year = \
                    train_valid_test_split(data,
                                           break_window,
                                           config.static_features,
                                           config.cv_cuts['thirty_seventy'],
                                           config.past,
                                           config.future,
                                           past_yr=_past_yr,
                                           config=config)

                X_train = dumify_categorical_features(pX_train)
                X_valid = dumify_categorical_features(pX_valid)
                X_test = dumify_categorical_features(pX_test)
                # keep the name used by the downsampling branch below
                _X_train = X_train
                print(dic_year)
            elif config.cross_valname == 'no_overlap':
                _X_train, _Y_train, X_valid, Y_valid, X_test, Y_test, dic_year = \
                    CV_no_overlap(break_window, _past_yr, data, config)
            else:
                raise CVerror('no cross validation set')

            if config.downsample:
                for ls_dwn_smple in config.rebalancing:
                    X_train, Y_train = downsample(
                        _X_train, _Y_train, downsample_balance=ls_dwn_smple)

                    X_train_cols = X_train.columns.tolist()
                    X_valid_cols = X_valid.columns.tolist()
                    X_test_cols = X_test.columns.tolist()

                    assert set(X_train_cols) == set(X_valid_cols)
                    assert set(X_train_cols) == set(X_test_cols)
                    assert set(X_test_cols) == set(X_valid_cols)

                    run_models(config.clfs,
                               config.visualize,
                               break_window,
                               X_train,
                               Y_train,
                               X_valid,
                               Y_valid,
                               config.cross_valname,
                               results_dir=config.results_dir,
                               dic_year=dic_year,
                               config=config,
                               past_year=_past_yr)

            else:  # no downsampling: just rename

                X_train = _X_train
                Y_train = _Y_train

                run_models(config.clfs,
                           config.visualize,
                           break_window,
                           X_train,
                           Y_train,
                           X_valid,
                           Y_valid,
                           config.cross_valname,
                           results_dir=config.results_dir,
                           dic_year=dic_year,
                           config=config,
                           past_year=_past_yr)
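`CVerror` is raised when no cross-validation strategy matches; it appears to be a project-local exception, so a one-line definition consistent with that use (an assumption, not the project's actual class) would be:

class CVerror(Exception):
    """Raised when the run configuration names an unknown cross-validation strategy."""
    pass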