Пример #1
def average_data():
    """
    Run average_data_year() for each year from start_year up to (but not
    including) end_year, as specified in config.par.

    Returns:
        int: Function completed indicator (always 1)
    """
    start_year = int(uf.get_parameter('start_year'))
    end_year = int(uf.get_parameter('end_year'))
    # NOTE(review): the loop body was missing from this excerpt; the call is
    # reconstructed from the docstring -- confirm against the original file.
    for year in range(start_year, end_year):
        average_data_year(year)
    return 1
Пример #2
    def loadData(self):
        """
        Load the per-year ideal shifts and averaged ACE / ACE_B / GOES arrays
        and keep only the rows whose ideal shift is finite.

        Files are read for self.start_year and then for every following year
        below self.end_year, and stacked along axis 0. The results are stored
        on self.ACE, self.ACE_B, self.GOES, self.ideal_shifts and
        self.ideal_shifts_corrs. self.filepath is refreshed from the
        'filepath' parameter.

        Returns:
            int: Function completed indicator (always 1)
        """
        self.filepath = uf.get_parameter('filepath')

        # The first year is always loaded, even if end_year <= start_year.
        years = [self.start_year]
        years.extend(range(self.start_year + 1, self.end_year))

        def load_years(prefix):
            # Concatenate once instead of calling np.append per year, which
            # re-copies the accumulated array on every iteration.
            return np.concatenate(
                [np.load(self.filepath + prefix + str(year) + '.npy')
                 for year in years],
                axis=0)

        ideals = load_years('Ideal_shifts/ideal_shifts_')
        ace = load_years('Data/ACE_avg_')
        ace_b = load_years('Data/ACE_B_avg_')
        goes = load_years('Data/GOES_avg_')

        # Column 0 holds the shift, column 1 its correlation.
        shifts = ideals[:, 0]
        corrs = ideals[:, 1]

        # Drop every interval without a finite ideal shift. This will need to
        # change if all data, instead of just averages, is to be used.
        keep = np.isfinite(shifts)
        self.ACE = ace[keep]
        self.ACE_B = ace_b[keep]
        self.GOES = goes[keep]
        self.ideal_shifts_corrs = corrs[keep]
        self.ideal_shifts = shifts[keep]
        return 1
Пример #3
def generate_ideal_timeshifts():
    """
    Generate and save a list of ideal (correct) timeshifts generated from
    cross-correlating ACE solar wind dynamic pressure and GOES Bz, for each
    year from start_year up to (but not including) end_year.

    Returns:
        int: Function finished indicator (always 1)
    """
    start_year = int(uf.get_parameter('start_year'))
    end_year = int(uf.get_parameter('end_year'))
    # NOTE(review): the loop body was missing from this excerpt; the per-year
    # call is reconstructed from the function name -- confirm against the
    # original file.
    for year in range(start_year, end_year):
        generate_ideal_timeshifts_year(year)
    return 1
Пример #4
def pull_data():
    """
    Pull a range (specified in config.par) of years of ACE SWE, ACE MFI, and
    GOES MFI data from CDAWeb, clean it, and store it in a location specified
    in config.par.

    Returns:
        int: Function finished indicator (always 1)
    """
    start_year = int(uf.get_parameter('start_year'))
    end_year = int(uf.get_parameter('end_year'))
    # NOTE(review): the loop body was missing from this excerpt; the three
    # per-year calls are reconstructed from the docstring -- confirm the
    # calls and their order against the original file.
    for year in range(start_year, end_year):
        pull_ACE_year(year)
        pull_ACE_B_year(year)
        pull_GOES_year(year)
    return 1
Пример #5
def calc_time_indices():
    """
    Create and save to file three lists of indices for each year, for ACE swe,
    ACE mfi, and GOES.

    The indices define time intervals separated by a time dt, of length
    interval_length, both contained in config.par. The range of years computed
    for is also contained in config.par (start_year inclusive, end_year
    exclusive).

    Returns:
        int: Function finished indicator (always 1)
    """
    start_year = int(uf.get_parameter('start_year'))
    end_year = int(uf.get_parameter('end_year'))
    # NOTE(review): the loop body was missing from this excerpt; the per-year
    # call is reconstructed from the function name -- confirm against the
    # original file.
    for year in range(start_year, end_year):
        calc_time_indices_year(year)
    return 1
Пример #6
def average_data_year(year):
    """
    For each interval in a year, compute the average of a bunch of different
    ACE and GOES quantities, and save them to file for use later. Requires
    data files to have been downloaded.

    Arguments:
        year(int): The year for which the averages will be computed

    Returns:
        1 if all three averaged files already exist (nothing is recomputed),
        otherwise the freshly computed ACE averages as returned by
        average_arr().
        NOTE(review): the two return types differ; callers that only need a
        completion flag should not rely on the value.
    """
    filepath = uf.get_parameter('filepath')

    print('Starting ' + str(year))

    datasets = ('ACE', 'ACE_B', 'GOES')

    def avg_path(name):
        return filepath + 'Data/' + name + '_avg_' + str(year) + '.npy'

    # Skip the year only when every averaged file is already on disk.
    if all(os.path.exists(avg_path(name)) for name in datasets):
        print('File ' + 'ACE_avg_' + str(year) + '.npy' +
              ' already exists! Skipping...')
        return 1

    # NOTE(review): the '.npy' suffix of the three index-file loads was cut
    # off in this excerpt; it is restored here to match how the same files
    # are loaded elsewhere in the file.
    averages = {}
    for name in datasets:
        data = np.load(filepath + 'Data/' + name + '_' + str(year) + '.npy')
        indices = np.load(filepath + 'Indices/' + name + '_indices_' +
                          str(year) + '.npy')
        averages[name] = average_arr(data, indices)
        np.save(avg_path(name), averages[name])

    return averages['ACE']
def calc_timeshifts(method, name, **parameters):
    """
    Given the name of a defined timeshifting method, calculate timeshifts
    for every year from start_year up to (but not including) end_year, and
    save them to a named directory under '<filepath>Shifts/'.

    Arguments:
        method(string) -- Name of the timeshifting method to be used; the
            suffix '_shift' is appended before it is passed on
        name(string) -- Name of the folder to save files to
        Other parameters specific to individual timeshifting methods

    Returns:
        int: Function finished indicator (always 1)
    """
    filepath = uf.get_parameter('filepath')

    # os.makedirs creates the intermediate 'Shifts/' directory as well.
    path = filepath + 'Shifts/' + name + '/'
    if not os.path.exists(path):
        os.makedirs(path)

    start_year = int(uf.get_parameter('start_year'))
    end_year = int(uf.get_parameter('end_year'))

    for i in range(start_year, end_year):
        print('Starting ', i)
        # NOTE(review): the first and last lines of this call were missing
        # from the excerpt; the callee and the **parameters pass-through are
        # reconstructed from the surviving argument lines -- confirm.
        calc_timeshifts_year(i,
                             path + name + '_shifts_' + str(i) + '.npy',
                             method=method + '_shift',
                             **parameters)

    # NOTE(review): the end of the function was not visible; returning 1
    # follows the convention of the other driver functions in this file.
    return 1

Пример #8
def updateAll():
    # Iterates over every entry of the '<filepath>Models/' directory, where
    # filepath is read through uf.get_parameter('filepath').
    filepath = uf.get_parameter('filepath')
    f = filepath+'Models/'
    for file in os.listdir(f):
        # NOTE(review): the loop body is missing from this excerpt, so what is
        # done with each entry cannot be determined here -- restore it from
        # the original file.
Пример #9
 def saveModel(self):
     """
     Save this object with np.save to '<filepath>Models/<self.filename>'.

     The base path is re-read through uf.get_parameter('filepath') and kept
     on self.filepath.
     """
     self.filepath = uf.get_parameter('filepath')
     target = self.filepath + 'Models/' + self.filename
     np.save(target, self)
Пример #10
    def __init__(self,
                 training_corr_min = 0.7,
                 n_train = 2500,
                 min_shift = 10.,
                 layout = np.array([10,10,10]),
                 n_models = 2,
                 optimizer = 'adam',
                 loss = 'mae',
                 metrics = ['mse'],
                 n_epochs = 100,
                 batch_size = 50,
                 filename = '',
                 custom_func = ''
                 ):
        """
        Store the training and network settings and pick the file name the
        model will be saved under.

        Arguments:
            training_corr_min -- stored as self.training_corr_min (training
                set selection)
            n_train -- stored as self.n_train (training set selection)
            min_shift -- stored as self.min_shift (training set selection)
            layout -- stored as self.layout (network parameter); also used in
                the generated file name
            n_models -- stored as self.n_models (network parameter)
            optimizer, loss, metrics -- stored as network parameters
            n_epochs, batch_size -- stored as training parameters
            filename -- file name inside '<filepath>Models/'; when empty, an
                unused name is generated from layout, n_models, n_epochs and
                batch_size
            custom_func -- stored as self.custom_func
        """
        self.start_year = int(uf.get_parameter('start_year'))
        self.end_year = int(uf.get_parameter('end_year'))
        self.custom_func = custom_func
        #Parameters for choosing training set
        self.training_corr_min = training_corr_min
        self.n_train = n_train
        self.min_shift = min_shift
        #parameters for the networks
        # NOTE(review): the default layout array is one object shared by all
        # instances; it is only read in this method.
        self.layout = layout
        self.n_models = n_models
        self.optimizer = optimizer
        self.loss = loss
        # Copy so that instances never share (and mutate) the default list.
        self.metrics = list(metrics)
        #training parameters
        self.n_epochs = n_epochs
        self.batch_size = batch_size

        self.filepath = uf.get_parameter('filepath')
        models_dir = self.filepath + 'Models/'
        # NOTE(review): the body of this check was missing from the excerpt;
        # creating the directory is reconstructed from the same pattern used
        # for the other output directories in this file -- confirm.
        if not os.path.exists(models_dir):
            os.makedirs(models_dir)

        if filename == '':
            print('Generating filename')
            #construct a filename
            stem = ('network_' + str(self.layout) + 'x' + str(self.n_models)
                    + '_' + str(self.n_epochs) + '_' + str(self.batch_size))
            filename = stem + '.npy'
            # Append _2, _3, ... until the name is not taken yet.
            n_f = 1
            while os.path.exists(models_dir + filename):
                n_f = n_f + 1
                filename = stem + '_' + str(n_f) + '.npy'

        self.filename = filename
        #Internal stuff
        self.nns = []
Пример #11
 def saveNetworks(self):
     """
     Pass every network in self.nns to get_weights() with a save target of
     '<filepath>model_NNN', where NNN is the zero-padded position in the list.

     The base path is re-read through uf.get_parameter('filepath') and kept
     on self.filepath.
     """
     self.filepath = uf.get_parameter('filepath')
     for idx, network in enumerate(self.nns):
         target = self.filepath+'model_'+str(idx).zfill(3)
         get_weights(network, save = target)
def calc_timeshifts_year(year, filename, method='flat', **parameters):
    """
    Given a year, a defined timeshifting method, and a filename, calculate
    timeshifts for every time interval in that year, and save to a file.

    Arguments:
        year(int) -- The year to compute timeshifts for
        filename(string) -- The name of the file to save the timeshifts to
        Other parameters specific to individual timeshifting methods. The
        key 'overwrite' (default False) controls whether an existing file is
        recomputed; it is forwarded to the method together with the rest.

    Keyword Arguments:
        method(string) -- Name of the function in tsm used to calculate the
            timeshifts (default: 'flat')

    Returns:
        int: 1 when the shifts were saved or the file already existed,
        -1 when tsm has no attribute named `method`.
    """
    start = time.time()

    filepath = uf.get_parameter('filepath')

    parameters.setdefault('overwrite', False)

    #First, check whether this data file exists already
    if not parameters['overwrite'] and os.path.exists(filename):
        print('File ' + filename + ' already exists! Skipping...')
        return 1

    ACE = np.load(filepath + 'Data/ACE_' + str(year) + '.npy')
    ACE_B = np.load(filepath + 'Data/ACE_B_' + str(year) + '.npy')
    GOES = np.load(filepath + 'Data/GOES_' + str(year) + '.npy')

    ACE_t = ACE['t'].copy()
    ACE_B_t = ACE_B['t'].copy()
    GOES_t = GOES['t'].copy()

    A_i = np.load(filepath + 'Indices/ACE_indices_' + str(year) + '.npy')
    Ab_i = np.load(filepath + 'Indices/ACE_B_indices_' + str(year) + '.npy')
    G_i = np.load(filepath + 'Indices/GOES_indices_' + str(year) + '.npy')

    n_intervals = len(A_i)
    # Intervals start out as NaN and are filled in one by one.
    shifts = np.full(n_intervals, np.nan)

    if not hasattr(tsm, method):
        print('That method doesnt exist!')
        return -1

    timeshifting_method = getattr(tsm, method)

    print('Starting ' + method + ' method')

    #Loop through start times
    for i in range(n_intervals):
        shifts[i] = timeshifting_method(A_i[i, 0], A_i[i, 1], Ab_i[i, 0],
                                        Ab_i[i, 1], G_i[i, 0], G_i[i, 1],
                                        ACE_t, ACE, ACE_B_t, ACE_B, GOES_t,
                                        GOES, **parameters)
        # Progress update every 200 intervals
        if i % 200 == 0 and i != 0:
            uf.status(int(float(i) / float(n_intervals) * 100))

    timetaken = time.time() - start
    print(timetaken, ' seconds')

    np.save(filename, shifts)
    # The other exits return an int status; do the same on success instead of
    # falling off the end with None.
    return 1
Пример #13
def pull_GOES_year(year, filepath=''):
    # NOTE(review): this excerpt is incomplete. The docstring delimiters and
    # several statements appear to be missing (the closing braces of the two
    # dicts, what looks like a try/except around the year lookup and around
    # the CDAS request, the else branch of the month rollover, the remaining
    # request arguments and the contents of both np.transpose calls). They
    # must be restored from the original file before this function can run.
    Pull a year of GOES data from CDAWeb, clean it, and store it in a location specified in config.par. 
    Which GOES satellite data comes from depends on the year. 2000-2003 pulls GOES 10, 2003-2009 pulls GOES12.
        year(int) -- The year for which data will be pulled
        int: Function finished indicator
    # The filepath argument is overwritten here, so a value passed by the
    # caller is never used.
    filepath = uf.get_parameter('filepath')

    #check if there's a folder there, if not, make it
    if not os.path.exists(filepath + 'Data/'):
        os.makedirs(filepath + 'Data/')

    filename = filepath + 'Data/GOES_' + str(year) + '.npy'

    #Check if file already exists
    if os.path.exists(filename):
        print('File ' + 'GOES_' + str(year) + '.npy' +
              ' already exists! Skipping...')
        return 1

    print('Pulling GOES data from ' + str(year))

    # One record per sample: time 't', 3-component 'pos' and 3-component 'B'.
    GOES_dtype = np.dtype([('t', 'f8'), ('pos', '3f8'), ('B', '3f8')])
    GOES = np.ndarray(0, dtype=GOES_dtype)

    #This maps a given year to a GOES satellite
    GOES_dict = {
        2000: 10,
        2001: 10,
        2002: 10,
        2003: 10,
        2004: 12,
        2005: 12,
        2006: 12,
        2007: 12,
        2008: 12,
        2009: 12
    #This dict serves to map a goes satellite to it's name in CDAS and it's associated variable names.
    GOES_names = {
        10: ['G0_K0_MAG', 'SC_pos_se', 'B_GSE_c'],
        12: ['GOES12_K0_MAG', 'SC_pos_se', 'B_GSE_c']

        # Years outside GOES_dict are rejected with -1.
        x = GOES_dict[year]
        print("Year is not defined yet, try another one.")
        return -1

    #Again, go month by month.
    for i in range(1, 13):

        t1 = datetime.datetime(year, i, 1)

        # t2 is the first day of the following month; December rolls over to
        # January of the next year.
        if i + 1 < 13:
            t2 = datetime.datetime(year, i + 1, 1)
            t2 = datetime.datetime(year + 1, 1, 1)
        #print('Pulling '+str(t1)[0:10] + ' - ' + str(t2)[0:10])

            goes_data = cdas.get_data('sp_phys',
                                      GOES_names[GOES_dict[year]][0], t1, t2,
            import calendar
            print('No data found for ' + calendar.month_name[i] + ' ' +
        GOES_month = np.ndarray(len(goes_data['EPOCH']), dtype=GOES_dtype)

        GOES_month['pos'] = np.transpose([
        GOES_month['B'] = np.transpose([
        GOES_month['t'] = mdate.date2num(goes_data['EPOCH'])

        #Clean bad data
        # Values below -10**30 are replaced with NaN.
        GOES_month['B'][GOES_month['B'] < -10**30] = np.nan

        #append to the full array
        GOES = np.append(GOES, GOES_month)
        uf.status(int((i / 12) * 100))

    np.save(filename, GOES)
    print(str(year) + ' finished!')
    print('File saved to ' + filename)
Пример #14
def generate_ideal_timeshifts_year(year):
    """
    Generate and save a list of ideal (correct) timeshifts generated from
    cross-correlating ACE solar wind dynamic pressure and GOES Bz for one
    year.

    For every interval the flat timeshift is used as a baseline, and 121
    candidate shifts within +/- 60 minutes of it (one-minute steps) are
    correlated. The best correlation and its shift are saved as two columns
    (shift, correlation) to
    '<filepath>Ideal_shifts/ideal_shifts_<year>.npy'. Intervals that are
    skipped keep NaN in both columns.

    Arguments:
        year(int) -- The year for which ideal timeshifts will be calculated

    Returns:
        int: Function finished indicator (always 1)

    NOTE(review): several lines of this function were missing from the
    excerpt (the end of the file-exists check, the bodies of the validity
    checks, the end of the final np.save). They are reconstructed below and
    marked individually -- confirm against the original file.
    """
    print('Generating ideal shifts for ' + str(year))
    filepath = uf.get_parameter('filepath')

    if not os.path.exists(filepath + 'Ideal_shifts/'):
        os.makedirs(filepath + 'Ideal_shifts/')

    out_file = filepath + 'Ideal_shifts/ideal_shifts_' + str(year) + '.npy'

    #First, check whether this data file exists already.
    if os.path.exists(out_file):
        print('File ' + 'ideal_shifts_' + str(year) + '.npy' +
              ' already exists! Skipping...')
        return 1

    #Load data
    ACE = np.load(filepath + 'Data/ACE_' + str(year) + '.npy')
    GOES = np.load(filepath + 'Data/GOES_' + str(year) + '.npy')
    ACE_t = ACE['t'].copy()
    GOES_t = GOES['t'].copy()

    ACE_i = np.load(filepath + 'Indices/ACE_indices_' + str(year) + '.npy')
    GOES_i = np.load(filepath + 'Indices/GOES_indices_' + str(year) + '.npy')

    #Create an array of start times and end times for each interval.
    # Column 0 of the index array is the interval start, column 1 its end;
    # the original read column 0 for both, making end_times equal start_times.
    start_times = ACE_t[ACE_i[:, 0]]
    end_times = ACE_t[ACE_i[:, 1]]

    n_intervals = len(start_times)

    #Define some arrays to hold stuff.
    extra_shifts = np.arange(-60, 61, 1) * 60.
    corrs = np.full([n_intervals, len(extra_shifts)], np.nan)
    shifts = np.full(n_intervals, np.nan)

    ideal_shifts = np.full(n_intervals, np.nan)
    ideal_corrs = np.full(n_intervals, np.nan)

    #Keep track of elapsed time
    start = time.time()

    #Loop through start times
    for i in range(n_intervals):
        #get the interval we are analyzing
        At1, At2 = ACE_i[i]
        Gt1, Gt2 = GOES_i[i]

        # Reconstructed: each failed check skips the interval.
        #Make sure the interval exists
        if np.isnan(At1) or np.isnan(Gt1):
            continue
        #Make sure there are enough ACE data points
        if np.count_nonzero(np.isfinite(ACE['p'][At1:At2])) < 20:
            continue
        #Make sure there are enough GOES data points
        if np.count_nonzero(np.isfinite(GOES['B'][Gt1:Gt2, 2])) < 20:
            continue

        #Calculate the flat timeshift as a baseline
        shifts[i] = _flat_shift(ACE_i[i], ACE, GOES_i[i], GOES)

        #For GOES, supply only a subset of data to save on time.
        #We are looking at intervals within an hour of the flat timeshift,
        #so GOES data from one hour before the interval start to five hours
        #after its end is selected (times are in days).
        GOES_subset_i = uf.interval(start_times[i] - 1. / 24.,
                                    end_times[i] + 5. / 24., GOES_t)
        GOES_t_subset = GOES_t[GOES_subset_i[0]:GOES_subset_i[1]]
        # NOTE(review): GOES_subset is not passed to shift_correlate in the
        # excerpt; an argument line may have been lost -- check the callee's
        # signature.
        GOES_subset = GOES[GOES_subset_i[0]:GOES_subset_i[1]]

        #Try shifts within 60 minutes of the flat timeshift
        for j in range(len(extra_shifts)):
            corrs[i, j] = shift_correlate(i, ACE_i, ACE_t, ACE, GOES_t_subset,
                                          shifts[i] + extra_shifts[j])

        #If we have nans in the correlation array, abort this interval.
        # Reconstructed `continue`; the result arrays already hold NaN.
        if np.isnan(corrs[i, 0]):
            continue

        #Take the highest correlation and the corresponding timeshift,
        #remembering to add back the flat timeshift.
        ideal_corrs[i] = np.nanmax(corrs[i])
        ideal_shifts[i] = shifts[i] + extra_shifts[np.nanargmax(corrs[i])]

        #Update a progress bar
        if i % 200 == 0 and i != 0:
            uf.status(int(float(i) / float(n_intervals) * 100))

    #At the end, save the list of ideal shifts and correlations.
    timetaken = time.time() - start
    print(timetaken, ' seconds')

    #Package up stuff and save it
    results = np.transpose([ideal_shifts, ideal_corrs])
    np.save(out_file, results)

    return 1
Пример #15
def pull_ACE_year(year):
    """
    Pull a year of ACE SWE data from CDAWeb, clean it, and store it in a
    location specified in config.par.

    The data is requested month by month and saved as one structured array
    with fields 't', 'pos', 'v', 'n', 'p' and 'spd' to
    '<filepath>Data/ACE_<year>.npy'. Nothing is pulled when that file already
    exists.

    Arguments:
        year(int) -- The year for which data will be pulled

    Returns:
        int: Function finished indicator (always 1)
    """
    print('Pulling data for ' + str(year))
    filepath = uf.get_parameter('filepath')

    #check if there's a folder there, if not, make it
    if not os.path.exists(filepath + 'Data/'):
        os.makedirs(filepath + 'Data/')

    filename = filepath + 'Data/ACE_' + str(year) + '.npy'

    #Check if file already exists
    if os.path.exists(filename):
        print('File ' + 'ACE_' + str(year) + '.npy' +
              ' already exists! Skipping...')
        return 1

    ACE_dtype = np.dtype([('t', 'f8'), ('pos', '3f8'), ('v', '3f8'),
                          ('n', 'f8'), ('p', 'f8'), ('spd', 'f8')])
    months = []

    print('Pulling ACE swe data from ' + str(year))

    #Pull the data from CDAWeb in month chunks
    for i in range(1, 13):
        t1 = datetime.datetime(year, i, 1)
        # End of the request window: first day of the next month, rolling
        # over to January of the following year after December.
        # (The `else:` of this branch was missing from the excerpt.)
        if i < 12:
            t2 = datetime.datetime(year, i + 1, 1)
        else:
            t2 = datetime.datetime(year + 1, 1, 1)

        swe_data = cdas.get_data('sp_phys', 'AC_H0_SWE', t1, t2,
                                 ['Np', 'Vp', 'V_GSE', 'SC_pos_GSE'])

        #make temp structure
        ACE_month = np.ndarray(len(swe_data['EPOCH']), dtype=ACE_dtype)

        #throw data into structure and clean it up
        ACE_month['t'] = mdate.date2num(swe_data['EPOCH'])
        ACE_month['pos'] = np.transpose([
            swe_data['ACE_X-GSE'], swe_data['ACE_Y-GSE'], swe_data['ACE_Z-GSE']
        ])
        ACE_month['n'] = swe_data['H_DENSITY']
        ACE_month['v'] = np.transpose(
            [swe_data['VX_(GSE)'], swe_data['VY_(GSE)'], swe_data['VZ_(GSE)']])

        #clean up ACE data: values below -10**30 are replaced with NaN
        ACE_month['n'][ACE_month['n'] < -10**30] = np.nan
        ACE_month['v'][ACE_month['v'] < -10**30] = np.nan

        ACE_month['spd'] = np.sqrt(np.sum(ACE_month['v']**2, axis=1))
        ACE_month['p'] = 1.6726 * 10**(
            -6) * ACE_month['n'] * ACE_month['spd']**2  # Units are nPa

        months.append(ACE_month)
        # float() keeps the percentage correct under integer division too.
        uf.status(int(float(i) / 12. * 100))

    # Join the twelve chunks once instead of re-copying the growing array
    # with np.append every month.
    ACE = np.concatenate(months)

    np.save(filename, ACE)
    print(str(year) + ' finished!')
    print('File saved to ' + filename)
    return 1
Пример #16
def calc_time_indices_year(year):
    """
    Create and save to file three lists of indices for one year, for ACE swe,
    ACE mfi, and GOES.

    The indices define time intervals separated by a time dt, of length
    interval_length (these are defined in config.par). Each row holds the
    [start, end] index pair of one interval; intervals that are missing or
    have fewer than 20 finite ACE pressure or GOES Bz samples are stored as
    [-1, -1] in all three lists.

    Arguments:
        year(int) -- The year for which indices will be calculated

    Returns:
        int: Function finished indicator (always 1)
    """
    filepath = uf.get_parameter('filepath')

    # SECURITY NOTE(review): eval() executes whatever text the config holds;
    # this is only acceptable while config.par is fully trusted.
    interval_length = eval(uf.get_parameter('interval_length'))
    dt = eval(uf.get_parameter('dt'))

    print('Calculating indices for ' + str(year))

    indices_dir = filepath + 'Indices/'
    if not os.path.exists(indices_dir):
        os.makedirs(indices_dir)

    filename = indices_dir + 'ACE_indices_' + str(year) + '.npy'

    #Check if file already exists
    if os.path.exists(filename):
        print('File ' + 'ACE_indices_' + str(year) + '.npy' +
              ' already exists! Skipping...')
        return 1

    ACE = np.load(filepath + 'Data/ACE_' + str(year) + '.npy')
    ACE_B = np.load(filepath + 'Data/ACE_B_' + str(year) + '.npy')
    GOES = np.load(filepath + 'Data/GOES_' + str(year) + '.npy')

    ACE_t = ACE['t'].copy()
    ACE_B_t = ACE_B['t'].copy()
    GOES_t = GOES['t'].copy()

    #Create an array of start times based on year, each separated by dt,
    #leaving a three hour margin at both ends of the year.
    tstart = mdate.date2num(datetime.datetime(year, 1, 1, 1, 0, 0))
    tend = mdate.date2num(datetime.datetime(year, 12, 31, 23, 0, 0))

    start_times = np.arange(tstart + 3. / 24., tend - 3. / 24., dt)
    end_times = start_times + interval_length
    n_intervals = len(start_times)

    ACE_B_time_indices = np.empty([n_intervals, 2], dtype=int)
    ACE_time_indices = np.empty([n_intervals, 2], dtype=int)
    GOES_time_indices = np.empty([n_intervals, 2], dtype=int)

    for i in range(n_intervals):
        [Abt1, Abt2] = uf.interval(start_times[i], end_times[i], ACE_B_t)
        [At1, At2] = uf.interval(start_times[i], end_times[i], ACE_t)
        [Gt1, Gt2] = uf.interval(start_times[i], end_times[i], GOES_t)

        # `or` short-circuits, so the slices are only evaluated once the
        # interval bounds are known to be valid.
        invalid = (
            np.isnan(At1) or np.isnan(Gt1)
            or np.count_nonzero(np.isfinite(ACE['p'][At1:At2])) < 20
            or np.count_nonzero(np.isfinite(GOES['B'][Gt1:Gt2, 2])) < 20)

        # As excerpted, the [-1, -1] markers were always overwritten by the
        # real indices a few lines later; the branches are now exclusive.
        if invalid:
            ACE_B_time_indices[i] = [-1, -1]
            ACE_time_indices[i] = [-1, -1]
            GOES_time_indices[i] = [-1, -1]
        else:
            ACE_time_indices[i] = [At1, At2]
            ACE_B_time_indices[i] = [Abt1, Abt2]
            GOES_time_indices[i] = [Gt1, Gt2]

        if i % 200 == 0 and i != 0:
            uf.status(int(float(i) / float(n_intervals) * 100))

    # Save into 'Indices/', which is where the existence check above and all
    # loaders in this file look for these files.
    # NOTE(review): the array arguments of the GOES and ACE_B saves were cut
    # off in the excerpt and are reconstructed from the variable names.
    np.save(filename, ACE_time_indices)
    np.save(indices_dir + 'GOES_indices_' + str(year) + '.npy',
            GOES_time_indices)
    np.save(indices_dir + 'ACE_B_indices_' + str(year) + '.npy',
            ACE_B_time_indices)
    return 1
Пример #17
def loadNetwork(filename):
    """
    Load a saved model object from '<filepath>Models/<filename>'.

    Arguments:
        filename(string) -- Name of the file inside the Models directory

    Returns:
        The object stored in the file, unwrapped from the zero-dimensional
        array np.load returns via .item().
    """
    filepath = uf.get_parameter('filepath')
    filename = filepath+'Models/'+filename
    # Models are written with np.save(f, self), i.e. as a pickled object
    # array, which np.load refuses to read unless allow_pickle=True (the
    # default became False in NumPy 1.16.3).
    # SECURITY: unpickling can execute arbitrary code -- only load model
    # files from a trusted source.
    return np.load(filename, allow_pickle=True).item()
Пример #18
def pull_ACE_B_year(year, filepath=''):
    """
    Pull a year of ACE MFI data from CDAWeb, clean it, and store it in a
    location specified in config.par.

    The data is requested month by month, reduced with collapse_down(..., 4)
    and saved as one structured array with fields 't' and 'B' to
    '<filepath>Data/ACE_B_<year>.npy'. Nothing is pulled when that file
    already exists.

    Arguments:
        year(int) -- The year for which data will be pulled
        filepath(string) -- Ignored: the value is always replaced by the
            'filepath' parameter read through uf.get_parameter.

    Returns:
        int: Function finished indicator (always 1)

    NOTE(review): the excerpt of this function was damaged (missing `else:`,
    an unterminated np.ndarray call followed by stray duplicated lines, an
    unclosed np.transpose, no visible end). The reconstruction below follows
    the structure of pull_ACE_year -- confirm against the original file.
    """
    filepath = uf.get_parameter('filepath')

    #check if there's a folder there, if not, make it
    if not os.path.exists(filepath + 'Data/'):
        os.makedirs(filepath + 'Data/')

    filename = filepath + 'Data/ACE_B_' + str(year) + '.npy'

    #Check if file already exists
    if os.path.exists(filename):
        print('File ' + 'ACE_B_' + str(year) + '.npy' +
              ' already exists! Skipping...')
        return 1

    print('Pulling ACE mfi data from ' + str(year))

    ACE_B_dtype = np.dtype([('t', 'f8'), ('B', '3f8')])
    months = []

    for i in range(1, 13):
        t1 = datetime.datetime(year, i, 1)
        # End of the request window: first day of the next month, rolling
        # over to January of the following year after December.
        if i < 12:
            t2 = datetime.datetime(year, i + 1, 1)
        else:
            t2 = datetime.datetime(year + 1, 1, 1)

        mfi_data = cdas.get_data('sp_phys', 'AC_H0_MFI', t1, t2, ['BGSEc'])

        # One output record per four input samples.
        ACE_B_month = np.ndarray(len(mfi_data['EPOCH']) // 4,
                                 dtype=ACE_B_dtype)

        ACE_B_month['B'] = np.transpose([
            collapse_down(mfi_data['BX_GSE'], 4),
            collapse_down(mfi_data['BY_GSE'], 4),
            collapse_down(mfi_data['BZ_GSE'], 4)
        ])
        ACE_B_month['t'] = collapse_down(mdate.date2num(mfi_data['EPOCH']), 4)

        #Clean bad data: values below -10**30 are replaced with NaN
        ACE_B_month['B'][ACE_B_month['B'] < -10**30] = np.nan

        months.append(ACE_B_month)
        # float() keeps the percentage correct under integer division too.
        uf.status(int(float(i) / 12. * 100))

    # Join the twelve chunks once instead of re-copying the growing array
    # with np.append every month.
    ACE_B = np.concatenate(months)

    np.save(filename, ACE_B)
    print(str(year) + ' finished!')
    print('File saved to ' + filename)
    return 1
def evaluate_method(method, corr_min=0.3, exclude=None):
    """
    Compare the timeshifts for a given method to ideal timeshifts, and plot a
    histogram of the differences together with a fitted uf.gauss curve.
    Also prints the width and center of the fit.

    Arguments:
        method(string) -- The timeshifting method to evaluate
        corr_min(float) -- Minimum correlation to accept ideal timeshifts for.

    Keyword Arguments:
        exclude(list) -- A list of indices corresponding to intervals to
            exclude from the analysis (default: nothing is excluded)

    Returns:
        (width, deltas): the absolute value of the third fit coefficient, and
        the array of differences (ideal - method, divided by 60) that went
        into the histogram.
        NOTE(review): the original description promised (width, center); the
        code returns deltas as the second value and that is kept so existing
        callers are unaffected.
    """
    filepath = uf.get_parameter('filepath')

    start_year = int(uf.get_parameter('start_year'))
    end_year = int(uf.get_parameter('end_year'))

    ideals = np.zeros([0, 2])
    shifts = np.array([])
    for year in range(start_year, end_year):

        ideals_year = np.load(filepath + 'Ideal_shifts/ideal_shifts_' +
                              str(year) + '.npy')
        ideals = np.append(ideals, ideals_year, axis=0)

        year_shifts = np.load(filepath + 'Shifts/' + method + '/' + method +
                              '_shifts_' + str(year) + '.npy')
        shifts = np.append(shifts, year_shifts)

    ideal_shifts = ideals[:, 0]
    ideal_shifts_corrs = ideals[:, 1]

    deltas = (ideal_shifts - shifts) / 60.

    if exclude is not None and len(exclude) > 0:
        deltas = np.delete(deltas, exclude)
        ideal_shifts_corrs = np.delete(ideal_shifts_corrs, exclude)

    #Get rid of nans. Only deltas and the correlations are filtered: the
    #original also masked ideal_shifts and shifts, which were never used
    #again and whose length no longer matched the mask once `exclude` had
    #removed entries.
    finite = np.isfinite(deltas)
    ideal_shifts_corrs = ideal_shifts_corrs[finite]
    deltas = deltas[finite]

    #Keep well-correlated intervals with a difference strictly inside +/-40
    deltas = deltas[ideal_shifts_corrs > corr_min]
    deltas = deltas[(deltas < 40) & (deltas > -40)]

    hist = np.histogram(deltas, bins=79)
    centers = (hist[1][:-1] + hist[1][1:]) / 2.

    #Fit gaussian
    p0 = [30., 0., 1., 10]

    # NOTE(review): the remaining arguments of this call were cut off in the
    # excerpt; fitting the histogram counts over the bin centers with p0 as
    # the initial guess is reconstructed from the surrounding lines.
    coeff, var_matrix = curve_fit(uf.gauss, centers, hist[0], p0=p0)
    hist_fit_flat = uf.gauss(centers, *coeff)
    width = np.abs(coeff[2])
    center = coeff[1]

    plt.plot(centers, hist[0], '-')
    plt.plot(centers, hist_fit_flat)
    print('For ', method, ':')

    print('Width is ', width)
    print('Center is ', center)
    return width, deltas