示例#1
0
 def read(self):
     '''Load pressure, temperature and time data from in_filename.

     Rows whose second column is null are ignored. The time series is
     generated from the first line of the file that starts like a date,
     using the number of pressure samples and self.frequency. If no such
     line exists, utc_millisecond_data is left untouched.
     '''
     first_row = find_first(self.in_filename, '^[0-9]{4},[0-9]{4}$') - 1
     frame = pd.read_table(self.in_filename, skiprows=first_row,
                           header=None, engine='c', sep=',',
                           names=('a', 'b'))

     # Keep only the rows that carry a value in the second column.
     complete = frame[frame.b.notnull()]
     self.pressure_data = np.array(
         [uc.USGS_PROTOTYPE_V_TO_DBAR(np.float64(v)) for v in complete.a])
     self.temperature_data = [
         uc.USGS_PROTOTYPE_V_TO_C(np.float64(v)) for v in complete.b]

     date_pattern = '^[0-9]{4}.[0-9]{2}.[0-9]{2}'
     with open(self.in_filename, 'r') as wavelog:
         # Only the first date-like line is needed; stop reading after it.
         date_line = next(
             (line for line in wavelog if re.match(date_pattern, line)),
             None)

     if date_line is not None:
         start_ms = uc.datestring_to_ms(date_line, self.date_format_string)
         self.utc_millisecond_data = uc.generate_ms(
             start_ms, len(self.pressure_data), self.frequency)
示例#2
0
    def read(self):
        '''Load pressure and time data from in_filename.

        Only two timestamps are parsed (the entries labelled 1 and 2 of
        the first column). The sampling frequency is derived from their
        spacing and the full time series is generated from it, which is
        much faster than parsing every row.
        '''
        self.get_serial()
        header_line = find_first(self.in_filename, 'Date and Time,Seconds')
        table = pd.read_table(self.in_filename, skiprows=header_line,
                              header=None, engine='c', sep=',',
                              usecols=(0, 1, 2, 3))

        stamps = table[0]
        parse_args = (self.date_format_string,
                      self.tz_info,
                      self.daylight_savings)
        self.data_start = uc.datestring_to_ms(stamps[1], *parse_args)
        self.data_start2 = uc.datestring_to_ms(stamps[2], *parse_args)

        # Timestamps are in milliseconds; frequency is samples per second.
        elapsed_seconds = (self.data_start2 - self.data_start) / 1000
        self.frequency = 1 / elapsed_seconds

        self.utc_millisecond_data = uc.generate_ms(
            self.data_start, len(stamps), self.frequency)
        self.pressure_data = table[3].values * uc.PSI_TO_DBAR
示例#3
0
def wind_data(file_name, mode='netCDF'):
    '''Write a generated wind time series to file_name.

    A series of 1440 samples is produced: a time axis from
    unit_conversion.generate_ms plus wind direction and wind speed from
    the random-data helpers.

    file_name -- path of the file to create
    mode -- 'netCDF' writes a NETCDF4_CLASSIC file with the variables
            time, wind_speed and wind_direction; any other value writes
            a CSV file through pandas
    '''
    # every fifteen minutes maybe not necessary
    frequency = 1 / 900
    series_length = 1440
    time = unit_conversion.generate_ms(1404647999870, series_length, frequency)
    wind_direction = get_rand_circular_data(series_length, 15, 360)
    wind_speed = get_rand_discrete_data(series_length, 2, 5, 0)

    if mode == 'netCDF':
        ds = Dataset(file_name, 'w', format="NETCDF4_CLASSIC")
        # Close the dataset even if writing a variable fails, so the file
        # handle is not leaked.
        try:
            ds.createDimension('time', len(time))
            series = (('time', time),
                      ('wind_speed', wind_speed),
                      ('wind_direction', wind_direction))
            for var_name, values in series:
                # ('time',) is a real one-element tuple; ('time') is a str.
                variable = ds.createVariable(var_name, 'f8', ('time',))
                variable[:] = values
        finally:
            ds.close()
    else:
        frame = pd.DataFrame({
            'Time': time,
            'Wind Speed in m/s': wind_speed,
            'Wind Direction in degrees': wind_direction,
        })
        frame.to_csv(path_or_buf=file_name)

    print('total:', len(time), len(wind_direction), len(wind_speed))
def wind_data(file_name, mode='netCDF'):
    '''Generate a wind time series and store it in file_name.

    The series has 1440 samples: a time axis built by
    unit_conversion.generate_ms, plus wind direction and wind speed
    values from the random-data helpers.

    file_name -- output path
    mode -- 'netCDF' creates a NETCDF4_CLASSIC file holding time,
            wind_speed and wind_direction; every other value produces a
            CSV file via pandas
    '''
    # every fifteen minutes maybe not necessary
    frequency = 1 / 900
    series_length = 1440
    time = unit_conversion.generate_ms(1404647999870, series_length, frequency)
    wind_direction = get_rand_circular_data(series_length, 15, 360)
    wind_speed = get_rand_discrete_data(series_length, 2, 5, 0)

    if mode == 'netCDF':
        ds = Dataset(file_name, 'w', format="NETCDF4_CLASSIC")
        # try/finally guarantees the file is closed when a write raises.
        try:
            ds.createDimension('time', len(time))
            # Dimensions are passed as a one-element tuple, not a bare str.
            time_var = ds.createVariable('time', 'f8', ('time',))
            time_var[:] = time
            wind_speed_var = ds.createVariable('wind_speed', 'f8', ('time',))
            wind_speed_var[:] = wind_speed
            wind_direction_var = ds.createVariable('wind_direction', 'f8',
                                                   ('time',))
            wind_direction_var[:] = wind_direction
        finally:
            ds.close()
    else:
        csv_frame = pd.DataFrame({
            'Time': time,
            'Wind Speed in m/s': wind_speed,
            'Wind Direction in degrees': wind_direction,
        })
        csv_frame.to_csv(path_or_buf=file_name)

    print('total:', len(time), len(wind_direction), len(wind_speed))
示例#5
0
 def read(self):
     '''Load the data from in_filename.

     Only three timestamps are parsed, which is much faster than parsing
     every row; the rest of the time series is generated from the
     derived sampling frequency.

     Timestamps are parsed with date_format_string first. If that raises,
     everything is re-parsed with date_format_string2, and an error from
     that second attempt propagates to the caller.
     '''
     self.get_serial()
     skip_index = find_first(self.in_filename, '^ID') - 1
     # for skipping lines in case there is calibration header data
     df = pd.read_table(self.in_filename, skiprows=skip_index + 1,
                        header=None, engine='c', sep=',',
                        usecols=[3, 4, 5])

     def timing(date_format):
         # Return (data_start, frequency, start_ms) for one date format.
         # Each stamp drops its first character before parsing.
         # NOTE(review): frequency comes from rows 3 and 4 while the
         # series start comes from row 0 -- confirm this is intended.
         first, second, start = [
             uc.datestring_to_ms(df[3][row][1:], date_format,
                                 self.tz_info, self.daylight_savings)
             for row in (3, 4, 0)
         ]
         return first, 1000 / (second - first), start

     try:
         parsed = timing(self.date_format_string)
     except Exception:
         # Was a bare ``except:``, which also trapped KeyboardInterrupt
         # and SystemExit.
         parsed = timing(self.date_format_string2)
     self.data_start, self.frequency, start_ms = parsed

     self.pressure_data = df[5].values * uc.PSI_TO_DBAR
     self.utc_millisecond_data = uc.generate_ms(start_ms, df.shape[0],
                                                self.frequency)
示例#6
0
    def read(self):
        '''Load the data from in_filename.

        Only the first two timestamps are parsed, which is much faster
        than parsing every row; the time series is generated from the
        first timestamp and the derived sampling frequency.

        Timestamps are parsed with date_format_string first. If that
        raises, both are re-parsed with date_format_string2, and an error
        from that second attempt propagates to the caller.
        '''
        self.get_serial()

        skip_index = find_first(self.in_filename, '"#"')
        if skip_index is None:
            # Fall back to an unquoted marker.
            skip_index = find_first(self.in_filename, '#')

        df = pd.read_table(self.in_filename,
                           skiprows=skip_index,
                           header=None,
                           engine='c',
                           sep=',',
                           usecols=(1, 2))
        df = df.dropna()
        rows = df.values

        def parse_first_two(date_format):
            # Parse the timestamps of the first two remaining rows.
            return [
                uc.datestring_to_ms(rows[position][0], date_format,
                                    self.tz_info, self.daylight_savings)
                for position in (0, 1)
            ]

        try:
            first_stamp, second_stamp = parse_first_two(
                self.date_format_string)
        except Exception:
            # Was a bare ``except:``, which also trapped KeyboardInterrupt
            # and SystemExit.
            first_stamp, second_stamp = parse_first_two(
                self.date_format_string2)

        self.frequency = 1000 / (second_stamp - first_stamp)

        # The series starts at the first remaining row. The previous code
        # re-parsed df[1][0], a label lookup that raises KeyError when
        # dropna() removed the row labelled 0.
        self.utc_millisecond_data = uc.generate_ms(first_stamp, df.shape[0],
                                                   self.frequency)

        self.pressure_data = df[2].values * uc.PSI_TO_DBAR
示例#7
0
 def read(self):
     '''Load the data from in_filename.

     Only the initial date and time are parsed, which is much faster than
     parsing every row; the time series is generated from that start and
     self.frequency.
     '''
     skip_index = find_first(self.in_filename,
                             '^[0-9]{2}-[A-Z]{1}[a-z]{2,8}-[0-9]{4}')
     # sep=r'\s+' replaces the deprecated delim_whitespace=True; pandas
     # documents the two as equivalent.
     df = pd.read_csv(self.in_filename, skiprows=skip_index, sep=r'\s+',
                      header=None, engine='c', usecols=[0, 1, 2])

     # Columns 0 and 1 of the first row are joined into one date string.
     self.datestart = uc.datestring_to_ms('%s %s' % (df[0][0], df[1][0]),
                                          self.date_format_string)
     # The last row is excluded from both the time and pressure series.
     # NOTE(review): presumably a trailer or incomplete line -- confirm.
     self.utc_millisecond_data = uc.generate_ms(self.datestart,
                                                df.shape[0] - 1,
                                                self.frequency)
     self.pressure_data = np.array(list(df[2][:-1]))
示例#8
0
    def read(self):
        '''Load the data from in_filename.

        Only three timestamps are parsed, which is much faster than
        parsing every row; the rest of the time series is generated from
        the derived sampling frequency.

        Timestamps are parsed with date_format_string first. If that
        raises, everything is re-parsed with date_format_string2, and an
        error from that second attempt propagates to the caller.
        '''
        self.get_serial()
        skip_index = find_first(self.in_filename, '^ID') - 1
        # for skipping lines in case there is calibration header data
        df = pd.read_table(self.in_filename,
                           skiprows=skip_index + 1,
                           header=None,
                           engine='c',
                           sep=',',
                           usecols=[3, 4, 5])

        def to_ms(row, date_format):
            # Each stamp drops its first character before parsing.
            return uc.datestring_to_ms(df[3][row][1:], date_format,
                                       self.tz_info, self.daylight_savings)

        def timing(date_format):
            # Return (data_start, frequency, start_ms) for one date format.
            # NOTE(review): frequency comes from rows 3 and 4 while the
            # series start comes from row 0 -- confirm this is intended.
            data_start = to_ms(3, date_format)
            second_stamp = to_ms(4, date_format)
            frequency = 1000 / (second_stamp - data_start)
            return data_start, frequency, to_ms(0, date_format)

        try:
            result = timing(self.date_format_string)
        except Exception:
            # Was a bare ``except:``, which also trapped KeyboardInterrupt
            # and SystemExit.
            result = timing(self.date_format_string2)
        self.data_start, self.frequency, start_ms = result

        self.pressure_data = df[5].values * uc.PSI_TO_DBAR
        self.utc_millisecond_data = uc.generate_ms(start_ms, df.shape[0],
                                                   self.frequency)
def quick_dirty_wind_data(in_file_name, out_file_name):
    '''Copy wind columns from a headerless CSV into a netCDF file.

    Column 6 of in_file_name is written as wind_direction and column 8 as
    wind_speed, against a generated time axis of 1213 entries.

    in_file_name -- CSV file to read (no header row)
    out_file_name -- NETCDF4_CLASSIC file to create
    '''
    df = pd.read_csv(in_file_name, header=None)

    # generate 6 minute utc millisecond data
    # millisecond is a time stamp for Fri Jan 22 2016 23:00:00
    time = unit_conversion.generate_ms(1453503600000.0, 1213, 1 / 360)
    wind_direction = df[6]
    wind_speed = df[8]

    ds = Dataset(out_file_name, 'w', format="NETCDF4_CLASSIC")
    # Close the dataset even if a write fails, so the handle is not leaked.
    try:
        ds.createDimension('time', len(time))
        series = (('time', time),
                  ('wind_speed', wind_speed.values),
                  ('wind_direction', wind_direction.values))
        for var_name, values in series:
            # ('time',) is a real one-element tuple; ('time') is a str.
            variable = ds.createVariable(var_name, 'f8', ('time',))
            variable[:] = values
    finally:
        ds.close()
示例#10
0
def quick_dirty_wind_data(in_file_name, out_file_name):
    '''Write the wind columns of a headerless CSV to a netCDF file.

    Column 6 of the CSV becomes wind_direction and column 8 becomes
    wind_speed; the time variable is a generated axis of 1213 entries.

    in_file_name -- CSV file to read (no header row)
    out_file_name -- NETCDF4_CLASSIC file to create
    '''
    df = pd.read_csv(in_file_name, header=None)

    # generate 6 minute utc millisecond data
    # millisecond is a time stamp for Fri Jan 22 2016 23:00:00
    time = unit_conversion.generate_ms(1453503600000.0, 1213, 1 / 360)
    wind_direction = df[6]
    wind_speed = df[8]

    ds = Dataset(out_file_name, 'w', format="NETCDF4_CLASSIC")
    # try/finally guarantees the file is closed when a write raises.
    try:
        ds.createDimension('time', len(time))
        # Dimensions are passed as a one-element tuple, not a bare str.
        time_var = ds.createVariable('time', 'f8', ('time',))
        time_var[:] = time
        wind_speed_var = ds.createVariable('wind_speed', 'f8', ('time',))
        wind_speed_var[:] = wind_speed.values
        wind_direction_var = ds.createVariable('wind_direction', 'f8',
                                               ('time',))
        wind_direction_var[:] = wind_direction.values
    finally:
        ds.close()
示例#11
0
    def read(self):
        '''Load the data from in_filename.

        Only the initial date and time are parsed, which is much faster
        than parsing every row; the time series is generated from that
        start and self.frequency.
        '''
        skip_index = find_first(self.in_filename,
                                '^[0-9]{2}-[A-Z]{1}[a-z]{2,8}-[0-9]{4}')
        # delim_whitespace=True is deprecated in pandas; sep=r'\s+' is the
        # documented equivalent.
        df = pd.read_csv(self.in_filename,
                         skiprows=skip_index,
                         sep=r'\s+',
                         header=None,
                         engine='c',
                         usecols=[0, 1, 2])

        # Date (column 0) and time (column 1) of the first row form the
        # string that is parsed.
        first_stamp = '%s %s' % (df[0][0], df[1][0])
        self.datestart = uc.datestring_to_ms(first_stamp,
                                             self.date_format_string)
        # The last row is left out of both series.
        # NOTE(review): presumably a trailer or incomplete line -- confirm.
        self.utc_millisecond_data = uc.generate_ms(self.datestart,
                                                   df.shape[0] - 1,
                                                   self.frequency)
        self.pressure_data = np.array(list(df[2][:-1]))
示例#12
0
 def read(self):
     '''Read pressure and time data out of in_filename.

     Parsing is limited to two timestamps (entries 1 and 2 of the first
     column). Their spacing gives the sampling frequency, from which the
     whole time series is generated -- much faster than parsing each row.
     '''
     self.get_serial()
     start_line = find_first(self.in_filename, 'Date and Time,Seconds')
     frame = pd.read_table(self.in_filename,
                           skiprows=start_line,
                           header=None,
                           engine='c',
                           sep=',',
                           usecols=(0, 1, 2, 3))

     def to_ms(stamp):
         # Convert one timestamp string with this reader's settings.
         return uc.datestring_to_ms(stamp, self.date_format_string,
                                    self.tz_info, self.daylight_savings)

     timestamps = frame[0]
     self.data_start = to_ms(timestamps[1])
     self.data_start2 = to_ms(timestamps[2])

     # Millisecond spacing converted to seconds, then inverted.
     step_seconds = (self.data_start2 - self.data_start) / 1000
     self.frequency = 1 / step_seconds

     self.utc_millisecond_data = uc.generate_ms(self.data_start,
                                                len(timestamps),
                                                self.frequency)
     self.pressure_data = frame[3].values * uc.PSI_TO_DBAR
示例#13
0
 def read(self):
     '''Read in_filename into pressure, temperature and time data.

     Only rows with a non-null second column are converted. The time
     series starts at the first line of the file that looks like a date
     and is as long as the pressure data; nothing is generated if no
     such line is found.
     '''
     skip_rows = find_first(self.in_filename, '^[0-9]{4},[0-9]{4}$') - 1
     readings = pd.read_table(self.in_filename,
                              skiprows=skip_rows,
                              header=None,
                              engine='c',
                              sep=',',
                              names=('a', 'b'))

     # Rows lacking a second value are dropped from both series.
     has_second_value = ~readings.b.isnull()
     pressure_raw = readings[has_second_value].a
     temperature_raw = readings[has_second_value].b

     self.pressure_data = np.array(
         [uc.USGS_PROTOTYPE_V_TO_DBAR(np.float64(raw))
          for raw in pressure_raw])
     self.temperature_data = [uc.USGS_PROTOTYPE_V_TO_C(np.float64(raw))
                              for raw in temperature_raw]

     with open(self.in_filename, 'r') as wavelog:
         for line in wavelog:
             if not re.match('^[0-9]{4}.[0-9]{2}.[0-9]{2}', line):
                 continue
             # First date-like line found: build the series and stop.
             start_ms = uc.datestring_to_ms(line, self.date_format_string)
             self.utc_millisecond_data = uc.generate_ms(
                 start_ms, len(self.pressure_data), self.frequency)
             break
示例#14
0
    def read(self):
        '''Load the data from in_filename.

        Only the first two timestamps are parsed, which is much faster
        than parsing every row; the time series is generated from the
        first timestamp and the derived sampling frequency.

        Timestamps are parsed with date_format_string first. If that
        raises, both are re-parsed with date_format_string2, and an error
        from that second attempt propagates to the caller.
        '''
        self.get_serial()

        skip_index = find_first(self.in_filename, '"#"')
        if skip_index is None:
            # Fall back to an unquoted marker.
            skip_index = find_first(self.in_filename, '#')

        df = pd.read_table(self.in_filename, skiprows=skip_index, header=None,
                           engine='c', sep=',', usecols=(1, 2))
        df = df.dropna()
        rows = df.values

        def to_ms(position, date_format):
            # Parse the timestamp of the row at this position.
            return uc.datestring_to_ms(rows[position][0], date_format,
                                       self.tz_info, self.daylight_savings)

        try:
            first_stamp = to_ms(0, self.date_format_string)
            second_stamp = to_ms(1, self.date_format_string)
        except Exception:
            # Was a bare ``except:``, which also trapped KeyboardInterrupt
            # and SystemExit.
            first_stamp = to_ms(0, self.date_format_string2)
            second_stamp = to_ms(1, self.date_format_string2)

        self.frequency = 1000 / (second_stamp - first_stamp)

        # Start the series at the first remaining row. The previous code
        # re-parsed df[1][0], a label lookup that raises KeyError when
        # dropna() removed the row labelled 0.
        self.utc_millisecond_data = uc.generate_ms(first_stamp, df.shape[0],
                                                   self.frequency)

        self.pressure_data = df[2].values * uc.PSI_TO_DBAR
示例#15
0
def change_netCDFTime(in_file_name, out_file_name, start_ms):
    '''Copy in_file_name to out_file_name and replace its time variable.

    The new time series begins at start_ms and has as many entries as the
    time variable of the copied file.
    '''
    shutil.copy(in_file_name, out_file_name)
    sample_count = len(nc.get_time(out_file_name))
    nc.set_variable_data(
        out_file_name,
        'time',
        unit_conversion.generate_ms(start_ms, sample_count, 1 / 900),
    )
示例#16
0
def change_netCDFTime(in_file_name, out_file_name, start_ms):
    '''Duplicate a netCDF file and regenerate the copy's time variable.

    in_file_name is copied to out_file_name; the copy's 'time' data is
    then overwritten with a series of the same length starting at
    start_ms.
    '''
    shutil.copy(in_file_name, out_file_name)

    existing_time = nc.get_time(out_file_name)
    regenerated = unit_conversion.generate_ms(start_ms,
                                              len(existing_time),
                                              1 / 900)
    nc.set_variable_data(out_file_name, 'time', regenerated)