Exemplo n.º 1
0
    def mergeBst_ch(self,fileName='bst_2018-2019.csv',file_ch = 'bst_ch_2018.txt'):
        '''
        Overwrite positions in a track CSV with the positions listed in a text file.

        The text file is read as repeated records: one header line whose third
        field is the number of data lines that follow, then that many data
        lines. In each data line field 0 is the time, and fields 2 and 3 are
        multiplied by 0.1 to give latitude and longitude. When the time exists
        in the CSV ``time`` column and the stored ``lat``/``lon`` of the first
        matching row differ, that row is updated. The number of updated rows
        is printed and the CSV is rewritten in place.

        :param fileName: CSV file, relative to getBasePath('typhoon'), to update.
        :param file_ch: text file, relative to getBasePath('typhoon'), with the replacement positions.
        :return: None
        '''
        base_path = getBasePath('typhoon')
        csv_path = '%s%s' % (base_path, fileName)
        data_df = pd.read_csv(csv_path)

        # Map each time to the label of its FIRST row, built once: lat/lon
        # updates never touch the 'time' column, so the mapping stays valid.
        first_row_by_time = {}
        for row_label, row_time in zip(data_df.index.tolist(), data_df['time'].tolist()):
            first_row_by_time.setdefault(row_time, row_label)

        change_idx = 0
        with open('%s%s' % (base_path, file_ch), 'r') as f:
            while True:
                header = f.readline()
                if not header:
                    break
                aTyphoon = self.splitMultiBlank(header)
                linesNum = int(aTyphoon[2])
                for _ in range(linesNum):
                    aLine = self.splitMultiBlank(f.readline())
                    time = int(aLine[0])
                    lat = int(aLine[2]) * 0.1
                    lon = int(aLine[3]) * 0.1

                    row_label = first_row_by_time.get(time)
                    if row_label is None:
                        continue

                    # Label-based access throughout (.ix was removed from pandas).
                    if data_df.at[row_label, 'lat'] != lat or data_df.at[row_label, 'lon'] != lon:
                        data_df.loc[row_label, 'lat'] = lat
                        data_df.loc[row_label, 'lon'] = lon
                        change_idx += 1

        print(change_idx)
        data_df.to_csv(csv_path, index=False, header=True)
Exemplo n.º 2
0
    def dataTime(self, pickList, dataFilename='dir_1KM.txt', res='1KM'):
        '''
        Extract the data files whose timestamp falls inside the given intervals.

        The listing file is read as one path per line. The text after the last
        backslash of each path becomes ``names``; characters 44..57 of that
        name are parsed as a ``%Y%m%d%H%M%S`` timestamp. Rows whose timestamp
        falls inside any interval of ``pickList`` are written (columns
        ``names`` and ``time``) to ``data_pick_<res>.csv`` and their count is
        printed.

        :param pickList: iterable of (start, end) pairs used to slice the
            datetime index (per the original note: the intervals in which
            typhoons occur, derived from the tracks data).
        :param dataFilename: listing file, relative to getBasePath('typhoon').
        :param res: resolution tag used in the output file name.
        :return: None
        '''
        base_path = getBasePath('typhoon')
        data_time_df = pd.read_csv('%s%s' % (base_path, dataFilename),
                                   header=None)
        data_time_df.columns = ['path']

        # Series.map on the single source column replaces the deprecated
        # DataFrame.applymap + whole-frame-to-column assignment.
        data_time_df['names'] = data_time_df['path'].map(
            lambda p: p[p.rfind('\\') + 1:])
        data_time_df.drop(axis=1, labels=['path'], inplace=True)

        data_time_df['time'] = data_time_df['names'].map(
            lambda name: name[44:58])
        data_time_df['date'] = pd.to_datetime(
            data_time_df['time'], format='%Y%m%d%H%M%S')  # convert to datetime
        data_time_df = data_time_df.set_index('date')  # use the date as index

        # Collect all slices and concatenate once instead of growing a frame
        # inside the loop.
        picked = [data_time_df[x[0]:x[1]] for x in pickList]
        if picked:
            data_time_df_pick = pd.concat(picked)
        else:
            # No intervals: keep the column layout so the export below still
            # produces a header-only file instead of raising KeyError.
            data_time_df_pick = data_time_df.iloc[0:0]

        print(data_time_df_pick.shape[0])

        data_time_df_pick[['names', 'time']].to_csv(
            '%sdata_pick_%s.csv' % (base_path, res),
            index=False,
            header=True,
            encoding='utf-8')
Exemplo n.º 3
0
    def trackTime(self, fileName='bst_2018-2019.csv', start_time='2018-07-01'):
        '''
        Extract the distinct times of the tracks data.

        Each unique value of the ``time`` column gets ``00`` appended and is
        parsed as ``%Y%m%d%H%M%S``. The times on or after ``start_time``,
        sorted ascending, are written to ``time_tracks.csv`` (no header, no
        index). The complete, unfiltered list of time strings is written to
        ``time_tracks.txt`` (one per line) and returned.

        :param fileName: tracks CSV, relative to getBasePath('typhoon').
        :param start_time: earliest date kept in ``time_tracks.csv``.
        :return: list of all time strings (not filtered by ``start_time``).
        '''
        base_path = getBasePath('typhoon')
        tracks_df = pd.read_csv('%s%s' % (base_path, fileName))

        time_list = ['%s00' % x for x in tracks_df['time'].unique()]

        time_df = pd.DataFrame(data=time_list, columns=['time'])
        time_df['date'] = pd.to_datetime(time_df['time'],
                                         format='%Y%m%d%H%M%S')  # convert to datetime
        # Sort before slicing: label slicing on a datetime index needs a
        # monotonic index, and DataFrame.sort() no longer exists in pandas.
        time_df = time_df.set_index('date').sort_index(ascending=True)
        time_df = time_df.loc[start_time:]

        time_df.to_csv('%stime_tracks.csv' % base_path,
                       header=None,
                       index=False,
                       encoding='UTF-8')

        with open('%stime_tracks.txt' % base_path, 'w',
                  encoding='UTF-8') as f:
            f.writelines(itm + '\n' for itm in time_list)

        return time_list
Exemplo n.º 4
0
    def extractImg(self):
        '''
        Attach the stored image features to ``self.df``.

        The train and test feature CSVs are read with their first column as
        index, stacked row-wise under a fresh integer index, and then joined
        column-wise onto ``self.df``.

        :return: None
        '''
        feature_frames = [
            pd.read_csv('%s/data/train-img-feas.csv'%util.getBasePath(),index_col=0),
            pd.read_csv('%s/data/test-img-feas.csv' % util.getBasePath(),index_col=0),
        ]
        stacked_features = pd.concat(feature_frames,axis=0,ignore_index=True)

        self.df = pd.concat([self.df,stacked_features],axis=1)
        print('get images done')
Exemplo n.º 5
0
 def __init__(self, n_clusters=10, isRelativePath=True):
     '''
     Set up the SIFT extractor, the KMeans model and the image path template.

     :param n_clusters: number of clusters given to KMeans.
     :param isRelativePath: when true the template points at ``/data/...``
         below getBasePath(); otherwise at ``/../data/...``.
     '''
     self.sift_extractor = cv2.xfeatures2d_SIFT.create()
     template_suffix = ('/data/profile_images_%s/%s' if isRelativePath
                        else '/../data/profile_images_%s/%s')
     self.path = getBasePath() + template_suffix
     self.n_clusters = n_clusters
     self.kmeans = KMeans(n_clusters=n_clusters, random_state=3724)
     # indices (int) of entries whose profile image does not exist
     self.nullImgInds = []
Exemplo n.º 6
0
    def __init__(self,resolution,filename,fillValue=999):
        '''
        Read a raw geo file, fill its blanks and save the result.

        :param resolution: like '4KM','2KM'; used as key into config.IMG_SIZE
        :param filename: raw file name, relative to getBasePath('data')
        :param fillValue: stored on the instance as ``_fillValue``
        '''
        self._resolution = resolution
        self._grid_size = config.IMG_SIZE[self._resolution]
        self._fillValue = fillValue

        savefileName = self.saveFilename(filename)

        raw_path = '%s/%s' % (getBasePath('data'), filename)
        lat_grid, lon_grid = self.readRaw(raw_path)
        lat_grid = self.fillBlank(lat_grid, 'lat')
        lon_grid = self.fillBlank(lon_grid, 'lon')

        target_path = '%s/%s' % (getBasePath('data'), savefileName)
        self.saveGeo(target_path, lat_grid, lon_grid)
Exemplo n.º 7
0
 def loadGeoData(self, resolution, category='raw'):
     '''
     Load the grid CSV for a resolution as a GeoDataFrame of points.

     :param resolution: resolution tag inserted into the CSV file name.
     :param category: not used by this method.
     :return: GeoDataFrame whose geometry column ``Coordinates`` holds one
         Point built from each row's (lon, lat) pair.
     '''
     csv_path = "%s/FullMask_Grid_%s_999_NULL_%s.csv" % (getBasePath(''),
                                                         resolution, 'lat')
     grid_df = pd.read_csv(csv_path, sep=',')
     lon_lat_pairs = list(zip(grid_df.lon, grid_df.lat))
     grid_df['Coordinates'] = lon_lat_pairs
     grid_df['Coordinates'] = grid_df['Coordinates'].apply(Point)
     return gpd.GeoDataFrame(grid_df, geometry='Coordinates')
Exemplo n.º 8
0
def saveGeo(GEO_LAT,
            GEO_LON,
            filename='%stransFormula_IMG_2_GEO' % util.getBasePath('data')):
    '''
    Write the latitude and longitude grids to two CSV files.

    :param GEO_LAT: data written to ``<filename>_<res>_lat.csv``
    :param GEO_LON: data written to ``<filename>_<res>_lon.csv``
    :param filename: output path prefix; the default is computed once, when
        the function is defined.
    '''
    # NOTE(review): `res` is neither a parameter nor a local; it is looked up
    # in the enclosing/module scope at call time - confirm it is defined there.
    outputs = (
        (GEO_LAT, '%s_%s_lat.csv'),
        (GEO_LON, '%s_%s_lon.csv'),
    )
    for grid, name_template in outputs:
        pd.DataFrame(grid).to_csv(name_template % (filename, res),
                                  header=True,
                                  index=True)
Exemplo n.º 9
0
    def readBST_Track_file(self,fileName='bst_2018-2019.txt',saveFileName='bst_2018-2019.csv'):
        '''
        Convert a best-track text file into a CSV with ``self._columns`` as header.

        The file is read as repeated records: one header line (field 2 = number
        of data lines that follow, field 5 = stored as the typhoon ID, field 7 =
        stored as the typhoon name), then that many data lines. Each data line
        yields one row: '20' + field 0, field 3 * 0.1, field 4 * 0.1, ID, name,
        the digits of field 5 and the digits of field 6, followed by six more
        values. When the digits of field 6 equal 0 those six values are the
        placeholders '8',0,0,'8',0,0; otherwise they come from fields 7-10
        (first character and remaining digits of fields 7 and 9, plus fields 8
        and 10). Rows that do not end up with 13 values are printed.

        :param fileName: input text file, relative to getBasePath('typhoon').
        :param saveFileName: output CSV, relative to getBasePath('typhoon').
        :return: None
        '''
        itm_list = []

        # NOTE(review): the file is opened in binary mode and every line goes
        # through str(); on Python 3 that is the bytes repr (b'...\n'), not the
        # decoded text. Presumably splitMultiBlank and the digit stripping
        # below cope with that - confirm before switching to text mode.
        with open('%s%s'%(getBasePath('typhoon'),fileName),'rb') as f:
            while True:
                tmp = f.readline()
                if len(tmp)==0:
                    break
                aTyphoon = self.splitMultiBlank(str(tmp))
                linesNum = int(aTyphoon[2])
                aTyphoonID = aTyphoon[5]
                aTyphoonName = aTyphoon[7]
                for _ in range(linesNum):
                    aLine = self.splitMultiBlank(str(f.readline()))
                    # Raw string: "\D" in a plain literal is an invalid escape sequence.
                    aItm = ['20%s'%aLine[0],int(aLine[3]) * 0.1,int(aLine[4]) * 0.1,aTyphoonID,aTyphoonName,
                            int(re.sub(r"\D", "", aLine[5])),int(re.sub(r"\D", "", aLine[6]))]
                    # aItm[-1] is the numeric value of field 6, parsed once above.
                    if aItm[-1] == 0:
                        aItm +=['8',0,0,'8',0,0]
                    else:
                        aItm += [aLine[7][0],
                            int(aLine[7][1:]),int(aLine[8]),
                            aLine[9][0],
                            int(aLine[9][1:]),int(aLine[10])
                            ]
                    itm_list.append(aItm)
                    if(len(aItm)!=13):
                        print(aItm)
        track_df = pd.DataFrame(itm_list,columns=self._columns)
        track_df.to_csv('%s%s'%(getBasePath('typhoon'),saveFileName),index=False,header=True)
Exemplo n.º 10
0
 def drawMap(self, filename, dn_ch_arr, ch_name):
     ''' Save one channel array as a JPEG image.

     The output name is built from the 10th underscore-separated piece of
     ``filename``, the instance resolution and ``ch_name``.

     :param filename: raw file name; only its piece at index 9 is used
     :param dn_ch_arr: the dn data of one specific channel
     :param ch_name: channel label used in the output file name
     '''
     name_piece = filename.split('_')[9]
     out_path = '%s%s/%s_Channel%s.jpg' % (getBasePath('img'),
                                           self._resolution, name_piece,
                                           ch_name)
     converted = cv2.cvtColor(dn_ch_arr, cv2.COLOR_BGR2RGB)
     cv2.imwrite(out_path, converted,
                 [int(cv2.IMWRITE_JPEG_QUALITY), self._saveIMGQuality])
Exemplo n.º 11
0
 def __init__(self,
              imgNameList,
              tasktype='train',
              path="%s/../data/%s_profile_images/profile_images_%s"):
     '''
     Store the image names and derive the image directory for the task type.

     :param imgNameList: image names, kept as given.
     :param tasktype: 'train' or 'test'; any other value only prints a
         message, construction continues.
     :param path: '%'-template filled with (getBasePath(), tasktype, tasktype).
     '''
     self.imgNameList = imgNameList
     if tasktype not in ('train', 'test'):
         print("please set tasktype as train or test")
     self.tasktype = tasktype
     self.imgList = []
     base_dir = getBasePath()
     self.imgBasePath = path % (base_dir, tasktype, tasktype)
     self.imgExisIndex = []
Exemplo n.º 12
0
    def getAllFilesList(self, basepath='F:\\风云数据\\1KM\\1Km全圆盘数据', res='1KM'):
        '''
        Write the full path of every file below ``basepath`` to a listing file.

        The listing is ``dir_<res>.txt`` under getBasePath('typhoon'), UTF-8
        encoded, one path per line, in os.walk order.

        :param basepath: directory tree to walk.
        :param res: resolution tag used in the listing file name.
        :return: None
        '''
        listing_path = "%sdir_%s.txt" % (getBasePath('typhoon'), res)
        # The context manager closes the listing even if walking or writing fails.
        with open(listing_path, "w", encoding='UTF-8') as f:
            for root, _dirs, files in os.walk(basepath):
                f.writelines(os.path.join(root, name) + "\n" for name in files)
Exemplo n.º 13
0
    def getIMGCoord(self,filename='bst_2018-2019.csv'):
        '''
        Add image coordinates for each resolution to the tracks CSV.

        For '4KM', '2KM' and '1KM' a CoordTrans is built and every row's
        (lon, lat) is passed to geo2ImgCoord; element 0 of the result goes
        into column ``l_img_<res>`` and element 1 into ``c_img_<res>``, both
        as int. The head of the table is printed and the CSV is rewritten in
        place.

        :param filename: CSV file, relative to getBasePath('typhoon').
        :return: None
        '''
        df = pd.read_csv('%s%s'%(getBasePath('typhoon'),filename))

        for res in ('4KM','2KM','1KM'):
            transformer = CoordTrans(res)

            def img_coord(row, position):
                # One geo2ImgCoord call per row and per output column.
                return int(transformer.geo2ImgCoord(row['lon'], row['lat'])[position])

            line_key = 'l_img_%s'%res
            col_key = 'c_img_%s'%res

            df[line_key] = df.apply(lambda row: img_coord(row, 0), axis=1)
            df[col_key] = df.apply(lambda row: img_coord(row, 1), axis=1)

        print(df.head())

        df.to_csv('%s%s' % (getBasePath('typhoon'), filename), index=False, header=True)
Exemplo n.º 14
0
    def pickDataByTrackTime(self,
                            trackFilename='time_tracks.csv',
                            dataFilename='data_pick_1KM.csv',
                            res='1KM'):
        '''
        Keep the data rows whose time equals one of the track times.

        The track file is read as a header-less single column of
        ``%Y%m%d%H%M%S`` times; the data file is read with a header and
        indexed by its ``time`` column. For each track time, in track order,
        the data rows with that exact time are collected. The number of
        collected rows is printed and the rows are written, one CSV line per
        row without index or header, to ``data_match_<res>.csv``.

        :param trackFilename: track time file, relative to getBasePath('typhoon').
        :param dataFilename: data file, relative to getBasePath('typhoon').
        :param res: resolution tag used in the output file name.
        :return: None
        '''
        base_path = getBasePath('typhoon')

        # header=None: the single column is named explicitly right below, so
        # the first line must be read as data rather than as a header.
        track_time_df = pd.read_csv('%s%s' % (base_path, trackFilename),
                                    header=None)
        track_time_df.columns = ['time']
        track_time_df.set_index('time', inplace=True)  # use the time as index
        track_time_df.index = pd.to_datetime(
            track_time_df.index, format='%Y%m%d%H%M%S')  # convert to datetime

        data_time_df = pd.read_csv('%s%s' % (base_path, dataFilename))
        data_time_df.set_index('time', inplace=True)  # use the time as index
        data_time_df.index = pd.to_datetime(
            data_time_df.index, format='%Y%m%d%H%M%S')  # convert to datetime

        # Select whole rows (as frames) so every match keeps the column
        # layout, then concatenate once.
        matched_rows = [
            data_time_df.loc[[stamp]] for stamp in track_time_df.index
            if stamp in data_time_df.index
        ]
        if matched_rows:
            data_match_df = pd.concat(matched_rows)
        else:
            data_match_df = data_time_df.iloc[0:0]

        print(data_match_df.shape[0])

        data_match_df.to_csv('%sdata_match_%s.csv' % (base_path, res),
                             index=False,
                             header=False,
                             encoding='utf-8')
Exemplo n.º 15
0
    def loadData(self, dataset):
        '''
        Load ``<base>/data/<dataset>.csv`` and apply the fixed column names.

        :param dataset: 'train' or 'test'
        :return: the DataFrame; for 'test' the last column name ('numPLikes')
            is left out. For any other value a message is printed and an
            empty string is returned.
        '''
        basepath = getBasePath()
        if dataset not in ('train', 'test'):
            print("Invalid dataset type, only train and test are supported")
            return ""

        column_names = ['id', 'uname', 'url', 'covImgStatus', 'verifStatus', 'textColor', 'pageColor', 'themeColor',
                        'isViewSizeCustom', 'utcOffset', 'location', 'isLocVisible', 'uLanguage', 'creatTimestamp',
                        'uTimeZone', 'numFollowers', 'numPeopleFollowing', 'numStatUpdate', 'numDMessage',
                        'category', 'avgvisitPerSecond', 'avgClick', 'profileImg', 'numPLikes']

        df = pd.read_csv("%s/data/%s.csv" % (basepath, dataset))
        df.columns = column_names[:-1] if dataset == 'test' else column_names
        return df
Exemplo n.º 16
0
    def loadOneRadTempData(self, filename):
        '''
        Load the radiation temperature data of one specific HDF file.

        For every channel in ``self._chan_num_list`` the channel array and its
        lookup table are read from the file. Inside the valid column interval
        of each row (taken from the ``NOMObsColumn`` dataset) every value below
        65535 is replaced by the table entry at that value; values at or above
        65535, and whole rows whose interval has a bound that is not greater
        than -1, are set to 65535.

        :param filename: A file name of HDF file, appended to getBasePath('data').
        :return: list with one converted array per channel.
        '''
        dn_ch_list = []
        rad_temp_list = []

        # Everything is copied out with [:], so the file only has to stay open
        # for the reads; the context manager closes it even if a key is missing.
        with h5py.File(getBasePath('data') + filename, 'r') as H5f:
            self._IMG_VALID_REG = H5f['NOMObsColumn'][:]
            for chan in self._chan_num_list:
                dn_ch_list.append(
                    H5f[config.NOM_KEY_VALUE[self._resolution][chan]][:])
                rad_temp_list.append(
                    H5f[config.CALIB_KEY_VALUE[self._resolution][chan]][:])

        lat_size = config.IMG_SIZE[self._resolution][0]

        # Each dn array is modified in place, so dn_ch_list already holds the
        # converted data when the loops finish.
        for dn, rad in zip(dn_ch_list, rad_temp_list):
            for lat_ind in range(lat_size):
                valid_reg_interval = self._IMG_VALID_REG[lat_ind][:]
                if valid_reg_interval[0] > -1 and valid_reg_interval[1] > -1:
                    for lon_ind in range(valid_reg_interval[0],
                                         valid_reg_interval[1] + 1):
                        tmp = int(dn[lat_ind][lon_ind])
                        if tmp >= 65535:
                            dn[lat_ind][lon_ind] = 65535
                        else:
                            # NOTE(review): the table value is stored back into
                            # the dn array itself; if that array has an integer
                            # dtype the value is truncated - confirm the dtype.
                            dn[lat_ind][lon_ind] = rad[tmp]
                else:
                    dn[lat_ind][:] = 65535

        return dn_ch_list
Exemplo n.º 17
0
 def loadModel(self):
     '''
     Load the Keras model stored at ``self.modelpath``.

     ``self.modelpath`` is a '%'-template that is filled with getBasePath().

     :return: the object returned by tf.keras.models.load_model
     '''
     return tf.keras.models.load_model(self.modelpath % getBasePath())
Exemplo n.º 18
0
 def loadModel(self):
     '''
     Load the saved MLP model from ``<base>/savedModel/model-mlp.h5``.

     :return: the object returned by keras.models.load_model
     '''
     model_file = '%s/savedModel/model-mlp.h5' % getBasePath()
     return keras.models.load_model(model_file)
Exemplo n.º 19
0
 def savemodel(self):
     '''Save ``self.model`` to ``<base>/savedModel/model-mlp.h5``.'''
     model_ref = self.model
     target_file = '%s/savedModel/model-mlp.h5' % getBasePath()
     model_ref.save(target_file)
Exemplo n.º 20
0
 def loadModel(self):
     '''
     Load the saved model from ``<base>/savedModel/model-rf.joblib``.

     :return: the object returned by joblib.load
     '''
     model_file = '%s/savedModel/model-rf.joblib' % getBasePath()
     return joblib.load(model_file)
Exemplo n.º 21
0
 def savemodel(self):
     '''Dump ``self.model`` uncompressed to ``<base>/savedModel/model-rf.joblib``.'''
     model_ref = self.model
     target_file = '%s/savedModel/model-rf.joblib' % getBasePath()
     joblib.dump(model_ref, target_file, compress=0)
Exemplo n.º 22
0
def loadModel(model_name):
    '''
    Load ``<base>/savedModel/<model_name>.joblib``.

    :param model_name: file name without the ``.joblib`` suffix
    :return: the object returned by joblib.load
    '''
    model_file = '%s/savedModel/%s.joblib' % (getBasePath(), model_name)
    return joblib.load(model_file)
Exemplo n.º 23
0
def savemodel(model, modelname):
    '''
    Dump ``model`` uncompressed to ``<base>/savedModel/<modelname>.joblib``.

    :param model: object to serialize
    :param modelname: file name without the ``.joblib`` suffix
    '''
    target_file = '%s/savedModel/%s.joblib' % (getBasePath(), modelname)
    joblib.dump(model, target_file, compress=0)
Exemplo n.º 24
0
 def savemodel(self):
     '''Save ``self.model`` to ``self.modelpath`` filled with getBasePath().'''
     model_ref = self.model
     target_file = self.modelpath % getBasePath()
     model_ref.save(target_file)
Exemplo n.º 25
0
        features = self._get_layer_output(X, -3)  # the last third layer
        return features

    def savemodel(self):
        '''Save ``self.model`` to ``self.modelpath`` filled with getBasePath().'''
        model_ref = self.model
        target_file = self.modelpath % getBasePath()
        model_ref.save(target_file)

    def loadModel(self):
        '''
        Load the Keras model stored at ``self.modelpath``.

        ``self.modelpath`` is a '%'-template that is filled with getBasePath().

        :return: the object returned by tf.keras.models.load_model
        '''
        return tf.keras.models.load_model(self.modelpath % getBasePath())


if __name__ == '__main__':
    # Script entry: load the train/test tables and pull out the profile image names.
    import pandas as pd
    import numpy as np

    # Both CSVs are read from the data directory one level above getBasePath().
    df_train = pd.read_csv("%s/../data/train.csv" % getBasePath())
    df_test = pd.read_csv("%s/../data/test.csv" % getBasePath())
    # Column names applied to the training table, in file column order.
    indexlist_ = [
        'id', 'uname', 'url', 'covImgStatus', 'verifStatus', 'textColor',
        'pageColor', 'themeColor', 'isViewSizeCustom', 'utcOffset', 'location',
        'isLocVisible', 'uLanguage', 'creatTimestamp', 'uTimeZone',
        'numFollowers', 'numPeopleFollowing', 'numStatUpdate', 'numDMessage',
        'category', 'avgvisitPerSecond', 'avgClick', 'profileImg', 'numPLikes'
    ]
    df_train.columns = indexlist_
    # The test table gets every name except the last one ('numPLikes').
    df_test.columns = indexlist_[:-1]

    # Values of the 'profileImg' column of each table.
    imgNamelist_train = df_train['profileImg'].values
    imgNamelist_test = df_test['profileImg'].values

    # NOTE(review): not used in the visible part of the script; presumably a
    # class count for what follows - confirm.
    yclassNum = 10