# --- Load training data and select the 700 hPa pressure-level slice. ---
# NOTE(review): `inputfile` must be defined before this point — confirm upstream.
dataset = np.load(inputfile)
print("dataset..", dataset.shape)
# Keep level index 1 (the 700 hPa pressure level) for the first 54663 timesteps.
dataset = dataset[:54663, 1, :]
print("700 pressure dataset..", dataset.shape)

# Project-local modules live in ../final_eval; make them importable and
# switch the working directory so their relative paths resolve.
sys.path.insert(1, '../final_eval/')
os.chdir("../final_eval")
from Dataset_transformations import Dataset_transformations
from Clustering import Clustering
from netcdf_subset import netCDF_subset
import dataset_utils as utils

print(os.getcwd())

# The template netCDF file ('40years.nc') lives in the WRF volume directory;
# build a GHT/700-level subset template from it.
sys.path.insert(1, '../wrfhy/wrfvol/')
os.chdir("../wrfhy/wrfvol/")
export_template = netCDF_subset(
    '40years.nc', [700], ['GHT'],
    lvlname='num_metgrid_levels', timename='Times')

# Return to final_eval before building the transformation object.
sys.path.insert(1, '../../final_eval/')
os.chdir("../../final_eval/")
ds = Dataset_transformations(dataset, 1000, dataset.shape)
# Converted from a Python 2 `print` statement for Py2/Py3 consistency with
# the print() calls above; output is identical.
print(ds._items.shape)
# --- Cluster the transformed data and extract descriptor netCDF files. ---
# NOTE(review): `data`, `PREFIX`, `CONFIG_NAME`, `NC_PATH`, `dataset_utils`,
# `closest` and `desc_date` must be defined earlier in this script — confirm.
ds = Dataset_transformations(data.T, 1000, data.shape)

# Reuse a cached clustering result if one exists; otherwise run batch k-means
# (15 clusters, 100 initialisations, 10 batch iterations) and cache it.
if os.path.exists(PREFIX + CONFIG_NAME + '.zip'):
    clust_obj = dataset_utils.load_single(PREFIX + CONFIG_NAME + '.zip')
else:
    print('Doing kmeans.....')
    clust_obj = Clustering(ds, n_clusters=15, n_init=100, features_first=False)
    clust_obj.batch_kmeans(10)
    print('Saving .....')
    clust_obj.save(PREFIX + CONFIG_NAME + '.zip')

# --- Descriptor extraction, num_min = 1 ---
num_min = 1
times_pos = closest(clust_obj._link, ds._items, num_min, win=4, t=8, save=False)
np.save(PREFIX + 'time_pos_desc' + str(num_min) + '.npy', times_pos)
ns = netCDF_subset(NC_PATH, [500, 700, 900], ['GHT'], timename='Times')
desc_date(clust_obj, ns, times_pos)
clust_obj.save(PREFIX + CONFIG_NAME + '_' + str(num_min) + '.zip')
# Create the output directory once, outside the loop (the original re-checked
# os.path.exists on every iteration — loop-invariant work).
if not os.path.exists(PREFIX + 'descriptors1/'):
    os.mkdir(PREFIX + 'descriptors1/')
for c, i in enumerate(times_pos):
    name = PREFIX + 'descriptors1/desc_' + str(c) + '.nc'
    ns.exact_copy_file(name, i[0])

# --- Descriptor extraction, num_min = 10 ---
num_min = 10
times_pos = closest(clust_obj._link, ds._items, num_min, win=4, t=8, save=False)
np.save(PREFIX + 'time_pos_desc' + str(num_min) + '.npy', times_pos)
ns = netCDF_subset(NC_PATH, [500, 700, 900], ['GHT'], timename='Times')
# Configuration constants for the PCA seed experiment.
PREFIX = "PCA_INV_SEED_16"
NC_PATH = '/mnt/disk1/thanasis/data/11_train.nc'

if __name__ == '__main__':
    # Parse the input (required) and output (optional) file paths.
    parser = ArgumentParser(description='Extract variables from netcdf file')
    parser.add_argument('-i', '--input', required=True, type=str,
                        help='input file')
    parser.add_argument('-o', '--output', type=str, help='output file')
    opts = parser.parse_args()
    getter = attrgetter('input', 'output')
    inp, outp = getter(opts)

    # GHT / 700 hPa subset template used for time lookups at the end.
    export_template = netCDF_subset(
        NC_PATH, [700], ['GHT'],
        lvlname='num_metgrid_levels', timename='Times')

    # First PCA: transpose so the reduction runs over the other axis,
    # then transpose back. (All `print` statements below were converted to
    # Python-3-compatible function calls; output is unchanged.)
    pca = PCA(n_components=16)
    items = np.load(inp)
    X = pca.fit_transform(items.T).T
    print('hello')  # NOTE(review): leftover debug print — consider removing
    print(X.shape)

    # Second PCA reduces X again, yielding the 16-component "seed".
    pcared = PCA(n_components=16)
    seed = pcared.fit_transform(X)
    print(seed.shape)

    # Independent PCA on the original (untransposed) items matrix.
    pcared2 = PCA(n_components=16)
    items = pcared2.fit_transform(items)
    print(items.shape)

    ds = Dataset_transformations(items, 1000)
    print(ds._items.shape)

    times = export_template.get_times()