def load(self, filename='model_template.zip'):
    """Restore this object's state from a previously saved archive.

    BUG FIX: the original did ``self = utils.load_single(filename)``,
    which only rebinds the *local* name ``self`` — the caller's instance
    is never modified, so the method silently loaded nothing. Copy the
    loaded object's attributes onto this instance instead.

    :param filename: path of the archive produced by the matching save().
    """
    loaded = utils.load_single(filename)
    # transplant the de-serialized state onto this live instance
    self.__dict__.update(loaded.__dict__)
# Parse the template's time stamps into datetime objects, then cluster.
#
# Fixes vs. original: the builtin name `str` was shadowed by a loop
# accumulator, characters were concatenated with `+=` (quadratic), and an
# intermediate `nvarin` list was built only to be re-iterated. One pass
# with ''.join() does the same work.
times = []
for raw in export_template.get_times():
    # each entry is a sequence of characters; join into one stamp string,
    # e.g. 'YYYY-MM-DD_HH:MM...' (assumed format — confirm against the
    # netCDF 'Times' variable)
    stamp = ''.join(raw)
    date_part, time_part = stamp.split('_')[0], stamp.split('_')[1]
    y, mo, d = date_part.split('-')
    clock = time_part.split(':')
    times.append(datetime.datetime(int(y), int(mo), int(d),
                                   int(clock[0]), int(clock[1])))
print(times[0:10])

if MODEL_PATH != "":
    print('Loading model.....')
    m = utils.load_single(MODEL_PATH)
    # project raw items into the model's hidden representation
    ds._items = m.get_hidden(ds._items)
    print(ds._items.shape)

clust_obj = Clustering(ds, n_clusters=15, n_init=100, features_first=False)
clust_obj.kmeans()
clust_obj.create_density_descriptors(12, times)
export_template = netCDF_subset(NC_PATH, [700], ['GHT'],
                                lvlname='num_metgrid_levels',
                                timename='Times')
clust_obj.mult_desc_date(export_template)
utils.export_descriptor_mult_dense(outp, export_template, clust_obj)
clust_obj.save(PREFIX + '_mult_dense.zip')
def main():
    """Rank candidate release stations for each sample in a memmapped set.

    For every sample (starting at offset 4000) the closest weather cluster
    is found, the stored dispersions of that cluster are compared against
    the sample's real dispersion with three metrics, and the rank of the
    true origin station under each metric is logged (tab-separated).
    """
    disp_needs_resizing = False
    target_disp_shape = (501, 501)
    target_disp_length = 501 * 501
    datafile = sys.argv[1]

    log('Loading clustering...')
    clustering = load_single(CLUSTERS_FILE)
    log('Done.')

    model = None
    if MODEL_FILE is not None:
        log('Loading model...')
        model = load_single(MODEL_FILE)
        log('Done.')
    else:
        log('Model not given, assumming clustering on raw data.')

    # Iterate through the samples
    mm = np.memmap(datafile, dtype='float32', mode='r', shape=MM_SHAPE)
    for s_i, sample in enumerate(mm[4000:]):
        origin_i = int(sample[SAMPLE_SPEC['origin']])  # real origin
        disp = np.array(sample[SAMPLE_SPEC['disp']])
        disp = preprocessing.maxabs_scale(disp) * 1000  # scale disp [-1..1)
        disp += PAD

        if len(disp) < OR_DISP_SIZE:
            # stored dispersion is smaller than 501x501: remember the side
            # length so cluster dispersions get resized to match
            disp_needs_resizing = True
            side = int(np.sqrt(len(disp)))
            target_disp_shape = (side, side)
            target_disp_length = target_disp_shape[0] * target_disp_shape[1]
        assert np.sqrt(len(disp)).is_integer()  # sanity check...

        weather = np.array(sample[SAMPLE_SPEC['weath']])
        weather = weather.reshape(4096, 1)
        ds = Dataset_transformations(weather, 1, weather.shape)
        ds.normalize()
        ds._items = ds._items.T

        if MODEL_FILE is not None:
            hidden = model.get_hidden(ds._items)
            ds_hidden = Dataset_transformations(hidden, 1, hidden.shape)
        else:
            ds_hidden = ds  # unfortunate naming but...
        assert ds_hidden._items.shape == (1, 4096)

        # Closest cluster to the current weather state
        cl_order = clustering.centroids_distance(ds_hidden)
        cl_cluster = cl_order[0][0]
        cl_score = cl_order[0][1]
        cluster_date = clustering._desc_date[cl_cluster]
        cl_id = reconstruct_date(cluster_date)

        scores = []       # euclidean distance
        scores_euc = []   # NOTE(review): actually cosine distance
        scores_cos = []   # NOTE(review): actually correlation distance
        for path in glob(DISPERSIONS_DIR + '/' + cl_id + '/*' + SPECIES + '.npy'):
            origin = path[path.rfind('/') + 1:]
            origin = origin[:origin.find('-')]
            cl_dispersion = np.load(path)
            if disp_needs_resizing:
                # resize the 501x501 into whatever is needed
                cl_dispersion = imresize(cl_dispersion, target_disp_shape,
                                         mode='F')
            cl_dispersion = preprocessing.maxabs_scale(cl_dispersion) * 1000
            cl_dispersion += PAD

            flat = cl_dispersion.reshape(target_disp_length)
            scor = euclidean(flat, disp)
            scores.append((STATIONS.index(origin), origin, scor))
            scor_euc = cosine(flat, disp)
            scores_euc.append((STATIONS.index(origin), origin, scor_euc))
            scor_cos = correlation(flat, disp)
            scores_cos.append((STATIONS.index(origin), origin, scor_cos))
            assert scor != float('Inf')
            assert scor_euc != float('Inf')
            assert scor_cos != float('Inf')

        scores.sort(key=operator.itemgetter(2))
        scores_euc.sort(key=operator.itemgetter(2))
        scores_cos.sort(key=operator.itemgetter(2))

        # 1-based rank of the true origin under each metric
        pos = pos_euc = pos_cos = 0
        for rank in range(0, len(STATIONS)):
            if origin_i == scores[rank][0]:
                pos = rank + 1
            if origin_i == scores_euc[rank][0]:
                pos_euc = rank + 1
            if origin_i == scores_cos[rank][0]:
                pos_cos = rank + 1
            if pos > 0 and pos_euc > 0 and pos_cos > 0:
                break
        log(str(origin_i) + '\t' + str(pos) + '\t' + str(pos_euc) +
            '\t' + str(pos_cos))
def main():
    """Rank candidate release stations using a CNN-BiLSTM weather encoder.

    Variant of the single-frame evaluation: each prediction consumes a
    window of 8 consecutive six-hour weather slots, encoded through the
    'bidirectional' layer of a Keras CNN-BiLSTM, before cluster matching
    and three-metric dispersion scoring. Logs tab-separated ranks.
    """
    disp_needs_resizing = False
    target_disp_shape = (501, 501)
    target_disp_length = 501 * 501
    datafile = sys.argv[1]

    clustering = load_single(CLUSTERS_FILE)

    model = None
    if MODEL_FILE is not None:
        # Build the architecture in code, then load trained weights and
        # truncate at the 'bidirectional' layer to use it as an encoder.
        import demo
        model = demo.cnn_bilstm()
        model.load_weights(MODEL_FILE)
        from tensorflow.python.keras.models import Model
        model = Model(inputs=model.inputs,
                      outputs=model.get_layer("bidirectional").output)

    # Iterate through the samples, one 8-slot window per sample
    mm = np.memmap(datafile, dtype='float32', mode='r', shape=MM_SHAPE)
    for s_i, sample in enumerate(mm[0:]):
        origin_i = int(sample[SAMPLE_SPEC['origin']])  # real origin
        disp = np.array(sample[SAMPLE_SPEC['disp']])
        disp = preprocessing.maxabs_scale(disp) * 1000  # scale disp [-1..1)
        disp += PAD

        if len(disp) < OR_DISP_SIZE:
            disp_needs_resizing = True
            side = int(np.sqrt(len(disp)))
            target_disp_shape = (side, side)
            target_disp_length = target_disp_shape[0] * target_disp_shape[1]
        assert np.sqrt(len(disp)).is_integer()  # sanity check...

        # Collect the 8-slot weather window; stop when it would run off
        # the end of the memmap.
        window = list()
        if s_i + 8 > len(mm):
            return
        for idx in range(s_i, s_i + 8, 1):
            window.append(mm[idx][[SAMPLE_SPEC['weath']]])
        weather = np.array(window)
        weather = weather.reshape(1, 64, 64, 8, 1)  # conv input layout

        ds = Dataset_transformations(weather, 1, weather.shape)
        ds.normalize()
        ds._items = ds._items.T

        if MODEL_FILE is not None:
            encoded = model.predict(ds._items)
            ds_hidden = Dataset_transformations(encoded, 1, encoded.shape)
        else:
            ds_hidden = ds  # unfortunate naming but...
        assert ds_hidden._items.shape == (1, 4096)

        # Closest cluster to the current weather window
        cl_order = clustering.centroids_distance(ds_hidden)
        cl_cluster = cl_order[0][0]
        cl_score = cl_order[0][1]
        cluster_date = clustering._desc_date[cl_cluster]
        cl_id = reconstruct_date(cluster_date)

        scores = []       # euclidean distance
        scores_euc = []   # NOTE(review): actually cosine distance
        scores_cos = []   # NOTE(review): actually correlation distance
        for path in glob(DISPERSIONS_DIR + '/' + cl_id + '/*' + SPECIES + '.npy'):
            origin = path[path.rfind('/') + 1:]
            origin = origin[:origin.find('-')]
            cl_dispersion = np.load(path)
            if disp_needs_resizing:
                # resize the 501x501 into whatever is needed
                cl_dispersion = imresize(cl_dispersion, target_disp_shape,
                                         mode='F')
            cl_dispersion = preprocessing.maxabs_scale(cl_dispersion) * 1000
            cl_dispersion += PAD

            flat = cl_dispersion.reshape(target_disp_length)
            scor = euclidean(flat, disp)
            scores.append((STATIONS.index(origin), origin, scor))
            scor_euc = cosine(flat, disp)
            scores_euc.append((STATIONS.index(origin), origin, scor_euc))
            scor_cos = correlation(flat, disp)
            scores_cos.append((STATIONS.index(origin), origin, scor_cos))
            assert scor != float('Inf')
            assert scor_euc != float('Inf')
            assert scor_cos != float('Inf')

        scores.sort(key=operator.itemgetter(2))
        scores_euc.sort(key=operator.itemgetter(2))
        scores_cos.sort(key=operator.itemgetter(2))

        # 1-based rank of the true origin under each metric
        pos = pos_euc = pos_cos = 0
        for rank in range(0, len(STATIONS)):
            if origin_i == scores[rank][0]:
                pos = rank + 1
            if origin_i == scores_euc[rank][0]:
                pos_euc = rank + 1
            if origin_i == scores_cos[rank][0]:
                pos_cos = rank + 1
            if pos > 0 and pos_euc > 0 and pos_cos > 0:
                break
        log(str(origin_i) + '\t' + str(pos) + '\t' + str(pos_euc) +
            '\t' + str(pos_cos))
from tensorflow.python.keras.models import Model model = Model(inputs=model.input, outputs=model.get_layer('encoder').output) data = model.predict(seq_in) print("Prediction shape:", data.shape) exit() data = data.reshape((data.shape[0] * data.shape[1], 8 * 8 * 2)) print('reshape encoding..:', data.shape) # Reshape data = data.reshape(data.shape[1], data.shape[0]) ds = Dataset_transformations(data.T, 1000, data.shape) if os.path.exists(PREFIX + CONFIG_NAME + '.zip'): clust_obj = dataset_utils.load_single(PREFIX + CONFIG_NAME + '.zip') else: print 'Doing kmeans.....' clust_obj = Clustering(ds, n_clusters=15, n_init=100, features_first=False) clust_obj.batch_kmeans(10) print 'Saving .....' clust_obj.save(PREFIX + CONFIG_NAME + '.zip') # Descriptor num_min: 1 num_min = 1 times_pos = closest(clust_obj._link, ds._items, num_min, win=4, t=8, save=False)
def main():
    """Load a saved network from argv[1] and visualize its conv layers.

    Fix: the original stored visualize_conv's return value in an unused
    local (`conv`); the call is made for its side effect only.
    """
    nnmodel = utils.load_single(sys.argv[1])
    visualize_conv(nnmodel)
from dataset_utils import load_single clust_obj = load_single('<path to clustering object>') print clust_obj._desc_date from datetime import datetime def reconstruct_date(date_str, dot_nc=False): if dot_nc: date = datetime.strptime( date_str.split('.')[0], '%Y-%m-%d_%H:%M:%S') else: date = datetime.strptime(date_str, '%Y-%m-%d_%H:%M:%S') return datetime.strftime(date, '%y-%m-%d-%H') q = [] for i in clust_obj._desc_date: q.append(reconstruct_date(i))
def main():
    """Load a saved network from argv[1] and visualize its conv layers.

    Fix: removed the pointless alias `newnn = nnmodel` — it rebinds the
    same object and added nothing.
    """
    nnmodel = utils.load_single(sys.argv[1])
    visualize_conv(nnmodel)