def data():
    HISTORY_LAG = 500
    FUTURE_TARGET = 50

    # Load training data
    raw_dataset = pd.read_csv(
        './data/WindTurbine_Dataset_Hourly-weather-obs_ChurchLawford_2015_Telemetry.csv'
    )

    # Define the labels from the variables/columns that will be used.
    features_considered = [
        'wind_speed', 'stn_pres', 'air_temperature', 'rltv_hum', 'Power_Out_KW'
    ]

    # Create a clean DataFrame with only the data required.
    dataset_copy = raw_dataset.copy()
    dataset = dataset_copy[features_considered]
    # dataset.tail()

    # The dataset contains a few unknown values.
    dataset.isna().sum()
    # Drop them, since that is simplest (mean/default imputation is an alternative).
    dataset = dataset.dropna()

    full_dataset = dataset.copy()
    train_dataset = dataset.sample(frac=0.7, random_state=0)
    test_dataset = dataset.drop(train_dataset.index)

    # Also look at the overall statistics:
    train_stats = train_dataset.describe()
    train_stats.pop('Power_Out_KW')
    train_stats = train_stats.transpose()
    train_mean = train_stats['mean']
    train_std = train_stats['std']
    print('Train data statistics', train_stats)

    # Separate the target value, or "label", from the features. This label is
    # the value that the model will be trained to predict.
    train_labels = train_dataset.pop('Power_Out_KW')
    test_labels = test_dataset.pop('Power_Out_KW')
    full_labels = full_dataset.pop('Power_Out_KW')

    normed_train_data = normalize(train_dataset, train_stats)

    stats = test_dataset.describe()
    stats = stats.transpose()
    normed_test_data = normalize(test_dataset, stats)

    stats = full_dataset.describe()
    stats = stats.transpose()
    normed_full_data = normalize(full_dataset, stats)

    return normed_train_data, normed_test_data, train_labels, test_labels
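# normalize() is defined elsewhere in this project. Given that it is called
# with a DataFrame plus the transposed output of describe(), a minimal sketch
# (an assumption, not the confirmed helper) is z-score standardization:
def normalize(df, stats):
    # Standardize each column using the 'mean' and 'std' columns of describe().T.
    return (df - stats['mean']) / stats['std']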
def save_plot(array, output_dir, save_name, color_code=cm.Spectral):
    '''Saves an array and a related colorbar as separate PNGs.'''
    array_norm = normalize(array)
    im = Image.fromarray(np.uint8(color_code(array_norm) * 255))
    imga = im.convert("RGBA")

    save_location = output_dir + "/" + "pngs/"
    if not os.path.exists(save_location):
        os.makedirs(save_location)
    imga.save(save_location + save_name + ".png", "PNG")

    fig, ax = plt.subplots(figsize=(6, 1))
    fig.subplots_adjust(bottom=0.5)
    cmap = mpl.cm.Spectral
    norm = mpl.colors.Normalize(np.nanmin(array), np.nanmax(array))
    cb1 = mpl.colorbar.ColorbarBase(ax, cmap=cmap, norm=norm,
                                    orientation='horizontal')
    cb1.set_label(save_name)

    save_location2 = output_dir + "/" + "colorbars"
    if not os.path.exists(save_location2):
        os.makedirs(save_location2)
    plt.savefig(save_location2 + "/colorbar_" + save_name + ".png",
                bbox_inches='tight')

    print("png saved in " + save_location + save_name + ".png")
    print("colorbar saved in " + save_location2 + "/colorbar_" + save_name + ".png")
def hit_test(self, position, vector, max_distance=8):
    """ Line of sight search from current position. If a block is
    intersected it is returned, along with the block previously in the line
    of sight. If no block is found, return None, None.

    Parameters
    ----------
    position : tuple of len 3
        The (x, y, z) position to check visibility from.
    vector : tuple of len 3
        The line of sight vector.
    max_distance : int
        How many blocks away to search for a hit.

    """
    m = 8
    x, y, z = position
    dx, dy, dz = vector
    previous = None
    for _ in range(max_distance * m):
        key = normalize((x, y, z))
        if key != previous and key in self.world:
            return key, previous
        previous = key
        x, y, z = x + dx / m, y + dy / m, z + dz / m
    return None, None
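# normalize() here maps a continuous (x, y, z) position to the integer block
# coordinate that contains it. A minimal sketch, assuming simple rounding
# (the usual convention in voxel code like this):
def normalize(position):
    # Round each coordinate to the nearest integer block coordinate.
    x, y, z = position
    return (int(round(x)), int(round(y)), int(round(z)))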
def forward(self, x):
    h = normalize(x, self.dim)
    if self.affine:
        shape = [1 for _ in x.shape]
        shape[self.dim] = self.num_channels
        h = self.weight.view(shape) * h + self.bias.view(shape)
    return h
def normalize_file():
    print('\nLog: applying case normalization to newsfeed')
    with open(rf"{d.default_output_path}\\" + "newsfeed_input.txt", 'r') as file:
        norm_data = f.normalize(file.read())
    norm_data = norm_data.replace('Privatead', 'PrivateAd')
    with open(rf"{d.default_output_path}\\" + "newsfeed_input.txt", 'w') as file:
        file.write(norm_data)
    print('Log: case normalization was successfully applied!')
def setvectors(self):
    self.vectors = []
    self.keys = []
    for item in self.space.table:
        self.vectors.append(fn.normalize(self.space.table[item]))
        # self.vectors.append(self.space.table[item])
        self.keys.append(item)
    # self.vectors = (self.vectors - np.min(self.vectors, 0)) / (np.max(self.vectors, 0) - np.min(self.vectors, 0))
def entropy(input_examples):
    value_list = []
    for value in data.values[-1]:
        count = 0
        for e in input_examples:
            if e[-1] == value:
                count += 1
        value_list.append(count)
    probabilities = normalize(remove_all(0, value_list))
    summation = 0
    for probability in probabilities:
        summation += (-probability * np.log2(probability))
    return summation
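# Worked example of the computation above, assuming remove_all(0, ...) drops
# zero counts and normalize() rescales the rest to probabilities:
# counts [3, 1] -> probabilities [0.75, 0.25]
# H = -0.75*log2(0.75) - 0.25*log2(0.25) ≈ 0.311 + 0.500 = 0.811 bits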
def data():
    TIMESTEP = '720T'
    HISTORY_LAG = 100
    FUTURE_TARGET = 50

    nw_16_url = './data/NW2016.csv'
    # nw_17_url = './data/NW2017.csv'
    # nw_18_url = './data/NW2018.csv'

    NW2016_dataset = pd.read_csv(nw_16_url, header=0, sep=',',
                                 quotechar='"', error_bad_lines=False)
    # NW2017_dataset = pd.read_csv(nw_17_url, header=0, sep=',', quotechar='"', error_bad_lines=False)
    # NW2018_dataset = pd.read_csv(nw_18_url, header=0, sep=',', quotechar='"', error_bad_lines=False)
    # data = pd.concat([NW2016_dataset, NW2017_dataset, NW2018_dataset])
    data = NW2016_dataset

    data = data[data.isna()['psl'] == False]

    # Drop useless columns
    data = data.drop(columns=['lat', 'lon', 'height_sta'], axis=1)
    data['date'] = pd.to_datetime(data['date'], format='%Y%m%d %H:%M')
    data.set_index('date', inplace=True)

    # Interpolate missing values
    data = data.interpolate(method='linear')

    stats = data.describe()
    stats = stats.transpose()
    data = normalize(data, stats)

    resample_ds = data.resample(TIMESTEP).mean()
    train_ds = resample_ds.sample(frac=0.7)
    test_ds = resample_ds.drop(train_ds.index)

    X_train, y_train = segment(train_ds, "td", window=HISTORY_LAG, future=FUTURE_TARGET)
    X_train = X_train.reshape(X_train.shape[0], HISTORY_LAG, 1)
    y_train = y_train.reshape(y_train.shape[0], FUTURE_TARGET, 1)

    # Segment the held-out split for evaluation.
    X_test, y_test = segment(test_ds, "td", window=HISTORY_LAG, future=FUTURE_TARGET)
    X_test = X_test.reshape(X_test.shape[0], HISTORY_LAG, 1)
    y_test = y_test.reshape(y_test.shape[0], FUTURE_TARGET)

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)
    return X_train, X_test, y_train, y_test
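# segment() is defined elsewhere. Given how it is called above, a minimal
# sliding-window sketch (an assumption about the real helper) is:
import numpy as np

def segment(frame, column, window, future):
    # Slice one column into (history, future) pairs for sequence training.
    series = frame[column].values
    X, y = [], []
    for start in range(len(series) - window - future + 1):
        X.append(series[start:start + window])
        y.append(series[start + window:start + window + future])
    return np.array(X), np.array(y)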
def weighted_add_score(ncs, gensim_w2v_model):
    scores = []
    for nc in ncs:
        head, modifier = re.split(' ', nc)
        w1 = get_vector(head, gensim_w2v_model,
                        model_config.input_vector_length, model_config.seed)
        w2 = get_vector(modifier, gensim_w2v_model,
                        model_config.input_vector_length, model_config.seed)
        w_add = weighted_add(w1=w1, w2=w2)
        compound = head + '_' + modifier
        w3 = get_vector(compound, gensim_w2v_model,
                        model_config.output_vector_length, model_config.seed)
        scores.append(cosine_similarity(w3, w_add))
    normalized_scores = normalize(scores)
    normalized_scores = np.subtract(1, normalized_scores)
    return normalized_scores.tolist()
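# normalize() over a plain list of similarity scores is assumed here to be
# min-max scaling, which keeps 1 - normalized_scores within [0, 1]; a sketch,
# not the project's confirmed helper:
import numpy as np

def normalize(scores):
    scores = np.asarray(scores, dtype=float)
    return (scores - scores.min()) / (scores.max() - scores.min())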
def collide(self, position, height):
    """ Checks to see if the player at the given `position` and `height`
    is colliding with any blocks in the world.

    Parameters
    ----------
    position : tuple of len 3
        The (x, y, z) position to check for collisions at.
    height : int or float
        The height of the player.

    Returns
    -------
    position : tuple of len 3
        The new position of the player taking into account collisions.

    """
    # How much overlap with a dimension of a surrounding block you need to
    # have to count as a collision. If 0, touching terrain at all counts as
    # a collision. If .49, you sink into the ground, as if walking through
    # tall grass. If >= .5, you'll fall through the ground.
    pad = 0.25
    p = list(position)
    np = normalize(position)
    for face in FACES:  # check all surrounding blocks
        for i in range(3):  # check each dimension independently
            if not face[i]:
                continue
            # How much overlap you have with this dimension.
            d = (p[i] - np[i]) * face[i]
            if d < pad:
                continue
            for dy in range(height):  # check each height
                op = list(np)
                op[1] -= dy
                op[i] += face[i]
                if tuple(op) not in self.model.world:
                    continue
                p[i] -= (d - pad) * face[i]
                if face == (0, -1, 0) or face == (0, 1, 0):
                    # You are colliding with the ground or ceiling, so stop
                    # falling / rising.
                    self.dy = 0
                break
    return tuple(p)
def decision_stump(data, weight):
    """
    Builds a one-level decision stump, computes its importance, and
    reweights and normalizes the example weights.
    :param data: data is of Dataset class
    :param weight: weights of corresponding stumps/trees
    :return:
    """
    all_examples = data.examples
    all_attributes = data.inputs
    col_num, weights = choose_best_attribute(all_examples, all_attributes, weight)
    hypothesis = Node(col_num, data.attribute_name[col_num])
    if weights[0] > weights[1]:
        hypothesis.add("True", Leaf("True"))
    else:
        hypothesis.add("True", Leaf("False"))
    if weights[2] > weights[3]:
        hypothesis.add("False", Leaf("True"))
    else:
        hypothesis.add("False", Leaf("False"))

    correct, wrong = [], []
    total_error = 0
    for example_id in range(len(all_examples)):
        this_row = all_examples[example_id]
        if this_row[col_num] != this_row[-1]:
            total_error += weight[example_id]
            wrong.append(example_id)
        else:
            correct.append(example_id)

    importance = (1 / 2) * (math.log(abs((1 - total_error) / total_error)))
    # Standard AdaBoost reweighting: down-weight correctly classified
    # examples and up-weight misclassified ones.
    for c in correct:
        weight[c] *= (math.e ** (-1 * importance))
    for i_c in wrong:
        weight[i_c] *= (math.e ** importance)
    weight = normalize(weight)
    return hypothesis, abs(importance), weight
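# The importance computed above is the standard AdaBoost stump weight,
# alpha = 0.5 * ln((1 - err) / err). For example, a stump with weighted
# error err = 0.2 gets alpha = 0.5 * ln(4) ≈ 0.693, so misclassified
# examples are up-weighted by a factor of e^0.693 ≈ 2.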
elif dataset_option == "S":
    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset_SIGNS()
    train_set_x = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    print(train_set_x.shape)
    print(test_set_x_orig.shape)
    test_set_x = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
    print(test_set_x.shape)
    num_px = train_set_x_orig.shape[1]

    X = train_set_x / 255
    Y = train_set_y
    X_test = test_set_x / 255
    Y_test = test_set_y
    print(Y_test)

    X, X_test = normalize(X, X_test)

    # One-hot encoding (a local name is used to avoid shadowing the dict builtin).
    encoded = {'Y': Y, 'Y_test': Y_test}
    encoded = one_hot_encoding(encoded)
    Y = encoded['Y']
    Y_test = encoded['Y_test']
    del encoded

    print(Y)
    print("Y.shape : " + str(Y.shape))
    print("X.shape : " + str(X.shape))
    print("Y_test.shape : " + str(Y_test.shape))
    print("X_test.shape : " + str(X_test.shape))
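# one_hot_encoding() is defined elsewhere. A minimal sketch, assuming the
# labels arrive as integer class ids and the encoded matrix should be
# (num_classes, examples) to match X's column-per-example layout:
import numpy as np

def one_hot_encoding(d):
    for key in ('Y', 'Y_test'):
        labels = np.asarray(d[key]).reshape(-1).astype(int)
        d[key] = np.eye(labels.max() + 1)[labels].T  # (classes, examples)
    return d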
from copy import copy

import pandas as pd

import functions as f

if __name__ == "__main__":
    """ Main program """
    data_file = 'data/stock.csv'
    stock = pd.read_csv(data_file)
    # print(stock.head())

    # Plot raw data
    # interactive_plot(data=stock, title='Prices')

    # Normalize data and plot it
    stock_norm = f.normalize(data=stock)
    # interactive_plot(data=stock_norm, title='Normalized Prices')

    n_cols = len(stock_norm.columns) - 1
    n_runs_mc = 3
    portfolios = f.montecarlo(data=stock_norm, n_runs=n_runs_mc)
    # print(portfolios.head())
    # interactive_plot(data=portfolios, title='MC')

    for i, col in enumerate(portfolios.columns[1:]):
        return_col = 'R_' + str(i)
        portfolio_col = 'P_' + str(i)
        portfolios[return_col] = f.daily_returns(data=portfolios, col=col)
def modImg(img):
    img = skimage.color.rgb2gray(img)
    img = f.splitImage(img, 0.4)
    img = cv2.resize(img, imgShape)
    img = f.normalize(img)
    return img.reshape((1, *imgShape, 1))
    'evictions': new_evic_column
})
df = df.sort_values(by=['data_year', 'status_date'])

# ---------------------------------------------------------------#
# Normalize
norm = []
for i in range(df.data_year.min(), df.data_year.max()):
    if i == 2003:
        query = df.query('data_year == 2002').evictions
        temp = list(df.query('data_year == 2003').evictions)
        for j in range(len(temp)):
            norm = norm + [(temp[j] - query.min()) / (query.max() - query.min())]
    else:
        norm = norm + list(fn.normalize(df.query(f'data_year == {i}').evictions))

# Normalizing the most up-to-date data using the previous year's data, 2019 >> 2018.
# (k must be defined before it is used in the @k queries below.)
k = dt.date.today().year - 1
temp = list(df.query('data_year == @k').evictions)
temp[8] = np.nan  # 'May' data is incomplete, so manually muting it for now
query = df.query('data_year == @k').evictions
for i in range(len(temp)):
    temp[i] = (temp[i] - query.min()) / (query.max() - query.min())
norm = norm + temp
df['normalize'] = norm

# ---------------------------------------------------------------#
# Sort by year and months
if db_taxonomy.size == 0:
    print("Taxonomy not found in the database")
    continue
else:
    taxonomy_id = db_taxonomy['id'].values[0]
    # Taxonomy with no terms
    if term_resp.json() == []:
        continue
    else:
        print("Checking {} terms".format(len(term_resp.json())))
        for term in term_resp.json():
            db_term = terms_db.loc[terms_db['name'] == functions.normalize(term['name'])]
            if db_term.size == 0:
                print("Term not found in the database")
                continue
            else:
                term_id = db_term['id'].values[0]
                termTax_df = termTax_df.append(
                    {
                        'taxonomy_id': taxonomy_id,
                        'term_id': term_id
                    }, ignore_index=True)

# Convert the terms DataFrame to its respective SQL table
# In[4]:

# Remove non-stationarity from the data by first-differencing each series.
data.close = data.close - data.close.shift(1)
data.open = data.open - data.open.shift(1)
data.high = data.high - data.high.shift(1)
data.low = data.low - data.low.shift(1)

# In[5]:

data.dropna(inplace=True)

# In[6]:

for col in data.columns:
    data[col] = f.normalize(data[col])

# In[7]:

split = pd.Timestamp('01-01-2015')

# In[8]:

train = data.loc[:split, ]
test = data.loc[split:, ]

# In[9]:

for col in data.columns:
    train.loc[:, col], test.loc[:, col] = f.scale(train.loc[:, col], test.loc[:, col])
def check_colorization(batch, labels, col_space, Col_Net, classifier, alex, T=1):
    gray = torch.tensor([0.2989, 0.5870, 0.1140])[:, None, None].float()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = batch.shape[0]

    rgb, yuv, lab = [False] * 3
    if col_space == 'rgb':
        rgb = True
    elif col_space == 'lab':
        lab = True
    elif col_space == 'yuv':
        yuv = True

    if yuv or lab:
        Y = batch[:, :1, :, :]

    # Build the gray image.
    if lab or yuv:
        # Set X to the lightness channel of the image.
        X = torch.unsqueeze(batch[:, 0, :, :], 1).to(device)
        # The image is the a and b (or U and V) channels.
        batch = batch[:, 1:, :, :].to(device)
    else:
        # Using the MATLAB formula 0.2989 * R + 0.5870 * G + 0.1140 * B,
        # and load the data to the GPU.
        X = (batch.clone() * gray).sum(1).to(device).view(-1, 1, 96, 96)
        batch = batch.float().to(device)

    # if rgb:
    #     normalize(X, (0,), (1,), True)
    if yuv:
        normalize(X, (.5,), (.5,), True)
    elif lab:
        normalize(X, (50,), (1,), True)

    # Do the colorization.
    col_batch = Col_Net(X).detach().cpu()
    classes = col_batch.shape[1]

    # Construct an RGB image from the network output.
    if classes != 3:  # for lab/yuv and GAN nets
        if classes == 2:
            if yuv or lab:
                col_batch = torch.cat((Y, col_batch), 1).numpy().transpose((0, 2, 3, 1))
        # for -c
        elif classes > 3:
            if yuv:
                col_batch = UV_from_distr(col_batch, T, Y)
            elif lab:
                col_batch = ab_from_distr(col_batch, T, Y)
        if yuv:
            rgb_batch = color.yuv2rgb(col_batch)
        elif lab:
            # lab2rgb doesn't support batches
            rgb_batch = np.zeros_like(col_batch)
            for k in range(len(col_batch)):
                rgb_batch[k] = color.lab2rgb(col_batch[k])
        rgb_batch = torch.tensor(np.array(rgb_batch).transpose((0, 3, 1, 2))).float()
    else:
        rgb_batch = col_batch

    rgb_batch = F.interpolate(rgb_batch, size=(224, 224))
    rgb_batch = normalize(rgb_batch, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    with torch.no_grad():
        class_out = alex(rgb_batch.to(device))
        pred = classifier(class_out)

    correct_pred = (pred.argmax(1) == labels.to(device)).sum().cpu().item()
    return correct_pred, batch_size
y = form["Y"].value # INPUT DATA #X = np.array([0, 1, 2, 3, 4, 5]) X = X.split(",") X = [float(number) for number in X] X = np.asarray(X) # OUTPUT DATA #y = np.array([4, 7, 10, 13, 16, 19]) y = y.split(",") y = [float(number) for number in y] y = np.asarray(y) # WEIGHTS w = w.split(",") w = [float(number) for number in w] w = np.asarray(w) # Add bias term bias = np.ones(X.shape[0]) X = np.array([bias, X]) X = X.transpose() # Compute w = (Xt.X)-1 w = normalize(X, y) f_values = np.dot(X, w) print(f_values[0], ",", f_values[len(f_values) - 1], ":") print(w[0], ",", w[1], ":") print(error_function(y, f_values), ":")
def normalize(img):
    return f.normalize(img)
# Read the data.
with open('iris.csv') as csvfile:
    reader = csv.reader(csvfile)
    cols = [1, 2, 3, 4]
    for row in reader:
        if not labels:
            newRow = list(row[i] for i in cols)
            labels = newRow
        else:
            newRow = list(float(row[i]) for i in cols)
            data.append(newRow)
            species.append(row[5])

# Normalize the columns.
norm = functions.normalize(functions.getColumn(data, 0))
functions.replaceColumn(data, norm, 0)
norm = functions.normalize(functions.getColumn(data, 1))
functions.replaceColumn(data, norm, 1)
norm = functions.normalize(functions.getColumn(data, 2))
functions.replaceColumn(data, norm, 2)
norm = functions.normalize(functions.getColumn(data, 3))
functions.replaceColumn(data, norm, 3)
"""
for i in range(3):
    norm = functions.normalize(functions.getColumn(data, i))
    functions.replaceColumn(data, norm, i)
"""
def reshape(image):
    image = f.splitImage(image)
    image = cv2.resize(image, d_size, interpolation=cv2.INTER_CUBIC)
    return f.normalize(image)
def likelihood(self, sample):
    # von Mises-Fisher density on the sphere with mean direction (0, 0, 1):
    # f(x) = kappa * exp(kappa * mu . x) / (4 * pi * sinh(kappa))
    sample = normalize(sample)
    return math.exp(self.kappa * np.dot(sample, np.array([0, 0, 1]))) \
        * self.kappa / (4 * math.pi * math.sinh(self.kappa))
    norm = np.zeros((bins, bins))
    for k in range(bins):
        for j in range(bins):
            norm[k][j] = 0  # already zero-initialized
            if norm[k][j] < tolerance:
                norm[k][j] = np.nan
    bar.next()
    heat_map_df = pd.DataFrame({'date': d, 'KDE': [norm]})
    heat_map_df.to_pickle(f'{fn.get_base_dir()}/pickled_files/jar/KDE_' + str(i) + '.pkl')
else:
    # KDE bandwidth: Scott's rule, narrowed slightly.
    Sbw = (len(X1)) ** (-1. / (2. + 4.))
    bw = Sbw / 1.15
    A = fn.KDE(X1, Y1, bins, min(X), max(X), min(Y), max(Y), bw)
    density, xxmin, xxmax, yymin, yymax = fn.KDE_plot(A)

    # Normalize, then NaN out cells below the tolerance.
    norm = fn.normalize(density)
    for k in range(bins):
        for j in range(bins):
            if norm[k][j] < tolerance:
                norm[k][j] = np.nan
    bar.next()
    heat_map_df = pd.DataFrame({'date': d, 'KDE': [norm]})
    heat_map_df.to_pickle(f'{fn.get_base_dir()}/pickled_files/jar/KDE_' + str(i) + '.pkl')

# ---------------------------------------------------------------#
print('time to run: ', datetime.now() - startTime)
# ---------------------------------------------------------------#
# Check whether the metadata is composed of taxonomy terms.
if inbcm.cross_dict[install_key][metadata_cross] in inbcm.tax_meta:
    for value in item['metadata'][item_metadata]['value_as_string'].split(" | "):
        # Get an up-to-date terms table from the database for each value.
        terms_db = pd.read_sql_table('termos', dbConnection)

        # Dealing with blank values:
        if functions.normalize(value) == '':
            continue

        # Dealing with term hierarchy
        if " > " in functions.normalize(value):
            value = value.split(" > ")[-1]

        value_db = terms_db.loc[terms_db['name'] == functions.normalize(value)]
        if value_db.size == 0:
            # DataFrame used to insert new terms into the database.
            insert_terms_df = pd.DataFrame(columns=terms_db.columns)
            'Number of houses per Km2']

#####################################################################################################
# Paths of Files
#####################################################################################################

# Static paths
main_path = "D:/UNIVERSIDAD/DANE Dengue/BasesDatos"
dengue_data_file = "Data_Files/DANE_Dengue_Data_2015_2019.csv"
municipality_area_file = "Data_Files/Municipality_Area.csv"

# Reading the static .csv files
municipality_data = pd.read_csv(dengue_data_file,
                                usecols=['State code', 'Municipality code', 'Municipality'])
municipality_area_data = pd.read_csv(municipality_area_file)

# Upper-case the names and strip accents.
for i in range(len(municipality_area_data['Departamento'])):
    municipality_area_data.loc[i, 'Departamento'] = normalize(
        municipality_area_data.loc[i, 'Departamento'].upper())

main_file = pd.read_csv(dengue_data_file)
municipalities_df = []

# List of main directories
states = os.listdir(main_path)

for i in range(len(states)):
    # Dynamic paths
    people_file_path = main_path + '/' + states[i] + '/CNPV2018_5PER_A2_' + states[i][0:2] + '.csv'
    houses_file_path = main_path + '/' + states[i] + '/CNPV2018_2HOG_A2_' + states[i][0:2] + '.csv'
    viv_file_path = main_path + '/' + states[i] + '/CNPV2018_1VIV_A2_' + states[i][0:2] + '.csv'
    health_providers_file = main_path + '/' + states[i] + '/Prestadores_' + states[i] + '.csv'

    # Reading the dynamic .csv files
    people_data = pd.read_csv(people_file_path,
                              usecols=['U_MPIO', 'P_EDADR', 'PA1_GRP_ETNIC',
                                       'CONDICION_FISICA', 'P_ALFABETA',
                                       'P_NIVEL_ANOSR', 'P_TRABAJO', 'P_SEXO'])
    houses_data = pd.read_csv(houses_file_path, usecols=['U_MPIO', 'COD_ENCUESTAS'])
    viv_data = pd.read_csv(viv_file_path,
                           usecols=['U_MPIO', 'VA1_ESTRATO', 'VB_ACU', 'VF_INTERNET'])
months = [str(m).zfill(2) for m in range(1, 13)]
dates = [[y + m for m in months] for y in years]
dates = [lst for sublst in dates for lst in sublst]
indexlist = [["NOAA_" + dates[i], arrays[i]] for i in range(len(arrays))]

# The RMA uses overlapping bi-monthly intervals. This applies that structure.
indexlist = adjustIntervals(indexlist)
indexlist_raw = list(indexlist)  # pre-normalization copy of the adjusted list

# The RMA then indexes each value against a baseline of average values between
# 1948 and two years prior to the current insurance year. This value is
# grouped by interval. The function creates a non-applicable warning:
# "Mean of Empty Slice".
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    indexlist = normalize(indexlist, 1948, 2016)

# In[]

# Function to build the history for each site
def buildHistory(gridid, indexlist):
    loc = np.where(grid == gridid)
    rows = []
    years = [int(index[0][-6:-2]) for index in indexlist]
    for y in range(1948, max(years) + 1):
        year_arrays = [index for index in indexlist if int(index[0][-6:-2]) == y]
        values = [float(array[1][loc]) for array in year_arrays]
        values.insert(0, y)
        rows.append(values)
    df = pd.DataFrame(rows)
print("============Density normalization============") counter = Counter() for point in grid_dict.values(): counter[point.density_norm] += 1 min_density = min(min_density, point.density_norm) max_density = max(max_density, point.density_norm) orderedDict = OrderedDict(sorted(counter.items(), key=lambda t: t[0])) # min_density = 1 print("Min: ", min_density, " Max:", max_density) counter = Counter() for point in grid_dict.values(): point.density_norm = functions.normalize(point.density_norm, min_density, max_density) point.color = functions.logarithmAsymptotic(point.density_norm) counter[point.density_norm] += 1 print("Done in: ", time.time() - lap, " s") lap = time.time() segment = [[],[]] all_segments = [] #lon, lat, color print("============Plotting tracks============") for lon, lat in lon_lat_list: for point in zip(lon,lat): x,y = functions.naiveSearch(cells_width, cells_height, point[0], point[1]) if((x,y) not in grid_dict.keys()): print("unknown point") color = grid_dict[(x,y)].color if(abs(color - last_color) > color_change_threshold and segment_len > min_segment_len): #change of density
import functions as f

# url = f.get_url()
datafile = "total_game_data2.csv"
# f.download_data(url, datafile)
print(datafile)
csv_ret = f.data_separation(datafile)
print("separate")
csv_altered = f.normalize('altered_total.csv')
print("norm")
labels = "class.csv"
arr, m = f.shuffle_dimension('altered_total.csv', labels)
print("shuffle")
f.kmeans_cluster(arr, m, labels)

# output = f.user_data(23, 1500, 3760, 2970, 25, 600, 500, 12)
# df2, labels2 = f.data_seperation(output)
# norm2, csv_ret2 = f.normalize(df2)
# arr2, m2 = f.shuffle_dimension(norm2, labels2)
# f.user_kmeans(arr2, m2, labels2)
import functions as f

# url = f.get_url()
datafile = "total_game_data2.csv"
# f.download_data(url, datafile)
print(datafile)
df, labels = f.data_seperation(datafile)
print(labels)
norm, csv_ret = f.normalize(df)
arr, m = f.shuffle_dimension(norm, labels)
f.kmeans_cluster(arr, m, labels)

# output = f.user_data(23, 1500, 3760, 2970, 25, 600, 500, 12)
# df2, labels2 = f.data_seperation(output)
# norm2, csv_ret2 = f.normalize(df2)
# arr2, m2 = f.shuffle_dimension(norm2, labels2)
# f.user_kmeans(arr2, m2, labels2)
def normalize(self):
    for item in self.table:
        self.table[item] = fn.normalize(self.table[item])
def astar(array, start, goal):
    # print(array)
    # The 8 possible movement directions.
    directions = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
    close_set = set()  # closed list
    came_from = {}  # path map: records the best predecessor of each node
    gscore = {start: 0}  # g values: distance to the start, keyed by coordinate
    # f values: f = g + h, initialized with only the start node's h value.
    fscore = {start: heuristic_cost_estimate(start, goal)}
    openSet = []  # open list, kept as a heap ordered by ascending f
    heappush(openSet, (fscore[start], start))

    while openSet:
        # current := the node in openSet having the lowest fScore value
        current = heappop(openSet)[1]

        # Termination: once the goal is reached, rebuild the path from
        # came_from (each entry points to its parent node).
        if current == goal:
            path = reconstruct_path(came_from, current)
            length = len(path)
            direct = np.array([path[length - 2][0] - path[length - 1][0],
                               path[length - 2][1] - path[length - 1][1]])
            # Return the normalized velocity direction from the current
            # position toward the destination.
            return normalize(direct)

        close_set.add(current)  # move the current node to the closed list

        for i, j in directions:  # inspect all 8 neighbors of the current node
            neighbor = current[0] + i, current[1] + j
            # print("@current")
            # print(current)
            # print("@neighbor")
            # print(neighbor)
            # Check that the neighbor is inside the map and not an obstacle.
            if 0 <= neighbor[0] < array.shape[0]:
                if 0 <= neighbor[1] < array.shape[1]:
                    if array[neighbor[0]][neighbor[1]] == 1:  # 1 marks an obstacle
                        continue
                else:  # array bound y walls
                    continue
            else:  # array bound x walls
                continue

            # Ignore the neighbor which is already evaluated.
            if neighbor in close_set:
                continue

            # g value of reaching the neighbor through the current node,
            # used to decide whether to update.
            tentative_gScore = gscore[current] + dist_between(current, neighbor)

            # Discover a new node: add it to the open list.
            if neighbor not in [entry[1] for entry in openSet]:
                heappush(openSet, (fscore.get(neighbor, np.inf), neighbor))
            # This is not a better path.
            elif tentative_gScore >= gscore.get(neighbor, np.inf):
                continue

            # This path is the best until now. Record it!
            came_from[neighbor] = current
            gscore[neighbor] = tentative_gScore
            fscore[neighbor] = tentative_gScore + heuristic_cost_estimate(neighbor, goal)

    return False
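# heuristic_cost_estimate() and dist_between() are defined elsewhere. With
# 8-way movement, Euclidean distance is an admissible choice for both; a
# sketch under that assumption:
import math

def heuristic_cost_estimate(a, b):
    # Straight-line distance never overestimates the true path cost.
    return math.hypot(a[0] - b[0], a[1] - b[1])

def dist_between(a, b):
    # Step cost between adjacent cells: 1 orthogonal, sqrt(2) diagonal.
    return math.hypot(a[0] - b[0], a[1] - b[1])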
    greater_references = greater_references.astype(int)  # boolean to 0, 1
    greater_references = greater_references.tolist()
    p_at = 0
    for i in range(0, k):
        index = desc_scores_indices[i]
        if greater_references[index] == 1:
            p_at += 1
    return p_at


logging.info('Number of eval elements: %s', len(eval_list))
print('----------------------------------------------------------------------------------------')
print('Spearman rho bet. human score and additive score ', scipy.stats.spearmanr(additive_list, eval_list))
print('Spearman rho bet. human score and reg score ', scipy.stats.spearmanr(reg_list, eval_list))
print('----------------------------------------------------------------------------------------')
print('Spearman rho bet. human score and SDMA', scipy.stats.spearmanr(normalize(sdmas_list), eval_list))
print('Spearman rho bet. human score and NPMI', scipy.stats.spearmanr(normalize(npmis_list), eval_list))
print('Spearman rho bet. human score and PMI', scipy.stats.spearmanr(normalize(pmis_list), eval_list))
print('----------------------------------------------------------------------------------------')
print('Spearman rho bet. human score and mult score ', scipy.stats.spearmanr(normalize(mult), eval_list))
print('Spearman rho bet. human score and mult-dist score ', scipy.stats.spearmanr(normalize(multivar_dist), eval_list))
print('----------------------------------------------------------------------------------------')

k = 50
threshold = 0.6
print(' p_at reg', precision_at(eval_list, reg_list, threshold=threshold, k=k))
print(' p_at add', precision_at(eval_list, additive_list, threshold=threshold, k=k))
print(' p_at mult', precision_at(eval_list, mult, threshold=threshold, k=k))
print(' p_at multivar', precision_at(eval_list, multivar_dist, threshold=threshold, k=k))

# plot.hist(additive_norm, bins=10, color='r')
# plot.show()
]
input_coords = [0, 0, 0, 0]

# System imports from the terminal window
import sys

odds1 = float(sys.argv[2])
oddsX = float(sys.argv[3])
odds2 = float(sys.argv[4])
odds25u = float(sys.argv[5])
odds25o = float(sys.argv[6])
team1 = sys.argv[7]
team2 = sys.argv[8]
matchID = sys.argv[1]

odds1X2 = normalize([odds1, oddsX, odds2])
odds25 = normalize([odds25u, odds25o])


### Get images by selecting part of the screen
def on_click(x, y, button, pressed):
    global count_input
    global input_text
    global c1
    global c2
    global c3
    global c4
    count_input += 1
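# normalize() over decimal odds plausibly converts them to implied
# probabilities and strips the bookmaker's overround; a sketch under that
# assumption, not the confirmed helper:
def normalize(odds):
    implied = [1.0 / o for o in odds]  # raw implied probabilities
    total = sum(implied)               # > 1 because of the overround
    return [p / total for p in implied]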