Example #1
def data():
    HISTORY_LAG = 500
    FUTURE_TARGET = 50
    #Load training data
    raw_dataset = pd.read_csv(
        './data/WindTurbine_Dataset_Hourly-weather-obs_ChurchLawford_2015_Telemetry.csv'
    )

    # Define the labels from the variable/columns that will be used.
    features_considered = [
        'wind_speed', 'stn_pres', 'air_temperature', 'rltv_hum', 'Power_Out_KW'
    ]

    # Create a clean DataFrame with only the columns required
    dataset_copy = raw_dataset.copy()
    dataset = dataset_copy[features_considered]
    # dataset.tail()

    # The dataset contains a few unknown values.
    dataset.isna().sum()

    # Drop them for simplicity (alternatively, fill with average/default values)
    dataset = dataset.dropna()

    full_dataset = dataset.copy()
    train_dataset = dataset.sample(frac=0.7, random_state=0)
    test_dataset = dataset.drop(train_dataset.index)
    # Also look at the overall statistics:
    train_stats = train_dataset.describe()
    train_stats.pop('Power_Out_KW')
    train_stats = train_stats.transpose()
    train_mean = train_stats['mean']
    train_std = train_stats['std']

    print('Train data statistics', train_stats)

    # Separate the target value, or "label", from the features. This label is the value that you will train the model to predict.
    train_labels = train_dataset.pop('Power_Out_KW')
    test_labels = test_dataset.pop('Power_Out_KW')
    full_labels = full_dataset.pop('Power_Out_KW')
    normed_train_data = normalize(train_dataset, train_stats)
    stats = test_dataset.describe()
    stats = stats.transpose()
    normed_test_data = normalize(test_dataset, stats)
    stats = full_dataset.describe()
    stats = stats.transpose()
    normed_full_data = normalize(full_dataset, stats)

    return normed_train_data, normed_test_data, train_labels, test_labels
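The `normalize(dataset, stats)` helper is not shown in this example. Since it receives a feature DataFrame together with the transposed output of `describe()`, it most likely performs a z-score standardization; a minimal sketch under that assumption:

def normalize(dataset, stats):
    # Assumed implementation: z-score each column using the 'mean' and 'std'
    # columns of a transposed DataFrame.describe() result.
    return (dataset - stats['mean']) / stats['std']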
Example #2
def save_plot(array, output_dir, save_name, color_code=cm.Spectral):
    '''Saves an array and a matching colorbar as separate PNGs'''

    array_norm = normalize(array)
    im = Image.fromarray(np.uint8(color_code(array_norm) * 255))
    imga = im.convert("RGBA")

    save_location = output_dir + "/" + "pngs/"
    if not os.path.exists(save_location):
        os.makedirs(save_location)

    imga.save(save_location + save_name + ".png", "PNG")

    fig, ax = plt.subplots(figsize=(6, 1))
    fig.subplots_adjust(bottom=0.5)

    cmap = mpl.cm.Spectral
    norm = mpl.colors.Normalize(np.nanmin(array), np.nanmax(array))

    cb1 = mpl.colorbar.ColorbarBase(ax,
                                    cmap=cmap,
                                    norm=norm,
                                    orientation='horizontal')
    cb1.set_label(save_name)

    save_location2 = output_dir + "/" + "colorbars"
    if not os.path.exists(save_location2):
        os.makedirs(save_location2)

    plt.savefig(save_location2 + "/colorbar_" + save_name + ".png",
                bbox_inches='tight')

    print("png saved in " + save_location + save_name + ".png")
    print("colorbar saved in " + save_location2 + "/colorbar_" + save_name +
          ".png")
Example #3
    def hit_test(self, position, vector, max_distance=8):
        """ Line of sight search from current position. If a block is
        intersected it is returned, along with the block previously in the line
        of sight. If no block is found, return None, None.

        Parameters
        ----------
        position : tuple of len 3
            The (x, y, z) position to check visibility from.
        vector : tuple of len 3
            The line of sight vector.
        max_distance : int
            How many blocks away to search for a hit.

        """
        m = 8
        x, y, z = position
        dx, dy, dz = vector
        previous = None
        for _ in range(max_distance * m):
            key = normalize((x, y, z))
            if key != previous and key in self.world:
                return key, previous
            previous = key
            x, y, z = x + dx / m, y + dy / m, z + dz / m
        return None, None
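In this snippet (and in the `collide` method further down) `normalize` maps a continuous (x, y, z) position to the integer coordinates of the block that contains it. A sketch of how such a helper is typically written in voxel code of this kind (assumed, not taken from the source):

def normalize(position):
    # Round a float position to the integer block coordinates containing it.
    x, y, z = position
    return (int(round(x)), int(round(y)), int(round(z)))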
Example #4
    def forward(self, x):
        h = normalize(x, self.dim)

        if self.affine:
            shape = [1 for _ in x.shape]
            shape[self.dim] = self.num_channels
            h = self.weight.view(shape) * h + self.bias.view(shape)
        return h
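Here `normalize(x, self.dim)` rescales the activations along a single dimension before the optional affine transform. If it is an L2 normalization, PyTorch's built-in functional form behaves the same way (an assumption about this module's intent):

import torch
import torch.nn.functional as F

x = torch.randn(4, 8, 16)
h = F.normalize(x, dim=1)  # unit L2 norm along dim 1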
Example #5
def normalize_file():
    print('\nLog: applying case normalization to newsfeed')
    with open(rf"{d.default_output_path}\\" + "newsfeed_input.txt", 'r') as file:
        norm_data = f.normalize(file.read())
    norm_data = norm_data.replace('Privatead', 'PrivateAd')
    with open(rf"{d.default_output_path}\\" + "newsfeed_input.txt", 'w') as file:
        file.write(norm_data)
    print('Log: case normalization was successfully applied!')
Example #6
    def setvectors(self):
        self.vectors = []
        self.keys = []
        for item in self.space.table:
            self.vectors.append(fn.normalize(self.space.table[item]))
            #self.vectors.append(self.space.table[item])
            self.keys.append(item)
        #self.vectors = (self.vectors - np.min(self.vectors, 0)) / (np.max(self.vectors, 0) - np.min(self.vectors, 0))
        pass
Example #7
    def entropy(input_examples):
        value_list = []
        for value in data.values[-1]:
            count = 0
            for e in input_examples:
                if e[-1] == value:
                    count += 1
            value_list.append(count)

        probabilities = normalize(remove_all(0, value_list))
        summation = 0
        for probability in probabilities:
            summation += (-probability * np.log2(probability))
        return summation
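`remove_all(0, value_list)` drops classes with zero counts and `normalize` turns the remaining counts into probabilities summing to 1, so the loop computes the Shannon entropy. A hedged sketch of both helpers as they are commonly written in AIMA-style utility modules:

def remove_all(item, seq):
    # Return a copy of seq with every occurrence of item removed.
    return [x for x in seq if x != item]

def normalize(numbers):
    # Scale the numbers so they sum to 1 (assumed behavior here).
    total = float(sum(numbers))
    return [n / total for n in numbers]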
Example #8
def data():
    TIMESTEP = '720T'
    HISTORY_LAG = 100
    FUTURE_TARGET = 50
    nw_16_url = './data/NW2016.csv'
    #nw_17_url = './data/NW2017.csv'
    #nw_18_url = './data/NW2018.csv'

    NW2016_dataset = pd.read_csv(nw_16_url, header = 0, sep = ',', quotechar= '"', error_bad_lines = False)
    #NW2017_dataset = pd.read_csv(nw_17_url, header = 0, sep = ',', quotechar= '"', error_bad_lines = False)
    #NW2018_dataset = pd.read_csv(nw_18_url, header = 0, sep = ',', quotechar= '"', error_bad_lines = False)
    
    #data = pd.concat([NW2016_dataset, NW2017_dataset, NW2018_dataset])
    data = NW2016_dataset
    data = data[data['psl'].notna()]
    #Drop useless columns
    data = data.drop(columns = ['lat', 'lon', 'height_sta'], axis = 1)

    data['date'] = pd.to_datetime(data['date'], format='%Y%m%d %H:%M')
    data.set_index('date', inplace=True)
    
    #Interpolate missing values
    data = data.interpolate(method='linear')

    stats = data.describe()
    stats = stats.transpose()
    data = normalize(data, stats)
    resample_ds = data.resample(TIMESTEP).mean()

    train_ds = resample_ds.sample(frac=0.7)
    test_ds = resample_ds.drop(train_ds.index)

    X_train, y_train = segment(train_ds, "td", window = HISTORY_LAG, future = FUTURE_TARGET)
    X_train = X_train.reshape(X_train.shape[0], HISTORY_LAG, 1)
    y_train = y_train.reshape(y_train.shape[0], FUTURE_TARGET, 1)

    X_test, y_test = segment(test_ds, "td", window = HISTORY_LAG, future = FUTURE_TARGET)
    X_test = X_test.reshape(X_test.shape[0], HISTORY_LAG, 1)
    y_test = y_test.reshape(y_test.shape[0], FUTURE_TARGET,)

    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    return X_train, X_test, y_train, y_test
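The `segment(...)` helper is not shown; judging from the call site it builds sliding windows over the resampled series, with `HISTORY_LAG` past values of the "td" column as input and the following `FUTURE_TARGET` values as the target. A hypothetical sketch with names chosen to match the call:

import numpy as np

def segment(ds, column, window, future):
    # Hypothetical sliding-window builder: each X row holds `window` past
    # values; the matching y row holds the `future` values that follow it.
    series = ds[column].values
    X, y = [], []
    for start in range(len(series) - window - future + 1):
        X.append(series[start:start + window])
        y.append(series[start + window:start + window + future])
    return np.array(X), np.array(y)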
Example #9
def weighted_add_score(ncs, gensim_w2v_model):
    scores = []
    for nc in ncs:
        head, modifier = re.split(' ', nc)
        w1 = get_vector(head, gensim_w2v_model,
                        model_config.input_vector_length, model_config.seed)
        w2 = get_vector(modifier, gensim_w2v_model,
                        model_config.input_vector_length, model_config.seed)
        w_add = weighted_add(w1=w1, w2=w2)
        compound = head + '_' + modifier
        w3 = get_vector(compound, gensim_w2v_model,
                        model_config.output_vector_length, model_config.seed)
        scores.append(cosine_similarity(w3, w_add))
    normalized_scores = normalize(scores)
    normalized_scores = np.subtract(1, normalized_scores)
    return normalized_scores.tolist()
Example #10
    def collide(self, position, height):
        """ Checks to see if the player at the given `position` and `height`
        is colliding with any blocks in the world.

        Parameters
        ----------
        position : tuple of len 3
            The (x, y, z) position to check for collisions at.
        height : int or float
            The height of the player.

        Returns
        -------
        position : tuple of len 3
            The new position of the player taking into account collisions.

        """
        # How much overlap with a dimension of a surrounding block you need to
        # have to count as a collision. If 0, touching terrain at all counts as
        # a collision. If .49, you sink into the ground, as if walking through
        # tall grass. If >= .5, you'll fall through the ground.
        pad = 0.25
        p = list(position)
        np = normalize(position)
        for face in FACES:  # check all surrounding blocks
            for i in range(3):  # check each dimension independently
                if not face[i]:
                    continue
                # How much overlap you have with this dimension.
                d = (p[i] - np[i]) * face[i]
                if d < pad:
                    continue
                for dy in range(height):  # check each height
                    op = list(np)
                    op[1] -= dy
                    op[i] += face[i]
                    if tuple(op) not in self.model.world:
                        continue
                    p[i] -= (d - pad) * face[i]
                    if face == (0, -1, 0) or face == (0, 1, 0):
                        # You are colliding with the ground or ceiling, so stop
                        # falling / rising.
                        self.dy = 0
                    break
        return tuple(p)
Example #11
def decision_stump(data, weight):
    """
    Does everything and normalizes the weights
    :param data: data is of Dataset class
    :param weight: weights of corresponding stumps/trees
    :return:
    """
    all_examples = data.examples
    all_attributes = data.inputs
    col_num, weights = choose_best_attribute(all_examples, all_attributes,
                                             weight)
    hypothesis = Node(col_num, data.attribute_name[col_num])

    if weights[0] > weights[1]:
        hypothesis.add("True", Leaf("True"))
    else:
        hypothesis.add("True", Leaf("False"))

    if weights[2] > weights[3]:
        hypothesis.add("False", Leaf("True"))
    else:
        hypothesis.add("False", Leaf("False"))

    correct, wrong = [], []
    total_error = 0
    for example_id in range(len(all_examples)):
        this_row = all_examples[example_id]
        if this_row[col_num] != this_row[-1]:
            total_error += weight[example_id]
            wrong.append(example_id)
        else:
            correct.append(example_id)

    importance = (1 / 2) * (math.log(abs((1 - total_error) / total_error)))

    for c in correct:
        weight[c] *= (math.e**importance)
    for i_c in wrong:
        weight[i_c] *= (math.e**(-1 * importance))

    weight = normalize(weight)

    return hypothesis, abs(importance), weight
Example #12
    elif dataset_option == "S":
        train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset_SIGNS()
        train_set_x = train_set_x_orig.reshape(train_set_x_orig.shape[0],-1).T
        print(train_set_x.shape)
        print(test_set_x_orig.shape)
        test_set_x = test_set_x_orig.reshape(test_set_x_orig.shape[0],-1).T
        print(test_set_x.shape)
        num_px = train_set_x_orig.shape[1]
        
        X = train_set_x/255
        Y = train_set_y
        X_test = test_set_x/255
        Y_test = test_set_y
        print(Y_test)

        X, X_test = normalize(X, X_test)

        # One Hot Encoding
        encoding_dict = {'Y': Y,
                         'Y_test': Y_test}
        encoding_dict = one_hot_encoding(encoding_dict)
        Y = encoding_dict['Y']
        Y_test = encoding_dict['Y_test']
        del encoding_dict
        print(Y)

        print("Y.shape : " + str(Y.shape))
        print("X.shape : " + str(X.shape))
        print("Y_test.shape : " + str(Y_test.shape))
        print("X_test.shape : " + str(X_test.shape))
Example #13
from copy import copy

import pandas as pd

import functions as f

if __name__ == "__main__":
    """ Main program """

    data_file = 'data/stock.csv'
    stock = pd.read_csv(data_file)
    #print(stock.head())

    # Plot raw data
    #interactive_plot(data = stock, title = 'Prices')

    # Normalize data and plot it
    stock_norm = f.normalize(data=stock)
    #interactive_plot(data = stock_norm, title = 'Normalized Prices')

    n_cols = len(stock_norm.columns) - 1
    n_runs_mc = 3

    portfolios = f.montecarlo(data=stock_norm, n_runs=n_runs_mc)
    #print(portfolios.head())

    #interactive_plot(data=portfolios, title='MC')

    for i, col in enumerate(portfolios.columns[1:]):
        return_col = 'R_' + str(i)
        portfolio_col = 'P_' + str(i)
        portfolios[return_col] = f.daily_returns(data=portfolios, col=col)
Example #14
def modImg(img):
    img = skimage.color.rgb2gray(img)
    img = f.splitImage(img, 0.4)
    img = cv2.resize(img, imgShape)
    img = f.normalize(img)
    return img.reshape((1, *(imgShape), 1))
Example #15
    'evictions': new_evic_column
})
df = df.sort_values(by=['data_year', 'status_date'])
#---------------------------------------------------------------#
# Normalize
norm = []
for i in range(df.data_year.min(), df.data_year.max()):
    if i == 2003:
        query = df.query('data_year == 2002').evictions
        temp = list(df.query('data_year == 2003').evictions)
        for j in range(len(list(df.query('data_year == 2003').evictions))):
            norm = norm + [(temp[j] - query.min()) /
                           (query.max() - query.min())]
    else:
        norm = norm + (list(
            fn.normalize(df.query(f'data_year == {i}').evictions)))
temp = list(df.query('data_year == @k').evictions)

temp[8] = np.nan  #'May' data is incomplete, so manually muting it for now

k = dt.date.today().year - 1
query = df.query('data_year == @k').evictions
for i in range(
        len(temp)
):  #Normalizing most up-to-date data using previous year's data, 2019 >> 2018
    temp[i] = (temp[i] - query.min()) / (query.max() - query.min())

norm = norm + temp
df['normalize'] = norm
#---------------------------------------------------------------#
# Sort by year and months
Example #16
            if db_taxonomy.size == 0:
                print("Taxonomia não encontrada do banco de dados")
                continue
            else:
                taxonomy_id = db_taxonomy['id'].values[0]

            #Taxonomy with no terms
            if term_resp.json() == []:
                continue

            else:
                print("Verificando {} termos".format(len(term_resp.json())))

                for term in term_resp.json():
                    db_term = terms_db.loc[terms_db['name'] ==
                                           functions.normalize(term['name'])]

                    if db_term.size == 0:
                        print("Termo não encontrado no banco de dados")
                        continue

                    else:
                        term_id = db_term['id'].values[0]
                        termTax_df = termTax_df.append(
                            {
                                'taxonomy_id': taxonomy_id,
                                'term_id': term_id
                            },
                            ignore_index=True)

#Convert the terms DataFrame to its respective SQL table
Example #17
# In[4]:

# remove unstationarity from data
data.close = data.close - data.close.shift(1)
data.open = data.open - data.open.shift(1)
data.high = data.high - data.high.shift(1)
data.low = data.low - data.low.shift(1)

# In[5]:

data.dropna(inplace=True)

# In[6]:

for col in data.columns:
    data[col] = f.normalize(data[col])

# In[7]:

split = pd.Timestamp('01-01-2015')

# In[8]:

train = data.loc[:split, ]
test = data.loc[split:, ]

# In[9]:

for col in data.columns:
    train.loc[:, col], test.loc[:, col] = f.scale(train.loc[:, col],
                                                  test.loc[:, col])
Example #18
def check_colorization(batch, labels, col_space, Col_Net, classifier, alex, T=1):
    gray = torch.tensor([0.2989 ,0.5870, 0.1140])[:,None,None].float()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = batch.shape[0]

    rgb, yuv, lab = [False]*3
    if col_space == 'rgb':
        rgb =True
    elif col_space == 'lab':
        lab = True
    elif col_space == 'yuv':
        yuv = True

    if yuv or lab:
        Y = batch[:,:1,:,:]
    #build gray image
    if lab or yuv:
        X=torch.unsqueeze(batch[:,0,:,:],1).to(device) #set X to the Lightness of the image
        batch=batch[:,1:,:,:].to(device) #image is a and b channel
    else:
        #using the matlab formula: 0.2989 * R + 0.5870 * G + 0.1140 * B and load data to gpu
        X=(batch.clone()*gray).sum(1).to(device).view(-1,1,96,96)
        batch=batch.float().to(device)
    #if rgb:
    #    normalize(X,(0,),(1,),True)
    if yuv:
        normalize(X,(.5,),(.5,),True)
    elif lab:
        normalize(X,(50,),(1,),True)
    
    #do colorization
    col_batch = Col_Net(X).detach().cpu()
    classes = col_batch.shape[1]
    
    #construct rgb image from network output
    if classes != 3:
        #for lab/yuv and GAN net
        if classes == 2:
            if yuv or lab:
                col_batch = torch.cat((Y, col_batch), 1).numpy().transpose((0,2,3,1))
        #for -c
        elif classes > 3:
            if yuv:
                col_batch = UV_from_distr(col_batch, T, Y)
            elif lab:
                col_batch = ab_from_distr(col_batch, T, Y)
        if yuv:
            rgb_batch = (color.yuv2rgb(col_batch))
        elif lab: #lab2rgb doesn't support batches
            rgb_batch=np.zeros_like(col_batch)
            for k in range(len(col_batch)):
                rgb_batch[k] = color.lab2rgb(col_batch[k])

        rgb_batch = torch.tensor(np.array(rgb_batch).transpose((0,3,1,2))).float()
    else:
        rgb_batch = col_batch
    
    rgb_batch = F.interpolate(rgb_batch, size = (224,224))
    rgb_batch = normalize(rgb_batch,[0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
    
    with torch.no_grad():
        class_out = alex(rgb_batch.to(device))
        pred = classifier(class_out)

    correct_pred = (pred.argmax(1)==labels.to(device)).sum().cpu().item()
    
    return correct_pred, batch_size
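The three-argument `normalize(tensor, mean, std, inplace)` calls above match the signature of `torchvision.transforms.functional.normalize`, which standardizes each channel as (x - mean) / std. Assuming that is the function being imported, a usage example:

import torch
from torchvision.transforms.functional import normalize

batch = torch.rand(8, 3, 224, 224)
# Channel-wise (x - mean) / std, the same call pattern as in the snippet above.
batch = normalize(batch, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])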
Example #19
y = form["Y"].value

# INPUT DATA
#X = np.array([0, 1, 2, 3, 4, 5])
X = X.split(",")
X = [float(number) for number in X]
X = np.asarray(X)
# OUTPUT DATA
#y = np.array([4, 7, 10, 13, 16, 19])
y = y.split(",")
y = [float(number) for number in y]
y = np.asarray(y)

# WEIGHTS
w = w.split(",")
w = [float(number) for number in w]
w = np.asarray(w)

# Add bias term
bias = np.ones(X.shape[0])
X = np.array([bias, X])
X = X.transpose()

# Compute w = (X^T X)^-1 X^T y via the normal equation
w = normalize(X, y)

f_values = np.dot(X, w)
print(f_values[0], ",", f_values[len(f_values) - 1], ":")
print(w[0], ",", w[1], ":")
print(error_function(y, f_values), ":")
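Despite its name, `normalize(X, y)` here returns the fitted weight vector, as the comment and the subsequent `np.dot(X, w)` prediction suggest. A sketch of the normal-equation solve the comment describes, assuming that is what the helper does:

import numpy as np

def solve_normal_equation(X, y):
    # w = (X^T X)^{-1} X^T y, written as a linear solve for numerical stability.
    return np.linalg.solve(X.T @ X, X.T @ y)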
Example #20
def normalize(img):
    return f.normalize(img)
Example #21
#read data
with open('iris.csv') as csvfile:
    reader = csv.reader(csvfile)
    cols = [1, 2, 3, 4]
    for row in reader:

        if (not labels):
            newRow = list(row[i] for i in cols)
            labels = newRow
        else:
            newRow = list(float(row[i]) for i in cols)
            data.append(newRow)
            species.append(row[5])

#normalize columns
norm = functions.normalize(functions.getColumn(data, 0))
functions.replaceColumn(data, norm, 0)

norm = functions.normalize(functions.getColumn(data, 1))
functions.replaceColumn(data, norm, 1)

norm = functions.normalize(functions.getColumn(data, 2))
functions.replaceColumn(data, norm, 2)

norm = functions.normalize(functions.getColumn(data, 3))
functions.replaceColumn(data, norm, 3)
"""
for i in range(3):
    norm = functions.normalize(functions.getColumn(data,i))
    functions.replaceColumn(data,norm,i)
"""
Example #22
def reshape(image):
    image = f.splitImage(image)
    image = cv2.resize(image, d_size, interpolation=cv2.INTER_CUBIC)
    return f.normalize(image)
Example #23
    def likelihood(self, sample):
        sample = normalize(sample)
        return math.exp(self.kappa * np.dot(sample, np.array(
            [0, 0, 1]))) * self.kappa / (4 * math.pi * math.sinh(self.kappa))
Example #24
			norm = np.zeros((bins,bins))
			for k in range(bins):
				for j in range(bins):
					norm[k][j] = 0
					if norm[k][j] < tolerance:
						norm[k][j] = np.nan
					bar.next()

			heat_map_df=pd.DataFrame({'date':d,'KDE':[norm]})
			heat_map_df.to_pickle(f'{fn.get_base_dir()}/pickled_files/jar/KDE_' + str(i) + '.pkl')
		else:
			#kde
			Sbw=(len(X1))**(-1./(2.+4.))
			bw=Sbw/1.15
			A = fn.KDE(X1,Y1,bins,min(X),max(X),min(Y),max(Y),bw)
			density, xxmin, xxmax, yymin, yymax = fn.KDE_plot(A)
			#normalize and nan it
			norm = fn.normalize(density)
			#Norm = normalize(density)
			for k in range(bins):
				for j in range(bins):
					if norm[k][j] < tolerance:
						norm[k][j] = np.nan
					bar.next()

			heat_map_df=pd.DataFrame({'date':d,'KDE':[norm]})
			heat_map_df.to_pickle(f'{fn.get_base_dir()}/pickled_files/jar/KDE_' + str(i) + '.pkl')
#---------------------------------------------------------------#
print('time to run: ',datetime.now() - startTime)
#---------------------------------------------------------------#
Example #25
                                #Check whether the metadata field is made up of taxonomy terms
                                if inbcm.cross_dict[install_key][
                                        metadata_cross] in inbcm.tax_meta:

                                    for value in item['metadata'][
                                            item_metadata][
                                                'value_as_string'].split(
                                                    " | "):

                                        #Get terms table from database updated for each value
                                        terms_db = pd.read_sql_table(
                                            'termos', dbConnection)

                                        #Dealing with blank values:
                                        if functions.normalize(value) == '':
                                            continue

                                        #Dealing with term hierarchy
                                        if " > " in functions.normalize(value):
                                            value = value.split(" > ")[-1]

                                        value_db = terms_db.loc[
                                            terms_db['name'] ==
                                            functions.normalize(value)]

                                        if value_db.size == 0:

                                            #Df to insert new terms to database
                                            insert_terms_df = pd.DataFrame(
                                                columns=terms_db.columns)
Example #26
                'Number of houses per Km2']

#####################################################################################################
# Paths of Files
#####################################################################################################

# Static Paths
main_path = "D:/UNIVERSIDAD/DANE Dengue/BasesDatos"
dengue_data_file = "Data_Files/DANE_Dengue_Data_2015_2019.csv"
municipality_area_file = "Data_Files/Municipality_Area.csv"
# Reading the static .csv files
municipality_data = pd.read_csv(dengue_data_file, usecols=['State code','Municipality code', 'Municipality'])
municipality_area_data = pd.read_csv(municipality_area_file)
# Uppercase and strip accents
for i in range(len(municipality_area_data['Departamento'])):
    municipality_area_data.loc[i,'Departamento'] = normalize(municipality_area_data.loc[i,'Departamento'].upper())
main_file = pd.read_csv(dengue_data_file)
municipalities_df = []
# List of main directories
states = os.listdir(main_path)

for i in range(len(states)):
    # Dynamic Paths
    people_file_path = main_path + '/' + states[i] + '/CNPV2018_5PER_A2_' + states[i][0:2] + '.csv'
    houses_file_path = main_path + '/' + states[i] + '/CNPV2018_2HOG_A2_' + states[i][0:2] + '.csv'
    viv_file_path = main_path + '/' + states[i] + '/CNPV2018_1VIV_A2_' + states[i][0:2] + '.csv'
    health_providers_file = main_path + '/' + states[i] + '/Prestadores_' + states[i] + '.csv'
    # Reading the dynamic .csv files
    people_data = pd.read_csv(people_file_path, usecols=['U_MPIO', 'P_EDADR', 'PA1_GRP_ETNIC', 'CONDICION_FISICA', 'P_ALFABETA', 'P_NIVEL_ANOSR', 'P_TRABAJO', 'P_SEXO'])
    houses_data = pd.read_csv(houses_file_path, usecols=['U_MPIO','COD_ENCUESTAS'])
    viv_data = pd.read_csv(viv_file_path, usecols=['U_MPIO', 'VA1_ESTRATO', 'VB_ACU', 'VF_INTERNET'])
Example #27
months = [str(m).zfill(2) for m in range(1, 13)]
dates = [[y + m for m in months] for y in years]
dates = [lst for sublst in dates for lst in sublst]
indexlist = [["NOAA_" + dates[i], arrays[i]] for i in range(len(arrays))]

# The RMA uses overlapping bi-monthly intervals. This applies that structure.
indexlist = adjustIntervals(indexlist)
indexlist_raw = adjustIntervals(indexlist)

# The RMA then indexes each value against a baseline of average values between
# 1948 and two years prior to the current insurance year. This value is
# grouped by interval. The function raises a harmless "Mean of empty slice"
# RuntimeWarning, which is suppressed below.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    indexlist = normalize(indexlist, 1948, 2016)


# In[] # Function to build history for each site
def buildHistory(gridid, indexlist):
    loc = np.where(grid == gridid)
    rows = []
    years = [int(index[0][-6:-2]) for index in indexlist]
    for y in range(1948, max(years) + 1):
        year_arrays = [
            index for index in indexlist if int(index[0][-6:-2]) == y
        ]
        values = [float(array[1][loc]) for array in year_arrays]
        values.insert(0, y)
        rows.append(values)
    df = pd.DataFrame(rows)
Example #28
print("============Density normalization============")
counter = Counter()
for point in grid_dict.values():
    counter[point.density_norm] += 1
    min_density = min(min_density, point.density_norm)
    max_density = max(max_density, point.density_norm)
orderedDict = OrderedDict(sorted(counter.items(), key=lambda t: t[0]))



# min_density = 1
print("Min: ", min_density, " Max:", max_density)

counter = Counter()
for point in grid_dict.values():
    point.density_norm = functions.normalize(point.density_norm, min_density, max_density)
    point.color = functions.logarithmAsymptotic(point.density_norm)
    counter[point.density_norm] += 1

print("Done in: ", time.time() - lap, " s")
lap = time.time()
segment = [[],[]]
all_segments = [] #lon, lat, color
print("============Plotting tracks============")
for lon, lat in lon_lat_list:
    for point in zip(lon,lat):
        x,y = functions.naiveSearch(cells_width, cells_height, point[0], point[1])
        if((x,y) not in grid_dict.keys()):
            print("unknown point")
        color = grid_dict[(x,y)].color
        if(abs(color - last_color) > color_change_threshold and segment_len > min_segment_len): #change of density
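In this example `functions.normalize(value, min_value, max_value)` takes explicit bounds, so it is presumably the scalar min-max form; a hedged sketch:

def normalize(value, min_value, max_value):
    # Assumed scalar min-max scaling into [0, 1] given explicit bounds.
    return (value - min_value) / (max_value - min_value)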
Example #29
import functions as f

#url = f.get_url()

datafile = "total_game_data2.csv"

#f.download_data(url, datafile)

print(datafile)

csv_ret = f.data_separation(datafile)
print("separate")
csv_altered = f.normalize('altered_total.csv')
print("norm")
labels = "class.csv"

arr, m = f.shuffle_dimension('altered_total.csv', labels)
print("shuffle")
f.kmeans_cluster(arr, m, labels)

#output = f.user_data(23, 1500, 3760, 2970, 25, 600, 500, 12)

#df2, labels2 = f.data_seperation(output)

#norm2, csv_ret2 = f.normalize(df2)

#arr2, m2 = f.shuffle_dimension(norm2, labels2)

#f.user_kmeans(arr2, m2, labels2)

Example #30
import functions as f

#url = f.get_url()

datafile = "total_game_data2.csv"

#f.download_data(url, datafile)

print(datafile)

df, labels = f.data_seperation(datafile)
print(labels)
norm, csv_ret = f.normalize(df)

arr, m = f.shuffle_dimension(norm, labels)

f.kmeans_cluster(arr, m, labels)

#output = f.user_data(23, 1500, 3760, 2970, 25, 600, 500, 12)

#df2, labels2 = f.data_seperation(output)

#norm2, csv_ret2 = f.normalize(df2)

#arr2, m2 = f.shuffle_dimension(norm2, labels2)

#f.user_kmeans(arr2, m2, labels2)



Example #31
    def normalize(self):
        for item in self.table:
            self.table[item] = fn.normalize(self.table[item])
Example #32
def astar(array, start, goal):
    # print(array)
    directions = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1),
                  (1, -1), (-1, 1), (-1, -1)]  # 8 neighbor directions

    close_set = set()  # closed list
    came_from = {}     # best-path parents (each node maps to its predecessor)
    gscore = {start: 0}  # g-score dict: key is a grid coordinate, value is the distance from the start
    # f-score dict, f = g + h; initialized with only the start node's h value
    fscore = {start: heuristic_cost_estimate(start, goal)}

    openSet = []     # open list, kept as a binary heap
    heappush(openSet, (fscore[start], start))   # push the start node; the heap is ordered by ascending f-score

    # while the open list is not empty
    while openSet:
        # current := the node in openSet having the lowest fScore value
        current = heappop(openSet)[1]   # pop the node with the smallest f-score

        # termination condition
        if current == goal:             # the goal has been reached, so build and return the path
            # rebuild the path by following the came_from parent pointers
            path = reconstruct_path(came_from, current)
            length = len(path)
            direct = np.array(
                [path[length-2][0] - path[length-1][0], path[length-2][1] - path[length-1][1]])
            return normalize(direct)  # return the unit velocity direction from the current position toward the goal
        close_set.add(current)  # move the current node into the closed list

        for i, j in directions:      # examine each of the 8 neighbors of the current node
            neighbor = current[0] + i, current[1] + j   # neighbor coordinate
            # print("@current")
            # print(current)
            # print("@neighbor")
            # print(neighbor)
            # check that the neighbor lies inside the map and is not an obstacle
            if 0 <= neighbor[0] < array.shape[0]:     # inside map bounds (x)
                if 0 <= neighbor[1] < array.shape[1]:  # inside map bounds (y)
                    if array[neighbor[0]][neighbor[1]] == 1:   # 1 marks an obstacle
                        continue  # skip obstacles
                else:
                    # array bound y walls
                    continue  # skip cells outside the map
            else:
                # array bound x walls
                continue  # skip cells outside the map

            # Ignore the neighbor which is already evaluated.
            if neighbor in close_set:
                continue  # skip nodes already in the closed list

            # g-score of reaching the neighbor through the current node, used to decide whether to update it
            tentative_gScore = gscore[current] + dist_between(current, neighbor)

            # if the neighbor is not yet in the open list, add it
            # Discover a new node
            if neighbor not in [i[1] for i in openSet]:
                heappush(openSet, (fscore.get(neighbor, np.inf), neighbor))
            # otherwise skip the neighbor if this path is not better (its g-score is not smaller)
            # This is not a better path.
            elif tentative_gScore >= gscore.get(neighbor, np.inf):
                continue
            # otherwise this node lies on the best path so far; update came_from
            # This path is the best until now. Record it!
            came_from[neighbor] = current  # point the neighbor's parent at the current node
            gscore[neighbor] = tentative_gScore
            fscore[neighbor] = tentative_gScore + \
                heuristic_cost_estimate(neighbor, goal)

    return False
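`normalize(direct)` scales the first step along the recovered path to unit length so it can be used directly as a velocity direction. A sketch of a unit-vector normalize with a guard for zero-length input (assumed, not the project's actual helper):

import numpy as np

def normalize(vector):
    # Scale a vector to unit length; return it unchanged if it is (near) zero.
    norm = np.linalg.norm(vector)
    return vector / norm if norm > 1e-12 else vector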
Example #33
        greater_references = greater_references.astype(int) # boolean to 0, 1
        greater_references = greater_references.tolist()
        p_at = 0
        for i in range(0, k):
            index = desc_scores_indices[i]
            if greater_references[index] == 1:
                p_at += 1
        return p_at

    logging.info('Number of eval elements: %s', len(eval_list))

    print('----------------------------------------------------------------------------------------')
    print('Spearman rho bet. human score and additive score ', scipy.stats.spearmanr(additive_list, eval_list))
    print('Spearman rho bet. human score and reg score ', scipy.stats.spearmanr(reg_list, eval_list))
    print('----------------------------------------------------------------------------------------')
    print('Spearman rho bet. human score and SDMA', scipy.stats.spearmanr(normalize(sdmas_list), eval_list))
    print('Spearman rho bet. human score and NPMI', scipy.stats.spearmanr(normalize(npmis_list), eval_list))
    print('Spearman rho bet. human score and PMI', scipy.stats.spearmanr(normalize(pmis_list), eval_list))
    print('----------------------------------------------------------------------------------------')
    print('Spearman rho bet. human score and mult score ', scipy.stats.spearmanr(normalize(mult), eval_list))
    print('Spearman rho bet. human score and mult-dist score ', scipy.stats.spearmanr(normalize(multivar_dist), eval_list))
    print('----------------------------------------------------------------------------------------')
    k = 50
    threshold = 0.6
    print(' p_at reg',  precision_at(eval_list, reg_list, threshold=threshold, k=k))
    print(' p_at add',  precision_at(eval_list, additive_list, threshold=threshold, k=k))
    print(' p_at mult',  precision_at(eval_list, mult, threshold=threshold, k=k))
    print(' p_at multivar',  precision_at(eval_list, multivar_dist, threshold=threshold, k=k))

    # plot.hist(additive_norm, bins=10, color='r')
    # plot.show()
Example #34
]
input_coords = [0, 0, 0, 0]

#System imports from terminal window
import sys

odds1 = float(sys.argv[2])
oddsX = float(sys.argv[3])
odds2 = float(sys.argv[4])
odds25u = float(sys.argv[5])
odds25o = float(sys.argv[6])
team1 = sys.argv[7]
team2 = sys.argv[8]

matchID = sys.argv[1]
odds1X2 = normalize([odds1, oddsX, odds2])
odds25 = normalize([odds25u, odds25o])

### Get images by selecting part of the screen


def on_click(x, y, button, pressed):

    global count_input
    global input_text
    global c1
    global c2
    global c3
    global c4

    count_input += 1