Example #1
# `rd` and `uni` are assumed here to be scipy.stats.randint and scipy.stats.uniform
from scipy.stats import randint as rd, uniform as uni


def xgb_params():
    # Hyperparameter search space for XGBoost, e.g. for RandomizedSearchCV
    para_dict = {
        "max_depth": rd(3, 10),                # integers in [3, 10)
        "learning_rate": uni(loc=0, scale=1),  # uniform on [0, 1]
        "n_estimators": rd(50, 200),
        "objective": ["reg:linear"],  # deprecated alias; newer XGBoost uses "reg:squarederror"
    }
    return para_dict
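A minimal sketch of how such a distribution dict is typically consumed, assuming scikit-learn's RandomizedSearchCV and xgboost's XGBRegressor (neither appears in the original snippet):

from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBRegressor

# draw 20 parameter combinations from the distributions returned above
search = RandomizedSearchCV(
    estimator=XGBRegressor(),
    param_distributions=xgb_params(),
    n_iter=20,
    cv=3,
    scoring="neg_mean_squared_error",
)
# search.fit(X_train, y_train)  # X_train / y_train are assumed to exist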
Example #2
    def label_ranking_policy(self, obs):
        """ Produces an action for a given state based on the LabelRanker model prediction.
            Note: only the pendulum angle and pendulum velocity of the input state are considered when producing an action.

            At each input state:
                - the highest-ranked action is selected with a probability of 0.95
                - the second-highest-ranked action is selected with a probability of 0.04
                - any remaining actions share the leftover probability equally """

        # only select the pendulum velocity and angle from the input state vector
        state_obs = np.array([obs[0], obs[1]])
        state_obs = state_obs.reshape(
            -1, state_obs.shape[0])  # reshape to be a 2D array
        state_obs = torch.from_numpy(state_obs)  # convert to a tensor

        # make ranking predictions for all actions
        with torch.no_grad():
            preds = self.model(state_obs.float())

        # rank the action indexes from highest-preferred to lowest
        # (rd is scipy.stats.rankdata, so negating the ranks sorts best-first)
        ranked_action_idx = (-rd(preds.detach().numpy())).argsort()

        ### Return the selected action ###

        if len(self.action_space) > 2:
            # more than 2 actions: sample using the full probability vector
            # (0.95, 0.04, then the leftover mass split over the rest);
            # random noise added to the action elsewhere keeps the policy
            # stochastic even when the top-ranked action is chosen
            action = np.random.choice(ranked_action_idx,
                                      size=1,
                                      p=list(self.probs))[0]
        else:
            # only 2 actions: select the highest-preferred action 95% of the time
            action = np.random.choice(ranked_action_idx,
                                      size=1,
                                      p=[self.probs[0], self.probs[1]])[0]

        # Return the corresponding action value, clipped to the [0, 1] range
        return np.array([np.clip(self.action_space[int(action)], 0, 1)])
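The policy relies on a pre-built self.probs vector. A minimal sketch of how it could be constructed for an n-action space, matching one reading of the docstring's 0.95 / 0.04 / leftover scheme (the helper name is hypothetical):

import numpy as np

def make_action_probs(n_actions):
    # Hypothetical helper: 0.95 for the top-ranked action, 0.04 for the
    # second, and the remaining 0.01 split equally among the rest.
    if n_actions == 2:
        return np.array([0.95, 0.05])
    probs = np.full(n_actions, 0.01 / (n_actions - 2))
    probs[0], probs[1] = 0.95, 0.04
    return probs

assert np.isclose(make_action_probs(5).sum(), 1.0)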


Example #3

########################################
def process():
    # assumes Flask's request/jsonify, re, pandas as pd, scipy.stats.rankdata as rd,
    # and azure.storage.blob.BlockBlobService are imported at module level

    data = request.get_json()
    url = data['url']

    # container name: the text between 'net/' and the next '/'
    pattern = re.compile(r'(?<=net\/).*?(?=\/)')
    match = pattern.findall(url)
    container_name = match[0]

    # blob name: everything after '<container_name>/'
    reg = r"(?<=" + container_name + r"\/).*"
    pattern = re.compile(reg)
    match = pattern.findall(url)
    file_name1 = match[0]

    block_blob_service = BlockBlobService(
        account_name='dsconvreport',
        account_key='<account-key>'  # do not hard-code storage keys; load from config or env
    )

    # assumes the blob name ends in '.xlsx' (the last 5 characters are stripped)
    full_path_to_file = file_name1[:-5] + "_downloaded.xlsx"

    block_blob_service.get_blob_to_path(container_name, file_name1,
                                        full_path_to_file)

    df = pd.read_excel(full_path_to_file)
    val = df.values.tolist()

    items = []
    supp_val = []
    ranks = []

    # collect the unique values of the first column, in order of appearance
    for i in val:
        if i[0] not in items:
            items.append(i[0])

    # dense-rank the third column within each first-column group
    for j in items:
        for i in val:
            if i[0] == j:
                supp_val.append(i[2])
        ranks.extend(rd(supp_val, method='dense'))
        supp_val = []

    # assumes rows are grouped by the first column, so the concatenated
    # per-group ranks line up with the DataFrame's row order
    df['supplier_rank'] = ranks
    files = 'Test_Supplierdata_kundan.xlsx'
    df.to_excel(files)

    block_blob_service.create_blob_from_path(container_name, files, files)

    op_url = "https://dsconvreport.blob.core.windows.net/" + container_name + "/" + files
    results = {}
    results['result'] = "Congratulations, your file has been uploaded successfully"
    results['uploaded_url'] = op_url

    return jsonify({
        'output_url': op_url,
        'message': 'Your file has been downloaded and uploaded'
    })
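A minimal sketch of exercising this view, assuming it is registered on a Flask app under a /process route accepting POST (the route registration is not shown in the original excerpt):

import requests  # hypothetical client-side call

resp = requests.post(
    "http://localhost:5000/process",  # assumed host and route
    json={"url": "https://dsconvreport.blob.core.windows.net/mycontainer/Test_Supplierdata.xlsx"},
)
print(resp.json()["output_url"])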
Example #4
def _rank_analog_grid(train, forecast, out_array, i_start, i_stop, j_start, j_stop, grid_window):
    """
    Rank-transform the train and forecast fields point by point, then score
    each training day by its summed absolute rank difference to the forecast
    over a local (2*grid_window+1)**2 neighbourhood.
    :param train: 3-d numpy array (days, lat, lon) of training fields
    :param forecast: 2-d numpy array (lat, lon), the forecast field
    :param out_array: 3-d numpy array to receive the rank-difference scores
    :param i_start, i_stop: inclusive latitude-index bounds of the domain
    :param j_start, j_stop: inclusive longitude-index bounds of the domain
    :param grid_window: half-width of the local neighbourhood
    :return: out_array, filled in over the requested domain
    """

    shp1 = (train.shape[0], ((grid_window * 2) + 1) * ((grid_window * 2) + 1))
    shp2 = (1, ((grid_window * 2) + 1) * ((grid_window * 2) + 1))
    ranked_array = np.empty((train.shape[0] + 1, forecast.shape[0], forecast.shape[1]))
    # --- Fill in ranked array: training days first, forecast as the last slice
    ranked_array[:-1, ...] = train
    ranked_array[-1, ...] = forecast
    # --- First, rank the data at each grid point over the relevant domain
    for i in range(i_start - grid_window, i_stop + grid_window + 1):  # --- latitudes
        for j in range(j_start - grid_window, j_stop + grid_window + 1):  # --- longitudes
            ranked_array[:, i, j] = rd(ranked_array[:, i, j], method='average')
    # --- Now sum the absolute rank differences between each training day and
    # --- the forecast over the local window around each domain grid point
    for i in range(i_start, i_stop + 1):  # --- latitudes
        for j in range(j_start, j_stop + 1):  # --- longitudes
            out_array[:, i, j] = np.sum(
                np.absolute(ranked_array[:-1, i - grid_window:i + grid_window + 1, j - grid_window:j + grid_window + 1].reshape(shp1)
                            - ranked_array[-1, i - grid_window:i + grid_window + 1, j - grid_window:j + grid_window + 1].reshape(shp2)),
                axis=1)
    return out_array
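A minimal usage sketch on dummy data, assuming rd is scipy.stats.rankdata as in the other examples:

import numpy as np
from scipy.stats import rankdata as rd

train = np.random.rand(30, 20, 20)  # 30 training days on a 20x20 grid
forecast = np.random.rand(20, 20)
out = np.zeros((30, 20, 20))

# score grid points 5..14 with a +/-2-point local window
out = _rank_analog_grid(train, forecast, out, 5, 14, 5, 14, grid_window=2)
best_analog_day = out[:, 10, 10].argmin()  # lowest rank difference = best analog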
Example #5
def argsort_analogs(analog_array, i_start, i_stop, j_start, j_stop):
    """
    Ordinal-rank the analog scores at each grid point in the domain, so that
    rank 1 marks the closest analog day.
    :param analog_array: 3-d numpy array (days, lat, lon) of analog scores
    :param i_start, i_stop: inclusive latitude-index bounds of the domain
    :param j_start, j_stop: inclusive longitude-index bounds of the domain
    :return: array of per-grid-point ordinal ranks
    """
    out_array = np.zeros(analog_array.shape)
    # --- Rank the scores (ascending) at each grid point; method='ordinal'
    # --- breaks ties by order of appearance, so every day gets a distinct rank
    for i in range(i_start, i_stop + 1):  # --- latitudes
        for j in range(j_start, j_stop + 1):  # --- longitudes
            out_array[:, i, j] = rd(analog_array[:, i, j], method='ordinal')
    return out_array
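For reference, how the 'ordinal' tie-breaking used here differs from the 'average' and 'dense' methods that appear in the other examples:

from scipy.stats import rankdata as rd

scores = [0.2, 0.5, 0.2, 0.9]
print(rd(scores, method='average'))  # tied values share the average rank: 1.5, 3, 1.5, 4
print(rd(scores, method='dense'))    # ties share a rank, no gaps: 1, 2, 1, 3
print(rd(scores, method='ordinal'))  # every value gets a distinct rank: 1, 3, 2, 4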
Example #6
def processjson():
    data = request.get_json()

    # extract container name and file name from the url using regex
    my_url = data['my_url']
    pattern = re.compile(r'(?<=net\/).*?(?=\/)')
    matches = pattern.findall(my_url)
    container_name = matches[0]
    my_reg = r"(?<=" + container_name + r"\/).*"
    pattern = re.compile(my_reg)
    matches = pattern.findall(my_url)
    local_file_name = matches[0]

    # download the blob to a local file
    block_blob_service = BlockBlobService(account_name='dsconvreport',
                                          account_key='<account-key>')  # do not hard-code storage keys
    full_path_to_file = local_file_name
    block_blob_service.get_blob_to_path(container_name, local_file_name, full_path_to_file)

    # process data: dense-rank column 2 within each group of column 0
    df = pd.read_excel(full_path_to_file)
    b = df.values.tolist()
    my_sup_val = []
    my_items = []
    my_ranks = []
    for i in b:
        if i[0] not in my_items:
            my_items.append(i[0])

    for j in my_items:
        for i in b:
            if i[0] == j:
                my_sup_val.append(i[2])
        my_ranks.extend(rd(my_sup_val, method="dense"))
        my_sup_val = []

    # make the new file (assumes rows are grouped by column 0, so the
    # concatenated ranks line up with the DataFrame's row order)
    df["supplier_rank"] = my_ranks
    new_file = "Test_Supplierdata_gaurav.xlsx"
    df.to_excel(new_file)

    # upload the result back to the same container
    block_blob_service.create_blob_from_path(container_name, new_file, new_file)

    output_url = "https://dsconvreport.blob.core.windows.net/" + container_name + "/" + new_file
    my_result = {}
    my_result['result'] = "Uploaded Successfully"
    my_result['uploaded_url'] = output_url
    return jsonify({'output_url': output_url, 'Message': 'Successfully Downloaded and Uploaded'})
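The nested loops in Examples #3 and #6 implement a group-wise dense rank; pandas can express the same transform directly (a sketch with hypothetical column names):

import pandas as pd

df = pd.DataFrame({
    "supplier": ["a", "a", "b", "b", "b"],
    "region":   ["x", "y", "x", "y", "z"],
    "value":    [10, 10, 7, 9, 7],
})
# dense-rank 'value' within each 'supplier' group, matching rankdata(method='dense')
df["supplier_rank"] = df.groupby("supplier")["value"].rank(method="dense").astype(int)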
Example #7

def compute_ranks(number, games):
    # WIN and DRAW are scoring constants defined elsewhere in the module
    # (e.g. WIN = 3, DRAW = 1 under standard football scoring);
    # columns: [match points, points scored, point difference]
    teams = np.zeros((number, 3))

    # Build the table from the game results
    for team_a, team_b, points_a, points_b in games:
        teams[team_a, 1] += points_a
        teams[team_a, 2] += points_a - points_b
        teams[team_b, 1] += points_b
        teams[team_b, 2] += points_b - points_a

        if points_a == points_b:
            teams[team_a, 0] += DRAW
            teams[team_b, 0] += DRAW
        elif points_a > points_b:
            teams[team_a, 0] += WIN
        else:
            teams[team_b, 0] += WIN

    # Weight match points most heavily, then point difference, then points
    # scored; the negative weights make better teams score lower, so
    # rankdata assigns them smaller (better) ranks
    team_scores = teams.dot(np.array([-100000, -1, -1000]).reshape(-1, 1))
    return rd(team_scores, method='min').astype(int).tolist()
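A small worked call, with assumed values for the WIN and DRAW constants since the snippet does not define them:

import numpy as np
from scipy.stats import rankdata as rd

WIN, DRAW = 3, 1  # assumed football-style scoring

# (team_a, team_b, points_a, points_b)
games = [(0, 1, 2, 1),   # team 0 beats team 1
         (1, 2, 0, 0),   # teams 1 and 2 draw
         (0, 2, 1, 3)]   # team 2 beats team 0

print(compute_ranks(3, games))  # -> [2, 3, 1]: team 2 first, team 0 second, team 1 third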
Example #8
import pickle

import numpy as np
import pandas as pd

from scipy.stats import rankdata as rd


def load_pickle(fp):
    with open(fp, 'rb') as f:
        return pickle.load(f)


x = load_pickle('../lb-predictions/skp017.pkl')
# rank-average the positive-class probabilities across the stored prediction sets
x['label'] = np.mean(np.asarray([rd(p[:, 1]) for p in x['label']]), axis=0)
df = pd.DataFrame({
    'image_name': [i.split('.')[0] for i in x['image_id']],
    'target': x['label']
})

df.to_csv('../submissions/skp017.csv', index=False)

###

x = load_pickle('../lb-predictions/skp024-5fold.pkl')
# rank-average the positive-class probabilities across the stored prediction sets
x['label'] = np.mean(np.asarray([rd(p[:, 1]) for p in x['label']]), axis=0)
df = pd.DataFrame({
    'image_name': [i.split('.')[0] for i in x['image_id']],
    'target': x['label']
})

df.to_csv('../submissions/skp024-5fold.csv', index=False)
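Rank-averaging, as above, makes differently calibrated models comparable before blending; a toy illustration:

import numpy as np
from scipy.stats import rankdata as rd

# two models with very different score scales but similar orderings
model_a = np.array([0.10, 0.40, 0.90])
model_b = np.array([0.01, 0.02, 0.03])

raw_blend = (model_a + model_b) / 2           # dominated by model_a's scale
rank_blend = (rd(model_a) + rd(model_b)) / 2  # scale-free: [1. 2. 3.]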
Example #9
    # (excerpt: the opening of load_and_merge is not shown; `preds` is a list
    # of per-model prediction DataFrames built from the given pickle paths)
    maindf = preds[0]
    for i in range(1, len(preds)):
        maindf = maindf.merge(preds[i], on='image_name')
    train = pd.read_csv('../data/train.csv')
    maindf = maindf.merge(train, on='image_name')
    return maindf


df = load_and_merge([f'../cv-predictions/fold0/skp00{i}.pkl' for i in [0,3,7,8,9]])

for c in df.columns:
    if 'y_pred' in c:
        print(f'AUC={roc_auc_score(df["target"], df[c]):.4f}')


# blend the five models' rank-transformed predictions with hand-tuned weights
roc_auc_score(df['target'], rd(df['y_pred0'])+rd(df['y_pred1'])+1.5*rd(df['y_pred2'])+0.5*rd(df['y_pred3'])+rd(df['y_pred4']))


###


def make_df(pickled):
    # collect image ids, positive-class scores, and labels into one frame
    df = pd.DataFrame({'image': pickled['image_id'], 'y_pred': pickled['y_pred'][:, 1], 'y_true': pickled['y_true']})
    # optional rank-transform (disabled): df['y_pred'] = rd(df['y_pred'])
    return df

# assumes `import glob` plus the load_pickle helper from Example #8
preds = pd.concat([make_df(load_pickle(p)) for p in glob.glob('../cv-predictions/fold*/skp007.pkl')])

# split by ground truth to compare score distributions between the classes
malign = preds[preds['y_true'] == 1]
benign = preds[preds['y_true'] == 0]
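The snippet ends after this class split; one natural follow-up (an assumption, not shown in the original) is to estimate AUC directly as the probability that a random malignant case outscores a random benign one:

import numpy as np

# AUC equals P(score_malign > score_benign) + 0.5 * P(tie)
diff = malign['y_pred'].values[:, None] - benign['y_pred'].values[None, :]
pairwise_auc = (diff > 0).mean() + 0.5 * (diff == 0).mean()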