Example #1
# Assumed module-level setup (not shown in the snippet):
#   from chainer import cuda
#   xp = cuda.cupy
def toGPU(batch, gpu_num):
    class Data: pass
    data = Data()

    if gpu_num == 0:
        # CPU-only path: keep the batch on the host as-is.
        setattr(data, "image_0", batch[0])
        setattr(data, "text_0", batch[1])
        setattr(data, "wrong_image_0", batch[2])
        setattr(data, "wrong_text_0", batch[3])

    else:
        # One chunk per GPU (the original hard-coded // 2, which only
        # matches gpu_num == 2).
        split_size = len(batch[0]) // gpu_num

        image = xp.array_split(xp.array(batch[0], dtype="float32"), gpu_num)
        text = [batch[1][i*split_size:(i+1)*split_size] for i in range(gpu_num)]

        wrong_image = xp.array_split(xp.array(batch[2], dtype="float32"), gpu_num)
        wrong_text = [batch[3][i*split_size:(i+1)*split_size] for i in range(gpu_num)]

        for i in range(gpu_num):
            # Push the i-th chunk of every field onto device i.
            setattr(data, f"image_{i}", cuda.to_gpu(image[i], i))
            setattr(data, f"text_{i}", cuda.to_gpu(text[i], i))
            setattr(data, f"wrong_image_{i}", cuda.to_gpu(wrong_image[i], i))
            setattr(data, f"wrong_text_{i}", cuda.to_gpu(wrong_text[i], i))

    return data
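A minimal usage sketch, with hypothetical shapes and assuming two visible GPUs plus the module setup noted above:

import numpy as np

# batch layout assumed by the function: (image, text, wrong_image, wrong_text)
batch = (np.zeros((8, 3, 64, 64)),   # images
         np.zeros((8, 50)),          # caption embeddings
         np.zeros((8, 3, 64, 64)),   # mismatched images
         np.zeros((8, 50)))          # mismatched caption embeddings
data = toGPU(batch, gpu_num=2)
# data.image_0 / data.image_1 now live on devices 0 and 1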
Example #2
# Assumed module-level setup (not shown in the snippet): math, numpy, cupy,
# sklearn.metrics.silhouette_score, plus a module-level logger and BATCH_SIZE.
def batched_silhouette_scores(embeddings, clusters, batch_size=BATCH_SIZE):
    """Calculate the silhouette score in batches on the CPU. Compatible with data on the GPU or CPU.

    Args:
        embeddings (cudf.DataFrame or cupy.ndarray): input features to clustering
        clusters (cudf.DataFrame or cupy.ndarray): cluster values for each data point
        batch_size (int, optional): Size for batching.

    Returns:
        float: mean silhouette score from batches
    """

    # Function to calculate batched results
    def _silhouette_scores(input_data):
        embeddings, clusters = input_data
        return silhouette_score(cupy.asnumpy(embeddings),
                                cupy.asnumpy(clusters))

    if hasattr(embeddings, 'values'):
        embeddings = embeddings.values
    embeddings = cupy.asarray(embeddings)

    if hasattr(clusters, 'values'):
        clusters = clusters.values
    clusters = cupy.asarray(clusters)

    n_data = len(embeddings)
    msg = 'Calculating silhouette score on {} molecules'.format(n_data)
    if batch_size < n_data:
        msg += ' with batch size of {}'.format(batch_size)
    logger.info(msg + ' ...')

    n_chunks = int(math.ceil(n_data / batch_size))
    embeddings_chunked = cupy.array_split(embeddings, n_chunks)
    clusters_chunked = cupy.array_split(clusters, n_chunks)

    # Calculate scores on batches and return the average
    scores = list(
        map(_silhouette_scores, zip(embeddings_chunked, clusters_chunked)))
    return numpy.nanmean(numpy.array(scores))
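A quick usage sketch with random data, assuming silhouette_score is scikit-learn's as per the imports noted above:

import cupy

embeddings = cupy.random.rand(1000, 64)            # 1000 points, 64-d features
clusters = cupy.random.randint(0, 5, size=1000)    # labels for 5 clusters

score = batched_silhouette_scores(embeddings, clusters, batch_size=256)
print(score)  # mean of the four per-batch silhouette scores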
Example #3
# Assumed module-level setup (not shown in the snippet):
#   import numpy as np
#   from chainer import cuda
#   xp = cuda.cupy
#   image_keys, depth_keys = ...  # view names for the dict-valued samples
def toGPU(batch, gpu_num):
    class Data: pass
    data = Data()

    if gpu_num == 0:
        # CPU-only path: keep the batch on the host as-is.
        setattr(data, "image_0", batch[0])
        setattr(data, "depth_0", batch[1])
        setattr(data, "text_0", batch[2])
        setattr(data, "wrong_image_0", batch[3])
        setattr(data, "wrong_depth_0", batch[4])
        setattr(data, "wrong_text_0", batch[5])

    else:
        # One chunk per GPU (the original hard-coded // 2, which only
        # matches gpu_num == 2).
        split_size = len(batch[1]) // gpu_num

        # Image/depth samples are dicts of arrays keyed by view name, so they
        # are sliced as lists and then stacked per key before the transfer.
        image = [batch[0][i*split_size:(i+1)*split_size] for i in range(gpu_num)]
        depth = [batch[1][i*split_size:(i+1)*split_size] for i in range(gpu_num)]

        image = [{key: cuda.to_gpu(np.array([sample[key] for sample in image[i]],
                                            dtype="float32"), i)
                  for key in image_keys} for i in range(gpu_num)]
        depth = [{key: cuda.to_gpu(np.array([sample[key] for sample in depth[i]],
                                            dtype="float32"), i)
                  for key in depth_keys} for i in range(gpu_num)]

        text = xp.array_split(xp.array(batch[2], dtype="float32"), gpu_num)

        wrong_image = [batch[3][i*split_size:(i+1)*split_size] for i in range(gpu_num)]
        wrong_depth = [batch[4][i*split_size:(i+1)*split_size] for i in range(gpu_num)]

        wrong_image = [{key: cuda.to_gpu(np.array([sample[key] for sample in wrong_image[i]],
                                                  dtype="float32"), i)
                        for key in image_keys} for i in range(gpu_num)]
        wrong_depth = [{key: cuda.to_gpu(np.array([sample[key] for sample in wrong_depth[i]],
                                                  dtype="float32"), i)
                        for key in depth_keys} for i in range(gpu_num)]

        wrong_text = xp.array_split(xp.array(batch[5], dtype="float32"), gpu_num)

        for i in range(gpu_num):
            setattr(data, f"image_{i}", image[i])
            setattr(data, f"depth_{i}", depth[i])
            setattr(data, f"text_{i}", cuda.to_gpu(text[i], i))
            setattr(data, f"wrong_image_{i}", wrong_image[i])
            setattr(data, f"wrong_depth_{i}", wrong_depth[i])
            setattr(data, f"wrong_text_{i}", cuda.to_gpu(wrong_text[i], i))

    return data
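A hypothetical setup sketch for this variant: the samples in batch[0]/batch[1] (and their "wrong" counterparts) are dicts keyed by the module-level image_keys / depth_keys, which are assumptions here:

import numpy as np

image_keys = ["rgb"]     # hypothetical view names
depth_keys = ["depth"]

img = {"rgb": np.zeros((3, 64, 64))}
dep = {"depth": np.zeros((1, 64, 64))}
batch = ([img] * 8, [dep] * 8,   # image, depth
         np.zeros((8, 50)),      # text
         [img] * 8, [dep] * 8,   # wrong_image, wrong_depth
         np.zeros((8, 50)))      # wrong_text
data = toGPU(batch, gpu_num=2)   # data.image_0["rgb"] is on device 0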
Example #4
    def gen_disctance_list_ds(self, w, h, height, downsampling_xy, pts_cp):
        ### Generate the distance list, with downsampling.

        # Build one 3-D point per downsampled pixel of the image.
        px_list = []
        for i in range(w):
            for j in range(h):
                px = [j * downsampling_xy, i * downsampling_xy, height]
                px_list.append([px])

        ### Position array (from the image), shape (w*h, 1, 3).
        pos_cp = cp.array(px_list)

        ### Process in chunks to bound GPU memory use.
        ### https://qiita.com/kazuki_hayakawa/items/557edd922f9f1fafafe0
        SPLIT = 250
        pos_cp_split = cp.array_split(pos_cp, SPLIT)

        dist_tmp = []
        for tmp_p in pos_cp_split:
            ### Distances from this chunk of pixels to the STL points (pts_cp).
            d = self.clac_all_distance(tmp_p, pts_cp)
            dist_tmp.append(d)

        dist_list = cp.concatenate(dist_tmp, 0)
        return dist_list
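The same split-compute-concatenate idiom as a self-contained sketch, with a hypothetical pairwise-distance kernel standing in for clac_all_distance (which is defined elsewhere in the class):

import cupy as cp

def pairwise_distances_chunked(pos, pts, n_chunks=250):
    # pos: (N, 1, 3) query points; pts: (M, 3) reference points.
    # Computing chunk by chunk keeps the (n, M, 3) broadcast small.
    out = []
    for chunk in cp.array_split(pos, n_chunks):
        diff = chunk - pts[None, :, :]             # (n, M, 3) by broadcasting
        out.append(cp.sqrt((diff ** 2).sum(-1)))   # (n, M) distances
    return cp.concatenate(out, 0)

dists = pairwise_distances_chunked(cp.zeros((5000, 1, 3)), cp.random.rand(300, 3))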
Example #5
    def forward(self, x, y):
        y = chainermn.functions.bcast(self.comm, y, 0)
        # Split x along the spatial axis, one partition per rank. Indexing by
        # rank replaces the original hard-coded rank 0-3 if/elif chain
        # (rank < comm.size always holds, so every case is covered).
        partitions = cp.array_split(x, self.comm.size, -2)
        x = partitions[self.comm.rank]

        h = FX.halo_exchange_3d(self.comm, x, k_size=3, index=1, pad=0)
        h = F.leaky_relu(self.Conv1(h))
        h = F.average_pooling_3d(h, ksize=2, stride=2)
        h = FX.halo_exchange_3d(self.comm, h, k_size=3, index=2, pad=0)
        h = F.leaky_relu(self.Conv2(h))
        h = F.average_pooling_3d(h, ksize=2, stride=2)
        hs = chainermnx.functions.spatialallgather(self.comm, h)
        h = F.concat(hs, -2)
        h = F.leaky_relu(self.Conv3(h))
        h = F.average_pooling_3d(h, ksize=2, stride=2)
        h = F.leaky_relu(self.Conv4(h))
        h = F.average_pooling_3d(h, ksize=2, stride=2)
        h = F.leaky_relu(self.Conv5(h))
        h = F.average_pooling_3d(h, ksize=2, stride=2)
        h = F.leaky_relu(self.Conv6(h))
        h = F.average_pooling_3d(h, ksize=2, stride=2)
        h = F.leaky_relu(self.Conv7(h))
        h = F.leaky_relu(self.FC1(h))
        h = F.leaky_relu(self.FC2(h))
        h = self.Output(h)

        loss = F.mean_squared_error(h, y)
        chainer.report({'loss': loss}, self)
        # print("Rank ", self.comm.rank, "Completed forward and y is ", y)
        return loss
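The partitioning step in isolation, as a sketch: a (batch, channel, depth, height, width) volume split across four ranks along the height axis, which is what axis -2 selects:

import cupy as cp

x = cp.zeros((1, 1, 32, 64, 32))      # (N, C, D, H, W) input volume
parts = cp.array_split(x, 4, -2)      # one slab of H-rows per rank
print([p.shape for p in parts])       # four chunks of (1, 1, 32, 16, 32)
# Each rank convolves only its slab; FX.halo_exchange_3d trades the boundary
# rows that a k_size=3 convolution needs with the neighbouring ranks.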
Example #6
def _stratify_split(X, y, n_train, n_test, x_numba, y_numba, random_state):
    """
    Function to perform a stratified split based on y lables.
    Based on scikit-learn stratified split implementation.

    Parameters
    ----------
    X, y: Shuffled input data and labels
    n_train: Number of samples in train set
    n_test: number of samples in test set
    x_numba: Determines whether the data should be converted to numba
    y_numba: Determines whether the labales should be converted to numba

    Returns
    -------
    X_train, X_test: Data X divided into train and test sets
    y_train, y_test: Labels divided into train and test sets
    """
    x_cudf = False
    y_cudf = False

    if isinstance(X, cudf.DataFrame):
        x_cudf = True
    elif hasattr(X, "__cuda_array_interface__"):
        X = cp.asarray(X)
        x_order = _strides_to_order(X.__cuda_array_interface__['strides'],
                                    cp.dtype(X.dtype))

    if isinstance(y, cudf.Series):
        y_cudf = True
    elif hasattr(y, "__cuda_array_interface__"):
        y = cp.asarray(y)
        y_order = _strides_to_order(y.__cuda_array_interface__['strides'],
                                    cp.dtype(y.dtype))
    elif isinstance(y, cudf.DataFrame):
        y_cudf = True
        # ensure it has just one column
        if y.shape[1] != 1:
            raise ValueError('Expected one label column, but found y '
                             'with shape = %s' % (y.shape,))

    classes, y_indices = cp.unique(y.values if y_cudf else y,
                                   return_inverse=True)

    n_classes = classes.shape[0]
    class_counts = cp.bincount(y_indices)
    if n_train < n_classes:
        raise ValueError('The train_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_train, n_classes))
    if n_test < n_classes:
        raise ValueError('The test_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_test, n_classes))
    class_indices = cp.array_split(cp.argsort(y_indices), n_classes)

    X_train = None

    # random_state won't be None or int, that's handled earlier
    if isinstance(random_state, np.random.RandomState):
        random_state = cp.random.RandomState(seed=random_state.get_state()[1])

    # Break ties
    n_i = _approximate_mode(class_counts, n_train, random_state)
    class_counts_remaining = class_counts - n_i
    t_i = _approximate_mode(class_counts_remaining, n_test, random_state)

    for i in range(n_classes):
        permutation = random_state.permutation(class_counts[i].item())
        perm_indices_class_i = class_indices[i].take(permutation)

        if hasattr(X, "__cuda_array_interface__") or \
           isinstance(X, cupyx.scipy.sparse.csr_matrix):

            X_train_i = cp.array(X[perm_indices_class_i[:n_i[i]]],
                                 order=x_order)
            X_test_i = cp.array(X[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=x_order)

            y_train_i = cp.array(y[perm_indices_class_i[:n_i[i]]],
                                 order=y_order)
            y_test_i = cp.array(y[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=y_order)

            if X_train is None:
                X_train = cp.array(X_train_i, order=x_order)
                y_train = cp.array(y_train_i, order=y_order)
                X_test = cp.array(X_test_i, order=x_order)
                y_test = cp.array(y_test_i, order=y_order)
            else:
                X_train = cp.concatenate([X_train, X_train_i], axis=0)
                X_test = cp.concatenate([X_test, X_test_i], axis=0)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

        elif x_cudf:
            X_train_i = X.iloc[perm_indices_class_i[:n_i[i]]]
            X_test_i = X.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            y_train_i = y.iloc[perm_indices_class_i[:n_i[i]]]
            y_test_i = y.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            if X_train is None:
                X_train = X_train_i
                y_train = y_train_i
                X_test = X_test_i
                y_test = y_test_i
            else:
                X_train = cudf.concat([X_train, X_train_i], ignore_index=False)
                X_test = cudf.concat([X_test, X_test_i], ignore_index=False)
                y_train = cudf.concat([y_train, y_train_i], ignore_index=False)
                y_test = cudf.concat([y_test, y_test_i], ignore_index=False)

    if x_numba:
        X_train = cuda.as_cuda_array(X_train)
        X_test = cuda.as_cuda_array(X_test)
    elif x_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)

    if y_numba:
        y_train = cuda.as_cuda_array(y_train)
        y_test = cuda.as_cuda_array(y_test)
    elif y_cudf:
        y_train = cudf.DataFrame(y_train)
        y_test = cudf.DataFrame(y_test)

    return X_train, X_test, y_train, y_test
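This helper is internal to cuML's train_test_split; user code reaches it through the public API, roughly like this (a sketch, assuming cuML is installed):

import cupy as cp
from cuml.model_selection import train_test_split

X = cp.random.rand(100, 4)
y = cp.random.randint(0, 2, size=100)

# stratify=y routes the split through _stratify_split, so each class keeps
# its proportion in both the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, stratify=y, random_state=0)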
Example #7

# Fragment from a climate-data workflow; item, mdl, mdl_c, i, prism_2_hist,
# ppt_future, dist_id_sel, yr_total and the <item>_1895_1950 array come from
# the surrounding (not shown) loop. The original wrote results through
# locals(), which does not work inside a function, so a plain dict `ds` is
# used here instead.
ds[item + '_1951_2014'] = cp.load(prism_2_hist[i])
# combine the two-stage dataset into one historical set, time range: 1895-2014
ds[item + '_h'] = cp.concatenate((ds[item + '_1895_1950'],
                                  ds[item + '_1951_2014']))
# load future data (2006-2099) and drop the first (2015-2006)*12 months so it
# starts in 2015
ds[item + '_f_a_' + mdl] = np.load(ppt_future[mdl_c])
ds[item + '_f_' + mdl] = np.delete(ds[item + '_f_a_' + mdl],
                                   range((2015 - 2006) * 12), 0)
ds[item + '_f_' + mdl] = cp.array(ds[item + '_f_' + mdl])
# stack historical and future data: 1895-2099
ds[item + '_all_' + mdl] = cp.concatenate((ds[item + '_h'],
                                           ds[item + '_f_' + mdl]), axis=0)
# reshape to (year, month, district) and split into the four seasons
ds[item + '_mon_fmt_' + mdl] = ds[item + '_all_' + mdl].reshape(
    (ds[item + '_all_' + mdl].shape[0] // 12, 12, len(dist_id_sel)))
ds[item + '_seasonal_all_' + mdl] = cp.array_split(
    ds[item + '_mon_fmt_' + mdl], 4, axis=1)
ds[item + '_sp_' + mdl] = ds[item + '_seasonal_all_' + mdl][0].mean(axis=1)
ds[item + '_sm_' + mdl] = ds[item + '_seasonal_all_' + mdl][1].mean(axis=1)
ds[item + '_fl_' + mdl] = ds[item + '_seasonal_all_' + mdl][2].mean(axis=1)
ds[item + '_wt_' + mdl] = ds[item + '_seasonal_all_' + mdl][3].mean(axis=1)
# construct the A matrix (without npp): 1980-2014 means per season
for season in ('_sp_', '_sm_', '_fl_'):
    ds[item + season + mdl + '_A'] = ds[item + season + mdl][
        yr_total.index("1980"):yr_total.index("2015")].mean(axis=0)
Example #8

def FilterData(matrizOnibusCpu, matrizLinhasCpu, busIdList, lineIdList,
               CONFIGS, logging):

    distanceTolerance = float(
        CONFIGS['default_correction_method']['distanceTolerance'])
    detectionPercentage = float(
        CONFIGS['default_correction_method']['detectionPercentage'])

    busStepSize = int(CONFIGS['default_correction_method']['busStepSize'])
    lineStepSize = int(CONFIGS['default_correction_method']['lineStepSize'])

    matrizOnibus = cp.asarray(matrizOnibusCpu)
    matrizLinhas = cp.asarray(matrizLinhasCpu)

    # Split the bus matrix into chunks of roughly busStepSize rows each.
    busesList = cp.array_split(
        matrizOnibus,
        int(matrizOnibus.shape[0] /
            busStepSize if matrizOnibus.shape[0] > busStepSize else 1))
    for index in range(1, len(busesList)):
        if len(busesList[index].shape) == 2:
            # Restore the batch dimension (the original indexing expression
            # discarded its result).
            busesList[index] = busesList[index][None, :]
        # Trim the trailing NaN padding: keep everything before the first
        # NaN column.
        busesList[index] = cp.hsplit(busesList[index], [
            int(cp.argwhere(cp.isnan(busesList[index][0, :, 1]))[0]),
            busesList[index].shape[1]
        ])[0]

    # Same chunking and NaN trimming for the line matrix.
    linesList = cp.array_split(
        matrizLinhas,
        int(matrizLinhas.shape[0] /
            lineStepSize if matrizLinhas.shape[0] > lineStepSize else 1))

    for index in range(1, len(linesList)):
        if len(linesList[index].shape) == 2:
            linesList[index] = linesList[index][None, :]
        linesList[index] = cp.hsplit(linesList[index], [
            int(cp.argwhere(cp.isnan(linesList[index][0, :, 1]))[0]),
            linesList[index].shape[1]
        ])[0]

    fullResults = None
    for nowBus, busTensor in enumerate(busesList):
        allLinesResult = None
        for nowLine, lineTensor in enumerate(linesList):
            # Run the detection algorithm on this (bus chunk, line chunk)
            # pair and copy the result back to the host.
            algRes = cp.asnumpy(
                Algorithm(busTensor,
                          lineTensor,
                          TOLERANCE=distanceTolerance,
                          detectionPercentage=detectionPercentage))
            # Concatenate this chunk's result into the full results matrix.
            if allLinesResult is None:
                allLinesResult = np.copy(algRes)
            else:
                allLinesResult = np.concatenate([allLinesResult, algRes],
                                                axis=1)
        if fullResults is None:
            fullResults = np.copy(allLinesResult)
        else:
            fullResults = np.concatenate([fullResults, allLinesResult], axis=0)
    #fullResults = fullResults > detectionPercentage
    lineLabel = [(i[0], str(i[1])) for i in lineIdList]
    busLabel = [i[0] for i in busIdList]
    results = pd.DataFrame(fullResults.T,
                           index=pd.MultiIndex.from_tuples(lineLabel),
                           columns=busLabel)
    return results
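A hypothetical call, assuming CONFIGS mirrors the ini sections the function reads, the coordinate matrices are NaN-padded to a common length, and Algorithm is defined elsewhere; all names and shapes below are illustrative only:

import numpy as np
import logging

CONFIGS = {'default_correction_method': {
    'distanceTolerance': '0.1', 'detectionPercentage': '0.8',
    'busStepSize': '64', 'lineStepSize': '64'}}

buses = np.full((128, 200, 2), np.nan); buses[:, :150, :] = 0.0
lines = np.full((32, 400, 2), np.nan);  lines[:, :300, :] = 0.0
busIdList = [(f"bus{i}",) for i in range(128)]
lineIdList = [(f"line{i}", i) for i in range(32)]

results = FilterData(buses, lines, busIdList, lineIdList,
                     CONFIGS, logging.getLogger(__name__))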