Example #1
def analyze(self, objectid):
    failed = []
    doc = self.db.get_document(object_id=objectid)
    name = doc['name']
    out_dir = self.prepare_dest_folder(name)
    dates = doc['important_dates']
    df = doc['df']
    stationarity_file = 'results/stationarity.txt'
    # append results for every column; the file accumulates runs across documents
    with open(stationarity_file, 'a') as out_stat:
        for col in list(df.columns.values):
            ts = df[col]
            utils.plot_ts(ts, "{}:{}".format(name, col), os.path.join(out_dir, "orig_{}.pdf".format(col)))
            stationarity = utils.is_stationary(ts)
            out_stat.write("{} - ts: {} - len: {} - Stationary: {}\n".format(name, col, len(ts), stationarity))
            if stationarity:
                lag_acf, lag_pacf = utils.find_acf_pacf(ts, fname=os.path.join(out_dir, '{}_acf_pacf.pdf'.format(col)))
            else:
                # not stationary: try first-order differencing
                ts_diff = utils.differentiate(ts)
                diff_stationarity = utils.is_stationary(ts_diff)
                out_stat.write("{} - ts_diff: {} - len: {} - Stationary: {}\n".format(name, col, len(ts_diff), diff_stationarity))
                if diff_stationarity:
                    lag_acf, lag_pacf = utils.find_acf_pacf(ts_diff, fname=os.path.join(out_dir, '{}_diff_acf_pacf.pdf'.format(col)))
                else:
                    # still not stationary: try a log transform
                    ts_log = utils.log_transform(ts)
                    log_stationarity = utils.is_stationary(ts_log)
                    out_stat.write("{} - ts_log: {} - len: {} - Stationary: {}\n".format(name, col, len(ts_log), log_stationarity))
                    if log_stationarity:
                        lag_acf, lag_pacf = utils.find_acf_pacf(ts_log, fname=os.path.join(out_dir, '{}_log_acf_pacf.pdf'.format(col)))
                    else:
                        out_stat.write("{} - ts: {} - len: {} - Stationary: 1st order failed\n".format(name, col, len(ts)))
                        failed.append(objectid)
    return failed
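
The utils helpers this example relies on are not shown here. A minimal sketch of what is_stationary, differentiate, and log_transform could look like, using the augmented Dickey-Fuller test from statsmodels (assumed implementations; the project's own helpers may differ):

import numpy as np
from statsmodels.tsa.stattools import adfuller

def is_stationary(ts, alpha=0.05):
    # rejecting the ADF null hypothesis of a unit root => treat as stationary
    pvalue = adfuller(ts.dropna())[1]
    return pvalue < alpha

def differentiate(ts):
    # first-order differencing removes a linear trend
    return ts.diff().dropna()

def log_transform(ts):
    # dampens multiplicative growth; assumes strictly positive values
    return np.log(ts)
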
Example #2
def consume(self, y):

    # transform and smooth the input: an exponential moving average in log space
    log_y = utils.log_transform(y, self.__config.max_input_value)
    prev = self.__stream[-1] if self.__stream else 0
    smooth_log_y = ((1 - self.__config.input_smoothing) * log_y
                    + self.__config.input_smoothing * prev)
    self.__stream.append(smooth_log_y)

    # calculate prediction error against the last forecast
    self.__stream_prediction_error.append(
        abs(self.__stream[-1] - self.__stream_predicted[-1]))

    # check for anomalies
    anomaly_results = self.__check_anomaly()
    self.__stream_anomalies.append(
        1 if anomaly_results.anomaly_exists else 0)

    # make a new prediction for the next point
    predicted_next_point = self.__predict_next_point()
    self.__stream_predicted.append(predicted_next_point)

    return anomaly_results
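
For reference, the smoothing step above is the standard exponentially weighted moving average in log space, s_t = (1 - a) * log(y_t) + a * s_{t-1}. A self-contained sketch of the same update (max_value and the sample inputs are arbitrary stand-ins):

import math

def log_transform(y, max_value):
    # clip, then move to log space so multiplicative spikes become additive
    return math.log1p(min(y, max_value))

def smooth(stream, y, alpha, max_value=1e6):
    # s_t = (1 - alpha) * log(y_t) + alpha * s_{t-1}
    log_y = log_transform(y, max_value)
    prev = stream[-1] if stream else 0.0
    stream.append((1 - alpha) * log_y + alpha * prev)
    return stream[-1]

stream = []
for y in [10, 12, 11, 500, 13]:  # 500 is an injected spike
    print(round(smooth(stream, y, alpha=0.3), 3))
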
Example #3
def generate_test_dataset(drr_path, patient_name, model, data_mode, gans=None):
    assert model in ['frcnn', 'mrcnn', 'siammask']

    load_path = drr_path + patient_name + '/DRRs/' + data_mode + '/'
    test_imgs, test_labs = [], []

    for i in range(8, 10):
        # load data
        drr_dir = load_path + str(10 * i) + '-' + str(10 * (i + 1)) + '/'
        imgs_name = data_mode + '_drr_imgs'
        labs_name = data_mode + '_drr_labs'
        if gans is not None:
            imgs_name = imgs_name + '_' + gans

        imgs = load_data(drr_dir + imgs_name + '.hdf5', imgs_name)
        labs = load_data(drr_dir + labs_name + '.hdf5', labs_name)

        # apply log transformation to make data more linear
        # and normalize between -1 and 1
        if gans is None:
            imgs = utils.log_transform(imgs)

        # split the flat stack into blocks of 181 frames each
        imgs = np.reshape(
            imgs, (imgs.shape[0] // 181, 181, imgs.shape[1], imgs.shape[2]))
        labs = np.reshape(
            labs, (labs.shape[0] // 181, 181, labs.shape[1], labs.shape[2]))

        if model in ['frcnn', 'mrcnn']:
            # put the 181-frame axis first, drop the last frame, and
            # regroup the remaining 180 frames into 9 blocks of 20
            imgs = np.moveaxis(imgs, 1, 0)
            labs = np.moveaxis(labs, 1, 0)
            imgs = np.reshape(
                imgs[:-1, ...],
                (9, 20, imgs.shape[1], imgs.shape[2], imgs.shape[3]))
            labs = np.reshape(
                labs[:-1, ...],
                (9, 20, labs.shape[1], labs.shape[2], labs.shape[3]))
            imgs = np.reshape(imgs,
                              (imgs.shape[0], imgs.shape[1] * imgs.shape[2],
                               imgs.shape[3], imgs.shape[4]))
            labs = np.reshape(labs,
                              (labs.shape[0], labs.shape[1] * labs.shape[2],
                               labs.shape[3], labs.shape[4]))

            # crop around center
            imgs = imgs[:, :, imgs.shape[2] // 4:-imgs.shape[2] // 4,
                        imgs.shape[3] // 4:-imgs.shape[3] // 4]
            labs = labs[:, :, labs.shape[2] // 4:-labs.shape[2] // 4,
                        labs.shape[3] // 4:-labs.shape[3] // 4]

        elif model in ['siammask']:
            # crop around center
            if gans is None:
                imgs = imgs[:, :, imgs.shape[2] // 4:-imgs.shape[2] // 4,
                            imgs.shape[3] // 4:-imgs.shape[3] // 4]
            else:
                imgs = imgs[:, :, 64:-64, :]

            labs = labs[:, :, labs.shape[2] // 4:-labs.shape[2] // 4,
                        labs.shape[3] // 4:-labs.shape[3] // 4]

        else:
            raise NotImplementedError

        test_imgs.append(imgs)
        test_labs.append(labs)

        print(i)

    test_imgs = np.concatenate(test_imgs, axis=0)
    test_labs = np.concatenate(test_labs, axis=0)

    save_path = drr_path + patient_name + '/models/' + model + '/' + data_mode + '/dataset/'
    if gans is not None:
        save_data(save_path, 'test_imgs' + '_' + gans, test_imgs)
    else:
        save_data(save_path, 'test_imgs', test_imgs)
        save_data(save_path, 'test_labs', test_labs)
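
load_data and save_data are project helpers that do not appear on this page. A plausible minimal version with h5py, assuming each .hdf5 file stores one dataset under the given key (the real helpers may differ):

import os
import h5py

def load_data(file_path, key):
    # read a single named dataset from an HDF5 file into memory
    with h5py.File(file_path, 'r') as f:
        return f[key][()]

def save_data(save_path, key, array):
    # create the target folder if needed and write one dataset per file
    os.makedirs(save_path, exist_ok=True)
    with h5py.File(save_path + key + '.hdf5', 'w') as f:
        f.create_dataset(key, data=array)
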
Example #4
def generate_train_dataset(drr_path,
                           patient_name,
                           model='mrcnn',
                           data_mode='shifted',
                           gans=None):
    assert data_mode in ['standard', 'shifted', 'decorrelated']
    assert model in ['frcnn', 'mrcnn', 'siammask']

    load_path = drr_path + patient_name + '/DRRs/' + data_mode + '/'

    train_imgs, train_labs = [], []
    for i in range(0, 8):
        # load data
        drr_dir = load_path + str(10 * i) + '-' + str(10 * (i + 1)) + '/'
        imgs_name = data_mode + '_drr_imgs'
        labs_name = data_mode + '_drr_labs'
        if gans is not None:
            imgs_name = imgs_name + '_' + gans

        imgs = load_data(drr_dir + imgs_name + '.hdf5', imgs_name)
        labs = load_data(drr_dir + labs_name + '.hdf5', labs_name)

        # apply log transformation to make data more linear
        # and normalize between -1 and 1
        if gans is None:
            imgs = utils.log_transform(imgs)

        # split the flat stack into blocks of 181 frames each
        imgs = np.reshape(
            imgs, (imgs.shape[0] // 181, 181, imgs.shape[1], imgs.shape[2]))
        labs = np.reshape(
            labs, (labs.shape[0] // 181, 181, labs.shape[1], labs.shape[2]))

        if model in ['frcnn', 'mrcnn']:
            # keep every 5th frame (181 -> 37 frames per block)
            imgs = imgs[:, 0::5, :, :]
            labs = labs[:, 0::5, :, :]

            imgs = np.reshape(
                imgs,
                (imgs.shape[0] * imgs.shape[1], imgs.shape[2], imgs.shape[3]))
            labs = np.reshape(
                labs,
                (labs.shape[0] * labs.shape[1], labs.shape[2], labs.shape[3]))

            # crop around center
            imgs = crop_around_center(imgs, imgs.shape[1] // 2,
                                      imgs.shape[2] // 2)
            labs = crop_around_center(labs, labs.shape[1] // 2,
                                      labs.shape[2] // 2)

        elif model in ['siammask']:
            # crop around center
            if gans is None:
                imgs = imgs[:, :, imgs.shape[2] // 4:-imgs.shape[2] // 4,
                            imgs.shape[3] // 4:-imgs.shape[3] // 4]
            else:
                imgs = imgs[:, :, 64:-64, :]

            labs = labs[:, :, labs.shape[2] // 4:-labs.shape[2] // 4,
                        labs.shape[3] // 4:-labs.shape[3] // 4]

        else:
            raise NotImplementedError

        train_imgs.append(imgs)
        train_labs.append(labs)

        print(i)

    train_imgs = np.concatenate(train_imgs, axis=0)
    train_labs = np.concatenate(train_labs, axis=0)

    save_path = drr_path + patient_name + '/models/' + model + '/' + data_mode + '/dataset/'
    if gans is not None:
        save_data(save_path, 'train_imgs' + '_' + gans, train_imgs)
    else:
        save_data(save_path, 'train_imgs', train_imgs)
        save_data(save_path, 'train_labs', train_labs)
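
crop_around_center is likewise not shown. Judging from its call sites (half the height and width are requested, matching the // 4 : -// 4 slicing used elsewhere), a sketch of the assumed behavior is:

import numpy as np

def crop_around_center(stack, height, width):
    # stack has shape (N, H, W); return the centered (N, height, width) window
    h0 = (stack.shape[1] - height) // 2
    w0 = (stack.shape[2] - width) // 2
    return stack[:, h0:h0 + height, w0:w0 + width]
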
Example #5
        sp.load_npz('data/processed_' + args.dataset + '/test_masked.npz')
    }
else:
    raw_data = {
        'train':
        sp.load_npz('data/processed_' + args.dataset + '/train_ex_val.npz'),
        'val':
        sp.load_npz('data/processed_' + args.dataset + '/val_unmasked.npz'),
        'val_masked':
        sp.load_npz('data/processed_' + args.dataset + '/val_masked.npz')
    }

# transform data
if args.transform == 'log':
    data = {
        k: log_transform(v, args.alpha, args.eps)
        for k, v in raw_data.items()
    }
elif args.transform == 'linear':
    data = {k: linear_transform(v, args.alpha) for k, v in raw_data.items()}
else:
    data = raw_data

# instantiate item_user matrix
item_user = data['train'].T.tocsr()

# initialise model
if args.m == 'imf':
    model = implicit.als.AlternatingLeastSquares(
        factors=args.factors,
        regularization=args.l2,
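
The log_transform applied to the ratings here is not shown. For implicit-feedback ALS, a common choice is the confidence weighting of Hu et al. (2008), c = 1 + alpha * log(1 + r / eps), applied to the nonzero entries of the sparse matrix; a sketch under that assumption:

import numpy as np

def log_transform(x, alpha, eps):
    # assumed form: 1 + alpha * log(1 + r / eps) on the nonzero entries
    # of a scipy sparse matrix; returns a new CSR matrix
    out = x.tocsr(copy=True)
    out.data = 1.0 + alpha * np.log1p(out.data / eps)
    return out
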
Example #6

def ebo_trans(a):
    # weighted geometric combination: a0^(1/8) * a1^(1/4) * a2^(1/2) * a3
    return np.power(a[0], 0.125) * np.power(a[1], 0.25) * np.power(a[2], 0.5) * a[3]


for col, c in enumerate(columns):
    for j, s in enumerate(size):
        print(c, s)
        range_rows = np.load(
            "/home/jitao/experiment/data/range_feature_data/cover_5000_attr_{}_rows.npy".format(COLS[col])
        ).astype(np.float32)
        range_source = np.load(
            "/home/jitao/experiment/data/range_feature_data/cover_5000_attr_{}_range_features.npy".format(COLS[col])
        ).astype(np.float32)
        range_rows = log_transform(range_rows)
        # selectivity feature
        raw_data = pd.read_csv("cover.csv").sample(30000).to_numpy()
        # build a 100-bucket equal-depth histogram for each attribute
        bins = []
        bin_size = []
        for i in range(c):
            b, bs = buildEqualDepthHist(raw_data[:, i], nbin=100)
            bins.append(b)
            bin_size.append(bs)

        # estimate the per-attribute cardinality of each query range
        selectivity = []
        for each in range_source:
            sel = []
            for k in range(c):
                sel.append(computeRangeCardinality(bins[k], bin_size[k], each[2 * k], each[2 * k + 1]))
            selectivity.append(sel)
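
buildEqualDepthHist and computeRangeCardinality are project helpers that are not shown. A minimal sketch consistent with the calls above (hypothetical implementations): equal-depth bucket edges at the empirical quantiles, and a range-cardinality estimate that counts covered buckets times the common bucket depth:

import numpy as np

def buildEqualDepthHist(values, nbin=100):
    # bucket edges at the empirical quantiles, so every bucket holds
    # roughly the same number of rows
    edges = np.quantile(values, np.linspace(0, 1, nbin + 1))
    depth = len(values) / nbin  # rows per bucket, by construction
    return edges, depth

def computeRangeCardinality(edges, depth, lo, hi):
    # estimate rows in [lo, hi] as the number of covered buckets
    # times the common bucket depth
    lo_pos = np.searchsorted(edges, lo)
    hi_pos = np.searchsorted(edges, hi)
    return max(hi_pos - lo_pos, 0) * depth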