Example No. 1
def load_features(config: dict) -> Tuple[pd.DataFrame, pd.DataFrame]:
    feature_path = config["dataset"]["feature_dir"]

    with timer("load train"):
        x_train = pd.concat(
            [
                load_pickle(f"{feature_path}/{f}_train.pkl")
                for f in config["features"]
                if Path(f"{feature_path}/{f}_train.pkl").exists()
            ],
            axis=1,
            sort=False,
        )

    with timer("load test"):
        x_test = pd.concat(
            [
                load_pickle(f"{feature_path}/{f}_test.pkl")
                for f in config["features"]
                if Path(f"{feature_path}/{f}_test.pkl").exists()
            ],
            axis=1,
            sort=False,
        )

    return x_train, x_test
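
The helpers used throughout these examples (load_pickle, save_pickle, timer) are not shown on this page. A minimal sketch of what they might look like is below; the gzip handling and the exact signatures are assumptions inferred from the .pkl / .pkl.gz paths in the examples, not the implementations from the original repositories.

import gzip
import pickle
import time
from contextlib import contextmanager


def load_pickle(path):
    # Handle both plain .pkl files and gzip-compressed .pkl.gz files (assumed behaviour).
    opener = gzip.open if str(path).endswith('.gz') else open
    with opener(path, 'rb') as f:
        return pickle.load(f)


def save_pickle(path, obj):
    opener = gzip.open if str(path).endswith('.gz') else open
    with opener(path, 'wb') as f:
        pickle.dump(obj, f)


@contextmanager
def timer(name):
    # Print the wall-clock time spent inside the block,
    # e.g. `with timer("load train"): ...` as in Example No. 1.
    start = time.time()
    yield
    print(f'[{name}] done in {time.time() - start:.1f} s')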
Example No. 2
def load_data(base_path, dataset, model, seq_flag, rob_flag):
    if model == 'GraphRNN':
        path = os.path.join(base_path, model, f'{dataset}_size10_ratio5')
        for subdir, dirs, files in os.walk(path):
            for filename in files:
                if '1000' in filename:
                    print(f'loading {subdir} {filename} ...',
                          end='',
                          flush=True)
                    pkl = load_pickle(os.path.join(subdir, filename))
                    print('done')
                    yield pkl, model
        return
    else:
        path = os.path.join(base_path, dataset, model)
        for subdir, dirs, files in os.walk(path):
            for filename in files[:5]:
                if 'csv' not in filename:
                    # if 'seq' not in filename and 'rob' not in filename:
                    print(f'loading {subdir} {filename} ... ',
                          end='',
                          flush=True)
                    pkl = load_pickle(os.path.join(subdir, filename))
                    trial = filename.split('_')[2].strip('.pkl.gz')
                    print('done')
                    yield pkl, trial
Example No. 3
    def process(self):
        data_list = []

        features = load_pickle(SOURCE_PATH / 'features.pkl')
        dist_matrix = load_pickle(SOURCE_PATH / 'dist_matrix.pkl')
        travel_matrix = load_pickle(SOURCE_PATH / 'travel_matrix.pkl')
        dist_matrix = 1 - dist_matrix
        dist_matrix = dist_matrix / np.max(
            dist_matrix) + travel_matrix / np.max(travel_matrix)
        dist_matrix = dist_matrix / np.max(dist_matrix)
        dist_matrix[dist_matrix <= 0.2] = 0

        print(dist_matrix)
        # for i in range(len(dist_matrix)):
        #     for j in range(len(dist_matrix)):
        #         if i == j:
        #             dist_matrix[i,j] = 1
        y = torch.Tensor(features[:, 0]).float()

        edge_index = torch.Tensor(
            np.argwhere(dist_matrix != 0).transpose()).long()
        edge_attr = torch.Tensor(dist_matrix[np.nonzero(dist_matrix)].reshape(
            edge_index.shape[1], )).long()
        # print(edge_attr.size())
        x = torch.Tensor(features[:, 1:]).float()
        n_features = x.shape[0]
        data = Data(x=x,
                    edge_index=edge_index,
                    edge_attr=edge_attr,
                    y=y,
                    n_features=n_features)
        data_list.append(data)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
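
One thing to note in the snippet above: after normalisation the non-zero entries of dist_matrix lie in (0.2, 1], so casting edge_attr with .long() truncates almost all of them to 0. If the edge weights are meant to be used downstream, a float tensor is probably the intent; this is a guess about the intended behaviour, not a fix taken from the original repository:

edge_attr = torch.tensor(dist_matrix[np.nonzero(dist_matrix)], dtype=torch.float)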
Example No. 4
def main():
    cfg = load_config()

    print('Loading Model & Data.')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load(cfg['INFERENCE_MODEL']).to(device)
    model.eval()
    data = tensor(load_pickle(cfg['INFERENCE_DATA'])).float().to(device)
    label = None
    if len(cfg['INFERENCE_LABEL']) > 0:
        label = load_pickle(cfg['INFERENCE_LABEL'])

    print('Starting Inference.')
    with torch.no_grad():
        y_hat = model(data)
        y_hat = torch.argmax(y_hat, dim=1).tolist()

    print(y_hat)

    if label is not None:
        print('Generating Report')
        print(
            classification_report(label,
                                  y_hat,
                                  target_names=['SR', 'SB', 'AFIB', 'ST']))
        print('Kappa Score: {:.2f}'.format(cohen_kappa_score(label, y_hat)))
Example No. 5
def load_data(base_path, dataset, models, seq_flag, rob_flag):
    for model in models:
        if model == 'GraphRNN':
            path = os.path.join(base_path, model, f'{dataset}_size10_ratio5')
            for subdir, dirs, files in os.walk(path):
                for filename in files:
                    if '1000' in filename:
                        print(f'loading {subdir} {filename} ...',
                              end='',
                              flush=True)
                        pkl = load_pickle(os.path.join(subdir, filename))
                        print('done')
                        yield pkl, model
            return
        else:
            path = os.path.join(base_path, dataset, model)
            for subdir, dirs, files in os.walk(path):
                for filename in files:
                    if '.csv' not in filename and 'jensen-shannon' not in subdir:
                        if (seq_flag or 'seq' not in filename) and (
                                rob_flag or 'rob' not in filename):
                            print(f'loading {subdir} {filename} ... ',
                                  end='',
                                  flush=True)
                            pkl = load_pickle(os.path.join(subdir, filename))
                            print('done')
                            yield pkl, model
Example No. 6
    def __init__(self, root: str):
        """
        Loads dataset to memory and transforms it to tensor.
        :param root: Directory where data files are located
        """
        self.root = root

        self.train = load_pickle(root + '/train_medium.pkl')
        self.label = torch.tensor(load_pickle(root + '/label_medium.pkl'))

        self.num_classes = max(max(self.train)) + 1
        self.ohe_mapping = torch.eye(self.num_classes)
Example No. 7
    def __init__(self, root: str, test: Optional[bool] = False):
        """
        Loads dataset to memory and transforms it to tensor.
        :param root: Directory where data files are located
        :param test: If True, load the test split instead of the train split
        """
        self.root = root

        if test:
            self.data = tensor(load_pickle(root + '/test_data.pkl'), dtype=torch.float)
            self.label = tensor(load_pickle(root + '/test_label.pkl'), dtype=torch.long)
        else:
            self.data = tensor(load_pickle(root + '/train_data.pkl'), dtype=torch.float)
            self.label = tensor(load_pickle(root + '/train_label.pkl'), dtype=torch.long)

        self.num_classes = int(max(self.label) + 1)
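
Dataset classes like the ones in Examples No. 6 and No. 7 normally also implement __len__ and __getitem__ so they can be wrapped in a torch.utils.data.DataLoader. A minimal usage sketch, assuming those two methods exist and using a hypothetical class name PickleDataset:

from torch.utils.data import DataLoader

dataset = PickleDataset(root='data', test=False)  # hypothetical name; only __init__ is shown above
loader = DataLoader(dataset, batch_size=64, shuffle=True)

for batch_data, batch_label in loader:
    # batch_data is a float tensor, batch_label a long tensor, as built in __init__
    pass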
Example No. 8
def test():
    X_train, Y_train, X_test, Y_test = prepare_data()
    path = '/home/user/Desktop/ml_workbook/temp/mnist/1621618265.4642844/final_10000.pkl'
    parameters = load_pickle(path)
    res = model.predict(X_test, Y_test, [40, 16, 10],
                        ['relu', 'relu', 'sigmoid'], parameters)
    print(res)
Example No. 9
def test_with_front_camera():
    vid = cv2.VideoCapture(0)

    st = time.time()
    path = '/home/user/Desktop/ml_workbook/temp/sign_models/1621589180.7416956/backup/backup_7000.pkl'
    parameters = load_pickle(path)

    while (True):
        # Capture the video frame
        # by frame
        ret, frame = vid.read()

        # Display the resulting frame
        cv2.imshow('frame', frame)

        ct = time.time()
        if ct - st > 1:
            st = ct
            out, _ = model.forward_propagation(input_frame(frame), parameters,
                                               [128, 16, 10],
                                               ['relu', 'relu', 'sigmoid'])
            out = np.argmax(out)
            map = {4: 1, 7: 3, 5: 8, 9: 5, 6: 4, 1: 0, 3: 6, 2: 7, 8: 2, 0: 9}
            print(out, map[out])
        # the 'q' button is set as the
        # quitting button you may use any
        # desired button of your choice
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # After the loop release the cap object
    vid.release()
    # Destroy all the windows
    cv2.destroyAllWindows()
Example No. 10
    async def main(self):
        self.bot.news = utils.load_news()
        self.bot._data = utils.load_pickle()
        self.bot._populations = utils.load_populations()
        self.bot.http_session = ClientSession()
        await self.parse_and_update()
        await self.bot.wait_until_ready()
        starting = True
        while True:
            try:
                before = time.time()
                if not starting:
                    self.interval_update += 1
                    try:
                        await self.parse_and_update()
                    except Exception as e:
                        logger.exception(e, exc_info=True)
                    finally:
                        await self.send_notifications()
                        await self.send_tracker()
                else:
                    starting = False

                after = time.time()
            except Exception as e:
                logger.exception(e, exc_info=True)
                after = time.time()  # ensure `after` is defined if the body raised early
            await asyncio.sleep(max(0, 3600 - int(after - before)))
Example No. 11
def load_data(input_path, dataset, model, filename_idx):
    path = os.path.join(input_path, dataset, model)
    input_filenames = [f for f in listdir(path) if isfile(join(path, f))]
    # print(input_filenames)
    filename = input_filenames[filename_idx]
    pkl = load_pickle(os.path.join(path, filename))
    trial = filename.split('_')[2].strip('.pkl.gz')
    return pkl, trial
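
Note that filename.split('_')[2].strip('.pkl.gz') strips any of the characters ., p, k, l, g, z from both ends rather than removing the literal extension; it works here only because the trial ids are numeric. A safer sketch (the dataset_model_trial.pkl.gz filename layout is an assumption):

def parse_trial(filename):
    # Remove the exact extension instead of a character set (str.removesuffix needs Python 3.9+).
    stem = filename.removesuffix('.pkl.gz').removesuffix('.pkl')
    return stem.split('_')[2]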
Example No. 12
def load_data(input_path, dataset, model):
    path = os.path.join(input_path, dataset, model)
    input_filenames = [f for f in listdir(path) if isfile(join(path, f))]
    # print(input_filenames)
    for filename in input_filenames:
        pkl = load_pickle(os.path.join(path, filename))
        trial = filename.split('_')[2].strip('.pkl.gz')
        yield pkl, trial
Example No. 13
def load_data(base_path, dataset):
    path = os.path.join(base_path, 'GraphRNN', f'{dataset}_size10_ratio5')
    for subdir, dirs, files in os.walk(path):
        for filename in files:
            if '1000' in filename:
                print(f'loading {subdir} {filename} ... ', end='', flush=True)
                pkl = load_pickle(os.path.join(subdir, filename))
                print('done')
                yield pkl, int(filename.split('_')[1])
    return
Example No. 14
def main():
    cfg = load_config()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Utilizing device {}'.format(device))

    map_char_to_int = load_pickle(cfg['PROCESSED_DATA_DIR'] +
                                  '/map_char_to_int.pkl')
    map_int_to_char = load_pickle(cfg['PROCESSED_DATA_DIR'] +
                                  '/map_int_to_char.pkl')
    num_chars = len(map_int_to_char)
    ohe_mapping = torch.eye(num_chars).to(device)

    print('Loading Model')
    model = LSTMPredictor(num_chars, cfg['HIDDEN_SIZE'],
                          cfg['BATCH_SIZE']).to(device)
    model.load_state_dict(torch.load(cfg['MODEL_PATH']))
    model.eval()

    while True:
        input_str = input('Tweet: ')
        if input_str == "exit":
            break

        input_str = input_str.lower()
        while True:
            input_enc = [map_char_to_int[char] for char in list(input_str)]
            input_ohe = ohe_mapping[input_enc].unsqueeze(dim=0)
            with torch.no_grad():
                output = model(input_ohe)
                output_prob = torch.softmax(output,
                                            dim=1).squeeze().cpu().numpy()

            result = np.random.choice(num_chars, p=output_prob)
            input_str += map_int_to_char[result]

            if len(input_str) >= 140 or (len(input_str) >= 100
                                         and input_str[-1] == "."):
                print(input_str)
                break
Example No. 15
def load_data(base_path, bucket, dataset, model):
    path = os.path.join(base_path, bucket, dataset, model)
    for subdir, dirs, files in os.walk(path):
        for filename in files:
            if 'bucket3' not in subdir or 'HRG' in subdir:
                print(f'\tloading {subdir} {filename} ... ',
                      end='',
                      flush=True)
                pkl = load_pickle(os.path.join(subdir, filename))
                print('done')
                yield pkl, model
Example No. 16
def load_data(base_path, dataset, model):
    path = os.path.join(base_path, dataset, model)
    for subdir, dirs, files in os.walk(path):
        for filename in files:  # TODO: this doesn't actually return the trial id...
            if '.csv' not in filename and 'jensen-shannon' not in subdir:
                #if ((seq_flag and 'seq' in filename) and (not seq_flag and 'seq' not in filename)) and ((rob_flag and 'rob' in filename) and (not rob_flag and 'rob' not in filename)):
                if 'seq' not in filename and 'rob' not in filename:
                    print(f'\tloading {subdir} {filename} ... ', end='', flush=True)
                    pkl = load_pickle(os.path.join(subdir, filename))
                    print('done')
                    yield pkl, model
Example No. 17
def load_data(base_path, dataset, models, seq_flag, rob_flag):
    for model in models:
        path = os.path.join(base_path, dataset, model)
        for subdir, dirs, files in os.walk(path):
            for filename in files:
                if 'csv' not in filename:
                    if 'seq' not in filename and 'rob' not in filename:
                        print(f'loading {subdir} {filename} ... ', end='', flush=True)
                        pkl = load_pickle(os.path.join(subdir, filename))#, subdir.split('/')[-1]
                        print('done')
                        yield pkl, filename
Example No. 18
def load_data(base_path, dataset, model, seq_flag, rob_flag):
    path = os.path.join(base_path, dataset, model)
    for subdir, dirs, files in os.walk(path):
        for filename in files:
            if 'csv' not in filename:
                # if 'seq' not in filename and 'rob' not in filename:
                print(f'loading {subdir} {filename} ... ', end='', flush=True)
                pkl = load_pickle(os.path.join(subdir, filename))
                trial = filename.split('_')[2].strip('.pkl.gz')
                print('done')
                yield pkl, trial
Example No. 19
def main():
    # INVARIANTS
    graphs = ['eucore', 'clique-ring-500-4']
    generators = ['BTER', 'BUGGE', 'Chung-Lu', 'CNRG', \
                  'Erdos-Renyi', 'HRG', 'SBM']
    kronecker = ['Kronecker']
    autoencoders = ['Deep_GCN_AE', 'Deep_GCN_VAE', \
                    'GCN_AE', 'GCN_VAE', \
                    'Linear_AE', 'Linear_VAE']
    neural = ['NetGAN']  # add GraphRNN when it's ready

    # VARIANTS
    data_path = '/Users/akira/repos/infinity-mirror/output'
    models = neural
    graph = graphs[1]
    sel = 'fast'
    cols = ['graph', 'type', 'orig_n', 'orig_m', 'orig_graph_obj', \
            'model', 'sel', 'trial_id', \
            'gen_id', 'gen_n', 'gen_m', 'gen_graph_obj', \
            'deltacon0', 'lambda_dist', 'degree_cvm', 'pagerank_cvm', \
            'pgd_pearson', 'pgd_spearman', 'node_diff', 'edge_diff']

    plt.rcParams['figure.figsize'] = [10, 20]
    #data = {col: [] for col in cols}

    for model in models:
        data = {col: [] for col in cols}
        path = os.path.join(data_path, graph, model)
        print(f'reading: {model}... ', end='', flush=True)
        for filename in os.listdir(path):
            if filename[5:7:1] == '20':
                trial_id = filename[8:10:1]
                try:
                    trial_id = int(trial_id)
                except ValueError:
                    trial_id = int(trial_id[:-1])
                root = load_pickle(os.path.join(path, filename))
                for row in get_stats_from_root(graph=graph, \
                                               model=model, \
                                               sel=sel, \
                                               root=root, \
                                               cols=cols, \
                                               trial_id=trial_id):
                    for col, val in row.items():
                        data[col].append(val)
        print('done')
        group_plot(pd.DataFrame(data), graph, model,
                   f'/Users/akira/figures/{graph}/')

    #df = pd.DataFrame(data)

    # FIGURE SIZE

    return
Example No. 20
    def _do_masking(self):
        try:
            # get key and dir's abs_path
            key_str = self._get_key()
            file_path = self._get_path()
            # print(key_str, file_path)
            wb_read = load_workbook(file_path, read_only=True)
            wb_write = Workbook(write_only=True)
            hash_bytes = load_pickle('mapping.pkl')
            # get the count of all data rows
            row_count = 0
            current_count = 1
            for sheetname in wb_read.sheetnames:
                sheet_read = wb_read[sheetname]
                row_count += sheet_read.max_row
            # read data and do masking, and then save the masked rows
            for sheetname in wb_read.sheetnames:
                print('processing sheet {}:'.format(sheetname))
                sheet_read = wb_read[sheetname]
                # sheet_row_count = sheet_read.max_row
                # print(sheet_row_count)
                sheet_write = wb_write.create_sheet(title=sheetname)
                rows_read = sheet_read.rows
                for row in rows_read:
                    row_values = []
                    for cell in row:
                        row_values.append(cell.value)
                    # do masking
                    if current_count > 1:
                        masked_row, hash_bytes_added = mask_row(
                            key_str, sheetname, row_values)
                        hash_bytes.update(hash_bytes_added)
                        sheet_write.append(masked_row)
                    else:
                        sheet_write.append(row_values)
                    current_count += 1
                    if current_count % 100 == 0 or current_count == row_count:
                        self._set_processBar(current_count / row_count * 100)
                        print('完成了{}%'.format(current_count / row_count * 100))  # "completed {}%"
            save_pickle('mapping.pkl', hash_bytes)

            # "Data masking succeeded; click OK, then save the file"
            QMessageBox.information(QWidget(), "Information",
                                    "数据脱敏成功,点击确认后请保存文件")
            # "Save as an .xlsx document"
            write_path = QFileDialog.getSaveFileName(caption="保存为.xlsx文档",
                                                     directory="./")[0]
            # write_path = './加密数据.xlsx'  ("masked data.xlsx")
            # print(write_path)
            wb_write.save(write_path)
            QMessageBox.information(QWidget(), "Information", "保存完成")  # "Save complete"
        except Exception as e:
            QMessageBox.warning(QWidget(), "warning", str(e))
            print(e)
Example No. 21
def test_image_file(file):
    img = cv2.imread(file)
    img = cv2.resize(img, (64, 64))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = np.reshape(img, (1, 64 * 64)).T

    path = '/home/user/Desktop/ml_workbook/temp/sign_models/1621589180.7416956/backup/backup_7000.pkl'
    parameters = load_pickle(path)
    out, _ = model.forward_propagation(img, parameters, [128, 16, 10],
                                       ['relu', 'relu', 'sigmoid'])
    print(out)
    out = np.argmax(out)
    return out
Example No. 22
    async def parse_and_update(self):
        updating = await up.update(self.bot.http_session)
        self.bot.news = utils.load_news()
        self.bot._data = utils.load_pickle()
        logger.info("New data downloaded")
        try:
            await plot_csv(utils.STATS_PATH, self.bot._data)
            await plot_csv(utils.STATS_LOG_PATH,
                           self.bot._data,
                           logarithmic=True)
        except Exception as e:
            logger.exception(e, exc_info=True)

        logger.info("New plot generated")
Example No. 23
    def extract_features(self, model, model_path, model_tag, used_set,
                         loaders_dic):
        """
        inputs:
            model : The loaded model containing the feature extractor
            loaders_dic : Dictionary containing training and testing loaders
            model_path : Where the model was loaded from
            model_tag : Which model ('final' or 'best') to load
            used_set : Set used, either 'test' or 'val'
            n_ways : Number of ways for the task

        returns:
            extracted_features_dic : Dictionary containing all extracted features and labels
        """

        # Load features from memory if previously saved ...
        save_dir = os.path.join(model_path, model_tag, used_set)
        filepath = os.path.join(save_dir, 'output.plk')
        if os.path.isfile(filepath):
            extracted_features_dic = load_pickle(filepath)
            print(" ==> Features loaded from {}".format(filepath))
            return extracted_features_dic

        # ... otherwise just extract them
        else:
            print(" ==> Beginning feature extraction")
            if not os.path.isdir(save_dir):
                os.makedirs(save_dir)

        model.eval()
        with torch.no_grad():

            all_features = []
            all_labels = []
            for i, (inputs, labels,
                    _) in enumerate(warp_tqdm(loaders_dic['test'], False)):
                inputs = inputs.to(self.device)
                outputs, _ = model(inputs, True)
                all_features.append(outputs.cpu())
                all_labels.append(labels)
            all_features = torch.cat(all_features, 0)
            all_labels = torch.cat(all_labels, 0)
            extracted_features_dic = {
                'concat_features': all_features,
                'concat_labels': all_labels
            }
        print(" ==> Saving features to {}".format(filepath))
        save_pickle(filepath, extracted_features_dic)
        return extracted_features_dic
Example No. 24
def test():
    path = '/temp/datasets/sign'

    test_X = np.load(os.path.join(path, 'test_X.npy'))
    test_Y = np.load(os.path.join(path, 'test_Y.npy'))

    train_X = test_X.reshape((test_X.shape[0], 64 * 64)).T
    train_Y = test_Y.T

    # path = os.path.join('../../temp/sign_models/', '1621577378.1671953/backup/backup_1500.pkl')
    path = '/home/user/Desktop/ml_workbook/temp/sign_models/1621589180.7416956/backup/backup_7000.pkl'
    parameters = load_pickle(path)
    res = model.predict(train_X, train_Y, [128, 32, 10],
                        ['relu', 'relu', 'sigmoid'], parameters)
    print(res)
Example No. 25
def create_flight_matrix(provinces, travel_data):
    """

    Args: provinces (numpy.ndarray): nodes in the graph travel_data (numpy.ndarray): flights frequencies data (
    https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236)

    Returns:
        flight_matrix (numpy.ndarray): flight frequencies between two regions
    """
    # print(provinces)
    # print(travel_data)
    print(provinces)
    flight_matrix = np.zeros((len(provinces), len(provinces)))
    travel_data = travel_data.head(n_rows)
    travel_data = travel_data.groupby(['ORIGIN_STATE_NM', 'DEST_STATE_NM']).size().to_frame('COUNT').reset_index()\
        .rename(columns={'ORIGIN_STATE_NM': 'ORIGIN', 'DEST_STATE_NM': 'DEST'})
    # print(travel_data)
    # city2state = travel_data.groupby(['ORIGIN_CITY_NAME', 'DEST_STATE_NM']).size().to_frame('COUNT').reset_index()
    # .rename(columns = {'ORIGIN_CITY_NAME': 'ORIGIN', 'DEST_STATE_NM': 'DEST'})
    # state2city = travel_data.groupby(['ORIGIN_STATE_NM', 'DEST_CITY_NAME']).size().to_frame('COUNT').reset_index()
    # .rename(columns = {'ORIGIN_STATE_NM': 'ORIGIN', 'DEST_CITY_NAME': 'DEST'})
    # city2city = travel_data.groupby(['ORIGIN_CITY_NAME', 'DEST_CITY_NAME']).size().to_frame('COUNT').reset_index()
    # .rename(columns = {'ORIGIN_CITY_NAME': 'ORIGIN', 'DEST_CITY_NAME': 'DEST'})
    # travel_data = pd.concat([state2state, city2city, state2city, city2state])
    frequency_mappings = dict(zip(list(zip(travel_data.ORIGIN.tolist(), travel_data.DEST.tolist())), travel_data.COUNT
                                  .tolist()))
    # print(frequency_mappings)
    abbr_state_mappings = load_pickle(SOURCE_PATH / 'data/abbrev_state_mappings.pkl')

    for i in range(len(provinces)):
        for j in range(len(provinces)):
            try:
                # if provinces[i].find(', ') != -1 and provinces[j].find(', ') != -1:
                #     flight_matrix[i, j] = frequency_mappings[(abbr_state_mappings[provinces[i][-2:]],
                #                                               abbr_state_mappings[provinces[j][-2:]])]
                # elif provinces[i].find(', ') != -1:
                #     flight_matrix[i, j] = frequency_mappings[(abbr_state_mappings[provinces[i][-2:]], provinces[j])]
                # elif provinces[j].find(', ') != -1:
                #     flight_matrix[i, j] = frequency_mappings[(provinces[i], abbr_state_mappings[provinces[j][-2:]])]
                # else:
                flight_matrix[i, j] = frequency_mappings[(provinces[i], provinces[j])]
            except KeyError:
                continue

    return flight_matrix
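
As an alternative to the nested lookup loop above, the same matrix can be built with a pandas pivot table; this sketch assumes provinces holds the same state names that appear in the ORIGIN/DEST columns:

counts = travel_data.pivot_table(index='ORIGIN', columns='DEST',
                                 values='COUNT', aggfunc='sum', fill_value=0)
flight_matrix = (counts.reindex(index=provinces, columns=provinces, fill_value=0)
                 .to_numpy(dtype=float))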
Example No. 26
    async def manual(self, ctx):
        if ctx.author.id in (90184563405361152, 162200556234866688):
            await up._write(config("uri_data") + "?revalidate=true",
                            utils.DATA_PATH,
                            self.bot.http_session,
                            headers={"Super-Secret": config("uri_key")})
            try:
                self.bot._data = utils.load_pickle()
                await plot_csv(utils.STATS_PATH, self.bot._data)
                await plot_csv(utils.STATS_LOG_PATH,
                               self.bot._data,
                               logarithmic=True)
                await ctx.send("Manual update is a success!")
            except Exception as e:
                await ctx.send(f"{type(e).__name__} : {e}")
        else:
            await ctx.send(
                "If you know this command that means you saw it on github haha :p But still, you're not allowed to do this."
            )
Example No. 27
    def generate(self, num_graphs: int, gen_id: int) -> List[nx.Graph]:

        dump = f'./src/netgan/dumps'
        gname = f'{self.input_graph.name}_{self.trial}'
        pickle_path = f'{dump}/{gname}.pkl.gz'

        proc = sub.run(
            f'conda init bash; . ~/.bashrc; conda activate netgan; python src/netgan/gen.py {gname} {pickle_path} {num_graphs}',
            shell=True)  # , stdout=sub.DEVNULL)

        assert proc.returncode == 0, 'error in NetGAN generate'
        output_pickle_path = f'{dump}/{gname}_graphs.pkl.gz'

        generated_graphs = []
        for i, gen_graph in enumerate(load_pickle(output_pickle_path)):
            gen_graph.name = f'{self.input_graph.name}_{self.trial}_{i + 1}'  # adding the number of graph
            gen_graph.gen_id = gen_id
            generated_graphs.append(gen_graph)

        delete_files(output_pickle_path)
        return generated_graphs
Example No. 28
def stats_computation(dataset, model, trial, filename, stats):
    path = Path(
        get_imt_output_directory()) / 'pickles' / dataset / model / filename
    graph_list = load_pickle(path)
    assert isinstance(
        graph_list,
        list), f'Expected type "list" and got type {type(graph_list)}.'
    assert all(isinstance(g, nx.Graph) for g in graph_list
               ), f'Expected a list of nx.Graph and got disappointed instead.'

    ColorPrint.print_orange(f'{filename} has length {len(graph_list)}')

    for idx, G in enumerate(graph_list):
        gs_obj = GraphStats(graph=G,
                            dataset=dataset,
                            model=model,
                            trial=trial,
                            iteration=idx)
        gs_obj.write_stats_jsons(stats=stats)

    return None
Example No. 29
def load_data_eval():
    print('==================')
    print('Loading data ...')

    start = time()
    # Define datasets
    data = pd.read_csv(all_paths['rois_{}_dataset_csv'.format(
        cfg.args['ROI_CROP_TYPE'])])

    if cfg.cli_args.random_split:
        all_patients = data.patient_id.unique()
        random.shuffle(all_patients)
        pivot_idx = int(cfg.TRAIN_TEST_SPLIT * len(all_patients))
        test_patients = all_patients[pivot_idx:]
    else:
        patients_split = u.load_yaml(all_paths['train_test_split_yaml'])
        test_patients = patients_split['EVAL']

    test_data = data.loc[data.patient_id.isin(test_patients)]
    print('{} patients and {} slices'.format(len(test_patients),
                                             len(test_data)))

    load_preprocessing.load_transform_image()
    _, testset = get_train_test_datasets(
        test_data=test_data, transform_test=cfg.transform_image_test)

    print('==================')
    print('Preprocessing data ...')

    cfg.preprocessing = u.load_pickle(
        join(cfg.cli_args.tensorboard_path, 'preprocessing.pkl'))
    # cfg.res['preprocessing'] = [str(cfg.preprocessing)]
    # cfg.res['batch_preprocessing'] = [str(cfg.transform_image_train)]

    testset.data = cfg.preprocessing(testset.data)
    cfg.testloader = DataLoader(testset,
                                batch_size=cfg.BATCH_SIZE,
                                shuffle=False)

    print("Dataset process time: ", time() - start)
Example No. 30
    def generate(self, num_graphs: int,
                 gen_id: int) -> Union[List[nx.Graph], None]:
        edgelist_path = f'./src/hrg/{self.initial_gname}_{self.trial}.g'
        nx.write_edgelist(self.input_graph, edgelist_path, data=False)
        output_pickle_path = f'./src/hrg/Results/{self.initial_gname}_{self.trial}_hstars.pickle'

        completed_process = sub.run(
            f'. ./envs/hrg/bin/activate; cd src/hrg; python2 exact_phrg.py --orig {self.initial_gname}_{self.trial}.g --trials {num_graphs}; deactivate;',
            shell=True,
            stdout=sub.DEVNULL)

        if completed_process.returncode != 0 or not check_file_exists(
                output_pickle_path):
            CP.print_blue(f'Error in HRG: "{self.input_graph.name}"')
            raise Exception('Generation failed!')

        else:
            generated_graphs = []
            gen_graphs = load_pickle(output_pickle_path)
            if not isinstance(gen_graphs,
                              list) or len(gen_graphs) != num_graphs:
                raise Exception('Generation failed!')

            for i, gen_graph in enumerate(gen_graphs):
                gen_graph = self._make_graph(gen_graph)
                gen_graph.name = f'{self.input_graph.name}_{self.trial}_{i + 1}'  # adding the number of graph
                gen_graph.gen_id = gen_id

                generated_graphs.append(gen_graph)

            if not isinstance(generated_graphs,
                              list) or len(generated_graphs) != num_graphs:
                print('HRG failed')
                raise Exception('Generation failed!')

        # delete_files(edgelist_path, output_pickle_path)
        return generated_graphs