Example #1
def import_dataset_arff(f, explain_indices: List[int],
                        random_explain_dataset: bool) -> Tuple[Dataset, Dataset, List[str]]:
    dataset = load_arff(f)

    dataset_len = len(dataset)
    train_indices = list(range(dataset_len))

    if random_explain_dataset:
        random.seed(1)
        # Cap the explain sample at MAX_SAMPLE_COUNT; for small datasets
        # (fewer than 2 * MAX_SAMPLE_COUNT instances) sample 20% instead
        MAX_SAMPLE_COUNT = 100
        if dataset_len < (2 * MAX_SAMPLE_COUNT):
            samples = int(0.2 * dataset_len)
        else:
            samples = MAX_SAMPLE_COUNT

        # Randomly pick some instances to remove from the training dataset and use in the
        # explain dataset
        explain_indices = list(random.sample(train_indices, samples))
    for i in explain_indices:
        train_indices.remove(i)

    train_dataset = Dataset.from_indices(train_indices, dataset)
    explain_dataset = Dataset.from_indices(explain_indices, dataset)

    return train_dataset, explain_dataset, [str(i) for i in explain_indices]
Example #2
    def test_equals_nequals(self):
        """Test (not) equals cuts"""
        test_df = pd.DataFrame({'testvar1': [1, 0, 1, 0, 0, 0, 1, 1, 1, 0]})
        test_cut_dicts = [{
            'name': 'cut 1',
            'cut_var': 'testvar1',
            'relation': '=',
            'cut_val': 1,
            'group': 'var1cut',
            'is_symmetric': True
        }, {
            'name': 'cut 2',
            'cut_var': 'testvar1',
            'relation': '!=',
            'cut_val': 1,
            'group': 'var1cut',
            'is_symmetric': False
        }]
        cut_label = config.cut_label

        Dataset._create_cut_columns(test_df, test_cut_dicts)
        out_column1 = pd.Series(data=[
            True, False, True, False, False, False, True, True, True, False
        ],
                                name='cut 1' + cut_label)
        out_column2 = pd.Series(data=[
            False, True, False, True, True, True, False, False, False, True
        ],
                                name='cut 2' + cut_label)

        assert pd.Series.equals(test_df['cut 1' + cut_label], out_column1), \
            f"Expected {out_column1}, got {test_df['cut 1' + cut_label]}"
        assert pd.Series.equals(test_df['cut 2' + cut_label], out_column2), \
            f"Expected {out_column2}, got {test_df['cut 2' + cut_label]}"
Example #3
def convert():
    # Load model
    image_shape = (224, 224)
    detector = Detector(image_shape, 'models')
    model = detector.model
    # Data pipeline
    batch_size = 64
    ds = Dataset(image_shape, batch_size)
    pipeline, _ = ds.pipeline()

    def representative_dataset_gen():
        for tensor in pipeline.take(1):
            raw_imgs, mask_imgs = tensor
            img = np.array([raw_imgs[0]])
            yield [img]  # Shape (1, height, width, channel)

    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset_gen
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8
    converter.inference_output_type = tf.uint8
    tflite_quant_model = converter.convert()

    MODEL = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         '../models/tpu/ohmnilabs_floornet_224_quant_postprocess.tflite')
    with open(MODEL, 'wb') as f:
        f.write(tflite_quant_model)
Example #4
def _compute_prediction_difference_subset(training_dataset: Dataset,
                                          encoded_instance: pd.Series,
                                          rule_body_indices,
                                          clf,
                                          instance_class_index):
    encoded_instance_x = encoded_instance[:-1].to_numpy()

    rule_attributes = [
        list(training_dataset.attributes())[rule_body_index - 1][0] for
        rule_body_index in rule_body_indices]

    # Take only the considered attributes from the dataset
    filtered_dataset = training_dataset.X()[rule_attributes]

    # Count how many times a set of attribute values appears in the dataset
    attribute_sets_occurrences = dict(
        Counter(map(tuple, filtered_dataset.values.tolist())).items())

    # Compute the perturbed prediction difference for each distinct set of
    # attribute values found in the dataset
    differences = [
        _compute_perturbed_difference(item, clf, encoded_instance,
                                      instance_class_index,
                                      rule_attributes, training_dataset) for
        item in
        attribute_sets_occurrences.items()]

    prediction_difference = sum(differences)

    # p(y=c|x) i.e. Probability that instance x belongs to class c
    p = clf.predict_proba(encoded_instance_x.reshape(1, -1))[0][instance_class_index]
    prediction_differences = p - prediction_difference

    return prediction_differences
Example #5
    def setUp(self):
        name = "cassandra20200615"
        mode = "train"
        repositories = [{
            "name": "cassandra20200615",
            "url": "",
            "CommitTarget": "",
            "filterFile": "",
            "codeIssueJira": "",
            "projectJira": ""
        }]
        parameters = {}
        option = {
            "name": name,
            "mode": mode,
            "repositories": repositories,
            "parameters": parameters  #needless when to infer.
        }
        option = Option(option)

        self.dataset = Dataset(option.getRepositorieImproved())
        self.repository = repositories[0]
        repository_path = os.path.join(UtilPath.Test(), "testDataset",
                                       self.repository["name"], "repository")
        print(repository_path)
        self.gr = GitRepository(repository_path)
Example #6
def train():
    # Config params
    image_shape = (224, 224)
    batch_size = 64
    epochs = 30
    # Dataset & model
    detector = Detector(image_shape)
    ds = Dataset(image_shape, batch_size)
    training_pipeline, validation_pipeline = ds.pipeline()
    steps_per_epoch = ds.num_training//batch_size
    # Start training
    model_history = detector.train(
        training_pipeline, epochs, steps_per_epoch,
        validation_pipeline,
    )
    # Visualize loss
    loss = model_history.history['loss']
    val_loss = model_history.history['val_loss']
    range_of_epochs = range(epochs)
    plt.figure()
    plt.plot(range_of_epochs, loss, 'r', label='Training loss')
    plt.plot(range_of_epochs, val_loss, 'bo', label='Validation loss')
    plt.title('Training Loss and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss Value')
    plt.ylim([0, 1])
    plt.legend()
    plt.show()
Example #7
def main():
    network_dataset = Dataset('twitters2')

    nl = read_file_to_dict(os.path.join(DATASET_PATH, 'TwitterSample2.txt'))

    # Sample the first half of the node list for the subgraph
    nbunch = nl[0:int(len(nl) // 2)]
    network_dataset.graph = network_dataset.graph.subgraph(nbunch)

    server_list = [Server(k) for k in range(0, 512)]
    vp_number = 0

    node_list = list(network_dataset.graph.nodes)
    random.shuffle(node_list)
    print('Dataset information: TwitterSample2\nNodes Number:',
          network_dataset.graph.order(), '\nEdge Number:',
          network_dataset.graph.size())
    print('Using Random Partitioning Method...\nServer Number:',
          len(server_list), '\nVirtual Primary Copy Number:', vp_number,
          '\nWrite Frequency of Nodes: 1')
    start = time.time()
    m = RandomP(server_list, network_dataset, node_list)
    m.add_new_primary_node(server_list, vp_number)
    m.check_server_load()
    m.check_locality()
    end = time.time()
    print('Random Partitioning Time:', end - start, 'seconds')
    m.compute_inter_sever_cost()
    path = RANDOM_GRAPH_PATH
    m.save_all(path)
Example #8
def test_encode(filename, seq_length, text):
    dataset = Dataset([filename], seq_length)
    encoded = dataset.encode(text)
    assert len(encoded) == len(text)
    for label in encoded:
        assert sum(label) == 1
        assert len(label) == dataset.vocab_size
Example #9
def make_ct_datasets(configs, paths):
    TRAIN_SIZE = 0.9

    o_img_paths = np.array(
        sorted(glob(os.path.join(paths['data']['path'], 'Original/*'))))
    f_img_paths = np.array(
        sorted(glob(os.path.join(paths['data']['path'], 'Filtered/*'))))

    img_paths_train = {
        'original': o_img_paths[:int(TRAIN_SIZE * len(o_img_paths))],
        'filtered': f_img_paths[:int(TRAIN_SIZE * len(f_img_paths))]
    }
    img_paths_val = {
        'original': o_img_paths[int(TRAIN_SIZE * len(o_img_paths)):],
        'filtered': f_img_paths[int(TRAIN_SIZE * len(f_img_paths)):]
    }

    crop_size = configs['data_params']['augmentation_params']['crop_size']
    transforms_train = Compose([RandomCrop(crop_size), ToFloat(), ToTensor()])
    transforms_val = Compose([RandomCrop(1344), ToFloat(), ToTensor()])

    train_loader = DataLoader(
        Dataset(img_paths_train, transforms_train),
        batch_size=configs['data_params']['batch_size'],
        num_workers=configs['data_params']['num_workers'],
        shuffle=True)

    val_loader = DataLoader(Dataset(img_paths_val, transforms_val),
                            batch_size=1,
                            num_workers=configs['data_params']['num_workers'],
                            shuffle=False)

    return train_loader, val_loader
Example #10
    def __init__(
            self,
            config,
            name: str,
            device=torch.device('cuda'),
            model_path: str = None,
    ):

        self.name = name
        self.config = config
        self.device = device
        self.model = Network(config).to(self.device)
        if model_path is not None:
            chckpt = torch.load(model_path)
            self.model.load_state_dict(chckpt)
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.config.lr)
        self.writer = SummaryWriter(
            os.path.join(self.config.work_dir, self.name))

        self.training_dataset = Dataset(dataset_type='training', config=config)
        self.validation_dataset = Dataset(dataset_type='validation',
                                          config=config)

        self.training_dataloader = torch.utils.data.DataLoader(
            self.training_dataset,
            batch_size=self.config.batch_size,
            shuffle=True,
            drop_last=True,
        )
        self.validation_dataloader = torch.utils.data.DataLoader(
            self.validation_dataset,
            batch_size=self.config.batch_size,
        )
        self.criterion = torch.nn.CrossEntropyLoss()
Example #11
def create_algo(server_count=4, node_count=10):
    data = Dataset(dataset_str='facebook')
    data.graph = nx.Graph()
    for i in range(node_count):
        data.graph.add_node(i)
    server_list = [Server(serer_id=i) for i in range(server_count)]
    algo = OfflineAlgo(server_list=server_list, network_dataset=data)
    return algo
Example #12
def show_predictions():
    image_shape = (224, 224)
    detector = Detector(image_shape)
    ds = Dataset(image_shape)
    pipeline, _ = ds.pipeline()
    for image, mask in pipeline.take(1):
        pred_mask = detector.predict(image)
        __display([image[0], mask[0], __create_mask(pred_mask)])
Example #13
def main():

    args = Parser().get_parser().parse_args()
    print("=====Configurations=====\n", args)

    # Load Configuration and data
    config = Config(args)
    dataset = Dataset(config)

    start = time.time()

    outer_tracking = {}
    # TODO Load data once across all folds

    headers = ['O_EPOCH', 'I_EPOCH', 'TR_F1', 'VAL_LOSS', 'VAL_F1', 'k-MICRO-F1', 'k-MACRO-F1', 'MICRO-F1', 'MACRO-F1', 'MC_ACC', 'ML_ACC', 'BAE']
    perc_results = [[]]*len(config.train_percents)
    for perc_id, train_percent in enumerate(config.train_percents):
        print('\n\n############################  Percentage: ', train_percent, '#####################################')
        # config.train_percent = train_percent
        fold_results = [[]]*len(config.train_folds)
        for fold_id, fold in enumerate(config.train_folds):
            print('\n------- Fold: ', fold)

            # config.train_fold = fold
            dataset.load_indexes(train_percent, fold)
            values = train_model(dataset)

            if config.prop_model_name == 'propagation_gated':
                np.save(path.join(config.paths['experiment'],
                                  config.dataset_name + '-' + str(fold) + '-' + str(config.max_depth) + '_gating_scores.npy'), scores)

            outer_tracking[fold_id] = values
            fold_results[fold_id] = values[-1]
            if not config.save_model:
                remove_directory(config.paths['perc_' + train_percent] + '_' + fold)

        fold_results = np.vstack(fold_results)
        file_name = os.path.join(config.paths['perc_' + train_percent], 'metrics.txt')
        np.savetxt(file_name, fold_results, header=str(headers), comments='', fmt='%1.5f')

        perc_results[perc_id] = np.mean(fold_results, axis=0)
        if not config.save_model:
            remove_directory(config.paths['perc_' + train_percent])

    results = np.vstack(perc_results)
    file_name = os.path.join(config.paths['experiment'], 'metrics.txt')
    np.savetxt(file_name, results, header=str(headers), comments='', fmt='%1.5f')
    print('Micro: ', results[0][8], '|  Macro: ', results[0][9])

    np.save(path.join(config.paths['experiment'], config.dataset_name+str(config.max_depth)+'_batch_results.npy'), outer_tracking)


    # TODO code inference - Load model and run test
    print('Time taken:', time.time() - start)
Example #14
def test_sample(filename, batch_size, seq_length):
    dataset = Dataset([filename], seq_length)
    count = 0
    batch = dataset.sample(batch_size)
    for seq in batch.inputs:
        assert len(seq) == seq_length
        for i in range(seq_length):
            # One-hot encoded
            assert sum(seq[i]) == 1
            assert len(seq[i]) == dataset.vocab_size
        count += 1
    assert count == batch_size
Example #15
def test_batch(filename, batch_size, seq_length):
    dataset = Dataset([filename], seq_length)
    for batch in dataset.batch(batch_size):
        # The number of elements in the batch is `batch_size`
        assert len(batch.inputs) == batch_size
        assert len(batch.targets) == batch_size
        for i in range(batch_size):
            # Each element in the batch is a sequence
            assert len(batch.inputs[i]) == seq_length
            assert len(batch.targets[i]) == seq_length
            for j in range(seq_length):
                # One-hot encoded
                assert sum(batch.inputs[i][j]) == 1
                assert len(batch.inputs[i][j]) == dataset.vocab_size
Example #16
    def test_derived_variable(self, tmp_root_datafile):
        derived_vars = {
            'dev_var1': {
                'var_args': ['testvar1', 'testvar2'],
                'tree': 'tree1',
                'func': lambda x, y: x + y
            },
            'dev_var2': {
                'var_args': ['testvar4'],
                'tree': 'tree2',
                'func': lambda x: 2 * x
            }
        }
        vars_to_cut = self.test_vars_to_cut.copy() | {'dev_var1', 'dev_var2'}
        expected_output = self.expected_output.copy()
        expected_output['testvar2'] = np.arange(1000) * 1.1
        expected_output['testvar4'] = np.arange(1000) * -1
        expected_output['dev_var1'] = expected_output[
            'testvar1'] + expected_output['testvar2']
        expected_output['dev_var2'] = 2 * expected_output['testvar4']
        output = Dataset._build_dataframe(tmp_root_datafile,
                                          TTree_name=self.default_TTree,
                                          cut_list_dicts=self.test_cut_dicts,
                                          vars_to_cut=vars_to_cut,
                                          calc_vars_dict=derived_vars)
        # test column names are the same
        assert set(output.columns) == set(expected_output.columns)
        # test contents are the same
        for col in output.columns:
            assert np.array_equal(output[col], expected_output[col])
Example #17
def load_data():
    # read
    training_df = pd.read_csv(os.path.join(DEFAULT_DATA_FOLDER, "training"),
                              sep="\t",
                              dtype={
                                  "user_id": str,
                                  "item_id": str
                              })
    test_df = pd.read_csv(os.path.join(DEFAULT_DATA_FOLDER, "test"),
                          sep="\t",
                          dtype={
                              "user_id": str,
                              "item_id": str
                          })
    item_info_long = pd.read_csv(os.path.join(DEFAULT_DATA_FOLDER,
                                              "item_features"),
                                 sep="\t",
                                 dtype={"item_id": str})
    item_info_wide = item_info_long.pivot(
        index="item_id", columns="feature",
        values="value").reset_index().fillna(0)

    # split the "rating" column off as the prediction target
    y_train = training_df.rating.values.astype(float)
    training_df = training_df.drop(columns=["rating"])

    y_test = test_df.rating.values.astype(float)
    test_df = test_df.drop(columns=["rating"])

    return Dataset(training_df, y_train, test_df, y_test, item_info_wide)
Example #18
    def test_EncodedDataset_constructor(self):
        dataset = ch.datasets.TupleDataset([
            Entry("entry1", [Example(([10, 20, 30], ), 10)],
                  dict([["HEAD", True], ["SORT", False]])),
            Entry("entry2", [Example(([30, 20, 10], ), [10, 20, 30])],
                  dict([["HEAD", False], ["SORT", True]]))
        ])

        cdataset = EncodedDataset(
            Dataset(dataset, DatasetMetadata(1, set(["HEAD", "SORT"]), 256,
                                             5)))
        [(types0, values0, attribute0),
         (types1, values1, attribute1)] = list(cdataset)

        self.assertTrue(np.all([[[0, 1], [1, 0]]] == types0))
        self.assertTrue(
            np.all([[[266, 276, 286, 512, 512], [266, 512, 512, 512, 512]]] ==
                   values0))
        self.assertTrue(np.all(np.array([1, 0]) == attribute0))

        self.assertTrue(np.all([[[0, 1], [0, 1]]] == types1))
        self.assertTrue(
            np.all([[[286, 276, 266, 512, 512], [266, 276, 286, 512, 512]]] ==
                   values1))
        self.assertTrue(np.all(np.array([0, 1]) == attribute1))
Example #19
    def __init__(self, raw_dataframe, data_config):
        self.raw = raw_dataframe
        if ("test_ratio" in data_config.keys()
                and data_config.test_ratio is not None):
            self.train_test_split = True
            train_data, test_data = train_test_split(
                self.raw,
                test_size=data_config.test_ratio,
                random_state=0,
                stratify=self.raw[["label"]])
            self.train = Dataset(train_data)
            self.test = Dataset(test_data)
        else:
            self.train_test_split = False
            train_data = self.raw
            self.train = pd.DataFrame(raw_dataframe)
Example #20
def test_load(filename, start_seq):
    seq_length = 25
    dataset = Dataset([filename], seq_length)
    model = RNNTextGenerator(25,
                             dataset.vocab_size,
                             meta_graph='./model/RNNTextGenerator')
    print(model.generate(dataset, start_seq, 50))
Example #21
    def test_alt_trees(self, tmp_root_datafile):
        newcut = {
            'name': 'cut 3',
            'cut_var': 'testvar4',
            'relation': '<',
            'cut_val': -10,
            'group': 'var4cut',
            'is_symmetric': False,
            'tree': 'tree2'
        }
        list_of_dicts = self.test_cut_dicts.copy()
        list_of_dicts += [newcut]
        expected_output = self.expected_output.copy()
        expected_output['testvar4'] = np.arange(1000) * -1
        expected_output['eventNumber'] = np.arange(1000)
        output = Dataset._build_dataframe(tmp_root_datafile,
                                          TTree_name=self.default_TTree,
                                          cut_list_dicts=list_of_dicts,
                                          vars_to_cut=self.test_vars_to_cut)
        assert set(output.columns) == set(expected_output.columns)
        # test contents are the same
        for col in output.columns:
            assert np.array_equal(output[col], expected_output[col]), \
                f"Dataframe builder failed in column {col};\n" \
                f"Expected: \n{expected_output[col]},\n" \
                f"Got: \n{output[col]}"
Example #22
def view_samples():
    image_shape = (224, 224)
    ds = Dataset(image_shape)
    pl, _ = ds.pipeline()
    num_of_samples = 5
    for raw_imgs, mask_imgs in pl.take(1):
        samples = zip(raw_imgs[:num_of_samples], mask_imgs[:num_of_samples])
        samples = list(samples)
        length = len(samples)
        plt.figure(figsize=(5, 5 * length))
        for i, (raw_img, mask_img) in enumerate(samples):
            plt.subplot(length, 2, 2 * i + 1)
            plt.imshow(raw_img)
            plt.subplot(length, 2, 2 * i + 2)
            mask_img = np.reshape(mask_img, image_shape)
            plt.imshow(mask_img)
        plt.show()
Example #23
    def test_missing_branch(self, tmp_root_datafile):
        missing_branches = {'missing1', 'missing2'}
        with pytest.raises(ValueError) as e:
            _ = Dataset._build_dataframe(tmp_root_datafile,
                                         TTree_name=self.default_TTree,
                                         cut_list_dicts=self.test_cut_dicts,
                                         vars_to_cut=missing_branches)
        assert e.match(r"Missing TBranch\(es\) .* in TTree 'tree1' of file .*")
Example #24
    def test_relocate_process(self):
        data = Dataset(dataset_str='facebook')
        data.graph = nx.Graph()
        for i in range(10):
            data.graph.add_node(i)
        data.graph.add_edge(0, 1)
        data.graph.add_edge(0, 2)
        data.graph.add_edge(0, 3)
        data.graph.add_edge(0, 4)
        server_list = [Server(serer_id=i) for i in range(8)]
        algo = OfflineAlgo(server_list=server_list, network_dataset=data)
        node_list = list(data.graph.nodes)
        node_len = len(node_list)
        for i in range(node_len):
            n = node_list[i]
            algo.add_new_primary_node(node_id=n, write_freq=Constant.WRITE_FREQ)
        algo.node_relocation_process()
Example #25
    def __init__(self, flags):
        run_config = tf.compat.v1.ConfigProto(log_device_placement=False)
        run_config.gpu_options.allow_growth = True
        self.sess = tf.compat.v1.Session(config=run_config)

        self.flags = flags
        self.dataset = Dataset(self.sess, flags, self.flags.dataset)
        self.dataset.load_data()
        self.model = WGANTimeSeries(self.sess, self.flags, self.dataset)

        self._make_folders()
        self.iter_time = 0

        self.saver = tf.compat.v1.train.Saver()
        self.sess.run(tf.compat.v1.global_variables_initializer())

        tf_utils.show_all_variables()
Example #26
def load_data(x_data, source_data, length_data, batch_size):
    data_loader = None
    if x_data != '':
        X = pickle.load(open(x_data, 'rb'))
        source = pickle.load(open(source_data, 'rb'))
        length = pickle.load(open(length_data, 'rb'))
        data = Dataset(X, source, length)
        data_loader = DataLoader(data, batch_size=batch_size, shuffle=True)
    return data_loader
Example #27
    def test_missing_tree(self, tmp_root_datafile):
        with pytest.raises(ValueError) as e:
            _ = Dataset._build_dataframe(tmp_root_datafile,
                                         TTree_name='missing',
                                         cut_list_dicts=self.test_cut_dicts,
                                         vars_to_cut=self.test_vars_to_cut)
        assert str(e.value) == \
            f"TTree(s) 'missing' not found in file {tmp_root_datafile}"
Example #28
def process_video(video_path):
    detector = initialize_detector()

    input_shape = (48, 48, 3)
    num_classes = 4
    cnn_weights_path = 'model/weights.h5'
    DELTA = 15

    dataset = Dataset()
    cnn = Model(input_shape, num_classes, cnn_weights_path)

    cap = cv2.VideoCapture(video_path)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # Stop once the video stream ends
            break

        frame = resize(frame, (NEW_HEIGHT, NEW_WIDTH), mode='constant')
        print(frame.shape)

        predictions = hot_predict('dummy', detector, image=frame)

        for bounding_box in predictions:
            x1 = int(bounding_box['x1']) - DELTA
            y1 = int(bounding_box['y1']) - DELTA
            x2 = int(bounding_box['x2']) + DELTA
            y2 = int(bounding_box['y2']) + DELTA

            traffic_sign = frame[y1:y2, x1:x2]
            processed_image = dataset._preprocess_image(traffic_sign,
                                                        centered=True)

            cnn_input = np.expand_dims(processed_image, axis=0)

            label = cnn.predict(cnn_input)
            draw_rectangle(frame, (x1, y1, x2, y2), label)

        cv2.imshow('frame', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #29
    def test_duplicate_events_no_alt_tree(self,
                                          tmp_root_datafile_duplicate_events):
        with pytest.raises(Exception) as e:
            _ = Dataset._build_dataframe(tmp_root_datafile_duplicate_events,
                                         TTree_name=self.default_TTree,
                                         cut_list_dicts=self.test_cut_dicts,
                                         vars_to_cut=self.test_vars_to_cut)
        assert str(e.value) == \
            f"Found 1000 duplicate events in datafile {tmp_root_datafile_duplicate_events}."
Example #30
    def test_merge_process(self):
        data = Dataset(dataset_str='facebook')
        data.graph = nx.Graph()
        for i in range(10):
            data.graph.add_node(i)
        data.graph.add_edge(0, 1)
        data.graph.add_edge(0, 2)
        data.graph.add_edge(0, 3)
        data.graph.add_edge(0, 4)
        server_list = [Server(serer_id=i) for i in range(8)]
        algo = OfflineAlgo(server_list=server_list, network_dataset=data)
        node_list = list(data.graph.nodes)
        node_len = len(node_list)
        for i in range(node_len):
            n = node_list[i]
            algo.add_new_primary_node(node_id=n, write_freq=Constant.WRITE_FREQ)
        algo.init_merge_process()

        for i in range(0, len(algo.merged_node_list)):
            m_node = algo.merged_node_list[i]
            if m_node.id == 0:
                self.assertEqual(m_node.internal_connection, 0)
                self.assertEqual(m_node.external_connection, 4)
            elif m_node.id in [1, 2, 3, 4]:
                self.assertEqual(m_node.internal_connection, 0)
                self.assertEqual(m_node.external_connection, 1)
            else:
                self.assertEqual(m_node.internal_connection, 0)
                self.assertEqual(m_node.external_connection, 0)
        node_count_list = []
        for m_node in algo.merged_node_list:
            node_count_list += m_node.node_id_list
        node_count_list.sort()
        self.assertEqual(node_count_list, [i for i in range(10)])
        for i in range(1, len(algo.merged_node_list)):
            algo.merged_node_list[0]._add_node(algo.merged_node_list[i], algo=algo, remove_flag=False)
        node_count_list = algo.merged_node_list[0].node_id_list
        node_count_list.sort()
        self.assertEqual(node_count_list, [i for i in range(10)])
        self.assertEqual(algo.merged_node_list[0].external_connection, 0)
        self.assertEqual(algo.merged_node_list[0].internal_connection, 4)
        self.assertEqual(algo.merged_node_list[0].node_count, 10)
Example #31
	def __init__(self, dsname, trainf='train_gdm', lyr=[nl.trans.TanSig(),nl.trans.TanSig()],
		lr=0.0001, epochs=100, update_freq=20, show=20, minmax=1.0, hid_lyr=10, ibias=1.0, norm=True):
		self.ds = Dataset.load(dsname)
		self.norm = norm
		self.set_train_data()
		# training parameters
		self.trainf = getattr(nl.train, trainf)
		self.lyr = lyr
		self.lr = lr
		self.epochs = epochs
		self.update_freq = update_freq
		self.show = show
		self.minmax = minmax
		self.hid_lyr = hid_lyr
		self.ibias = ibias
		# neural network
		self.set_train_kwargs()
		self.net = self.setup_network()