Example #1
    def load_saved(self):
        # Restore previously saved feature-extraction artifacts;
        # anything not found on disk is simply skipped.
        try:
            self.analyser = load_object(
                os.path.join(self.feature_extraction_folder,
                             "analyzer.pickle"))
        except FileNotFoundError:
            pass

        try:
            self.vocabulary = load_object(
                os.path.join(self.feature_extraction_folder,
                             "vocabulary.pickle"))
        except FileNotFoundError:
            pass

        # np.genfromtxt raises OSError (not FileNotFoundError) when the
        # file is missing, so a broader exception is caught here.
        try:
            self.embedding_matrix = np.genfromtxt(self.embedding_save_path,
                                                  delimiter=',')
        except OSError:
            pass

        try:
            self.maxlen = load_object(
                os.path.join(self.feature_extraction_folder, "maxlen.pickle"))
        except FileNotFoundError:
            pass
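All of these examples call a load_object helper that is not shown on this page. A minimal sketch, assuming it is a thin wrapper around Python's pickle module (only the name and call signature come from the snippets; the body is an assumption):

import pickle


def load_object(path):
    # Assumed helper: deserialize a pickled object from disk.
    # FileNotFoundError propagates to the caller, which is exactly
    # what the try/except blocks in these examples rely on.
    with open(path, 'rb') as f:
        return pickle.load(f)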
Example #2
    def load_saved(self):
        # Restore the saved label mapping if it exists; otherwise keep
        # the current value.
        try:
            self.label_to_int = load_object(
                os.path.join(self.feature_extraction_folder,
                             "label_to_int.pickle"))
        except FileNotFoundError:
            pass
Example #3
    def get_dataset(self, train_skip=1):
        '''
        Create the dataset for the wanted task from the given graph_nx, wrapping the function
        'loader.load_task' and caching its result on disk.
        Args:
            train_skip: int - keep every train_skip-th training sample. If there are N possible
                              samples in the given graph_nx, only about int(N / train_skip) are
                              used for training. This matters a lot in large graphs.
        Returns:
            X: dict - with keys 'train' and 'test'; each value is a np.array of samples, where
                      each entry is a sample with its embeddings.
            y: dict - with keys 'train' and 'test'; each value is a np.array of labels, where
                      y[key][i] is the label of X[key][i] for the given task.
        '''
        # load task data
        task_data_path = join(self.dump_folder,
                              f'{self.task}_dataset_{self.pivot_time}.data')
        if os.path.exists(task_data_path):
            X, y = load_object(task_data_path)
        else:
            # Build and cache the full dataset (note train_skip=1 here);
            # the requested subsampling is applied below, so one cache
            # file serves every train_skip value.
            X, y = loader.load_task(self.graph_nx,
                                    self.task,
                                    train_skip=1,
                                    pivot_time=self.pivot_time,
                                    test_size=self.test_size)
            save_object((X, y), task_data_path)

        X = {'train': X['train'][::train_skip], 'test': X['test']}
        y = {'train': y['train'][::train_skip], 'test': y['test']}

        return X, y
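This snippet (and Example #6) also calls a save_object counterpart to load_object. A matching sketch under the same pickle assumption:

import pickle


def save_object(obj, path):
    # Assumed counterpart: serialize an object to disk so a later
    # load_object(path) can restore it.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)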
Example #4
def get_papers(num_files=None):
    # Read pickled, DataFrame-convertible chunks from the dump folder and
    # concatenate them; num_files caps how many chunk files are read.
    folder_path = r'..\dump\data_pkl_vectorized'
    frames = []
    for i, file_path in enumerate(os.listdir(folder_path)):
        if num_files is not None and i == num_files:
            break
        frames.append(
            pd.DataFrame(load_object(os.path.join(folder_path, file_path))))
    # DataFrame.append was removed in pandas 2.0; pd.concat replaces
    # the old per-iteration all_df.append(df) pattern.
    return pd.concat(frames) if frames else pd.DataFrame()
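A quick usage sketch (the folder layout is taken from the snippet itself; the chunk count is arbitrary):

papers = get_papers(num_files=2)  # read only the first two chunk files
print(papers.shape)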
Example #5
    def load_models(self):
        # Restore every per-dataset Keras model, passing the custom
        # objects (swish activation, ncce loss) used at training time.
        try:
            if not self.list_model:
                for i in range(self.nb_dataset):
                    self.list_model.append(
                        tf.keras.models.load_model(
                            os.path.join(self.model_folder,
                                         'weights.best.{}.hdf5'.format(i)),
                            custom_objects={'swish': swish, 'ncce': ncce}))
        except OSError:
            pass

        # Restore the remaining pickled attributes; missing files are skipped.
        try:
            self.list_weight = load_object(
                os.path.join(self.model_folder, "list_weight.pickle"))
        except FileNotFoundError:
            pass

        try:
            self.cutoff = load_object(
                os.path.join(self.model_folder, "cutoff.pickle"))
        except FileNotFoundError:
            pass

        try:
            self.label = load_object(
                os.path.join(self.model_folder, "label.pickle"))
        except FileNotFoundError:
            pass

        try:
            self.min_label = load_object(
                os.path.join(self.model_folder, "min_label.pickle"))
        except FileNotFoundError:
            pass
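The 'weights.best.{}.hdf5' files loaded above look like Keras checkpoint output. A hedged sketch of a training-side counterpart that could produce them (train_fold, model, x_train, and y_train are hypothetical placeholders; only the checkpoint path comes from the snippet):

import os
import tensorflow as tf


def train_fold(model, x_train, y_train, model_folder, i):
    # Hypothetical counterpart: keep only the best weights for dataset i,
    # so load_models() can restore them later.
    ckpt = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(model_folder, 'weights.best.{}.hdf5'.format(i)),
        monitor='loss',
        save_best_only=True)
    model.fit(x_train, y_train, epochs=10, callbacks=[ckpt])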
Example #6
    def calculate_pivot_time(self):
        '''
        Calculate the pivot time step that splits the edges of graph_nx into train and test
        sets according to the requested test_size ratio. The ratio-to-pivot mapping is cached
        on disk so each split is only computed once.
        Returns:
            time step representing the pivot time
        '''
        ratio2pivot = {}
        ratio2pivot_path = join(self.dump_folder, 'ratio2pivot.dict')
        if os.path.exists(ratio2pivot_path):
            ratio2pivot = load_object(ratio2pivot_path)
            if self.test_size in ratio2pivot:
                return ratio2pivot[self.test_size]
        # Compute, cache, and return the pivot for this ratio.
        pivot_time = get_pivot_time(self.graph_nx, self.test_size)
        ratio2pivot[self.test_size] = pivot_time
        save_object(ratio2pivot, ratio2pivot_path)
        return pivot_time
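get_pivot_time itself is not shown. A minimal sketch of one plausible implementation, assuming each edge in graph_nx carries a 'time' attribute and the goal is to place roughly a test_size fraction of the edges after the pivot (both the attribute name and the quantile approach are assumptions):

def get_pivot_time(graph_nx, test_size):
    # Sort edge timestamps and take the (1 - test_size) quantile, so
    # about test_size of the edges fall after the pivot.
    times = sorted(t for _, _, t in graph_nx.edges(data='time'))
    return times[int((1 - test_size) * (len(times) - 1))]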