def load_saved(self):
    # Restore previously saved feature-extraction artefacts; silently skip
    # anything that has not been dumped to disk yet.
    try:
        self.analyser = load_object(
            os.path.join(self.feature_extraction_folder, "analyzer.pickle"))
    except FileNotFoundError:
        pass
    try:
        self.vocabulary = load_object(
            os.path.join(self.feature_extraction_folder, "vocabulary.pickle"))
    except FileNotFoundError:
        pass
    try:
        self.embedding_matrix = np.genfromtxt(self.embedding_save_path, delimiter=',')
    except OSError:
        pass
    try:
        self.maxlen = load_object(
            os.path.join(self.feature_extraction_folder, "maxlen.pickle"))
    except FileNotFoundError:
        pass
def load_saved(self):
    try:
        self.label_to_int = load_object(
            os.path.join(self.feature_extraction_folder, "label_to_int.pickle"))
    except FileNotFoundError:
        pass
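# The save_object / load_object helpers used throughout are not shown in this
# section; a minimal sketch, assuming they are thin pickle wrappers (names and
# bodies here are an assumption, not the project's actual implementation):
import pickle

def save_object(obj, path):
    # Serialize an arbitrary Python object to disk with pickle.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_object(path):
    # Load a previously pickled object; raises FileNotFoundError if the
    # file does not exist, which the callers above rely on.
    with open(path, 'rb') as f:
        return pickle.load(f)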
def get_dataset(self, train_skip=1):
    '''
    Creates the dataset of the wanted task from the given graph_nx.
    Wraps 'task_loader.load_task' and caches its result on disk.

    Args:
        train_skip: int - take every 'train_skip'-th training sample.
            For example, if we have N possible samples in the given graph_nx,
            then we keep only about int(N / train_skip) samples for train.
            This is highly important in large graphs.

    Returns:
        X: dict - with keys 'train', 'test'. Each value is a np.array of the
            dataset, where each entry is a sample with the embeddings.
        y: dict - with keys 'train', 'test'. Each value is a np.array where
            y[key][i] is the label of X[key][i] for the given task.
    '''
    # load the full task data from cache if possible, otherwise build and cache it,
    # then subsample the train split according to train_skip
    task_data_path = join(self.dump_folder, f'{self.task}_dataset_{self.pivot_time}.data')
    if os.path.exists(task_data_path):
        X, y = load_object(task_data_path)
    else:
        X, y = loader.load_task(self.graph_nx, self.task, train_skip=1,
                                pivot_time=self.pivot_time, test_size=self.test_size)
        save_object((X, y), task_data_path)
    X = {'train': X['train'][::train_skip], 'test': X['test']}
    y = {'train': y['train'][::train_skip], 'test': y['test']}
    return X, y
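# Hypothetical usage of get_dataset (the surrounding class and the variable
# names below are assumptions for illustration): take every 10th training
# sample to keep training tractable on a large graph, while evaluating on the
# full test split.
# X, y = task_runner.get_dataset(train_skip=10)
# model.fit(X['train'], y['train'])
# model.evaluate(X['test'], y['test'])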
def get_papers(num_files=None):
    # Read the vectorized paper pickles and stack them into a single DataFrame.
    # If num_files is given, only the first num_files files are loaded.
    folder_path = r'..\dump\data_pkl_vectorized'
    file_paths = os.listdir(folder_path)
    frames = []
    for i, file_path in enumerate(file_paths):
        if num_files and num_files == i:
            break
        frames.append(pd.DataFrame(load_object(os.path.join(folder_path, file_path))))
    # pd.concat replaces the deprecated DataFrame.append-based accumulation
    return pd.concat(frames) if frames else pd.DataFrame()
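# Hypothetical usage: load only the first three vectorized pickle files from
# the hard-coded dump folder above.
# papers_df = get_papers(num_files=3)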
def load_models(self):
    # Reload every per-dataset Keras model, together with the auxiliary
    # objects needed at prediction time; silently skip anything missing.
    try:
        if not self.list_model:
            for i in range(self.nb_dataset):
                self.list_model.append(
                    tf.keras.models.load_model(
                        os.path.join(self.model_folder, 'weights.best.{}.hdf5'.format(i)),
                        custom_objects={'swish': swish, 'ncce': ncce}))
    except OSError:
        pass
    try:
        self.list_weight = load_object(
            os.path.join(self.model_folder, "list_weight.pickle"))
    except FileNotFoundError:
        pass
    try:
        self.cutoff = load_object(
            os.path.join(self.model_folder, "cutoff.pickle"))
    except FileNotFoundError:
        pass
    try:
        self.label = load_object(
            os.path.join(self.model_folder, "label.pickle"))
    except FileNotFoundError:
        pass
    try:
        self.min_label = load_object(
            os.path.join(self.model_folder, "min_label.pickle"))
    except FileNotFoundError:
        pass
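# When reloading these models, 'swish' and 'ncce' must resolve to the same
# callables that were used at training time. 'ncce' is a project-specific
# custom loss and is not reproduced here; a minimal sketch of the swish
# activation, assuming the standard definition x * sigmoid(x):
import tensorflow as tf

def swish(x):
    # Swish activation: x * sigmoid(x)
    return x * tf.keras.backend.sigmoid(x)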
def calculate_pivot_time(self):
    '''
    Calculate the pivot time that is needed in order to create a
    'time_split_ratio' between train edges and test edges.

    Returns:
        Time step representing the pivot time step.
    '''
    ratio2pivot = {}
    ratio2pivot_path = join(self.dump_folder, 'ratio2pivot.dict')
    if os.path.exists(ratio2pivot_path):
        ratio2pivot = load_object(ratio2pivot_path)
        if self.test_size in ratio2pivot:
            return ratio2pivot[self.test_size]
    pivot_time = get_pivot_time(self.graph_nx, self.test_size)
    ratio2pivot[self.test_size] = pivot_time
    save_object(ratio2pivot, ratio2pivot_path)
    return pivot_time
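# Illustration of the cache written above (values are hypothetical):
# 'ratio2pivot.dict' maps each requested test_size ratio to its computed
# pivot time step, e.g. {0.2: 2014, 0.3: 2012}, so repeated calls with the
# same ratio skip the expensive get_pivot_time computation.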