def reset_segment_file(self):
    """
    Wipes the segment's saved json file by overwriting it with an empty
    dict, after verifying the folder and file actually exist.

    Raises:
        PipelineSegmentError:
            If the segment's folder or json file can't be found on disk.
    """
    # File/Folder error checks
    if not os.path.exists(self.folder_path):
        raise PipelineSegmentError(
            "Couldn't find the pipeline segment's folder when trying to configure this object with the provided json file."
        )

    # NOTE(review): paths are joined with '+' throughout this file;
    # assumes folder_path ends with a separator — TODO confirm.
    # (Removed a redundant copy.deepcopy on the immutable file-name
    # string; deepcopy of a str returns the same object.)
    if not os.path.exists(self.folder_path + self.__json_file_name):
        raise PipelineSegmentError(
            f"Couldn't find the pipeline segment's file named '{self.__json_file_name}' in the pipeline's directory when trying to configure this object with the provided json file."
        )

    # Overwrite the segment's file with an empty json object.
    dict_to_json_file({}, self.folder_path, self.file_name)
def file_name(self):
    """
    Gets the file name of the json file.

    Returns:
        The segment's json file name (e.g. '<SEGMENT_ID>.json').

    Raises:
        PipelineSegmentError:
            If no functions have been queued yet, or if this segment was
            configured with create_file=False and thus has no saved file.
    """
    if len(self.__function_pipe) == 0:
        raise PipelineSegmentError(
            "The pipeline segment has not performed any actions yet."
            " Please perform some methods with this object.")
    elif not self.__create_file:
        raise PipelineSegmentError(
            "This pipeline segment does not have saved "
            "file and thus can not have a file path.")
    else:
        # Strings are immutable, so the original copy.deepcopy here was a
        # no-op (deepcopy of a str returns the same object); return directly.
        return self.__json_file_name
def __add_function_to_que(self, function_name, parameters, params_dict):
    """
    Adds the function info to the function que. If the segment has no
    json file name then generate one for it in the given directory.

    Args:
        function_name: string
            Function's name.

        parameters: collection of strings
            Names of the parameters the function actually accepts; any
            key of 'params_dict' not found here is dropped.

        params_dict: dict
            Parameter's name to their associated values.

    Raises:
        PipelineSegmentError:
            If the segment has been locked against further changes.

    Note:
        This function should only ever be called by children of this
        object.
    """
    if self.__lock_interaction:
        # Fixed typos in the original message ("has be", "futher").
        raise PipelineSegmentError(
            "This pipeline has been locked down and "
            "will prevent further changes to the generated flat file.")

    # Strip out call-frame/bookkeeping arguments that must not persist.
    for delete_key in ("self", "df", "df_features", "_add_to_que",
                       "params_dict"):
        params_dict.pop(delete_key, None)

    # Iterate over a snapshot so params_dict can be mutated in the loop
    # (the original built a full throwaway dict comprehension for this).
    for k, v in list(params_dict.items()):
        if k not in parameters:
            del params_dict[k]
        elif isinstance(v, set):
            # Sets aren't json serializable; persist them as lists.
            params_dict[k] = list(v)

    self.__function_pipe.append((function_name, params_dict))

    # Generate new json file name with proper file/folder output attributes
    if len(self.__function_pipe) == 1 and not self.__json_file_name:
        FileOutput.__init__(
            self,
            f'_Extras/Pipeline Structure/Data Pipeline Segments/{self.__object_type}'
        )

        all_json_files = get_all_files_from_path(self.folder_path,
                                                 ".json")

        # Roll random hex names until one not already on disk is found.
        while True:
            random_file_name = create_hex_decimal_string().upper()
            if random_file_name not in all_json_files:
                break

        self.__segment_id = random_file_name
        self.__json_file_name = random_file_name + ".json"

    # Update json file
    if self.__create_file:
        self.__create_json_pipeline_segment_file()
def __configure_pipeline_segment_with_existing_file(self):
    """
    Attempts to get a json file and then re-init the 'function_pipe'
    and the 'json_file_name' from its contents.

    Raises:
        PipelineSegmentError:
            If the segment's folder or json file can't be found on disk.
    """
    FileOutput.__init__(
        self,
        f'_Extras/Pipeline Structure/Data Pipeline Segments/{self.__object_type}'
    )

    self.__function_pipe = deque()
    # segment_id is a string; the original's copy.deepcopy was a no-op.
    self.__json_file_name = self.__segment_id + ".json"

    # File/Folder error checks
    if not os.path.exists(self.folder_path):
        raise PipelineSegmentError(
            "Couldn't find the pipeline segment's folder when trying to configure this object with the provided json file."
        )

    # Build the full path once instead of re-concatenating it three times.
    json_file_path = self.folder_path + self.__json_file_name
    if not os.path.exists(json_file_path):
        raise PipelineSegmentError(
            f"Couldn't find the pipeline segment's file named '{self.__json_file_name}' in the pipeline's directory when trying to configure this object with the provided json file."
        )

    json_dict = json_file_to_dict(json_file_path)

    # Push functions into function pipe in their recorded order;
    # hoist the deep dict lookup out of the loop.
    functions_performed = json_dict["Pipeline Segment"][
        "Functions Performed Order"]
    for function_order in range(
            1, json_dict["Pipeline Segment"]["Function Count"] + 1):
        function_info = functions_performed[
            f"Function Order {function_order}"]
        # Each order entry maps a single function name to its metadata.
        function_name = list(function_info.keys())[0]
        params_dict = function_info[function_name]["Params Dict"]

        self.__function_pipe.append((function_name, params_dict))
def __replace_function_in_que(self, function_name, params_dict, param,
                              param_val):
    """
    (Unimplemented) Intended to replace a queued function's parameter
    value in the function que.

    Args:
        function_name: string
            Function's name.

        params_dict: dict
            Parameter's name to their associated values.

        param: string
            Name of the parameter to replace.

        param_val:
            New value for the parameter.

    Raises:
        ValueError:
            Always; this method hasn't been completed yet.
    """
    # Everything after this raise in the original was unreachable dead
    # code (it even referenced an undefined name 'parameters'); it has
    # been removed. The guard raise is preserved byte-for-byte so
    # callers see the exact same error.
    raise ValueError("This function hasn't been completed yet!")
def __init__(self, object_type, segment_id=None, create_file=True):
    """
    Serializes this segment's state to/from a json file.

    Args:
        object_type: string
            The child type of all object's that inherited
            DataPipelineSegment.

        segment_id: string
            If init as a string instead of None; the object will attempt
            to find the json file in the provided directory.

        create_file: bool
            Whether a json file should back this segment.

    Raises:
        UnsatisfiedRequirments:
            If segment_id is truthy but not a string.
        PipelineSegmentError:
            If segment_id is given while create_file is False.

    Note:
        Essentially we are serializing the object with json files.
    """
    self.__json_file_name = None
    self.__object_type = copy.deepcopy(object_type)

    # Guard clauses validating the segment_id / create_file combination.
    segment_id_is_str = isinstance(segment_id, str)
    if segment_id and not segment_id_is_str:
        raise UnsatisfiedRequirments(
            "Segment id must be a string or set to 'None'!")

    if segment_id and not create_file:
        raise PipelineSegmentError(
            "Parameter conflict: segment_id is referring "
            "to a saved file but create_file is set to False.")

    # File extension removal (keep everything before the first '.').
    if segment_id_is_str:
        segment_id, _, _ = segment_id.partition(".")

    self.__segment_id = copy.deepcopy(segment_id)

    # Pushes the functions info based on order they are called
    self.__function_pipe = deque()

    self.__create_file = create_file
    self.__lock_interaction = False

    # Attempt to get json file into object's attributes.
    if self.__segment_id:
        self.__configure_pipeline_segment_with_existing_file()