def __init__(self, path:str, show_feedback:bool = True):
    """
    This constructor collects the path string.
        :param path:str: path string for load or save
        :param show_feedback:bool: show reports and informations
    """
    AssertNotNone(path, "Given path for NumpyDatasetHandler constructor is none!")
    # Instantiate Constants once instead of once per attribute lookup.
    constants = Constants()
    self._path:str = path
    self._show_feedback:bool = show_feedback
    self._np_file_names:list = constants.NP_TEACHER_FORCING_FILE_NAMES
    self._shape_regex:str = constants.NP_GATHER_LOAD_SHAPE_REX
def __init__(self, node_parenthesis: list = None, input_context: str = None, input_extension_dict: dict = None, keep_opt_infos: bool = False):
    """
    Class constructor collect all necessary parameters for the cleaning process.
        :param node_parenthesis:list: define node parenthesis, defaults to ['(', ')']
        :param input_context:str: input amr string
        :param input_extension_dict:dict: look up dictionairy
        :param keep_opt_infos:bool: switch allow to optional infos
    """
    try:
        # Resolve defaults here instead of using mutable default arguments,
        # which would be shared across all instances of the class.
        if node_parenthesis is None: node_parenthesis = ['(', ')']
        if input_extension_dict is None: input_extension_dict = {}

        self.constants = Constants()
        self.keep_opt_info_encoding = keep_opt_infos

        # Initialize the flags unconditionally so they always exist.
        # Previously, calling without a context raised an AttributeError
        # at 'if (self.hasContext)' that was swallowed by the except block.
        self.hasContext = False
        self.hasExtentsionsDict = False

        if isNotNone(input_context):
            self.hasContext = True
            self.context = input_context

        if bool(input_extension_dict):
            self.hasExtentsionsDict = True
            self.extension_dict = input_extension_dict
            self.extension_keys_dict = self.extension_dict.keys()

        if isNotNone(node_parenthesis):
            self.node_parenthesis = node_parenthesis

        if (self.hasContext):
            self.GenerateCleanAMR()
    except Exception as ex:
        template = "An exception of type {0} occurred in [AMRCleaner.__init__]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def __init__(self, folder_path:str, show_feedback:bool = True):
    """
    This constructor collects the path string.
        :param folder_path:str: folder path to save the datasets if its desired, names are autoset
        :param show_feedback:bool: show reports and informations
    """
    # Validate the input like the NumpyDatasetHandler constructor does,
    # so a None path fails loudly here instead of later at save time.
    AssertNotNone(folder_path, "Given folder path for constructor is none!")
    self._folder_path:str = folder_path
    self._show_feedback:bool = show_feedback
    self._np_file_names:list = Constants().NP_TEACHER_FORCING_FILE_NAMES
def __init__(self, path:str =None, seperator_regex:str =None):
    """
    The class constructor check for valid input and store it for local usage.
        :param path:str: path of file with string content
        :param seperator_regex:str: an optional regex string that allow to split an amr dataset at each occurence
    """
    try:
        self.constants = Constants()
        # 'path if isNotNone(path) else None' was a redundant identity
        # expression -> store the given value directly.
        self.path = path
        self.seperator_regex = seperator_regex if isNotNone(seperator_regex) else self.constants.ELEMENT_SPLIT_REGEX
    except Exception as ex:
        template = "An exception of type {0} occurred in [FileReader.Constructor]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def __init__(self, context:str =None, show_feedback:bool =False):
    """
    This class constructor stores the given context and allow to activation of showing the process results on the console.
        :param context:str: amr input
        :param show_feedback:bool: show process response on console
    """
    try:
        # Load project constants first, then record the run inputs.
        self.constants = Constants()
        self.show_response = show_feedback
        self.input_semantic = context
        # Ordered mapping keeps graph nodes in insertion order.
        self.graph_nodes = OrderedDict()
    except Exception as ex:
        template = "An exception of type {0} occurred in [SemanticMatricBuilder.Constructor]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def __init__(self, amr_str: str = None, show_process: bool = False, is_saving: bool = False):
    """
    This class constructor collects necessary inputs and initialize the constants only.
        :param amr_str:str: amr input as string
        :param show_process:bool: show processing steps
        :param is_saving:bool: result saving data or further processing
    """
    try:
        # Record the processing switches first, then the raw amr input.
        self.show = show_process
        self.saving = is_saving
        self.amr_input = amr_str
        self.constants = Constants()
    except Exception as ex:
        template = "An exception of type {0} occurred in [TParser.Constructor]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def __init__(self, in_content: str = None, sentence_restriction: int = -1, semantics_restriction: int = -1):
    """
    This constructor store the given context, optional a size restriction and load at least the project constants.
        :param in_content:str: input context by default None
        :param sentence_restriction:int: sentence restriction by default -1
        :param semantics_restriction:int: semantic restriction by default -1
    """
    try:
        # Project constants first, then the extraction inputs and limits.
        self.constants = Constants()
        self.restriction_sentence = sentence_restriction
        self.restriction_semantic = semantics_restriction
        self.context = in_content
    except Exception as ex:
        template = "An exception of type {0} occurred in [DatasetExtractor.Constructor]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def __init__(self, input_path: str, in_output_extender: str = 'output', data_pairs: list = None, in_context: str = None):
    """
    This is the constructor of the (File-)Writer class.
    Necessary is the input path only.
    Extender for the output path provide a default value.
    To provide 2 types of input you can set amr datapairs or various context of type string.
    If both is present the data pairs wil be preferred.
        :param input_path:str: path of the input file
        :param in_output_extender:str: extender to create output file from input path
        :param data_pairs:list: amr data pairs list like List<Array{sentence, semantic}>
        :param in_context:str: optional if no data pairs present use context
    """
    try:
        self.constants = Constants()
        self.path = input_path
        AssertNotNone(self.path, msg='Given path for FileWriter was None!')
        # Guard the concatenation: previously 'self.path + '.' + in_output_extender'
        # was evaluated BEFORE setOrDefault could check isStr, so a non-string
        # extender raised a TypeError instead of falling back to TYP_ERROR.
        self.out_path = (self.path + '.' + in_output_extender) if isStr(in_output_extender) else self.constants.TYP_ERROR
        print('Destination: ', self.out_path)
        if isNotNone(data_pairs):
            self.dataset_pairs = data_pairs
            #TODO: Missing Store Numpy Graphs
            self.StoreAMR()
        else:
            AssertNotNone(in_context, msg='Given input for FileWriter was None!')
            self.context = in_context
            self.StoreContext()
    except Exception as ex:
        template = "An exception of type {0} occurred in [FileWriter.__init__]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def __init__(self, in_path: str = None, output_path_extender: str = 'ouput', max_length: int = -1, show_feedback: bool = False, keep_opt_infos: bool = False, min_cardinality: int = 3, max_cardinality: int = 100, cpu_cores: int = 1, saving_cleaned_data: bool = False, stringified_amr: bool = False):
    """
    This class constructor collect informations about the input and output files.
    Further its possible to define a max_length for the used dataset.
    It defines the string length for sentences and doubled it for the semantics string length.
    Every missmatch will be dropped out! If it is negative the module will use all dataset elements.

    Attention: With min_cardinality and max_cardinality you can define the used dataset after processing.
    This allows to handle hughe differences in the dataset groups
    ~> depends on knowledge about "well defined datasets" rules
        :param in_path:str: dataset input path
        :param output_path_extender:str: result output path
        :param max_length:int: context length restriction
        :param show_feedback:bool: show process content as console feedback
        :param keep_opt_infos:bool: include optional info in the amr cleaner strategy
        :param min_cardinality:int: define min range for the node matrix representation [>2 (at least 3 nodes/words) depends on the SPO sentence definition in english]
        :param max_cardinality:int: define max range for the node matrix representation
        :param cpu_cores:int: define the number of existing/accessible cpu cores
        :param saving_cleaned_data:bool: allow to save the cleaned dataset
        :param stringified_amr:bool: convert semantic to matrices
    """
    try:
        self._constants = Constants()
        # Look-up table file mapping supported amr internal node labels; loaded line by line below.
        self._look_up_ext_rep_path = './Datasets/LookUpAMR/supported_amr_internal_nodes_lookup.txt'
        self._extension_dict = Reader(path=self._look_up_ext_rep_path, seperator_regex=self._constants.MAPPING_SPLIT_REGEX).LineReadContent()
        # Falls back to the error type marker when in_path is not a string.
        self._in_path = setOrDefault(in_path, self._constants.TYP_ERROR, isStr(in_path))

        # Processing statistics collected while the dataset is being built.
        self._dataset_drop_outs = 0
        self._max_chars_sentences = 0
        self._max_words_sentences = 0
        self._max_chars_semantics = 0
        self._max_observed_nodes_cardinality = 0
        self._unique_graph_node_cardinalities = set()
        self._graph_node_cardinalities_list = []
        self._count_graph_node_cards_occs = dict()

        # Behaviour switches and output configuration.
        self._is_showing_feedback = show_feedback
        self._is_saving = saving_cleaned_data
        self._is_keep_opt_infos = keep_opt_infos
        self._out_path_extender = output_path_extender

        # Length restrictions: -1 disables the restriction entirely;
        # semantics are allowed twice the sentence character length.
        self._restriction_chars_sentence = setOrDefault(max_length, -1, isInt(max_length))
        self._restriction_chars_semantic = -1 if (max_length < 0) else (2 * self._restriction_chars_sentence)

        # Cardinality window: at least 3 nodes (english SPO sentence rule);
        # the max falls back to 100 if it would undercut the requested min.
        self._min_cardinality = min_cardinality if (min_cardinality > 2) else 3
        self._max_cardinality = max_cardinality if (max_cardinality >= min_cardinality) else 100
        # At least one cpu core is always used.
        self._cpu_cores = cpu_cores if (cpu_cores > 1) else 1
        self._stringified_amr = stringified_amr
    except Exception as ex:
        template = "An exception of type {0} occurred in [DatasetProvider.__init__]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)