Пример #1
0
 def __init__(self, path:str, show_feedback:bool = True):
     """
     Store the given path and the feedback switch and read the numpy related
     values from the project constants.

     The path is handed to AssertNotNone (together with an error message)
     before anything is stored on the instance.
         :param path:str: path string for load or save
         :param show_feedback:bool: show reports and information
     """
     AssertNotNone(path, "Given path for NumpyDatasetHandler constructor is none!")
     self._path:str = path
     self._show_feedback:bool = show_feedback

     # A single Constants instance is enough to read both values.
     constants = Constants()
     self._np_file_names:list = constants.NP_TEACHER_FORCING_FILE_NAMES
     self._shape_regex:str = constants.NP_GATHER_LOAD_SHAPE_REX
    def __init__(self,
                 node_parenthesis: list = ['(', ')'],
                 input_context: str = None,
                 input_extension_dict: dict = {},
                 keep_opt_infos: bool = False):
        """
        Collect all parameters for the cleaning process and call
        GenerateCleanAMR directly if a context is available.

        Every exception raised during the setup is caught and reported on the
        console, it is not propagated to the caller.
            :param node_parenthesis:list: define node parenthesis
            :param input_context:str: input amr string
            :param input_extension_dict:dict: look up dictionary
            :param keep_opt_infos:bool: switch allow to optional infos
        """
        try:
            self.constants = Constants()
            self.keep_opt_info_encoding = keep_opt_infos

            if isNotNone(input_context):
                self.hasContext = True
                self.context = input_context

            # An empty dict (including the shared default) is falsy and is
            # therefore never stored on the instance.
            if bool(input_extension_dict):
                self.hasExtentsionsDict = True
                self.extension_dict = input_extension_dict
                self.extension_keys_dict = self.extension_dict.keys()

            if isNotNone(node_parenthesis):
                # Store a copy, otherwise all instances created without this
                # argument would share (and could mutate) the default list.
                self.node_parenthesis = list(node_parenthesis)

            # hasContext is only assigned above when a context was given.
            # getattr keeps a value that may exist on class level and falls
            # back to False instead of raising an AttributeError.
            if getattr(self, 'hasContext', False):
                self.GenerateCleanAMR()
        except Exception as ex:
            template = "An exception of type {0} occurred in [AMRCleaner.__init__]. Arguments:\n{1!r}"
            message = template.format(type(ex).__name__, ex.args)
            print(message)
Пример #3
0
 def __init__(self, folder_path:str, show_feedback:bool = True):
     """
     Keep the folder path and the feedback switch and look up the numpy file
     names in the project constants. The inputs are stored without validation.
         :param folder_path:str: folder path for the datasets
         :param show_feedback:bool: show reports and information
     """
     self._folder_path:str = folder_path
     self._show_feedback:bool = show_feedback

     # The file names are read from a fresh Constants instance.
     project_constants = Constants()
     self._np_file_names:list = project_constants.NP_TEACHER_FORCING_FILE_NAMES
 def __init__(self, path:str =None, seperator_regex:str =None):
     """
     Store the file path and the separator regex for later usage.
     Without a separator regex the ELEMENT_SPLIT_REGEX of the project constants is used.
     Exceptions raised during the setup are caught and printed on the console.
         :param path:str: path of file with string content
         :param seperator_regex:str: optional regex string, falls back to the constants value
     """
     try:
         self.constants = Constants()

         # A missing path is stored as None.
         if isNotNone(path):
             self.path = path
         else:
             self.path = None

         # Use the given regex or the project wide default.
         if isNotNone(seperator_regex):
             self.seperator_regex = seperator_regex
         else:
             self.seperator_regex = self.constants.ELEMENT_SPLIT_REGEX
     except Exception as error:
         report = "An exception of type {0} occurred in [FileReader.Constructor]. Arguments:\n{1!r}"
         print(report.format(type(error).__name__, error.args))
Пример #5
0
    def __init__(self, context:str =None, show_feedback:bool =False):
        """
        Store the given context and the console feedback switch, then create
        the project constants and an empty ordered collection for the graph nodes.
        Exceptions raised during the setup are caught and printed on the console.
            :param context:str: amr input
            :param show_feedback:bool: show process response on console
        """
        try:
            # Plain inputs first, the created objects afterwards.
            self.input_semantic = context
            self.show_response = show_feedback
            self.constants = Constants()
            self.graph_nodes = OrderedDict()
            # The former nodes_as_dict switch is deprecated and not set anymore.
        except Exception as error:
            report = "An exception of type {0} occurred in [SemanticMatricBuilder.Constructor]. Arguments:\n{1!r}"
            print(report.format(type(error).__name__, error.args))
 def __init__(self,
              amr_str: str = None,
              show_process: bool = False,
              is_saving: bool = False):
     """
     Create the project constants and store the given inputs, nothing else
     is processed here.
     Exceptions raised during the setup are caught and printed on the console.
         :param amr_str:str: amr input as string
         :param show_process:bool: show processing steps
         :param is_saving:bool: result saving data or further processing
     """
     try:
         self.constants = Constants()
         self.amr_input = amr_str
         self.show = show_process
         self.saving = is_saving
     except Exception as error:
         error_name = type(error).__name__
         report = "An exception of type {0} occurred in [TParser.Constructor]. Arguments:\n{1!r}"
         print(report.format(error_name, error.args))
Пример #7
0
 def __init__(self,
              in_content: str = None,
              sentence_restriction: int = -1,
              semantics_restriction: int = -1):
     """
     Store the given content and both size restrictions and create the
     project constants afterwards.
     Exceptions raised during the setup are caught and printed on the console.
         :param in_content:str: input context by default None
         :param sentence_restriction:int: sentence restriction by default -1
         :param semantics_restriction:int: semantic restriction by default -1
     """
     try:
         self.context = in_content
         # Both restrictions are stored as given.
         self.restriction_sentence = sentence_restriction
         self.restriction_semantic = semantics_restriction
         self.constants = Constants()
     except Exception as error:
         report = "An exception of type {0} occurred in [DatasetExtractor.Constructor]. Arguments:\n{1!r}"
         print(report.format(type(error).__name__, error.args))
    def __init__(self,
                 input_path: str,
                 in_output_extender: str = 'output',
                 data_pairs: list = None,
                 in_context: str = None):
        """
        This is the constructor of the (File-)Writer class.
        Only the input path is necessary, the extender for the output path has a default value.
        Two types of input are supported: amr data pairs or a context of type string.
        If both are present the data pairs are preferred.

        The constructor directly triggers the storing (StoreAMR or StoreContext).
        Exceptions raised on the way are caught and printed on the console.
            :param input_path:str: path of the input file
            :param in_output_extender:str: extender to create output file from input path
            :param data_pairs:list: amr data pairs list like List<Array{sentence, semantic}>
            :param in_context:str: optional if no data pairs present use context
        """
        try:
            self.constants = Constants()
            self.path = input_path

            AssertNotNone(self.path, msg='Given path for FileWriter was None!')

            # Concatenate only for a string extender. Building the path first and
            # checking the type afterwards raised a TypeError for a non string
            # extender before the TYP_ERROR fallback could be applied.
            # NOTE(review): presumably setOrDefault returns the default if the
            # condition is False - verify against its definition.
            extender_is_str = isStr(in_output_extender)
            out_path_candidate = (self.path + '.' + in_output_extender) if extender_is_str else None
            self.out_path = setOrDefault(out_path_candidate,
                                         self.constants.TYP_ERROR,
                                         extender_is_str)
            print('Destination: ', self.out_path)

            if isNotNone(data_pairs):
                self.dataset_pairs = data_pairs
                #TODO: Missing Store Numpy Graphs
                self.StoreAMR()
            else:
                AssertNotNone(in_context,
                              msg='Given input for FileWriter was None!')
                self.context = in_context
                self.StoreContext()

        except Exception as ex:
            template = "An exception of type {0} occurred in [FileWriter.__init__]. Arguments:\n{1!r}"
            message = template.format(type(ex).__name__, ex.args)
            print(message)
Пример #9
0
    def __init__(self,
                 in_path: str = None,
                 output_path_extender: str = 'ouput',
                 max_length: int = -1,
                 show_feedback: bool = False,
                 keep_opt_infos: bool = False,
                 min_cardinality: int = 3,
                 max_cardinality: int = 100,
                 cpu_cores: int = 1,
                 saving_cleaned_data: bool = False,
                 stringified_amr: bool = False):
        """
        This class constructor collects information about the input and output files.
        Further it is possible to define a max_length for the used dataset.
        It defines the string length for sentences and doubles it for the semantics string length.
        Every mismatch will be dropped out!
        If it is negative the module will use all dataset elements.

        Attention:
            With min_cardinality and max_cardinality you can define the used dataset after processing.
            This allows to handle huge differences in the dataset groups
                ~> depends on knowledge about "well defined datasets" rules

        Exceptions raised during the setup are caught and printed on the console.

        NOTE(review): the default 'ouput' of output_path_extender looks like a typo of
        'output'; it is left unchanged because it is part of the callers' interface.

            :param in_path:str: dataset input path
            :param output_path_extender:str: result output path
            :param max_length:int: context length restriction
            :param show_feedback:bool: show process content as console feedback
            :param keep_opt_infos:bool: include optional info in the amr cleaner strategy
            :param min_cardinality:int: define min range for the node matrix representation [>2 (at least 3 nodes/words) depends on the SPO sentence definition in english]
            :param max_cardinality:int: define max range for the node matrix representation
            :param cpu_cores:int: define the number of existing/accessible cpu cores.
            :param saving_cleaned_data:bool: allow to save the cleaned dataset.
            :param stringified_amr:bool: convert semantic to matrices
        """
        try:
            self._constants = Constants()

            # Load the look up content for the supported internal amr nodes.
            self._look_up_ext_rep_path = './Datasets/LookUpAMR/supported_amr_internal_nodes_lookup.txt'
            self._extension_dict = Reader(
                path=self._look_up_ext_rep_path,
                seperator_regex=self._constants.MAPPING_SPLIT_REGEX
            ).LineReadContent()
            self._in_path = setOrDefault(in_path, self._constants.TYP_ERROR,
                                         isStr(in_path))

            # Counters and collections start empty.
            self._dataset_drop_outs = 0
            self._max_chars_sentences = 0
            self._max_words_sentences = 0
            self._max_chars_semantics = 0
            self._max_observed_nodes_cardinality = 0
            self._unique_graph_node_cardinalities = set()
            self._graph_node_cardinalities_list = []
            self._count_graph_node_cards_occs = dict()

            self._is_showing_feedback = show_feedback
            self._is_saving = saving_cleaned_data
            self._is_keep_opt_infos = keep_opt_infos
            self._out_path_extender = output_path_extender

            self._restriction_chars_sentence = setOrDefault(
                max_length, -1, isInt(max_length))
            # Derive the semantic restriction from the validated sentence
            # restriction instead of the raw max_length. The raw value produced
            # -2 for a non int number and raised a TypeError for values that
            # can not be compared with 0.
            # NOTE(review): presumably setOrDefault returns max_length for an
            # int and -1 otherwise - verify against its definition.
            self._restriction_chars_semantic = -1 if (
                self._restriction_chars_sentence < 0) else (
                    2 * self._restriction_chars_sentence)

            # At least 3 nodes are required.
            self._min_cardinality = min_cardinality if (
                min_cardinality > 2) else 3
            # Compare with the effective minimum (not the raw argument), so the
            # stored maximum can never be smaller than the stored minimum.
            if max_cardinality >= self._min_cardinality:
                self._max_cardinality = max_cardinality
            else:
                self._max_cardinality = max(100, self._min_cardinality)

            self._cpu_cores = cpu_cores if (cpu_cores > 1) else 1
            self._stringified_amr = stringified_amr
        except Exception as ex:
            template = "An exception of type {0} occurred in [DatasetProvider.__init__]. Arguments:\n{1!r}"
            message = template.format(type(ex).__name__, ex.args)
            print(message)