示例#1
0
 def _configure(self):
     """Copy the 'BagOfWordsFromXML' task settings onto this instance.

     Parses ``self.yaml_file`` with ``parse_yaml``, narrows the result with
     ``extract_task_config`` and stores ``output_path`` and ``tasks``.
     ``minimum_wordcount`` and ``include_structure`` keep their current
     values when the task config does not provide them.
     """
     task_cfg = extract_task_config(
         parse_yaml(self.yaml_file), 'BagOfWordsFromXML')
     lookup = task_cfg.get

     self.output_path = lookup('output_directory', '')
     self.tasks = lookup('tasks', {})

     # The value already on the instance acts as the fallback for these two.
     self.minimum_wordcount = lookup(
         'minimum_wordcount', self.minimum_wordcount)
     self.include_structure = lookup(
         'include_structure', self.include_structure)
示例#2
0
 def _configure(self):
     """Copy the 'BagOfWordsFromParsed' task settings onto this instance.

     The result of ``parse_yaml(self.yaml_file)`` is handed to
     ``extract_task_config``; ``output_path`` and ``tasks`` are then read
     from it with empty defaults, and ``minimum_wordcount`` is overridden
     only when the task config carries that key.
     """
     parsed = parse_yaml(self.yaml_file)
     task_cfg = extract_task_config(parsed, 'BagOfWordsFromParsed')

     self.output_path = task_cfg.get('output_directory', '')
     self.tasks = task_cfg.get('tasks', {})

     # Keep whatever the instance already holds if the key is absent.
     current_minimum = self.minimum_wordcount
     self.minimum_wordcount = task_cfg.get(
         'minimum_wordcount', current_minimum)
示例#3
0
    def _configure(self):
        """Copy the 'Identify' task settings and resolve its identifiers.

        Stores ``output_path`` and ``params`` from the task config. When
        ``params`` contains an ``identifiers`` entry it must be a list; each
        item is passed through ``self._locate_in_configs`` and the results
        are stored in ``self.identifiers``.

        Raises:
            Exception: if ``params['identifiers']`` is present but is not a
                list.
        """
        task_cfg = extract_task_config(parse_yaml(self.yaml_file), 'Identify')
        self.output_path = task_cfg.get('output_directory', '')
        self.params = task_cfg.get('params', {})

        # Nothing further to resolve when no identifiers were configured.
        if 'identifiers' not in self.params:
            return

        # Only the list form is supported.
        if not isinstance(self.params['identifiers'], list):
            raise Exception('identifier file list not found')

        self.identifiers = [
            self._locate_in_configs(entry)
            for entry in self.params.get('identifiers', [])
        ]
示例#4
0
    def _configure(self):
        """Load the 'Identify' task config and locate its identifier entries.

        ``output_path`` and ``params`` come straight from the task config.
        If ``params`` has an ``identifiers`` key, its value has to be a list;
        every element is mapped through ``self._locate_in_configs`` and the
        resulting list is assigned to ``self.identifiers``.

        Raises:
            Exception: when ``identifiers`` is configured as anything other
                than a list.
        """
        parsed = parse_yaml(self.yaml_file)
        identify_cfg = extract_task_config(parsed, 'Identify')
        self.output_path = identify_cfg.get('output_directory', '')
        self.params = identify_cfg.get('params', {})

        if 'identifiers' in self.params:
            # Reject non-list values up front.
            if not isinstance(self.params['identifiers'], list):
                raise Exception('identifier file list not found')

            configured = self.params.get('identifiers', [])
            self.identifiers = list(map(self._locate_in_configs, configured))
示例#5
0
    def _configure(self):
        """Read the 'IdentityEDA' task settings and build the aggregation terms.

        Sets ``input_path``, ``output_path``, ``delimiter`` and
        ``delimiter_replace`` from the task config, then resolves the list of
        identifiers, loads them with ``load_yamls`` and stores one
        single-quoted, stripped ``name`` per loaded entry in
        ``self.aggregation_terms``.

        Identifier resolution order:
            1. an explicit ``identifiers`` entry in the task config;
            2. otherwise the paths matched by ``identifier_pattern``;
            3. otherwise an empty list.
        """
        config = parse_yaml(self.yaml_file)
        config = extract_task_config(config, 'IdentityEDA')
        self.input_path = config.get('input_directory', '')
        self.output_path = config.get('output_directory', '')
        self.delimiter = config.get('delimiter', ',')
        self.delimiter_replace = config.get('delimiter_replace', ';;')

        # Bind the name up front. It is used below as the (eagerly evaluated)
        # default of config.get(), so leaving it unbound when no
        # identifier_pattern is configured raised UnboundLocalError -- even
        # if the config supplied an explicit 'identifiers' list.
        identifiers = []

        # If an identifier_pattern is given, collect every matching path.
        identifier_pattern = config.get('identifier_pattern', '')
        if identifier_pattern:
            identifiers = glob.glob(identifier_pattern)

        # An explicit identifiers list takes precedence over the pattern.
        identifiers = config.get('identifiers', identifiers)

        # NOTE(review): load_yamls is assumed to accept an empty list and to
        # yield mappings that each carry a 'name' string -- confirm.
        identifier_yaml = load_yamls(identifiers)

        self.aggregation_terms = [
            "'%s'" % y['name'].strip() for y in identifier_yaml
        ]
 def _configure(self):
     """Copy the 'SimpleParliament' task settings onto this instance.

     Feeds ``parse_yaml(self.yaml_file)`` into ``extract_task_config`` and
     stores ``output_path`` (default ``''``) and ``params`` (default ``{}``).
     """
     task_cfg = extract_task_config(
         parse_yaml(self.yaml_file), 'SimpleParliament')
     self.output_path = task_cfg.get('output_directory', '')
     self.params = task_cfg.get('params', {})
示例#7
0
 def _configure(self):
     """Set ``output_path`` from the 'Clean' task config.

     The config is obtained by passing ``parse_yaml(self.yaml_file)`` to
     ``extract_task_config``; a missing ``output_directory`` yields ``''``.
     """
     clean_cfg = extract_task_config(parse_yaml(self.yaml_file), 'Clean')
     self.output_path = clean_cfg.get('output_directory', '')
 def _configure(self):
     """Copy the 'Parliament' task settings onto this instance.

     Stores ``output_path`` (default ``''``) and ``params`` (default ``{}``)
     taken from the result of ``extract_task_config`` applied to the parsed
     ``self.yaml_file``.
     """
     parsed = parse_yaml(self.yaml_file)
     parliament_cfg = extract_task_config(parsed, 'Parliament')
     read = parliament_cfg.get
     self.output_path = read('output_directory', '')
     self.params = read('params', {})
示例#9
0
 def _configure(self):
     """Copy the 'TextPreprocessing' task settings onto this instance.

     Parses ``self.yaml_file``, narrows it with ``extract_task_config`` and
     stores ``output_path`` (default ``''``) and ``tasks`` (default ``{}``).
     """
     task_cfg = extract_task_config(
         parse_yaml(self.yaml_file), 'TextPreprocessing')
     self.output_path = task_cfg.get('output_directory', '')
     self.tasks = task_cfg.get('tasks', {})
示例#10
0
 def _configure(self):
     """Read the 'Clean' task config and remember its output directory.

     ``self.output_path`` becomes the config's ``output_directory`` value,
     or ``''`` when that key is absent.
     """
     parsed = parse_yaml(self.yaml_file)
     task_cfg = extract_task_config(parsed, 'Clean')
     output_directory = task_cfg.get('output_directory', '')
     self.output_path = output_directory
示例#11
0
 def _configure(self):
     """Read the 'TextPreprocessing' task config into instance attributes.

     ``output_path`` falls back to ``''`` and ``tasks`` to ``{}`` when the
     respective keys are missing from the task config.
     """
     parsed = parse_yaml(self.yaml_file)
     preprocessing_cfg = extract_task_config(parsed, 'TextPreprocessing')
     read = preprocessing_cfg.get
     self.output_path = read('output_directory', '')
     self.tasks = read('tasks', {})