Пример #1
0
    def get_next(self, extended=False, reset=False):
        """Reads one row from the training reader and returns it as a list.

           Every value of the row is stripped of surrounding whitespace.
           When `extended` is true, each column listed in
           `self.multi_label_fields` is split on `self.label_separator`
           and the row grows by one 0/1 flag per known label of that
           column, followed by one value per entry in
           `self.label_aggregates` (computed through AGGREGATES).
           When `reset` is true, `self.reset()` is called after the row
           has been read.
           When PYTHON3 is false, every item is passed through encode2
           before being returned.

        """
        raw_row = self.training_reader.next()
        row = [cell.strip() for cell in raw_row]
        if extended:
            labels_missing = self.multi_label and self.fields_labels is None
            if labels_missing:
                # Labels are computed once and kept on the instance.
                self.fields_labels = self._get_labels()

            for column in self.multi_label_fields:
                present = [
                    chunk.strip()
                    for chunk in row[column].split(self.label_separator)]
                # One membership flag per label known for this column.
                row.extend(
                    [int(label in present)
                     for label in self.fields_labels[column]])
                # Then one aggregated value per configured aggregate.
                row.extend(
                    [AGGREGATES[name](present)
                     for name in self.label_aggregates])
        if reset:
            self.reset()
        if PYTHON3:
            return row
        return [encode2(item) for item in row]
Пример #2
0
    def _get_columns(self, fields_list):
        """Translates field references into a list of column numbers.

           `fields_list` may be None (an empty list is returned), a
           single reference, or a list of references. Each reference is
           resolved as follows:
             - an int is used as the column number as is,
             - None stands for the last column (`self.row_length - 1`),
             - anything else is looked up in `self.headers`.
           A reference that is not found in the headers is skipped,
           unless `self.objective` is truthy, in which case the program
           exits through `sys.exit` with an explanatory message.

        """
        if fields_list is None:
            return []
        if isinstance(fields_list, list):
            fields = fields_list
        else:
            fields = [fields_list]

        columns = []
        for field in fields:
            if isinstance(field, int):
                columns.append(field)
                continue
            if field is None:
                columns.append(self.row_length - 1)
                continue
            try:
                position = self.headers.index(field)
            except ValueError:
                if self.objective:
                    known_headers = ", ".join(
                        [encode2(header) for header in self.headers])
                    sys.exit("The %s has been set as multi-label field but"
                             " it cannot be found in the headers row: \n"
                             " %s" % (field, known_headers))
                # Unknown names are ignored when no objective is set.
                continue
            columns.append(position)
        return columns
Пример #3
0
    def get_next(self, extended=False, reset=False):
        """Returns the next row of the training reader, values stripped.

           With `extended` set, the row is enlarged for every column in
           `self.multi_label_fields`: the column value is split on
           `self.label_separator`, then a 1 or 0 is appended for each
           label in `self.fields_labels[column]` depending on whether
           the label is among the split values, and finally the result
           of each aggregate named in `self.label_aggregates` is
           appended. With `reset` set, `self.reset()` is invoked once
           the row has been read. If PYTHON3 is false the returned
           items are converted with encode2.

        """
        values = [field.strip() for field in self.training_reader.next()]
        if extended:
            if self.multi_label and self.fields_labels is None:
                # First extended read: collect the labels and keep them.
                self.fields_labels = self._get_labels()

            for column in self.multi_label_fields:
                found = []
                for piece in values[column].split(self.label_separator):
                    found.append(piece.strip())

                for label in self.fields_labels[column]:
                    values.append(1 if label in found else 0)
                for aggregate in self.label_aggregates:
                    values.append(AGGREGATES[aggregate](found))
        if reset:
            self.reset()
        return values if PYTHON3 else [encode2(item) for item in values]
Пример #4
0
    def _get_columns(self, fields_list):
        """Maps fields given by name or column number to column numbers.

           Accepts None (returns an empty list), a single field, or a
           list of fields. Integers are taken as column numbers, None
           means the last column (`self.row_length - 1`), and any other
           value is searched for in `self.headers`. Fields missing from
           the headers are dropped from the result; if `self.objective`
           is truthy, a missing field ends the program via `sys.exit`.

        """
        def locate(field):
            """Returns the column for `field`, or None when unresolved."""
            if isinstance(field, int):
                return field
            if field is None:
                return self.row_length - 1
            try:
                return self.headers.index(field)
            except ValueError:
                if self.objective:
                    sys.exit(
                        "The %s has been set as multi-label field but"
                        " it cannot be found in the headers row: \n"
                        " %s" % (field, ", ".join(
                            [encode2(header) for header in self.headers])))
                return None

        if fields_list is None:
            return []
        if not isinstance(fields_list, list):
            fields_list = [fields_list]
        resolved = (locate(field) for field in fields_list)
        return [column for column in resolved if column is not None]
Пример #5
0
 def get_label_headers(self):
     """Builds the header list that matches an extended row.

        Starts from `self.get_headers()` and, for each column in
        `self.multi_label_fields`, adds one name per label found in
        `self.fields_labels[column]` and then one name per entry in
        `self.label_aggregates`. Each name is produced by
        get_label_field from the column header and the label or
        aggregate. When PYTHON3 is false, the headers are converted
        with encode2 before being returned.
     """
     extended_headers = self.get_headers()
     for column in self.multi_label_fields:
         label_names = [
             get_label_field(self.headers[column], label)
             for label in self.fields_labels[column]]
         extended_headers.extend(label_names)
         aggregate_names = [
             get_label_field(self.headers[column], aggregate)
             for aggregate in self.label_aggregates]
         extended_headers.extend(aggregate_names)
     if PYTHON3:
         return extended_headers
     return [encode2(header) for header in extended_headers]
Пример #6
0
 def get_label_headers(self):
     """Returns the headers followed by the generated label field names.

        The list returned by `self.get_headers()` is grown, column by
        column over `self.multi_label_fields`, with the names that
        get_label_field produces for every label of the column
        (`self.fields_labels[column]`) and then for every aggregate in
        `self.label_aggregates`. If PYTHON3 is false each header goes
        through encode2.
     """
     headers = self.get_headers()
     for column in self.multi_label_fields:
         for label in self.fields_labels[column]:
             headers.append(get_label_field(self.headers[column], label))
         for aggregate in self.label_aggregates:
             headers.append(
                 get_label_field(self.headers[column], aggregate))
     return headers if PYTHON3 else [encode2(name) for name in headers]