Пример #1
0
 def get_label_headers(self):
     """Builds the header list extended with the per-label field names.

        Starts from the list returned by `self.get_headers()` and, for each
        column in `self.multi_label_fields`, adds one name per label found
        in `self.fields_labels` for that column, followed by one name per
        entry in `self.label_aggregates`. Every added name is produced by
        `get_label_field` from the column's header. When `PYTHON3` is
        false, each header is passed through `encode2` before returning.
     """
     # The list handed back by get_headers() is grown in place.
     extended_headers = self.get_headers()
     for column in self.multi_label_fields:
         extended_headers.extend([
             get_label_field(self.headers[column], label)
             for label in self.fields_labels[column]])
         for aggregate in self.label_aggregates:
             aggregate_name = get_label_field(self.headers[column],
                                              aggregate)
             extended_headers.append(aggregate_name)
     if PYTHON3:
         return extended_headers
     return [encode2(name) for name in extended_headers]
Пример #2
0
 def get_label_headers(self):
     """Builds the UTF-8 encoded header list extended with label fields.

        Starts from the list returned by `self.get_headers()` and, for each
        column in `self.multi_label_fields`, adds one name per label found
        in `self.fields_labels` for that column, followed by one name per
        entry in `self.label_aggregates`. Every added name is produced by
        `get_label_field` from the column's header. The returned list holds
        each header encoded with `encode("utf-8")`.
     """
     # The list handed back by get_headers() is grown in place.
     extended_headers = self.get_headers()
     for column in self.multi_label_fields:
         label_names = [get_label_field(self.headers[column], label)
                        for label in self.fields_labels[column]]
         extended_headers.extend(label_names)
         for aggregate in self.label_aggregates:
             aggregate_name = get_label_field(self.headers[column],
                                              aggregate)
             extended_headers.append(aggregate_name)
     return [name.encode("utf-8") for name in extended_headers]
Пример #3
0
def multi_label_expansion(training_set, training_set_header, objective_field,
                          args, output_path, field_attributes=None,
                          labels=None, session_file=None):
    """Writes an extended copy of the training data with a column per label

       A `TrainReader` is built on `training_set` in multi-label mode. The
       new header row is the reader's headers (requested with
       `objective_field=False`), plus one generated field name per label,
       plus the objective name. The extended rows produced by the reader
       are written to `<output_path>/extended_<file name>`, where the file
       name falls back to "training_set.csv" if `os.path.basename` raises
       AttributeError.

       Returns a tuple with the output file path, the reader's labels, the
       field attributes (a dict passed by the caller is updated in place)
       and the reader's objective name.

    """
    # The reader resolves the objective field and the set of labels, either
    # from the given `labels` or from the data itself.
    reader = TrainReader(training_set, training_set_header,
                         objective_field, multi_label=True,
                         labels=labels,
                         label_separator=args.label_separator,
                         training_separator=args.training_separator)

    # Header row: regular fields, one field per label, then the objective.
    header_row = reader.get_headers(objective_field=False)
    header_row.extend([l.get_label_field(reader.objective_name, label)
                       for label in reader.labels])
    header_row.append(reader.objective_name)
    header_row = [name.encode("utf-8") for name in header_row]

    try:
        base_name = os.path.basename(training_set)
    except AttributeError:
        base_name = "training_set.csv"
    output_file = "%s%sextended_%s" % (output_path, os.sep, base_name)

    u.log_message(u.dated("Transforming to extended source.\n"),
                  log_file=session_file, console=args.verbosity)

    with open(output_file, 'w', 0) as extended_handler:
        writer = csv.writer(extended_handler, lineterminator="\n")
        writer.writerow(header_row)
        # Rewind the reader and copy every row in its extended form.
        reader.reset()
        if training_set_header:
            # The first row is consumed here and not copied.
            reader.next()
        while True:
            try:
                extended_row = reader.next(extended=True)
            except StopIteration:
                break
            writer.writerow(extended_row)

    # The looked-up value is not used again below; the lookup itself is kept.
    objective_field = reader.headers[reader.objective_column]

    if field_attributes is None:
        field_attributes = {}
    # Each label column gets a "label" attribute made of the multi-label
    # prefix and the label itself, which marks it as a label field.
    for label_column, label in reader.labels_columns():
        field_attributes[label_column] = {
            "label": "%s%s" % (l.MULTI_LABEL_LABEL, label)}

    return (output_file, reader.labels, field_attributes,
            reader.objective_name)