Exemplo n.º 1
0
 def get_fields(cls, file, encoding):
     """Yield the ArticleFields suggested for a single uploaded text file.

     The file name (without extension) is offered as the title, the decoded
     file contents as the text and, when present, the directory part of
     ``file.name`` as the section. If the file name contains underscores,
     every underscore-separated part is offered as an extra field.

     ``encoding`` is currently not used: the contents are always decoded
     as ASCII (see the FIXME below).
     """
     directory, basename = os.path.split(file.name)
     stem, _extension = os.path.splitext(basename)

     yield ArticleField("Filename", "title", values=[stem])

     # FIXME encoding, and probably don't read the whole file?
     body = file.read().decode("ascii")
     yield ArticleField("Text", "text", values=[body])

     if directory:
         yield ArticleField("path", "section", values=[directory])

     # Only file names that actually contain an underscore get split up.
     if "_" not in stem:
         return
     for index, part in enumerate(stem.split("_")):
         label = "Filename part {i}".format(i=index)
         yield ArticleField(label, values=[part])
Exemplo n.º 2
0
    def get_fields(cls, file, encoding):
        """Yield suggested ArticleFields for a CSV upload from a row sample.

        For every file produced by ``UploadScript._get_files`` the first five
        rows are read and the distinct, non-blank (stripped) values of each
        column are collected. One ArticleField is then yielded per column, in
        alphabetical order of column name, carrying up to five sample values
        and the destination/type suggested by ``guess_destination_and_type``.

        Columns that have no non-blank value in the sampled rows are not
        yielded at all.
        """
        sample_data = defaultdict(OrderedSet)

        # NOTE(review): the per-file encoding yielded by _get_files is not
        # used here; the caller-supplied ``encoding`` is passed to _open
        # instead. Confirm whether that is intentional.
        for f, _enc, _ in UploadScript._get_files(file, encoding):
            reader = csv.DictReader(_open(f, encoding))
            for row in itertools.islice(reader, 5):
                for field_name, value in row.items():
                    # csv.DictReader pads short rows with None and stores
                    # surplus cells in a list under the key None; neither is
                    # a usable sample value, so skip them instead of crashing.
                    if field_name is None or value is None:
                        continue
                    stripped = value.strip()
                    if stripped:
                        sample_data[field_name].add(stripped)

        # Guess types and destinations
        for field_name, values in sorted(sample_data.items(),
                                         key=itemgetter(0)):
            filtered_field_name = to_valid_field_name(field_name)
            first_value = next(iter(values), None)

            suggested_destination, suggested_type = guess_destination_and_type(
                filtered_field_name, first_value)
            yield ArticleField(field_name,
                               destination=suggested_destination,
                               values=list(itertools.islice(values, 5)),
                               suggested_type=suggested_type)
Exemplo n.º 3
0
 def get_fields(cls, file, encoding):
     """Yield one ArticleField per key found in the scraped article dicts.

     The file is read with ``_read`` and passed to ``cls._scrape_unit``;
     the values of every key are gathered over all resulting dicts and the
     first five per key are offered as sample values. The key is used both
     as the field label and as its destination.
     """
     samples = defaultdict(list)
     articles = cls._scrape_unit(_read(file, encoding))
     for article in articles:
         for field, value in article.items():
             samples[field].append(value)

     for field, collected in samples.items():
         yield ArticleField(field, field, collected[:5])
Exemplo n.º 4
0
    def get_fields(cls, file: str, encoding: str):
        """Yield an ArticleField for every column of the ``;``-delimited CSV files.

        Each file produced by ``cls._get_files`` is read completely. Every
        column header is looked up in the mapping returned by
        ``Language.reverseMap(cls.languages)`` and stored together with all
        values of that column; a column that occurs in several files keeps
        the data of the last file. The mapped destination is finally
        translated through ``ESFIELDS`` to obtain the destination name.

        Raises:
            KeyError: if a column header is missing from the reverse map, or
                its mapped destination is missing from ``ESFIELDS``.
        """
        fields = OrderedDict()
        field_map = Language.reverseMap(cls.languages)
        # Distinct loop names so the ``file``/``encoding`` parameters are not
        # silently rebound while iterating.
        for csv_file, csv_encoding, _ in cls._get_files(file, encoding):
            reader = csv.DictReader(_open(csv_file, csv_encoding),
                                    delimiter=";")
            rows = list(reader)
            # ``fieldnames`` is None when the file has no header row (e.g. an
            # empty file); treat that as "no columns" rather than failing.
            for column in reader.fieldnames or ():
                fields[column] = (field_map[column],
                                  [row[column] for row in rows])

        for source, (destination, values) in fields.items():
            dest_name = ESFIELDS[destination]
            yield ArticleField(source, destination=dest_name, values=values)
Exemplo n.º 5
0
 def get_fields(cls, file, encoding):
     """Yield an ArticleField for every metadata key found in the upload.

     The truthy values of each key are collected, in order of first
     appearance, over all non-empty metadata dicts produced by
     ``cls._get_files``. Before yielding, dashes are removed from the key
     and it is stripped; if the result contains an underscore, the part
     after the last underscore is passed as ``suggested_type`` and the part
     before it as the field name (used for both label and destination).
     At most five sample values are passed per field.
     """
     fields = collections.OrderedDict()
     for _file, _encoding, (_query, arts) in cls._get_files(file, encoding):
         for meta in arts:
             if not meta:
                 continue
             for k, v in meta.items():
                 if v:
                     # Append in place; rebuilding the list with ``+`` on
                     # every value is quadratic in the number of values.
                     fields.setdefault(k, []).append(v)

     for k, values in fields.items():
         k = k.replace("-", "").strip()
         if "_" in k:
             name, suggested_type = k.rsplit("_", 1)
         else:
             name, suggested_type = k, None
         yield ArticleField(name, name, values[:5],
                            suggested_type=suggested_type)
Exemplo n.º 6
0
 def get_fields(cls, file, encoding):
     """Return a list with one ArticleField per key found in the documents.

     The file is split with ``split_file`` and every document is parsed
     with ``parse_doc``; the values of each key are collected in order of
     first appearance. The key is used both as the field label and as its
     destination, and all collected values are passed along.

     ``encoding`` is not used by this implementation.
     """
     fields = collections.OrderedDict()
     for doc in split_file(file):
         for k, v in parse_doc(doc):
             # Append in place; rebuilding the list with ``+`` on every
             # value is quadratic in the number of values.
             fields.setdefault(k, []).append(v)
     return [ArticleField(k, k, values) for k, values in fields.items()]