def _get_serialized_fields(self, item, default_value=None, include_empty=None):
    """Yield ``(name, serialized_value)`` tuples for the fields to export.

    :param item: object to export; it is wrapped in ``ItemAdapter``.
    :param default_value: value yielded for a selected field that is not
        present in the item.
    :param include_empty: whether fields missing from the item are still
        yielded; ``None`` falls back to ``self.export_empty_fields``.
    """
    adapter = ItemAdapter(item)
    keep_empty = self.export_empty_fields if include_empty is None else include_empty
    wanted = self.fields_to_export

    # Work out which field names to walk over.
    if wanted is None:
        # No explicit selection: take the names from the item itself.
        # NOTE(review): field_names() presumably lists every declared field,
        # keys() only the populated ones - confirm against ItemAdapter.
        names = adapter.field_names() if keep_empty else adapter.keys()
    elif keep_empty:
        names = wanted
    else:
        # Explicit selection restricted to what the item actually contains.
        names = (name for name in wanted if name in adapter)

    for name in names:
        if name not in adapter:
            yield name, default_value
            continue
        meta = adapter.get_field_meta(name)
        yield name, self.serialize_field(meta, name, adapter[name])
def _get_serialized_fields(self, item, default_value=None, include_empty=None):
    """Return the fields to export as an iterable of tuples
    (name, serialized_value).

    Each selected field of the item is serialized with ``serialize_field``.
    """
    wrapped = ItemAdapter(item)
    if include_empty is None:
        include_empty = self.export_empty_fields

    explicit = self.fields_to_export
    if explicit is not None and include_empty:
        # Export exactly the configured fields.
        candidates = explicit
    elif explicit is not None:
        # Intersection of the configured fields and the item's own keys.
        candidates = (name for name in explicit if name in wrapped)
    elif include_empty:
        # Include empty values (keys the item defines but that were not
        # passed in).
        candidates = wrapped.field_names()
    else:
        # Only the values the item actually carries.
        candidates = wrapped.keys()

    for field_name in candidates:
        value = default_value
        if field_name in wrapped:
            field_meta = wrapped.get_field_meta(field_name)
            value = self.serialize_field(field_meta, field_name, wrapped[field_name])
        yield field_name, value
def store(self, item):
    """Append *item* to ``self.buffer`` as one row of values.

    On the first call (while ``self._headers_not_written`` is true) the
    item's field names are captured as ``self.header`` and appended to the
    buffer as the header row. Every item, including the first, is then
    appended as a row holding one value per header column, looked up with
    ``adapter.get``. List values are flattened into a single string by
    joining the ``str()`` of each element with ``self.delimiter``.

    :param item: object to store; it is wrapped in ``ItemAdapter``.
    """
    adapter = ItemAdapter(item)
    if self._headers_not_written:
        # Snapshot the field names into a real list so the column order is
        # frozen here. field_names() returns a keys view, which for dict
        # items would keep tracking later changes to this first item.
        self.header = list(adapter.field_names())
        # The buffer gets its own copy so editing the header row later
        # cannot alter the column list used for subsequent items.
        self.buffer.append(list(self.header))
        self._headers_not_written = False

    row = []
    for field in self.header:
        val = adapter.get(field)
        if isinstance(val, list):
            val = self.delimiter.join(str(v) for v in val)
        row.append(val)
    self.buffer.append(row)
def close_spider(self, spider):
    """Write every collected item to ``self.file`` as CSV, then close it.

    Each item is wrapped in ``ItemAdapter`` and written through a
    ``csv.DictWriter`` built from that item's own field names, so items
    with different fields each use their own column list. The header row is
    written only when the file position is 0, i.e. before anything else has
    been written, and therefore reflects the first item's fields.

    :param spider: not used by this method.
    """
    try:
        # Truthiness check: an identity comparison against a fresh ``[]``
        # is always true and never detects an empty list.
        if self.items:
            for item in self.items:
                adapter = ItemAdapter(item)
                columns = adapter.field_names()
                writer = csv.DictWriter(
                    self.file,
                    fieldnames=columns,
                    restval='',
                    extrasaction='ignore',
                    delimiter=',',
                    quoting=csv.QUOTE_NONNUMERIC,
                    quotechar="\"",
                )
                if self.file.tell() == 0:
                    writer.writeheader()
                writer.writerow(adapter.asdict())
    finally:
        # Close the file even if writing one of the rows raised.
        self.file.close()
def _get_serialized_fields(self, item, default_value=None, include_empty=None, pre=None, field_filter=None):
    """Yield ``(name, serialized_value)`` tuples for the fields to export.

    Copy from BaseItemExporter, extended with *pre* and *field_filter*.

    :param item: object to export; it is wrapped in ``ItemAdapter``.
    :param default_value: value yielded for a selected field that is not
        present in the item.
    :param include_empty: whether fields missing from the item are still
        yielded; ``None`` falls back to ``self.export_empty_fields``.
    :param pre: prefix combined with each field name through ``pre_join``;
        the filter is only consulted when this is not ``None``.
        NOTE(review): presumably the path of the enclosing field for nested
        items - confirm against ``pre_join`` and ``serialize_field``.
    :param field_filter: container of combined keys to skip; a field whose
        combined key is in it is not yielded at all.
    """
    adapter = ItemAdapter(item)
    if include_empty is None:
        include_empty = self.export_empty_fields

    selected = self.fields_to_export
    if selected is None:
        names = adapter.field_names() if include_empty else adapter.keys()
    elif include_empty:
        names = selected
    else:
        names = (name for name in selected if name in adapter)

    for name in names:
        # Combined key for this field; stays None when no filtering applies.
        path = None
        if field_filter and pre is not None:
            path = pre_join(pre, name)
            if path in field_filter:
                continue

        if name in adapter:
            meta = adapter.get_field_meta(name)
            serialized = self.serialize_field(
                meta,
                name,
                adapter[name],
                pre=path,
                field_filter=field_filter,
            )
        else:
            serialized = default_value
        yield name, serialized
def _get_serialized_fields(self, item, default_value=None, include_empty=None):
    """Yield ``(name, serialized_value)`` tuples for the fields to export.

    ``self.fields_to_export`` may be ``None`` (use the item's own field
    names), a mapping of item field name to output name, or a plain
    iterable of field names. With a mapping, the value is looked up under
    the item field name and yielded under the output name.

    :param item: object to export; it is wrapped in ``ItemAdapter``.
    :param default_value: value yielded for a selected field that is not
        present in the item.
    :param include_empty: whether fields missing from the item are still
        yielded; ``None`` falls back to ``self.export_empty_fields``.
    """
    adapter = ItemAdapter(item)
    if include_empty is None:
        include_empty = self.export_empty_fields

    spec = self.fields_to_export
    if spec is None:
        entries = adapter.field_names() if include_empty else adapter.keys()
    elif isinstance(spec, Mapping):
        pairs = spec.items()
        if include_empty:
            entries = pairs
        else:
            entries = ((src, dst) for src, dst in pairs if src in adapter)
    elif include_empty:
        entries = spec
    else:
        entries = (name for name in spec if name in adapter)

    for entry in entries:
        # A bare string names both the item field and the output column;
        # anything else is an (item_field, output_field) pair.
        if isinstance(entry, str):
            item_field = output_field = entry
        else:
            item_field, output_field = entry

        if item_field not in adapter:
            yield output_field, default_value
            continue
        meta = adapter.get_field_meta(item_field)
        yield output_field, self.serialize_field(meta, output_field, adapter[item_field])