def encoding(self):
    """Return the text encoding for this object, resolving it on first use.

    If ``self._encoding`` is already set it is returned as-is. Otherwise the
    charset parsed from the ``content-type`` header is tried first; when that
    gives nothing, the encoding is guessed from the bytes of the file at
    ``self.file_path``. Whatever is found is stored on ``self._encoding``.
    """
    if self._encoding is not None:
        return self._encoding

    content_type = self.headers.get('content-type')
    self._encoding = parse_mimetype(content_type).charset
    if self._encoding is None:
        # The header declared no charset: sniff the stored file instead.
        with open(self.file_path, 'rb') as handle:
            self._encoding = guess_file_encoding(handle)
    return self._encoding
def to_dict(self):
    """Return this object's metadata as a plain dictionary.

    Alongside the stored attributes, the result carries ``mime_type_label``,
    the label that ``parse_mimetype`` reports for ``self.mime_type``.
    """
    parsed = parse_mimetype(self.mime_type)
    return dict(
        path=self.path,
        sha1=self.checksum,  # the checksum attribute is exposed under "sha1"
        timestamp=self.timestamp,
        dataset=self.dataset,
        mime_type=self.mime_type,
        mime_type_label=parsed.label,
        size=self.size,
        title=self.title,
    )
def all_resources(conn: Conn, dataset: Dataset) -> Generator[Resource, None, None]:
    """Yield every resource row recorded for ``dataset``, ordered by path.

    Each row is converted to a dict and enriched with two extra keys before
    being yielded: ``mime_type_label`` (only when the row has a MIME type)
    and ``url`` (built by ``dataset.make_public_url`` from the row's path).
    """
    query = (
        select(resource_table)
        .filter(resource_table.c.dataset == dataset.name)
        .order_by(resource_table.c.path.asc())
    )
    rows = conn.execute(query).fetchall()
    for record in rows:
        entry = cast(Resource, record._asdict())
        # Add mime type label for the web UI. Should this live here?
        content_type = entry["mime_type"]
        if content_type is not None:
            entry["mime_type_label"] = parse_mimetype(content_type).label
        entry["url"] = dataset.make_public_url(entry["path"])
        yield entry
def caption(self, value: str) -> str:
    """Return a display caption for the MIME type string ``value``.

    The label reported by ``parse_mimetype`` is used when it is truthy;
    otherwise the raw ``value`` is returned unchanged.
    """
    label = parse_mimetype(value).label
    if label:
        return label
    return value
def caption(self, value):
    """Return a display caption for the MIME type string ``value``.

    The label reported by ``parse_mimetype`` is preferred. When the parsed
    type has no label (``None`` or empty), the raw ``value`` is returned
    instead, so a known input is never captioned as ``None``.
    """
    return parse_mimetype(value).label or value
import csv
from collections import defaultdict
from pprint import pprint

from pantomime import parse_mimetype

# Script: read ';'-separated (mime type, count) rows from occrp.csv and print
# the label that pantomime assigns to each original MIME type string.

data = defaultdict(int)

# newline="" lets the csv module do its own newline handling, as the csv
# documentation requires for files passed to csv.reader.
with open('occrp.csv', 'r', newline='') as fh:
    reader = csv.reader(fh, delimiter=';')
    for row in reader:
        # Each row must have exactly two columns; anything else raises
        # ValueError on unpacking.
        original, count = row
        parsed = parse_mimetype(original)
        print(parsed.label)
        # NOTE: the per-type tally below is currently disabled, so ``data``
        # stays empty and the final print reports 0.
        # data[parsed.normalized] += int(count)
        # if parsed.normalized != original:
        #     pprint((original, parsed.label))

print(len(data))