def process_cable(self, cb, overwrite):
    """
    Cable content extractor.

    Looks the cable up in `self.mongodb.cables` by `cb.reference_id`.
    If a document already exists and `overwrite` is false, nothing is
    stored. Otherwise the meta information taken from `cb` is written on
    top of the stored document (or on top of `initEdges({})` when there is
    none) and the document is saved.

    In both cases the cable id is appended to `self.cable_list` and the
    running count is logged.
    """
    cable_id = cb.reference_id
    stored = self.mongodb.cables.find_one({'_id': cable_id})
    already_stored = stored is not None

    if already_stored and not overwrite:
        logging.info('CABLE ALREADY EXISTS : SKIPPING')
        self.cable_list.append(cable_id)
        logging.info("cables processed = %d, %s" % (len(self.cable_list), cb.reference_id))
        return

    # Reuse the stored document when there is one, so that whatever it
    # already carries (its edges) is not erased by the update below.
    document = stored if already_stored else initEdges({})

    # Meta information; these keys overwrite the ones in the document.
    metas = {
        # auto index
        '_id': "%s" % cable_id,
        'label': titlefy(cb.subject),
        'start': datetime.strptime(cb.created, "%Y-%m-%d %H:%M"),
        'classification': cb.classification,
        'embassy': cb.origin,
        'content': cb.content,
        'category': "Document",
    }
    document.update(metas)

    self.mongodb.cables.save(document)
    self.cable_list.append(cable_id)
    logging.info(u"cables processed = %d, %s" % (len(self.cable_list), cb.reference_id))
def process_cable(self, cb, overwrite):
    """
    Cable content extractor.

    Stores the meta information of `cb` in `self.mongodb.cables`, keyed by
    `cb.reference_id`. An existing document is left untouched unless
    `overwrite` is true. The cable id is always appended to
    `self.cable_list` and a progress line is logged.
    """
    cable_id = cb.reference_id
    collection = self.mongodb.cables
    existing = collection.find_one({'_id': cable_id})

    if not overwrite and existing is not None:
        logging.info('CABLE ALREADY EXISTS : SKIPPING')
        self.cable_list.append(cable_id)
        processed = len(self.cable_list)
        logging.info("cables processed = %d, %s" % (processed, cb.reference_id))
        return

    # Update metas without erasing edges: start from the existing document
    # if there is one, otherwise from a fresh `initEdges({})` result.
    if existing is None:
        existing = initEdges({})

    # Overwrite the meta information in place.
    existing.update({
        # auto index
        '_id': "%s" % cable_id,
        'label': titlefy(cb.subject),
        'start': datetime.strptime(cb.created, "%Y-%m-%d %H:%M"),
        'classification': cb.classification,
        'embassy': cb.origin,
        'content': cb.content,
        'category': "Document",
    })
    collection.save(existing)

    self.cable_list.append(cable_id)
    processed = len(self.cable_list)
    logging.info(u"cables processed = %d, %s" % (processed, cb.reference_id))
def generate_csv(in_dir, out):
    """\
    Walks through the `in_dir` and generates the CSV file `out`.

    The file is written with ";" as delimiter. It starts with a header row
    followed by one row per cable returned by `cables_from_source(in_dir)`:
    reference id, creation date, origin and the titlefied subject.
    """
    # The `with` block closes the output file even if reading a cable fails;
    # previously the file object was never closed explicitly.
    with open(out, "wb") as f:
        writer = UnicodeWriter(f, delimiter=";")
        writer.writerow(("Reference ID", "Created", "Origin", "Subject"))
        for cable in cables_from_source(in_dir):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
def generate_csv(src, out):
    """\
    Walks through `src` and generates the CSV file `out`.

    Only cables accepted by the predicate
    `pred.origin_filter(pred.origin_germany)` are read. The file is written
    with ';' as delimiter: a header row, then one row per cable holding the
    reference id, creation date, origin and the titlefied subject.
    """
    predicate = pred.origin_filter(pred.origin_germany)
    # The `with` block closes the output file even if reading a cable fails;
    # previously the file object was never closed explicitly.
    with open(out, 'wb') as f:
        writer = UnicodeWriter(f, delimiter=';')
        writer.writerow(('Reference ID', 'Created', 'Origin', 'Subject'))
        for cable in cables_from_source(src, predicate=predicate):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
def generate_csv(in_dir, out):
    """\
    Walks through the `in_dir` and generates the CSV file `out`.

    Output layout (delimiter ';'): a header row, then for every cable from
    `cables_from_source(in_dir)` a row with its reference id, creation date,
    origin and titlefied subject.
    """
    header = ('Reference ID', 'Created', 'Origin', 'Subject')
    # Use a context manager so the output file is flushed and closed on
    # every exit path; the original left the file object open.
    with open(out, 'wb') as f:
        writer = UnicodeWriter(f, delimiter=';')
        writer.writerow(header)
        for cable in cables_from_source(in_dir):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
def generate_csv(src, out):
    """\
    Walks through `src` and generates the CSV file `out`.

    Cables are read through `cables_from_source` with the predicate
    `pred.origin_filter(pred.origin_germany)`. Each cable becomes one
    ';'-delimited row (reference id, creation date, origin, titlefied
    subject) below a header row.
    """
    # Use a context manager so the output file is flushed and closed on
    # every exit path; the original left the file object open.
    with open(out, 'wb') as f:
        writer = UnicodeWriter(f, delimiter=';')
        writer.writerow(('Reference ID', 'Created', 'Origin', 'Subject'))
        cables = cables_from_source(
            src, predicate=pred.origin_filter(pred.origin_germany))
        for cable in cables:
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
def generate_csv(path, out):
    """\
    Walks through the `path` and generates the CSV file `out`.

    Only sources whose predicate argument contains 'BERLIN' are read (see
    `is_berlin_cable`). The file is ';'-delimited: a header row, then one
    row per cable with reference id, creation date, origin and titlefied
    subject.
    """
    def is_berlin_cable(filename):
        # Predicate handed to `cables_from_source`.
        return 'BERLIN' in filename

    # The `with` block closes the output file even if reading a cable fails;
    # previously the file object was never closed explicitly.
    with open(out, 'wb') as f:
        writer = UnicodeWriter(f, delimiter=';')
        writer.writerow(('Reference ID', 'Created', 'Origin', 'Subject'))
        for cable in cables_from_source(path, predicate=is_berlin_cable):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
def generate_csv(filename, out):
    """\
    Walks through the given csv `filename` and generates the CSV file `out`.

    Every cable from `cables_from_csv(filename)` becomes one fully quoted,
    ','-delimited row: the cable content, its creation date, and then a flat
    sequence of (name, value, "Text") triples for the single-valued metas
    followed by the tags, recipients, references and signers.
    """
    # The `with` block closes the output file even if reading a cable fails;
    # previously the file object was never closed explicitly.
    with open(out, 'wb') as f:
        writer = UnicodeWriter(f, delimiter=',', quotechar='"',
                               escapechar='\\', quoting=csv.QUOTE_ALL)
        for cable in cables_from_csv(filename):
            # Single element meta
            single = [
                ("ReferenceId", cable.reference_id, "Text"),
                ("Origin", cable.origin, "Text"),
                ("Classification", cable.classification, "Text"),
                ("Subject", titlefy(cable.subject), "Text"),
                ("Header", cable.header, "Text"),
            ]
            # Multi element meta
            tags = [("Tags", x, "Text") for x in cable.tags]
            recipients = [("Recipients", x.name, "Text")
                          for x in cable.recipients]
            references = [("References", x.value, "Text")
                          for x in cable.references]
            signed_by = [("SignedBy", x, "Text") for x in cable.signed_by]
            triples = single + tags + recipients + references + signed_by
            # Flatten the triples in one pass; this yields the same tuple as
            # sum(triples, ()) without building an intermediate per triple.
            meta = tuple(field for triple in triples for field in triple)
            writer.writerow((cable.content, cable.created) + meta)
def check(content, expected):
    """Check via `eq_` that `titlefy(content)` equals `expected`."""
    actual = titlefy(content)
    eq_(expected, actual)