Exemplo n.º 1
0
 def process_cable(self, cb, overwrite):
     """
     Cable Content extractor

     Looks the cable up in ``self.mongodb.cables`` by ``cb.reference_id``.
     A cable that is already stored is left untouched unless `overwrite`
     is true; otherwise its meta fields are (re)written and the document
     is saved. In both cases the reference id is appended to
     ``self.cable_list`` and a progress line is logged.
     """
     ref_id = cb.reference_id
     stored = self.mongodb.cables.find_one({'_id': ref_id})
     if overwrite or stored is None:
         # A brand-new document starts from the structure built by
         # initEdges; an existing one is updated in place so that keys
         # which are not listed below survive.
         document = initEdges({}) if stored is None else stored
         document.update({
             # auto index
             '_id': "%s" % ref_id,
             'label': titlefy(cb.subject),
             'start': datetime.strptime(cb.created, "%Y-%m-%d %H:%M"),
             'classification': cb.classification,
             'embassy': cb.origin,
             'content': cb.content,
             'category': "Document"
         })
         self.mongodb.cables.save(document)
         progress = u"cables processed = %d, %s"
     else:
         logging.info('CABLE ALREADY EXISTS : SKIPPING')
         progress = "cables processed = %d, %s"
     # Shared tail: the cable counts as processed whether or not it was
     # written.
     self.cable_list.append(ref_id)
     logging.info(progress % (len(self.cable_list), cb.reference_id))
Exemplo n.º 2
0
 def process_cable(self, cb, overwrite):
     """
     Cable Content extractor

     Fetches the document whose ``_id`` equals ``cb.reference_id`` from
     ``self.mongodb.cables``. If it exists and `overwrite` is false,
     nothing is written. Otherwise the meta fields are set from `cb`
     and the document is saved. The reference id is recorded in
     ``self.cable_list`` and the running total is logged on every call.
     """
     key = cb.reference_id
     doc = self.mongodb.cables.find_one({'_id': key})

     already_stored = doc is not None
     if not overwrite and already_stored:
         logging.info('CABLE ALREADY EXISTS : SKIPPING')
         self.cable_list.append(key)
         total = len(self.cable_list)
         logging.info("cables processed = %d, %s" % (total, cb.reference_id))
         return

     if not already_stored:
         # no stored document yet: start from the initEdges structure
         doc = initEdges({})

     # Only the keys below are replaced; any other key of an existing
     # document is kept as it is.
     metas = {
         # auto index
         '_id': "%s" % key,
         'label': titlefy(cb.subject),
         'start': datetime.strptime(cb.created, "%Y-%m-%d %H:%M"),
         'classification': cb.classification,
         'embassy': cb.origin,
         'content': cb.content,
         'category': "Document",
     }
     doc.update(metas)
     self.mongodb.cables.save(doc)

     self.cable_list.append(key)
     total = len(self.cable_list)
     logging.info(u"cables processed = %d, %s" % (total, cb.reference_id))
Exemplo n.º 3
0
def generate_csv(in_dir, out):
    """\
    Walks through the `in_dir` and generates the CSV file `out`

    A header row is written first, followed by one row per cable yielded by
    ``cables_from_source(in_dir)``: reference id, creation date, origin and
    the subject passed through ``titlefy``. Fields are separated by ``;``.

    `in_dir`
        Source handed to ``cables_from_source``.
    `out`
        Path of the CSV file to create (opened in binary write mode).
    """
    # The context manager closes -- and thereby flushes -- the output file
    # even when reading a cable fails half-way through.
    with open(out, "wb") as csv_file:
        writer = UnicodeWriter(csv_file, delimiter=";")
        writer.writerow(("Reference ID", "Created", "Origin", "Subject"))
        for cable in cables_from_source(in_dir):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
Exemplo n.º 4
0
def generate_csv(src, out):
    """\
    Walks through `src` and generates the CSV file `out`

    Only the cables accepted by the predicate
    ``pred.origin_filter(pred.origin_germany)`` are requested from
    ``cables_from_source``. After a header row, each cable contributes one
    ``;``-separated row: reference id, creation date, origin and the subject
    passed through ``titlefy``.

    `src`
        Source handed to ``cables_from_source``.
    `out`
        Path of the CSV file to create (opened in binary write mode).
    """
    only_germany = pred.origin_filter(pred.origin_germany)
    # ``with`` guarantees the output file is closed and flushed, also when
    # an exception interrupts the loop.
    with open(out, 'wb') as csv_file:
        writer = UnicodeWriter(csv_file, delimiter=';')
        writer.writerow(('Reference ID', 'Created', 'Origin', 'Subject'))
        for cable in cables_from_source(src, predicate=only_germany):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
Exemplo n.º 5
0
def generate_csv(in_dir, out):
    """\
    Walks through the `in_dir` and generates the CSV file `out`

    The file starts with a header row and then holds one ``;``-separated row
    per cable returned by ``cables_from_source(in_dir)``, made of the cable's
    reference id, creation date, origin and ``titlefy``-ed subject.

    `in_dir`
        Source handed to ``cables_from_source``.
    `out`
        Path of the CSV file to create (opened in binary write mode).
    """
    # Open the target inside ``with`` so the handle is released and the rows
    # are flushed to disk once the function is left, whatever the reason.
    with open(out, 'wb') as target:
        writer = UnicodeWriter(target, delimiter=';')
        writer.writerow(('Reference ID', 'Created', 'Origin', 'Subject'))
        for cable in cables_from_source(in_dir):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
def generate_csv(src, out):
    """\
    Walks through `src` and generates the CSV file `out`

    ``cables_from_source`` is called with the predicate
    ``pred.origin_filter(pred.origin_germany)``, so only cables passing that
    filter are written. The output has a header row followed by one
    ``;``-separated row per cable (reference id, creation date, origin,
    ``titlefy``-ed subject).

    `src`
        Source handed to ``cables_from_source``.
    `out`
        Path of the CSV file to create (opened in binary write mode).
    """
    origin_predicate = pred.origin_filter(pred.origin_germany)
    # The file used to be left open; the context manager closes it
    # deterministically, including on errors.
    with open(out, 'wb') as target:
        writer = UnicodeWriter(target, delimiter=';')
        writer.writerow(('Reference ID', 'Created', 'Origin', 'Subject'))
        for cable in cables_from_source(src, predicate=origin_predicate):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
Exemplo n.º 7
0
def generate_csv(path, out):
    """\
    Walks through the `path` and generates the CSV file `out`

    Only cables whose filename contains ``BERLIN`` are requested from
    ``cables_from_source``. The output has a header row followed by one
    ``;``-separated row per cable (reference id, creation date, origin,
    ``titlefy``-ed subject).

    `path`
        Source handed to ``cables_from_source``.
    `out`
        Path of the CSV file to create (opened in binary write mode).
    """
    def is_berlin_cable(filename):
        # Predicate for cables_from_source: plain substring test on the name.
        return 'BERLIN' in filename

    # ``with`` closes and flushes the output file even if the loop raises.
    with open(out, 'wb') as csv_file:
        writer = UnicodeWriter(csv_file, delimiter=';')
        writer.writerow(('Reference ID', 'Created', 'Origin', 'Subject'))
        for cable in cables_from_source(path, predicate=is_berlin_cable):
            writer.writerow((cable.reference_id, cable.created, cable.origin,
                             titlefy(cable.subject)))
Exemplo n.º 8
0
def generate_csv(filename, out):
    """\
    Walks through the given csv `filename` and generates the CSV file `out`

    No header row is written. Every cable read by ``cables_from_csv`` becomes
    one fully quoted, comma-separated row: the cable content, its creation
    date, and then a flat sequence of ``(name, value, "Text")`` triples --
    first the single-valued fields, then one triple per tag, recipient,
    reference and signer.

    `filename`
        CSV file handed to ``cables_from_csv``.
    `out`
        Path of the CSV file to create (opened in binary write mode).
    """
    # The context manager closes and flushes the output file on every exit
    # path; previously the handle was never closed.
    with open(out, 'wb') as csv_file:
        writer = UnicodeWriter(csv_file, delimiter=',', quotechar='"',
                               escapechar='\\', quoting=csv.QUOTE_ALL)
        for cable in cables_from_csv(filename):

            # Single element meta
            single = [
                ("ReferenceId", cable.reference_id, "Text"),
                ("Origin", cable.origin, "Text"),
                ("Classification", cable.classification, "Text"),
                ("Subject", titlefy(cable.subject), "Text"),
                ("Header", cable.header, "Text")
            ]

            # Multi element meta
            tags = [("Tags", x, "Text") for x in cable.tags]
            recipients = [("Recipients", x.name, "Text") for x in cable.recipients]
            references = [("References", x.value, "Text") for x in cable.references]
            signed_by = [("SignedBy", x, "Text") for x in cable.signed_by]

            # Flatten the triples in one pass; summing tuples re-copies the
            # growing result for every triple.
            triples = single + tags + recipients + references + signed_by
            meta = tuple(field for triple in triples for field in triple)
            writer.writerow((cable.content, cable.created) + meta)
Exemplo n.º 9
0
 def check(content, expected):
     """Assert that ``titlefy(content)`` equals `expected`."""
     actual = titlefy(content)
     eq_(expected, actual)
Exemplo n.º 10
0
 def check(content, expected):
     """Run `content` through ``titlefy`` and compare the result with `expected`."""
     titled = titlefy(content)
     # eq_ takes the expected value first, then the computed one.
     eq_(expected, titled)