def mtgx2json(graph):
    """Describe the entities of a Maltego ``.mtgx`` graph as a list of records.

    Only the first ``*.graphml`` member of the archive is converted.

    :param graph: anything ``ZipFile`` accepts (a path or a seekable binary
        file object).
    :returns: a list with one ``OrderedDict`` per entity node, holding the
        keys ``NodeID``, ``EntityType``, ``Data`` (the properties rendered as
        ``"name: value"`` pairs joined with ``" - "``) and ``Links`` (a dict
        with the ``Incoming`` and ``Outgoing`` neighbour id lists). A node
        that takes part in no edge reports ``0`` for both link entries.
        ``None`` is returned when the archive holds no ``*.graphml`` member.
    """
    graphml_ns = '{http://graphml.graphdrawing.org/xmlns}'
    mtgx_ns = '{http://maltego.paterva.com/xml/mtgx}'

    # The context manager guarantees the archive handle is released.
    with ZipFile(graph) as archive:
        members = [n for n in archive.namelist() if n.endswith('.graphml')]
        if not members:
            return None
        xml = XML(archive.read(members[0]))

    # Map every node id that appears in an edge to its neighbours.
    links = {}
    for edge in xml.findall('%sgraph/%sedge' % (graphml_ns, graphml_ns)):
        src = edge.get('source')
        dst = edge.get('target')
        links.setdefault(src, dict(in_=[], out=[]))['out'].append(dst)
        links.setdefault(dst, dict(in_=[], out=[]))['in_'].append(src)

    multikeys = []
    for node in xml.findall('%sgraph/%snode' % (graphml_ns, graphml_ns)):
        node_id = node.get('id')
        entity = node.find('%sdata/%sMaltegoEntity' % (graphml_ns, mtgx_ns))
        if entity is None:
            # Not a Maltego entity node; there is nothing to describe.
            continue

        # A list of pairs keeps the key order on every interpreter version.
        record = OrderedDict([
            ('NodeID', node_id),
            ('EntityType', entity.get('type').strip()),
        ])

        properties = OrderedDict()
        for prop in entity.findall(
                '%sProperties/%sProperty' % (mtgx_ns, mtgx_ns)):
            # findtext() yields None for a missing <Value> and '' for an
            # empty one; both are rendered as the empty string.
            value = prop.findtext('%sValue' % mtgx_ns) or ''
            properties[prop.get('displayName')] = value.strip()
        record['Data'] = ' - '.join(
            '%s: %s' % (key, value) for key, value in properties.items())

        node_links = links.get(node_id, {})
        record['Links'] = {
            'Incoming': node_links.get('in_', 0),
            'Outgoing': node_links.get('out', 0),
        }
        multikeys.append(record)
    return multikeys
def mtgx2csv(opts):
    """Export every graph in a Maltego ``.mtgx`` archive to its own CSV file.

    For each ``*.graphml`` member of the archive named by ``opts.graph`` one
    CSV file is written. Its name is the archive path with the first ``.``
    replaced by ``_``, then ``_`` and the member's base name with
    ``.graphml`` swapped for ``.csv``. Every row describes one entity:
    ``Entity Type=...``, one ``<displayName>=<value>`` cell per property,
    then the incoming and outgoing link counts.

    :param opts: parsed options; only ``opts.graph`` (the archive path) is
        read.
    """
    graphml_ns = '{http://graphml.graphdrawing.org/xmlns}'
    mtgx_ns = '{http://maltego.paterva.com/xml/mtgx}'

    # The context manager guarantees the archive handle is released.
    with ZipFile(opts.graph) as archive:
        for member in archive.namelist():
            if not member.endswith('.graphml'):
                continue

            filename = '%s_%s' % (
                opts.graph.replace('.', '_', 1),
                os.path.basename(member).replace('.graphml', '.csv', 1),
            )
            print('Writing data from %s/%s to %s...' % (opts.graph, member, filename),
                  file=sys.stderr)

            with open(filename, 'w') as csvfile:
                csv = writer(csvfile)
                xml = XML(archive.read(member))

                # Count the edges entering and leaving every node id.
                links = {}
                for edge in xml.findall('%sgraph/%sedge' % (graphml_ns, graphml_ns)):
                    src = edge.get('source')
                    dst = edge.get('target')
                    links.setdefault(src, dict(in_=0, out=0))['out'] += 1
                    links.setdefault(dst, dict(in_=0, out=0))['in_'] += 1

                for node in xml.findall('%sgraph/%snode' % (graphml_ns, graphml_ns)):
                    node_id = node.get('id')
                    entity = node.find('%sdata/%sMaltegoEntity' % (graphml_ns, mtgx_ns))
                    if entity is None:
                        # Not a Maltego entity node; nothing to export.
                        continue

                    row = [to_utf8(('Entity Type=%s' % entity.get('type')).strip())]
                    for prop in entity.findall(
                            '%sProperties/%sProperty' % (mtgx_ns, mtgx_ns)):
                        # A missing or empty <Value> becomes an empty string.
                        value = prop.findtext('%sValue' % mtgx_ns) or ''
                        row.append(to_utf8(
                            ('%s=%s' % (prop.get('displayName'), value)).strip()))

                    # Nodes that take part in no edge report zero links.
                    counts = links.get(node_id, {})
                    row.append('Incoming Links=%s' % counts.get('in_', 0))
                    row.append('Outgoing Links=%s' % counts.get('out', 0))
                    csv.writerow(row)
def mtgx2json(graph):
    """Turn the first graph of a Maltego ``.mtgx`` archive into entity records.

    :param graph: a path or seekable binary file object understood by
        ``ZipFile``.
    :returns: one ``OrderedDict`` per entity node with the keys ``NodeID``,
        ``EntityType``, ``Data`` (properties rendered as ``"name: value"``
        joined by ``" - "``) and ``Links`` (``Incoming`` / ``Outgoing``
        neighbour id lists, or ``0`` for a node that appears in no edge).
        ``None`` is returned if the archive contains no ``*.graphml`` member.
    """
    graphml_ns = "{http://graphml.graphdrawing.org/xmlns}"
    mtgx_ns = "{http://maltego.paterva.com/xml/mtgx}"
    edge_path = "%sgraph/%sedge" % (graphml_ns, graphml_ns)
    node_path = "%sgraph/%snode" % (graphml_ns, graphml_ns)
    entity_path = "%sdata/%sMaltegoEntity" % (graphml_ns, mtgx_ns)
    property_path = "%sProperties/%sProperty" % (mtgx_ns, mtgx_ns)
    value_path = "%sValue" % mtgx_ns

    # Read the document inside a ``with`` so the archive is always closed.
    with ZipFile(graph) as archive:
        graph_names = [name for name in archive.namelist() if name.endswith(".graphml")]
        if not graph_names:
            return None
        xml = XML(archive.read(graph_names[0]))

    # node id -> ids of the nodes on the other end of its edges
    links = {}
    for edge in xml.findall(edge_path):
        src = edge.get("source")
        dst = edge.get("target")
        links.setdefault(src, dict(in_=[], out=[]))["out"].append(dst)
        links.setdefault(dst, dict(in_=[], out=[]))["in_"].append(src)

    multikeys = []
    for node in xml.findall(node_path):
        entity = node.find(entity_path)
        if entity is None:
            # Skip graph nodes that do not wrap a Maltego entity.
            continue
        node_id = node.get("id")

        properties = OrderedDict()
        for prop in entity.findall(property_path):
            # Missing or empty <Value> elements are rendered as "".
            properties[prop.get("displayName")] = (prop.findtext(value_path) or "").strip()

        neighbours = links.get(node_id, {})
        # Built from pairs so the key order holds on every interpreter.
        multikeys.append(OrderedDict([
            ("NodeID", node_id),
            ("EntityType", entity.get("type").strip()),
            ("Data", " - ".join("%s: %s" % item for item in properties.items())),
            ("Links", {
                "Incoming": neighbours.get("in_", 0),
                "Outgoing": neighbours.get("out", 0),
            }),
        ]))
    return multikeys
def run(args):
    """Write one CSV file per graph found in a Maltego ``.mtgx`` archive.

    ``args`` is handed to ``parse_args``; the resulting ``opts.graph`` names
    the archive. For a member such as ``Graphs/Graph1.graphml`` the output
    file is ``Graph1.csv`` in the current directory (second path component,
    text before its first dot). Each row lists the entity type, every
    property as ``<displayName>=<value>`` and the incoming/outgoing link
    counts.
    """
    opts = parse_args(args)
    graphml_ns = '{http://graphml.graphdrawing.org/xmlns}'
    mtgx_ns = '{http://maltego.paterva.com/xml/mtgx}'

    # The context manager guarantees the archive handle is released.
    with ZipFile(opts.graph) as archive:
        for member in archive.namelist():
            if not member.endswith('.graphml'):
                continue

            with open(member.split('/')[1].split('.')[0] + '.csv', 'wb') as csvfile:
                csv = writer(csvfile)
                xml = XML(archive.read(member))

                # Count the edges entering and leaving every node id.
                links = {}
                for edge in xml.findall('%sgraph/%sedge' % (graphml_ns, graphml_ns)):
                    src = edge.get('source')
                    dst = edge.get('target')
                    links.setdefault(src, dict(in_=0, out=0))['out'] += 1
                    links.setdefault(dst, dict(in_=0, out=0))['in_'] += 1

                for node in xml.findall('%sgraph/%snode' % (graphml_ns, graphml_ns)):
                    node_id = node.get('id')
                    entity = node.find('%sdata/%sMaltegoEntity' % (graphml_ns, mtgx_ns))
                    if entity is None:
                        # Not a Maltego entity node; nothing to export.
                        continue

                    row = [to_utf8(('Entity Type=%s' % entity.get('type')).strip())]
                    for prop in entity.findall(
                            '%sProperties/%sProperty' % (mtgx_ns, mtgx_ns)):
                        # A missing or empty <Value> becomes an empty string.
                        value = prop.findtext('%sValue' % mtgx_ns) or ''
                        row.append(to_utf8(
                            ('%s=%s' % (prop.get('displayName'), value)).strip()))

                    # Nodes that take part in no edge report zero links.
                    counts = links.get(node_id, {})
                    row.append('Incoming Links=%s' % counts.get('in_', 0))
                    row.append('Outgoing Links=%s' % counts.get('out', 0))
                    csv.writerow(row)
def mtgx2csv(opts):
    """Export every graph in a Maltego ``.mtgx`` archive to its own CSV file.

    For each ``*.graphml`` member of the archive named by ``opts.graph`` a
    CSV file is written whose name is the archive path with the first ``.``
    replaced by ``_``, then ``_`` and the member's base name with
    ``.graphml`` swapped for ``.csv``. A progress line is printed for each
    file. Rows hold the entity type, one ``<displayName>=<value>`` cell per
    property and the incoming/outgoing link counts.

    :param opts: parsed options; only ``opts.graph`` (the archive path) is
        read.
    """
    graphml_ns = "{http://graphml.graphdrawing.org/xmlns}"
    mtgx_ns = "{http://maltego.paterva.com/xml/mtgx}"

    # The context manager guarantees the archive handle is released.
    with ZipFile(opts.graph) as archive:
        for member in archive.namelist():
            if not member.endswith(".graphml"):
                continue

            filename = "%s_%s" % (
                opts.graph.replace(".", "_", 1),
                os.path.basename(member).replace(".graphml", ".csv", 1),
            )
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("Writing data from %s/%s to %s..." % (opts.graph, member, filename))

            with open(filename, "wb") as csvfile:
                csv = writer(csvfile)
                xml = XML(archive.read(member))

                # Count the edges entering and leaving every node id.
                links = {}
                for edge in xml.findall("%sgraph/%sedge" % (graphml_ns, graphml_ns)):
                    src = edge.get("source")
                    dst = edge.get("target")
                    links.setdefault(src, dict(in_=0, out=0))["out"] += 1
                    links.setdefault(dst, dict(in_=0, out=0))["in_"] += 1

                for node in xml.findall("%sgraph/%snode" % (graphml_ns, graphml_ns)):
                    node_id = node.get("id")
                    entity = node.find("%sdata/%sMaltegoEntity" % (graphml_ns, mtgx_ns))
                    if entity is None:
                        # Not a Maltego entity node; nothing to export.
                        continue

                    row = [to_utf8(("Entity Type=%s" % entity.get("type")).strip())]
                    for prop in entity.findall(
                            "%sProperties/%sProperty" % (mtgx_ns, mtgx_ns)):
                        # A missing or empty <Value> becomes an empty string.
                        value = prop.findtext("%sValue" % mtgx_ns) or ""
                        row.append(to_utf8(
                            ("%s=%s" % (prop.get("displayName"), value)).strip()))

                    # Nodes that take part in no edge report zero links.
                    counts = links.get(node_id, {})
                    row.append("Incoming Links=%s" % counts.get("in_", 0))
                    row.append("Outgoing Links=%s" % counts.get("out", 0))
                    csv.writerow(row)
def listChildrenViaPropfind():
    """List the names of the children reported by a depth-1 PROPFIND on ``/``.

    Generator-style helper: it yields the pending ``simpleSend`` call and
    hands its result to ``returnValue`` (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here). The shortest
    ``href`` in the multistatus body is taken to be the collection itself;
    every other ``href`` is returned with that prefix and any trailing
    slashes removed.
    """
    body = yield self.simpleSend(
        "PROPFIND",
        "/",
        resultcode=responsecode.MULTI_STATUS,
        headers=[('Depth', '1')],
    )
    hrefs = [node.text for node in XML(body).findall("{DAV:}response/{DAV:}href")]

    # The collection's own href is the shortest one; drop it from the list.
    parent = min(hrefs, key=len)
    hrefs.remove(parent)

    prefix_length = len(parent)
    children = [href[prefix_length:].rstrip("/") for href in hrefs]
    returnValue(children)
def parse_highlights(xml):
    """Parse a highlights document into one dict per top-level ``series``.

    Each dict starts from the attributes of a ``<series>`` element that is a
    direct child of the root and is then updated with whatever
    ``xml_text_elements`` returns for that element.

    :param xml: the XML document text.
    :returns: list of dicts, in document order.
    """
    def describe(series):
        # Attributes first, so values from xml_text_elements take precedence.
        info = dict(series.items())
        info.update(xml_text_elements(series))
        return info

    return [describe(series) for series in XML(xml).findall('series')]
def parse_highlights(xml):
    """Return a list describing every ``<series>`` child of the document root.

    For each such element the result holds a dict made of the element's
    attributes, updated with the output of ``xml_text_elements`` for it.

    :param xml: the XML document text.
    :returns: list of dicts, in document order.
    """
    root = XML(xml)
    series_nodes = root.findall('series')

    highlights = []
    for node in series_nodes:
        entry = {}
        entry.update(node.items())
        # Applied last so these values override same-named attributes.
        entry.update(xml_text_elements(node))
        highlights.append(entry)
    return highlights
def listChildrenViaPropfind():
    """List the child names under user01's calendar home via PROPFIND.

    Generator-style helper: it yields the request, then the response body,
    and hands the final list to ``returnValue`` (presumably driven by
    Twisted's ``inlineCallbacks``; the decorator is not visible here). The
    request is a depth-1 PROPFIND on ``/calendars/__uids__/user01/`` sent as
    ``user01``. The shortest ``href`` in the reply is treated as the
    collection itself and removed; the remaining hrefs are returned with
    that prefix and trailing slashes stripped.
    """
    propfind = SimpleStoreRequest(self, "PROPFIND", "/calendars/__uids__/user01/", authid="user01")
    propfind.headers.setHeader("depth", "1")

    raw_response = yield self.send(propfind)
    body = yield allDataFromStream(IResponse(raw_response).stream)

    hrefs = [node.text for node in XML(body).findall("{DAV:}response/{DAV:}href")]
    # The collection's own href is the shortest one; drop it from the list.
    parent = min(hrefs, key=len)
    hrefs.remove(parent)

    prefix_length = len(parent)
    returnValue([href[prefix_length:].rstrip("/") for href in hrefs])
def mtgx2csv(opts):
    """Export every graph in a Maltego ``.mtgx`` archive to its own CSV file.

    For each ``*.graphml`` member of the archive named by ``opts.graph`` a
    CSV file is written whose name is the archive path with the first ``.``
    replaced by ``_``, then ``_`` and the member's base name with
    ``.graphml`` swapped for ``.csv``. A progress line is printed for each
    file. Rows hold the entity type, one ``<displayName>=<value>`` cell per
    property and the incoming/outgoing link counts.

    :param opts: parsed options; only ``opts.graph`` (the archive path) is
        read.
    """
    graphml_ns = '{http://graphml.graphdrawing.org/xmlns}'
    mtgx_ns = '{http://maltego.paterva.com/xml/mtgx}'
    edge_path = '%sgraph/%sedge' % (graphml_ns, graphml_ns)
    node_path = '%sgraph/%snode' % (graphml_ns, graphml_ns)
    entity_path = '%sdata/%sMaltegoEntity' % (graphml_ns, mtgx_ns)
    property_path = '%sProperties/%sProperty' % (mtgx_ns, mtgx_ns)
    value_path = '%sValue' % mtgx_ns

    # The context manager guarantees the archive handle is released.
    with ZipFile(opts.graph) as archive:
        for member in archive.namelist():
            if not member.endswith('.graphml'):
                continue

            filename = '%s_%s' % (
                opts.graph.replace('.', '_', 1),
                os.path.basename(member).replace('.graphml', '.csv', 1),
            )
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print('Writing data from %s/%s to %s...' % (opts.graph, member, filename))

            with open(filename, 'wb') as csvfile:
                csv = writer(csvfile)
                xml = XML(archive.read(member))

                # Count the edges entering and leaving every node id.
                links = {}
                for edge in xml.findall(edge_path):
                    src = edge.get('source')
                    dst = edge.get('target')
                    links.setdefault(src, dict(in_=0, out=0))['out'] += 1
                    links.setdefault(dst, dict(in_=0, out=0))['in_'] += 1

                for node in xml.findall(node_path):
                    node_id = node.get('id')
                    entity = node.find(entity_path)
                    if entity is None:
                        # Not a Maltego entity node; nothing to export.
                        continue

                    row = [to_utf8(('Entity Type=%s' % entity.get('type')).strip())]
                    for prop in entity.findall(property_path):
                        # A missing or empty <Value> becomes an empty string.
                        value = prop.findtext(value_path) or ''
                        row.append(to_utf8(
                            ('%s=%s' % (prop.get('displayName'), value)).strip()))

                    # Nodes that take part in no edge report zero links.
                    counts = links.get(node_id, {})
                    row.append('Incoming Links=%s' % counts.get('in_', 0))
                    row.append('Outgoing Links=%s' % counts.get('out', 0))
                    csv.writerow(row)
def run(args):
    """Write one CSV file per graph found in a Maltego ``.mtgx`` archive.

    ``args`` is handed to ``parse_args``; the resulting ``opts.graph`` names
    the archive. For a member such as ``Graphs/Graph1.graphml`` the output
    file is ``Graph1.csv`` in the current directory (second path component,
    text before its first dot). Each row lists the entity type, every
    property as ``<displayName>=<value>`` and the incoming/outgoing link
    counts.
    """
    opts = parse_args(args)
    graphml_ns = '{http://graphml.graphdrawing.org/xmlns}'
    mtgx_ns = '{http://maltego.paterva.com/xml/mtgx}'

    # The context manager guarantees the archive handle is released.
    with ZipFile(opts.graph) as archive:
        for member in archive.namelist():
            if not member.endswith('.graphml'):
                continue

            with open(member.split('/')[1].split('.')[0] + '.csv', 'wb') as csvfile:
                csv = writer(csvfile)
                xml = XML(archive.read(member))

                # Count the edges entering and leaving every node id.
                links = {}
                for edge in xml.findall('%sgraph/%sedge' % (graphml_ns, graphml_ns)):
                    src = edge.get('source')
                    dst = edge.get('target')
                    links.setdefault(src, dict(in_=0, out=0))['out'] += 1
                    links.setdefault(dst, dict(in_=0, out=0))['in_'] += 1

                for node in xml.findall('%sgraph/%snode' % (graphml_ns, graphml_ns)):
                    node_id = node.get('id')
                    entity = node.find('%sdata/%sMaltegoEntity' % (graphml_ns, mtgx_ns))
                    if entity is None:
                        # Not a Maltego entity node; nothing to export.
                        continue

                    row = [to_utf8(('Entity Type=%s' % entity.get('type')).strip())]
                    for prop in entity.findall(
                            '%sProperties/%sProperty' % (mtgx_ns, mtgx_ns)):
                        # A missing or empty <Value> becomes an empty string.
                        value = prop.findtext('%sValue' % mtgx_ns) or ''
                        row.append(to_utf8(
                            ('%s=%s' % (prop.get('displayName'), value)).strip()))

                    # A node that appears in no edge has no entry in ``links``;
                    # indexing it directly raised KeyError, so default to 0.
                    counts = links.get(node_id, {})
                    row.append('Incoming Links=%s' % counts.get('in_', 0))
                    row.append('Outgoing Links=%s' % counts.get('out', 0))
                    csv.writerow(row)
def run(args):
    """Dump the entities of every graph in a Maltego archive to CSV text.

    ``args`` is handed to ``parse_args``; the resulting ``opts.graph`` names
    the archive. For a member such as ``Graphs/Graph1.graphml`` the output
    file is ``Graph1.csv`` in the current directory. Every entity becomes
    one line of double-quoted cells (``"Entity Type=..."`` followed by one
    ``"<displayName>=<value>"`` cell per property), each cell followed by a
    comma.
    """
    opts = parse_args(args)
    graphml_ns = '{http://graphml.graphdrawing.org/xmlns}'
    mtgx_ns = '{http://maltego.paterva.com/xml/mtgx}'
    entity_path = '%sgraph/%snode/%sdata/%sMaltegoEntity' % (
        graphml_ns, graphml_ns, graphml_ns, mtgx_ns)
    property_path = '%sProperties/%sProperty' % (mtgx_ns, mtgx_ns)
    value_path = '%sValue' % mtgx_ns

    # Both the archive and each output file are closed by their ``with``
    # blocks, even if a graph fails to parse.
    with ZipFile(opts.graph) as archive:
        for member in archive.namelist():
            if not member.endswith('.graphml'):
                continue

            with open(member.split('/')[1].split('.')[0] + '.csv', 'w') as csv:
                xml = XML(archive.read(member))
                for e in xml.findall(entity_path):
                    csv.write(('"Entity Type=%s",' % e.get('type')).strip())
                    for prop in e.findall(property_path):
                        value = prop.find(value_path).text or ''
                        # str.replace returns a new string; the result must be
                        # kept or embedded quotes are written unescaped.
                        value = value.replace('"', '""')
                        csv.write(('"%s=%s",' % (prop.get('displayName'), value)).strip())
                    csv.write('\n')
def listChildrenViaPropfind():
    """List the child names under user01's calendar home via PROPFIND.

    Generator-style helper: it yields the principal lookup, the request and
    the response body in turn, and hands the final list to ``returnValue``
    (presumably driven by Twisted's ``inlineCallbacks``; the decorator is
    not visible here). The request is a depth-1 PROPFIND on
    ``/calendars/__uids__/user01/`` authenticated as the principal found for
    ``user01``. The shortest ``href`` in the reply is treated as the
    collection itself and removed; the remaining hrefs are returned with
    that prefix and trailing slashes stripped.
    """
    principal = yield self.actualRoot.findPrincipalForAuthID("user01")

    propfind = SimpleStoreRequest(
        self,
        "PROPFIND",
        "/calendars/__uids__/user01/",
        authPrincipal=principal,
    )
    propfind.headers.setHeader("depth", "1")

    raw_response = yield self.send(propfind)
    body = yield allDataFromStream(IResponse(raw_response).stream)

    hrefs = [node.text for node in XML(body).findall("{DAV:}response/{DAV:}href")]
    # The collection's own href is the shortest one; drop it from the list.
    parent = min(hrefs, key=len)
    hrefs.remove(parent)

    prefix_length = len(parent)
    returnValue([href[prefix_length:].rstrip("/") for href in hrefs])
def run(args):
    """Generate Canari entity class definitions from Maltego entity files.

    ``args`` is handed to ``parse_args``. Entity definitions (``*.entity``)
    are read from the ``opts.mtz_file`` export archive or, when none is
    given, from the Maltego settings directory (refused for Maltego 3.4.0
    and later). One ``Entity`` subclass is written to ``opts.outfile`` per
    namespace, followed by one decorated class per entity. The
    ``opts.entity``, ``opts.namespace`` and ``opts.exclude_namespace``
    filters restrict what is emitted.

    With ``opts.append`` the file is extended instead of overwritten and
    entities already defined in it are skipped. Without it, an existing
    file is only overwritten after interactive confirmation; the process
    exits with ``-1`` when the user declines.
    """
    opts = parse_args(args)

    if path.exists(opts.outfile) and not opts.append and not \
            parse_bool('%s already exists. Are you sure you want to overwrite it? [y/N]: ' % repr(opts.outfile),
                       default='n'):
        exit(-1)

    if opts.mtz_file is None:
        d = detect_settings_dir()
        # NOTE(review): if maltego_version() returns a plain string this is a
        # lexicographic comparison ('3.10.0' sorts before '3.4.0'); confirm.
        if maltego_version(d) >= '3.4.0':
            print(
                " =========================== ERROR: NOT SUPPORTED =========================== "
                "Starting from Maltego v3.4.0 the 'canari generate-entities' command can no longer generate entity "
                "definition files from the Maltego configuration directory. Entities can only be generated from "
                "export files (*.mtz). To export entities navigate to the 'Manage' tab in Maltego, then click on "
                "the 'Export Entities' button and follow the prompts. Once the entities have been exported, run the "
                "following command: shell> canari generate-entities -m myentities.mtz "
                "=========================== ERROR: NOT SUPPORTED =========================== "
            )
            exit(-1)
        entity_source = DirFile(path.join(d, 'config', 'Maltego', 'Entities'))
    else:
        entity_source = ZipFile(opts.mtz_file)

    entity_files = [name for name in entity_source.namelist() if name.endswith('.entity')]

    namespaces = dict()
    excluded_entities = []
    if opts.append:
        for entity_class in get_existing_entities(opts.outfile):
            # append(), not extend(): extending with a string adds its
            # individual characters, so no existing entity was ever excluded.
            excluded_entities.append(entity_class._type_)
            if entity_class._type_.endswith('Entity'):
                namespaces[entity_class._namespace_] = entity_class.__name__

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Generating %s...' % repr(opts.outfile))

    # ``with`` closes the output file even if an entity fails to parse.
    with open(opts.outfile, 'ab' if opts.append else 'wb') as outfile:
        if opts.append:
            outfile.write('\n\n')
        else:
            outfile.write('#!/usr/bin/env python\n\nfrom canari.maltego.entities import EntityField, Entity\n\n\n')

        for entity_file in entity_files:
            xml = XML(entity_source.open(entity_file).read())
            id_ = xml.get('id')

            if (opts.entity and id_ not in opts.entity) or id_ in excluded_entities:
                continue

            # 'a.b.Name' -> namespace 'a.b', class name 'Name'
            namespace_entity = id_.split('.')
            namespace = '.'.join(namespace_entity[:-1])
            classname = namespace_entity[-1]

            if (opts.namespace and namespace not in opts.namespace) or namespace in opts.exclude_namespace:
                continue

            if namespace not in namespaces:
                # First entity of this namespace: emit its base class once.
                base_classname = '%sEntity' % ''.join([n.title() for n in namespace_entity[:-1]])
                namespaces[namespace] = base_classname
                outfile.write('class %s(Entity):\n _namespace_ = %s\n\n' % (base_classname, repr(namespace)))
            else:
                base_classname = namespaces[namespace]

            for field in xml.findall('Properties/Fields/Field'):
                fields = [
                    'name=%s' % repr(field.get('name')),
                    'propname=%s' % repr(normalize_fn(field.get('name'))),
                    'displayname=%s' % repr(field.get('displayName')),
                ]
                outfile.write('@EntityField(%s)\n' % ', '.join(fields))

            outfile.write('class %s(%s):\n pass\n\n\n' % (classname, base_classname))

    print('done.')
def run(args):
    """Generate Canari entity class definitions from Maltego entity files.

    ``args`` is handed to ``parse_args``. Entity definitions (``*.entity``)
    are read from the ``opts.mtz_file`` archive or, when none is given, from
    the ``config/Maltego/Entities`` folder of the detected settings
    directory. One ``Entity`` subclass is written to ``opts.outfile`` per
    namespace, followed by one decorated class per entity. The
    ``opts.entity``, ``opts.namespace`` and ``opts.exclude_namespace``
    filters restrict what is emitted.

    With ``opts.append`` the file is extended and entities reported by
    ``diff(opts.outfile)`` are skipped. Without it, an existing file is only
    overwritten after interactive confirmation; the process exits with
    ``-1`` when the user declines.
    """
    opts = parse_args(args)

    if path.exists(opts.outfile) and not opts.append and not \
            parse_bool('%s already exists. Are you sure you want to overwrite it? [y/N]: ' % repr(opts.outfile),
                       default='n'):
        exit(-1)

    # The settings directory is only probed when no export archive is given.
    if opts.mtz_file is None:
        ar = DirFile(path.join(detect_settings_dir(), 'config', 'Maltego', 'Entities'))
    else:
        ar = ZipFile(opts.mtz_file)

    entities = [name for name in ar.namelist() if name.endswith('.entity')]

    nses = dict()
    el = []
    if opts.append:
        existing = diff(opts.outfile)
        el.extend([i.type for i in existing])
        for i in existing:
            if i.type.endswith('Entity'):
                nses[i.namespace] = i.__class__.__name__

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Generating %s...' % repr(opts.outfile))

    # ``with`` closes the output file even if an entity fails to parse.
    with open(opts.outfile, 'ab' if opts.append else 'wb') as fd:
        if opts.append:
            fd.write('\n\n')
        else:
            fd.write('#!/usr/bin/env python\n\nfrom canari.maltego.entities import EntityField, Entity\n\n\n')

        for e in entities:
            xml = XML(ar.open(e).read())
            id_ = xml.get('id')

            if (opts.entity and id_ not in opts.entity) or id_ in el:
                continue

            # 'a.b.Name' -> namespace 'a.b', class name 'Name'
            ens = id_.split('.')
            namespace = '.'.join(ens[:-1])
            classname = ens[-1]

            if (opts.namespace and namespace not in opts.namespace) or namespace in opts.exclude_namespace:
                continue

            if namespace not in nses:
                # First entity of this namespace: emit its base class once.
                base_classname = '%sEntity' % ''.join([n.title() for n in ens[:-1]])
                nses[namespace] = base_classname
                fd.write('class %s(Entity):\n namespace = %s\n\n' % (base_classname, repr(namespace)))
            else:
                base_classname = nses[namespace]

            for f in xml.findall('Properties/Fields/Field'):
                fields = [
                    'name=%s' % repr(f.get('name')),
                    'propname=%s' % repr(normalize_fn(f.get('name'))),
                    'displayname=%s' % repr(f.get('displayName')),
                ]
                fd.write('@EntityField(%s)\n' % ', '.join(fields))

            fd.write('class %s(%s):\n pass\n\n\n' % (classname, base_classname))

    print('done.')
def run(args):
    """Generate Canari entity class definitions from Maltego entity files.

    ``args`` is handed to ``parse_args``. Entity definitions (``*.entity``)
    are read from the ``opts.mtz_file`` export archive or, when none is
    given, from the Maltego settings directory (refused for Maltego 3.4.0
    and later). One ``Entity`` subclass is written to ``opts.outfile`` per
    namespace, followed by one decorated class per entity. The
    ``opts.entity``, ``opts.namespace`` and ``opts.exclude_namespace``
    filters restrict what is emitted.

    With ``opts.append`` the file is extended instead of overwritten and
    entities already defined in it are skipped. Without it, an existing
    file is only overwritten after interactive confirmation; the process
    exits with ``-1`` when the user declines.
    """
    opts = parse_args(args)

    overwrite_prompt = '%s already exists. Are you sure you want to overwrite it? [y/N]: ' % repr(opts.outfile)
    # Evaluation order matters: the user is only prompted when the file
    # exists and append mode is off.
    if path.exists(opts.outfile) and not opts.append and not parse_bool(overwrite_prompt, default='n'):
        exit(-1)

    if opts.mtz_file is not None:
        entity_source = ZipFile(opts.mtz_file)
    else:
        d = detect_settings_dir()
        # NOTE(review): if maltego_version() returns a plain string this is a
        # lexicographic comparison ('3.10.0' sorts before '3.4.0'); confirm.
        if maltego_version(d) >= '3.4.0':
            print(
                " =========================== ERROR: NOT SUPPORTED =========================== "
                "Starting from Maltego v3.4.0 the 'canari generate-entities' command can no longer generate entity "
                "definition files from the Maltego configuration directory. Entities can only be generated from "
                "export files (*.mtz). To export entities navigate to the 'Manage' tab in Maltego, then click on "
                "the 'Export Entities' button and follow the prompts. Once the entities have been exported, run the "
                "following command: shell> canari generate-entities -m myentities.mtz "
                "=========================== ERROR: NOT SUPPORTED =========================== "
            )
            exit(-1)
        entity_source = DirFile(path.join(d, 'config', 'Maltego', 'Entities'))

    entity_files = [name for name in entity_source.namelist() if name.endswith('.entity')]

    namespaces = dict()
    excluded_entities = []
    if opts.append:
        for entity_class in get_existing_entities(opts.outfile):
            # append(), not extend(): extending with a string adds its
            # individual characters, so no existing entity was ever excluded.
            excluded_entities.append(entity_class._type_)
            if entity_class._type_.endswith('Entity'):
                namespaces[entity_class._namespace_] = entity_class.__name__

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Generating %s...' % repr(opts.outfile))

    # ``with`` closes the output file even if an entity fails to parse.
    with open(opts.outfile, 'ab' if opts.append else 'wb') as outfile:
        if opts.append:
            outfile.write('\n\n')
        else:
            outfile.write(
                '#!/usr/bin/env python\n\nfrom canari.maltego.entities import EntityField, Entity\n\n\n'
            )

        for entity_file in entity_files:
            xml = XML(entity_source.open(entity_file).read())
            id_ = xml.get('id')

            if (opts.entity and id_ not in opts.entity) or id_ in excluded_entities:
                continue

            # 'a.b.Name' -> namespace 'a.b', class name 'Name'
            namespace_entity = id_.split('.')
            namespace = '.'.join(namespace_entity[:-1])
            classname = namespace_entity[-1]

            if (opts.namespace and namespace not in opts.namespace) or namespace in opts.exclude_namespace:
                continue

            if namespace in namespaces:
                base_classname = namespaces[namespace]
            else:
                # First entity of this namespace: emit its base class once.
                base_classname = '%sEntity' % ''.join([n.title() for n in namespace_entity[:-1]])
                namespaces[namespace] = base_classname
                outfile.write('class %s(Entity):\n _namespace_ = %s\n\n' % (base_classname, repr(namespace)))

            for field in xml.findall('Properties/Fields/Field'):
                fields = [
                    'name=%s' % repr(field.get('name')),
                    'propname=%s' % repr(normalize_fn(field.get('name'))),
                    'displayname=%s' % repr(field.get('displayName')),
                ]
                outfile.write('@EntityField(%s)\n' % ', '.join(fields))

            outfile.write('class %s(%s):\n pass\n\n\n' % (classname, base_classname))

    print('done.')
# Print one class definition per entity file, preceded (the first time a
# namespace is seen) by that namespace's base class.
for e in entities:
    xml = XML(zip.open(e).read())
    id_ = xml.get('id')

    # 'a.b.Name' -> namespace 'a.b', class name 'Name'
    ens = id_.split('.')
    namespace = '.'.join(ens[:-1])
    name = ens[-1]
    classname = name

    if namespace in nses:
        base_classname = nses[namespace]
    else:
        base_classname = '%sEntity' % ''.join([n.title() for n in ens[:-1]])
        nses[namespace] = base_classname
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('class %s(Entity):\n namespace = %s\n\n' % (base_classname, repr(namespace)))

    # One decorator line per field declared by the entity.
    for f in xml.findall('Properties/Fields/Field'):
        fields = [
            'name=%s' % repr(f.get('name')),
            'propname=%s' % repr(normalize_fn(f.get('name'))),
            'displayname=%s' % repr(f.get('displayName')),
        ]
        print('@EntityField(%s)' % ', '.join(fields))

    print('class %s(%s):\n pass\n\n' % (classname, base_classname))
class NmapReportParser(object):
    """Read-only view over the XML report produced by ``nmap -oX``.

    The raw report text is kept in ``output`` and its parsed tree in
    ``xml``. Per-host accessors take the host address as a string (any
    ``addr`` attribute of the host's ``<address>`` elements) and return an
    empty result when the host, or the requested optional element, is not
    in the report.
    """

    def __init__(self, output):
        """Parse ``output``, the text of an Nmap XML report."""
        self.output = output
        self.xml = XML(output)

    @staticmethod
    def _attrib(element):
        """Return ``element.attrib``, or ``{}`` when ``element`` is None."""
        return element.attrib if element is not None else {}

    def os(self, address):
        """Return the OS detection data (osmatch, osclass, portused) of a host."""
        host = self._host(address)
        if host is None:
            return {'osmatch': [], 'osclass': [], 'portused': {}}
        return {
            'osmatch': [osm.attrib for osm in host.findall('os/osmatch')],
            'osclass': [osc.attrib for osc in host.findall('os/osclass')],
            # Absent when the scan ran without OS detection.
            'portused': self._attrib(host.find('os/portused')),
        }

    @property
    def addresses(self):
        """IPv4 addresses of every host in the report."""
        return [
            a.get('addr')
            for a in self.xml.findall('host/address')
            if a.get('addrtype') == 'ipv4'
        ]

    @property
    def report(self):
        """The raw report text given to the constructor."""
        return self.output

    def mac(self, address):
        """Return the MAC address recorded for a host, or None."""
        host = self._host(address)
        if host is not None:
            for addr in host.findall('address'):
                if addr.get('addrtype') == 'mac':
                    return addr.get('addr')
        return None

    def _host(self, address):
        """Return the first ``<host>`` element owning ``address``, or None."""
        for host in self.xml.findall('host'):
            for addr in host.findall('address'):
                if addr.get('addr') == address:
                    return host
        return None

    def ports(self, address):
        """Return one dict per scanned port of a host.

        Each dict merges the attributes of the ``<port>`` element with those
        of its child elements; later children override earlier keys.
        """
        host = self._host(address)
        ports = []
        if host is not None:
            for p in host.findall('ports/port'):
                # Work on a copy so the parsed tree itself is left untouched.
                r = dict(p.attrib)
                # Explicit loop: map() is lazy on Python 3 and would never
                # run the updates; getchildren() was removed in Python 3.9.
                for child in p:
                    r.update(child.attrib)
                ports.append(r)
        return ports

    @property
    def scaninfo(self):
        """Attributes of the ``<scaninfo>`` element (AttributeError if absent)."""
        return self.xml.find('scaninfo').attrib

    @property
    def verbosity(self):
        """The ``level`` of the ``<verbose>`` element (AttributeError if absent)."""
        return self.xml.find('verbose').get('level')

    @property
    def debugging(self):
        """The ``level`` of the ``<debugging>`` element (AttributeError if absent)."""
        return self.xml.find('debugging').get('level')

    def hostnames(self, address):
        """Return the attribute dicts of a host's ``<hostname>`` elements."""
        host = self._host(address)
        if host is not None:
            return [hn.attrib for hn in host.findall('hostnames/hostname')]
        return []

    def times(self, address):
        """Return the attributes of a host's ``<times>`` element, or ``{}``."""
        host = self._host(address)
        if host is not None:
            return self._attrib(host.find('times'))
        return {}

    @property
    def runstats(self):
        """Merged attributes of every child of ``<runstats>`` (``{}`` if absent)."""
        rs = {}
        runstats = self.xml.find('runstats')
        if runstats is not None:
            for child in runstats:
                rs.update(child.attrib)
        return rs

    def scanstats(self, address):
        """Return the attributes of the ``<host>`` element itself, or ``{}``."""
        host = self._host(address)
        if host is not None:
            return host.attrib
        return {}

    def status(self, address):
        """Return the attributes of a host's ``<status>`` element, or ``{}``."""
        host = self._host(address)
        if host is not None:
            return self._attrib(host.find('status'))
        return {}

    @property
    def nmaprun(self):
        """Attributes of the root ``<nmaprun>`` element."""
        return self.xml.attrib

    def tobanner(self, port):
        """Build a ``product version (extrainfo)`` banner from a port dict."""
        banner = port.get('product', 'Unknown')
        version = port.get('version')
        if version is not None:
            banner += ' %s' % version
        extrainfo = port.get('extrainfo')
        if extrainfo is not None:
            banner += ' (%s)' % extrainfo
        return banner

    @property
    def greppable(self):
        """Render the report in a format resembling Nmap's greppable output.

        One ``Status`` line and one ``Ports`` line are produced per IPv4
        host, followed by a single trailing summary line.
        """
        n = self.nmaprun
        lines = ['# Nmap %s scan initiated %s as: %s\n' % (n['version'], n['startstr'], n['args'])]
        for a in self.addresses:
            state = self.status(a).get('state', 'unknown')
            lines.append('Host: %s () Status: %s\n' % (a, state.title()))
            port_fields = ','.join(
                ' %s/%s/%s//%s///' % (p['portid'], p['state'], p['protocol'], p.get('name', ''))
                for p in self.ports(a)
            )
            lines.append('Host: %s () Ports:%s\n' % (a, port_fields))
        lines.append('# %s\n' % self.runstats['summary'])
        return ''.join(lines)
#!/usr/bin/env python3
"""Print the predictions reported for a CTA bus route at a given stop.

Usage: nextbus.py route stopid
"""
import pdb
import sys
import urllib.parse
import urllib.request
# xml.etree.cElementTree was removed in Python 3.9; ElementTree provides the
# same XML() function.
from xml.etree.ElementTree import XML

if len(sys.argv) != 3:
    raise SystemExit('Usage: nextbus.py route stopid')

route = sys.argv[1]
stop = sys.argv[2]

# urlencode() escapes characters that are not valid inside a query string.
query = urllib.parse.urlencode({'route': route, 'stop': stop})
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(
        'http://ctabustracker.com/bustime/map/getStopPredictions.jsp?' + query) as u:
    data = u.read()

doc = XML(data)
pdb.set_trace()  # Launch Debugger ( Manual debugging / breakpoint)
for pt in doc.findall('.//pt'):
    print(pt.text)
# namespace -> name of the base class already printed for it
nses = {}

# Print one class definition per entity file, preceded (the first time a
# namespace is seen) by that namespace's base class.
for e in entities:
    xml = XML(zip.open(e).read())
    id_ = xml.get('id')

    # 'a.b.Name' -> namespace 'a.b', class name 'Name'
    ens = id_.split('.')
    namespace = '.'.join(ens[:-1])
    name = ens[-1]
    classname = name

    if namespace in nses:
        base_classname = nses[namespace]
    else:
        base_classname = '%sEntity' % ''.join([n.title() for n in ens[:-1]])
        nses[namespace] = base_classname
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('class %s(Entity):\n namespace = %s\n\n' % (base_classname, repr(namespace)))

    # One decorator line per field declared by the entity.
    for f in xml.findall('Properties/Fields/Field'):
        fields = [
            'name=%s' % repr(f.get('name')),
            'propname=%s' % repr(normalize_fn(f.get('name'))),
            'displayname=%s' % repr(f.get('displayName')),
        ]
        print('@EntityField(%s)' % ', '.join(fields))

    print('class %s(%s):\n pass\n\n' % (classname, base_classname))
def docx_to_json(self, path): document = zipfile.ZipFile(path) xml_content = document.read('word/document.xml') document.close() tree = XML(xml_content) sections = [] people = None search_date_res = re.findall(r'(?<=H)\d{6,8}', path) created_at = 0 if search_date_res: if len(search_date_res[0]) == 6: search_date_res[0] = '19' + search_date_res[0] created_at = util.get_timestamp_from_string(search_date_res[0]) paragraph_index = 0 section_index = 0 current_section = None # # remove the texts of tables # for tbl in tree.getiterator(WORD_NAMESPACE+'tbl'): # for _child in tbl.findall('.//'): # if _child.text: # print _child.text + ' ' + path # _child.text = '' for paragraph in tree.findall('./*/*'): texts = [ node.text for node in paragraph.getiterator(TEXT) if node.text ] if texts: # paragraphs.append(''.join(texts)) para_text = '' if paragraph.tag == WORD_NAMESPACE + 'tbl': if not re.match(ur'\d*香港立法局.{4,5}年.{1,2}月.{1,2}日\d*', ''.join(texts)): para_text = '** TABLE **' elif paragraph.tag == PARA: para_text = ''.join(texts) if re.findall( ur'^[^,]{1,6}議員[^說話]{0,6}:[^\s]+|' + ur'^.{1,10}:主席|' + ur'^[^,]{0,10}主席[^說話]{0,6}:|' + ur'^[^,]{1,8}局長[^說話]{0,6}:|' + ur'^[^,]{1,8}司長[^說話]{0,6}:|' + ur'^[^,]{1,6}長官[^說話]{0,6}:|' + ur'^.{1,10}(譯文):|' + ur'^.{1,10}(傳譯):|' + ur'^.{1,10}的譯文:|' + ur'^.{1,10}致辭:', para_text): split_res = re.split(ur':|:', para_text) para_text = ''.join(para_text.split(u':')[1:]) if split_res: people = split_res[0] while re.findall( ur'\d+\.|(譯文)|(傳譯)|致辭的譯文$|問題的譯文$|答覆的譯文$|致辭$|答復$|問$|答$|.+?、|(?<=議員).+|動議的.+', people): people = re.sub( ur'\d+\.|(譯文)|(傳譯)|致辭的譯文$|問題的譯文$|答覆的譯文$|致辭$|答復$|問$|答$|.+?、|(?<=議員).+|動議的.+', '', people) if len(people) > 15: self.logger_con.warning( 'too long people name: ' + people + ' ' + path) if current_section: sections.append(current_section) current_section = { 'people': people, 'section_index': section_index, 'created_at': created_at, 'paragraphs': [] } section_index += 1