def f_send_request(payload_o):
    f_verbose("[*] Sending request to {}.".format(base_url))
    try:
        r = requests.post(base_url, headers=headers_, data=payload_o,
                          timeout=timeout, verify=False)
    except requests.exceptions.ConnectionError:
        print "Error. Connection refused."
        sys.exit(1)
    except requests.exceptions.Timeout:
        f_verbose("[!] Time of response exceeded {} seconds!".format(timeout))
        return timeout
    if r.status_code != 200:
        print red("[X]") + " Error with HTTP code ", r.status_code
        print r.text
        sys.exit(-1)
    time = r.elapsed.total_seconds()
    f_verbose(str(r.status_code))
    f_verbose("[*] Program has successfully sent payload to {}.".format(base_url))
    f_verbose("Time of response: {} ".format(time))
    return time
def checkGerberFile(self, filepath):
    """
    Check if the given file is a RS-274X gerber file
     - Checks for a G04 command at the beginning of the file
     - Checks for a %LN command and verifies it against the filename
     - Checks for a G04 #@! TF.FileFunction command
    """
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)
    # Find G04 line (i.e. what software created the file)
    if not any(map(lambda l: l.startswith("G04 "), lines)):
        print(red("Couldn't find G04 command (software description) in %s. Probably not a Gerber file." % filename, bold=True))
    # Find %LN line, i.e. what the creating
    # software thinks the current layer is (e.g. "BottomMask")
    layerNoteRegex = re.compile(r"^\%LN([^\*]+)\*%$")
    fileFunctionRegex = re.compile(r"G04 #@! TF\.FileFunction,([^\*]+)\*")
    layerDescription = None
    for line in lines:
        if layerNoteRegex.match(line):
            layerDescription = layerNoteRegex.match(line).group(1)
            break  # Expecting only one layer note
        elif fileFunctionRegex.match(line):
            layerDescription = fileFunctionRegex.match(line).group(1)
            layerDescription = layerDescription.split(",")
    # Check if the layer note we found makes sense
    if layerDescription is None:  # No %LN line found
        print(yellow("Couldn't find %%LN command or file function command in %s" % filename))
    else:  # We found a layer description. Check for sanity
        if isinstance(layerDescription, list):  # FileFunction command
            if layerDescription not in allowedLayerNotes[self.name]:
                print(red("Layer description '%s' in %s does not match any of the expected descriptions: %s" % (layerDescription, filename, allowedLayerNotes[self.name]), bold=True))
        else:  # %LN command
            if layerDescription not in allowedLayerNotes[self.name]:
                print(red("Layer description '%s' in %s does not match any of the expected descriptions: %s" % (layerDescription, filename, allowedLayerNotes[self.name]), bold=True))
def parse_check_numbers(params, verbose=False):
    """
    Checks if a list contains interval info.

    This function just exists to shorten read_timeline() to something that
    Python won't throw warnings about. There's no need to call this outside
    that function.

    :param params: is the list to check.
    :param verbose: is an optional flag. When true, extra parsing information
        is printed to the console. Defaults to false.
    """
    # make sure it's a set of 3 numbers
    if len(params) != 3:
        if verbose:
            print(magenta("\t\tinvalid parameter count:"), len(params))
        return False
    # make sure each of the 3 numbers is good
    for param in params:
        try:
            num = int(param, 10)  # 10 is the number base
        # catch parsing errors
        except ValueError:
            if verbose:
                print(red("\t\tinvalid integer > "), end='')
                print(add_quotes(param))
            return False
        else:
            # int() allows negatives, but we don't want those
            if num < 0:
                if verbose:
                    print(red("\t\tinvalid integer range > "), end='')
                    print(add_quotes(param))
                return False
    return True
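# A minimal usage sketch for parse_check_numbers (not part of the original
# module): the parameter lists below are made up purely for illustration and
# assume the function and its colour helpers are importable from the snippet above.
assert parse_check_numbers(["5", "10", "2"]) is True       # three non-negative integers
assert parse_check_numbers(["5", "-10", "2"]) is False     # negatives are rejected
assert parse_check_numbers(["5", "10"]) is False           # wrong parameter count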
def print_state(self):
    regs = {}
    for r in reg_map:
        regs[r] = None
    for r in regs:
        regs[r] = self.emu.reg_read(reg_map[r])
        if self.reg_state[r] != regs[r]:
            regs[r] = red('{}:0x{:08x}'.format(r, regs[r]), bold=True)
        else:
            regs[r] = '{}:0x{:08x}'.format(r, regs[r])
    # eflags
    efl = self.emu.reg_read(UC_X86_REG_EFLAGS)
    flags = []
    for flag in self.flags:
        if efl & (1 << flag):
            flags.append(self.flags[flag])
    r_efl = 'eflags: ' + red(' '.join(flags))
    print("{0} {1} {2} {3}".format(regs['eax'], regs['ebx'], regs['ecx'], regs['edx']))
    print("{0} {1} {2} {3}".format(regs['esi'], regs['edi'], regs['esp'], regs['ebp']))
    print("{0} {1}".format(regs['eip'], r_efl))
    return
def first_stage_backreplace(self, s, repmap):
    """
    Replace proto placeholders by final placeholders
    """
    for protoPlaceholder, _ in repmap:
        # Get numeric placeholder
        placeholder = self.protoPlaceholderToNumericPlaceholder[protoPlaceholder]
        # Check if it got mis-translated...
        if placeholder not in s:
            # Special case for nested patterns:
            # Nested patterns will not be replaced by 2nd stage (numeric) placeholders
            is_nested = False
            for _, val in repmap:
                if protoPlaceholder in val:
                    # It's nested in SOME pattern
                    is_nested = True
                    break
            if is_nested:
                continue  # no need to replace numeric by proto pattern
            else:
                # not nested, fail!
                print(red("{} not found in '{}'".format(placeholder, s), bold=True))
                return None
        if s.count(placeholder) > 1:
            print(red("Placeholder {} was duplicated in '{}'".format(placeholder, s), bold=True))
            return None
        # Replace by proto-placeholder which is a unicode char
        s = re.sub(r"\s*" + placeholder + r"\s*", protoPlaceholder, s, flags=re.UNICODE)
    return s
def restoreDump(args):
    # Setup raw YakDB connection
    conn = YakDB.Connection()
    conn.connect(args.req_endpoint)
    # Filenames to dump to
    filenames = __getDumpFilenames(args)
    # NOTE: Partial & incremental restore is supported
    # Restore every table if the corresponding file exists
    if not args.no_documents:
        if not os.path.isfile(filenames[0]):
            print(red("Can't find document table file " + filenames[0], bold=True))
        else:  # It's a regular file
            print(blue("Restoring document table from " + filenames[0], bold=True))
            importYDFDump(conn, filenames[0], 1)
    if not args.no_entities:
        if not os.path.isfile(filenames[1]):
            print(red("Can't find entity table file " + filenames[1], bold=True))
        else:  # It's a regular file
            print(blue("Restoring entity table from " + filenames[1], bold=True))
            importYDFDump(conn, filenames[1], 2)
    if not args.no_document_idx:
        if not os.path.isfile(filenames[2]):
            print(red("Can't find document index table file " + filenames[2], bold=True))
        else:  # It's a regular file
            print(blue("Restoring document index table from " + filenames[2], bold=True))
            importYDFDump(conn, filenames[2], 3)
    if not args.no_entity_idx:
        if not os.path.isfile(filenames[3]):
            print(red("Can't find entity index table file " + filenames[3], bold=True))
        else:  # It's a regular file
            print(blue("Restoring entity index table from " + filenames[3], bold=True))
            importYDFDump(conn, filenames[3], 4)
def test_justify_formatted():
    def rjust(s, width):
        return s.rjust(width)
    assert justify_formatted(red("hi"), rjust, 10) == "        " + red("hi")
def parse_adm_readall_ofs(p):
    if not p.haslayer('SAPMSAdmRecord'):
        print "Packet has no 'SAPMSAdmRecord'."
        exit(-1)
    print "[+] Text Storage"
    records = dict()
    for e in p.adm_records:
        name = e.rzl_strg_name
        value = str(e.rzl_strg_value)
        type_v = ms_adm_rzl_strg_type_values[e.rzl_strg_type]
        # encoding of value for logon group is binary (IP + port etc.)
        if value.startswith('LG_EYECAT'):
            value = parse_logon_group(value)
        records[name] = (type_v, value)
    # pretty print that
    for r in records.keys():
        if isinstance(records[r][1], list):
            print red(r, bold=True) + '\t: ' + ' '.join(records[r][1])
        elif records[r][0].endswith('_C'):
            print green(r) + '\t: ' + str(records[r][1])
        #else:
        #    print green(r) + '\t: ' + "[list of integers]"
    return records
def performRender(args):
    # Download / update if requested
    if args.download:
        download()
    # Create directory
    if not args.outdir:
        args.outdir = "output-{0}".format(args.language)
    if not os.path.isdir(args.outdir):
        os.mkdir(args.outdir)
    renderer = HTMLHitRenderer(args.outdir, args.language)
    # Generate HTML
    if not args.no_lint:
        print(black("Rendering lint...", bold=True))
        success = False
        for i in range(25):
            try:
                renderer.renderLintHTML()
                success = True
                break
            except NoResultException:
                print(red("Lint fetch error, retrying..."))
        if not success:
            print(red("Lint fetch error (retries exhausted)", bold=True))
    if not args.only_lint:
        # Import
        potDir = os.path.join("cache", args.language)
        print(black("Reading files from {0} folder...".format(potDir), bold=True))
        poFiles = readPOFiles(potDir)
        print(black("Read {0} files".format(len(poFiles)), bold=True))
        # Compute hits
        print(black("Computing rules...", bold=True))
        renderer.computeRuleHitsForFileSet(poFiles)
        # Ensure the HUGE po stuff goes out of scope ASAP
        poFiles = None
        # Generate HTML
        print(black("Rendering HTML...", bold=True))
        renderer.hitsToHTML()
        # Generate filestats.json
        print(black("Generating JSON API files...", bold=True))
        renderer.writeStatsJSON()
        # If data is present, generate subtitle information
        videosJSONPath = os.path.join("cache", "videos.json")
        if os.path.isfile(videosJSONPath):
            print(black("Rendering subtitles overview...", bold=True))
            with open(videosJSONPath) as infile:
                exercises = json.load(infile)
            subtitleTemplate = renderer.env.get_template("subtitles.html")
            writeToFile(os.path.join(args.outdir, "subtitles.html"),
                        subtitleTemplate.render(exercises=exercises))
def check_regex_equal(self, regex, s1, s2, desc):
    m1 = [m.group(0).strip() for m in regex.finditer(s1)]
    m2 = [m.group(0).strip() for m in regex.finditer(s2)]
    if m1 != m2:
        print(red("Syntax comparison failed for {} regex:\n\t{}\n\t{}".format(
            desc, str(m1), str(m2)), bold=True))
        print(red("Original: {}".format(s1), bold=True))
        print(red("Translated: {}".format(s2), bold=True))
        return False
    return True
def checkFile(directory, expectedFile, projectName):
    "Check if a given expected file exists inside a directory"
    filename = projectName + expectedFile.extension
    filepath = os.path.join(directory, filename)
    if os.path.isfile(filepath):
        print green("Found %s data %s" % (expectedFile.format, filename))
        if expectedFile.checkFN is not None:
            expectedFile.checkFN(expectedFile, filepath)
    else:
        print red("File %s (%s) missing" % (filename, expectedFile.name), bold=True)
        return None
    return filename
def extractProjectPrefix(files):
    """
    Extract a common project prefix from all files in a directory.
    Fails & exits if no such prefix is found.

    Example: [ABC.top, ABC.bot] => "ABC"
    """
    commonprefix = os.path.commonprefix(files)
    if not commonprefix or not commonprefix.endswith("."):
        print(red("Can't extract project name from files: %s" % ", ".join(files), bold=True))
        print(red("Please ensure that all files have a common filename and only differ in their extension!", bold=True))
        print(red("Example: MyBoard.top, MyBoard.bot, ...", bold=True))
        sys.exit(1)
    return commonprefix[:-1]  # Strip off dot
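# Illustration only (not from the original project): extractProjectPrefix applied
# to hypothetical board file names that share the "MyBoard." prefix.
assert extractProjectPrefix(["MyBoard.top", "MyBoard.bot", "MyBoard.drd"]) == "MyBoard"
# A set without a common "<name>." prefix, e.g. ["A.top", "B.bot"], prints the
# error messages above and exits via sys.exit(1).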
def checkCopperLayer(self, filepath):
    # Basic gerber checks
    checkGerberFile(self, filepath)
    # Check if smallest aperture is < 6mil = 150um
    # NOTE: We currently don't compute the clearance (way too complicated)
    lines = readFileLines(filepath)
    apertures = parseGerberApertures(lines)
    unit = parseGerberUnit(lines)
    limit = 0.152  # TODO use inches if unit == "in"
    if unit == "in":
        limit = 0.006
    for aperture in apertures:
        if aperture.diameter < limit:
            print red("Aperture %s (size %.3f %s) is smaller than %.3f %s minimum width" % \
                (aperture.id, aperture.diameter, unit, limit, unit))
def parse(self, response):
    # Extract fields
    title = LAW.TITLE.xt(response)
    parl_id = LAW.PARL_ID.xt(response)
    status = LAW.STATUS.xt(response)
    LLP = LegislativePeriod.objects.get(
        roman_numeral=response.url.split('/')[-4])

    # Extract foreign keys
    category = LAW.CATEGORY.xt(response)
    description = LAW.DESCRIPTION.xt(response)

    # Create category if we don't have it yet
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))

    # Create and save Law
    law_data = {
        'title': title,
        'status': status,
        'description': description
    }
    law_item, law_created = Law.objects.update_or_create(
        parl_id=parl_id,
        legislative_period=LLP,
        source_link=response.url,
        defaults=law_data)

    # Attach foreign keys
    law_item.keywords = self.parse_keywords(response)
    law_item.category = cat
    law_item.documents = self.parse_docs(response)
    law_item.save()

    # Log our progress
    if law_created:
        logtext = u"Created {} with id {}, LLP {} @ {}"
    else:
        logtext = u"Updated {} with id {}, LLP {} @ {}"
    logtext = logtext.format(
        red(title),
        cyan(u"[{}]".format(parl_id)),
        green(str(LLP)),
        blue(response.url)
    )
    log.msg(logtext, level=log.INFO)

    response.meta['law_item'] = law_item

    # is the tab 'Parlamentarisches Verfahren' available?
    if response.xpath('//h2[@id="tab-ParlamentarischesVerfahren"]'):
        self.parse_parliament_steps(response)
    if response.xpath('//h2[@id="tab-VorparlamentarischesVerfahren"]'):
        self.parse_pre_parliament_steps(response)
def f_check():
    global timeout, base_url
    timeout = args.timeout
    logging.captureWarnings(True)  # Capture the ssl warnings with the standard logging module
    if args.ssl:
        base_url = "https://{}:{}/{}".format(args.host, args.port, args.url)
    else:
        base_url = "http://{}:{}/{}".format(args.host, args.port, args.url)
    f_verbose("[*] Program will check out WebLogic for CVE-2017-3506 & 10271 vulnerability.")
    if f_run():
        print red("[x]") + " Your system is potentially vulnerable to XML Serialization attack!"
    else:
        print green("[*]") + " Your system is " + blue("safe!")
def parse(self, response):
    """
    Starting point from which to parse or statically provide
    debate-list urls (rss feeds)
    """
    callback_requests = []
    for llp in self.LLP:
        for nrbr in self.DEBATETYPES:
            params = {
                'view': 'RSS',
                'jsMode': 'RSS',
                'xdocumentUri': '/PAKT/STPROT/index.shtml',
                'NRBRBV': nrbr,
                'NUR_VORL': 'N',
                'R_PLSO': 'PL',
                'GP': llp,
                'FBEZ': 'FP_011',
                'listeId': '211',
            }
            llp_item = None
            try:
                llp_item = LegislativePeriod.objects.get(
                    roman_numeral=params['GP'])
            except LegislativePeriod.DoesNotExist:
                self.logger.warning(
                    red(u"LLP '{}' not found".format(params['GP'])))
            feed_url = self.BASE_URL + 'filter.psp?' + urlencode(params)
            callback_requests.append(
                scrapy.Request(feed_url,
                               callback=self.parse_debatelist,
                               meta={'llp': llp_item,
                                     'type': params['NRBRBV']}))
    return callback_requests
def assertNotTranslated(engl):
    trans = RuleAutotranslator()
    result = trans.translate(engl)
    if result is not None:
        print(red("String should not be translated: '{}'".format(engl), bold=True))
def parse_debate(self, response):
    """
    Debate-transcript ("Stenografisches Protokoll") parser
    """
    for i, sect in enumerate(DOCSECTIONS.xt(response)):
        # Lookup + add references to the section data
        sect['debate'] = response.meta['debate']
        if 'speaker_id' in sect and sect['speaker_id'] is not None:
            try:
                sect['person'] = Person.objects.get(
                    parl_id=sect['speaker_id'])
            except Person.DoesNotExist:
                self.logger.warning(
                    red(u"Person '{}' not found".format(sect['speaker_id'])))
        if sect['ref_timestamp'] is not None \
                and len(sect['ref_timestamp']) == 2:
            sect['date'] = sect['debate'].date.replace(
                minute=sect['ref_timestamp'][0],
                second=sect['ref_timestamp'][1])
        self.store_statement(sect, i)
    self.logger.info(
        green(u"Saved {} sections from {}".format(i, response.url)))
def parse_debate(self, response):
    """
    Debate-transcript ("Stenografisches Protokoll") parser
    """
    i = 0
    for i, sect in enumerate(DOCSECTIONS.xt(response)):
        # Lookup + add references to the section data
        sect['debate'] = response.meta['debate']
        if 'speaker_id' in sect and sect['speaker_id'] is not None:
            try:
                sect['person'] = Person.objects.get(
                    parl_id=sect['speaker_id'])
            except Person.DoesNotExist:
                self.logger.warning(
                    red(u"Person '{}' not found".format(sect['speaker_id'])))
        else:
            sect['person'] = None
        # Select best timestamps for start and end and make datetime
        start_ts = sect['time_start'] or sect['ref_timestamp']
        end_ts = sect['time_end'] or sect['ref_timestamp']
        sect['date'] = self._apply_ts(sect['debate'].date, start_ts)
        sect['date_end'] = self._apply_ts(sect['debate'].date, end_ts)
        self.store_statement(sect, i)
    self.logger.info(
        green(u"Saved {} sections from {}".format(i, response.url)))
def onMessage(self, payload, isBinary):
    request = json.loads(payload.decode('utf8'))
    # Perform action depending on query type
    qtype = request["qtype"]
    if qtype == "docsearch":
        results = self.performDocumentSearch(request["term"])
        del request["term"]
        request["results"] = list(results.values())
    elif qtype == "ner":
        results = self.performEntityNER(request["query"])
        del request["query"]
        request["results"] = results
    elif qtype == "metadb":
        # Send meta-database to generate
        request["results"] = metaDB
    elif qtype == "entitysearch":
        request["entities"] = self.performEntitySearch(request["term"])
        del request["term"]
    elif qtype == "getdocuments":
        # Serve one or multiple documents by IDs
        docIds = [s.encode() for s in request["query"]]
        request["results"] = self.db.docIdx.findEntities(docIds)
        del request["query"]
    else:
        print(red("Unknown websocket request type: %s" % request["qtype"], bold=True))
        return  # Do not send reply
    # Return modified request object: Keeps custom K/V pairs but do not re-send query
    self.sendMessage(
        json.dumps(request, default=documentSerializer).encode("utf-8"), False)
def iterateUniprotDatabases(quiet=True):
    """
    Fetch the uniprot metadatabase by guessing valid integral database IDs.
    Guarantees to yield all databases up to 9999
    """
    template = "http://www.uniprot.org/database/%d.rdf"
    for i in range(300):  # In the far future, there might be more DBs than 300.
        r = requests.get(template % i)
        if r.status_code == requests.codes.ok:
            if not quiet:
                print(green("[UniProt MetaDB] Fetching DB #%d" % i))
            soup = BeautifulSoup(r.text)
            # Very, very crude RDF/XML parser
            rdf = soup.html.body.find("rdf:rdf")
            db = {
                "id": rdf.abbreviation.text,
                "name": rdf.abbreviation.text,
                "category": rdf.category.text,
                "description": rdf.find("rdfs:label").text,
            }
            url = rdf.find("rdfs:seealso")["rdf:resource"]
            if url:
                db["url"] = url
            urltemplate = rdf.urltemplate.text
            if urltemplate:
                db["urltemplate"] = urltemplate
            yield db
        else:
            if not quiet:
                print(red("[UniProt MetaDB] Database #%d does not exist" % i))
def parse_inquiry_response(self, response):
    """
    Callback function for parsing the inquiry responses
    """
    # Allow testing single urls for parsing errors
    inquiry_item = response.meta.get('inquiry_item', None)
    source_link = response.url
    parl_id = response.url.split('/')[-2]
    title = INQUIRY.TITLE.xt(response)
    description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
    LLP = inquiry_item.legislative_period if inquiry_item else None
    category = INQUIRY.CATEGORY.xt(response)

    # Get or create Category object for the inquiry and log to screen if new
    # category is created.
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))), level=log.DEBUG)

    try:
        sender_object = Person.objects.get(
            parl_id=INQUIRY.RESPONSESENDER.xt(response))
    except Exception, e:
        log.warning(red(u'Sender "{}" was not found in database, skipping Inquiry {} in LLP {}'.format(
            INQUIRY.RESPONSESENDER.xt(response), parl_id, LLP)))
        return
def print_answer(p):
    fromname = p.fromname
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except:
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except:
        opcode_err = 'None'
    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)
    if p.key != null_key:
        mskey_parse_print(p.key)
        key = p.key.encode('hex')
    else:
        key = "NULL"
    logger.debug("flag: " + cyan(flag) + " opcode:" + cyan(opcode) + \
                 " opcode_error: " + green(opcode_err) + " key: %s" % key)
def parse_inquiry_response(self, response):
    """
    Callback function for parsing the inquiry responses
    """
    inquiry_item = response.meta['inquiry_item']
    source_link = response.url
    parl_id = response.url.split('/')[-2]
    title = INQUIRY.TITLE.xt(response)
    description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
    LLP = inquiry_item.legislative_period
    category = INQUIRY.CATEGORY.xt(response)

    # Get or create Category object for the inquiry and log to screen if new
    # category is created.
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))

    try:
        sender_object = Person.objects.get(
            parl_id=INQUIRY.RESPONSESENDER.xt(response))
    except:
        log.msg(
            red(u'Receiver was not found in database, skipping Inquiry {} in LLP {}'
                .format(parl_id, LLP)))
        return

    # Create or update Inquiry item
    inquiryresponse_item, inquiryresponse_created = InquiryResponse.objects.update_or_create(
        parl_id=parl_id,
        legislative_period=LLP,
        defaults={
            'title': title,
            'source_link': source_link,
            'description': description,
            'sender': sender_object
        })

    # Attach foreign keys
    inquiryresponse_item.documents = self.parse_response_docs(response)
    inquiryresponse_item.category = cat

    # Save InquiryResponse object
    inquiryresponse_item.save()

    if inquiryresponse_created:
        logtext = u"[{} of {}] Created InquiryResponse {} with ID {}, LLP {} @ {}"
    else:
        logtext = u"[{} of {}] Updated InquiryResponse {} with ID {}, LLP {} @ {}"
    logtext = logtext.format(
        self.SCRAPED_COUNTER,
        self.TOTAL_COUNTER,
        cyan(title),
        cyan(u"{}".format(parl_id)),
        green(str(LLP)),
        blue(response.url))
    log.msg(logtext, level=log.INFO)

    inquiry_item.response = inquiryresponse_item
    inquiry_item.save()
    return
def parse_inquiry_response(self, response):
    """
    Callback function for parsing the inquiry responses
    """
    # Allow testing single urls for parsing errors
    inquiry_item = response.meta.get('inquiry_item', None)
    source_link = response.url
    parl_id = response.url.split('/')[-2]
    title = INQUIRY.TITLE.xt(response)
    description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
    LLP = inquiry_item.legislative_period if inquiry_item else None
    category = INQUIRY.CATEGORY.xt(response)

    # Get or create Category object for the inquiry and log to screen if new
    # category is created.
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))

    try:
        sender_object = Person.objects.get(
            parl_id=INQUIRY.RESPONSESENDER.xt(response))
    except Exception, e:
        log.msg(red(u'Sender "{}" was not found in database, skipping Inquiry {} in LLP {}'.format(
            INQUIRY.RESPONSESENDER.xt(response), parl_id, LLP)))
        return
def handle_answer(s, p):
    fromname = p.fromname
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except:
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except:
        opcode_err = 'None'
    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)
    if p.key != null_key:
        key = " key: " + yellow('NOT NULL', bold=True)
        logger.error("[!] Out of order packets, reload this script.")
        #s.close()
        #exit(0)
    else:
        key = ""
    logger.info("flag: " + cyan(flag) + " opcode:" + cyan(opcode) + \
                " opcode_error: " + green(opcode_err) + key)
def initializeMetaDatabase(filename="metadb.json"):
    """
    Ensure we have a valid file with meta-database information, i.e. links,
    names and URL templates for any database being referenced.
    This information is used to generate links to external databases, e.g. STRING.

    This function fetches the meta-database from UniProt if required.
    The meta-database dictionary is returned.

    Also reads and adds (or replaces) additional entries from metadb-additional.json
    """
    with open("metadb-additional.json") as infile:
        additional = json.load(infile)
    try:
        with open(filename) as infile:
            db = json.load(infile)
            db.update(additional)
            return db
    except:
        # Try to download from UniProt
        try:
            db = downloadUniprotMetadatabase(filename)
            db.update(additional)
            return db
        except Exception as ex:
            print(ex)
            print(red("Can neither read nor fetch metadatabase. Database links will not work.", bold=True))
def truncate(args):
    "Delete data from one or more tables"
    # Check if the user is sure
    if not args.yes_i_know_what_i_am_doing:
        print(red("This will delete all your Translatron data. If you are sure, please use --yes-i-know-what-i-am-doing", bold=True))
        return
    # Setup raw YakDB connection
    conn = YakDB.Connection()
    conn.connect(args.req_endpoint)
    # Truncate every table unless disabled by the corresponding flag
    if not args.no_documents:
        print(blue("Truncating document table... ", bold=True))
        if args.hard:
            conn.truncateTable(1)
        else:
            conn.deleteRange(1, None, None, None)
    if not args.no_entities:
        print(blue("Truncating entity table... ", bold=True))
        if args.hard:
            conn.truncateTable(2)
        else:
            conn.deleteRange(2, None, None, None)
    if not args.no_document_idx:
        print(blue("Truncating document index table... ", bold=True))
        if args.hard:
            conn.truncateTable(3)
        else:
            conn.deleteRange(3, None, None, None)
    if not args.no_entity_idx:
        print(blue("Truncating entity index table... ", bold=True))
        if args.hard:
            conn.truncateTable(4)
        else:
            conn.deleteRange(4, None, None, None)
def handle_answer(s, p):
    fromname = p.fromname
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except:
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except:
        opcode_err = 'None'
    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)
    if p.key != null_key:
        p.show()
        key = " key: " + yellow('NOT NULL', bold=True)
        print "[!] Out of order packets, reload this script."
        #s.close()
        #exit(0)
    else:
        key = ""
    print "flag: " + cyan(flag) + " opcode:" + cyan(opcode) + \
        " opcode_error: " + green(opcode_err) + key
    # "identify" request from the server?
    if key != "" and flag == 'MS_REQUEST' and opcode == '0':
        s.send(ms_adm_nilist(p, 1))
def loadUsernamePassword():
    """
    Load the Crowdin username and password from crowdin-credentials.json
    """
    try:
        with open("crowdin-credentials.json") as infile:
            data = json.load(infile)
            return data["username"], data["password"]
    except FileNotFoundError:
        print(red("Could not find crowdin-credentials.json. Please create that file from crowdin-credentials-template.json!", bold=True))
def readRulesFromGoogleDocs(gdid, rules=None):
    # Use a fresh rule list per call unless the caller passes one in
    if rules is None:
        rules = []
    rule_errors = []
    for rule in readRulesFromGDocs(gdid):
        if isinstance(rule, RuleError):
            rule_errors.append(rule)
            print(red(rule.msg))
        else:
            rules.append(rule)
    return rules, rule_errors
def test_colordiff():
    x, y = colordiff("hi bob", "hi there",
                     color_x=Colors.Red,
                     color_y=Colors.Blue)

    fx = lambda s: red(s, reverse=True)
    fy = lambda s: blue(s, reverse=True)

    assert x == "hi " + fx("b") + fx("o") + fx("b")
    assert y == "hi " + fy("t") + fy("h") + fy("e") + fy("r") + fy("e")
def processPMCFileContent(xml):
    "Process a string representing a PMC XML file"
    soup = BeautifulSoup(xml, "lxml")
    try:
        return processPMCDoc(soup)
    except Exception as e:
        print(red("Parser exception while processing PMC:%s" % extractArticleID(soup, "pmc")))
        print(e)
        return None
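# Hypothetical invocation of processPMCFileContent (the file name is invented
# for this sketch); it assumes processPMCDoc and extractArticleID are available
# as in the snippet above.
with open("PMC1234567.nxml") as infile:
    doc = processPMCFileContent(infile.read())
if doc is None:
    print("PMC document could not be parsed, see the error printed above")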
def parse(self, response):
    persons = AUDITORS.LIST.xt(response)
    callback_requests = []

    self.logger.info("Scraping {} persons".format(len(persons)))

    # Iterate all persons
    for p in persons:
        # Extract basic data
        parl_id = p["source_link"].split("/")[-2]
        p["source_link"] = "{}{}".format(BASE_HOST, p["source_link"])

        # Create or update simple person's item
        person_data = {"reversed_name": p["reversed_name"]}
        person_item, created_person = Person.objects.update_or_create(
            source_link=p["source_link"],
            parl_id=parl_id,
            defaults=person_data
        )
        if created_person:
            self.logger.info(u"Created Person {}".format(
                green(u"[{}]".format(p["reversed_name"]))))
        else:
            self.logger.info(u"Updated Person {}".format(
                green(u"[{}]".format(p["reversed_name"]))))

        mandate = p["mandate"]

        function_item, f_created = Function.objects.get_or_create(
            short=mandate["short"], title=mandate["title"])
        if f_created:
            self.logger.info(u"Created function {}".format(
                green(u"[{}]".format(function_item.short))))

        # Create and append mandate
        try:
            mandate_item, m_created = Mandate.objects.update_or_create(
                function=function_item,
                start_date=mandate["start_date"],
                end_date=mandate["end_date"]
            )
        except:
            self.logger.info(red("Error saving Mandate {} ({} - {})".format(
                function_item, mandate["start_date"], mandate["end_date"])))
            import ipdb
            ipdb.set_trace()
        person_item.mandates.add(mandate_item)
        person_item.save()

        # First time we encounter a person, we scan her detail page too
        if parl_id not in self.persons_scraped:
            # Create Detail Page request
            req = scrapy.Request(p["source_link"],
                                 callback=self.parse_person_detail)
            req.meta["person"] = {
                "reversed_name": p["reversed_name"],
                "source_link": p["source_link"],
                "parl_id": parl_id,
            }
            callback_requests.append(req)
            self.persons_scraped.append(parl_id)
    return callback_requests
def write_progress(self, rate=None, prestart=None, wait=None, complete=False, error=None):
    # compute string lengths
    action = self.action.rjust(self.actionwidth)

    if error:
        rate = error
    elif prestart:
        rate = "starting"
    elif wait:
        rate = ("%s" % self.retry_wait) + "s..."
    elif complete:
        rate = "done"
    else:
        rate = "%s/s" % self.format_size(rate)
    rate = rate.ljust(self.ratewidth)

    url = self.url_fmt

    if self.totalsize:
        size = self.format_size(self.totalsize)
    elif self.download_size:
        size = self.format_size(self.download_size)
    else:
        size = "????? B"
    size = (" %s" % size).ljust(self.sizewidth)

    # add formatting
    if error:
        rate = ansicolor.red(rate)
    elif prestart or wait:
        rate = ansicolor.cyan(rate)
    elif complete:
        rate = ansicolor.green(rate)
    else:
        rate = ansicolor.yellow(rate)

    # draw progress bar
    if not (error or prestart or complete) and self.totalsize:
        c = int(self.urlwidth * self.download_size / self.totalsize)
        url = ansicolor.wrap_string(self.url_fmt, c, None, reverse=True)

    if not self.totalsize:
        size = ansicolor.yellow(size)

    line = "%s :: %s " % (action, rate)
    term = (os.environ.get("DEBUG_FETCH") and "\n") or "\r"
    if error or complete:
        term = "\n"

    ioutils.write_err("%s%s%s%s" % (line, url, size, term))

    # log download
    if error:
        self.log_url(error, error=True)
    elif complete:
        self.log_url("done")
def parse_adm_readall_ofs(p):
    if not p.haslayer('SAPMSAdmRecord'):
        print "Packet has no 'SAPMSAdmRecord'."
        exit(-1)
    logger.info("[+] Dumping Text Storage")
    records = dict()
    for e in p.adm_records:
        name = e.rzl_strg_name
        value = str(e.rzl_strg_value)
        type_v = ms_adm_rzl_strg_type_values[e.rzl_strg_type]
        records[name] = (type_v, value)
    # pretty print that
    for r in records.keys():
        if records[r][1].startswith('LG_EYECAT'):
            print red(r, bold=True) + '\t: ' + ' '.join(
                parse_logon_group(records[r][1]))
        elif records[r][0].endswith('_C'):
            print green(r) + '\t: ' + str(records[r][1])
    return records
def __init__(self, lang="de"):
    self.lang = lang
    if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ:
        self.mode = "google-api"
        self.client = translate.Client(target_language=lang)
        self.translate = self._googleapi_translate
        print(green("Using google cloud translation API"))
    else:
        self.mode = "googletrans"
        self.translate = self._googletrans_translate
        print(red("Using googletrans"))
def checkExcellonMetric(self, filepath):
    "Check if a given file is a metric excellon file"
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)
    # Check for Excellon header
    if lines[0] != "M48":
        print red("Can't find Excellon drill header (M48) in %s" % filename, bold=True)
    # Check for metric dimension: Line like METRIC,0000.00
    if lines[1].partition(",")[0] != "METRIC":
        print red("Excellon drill program %s does not seem to be metric" % filename, bold=True)
    #
    # Drill statistics
    #
    toolStats = extractToolStatistics(lines)
    print(black(self.name + ":", bold=True))
    for diameter, numDrills in toolStats.iteritems():
        print("\t%d through holes of diameter %.2fmm" % (numDrills, diameter))
    # Print "None" if there are no holes in this file
    if not toolStats:
        print "\tNone"
def findCoordinateFormat(lines):
    """
    Try to find a FSLAX line and return the decimal-point factor for coordinates.
    """
    rgx = re.compile(r"\%FSLAX(\d{2})Y(\d{2})\*\%")
    for line in lines:
        m = rgx.match(line)
        if m is not None:
            return 10.**int(m.group(1)[-1]), 10.**int(m.group(2)[-1])
    print(red("Could not find coordinate format info %FSLAX. Using default %FSLAX33"))
    return 100000., 100000.
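# Worked example (made-up Gerber header, not from the original project): tracing
# findCoordinateFormat shows how the trailing digit of each FSLAX field becomes
# a power-of-ten factor.
header = ["%FSLAX24Y24*%", "%MOMM*%"]
x_factor, y_factor = findCoordinateFormat(header)
# group(1) == "24" -> last digit 4 -> 10.0 ** 4
print(x_factor, y_factor)  # 10000.0 10000.0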
def checkExcellonMetric(self, filepath):
    "Check if a given file is a metric excellon file"
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)
    # Check for Excellon header
    if lines[0] != "M48":
        print(red("Can't find Excellon drill header (M48) in %s" % filename, bold=True))
    # Check for metric dimension: Line like METRIC,0000.00
    if lines[1].partition(",")[0] != "METRIC":
        print(red("Excellon drill program %s does not seem to be metric" % filename, bold=True))
    #
    # Drill statistics
    #
    toolStats = extractToolStatistics(lines)
    print(black(self.name + ":", bold=True))
    for diameter, numDrills in toolStats.items():
        print("\t%d through holes of diameter %.2fmm" % (numDrills, diameter))
    # Print "None" if there are no holes in this file
    if not toolStats:
        print("\tNone")
def status(self, msg, *args, **kwargs):
    text = msg % args
    if kwargs.get('error') == True:
        logging.error(text)
        if self.stdout_has_colours:
            sys.stdout.write(ansicolor.red(text))
        else:
            sys.stdout.write(text)
    else:
        logging.info(text)
        sys.stdout.write(text)
    sys.stdout.write('\n')
def importEntities(args):
    for infile in args.infile:
        basename = os.path.basename(infile)
        if re.match(r"uniprot_[a-z]+\.dat\.gz", basename):
            print(blue("Importing UniProt file..."))
            from Translatron.Entities.UniProtImporter import importUniprot
            importUniprot(args, infile)
        elif re.match(r"d\d{4}.bin", basename):
            print(blue("Importing MeSH file..."))
            from Translatron.Entities.MeSHImporter import importMeSH
            importMeSH(args, infile)
        elif re.match(r"[a-z][a-z]wiki.+titles.+\.gz", basename):
            print(blue("Importing Wikipedia page title file..."))
            from Translatron.Entities.WikipediaImporter import importWikimediaPagelist
            importWikimediaPagelist(args, infile)
        else:
            print(red("Can't interpret entity input file (uniprot_sprot.dat.gz - UniProt) %s " % basename))
def downloadCrowdinById(session, crid, lang="de"):
    if lang in languageIDs:
        langId = languageIDs[lang]
    else:  # Fallback -- won't really work
        print(red("Error: Language unknown: {0}".format(lang), bold=True))
        langId = 11  # de
    url = "https://crowdin.com/translation/phrase?id={0}&project_id=10880&target_language_id={1}".format(crid, langId)
    response = session.get(url)
    try:
        jsondata = response.json()["data"]
        msgid = jsondata["translation"]["text"]
        msgstr = jsondata["top_suggestion"]
        comment = jsondata["translation"]["context"]
        filename = jsondata["translation"]["file_path"][1:]
    except:
        errstr = "[Retrieval error while fetching {0}]".format(url)
        return errstr, errstr, errstr, None
    return msgid, msgstr, comment, filename
def __init__(self, name, filename, severity=Severity.standard, flags=re.UNICODE):
    super().__init__(name, severity)
    self.filename = filename
    regexes = set()
    self.valid = False
    # Check if file exists
    if os.path.isfile(filename):
        with open(filename) as infile:
            for line in infile:
                rgx = line.strip().replace(" ", r"\s+")
                # Don't match in the middle of a word
                rgx = r"\b{0}\b".format(rgx)
                regexes.add(rgx)
        # Build large regex from all sub-regexes
        self.regex = reCompiler.compile("|".join(regexes), flags=flags)
        self.valid = True
    else:  # File does not exist
        print(red("Unable to find text list file %s" % filename, bold=True))
def run_script():
    (parser, a) = ioutils.init_opts("<url> ['<pattern>'] [options]")
    a("--recipe", metavar="<recipe>", dest="recipe", help="Use a spidering recipe")
    a("--fetch", action="store_true", help="Fetch urls, don't dump")
    a("--dump", action="store_true", help="Dump urls, don't fetch")
    a("--host", action="store_true", help="Only spider this host")
    a("--pause", type="int", metavar="<pause>", dest="pause",
      help="Pause for x seconds between requests")
    a("--depth", type="int", metavar="<depth>", dest="depth",
      help="Spider to this depth")
    (opts, args) = ioutils.parse_args(parser)
    try:
        if opts.fetch:
            os.environ["FETCH_ALL"] = "1"
        elif opts.dump:
            os.environ["DUMP_ALL"] = "1"
        if opts.host:
            os.environ["HOST_FILTER"] = "1"
        if opts.pause:
            os.environ["PAUSE"] = str(opts.pause)
        if opts.depth:
            os.environ["DEPTH"] = str(opts.depth)
        url = args[0]
        if opts.recipe:
            rules = recipe.load_recipe(opts.recipe, url)
        else:
            pattern = args[1]
            rules = recipe.get_recipe(pattern, url)
        session = Session.restore(url)
        session.rules = rules
        if session.queue is None:
            session.queue = recipe.get_queue(url, mode=fetch.Fetcher.SPIDER)
        if session.wb is None:
            session.wb = web.Web(url)
    except recipe.PatternError as e:
        ioutils.write_err(ansicolor.red("%s\n" % e))
        sys.exit(1)
    except IndexError:
        ioutils.opts_help(None, None, None, parser)
    spiderfetcher = SpiderFetcher(session)
    spiderfetcher.main()
def parse_person_detail(self, response):
    """
    Parse a person's detail page before creating the person object
    """
    person = response.meta['person']
    self.logger.info(u"Updating Person Detail {}".format(
        green(u"[{}]".format(person['reversed_name']))))

    full_name = PERSON.DETAIL.FULL_NAME.xt(response)
    bio_data = PERSON.DETAIL.BIO.xt(response)

    profile_photo_url = PERSON.DETAIL.PHOTO_URL.xt(response)
    profile_photo_copyright = PERSON.DETAIL.PHOTO_COPYRIGHT.xt(response)

    try:
        person_data = {
            'photo_link': "{}{}".format(BASE_HOST, profile_photo_url),
            'photo_copyright': profile_photo_copyright,
            'full_name': full_name,
            'reversed_name': person['reversed_name'],
            'birthdate': bio_data['birthdate'],
            'birthplace': bio_data['birthplace'],
            'deathdate': bio_data['deathdate'],
            'deathplace': bio_data['deathplace'],
            'occupation': bio_data['occupation']}

        person_item, created_person = Person.objects.update_or_create(
            source_link=person['source_link'],
            parl_id=person['parl_id'],
            defaults=person_data
        )
        person_item.save()
        # Instantiate slug
        person_item.slug
    except:
        self.logger.info(red("Error saving Person {}".format(full_name)))
        import ipdb
        ipdb.set_trace()
    return
def print_debug(self):
    """
    Collects and prints a structured debug message
    """
    message = """
{bar}
{title}
Scraping LLPs: {llps}
Base URL: {url}
{bar}
""".format(
        bar=cyan('############################################################'),
        title=red(self.title),
        llps=self.LLP or "Not applicable",
        url=self.BASE_URL
    )
    print message
def _render_latest_events(self):
    """Render the latest events emitted by the computation.

    TODO(mpetazzoni): render custom events/alert events differently and
    support alert event schema v3.
    """
    print('\nEvents:')

    def maybe_json(v):
        if isinstance(v, six.string_types):
            return json.loads(v)
        return v

    for event in self._events:
        ets = self._computation.get_metadata(event.tsid)
        contexts = json.loads(ets.get('sf_detectInputContexts', '{}'))
        values = maybe_json(event.properties.get('inputs', '{}'))
        values = ' | '.join([
            u'{name} ({key}): {value}'.format(
                name=white(contexts[k].get('identifier', k)),
                key=','.join([u'{0}:{1}'.format(dim_name, dim_value)
                              for dim_name, dim_value in v.get('key', {}).items()]),
                value=v['value'])
            for k, v in values.items()])
        date = tslib.date_from_utc_ts(event.timestamp_ms)
        is_now = event.properties['is']
        print(u' {mark} {date} [{incident}]: {values}'
              .format(mark=green(u'✓') if is_now == 'ok' else red(u'✗'),
                      date=white(self._render_date(date), bold=True),
                      incident=event.properties['incidentId'],
                      values=values))

    return 2 + len(self._events)
def parse(self, response):
    self.SCRAPED_COUNTER += 1

    LLP = LegislativePeriod.objects.get(
        roman_numeral=response.url.split('/')[-4])

    # Extract fields
    ts = GENERIC.TIMESTAMP.xt(response)
    title = LAW.TITLE.xt(response)
    parl_id = LAW.PARL_ID.xt(response)
    status = LAW.STATUS.xt(response)

    if not self.IGNORE_TIMESTAMP and not self.has_changes(parl_id, LLP, response.url, ts):
        self.logger.info(
            green(u"[{} of {}] Skipping Law, no changes: {}".format(
                self.SCRAPED_COUNTER, self.TOTAL_COUNTER, title)))
        return

    # Extract foreign keys
    category = LAW.CATEGORY.xt(response)
    description = LAW.DESCRIPTION.xt(response)

    # Create category if we don't have it yet
    cat, created = Category.objects.get_or_create(title=category)
    if created:
        log.msg(u"Created category {}".format(
            green(u'[{}]'.format(category))))

    # Create and save Law
    law_data = {
        'title': title,
        'status': status,
        'description': description,
        'ts': ts,
        'source_link': response.url,
    }
    law_item, law_created = Law.objects.update_or_create(
        parl_id=parl_id,
        legislative_period=LLP,
        defaults=law_data)

    # Attach foreign keys
    law_item.keywords = self.parse_keywords(response)
    law_item.category = cat
    law_item.documents = self.parse_docs(response)
    law_item.save()

    # Log our progress
    if law_created:
        logtext = u"[{} of {}] Created {} with id {}, LLP {} @ {}"
    else:
        logtext = u"[{} of {}] Updated {} with id {}, LLP {} @ {}"
    logtext = logtext.format(
        self.SCRAPED_COUNTER,
        self.TOTAL_COUNTER,
        red(title),
        cyan(u"[{}]".format(parl_id)),
        green(unicode(LLP)),
        blue(response.url)
    )
    log.msg(logtext, level=log.INFO)

    response.meta['law_item'] = law_item

    # is the tab 'Parlamentarisches Verfahren' available?
    if response.xpath('//h2[@id="tab-ParlamentarischesVerfahren"]'):
        self.parse_parliament_steps(response)
    if response.xpath('//h2[@id="tab-VorparlamentarischesVerfahren"]'):
        self.parse_pre_parliament_steps(response)