Exemplo n.º 1
0
def f_send_request(payload_o):
    """POST ``payload_o`` to ``base_url`` and return the response time.

    Relies on the names ``base_url``, ``headers_`` and ``timeout`` being
    defined outside this function. Certificate verification is disabled
    for the request (``verify=False``).

    :param payload_o: request body, passed to ``requests.post`` as ``data``.
    :return: elapsed time of the response in seconds, or ``timeout`` when
        the request timed out.

    Exits the process with status 1 when the connection fails and with
    status -1 when the server answers with a status code other than 200.
    """
    f_verbose("[*] Sending request to {}.".format(base_url))

    try:
        r = requests.post(base_url,
                          headers=headers_,
                          data=payload_o,
                          timeout=timeout,
                          verify=False)
    except requests.exceptions.ConnectionError:
        print("Error. Connection refused.")
        sys.exit(1)
    except requests.exceptions.Timeout:
        f_verbose("[!] Time of response exceeded {} seconds!".format(timeout))
        return timeout

    if r.status_code != 200:
        # A single-argument print() behaves the same on Python 2 and 3.
        # The doubled space reproduces the separator that the former
        # ``print a, b`` statement inserted between its two arguments.
        print(red("[X]") + " Error with HTTP code  " + str(r.status_code))
        print(r.text)
        sys.exit(-1)

    # Named ``elapsed`` so the local does not shadow the ``time`` module.
    elapsed = r.elapsed.total_seconds()
    f_verbose(str(r.status_code))
    f_verbose(
        "[*] Program has successfully sent payload to {}.".format(base_url))
    f_verbose("Time of response: {} ".format(elapsed))

    return elapsed
Exemplo n.º 2
0
def checkGerberFile(self, filepath):
    """
    Check if the given file looks like a RS-274X gerber file
    - Checks that at least one line starts with a G04 command
    - Looks for a %LN command, or else a G04 #@! TF.FileFunction command,
      and verifies the layer description it carries against
      allowedLayerNotes[self.name]

    Problems are only reported by printing; nothing is returned or raised
    by the checks themselves.
    """
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)
    #Find G04 line (i.e. what software created the file)
    if not any(line.startswith("G04 ") for line in lines):
        print(red("Couldn't find G04 command (software description) in %s. Probably not a Gerber file." % filename, bold=True))
    #Find %LN line, i.e. what the creating
    # software thinks the current layer is (e.g. "BottomMask")
    layerNoteRegex = re.compile(r"^\%LN([^\*]+)\*%$")
    fileFunctionRegex = re.compile(r"G04 #@! TF\.FileFunction,([^\*]+)\*")
    layerDescription = None
    for line in lines:
        layerNoteMatch = layerNoteRegex.match(line)
        if layerNoteMatch:
            layerDescription = layerNoteMatch.group(1)
            break #Expecting only one layer note
        fileFunctionMatch = fileFunctionRegex.match(line)
        if fileFunctionMatch:
            # A FileFunction description is kept as a list of its
            # comma-separated parts; a later %LN line still overrides it.
            layerDescription = fileFunctionMatch.group(1).split(",")
    #Check if the layer note we found makes sense
    if layerDescription is None: #Neither command was found
        print(yellow("Couldn't find %%LN command or file function command in %s" % filename))
    elif layerDescription not in allowedLayerNotes[self.name]:
        # Same membership test for both the %LN string and the FileFunction list
        print(red("Layer description '%s' in %s does not match any of the expected descriptions: %s" % (layerDescription, filename, allowedLayerNotes[self.name]), bold=True))
Exemplo n.º 3
0
def checkGerberFile(self, filepath):
    """
    Run basic sanity checks on a file that should be a RS-274X gerber file.

    A message is printed if no line starts with "G04 ", if neither a %LN
    nor a TF.FileFunction command is present, or if the layer description
    found is not contained in allowedLayerNotes[self.name].
    """
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)

    # A G04 line tells what software created the file
    foundG04 = False
    for l in lines:
        if l.startswith("G04 "):
            foundG04 = True
            break
    if not foundG04:
        print(red("Couldn't find G04 command (software description) in %s. Probably not a Gerber file." % filename, bold=True))

    # The %LN line states what the creating software thinks the current
    # layer is (e.g. "BottomMask")
    layerNoteRegex = re.compile(r"^\%LN([^\*]+)\*%$")
    fileFunctionRegex = re.compile(r"G04 #@! TF\.FileFunction,([^\*]+)\*")
    layerDescription = None
    for line in lines:
        noteMatch = layerNoteRegex.match(line)
        if noteMatch is not None:
            layerDescription = noteMatch.group(1)
            break  # Expecting only one layer note
        functionMatch = fileFunctionRegex.match(line)
        if functionMatch is not None:
            # FileFunction command: keep the comma-separated parts as a list
            layerDescription = functionMatch.group(1).split(",")

    # No layer description at all
    if layerDescription is None:
        print(yellow("Couldn't find %%LN command or file function command in %s" % filename))
        return

    # We found a layer description (string for %LN, list for FileFunction)
    expectedNotes = allowedLayerNotes[self.name]
    if layerDescription not in expectedNotes:
        print(red("Layer description '%s' in %s does not match any of the expected descriptions: %s" % (layerDescription, filename, expectedNotes), bold=True))
Exemplo n.º 4
0
def parse_check_numbers(params, verbose=False):
    """
    Tell whether a list holds exactly three non-negative base-10 integers.

    Helper for read_timeline(); it only exists to keep that function short
    and is not meant to be called from anywhere else.
        :param params: is the list to check.
        :param verbose: is an optional flag.
            When true, the reason for a rejection is printed to the console. Defaults to false.
        :return: True when all three entries parse as integers >= 0, otherwise False.
    """

    def report_rejected(label, raw):
        # Print the coloured label and the quoted offending value on one line
        if verbose:
            print(red(label), end='')
            print(add_quotes(raw))

    # anything but exactly 3 entries is rejected right away
    if len(params) != 3:
        if verbose:
            print(magenta("\t\tinvalid parameter count:"), len(params))
        return False

    for raw in params:
        try:
            value = int(raw, 10)  # 10 is the number base
        except ValueError:
            # not parseable as an integer
            report_rejected("\t\tinvalid integer > ", raw)
            return False
        # int() accepts negatives, which are not wanted here
        if value < 0:
            report_rejected("\t\tinvalid integer range > ", raw)
            return False
    return True
Exemplo n.º 5
0
    def print_state(self):
        """Print the current register values and the set EFLAGS bits.

        Every register named in ``reg_map`` is read from ``self.emu``; a
        register whose value differs from ``self.reg_state`` is rendered
        in bold red.
        """
        regs = {}
        for name in reg_map:
            value = self.emu.reg_read(reg_map[name])
            changed = self.reg_state[name] != value
            text = '{}:0x{:08x}'.format(name, value)
            regs[name] = red(text, bold=True) if changed else text

        # eflags: collect the names of all bits that are set
        efl = self.emu.reg_read(UC_X86_REG_EFLAGS)
        set_flags = [self.flags[bit] for bit in self.flags if efl & (1 << bit)]
        r_efl = 'eflags: ' + red(' '.join(set_flags))

        row = "{0}  {1}  {2}  {3}"
        print(row.format(regs['eax'], regs['ebx'], regs['ecx'], regs['edx']))
        print(row.format(regs['esi'], regs['edi'], regs['esp'], regs['ebp']))
        print("{0}  {1}".format(regs['eip'], r_efl))
        return
    def first_stage_backreplace(self, s, repmap):
        """
        Replace the numeric placeholders in s by their proto placeholders.

        :param s: the string containing numeric placeholders
        :param repmap: sequence of (protoPlaceholder, value) pairs; it is
            iterated more than once, so it must not be a one-shot iterator
        :return: s with every numeric placeholder (and the whitespace around
            it) replaced by its proto placeholder, or None if a non-nested
            placeholder is missing from s or a placeholder occurs more
            than once
        """
        for protoPlaceholder, _ in repmap:
            # Get numeric placeholder
            placeholder = self.protoPlaceholderToNumericPlaceholder[protoPlaceholder]
            # Check if it got mis-translated...
            if placeholder not in s:
                # Special case for nested patterns:
                # Nested patterns will not be replaced by 2nd stage (numeric) placeholders
                if any(protoPlaceholder in val for _, val in repmap):
                    continue # no need to replace numeric by proto pattern
                # not nested, fail!
                print(red("{} not found in '{}'".format(placeholder, s), bold=True))
                return None
            if s.count(placeholder) > 1:
                print(red("Placeholder {} was duplicated in '{}'".format(placeholder, s), bold=True))
                return None
            # Replace by proto-placeholder which is a unicode char.
            # The placeholder is escaped so the regex matches it literally,
            # exactly like the "in" and count() checks above do.
            s = re.sub(r"\s*" + re.escape(placeholder) + r"\s*",
                protoPlaceholder, s, flags=re.UNICODE)
        return s
Exemplo n.º 7
0
def restoreDump(args):
    """Restore the YakDB tables from their dump files.

    For each of the four tables (documents, entities, document index,
    entity index) that is not disabled through the matching ``args.no_*``
    flag, the dump file is imported if it exists; otherwise a message is
    printed and the table is skipped.

    :param args: namespace providing ``req_endpoint`` and the flags
        ``no_documents``, ``no_entities``, ``no_document_idx`` and
        ``no_entity_idx``; it is also passed to ``__getDumpFilenames``.
    """
    #Setup raw YakDB connection
    conn = YakDB.Connection()
    conn.connect(args.req_endpoint)
    #Filenames to restore from
    filenames = __getDumpFilenames(args)
    #NOTE: Partial & incremental restore is supported
    #(skip flag, table label, index into filenames, table number)
    tables = [
        (args.no_documents, "document", 0, 1),
        (args.no_entities, "entity", 1, 2),
        (args.no_document_idx, "document index", 2, 3),
        (args.no_entity_idx, "entity index", 3, 4),
    ]
    #Restore every table if the corresponding file exists
    for skip, label, fileIndex, tableNo in tables:
        if skip:
            continue
        filename = filenames[fileIndex]
        if not os.path.isfile(filename):
            print(red("Can't find " + label + " table file " + filename, bold=True))
        else: #It's a regular file
            print(blue("Restoring " + label + " table from " + filename, bold=True))
            importYDFDump(conn, filename, tableNo)
Exemplo n.º 8
0
def test_justify_formatted():
    """justify_formatted with rjust and width 10 pads red("hi") with 8 spaces."""
    def rjust(s, width):
        return s.rjust(width)

    justified = justify_formatted(red("hi"), rjust, 10)
    expected = "        " + red("hi")
    assert justified == expected
Exemplo n.º 9
0
def parse_adm_readall_ofs(p):
    """Print and return the text storage records carried by a packet.

    :param p: packet expected to have a 'SAPMSAdmRecord' layer; the process
        exits with status -1 when it does not.
    :return: dict mapping each record name to a ``(type, value)`` tuple.
        Values starting with 'LG_EYECAT' are replaced by the result of
        ``parse_logon_group``.
    """
    if not p.haslayer('SAPMSAdmRecord'):
        print("Packet has no 'SAPMSAdmRecord'.")
        exit(-1)
    print("[+] Text Storage")
    records = {}
    for e in p.adm_records:
        name = e.rzl_strg_name
        value = str(e.rzl_strg_value)
        type_v = ms_adm_rzl_strg_type_values[e.rzl_strg_type]

        # encoding of value for logon group is binary (IP + port etc.)
        if value.startswith('LG_EYECAT'):
            value = parse_logon_group(value)
        records[name] = (type_v, value)

    # pretty print that
    for name, (type_v, value) in records.items():
        if isinstance(value, list):
            print(red(name, bold=True) + '\t: ' + ' '.join(value))
        elif type_v.endswith('_C'):
            print(green(name) + '\t: ' + str(value))
        # Records of any other type are collected but not printed.
    return records
Exemplo n.º 10
0
def test_justify_formatted():
    """Check right-justification of a colored string to a width of 10."""
    def pad_left(s, width):
        # Plain str.rjust is the justification function under test
        return s.rjust(width)

    eight_spaces = " " * 8
    assert justify_formatted(red("hi"), pad_left, 10) == eight_spaces + red("hi")
Exemplo n.º 11
0
def performRender(args):
    """Render the lint, rule-hit and subtitle HTML output for one language.

    Reads ``download``, ``outdir``, ``language``, ``no_lint`` and
    ``only_lint`` from ``args``. ``args.outdir`` is filled in with
    "output-<language>" when it is empty, and the directory is created
    if it does not exist yet.
    """
    # Optional download / update step
    if args.download:
        download()

    # Make sure the output directory exists
    if not args.outdir:
        args.outdir = "output-{0}".format(args.language)
    if not os.path.isdir(args.outdir):
        os.mkdir(args.outdir)

    renderer = HTMLHitRenderer(args.outdir, args.language)

    # Lint HTML, retried up to 25 times on fetch errors
    if not args.no_lint:
        print(black("Rendering lint...", bold=True))
        for _ in range(25):
            try:
                renderer.renderLintHTML()
            except NoResultException:
                print(red("Lint fetch error, retrying..."))
            else:
                break
        else:
            # The loop finished without a single successful attempt
            print(red("Lint fetch error (retries exhausted)", bold=True))

    if not args.only_lint:
        # Import the PO files
        potDir = os.path.join("cache", args.language)
        print(black("Reading files from {0} folder...".format(potDir), bold=True))
        poFiles = readPOFiles(potDir)
        print(black("Read {0} files".format(len(poFiles)), bold=True))
        # Compute hits
        print(black("Computing rules...", bold=True))
        renderer.computeRuleHitsForFileSet(poFiles)
        # Drop our reference to the HUGE po data as early as possible
        del poFiles

        # Rule hit HTML
        print(black("Rendering HTML...", bold=True))
        renderer.hitsToHTML()

        # filestats.json
        print(black("Generating JSON API files...", bold=True))
        renderer.writeStatsJSON()

    # Subtitle overview is only generated if the video data is present
    videosJSONPath = os.path.join("cache", "videos.json")
    if not os.path.isfile(videosJSONPath):
        return
    print(black("Rendering subtitles overview...", bold=True))
    with open(videosJSONPath) as infile:
        exercises = json.load(infile)
    subtitleTemplate = renderer.env.get_template("subtitles.html")
    subtitleHTML = subtitleTemplate.render(exercises=exercises)
    writeToFile(os.path.join(args.outdir, "subtitles.html"), subtitleHTML)
 def check_regex_equal(self, regex, s1, s2, desc):
     """
     Compare the stripped matches of regex in s1 (original) and s2 (translated).

     Returns True if both strings yield the same list of matches. Otherwise
     the difference is printed and False is returned.
     """
     def stripped_matches(text):
         return [hit.group(0).strip() for hit in regex.finditer(text)]

     original_hits = stripped_matches(s1)
     translated_hits = stripped_matches(s2)
     if original_hits == translated_hits:
         return True
     print(red("Syntax comparison failed for {} regex:\n\t{}\n\t{}".format(
         desc, str(original_hits), str(translated_hits)), bold=True))
     print(red("Original: {}".format(s1), bold=True))
     print(red("Translated: {}".format(s2), bold=True))
     return False
Exemplo n.º 13
0
def checkFile(directory, expectedFile, projectName):
    """Check if a given expected file exists inside a directory.

    The file name is ``projectName + expectedFile.extension``. When the
    file exists and ``expectedFile.checkFN`` is set, that function is
    called with the expected file and the full path.

    :return: the file name (without directory) if the file exists,
        otherwise None.
    """
    filename = projectName + expectedFile.extension
    filepath = os.path.join(directory, filename)
    if not os.path.isfile(filepath):
        print(red("File %s (%s) missing" % (filename, expectedFile.name), bold=True))
        return None
    # print() with a single argument works on both Python 2 and Python 3
    print(green("Found %s data %s" % (expectedFile.format, filename)))
    if expectedFile.checkFN is not None:
        expectedFile.checkFN(expectedFile, filepath)
    return filename
Exemplo n.º 14
0
def extractProjectPrefix(files):
    """
    Extract a common project prefix from all files in a directory
    Fails & exits if no such prefix is found
    Example: [ABC.top, ABC.bot] => "ABC"

    os.path.commonprefix compares character by character, so extensions
    that share leading characters (ABC.gtl / ABC.gto => "ABC.gt") or a
    single file (ABC.top => "ABC.top") give a prefix that runs past the
    dot. In that case the prefix is cut back to its last dot, provided
    that what follows it in every file is a plain extension without dots.
    """
    commonprefix = os.path.commonprefix(files)
    if commonprefix.endswith("."):
        return commonprefix[:-1] #Strip off dot
    # The common part may extend into the extensions: cut at the last dot
    stem, dot, _ = commonprefix.rpartition(".")
    if dot and stem and all("." not in f[len(stem) + 1:] for f in files):
        return stem
    print(red("Can't extract project name from files: %s" % ", ".join(files), bold=True))
    print(red("Please ensure that all files have a common filename and only differ in their extension!", bold=True))
    print(red("Example: MyBoard.top, MyBoard.bot, ...", bold=True))
    sys.exit(1)
Exemplo n.º 15
0
def checkCopperLayer(self, filepath):
    """Run the basic gerber checks and warn about apertures that are too small.

    Prints a message for every aperture whose diameter is below the
    minimum width. The clearance is not checked.
    """
    #Basic gerber checks
    checkGerberFile(self, filepath)
    #Check if smallest aperture is < 6mil = 150um
    #NOTE: We currently don't compute the clearance (way too complicated)
    lines = readFileLines(filepath)
    apertures = parseGerberApertures(lines)
    unit = parseGerberUnit(lines)
    #Minimum width in the file's unit: 0.006 for "in", 0.152 for
    # anything else (presumably millimetres -- confirm against parseGerberUnit)
    limit = 0.006 if unit == "in" else 0.152
    for aperture in apertures:
        if aperture.diameter < limit:
            # print() with a single argument works on both Python 2 and 3
            print(red("Aperture %s (size %.3f %s) is smaller than %.3f %s minimum width" %
                      (aperture.id, aperture.diameter, unit, limit, unit)))
    def parse(self, response):
        """
        Parse a law page: create or update the Law object together with its
        category, keywords and documents, then parse the parliamentary and
        pre-parliamentary steps if the page has the matching tabs.
        """
        # Plain fields
        title = LAW.TITLE.xt(response)
        parl_id = LAW.PARL_ID.xt(response)
        status = LAW.STATUS.xt(response)

        # The legislative period is the 4th-last segment of the URL
        roman_numeral = response.url.split('/')[-4]
        LLP = LegislativePeriod.objects.get(roman_numeral=roman_numeral)

        # Fields needed for the foreign keys
        category = LAW.CATEGORY.xt(response)
        description = LAW.DESCRIPTION.xt(response)

        # Make sure the category exists
        cat, created = Category.objects.get_or_create(title=category)
        if created:
            category_label = green(u'[{}]'.format(category))
            log.msg(u"Created category {}".format(category_label))

        # Create or update the Law
        law_item, law_created = Law.objects.update_or_create(
            parl_id=parl_id,
            legislative_period=LLP,
            source_link=response.url,
            defaults={
                'title': title,
                'status': status,
                'description': description
            })

        # Attach foreign keys and persist
        law_item.keywords = self.parse_keywords(response)
        law_item.category = cat
        law_item.documents = self.parse_docs(response)
        law_item.save()

        # Log our progress
        template = (u"Created {} with id {}, LLP {} @ {}" if law_created
                    else u"Updated {} with id {}, LLP {} @ {}")
        log.msg(
            template.format(
                red(title),
                cyan(u"[{}]".format(parl_id)),
                green(str(LLP)),
                blue(response.url)),
            level=log.INFO)

        response.meta['law_item'] = law_item

        # Only parse the step tabs that are actually available on the page
        if response.xpath('//h2[@id="tab-ParlamentarischesVerfahren"]'):
            self.parse_parliament_steps(response)

        if response.xpath('//h2[@id="tab-VorparlamentarischesVerfahren"]'):
            self.parse_pre_parliament_steps(response)
Exemplo n.º 17
0
def f_check():
    """Build the target URL from ``args``, run the check and print the verdict.

    Sets the globals ``timeout`` and ``base_url``. The URL scheme is
    "https" when ``args.ssl`` is set and "http" otherwise.
    """
    global timeout, base_url
    timeout = args.timeout
    logging.captureWarnings(True)  # Capture the ssl warnings with the standard logging module

    scheme = "https" if args.ssl else "http"
    base_url = "{}://{}:{}/{}".format(scheme, args.host, args.port, args.url)

    f_verbose("[*] Program will check out WebLogic for CVE-2017-3506 & 10271 vulnerability.")

    # print() with a single argument works on both Python 2 and Python 3
    if f_run():
        print(red("[x]") + " Your system is potentially vulnerable to XML Serialization attack!")
    else:
        print(green("[*]") + " Your system is " + blue("safe!"))
Exemplo n.º 18
0
    def parse(self, response):
        """
        Entry point: build one debate-list (RSS feed) request for every
        combination of legislative period in self.LLP and debate type in
        self.DEBATETYPES.

        Returns the list of requests; each is handled by parse_debatelist
        and carries the LegislativePeriod object (or None if it was not
        found) and the debate type in its meta.
        """

        callback_requests = []
        for llp in self.LLP:
            for nrbr in self.DEBATETYPES:
                try:
                    llp_item = LegislativePeriod.objects.get(roman_numeral=llp)
                except LegislativePeriod.DoesNotExist:
                    llp_item = None
                    self.logger.warning(
                        red(u"LLP '{}' not found".format(llp)))

                query = urlencode({'view': 'RSS',
                                   'jsMode': 'RSS',
                                   'xdocumentUri': '/PAKT/STPROT/index.shtml',
                                   'NRBRBV': nrbr,
                                   'NUR_VORL': 'N',
                                   'R_PLSO': 'PL',
                                   'GP': llp,
                                   'FBEZ': 'FP_011',
                                   'listeId': '211',
                                   })
                request = scrapy.Request(
                    self.BASE_URL + 'filter.psp?' + query,
                    callback=self.parse_debatelist,
                    meta={'llp': llp_item, 'type': nrbr})
                callback_requests.append(request)

        return callback_requests
def assertNotTranslated(engl):
    """Print an error if a fresh RuleAutotranslator translates ``engl``."""
    if RuleAutotranslator().translate(engl) is None:
        return
    message = "String should not be translated:'{}'".format(engl)
    print(red(message, bold=True))
Exemplo n.º 20
0
    def parse_debate(self, response):
        """
        Debate-transcript ("Stenografisches Protokoll") parser

        Stores one statement per extracted section and logs how many
        sections were saved.
        """
        # Counted separately so that the log line is correct and does not
        # fail with a NameError when the transcript has no sections.
        count = 0
        for i, sect in enumerate(DOCSECTIONS.xt(response)):
            # Lookup + add references to the section data
            sect['debate'] = response.meta['debate']
            if 'speaker_id' in sect and sect['speaker_id'] is not None:
                try:
                    sect['person'] = Person.objects.get(
                        parl_id=sect['speaker_id'])
                except Person.DoesNotExist:
                    self.logger.warning(
                        red(u"Person '{}' not found".format(sect['speaker_id'])))

            # A two-element ref_timestamp is applied as (minute, second)
            # to the debate's date
            if sect['ref_timestamp'] is not None \
                    and len(sect['ref_timestamp']) == 2:
                sect['date'] = sect['debate'].date.replace(
                    minute=sect['ref_timestamp'][0],
                    second=sect['ref_timestamp'][1])

            self.store_statement(sect, i)
            count = i + 1

        self.logger.info(
            green(u"Saved {} sections from {}".format(count, response.url)))
Exemplo n.º 21
0
    def parse_debate(self, response):
        """
        Debate-transcript ("Stenografisches Protokoll") parser

        Stores one statement per extracted section and logs how many
        sections were saved.
        """
        # Number of stored sections. The enumerate index is zero-based, so
        # logging it directly would under-report the count by one.
        count = 0
        for i, sect in enumerate(DOCSECTIONS.xt(response)):
            # Lookup + add references to the section data
            sect['debate'] = response.meta['debate']
            if 'speaker_id' in sect and sect['speaker_id'] is not None:
                try:
                    sect['person'] = Person.objects.get(
                        parl_id=sect['speaker_id'])
                except Person.DoesNotExist:
                    self.logger.warning(
                        red(u"Person '{}' not found".format(sect['speaker_id'])))
            else:
                sect['person'] = None

            # Select best timestamps for start and end and make datetime
            start_ts = sect['time_start'] or sect['ref_timestamp']
            end_ts = sect['time_end'] or sect['ref_timestamp']
            sect['date'] = self._apply_ts(sect['debate'].date, start_ts)
            sect['date_end'] = self._apply_ts(sect['debate'].date, end_ts)

            self.store_statement(sect, i)
            count = i + 1

        self.logger.info(
            green(u"Saved {} sections from {}".format(count, response.url)))
 def onMessage(self, payload, isBinary):
     """
     Handle one websocket request: decode the JSON payload, run the query
     selected by its "qtype" and send the request object back with the
     results attached. Unknown query types are printed and get no reply.
     """
     request = json.loads(payload.decode('utf8'))
     qtype = request["qtype"]
     # The query input ("term" / "query") is removed from the request so
     # that it is not sent back with the reply
     if qtype == "docsearch":
         hits = self.performDocumentSearch(request.pop("term"))
         request["results"] = list(hits.values())
     elif qtype == "ner":
         request["results"] = self.performEntityNER(request.pop("query"))
     elif qtype == "metadb":
         # Send the whole meta-database
         request["results"] = metaDB
     elif qtype == "entitysearch":
         request["entities"] = self.performEntitySearch(request.pop("term"))
     elif qtype == "getdocuments":
         # Serve one or multiple documents by IDs
         docIds = [docId.encode() for docId in request.pop("query")]
         request["results"] = self.db.docIdx.findEntities(docIds)
     else:
         print(
             red("Unknown websocket request type: %s" % qtype,
                 bold=True))
         return  # Do not send reply
     # Reply with the modified request object: custom K/V pairs are kept
     reply = json.dumps(request, default=documentSerializer)
     self.sendMessage(reply.encode("utf-8"), False)
Exemplo n.º 23
0
 def onMessage(self, payload, isBinary):
     """
     Answer a websocket query.

     The payload is a UTF-8 encoded JSON object whose "qtype" selects the
     action. The same object is sent back with the result added and the
     query input removed; an unknown "qtype" is printed and not answered.
     """
     message = json.loads(payload.decode('utf8'))
     kind = message["qtype"]
     if kind == "docsearch":
         term = message["term"]
         found = self.performDocumentSearch(term)
         del message["term"]
         message["results"] = list(found.values())
     elif kind == "ner":
         query = message["query"]
         recognized = self.performEntityNER(query)
         del message["query"]
         message["results"] = recognized
     elif kind == "metadb":
         # The meta-database is sent as-is
         message["results"] = metaDB
     elif kind == "entitysearch":
         term = message["term"]
         message["entities"] = self.performEntitySearch(term)
         del message["term"]
     elif kind == "getdocuments":
         # One or multiple documents, looked up by their encoded IDs
         encodedIds = []
         for docId in message["query"]:
             encodedIds.append(docId.encode())
         message["results"] = self.db.docIdx.findEntities(encodedIds)
         del message["query"]
     else:
         print(red("Unknown websocket request type: %s" % message["qtype"], bold=True))
         return # Do not send reply
     # Custom K/V pairs of the request survive in the reply
     serialized = json.dumps(message, default=documentSerializer)
     self.sendMessage(serialized.encode("utf-8"), False)
def iterateUniprotDatabases(quiet=True, limit=300):
    """
    Fetch the uniprot metadatabase by guessing valid integral database IDs.
    Probes the IDs 0 .. limit-1 and yields one dictionary for every ID
    that is answered with HTTP status OK.

    :param quiet: if False, a message is printed for every ID that is
        fetched or that does not exist.
    :param limit: number of IDs to probe. Defaults to 300; raise it if
        there are ever more databases than that.
    """
    template = "http://www.uniprot.org/database/%d.rdf"
    for i in range(limit):
        r = requests.get(template % i)
        if r.status_code == requests.codes.ok:
            if not quiet:
                print(green("[UniProt MetaDB] Fetching DB #%d" % i))
            # NOTE(review): no parser is named here, but the lookup below
            # needs one that wraps the document in <html><body> -- confirm
            # which parser is installed.
            soup = BeautifulSoup(r.text)
            #Very, very crude RDF/XML parser
            rdf = soup.html.body.find("rdf:rdf")
            db = {
                # NOTE(review): "id" and "name" are both the abbreviation
                "id": rdf.abbreviation.text,
                "name": rdf.abbreviation.text,
                "category": rdf.category.text,
                "description": rdf.find("rdfs:label").text,
            }
            # "url" and "urltemplate" are only set when they are non-empty
            url = rdf.find("rdfs:seealso")["rdf:resource"]
            if url: db["url"] = url
            urltemplate = rdf.urltemplate.text
            if urltemplate: db["urltemplate"] = urltemplate
            yield (db)
        else:
            if not quiet:
                print(red("[UniProt MetaDB] Database #%d does not exist" % i))
Exemplo n.º 25
0
    def parse_inquiry_response(self, response):
        """
        Callback function for parsing the inquiry responses

        Extracts the response fields, makes sure the category exists and
        looks up the sender. If the sender lookup fails, a warning is
        logged and the method returns.
        """
        inquiry_item = response.meta.get('inquiry_item',None) # allow testing single urls for parsing errors
        source_link = response.url
        parl_id = response.url.split('/')[-2]
        title = INQUIRY.TITLE.xt(response)
        description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
        LLP = inquiry_item.legislative_period if inquiry_item else None
        category = INQUIRY.CATEGORY.xt(response)

        # Get or create Category object for the inquiry and log to screen if new
        # category is created.
        cat, created = Category.objects.get_or_create(title=category)
        if created:
            log.msg(u"Created category {}".format(
                green(u'[{}]'.format(category))),level=log.DEBUG)

        try:
            sender_object = Person.objects.get(
                parl_id=INQUIRY.RESPONSESENDER.xt(response))
        except Exception:
            # "except X as e" / "except X" is valid on Python 2.6+ and 3,
            # unlike the former "except X, e"; the exception was unused.
            log.warning(red(u'Sender "{}" was not found in database, skipping Inquiry {} in LLP {}'.format(
                INQUIRY.RESPONSESENDER.xt(response), parl_id, LLP)))
            return
Exemplo n.º 26
0
def print_answer(p):
    """Log flag, opcode, opcode error and key of a packet at debug level.

    Values that cannot be looked up fall back to "0" (flag), the raw
    opcode, and 'None' (opcode error). A key different from ``null_key``
    is additionally passed to ``mskey_parse_print``.
    """
    # "except Exception" instead of a bare except, so that
    # KeyboardInterrupt and SystemExit are not swallowed by the fallbacks.
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except Exception:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except Exception:
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except Exception:
        opcode_err = 'None'

    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)

    if p.key != null_key:
        mskey_parse_print(p.key)
        # NOTE: the 'hex' codec for str.encode only exists on Python 2
        key = p.key.encode('hex')
    else:
        key = "NULL"

    logger.debug("flag: " + cyan(flag) + " opcode:" + cyan(opcode) + \
        " opcode_error: " + green(opcode_err) + " key: %s" % key)
Exemplo n.º 27
0
    def parse_inquiry_response(self, response):
        """
        Callback function for parsing the inquiry responses

        Creates or updates the InquiryResponse for the inquiry found in
        response.meta['inquiry_item'] and links it to that inquiry. If the
        sender lookup fails, a message is logged and nothing is saved.
        """
        inquiry_item = response.meta['inquiry_item']
        source_link = response.url
        parl_id = response.url.split('/')[-2]
        title = INQUIRY.TITLE.xt(response)
        description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
        LLP = inquiry_item.legislative_period
        category = INQUIRY.CATEGORY.xt(response)

        # Get or create Category object for the inquiry and log to screen if new
        # category is created.
        cat, created = Category.objects.get_or_create(title=category)
        if created:
            log.msg(u"Created category {}".format(
                green(u'[{}]'.format(category))))

        try:
            sender_object = Person.objects.get(
                parl_id=INQUIRY.RESPONSESENDER.xt(response))
        except Exception:
            # Not a bare except, so KeyboardInterrupt/SystemExit still
            # propagate. It is the sender lookup that failed here.
            log.msg(
                red(u'Sender was not found in database, skipping Inquiry {} in LLP {}'
                    .format(parl_id, LLP)))
            return

        # Create or update Inquiry item
        inquiryresponse_item, inquiryresponse_created = InquiryResponse.objects.update_or_create(
            parl_id=parl_id,
            legislative_period=LLP,
            defaults={
                'title': title,
                'source_link': source_link,
                'description': description,
                'sender': sender_object
            })

        # Attach foreign Keys
        inquiryresponse_item.documents = self.parse_response_docs(response)
        inquiryresponse_item.category = cat

        # Save InquiryResponse object
        inquiryresponse_item.save()

        if inquiryresponse_created:
            logtext = u"[{} of {}] Created InquiryResponse {} with ID {}, LLP {} @ {}"
        else:
            logtext = u"[{} of {}] Updated InquiryResponse {} with ID {}, LLP {} @ {}"

        logtext = logtext.format(self.SCRAPED_COUNTER, self.TOTAL_COUNTER,
                                 cyan(title), cyan(u"{}".format(parl_id)),
                                 green(str(LLP)), blue(response.url))
        log.msg(logtext, level=log.INFO)

        # Link the response to its inquiry
        inquiry_item.response = inquiryresponse_item
        inquiry_item.save()

        return
Exemplo n.º 28
0
    def parse_inquiry_response(self, response):
        """
        Callback function for parsing the inquiry responses

        Extracts the response fields, makes sure the category exists and
        looks up the sender. If the sender lookup fails, a message is
        logged and the method returns.
        """
        inquiry_item = response.meta.get('inquiry_item',None) # allow testing single urls for parsing errors
        source_link = response.url
        parl_id = response.url.split('/')[-2]
        title = INQUIRY.TITLE.xt(response)
        description = INQUIRY.RESPONSEDESCRIPTION.xt(response)
        LLP = inquiry_item.legislative_period if inquiry_item else None
        category = INQUIRY.CATEGORY.xt(response)

        # Get or create Category object for the inquiry and log to screen if new
        # category is created.
        cat, created = Category.objects.get_or_create(title=category)
        if created:
            log.msg(u"Created category {}".format(
                green(u'[{}]'.format(category))))

        try:
            sender_object = Person.objects.get(
                parl_id=INQUIRY.RESPONSESENDER.xt(response))
        except Exception:
            # The former "except Exception, e" is a syntax error on
            # Python 3, and the bound exception was never used.
            log.msg(red(u'Sender "{}" was not found in database, skipping Inquiry {} in LLP {}'.format(
                INQUIRY.RESPONSESENDER.xt(response), parl_id, LLP)))
            return
Exemplo n.º 29
0
    def parse_debate(self, response):
        """
        Debate-transcript ("Stenografisches Protokoll") parser

        Every extracted section gets its debate, speaker and start/end
        dates attached and is stored as a statement; the number of stored
        sections is logged at the end.
        """
        # The zero-based enumerate index is one less than the number of
        # sections handled so far, so the count is tracked on its own.
        saved = 0
        for i, sect in enumerate(DOCSECTIONS.xt(response)):
            # Lookup + add references to the section data
            sect['debate'] = response.meta['debate']
            if 'speaker_id' in sect and sect['speaker_id'] is not None:
                try:
                    sect['person'] = Person.objects.get(
                        parl_id=sect['speaker_id'])
                except Person.DoesNotExist:
                    self.logger.warning(
                        red(u"Person '{}' not found".format(
                            sect['speaker_id'])))
            else:
                sect['person'] = None

            # Select best timestamps for start and end and make datetime
            start_ts = sect['time_start'] or sect['ref_timestamp']
            end_ts = sect['time_end'] or sect['ref_timestamp']
            sect['date'] = self._apply_ts(sect['debate'].date, start_ts)
            sect['date_end'] = self._apply_ts(sect['debate'].date, end_ts)

            self.store_statement(sect, i)
            saved = i + 1

        self.logger.info(
            green(u"Saved {} sections from {}".format(saved, response.url)))
Exemplo n.º 30
0
def handle_answer(s, p):
    """
    Log a one-line summary of an answer packet.

    The flag, opcode and opcode error of the SAPMS layer of ``p`` are
    translated through the ``ms_*_values`` tables (falling back to a default
    when the lookup fails) and logged. A packet whose key differs from
    ``null_key`` is additionally reported as out of order.

    :param s: connection the packet belongs to (not used by this function)
    :param p: the received packet
    """
    # Only ordinary errors trigger the fallbacks below; KeyboardInterrupt and
    # SystemExit must not be swallowed by these lookups.
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except Exception:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except Exception:
        # Unknown opcode: show the raw value instead
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except Exception:
        opcode_err = 'None'

    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)

    if p.key != null_key:
        key = " key: " + yellow('NOT NULL', bold=True)
        logger.error("[!] Out of order packets, reload this script.")
    else:
        key = ""

    # NOTE: opcode_err is already colored above; the outer green() is kept
    # so the emitted text stays unchanged.
    logger.info("flag: " + cyan(flag) + " opcode:" + cyan(opcode) +
                " opcode_error: " + green(opcode_err) + key)
def initializeMetaDatabase(filename="metadb.json"):
    """
    Ensure we have a valid file with meta-database information,
    i.e. links, names and URL templates for any database being referenced.

    This information is used to generate links to external databases, e.g. STRING.

    This function fetches the Metadatabase from UniProt if required.
    The metadatabase dictionary is returned. If it can neither be read from
    ``filename`` nor downloaded, an error is printed and None is returned.

    Also reads and adds (or replaces) additional entries from metadb-additional.json
    (that file is mandatory: a missing or invalid file raises).
    """
    with open("metadb-additional.json") as infile:
        additional = json.load(infile)
    try:
        with open(filename) as infile:
            db = json.load(infile)
            db.update(additional)
            return db
    except Exception:
        # Local copy missing or unreadable: try to download from UniProt.
        # Only ordinary errors are handled so that Ctrl-C still aborts.
        try:
            db = downloadUniprotMetadatabase(filename)
            db.update(additional)
            return db
        except Exception as ex:
            print(ex)
            print(
                red("Can neither read nor fetch metadabase. Database links will not work.",
                    bold=True))
            return None
Exemplo n.º 32
0
def truncate(args):
    """
    Delete data from one or more tables.

    Nothing happens unless ``args.yes_i_know_what_i_am_doing`` is set. Each of
    the tables 1-4 is cleared unless its ``args.no_*`` flag is set; with
    ``args.hard`` the table is truncated, otherwise its full key range is
    deleted.
    """
    # Refuse to do anything destructive without explicit confirmation
    if not args.yes_i_know_what_i_am_doing:
        print (red("This will delete all your Translatron data. If you are sure, please use --yes-i-know-what-i-am-doing ", bold=True))
        return
    # Set up the raw YakDB connection
    conn = YakDB.Connection()
    conn.connect(args.req_endpoint)
    # (name of the skip flag on args, progress message, table number)
    tables = (
        ("no_documents", "Truncating document table... ", 1),
        ("no_entities", "Truncating entity table... ", 2),
        ("no_document_idx", "Truncating document index table... ", 3),
        ("no_entity_idx", "Truncating entity index table... ", 4),
    )
    for skipFlag, message, tableNo in tables:
        if getattr(args, skipFlag):
            continue
        print (blue(message, bold=True))
        if args.hard:
            conn.truncateTable(tableNo)
        else:
            conn.deleteRange(tableNo, None, None, None)
Exemplo n.º 33
0
def handle_answer(s, p):
    """
    Print a one-line summary of an answer packet and reply if required.

    The flag, opcode and opcode error of the SAPMS layer of ``p`` are
    translated through the ``ms_*_values`` tables (falling back to a default
    when the lookup fails) and printed. A packet whose key differs from
    ``null_key`` is dumped and reported as out of order; if such a packet is
    an ``MS_REQUEST`` with opcode ``'0'``, ``ms_adm_nilist(p, 1)`` is sent
    back over ``s``.

    :param s: connection used to send the reply
    :param p: the received packet
    """
    # Only ordinary errors trigger the fallbacks below; KeyboardInterrupt and
    # SystemExit must not be swallowed by these lookups.
    try:
        flag = ms_flag_values[p[SAPMS].flag]
    except Exception:
        flag = "0"
    try:
        opcode = str(ms_opcode_values[p[SAPMS].opcode])
    except Exception:
        # Unknown opcode: show the raw value instead
        opcode = str(p[SAPMS].opcode)
    try:
        opcode_err = str(ms_opcode_error_values[p[SAPMS].opcode_error])
    except Exception:
        opcode_err = 'None'

    if opcode_err == 'MSOP_OK':
        opcode_err = green(opcode_err)
    else:
        opcode_err = red(opcode_err, bold=True)

    if p.key != null_key:
        p.show()
        key = " key: " + yellow('NOT NULL', bold=True)
        # Single-argument print(...) behaves the same on Python 2 and 3
        print("[!] Out of order packets, reload this script.")
    else:
        key = ""

    # NOTE: opcode_err is already colored above; the outer green() is kept
    # so the emitted text stays unchanged.
    print("flag: " + cyan(flag) + " opcode:" + cyan(opcode) +
          " opcode_error: " + green(opcode_err) + key)

    # Identify request from the server?
    if key != "" and flag == 'MS_REQUEST' and opcode == '0':
        s.send(ms_adm_nilist(p, 1))
Exemplo n.º 34
0
def loadUsernamePassword():
    """
    Read the login data from crowdin-credentials.json in the working directory.

    Returns a (username, password) tuple. If the file does not exist, a hint
    is printed and the function returns None implicitly.
    """
    try:
        with open("crowdin-credentials.json") as infile:
            credentials = json.load(infile)
        return credentials["username"], credentials["password"]
    except FileNotFoundError:
        print(red("Could not find crowdin-credentials.json. Please create that file from crowdin-credentials-template.json!", bold=True))
Exemplo n.º 35
0
def readRulesFromGoogleDocs(gdid, rules=None):
    """
    Read rules from the Google Docs document ``gdid``.

    Valid rules are appended to ``rules`` (a fresh list when omitted; a list
    passed by the caller is extended in place). Entries that are
    ``RuleError`` instances are collected separately and their message is
    printed.

    Returns a tuple ``(rules, rule_errors)``.
    """
    # A literal [] default would be shared between calls and accumulate
    # rules from earlier invocations.
    if rules is None:
        rules = []
    rule_errors = []
    for rule in readRulesFromGDocs(gdid):
        if isinstance(rule, RuleError):
            rule_errors.append(rule)
            print(red(rule.msg))
        else:
            rules.append(rule)
    return rules, rule_errors
Exemplo n.º 36
0
def readRulesFromGoogleDocs(gdid, rules=None):
    """
    Read rules from the Google Docs document ``gdid``.

    Valid rules are appended to ``rules`` (a fresh list when omitted; a list
    passed by the caller is extended in place). Entries that are
    ``RuleError`` instances are collected separately and their message is
    printed.

    Returns a tuple ``(rules, rule_errors)``.
    """
    # A literal [] default would be shared between calls and accumulate
    # rules from earlier invocations.
    if rules is None:
        rules = []
    rule_errors = []
    for rule in readRulesFromGDocs(gdid):
        if isinstance(rule, RuleError):
            rule_errors.append(rule)
            print(red(rule.msg))
        else:
            rules.append(rule)
    return rules, rule_errors
Exemplo n.º 37
0
def test_colordiff():
    """colordiff() leaves the common prefix alone and wraps every differing
    character in its own reverse-video span of the requested color."""
    x, y = colordiff("hi bob", "hi there",
                     color_x=Colors.Red, color_y=Colors.Blue)

    def marked(colorize, chars):
        # One reverse-video span per character, concatenated in order
        return "".join(colorize(ch, reverse=True) for ch in chars)

    assert x == "hi " + marked(red, "bob")
    assert y == "hi " + marked(blue, "there")
Exemplo n.º 38
0
def test_colordiff():
    """colordiff() leaves the common prefix alone and wraps every differing
    character in its own reverse-video span of the requested color."""
    x, y = colordiff("hi bob", "hi there",
                     color_x=Colors.Red, color_y=Colors.Blue)

    def marked(colorize, chars):
        # One reverse-video span per character, concatenated in order
        return "".join(colorize(ch, reverse=True) for ch in chars)

    assert x == "hi " + marked(red, "bob")
    assert y == "hi " + marked(blue, "there")
Exemplo n.º 39
0
def processPMCFileContent(xml):
    """
    Process a string representing a PMC XML file.

    Returns whatever processPMCDoc() yields for the parsed document, or None
    (after printing the article ID and the exception) if processing fails.
    """
    soup = BeautifulSoup(xml, "lxml")
    try:
        result = processPMCDoc(soup)
    except Exception as e:
        articleID = extractArticleID(soup, "pmc")
        print(red("Parser exception while processsing PMC:%s" % articleID))
        print(e)
        result = None
    return result
Exemplo n.º 40
0
    def parse(self, response):
        """
        Parse the person list extracted by ``AUDITORS.LIST``.

        For every listed person the Person, Function and Mandate records are
        created or updated. The first time a ``parl_id`` is seen, a request
        for the person's detail page (handled by ``parse_person_detail``) is
        scheduled.

        Returns the list of detail-page requests.
        """
        persons = AUDITORS.LIST.xt(response)
        callback_requests = []

        self.logger.info("Scraping {} persons".format(len(persons)))

        # Iterate all persons
        for p in persons:
            # Extract basic data
            parl_id = p["source_link"].split("/")[-2]
            p["source_link"] = "{}{}".format(BASE_HOST, p["source_link"])

            # Create or update simple person's item
            person_data = {"reversed_name": p["reversed_name"]}
            person_item, created_person = Person.objects.update_or_create(
                source_link=p["source_link"], parl_id=parl_id, defaults=person_data
            )
            if created_person:
                self.logger.info(u"Created Person {}".format(green(u"[{}]".format(p["reversed_name"]))))
            else:
                self.logger.info(u"Updated Person {}".format(green(u"[{}]".format(p["reversed_name"]))))

            mandate = p["mandate"]

            function_item, f_created = Function.objects.get_or_create(short=mandate["short"], title=mandate["title"])

            if f_created:
                self.logger.info(u"Created function {}".format(green(u"[{}]".format(function_item.short))))

            # Create and append mandate
            try:
                mandate_item, m_created = Mandate.objects.update_or_create(
                    function=function_item, start_date=mandate["start_date"], end_date=mandate["end_date"]
                )
            except Exception:
                # Report the failure and carry on without a mandate; the
                # dates come from the scraped mandate dict.
                self.logger.error(
                    red(
                        u"Error saving Mandate {} ({} - {})".format(
                            function_item, mandate["start_date"], mandate["end_date"]
                        )
                    )
                )
                mandate_item = None

            if mandate_item is not None:
                person_item.mandates.add(mandate_item)
            person_item.save()

            # First time we encounter a person, we scan her detail page too
            if parl_id not in self.persons_scraped:

                # Create Detail Page request
                req = scrapy.Request(p["source_link"], callback=self.parse_person_detail)
                req.meta["person"] = {
                    "reversed_name": p["reversed_name"],
                    "source_link": p["source_link"],
                    "parl_id": parl_id,
                }
                callback_requests.append(req)
                self.persons_scraped.append(parl_id)
        return callback_requests
Exemplo n.º 41
0
    def write_progress(self, rate=None, prestart=None, wait=None, complete=False, error=None):
        """
        Write one status line for this download to stderr.

        The line consists of the action, a rate/status column, the url
        (rendered as a progress bar while a transfer with known total size is
        running) and a size column. The status column is chosen with the
        precedence error > prestart > wait > complete > transfer rate.
        Errors and completion end the line with a newline and are logged via
        ``self.log_url``.
        """
        action_col = self.action.rjust(self.actionwidth)

        # Status text and its color are decided together
        if error:
            rate_text, paint = error, ansicolor.red
        elif prestart:
            rate_text, paint = "starting", ansicolor.cyan
        elif wait:
            rate_text, paint = "%ss..." % self.retry_wait, ansicolor.cyan
        elif complete:
            rate_text, paint = "done", ansicolor.green
        else:
            rate_text, paint = "%s/s" % self.format_size(rate), ansicolor.yellow
        rate_text = rate_text.ljust(self.ratewidth)

        # Prefer the total size, fall back to what has been downloaded so far
        known_size = self.totalsize or self.download_size
        size_text = self.format_size(known_size) if known_size else "????? B"
        size_col = ("  %s" % size_text).ljust(self.sizewidth)

        # Colors are applied after padding so the escape codes do not count
        # towards the column width
        rate_col = paint(rate_text)

        # Draw the progress bar only while a transfer of known size is running
        url_col = self.url_fmt
        if self.totalsize and not (error or prestart or complete):
            filled = int(self.urlwidth * self.download_size / self.totalsize)
            url_col = ansicolor.wrap_string(self.url_fmt, filled, None, reverse=True)

        if not self.totalsize:
            size_col = ansicolor.yellow(size_col)

        line = "%s ::  %s  " % (action_col, rate_col)

        # Final states (and DEBUG_FETCH mode) keep the line; otherwise the
        # carriage return lets the next update overwrite it
        if error or complete or os.environ.get("DEBUG_FETCH"):
            term = "\n"
        else:
            term = "\r"
        ioutils.write_err("%s%s%s%s" % (line, url_col, size_col, term))

        # Log download
        if error:
            self.log_url(error, error=True)
        elif complete:
            self.log_url("done")
Exemplo n.º 42
0
def loadUsernamePassword():
    """
    Read the login data from crowdin-credentials.json in the working directory.

    Returns a (username, password) tuple. If the file does not exist, a hint
    is printed and the function returns None implicitly.
    """
    try:
        with open("crowdin-credentials.json") as infile:
            credentials = json.load(infile)
        username = credentials["username"]
        password = credentials["password"]
        return username, password
    except FileNotFoundError:
        hint = red("Could not find crowdin-credentials.json. Please create that file from crowdin-credentials-template.json!",
                   bold=True)
        print(hint)
Exemplo n.º 43
0
def parse_adm_readall_ofs(p):
    """
    Dump the text storage records carried by packet ``p``.

    Exits with status -1 if ``p`` has no 'SAPMSAdmRecord' layer. Otherwise
    every entry of ``p.adm_records`` is collected into a dict mapping the
    storage name to ``(type name, value as str)``. Entries whose value starts
    with 'LG_EYECAT' are printed as parsed logon groups, entries whose type
    name ends in '_C' are printed verbatim.

    Returns the collected dict.
    """
    if not p.haslayer('SAPMSAdmRecord'):
        print("Packet has no 'SAPMSAdmRecord'.")
        exit(-1)
    logger.info("[+] Dumping Text Storage")

    records = {}
    for entry in p.adm_records:
        strg_name = entry.rzl_strg_name
        strg_value = str(entry.rzl_strg_value)
        strg_type = ms_adm_rzl_strg_type_values[entry.rzl_strg_type]
        records[strg_name] = (strg_type, strg_value)

    # Pretty print that
    for strg_name, (strg_type, strg_value) in records.items():
        if strg_value.startswith('LG_EYECAT'):
            groups = ' '.join(parse_logon_group(strg_value))
            print(red(strg_name, bold=True) + '\t: ' + groups)
        elif strg_type.endswith('_C'):
            print(green(strg_name) + '\t: ' + str(strg_value))
    return records
 def __init__(self, lang="de"):
     """
     Select the translation backend for target language ``lang``.

     If GOOGLE_APPLICATION_CREDENTIALS is present in the environment, the
     Google Cloud translation client is used; otherwise ``self.translate``
     is bound to the googletrans implementation.
     """
     self.lang = lang
     if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
         # No cloud credentials configured
         self.mode = "googletrans"
         self.translate = self._googletrans_translate
         print(red("Using googletrans"))
         return
     self.mode = "google-api"
     self.client = translate.Client(target_language=lang)
     self.translate = self._googleapi_translate
     print(green("Using google cloud translation API"))
Exemplo n.º 45
0
def checkExcellonMetric(self, filepath):
    """
    Check if a given file is a metric excellon file.

    Prints a warning if the first line is not the Excellon header (M48) or if
    the second line does not declare METRIC dimensions, then prints the drill
    statistics (number of holes per diameter) for the file. Files with fewer
    than two lines trigger the corresponding warning instead of raising.
    """
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)
    # Check for excellon header (an empty file has no header either)
    if not lines or lines[0] != "M48":
        print(red("Can't find Excellon drill header (M48) in %s" % filename, bold=True))
    # Check for metric dimension: Line like METRIC,0000.00
    if len(lines) < 2 or lines[1].partition(",")[0] != "METRIC":
        print(red("Excellon drill program %s does not seem to be metric" % filename, bold=True))
    #
    # Drill statistics
    #
    toolStats = extractToolStatistics(lines)
    print(black(self.name + ":", bold=True))
    # items() and single-argument print(...) work on both Python 2 and 3
    for diameter, numDrills in toolStats.items():
        print("\t%d through holes of diameter %.2fmm" % (numDrills, diameter))
    # Print "None" if there are no holes in this file
    if not toolStats:
        print("\tNone")
Exemplo n.º 46
0
def findCoordinateFormat(lines):
    """
    Look for the first %FSLAX..Y..*% line and return the decimal-point
    factors for coordinates as a tuple ``(x_factor, y_factor)``.

    Each factor is 10 to the power of the last digit of the respective
    two-digit format field. If no such line exists, a warning is printed and
    ``(100000., 100000.)`` is returned.
    """
    pattern = re.compile(r"\%FSLAX(\d{2})Y(\d{2})\*\%")
    candidates = (pattern.match(line) for line in lines)
    hit = next((m for m in candidates if m is not None), None)
    if hit is None:
        # NOTE(review): the message names %FSLAX33 while the returned factor
        # is 10**5 -- confirm which of the two is intended.
        print(red("Could not find coordinate format info %FSLAX. Using default %FSLAX33"))
        return 100000.,100000.
    xFormat, yFormat = hit.groups()
    return 10.**int(xFormat[-1]), 10.**int(yFormat[-1])
Exemplo n.º 47
0
def findCoordinateFormat(lines):
    """
    Look for the first %FSLAX..Y..*% line and return the decimal-point
    factors for coordinates as a tuple ``(x_factor, y_factor)``.

    Each factor is 10 to the power of the last digit of the respective
    two-digit format field. If no such line exists, a warning is printed and
    ``(100000., 100000.)`` is returned.
    """
    pattern = re.compile(r"\%FSLAX(\d{2})Y(\d{2})\*\%")
    candidates = (pattern.match(line) for line in lines)
    hit = next((m for m in candidates if m is not None), None)
    if hit is None:
        # NOTE(review): the message names %FSLAX33 while the returned factor
        # is 10**5 -- confirm which of the two is intended.
        print(red("Could not find coordinate format info %FSLAX. Using default %FSLAX33"))
        return 100000.,100000.
    xFormat, yFormat = hit.groups()
    return 10.**int(xFormat[-1]), 10.**int(yFormat[-1])
Exemplo n.º 48
0
def checkExcellonMetric(self, filepath):
    """
    Check if a given file is a metric excellon file.

    Prints a warning if the first line is not the Excellon header (M48) or if
    the second line does not declare METRIC dimensions, then prints the drill
    statistics (number of holes per diameter) for the file. Files with fewer
    than two lines trigger the corresponding warning instead of raising.
    """
    filename = os.path.basename(filepath)
    lines = readFileLines(filepath)
    # Check for excellon header (an empty file has no header either)
    if not lines or lines[0] != "M48":
        print(red("Can't find Excellon drill header (M48) in %s" % filename, bold=True))
    # Check for metric dimension: Line like METRIC,0000.00
    if len(lines) < 2 or lines[1].partition(",")[0] != "METRIC":
        print(red("Excellon drill program %s does not seem to be metric" % filename, bold=True))
    #
    # Drill statistics
    #
    toolStats = extractToolStatistics(lines)
    print(black(self.name + ":", bold=True))
    for diameter, numDrills in toolStats.items():
        print("\t%d through holes of diameter %.2fmm" % (numDrills, diameter))
    # Print "None" if there are no holes in this file
    if not toolStats:
        print("\tNone")
Exemplo n.º 49
0
 def status(self, msg, *args, **kwargs):
     """
     Log a status message and echo it to stdout, followed by a newline.

     ``msg`` is %-formatted with ``args``. With ``error=True`` the text is
     logged as an error and written in red if ``self.stdout_has_colours`` is
     set; otherwise it is logged as info and written unchanged.
     """
     text = msg % args
     shown = text
     # Deliberately an equality test: only True (or a value equal to it)
     # selects the error path, not every truthy value.
     if kwargs.get('error') == True:
         logging.error(text)
         if self.stdout_has_colours:
             shown = ansicolor.red(text)
     else:
         logging.info(text)
     sys.stdout.write(shown)
     sys.stdout.write('\n')
Exemplo n.º 50
0
def importEntities(args):
    """
    Import every file in ``args.infile`` with the importer matching its name.

    The importer is selected from the file's basename: UniProt
    (uniprot_*.dat.gz), MeSH (dNNNN.bin) or Wikipedia page titles. Importer
    modules are imported lazily, only when a matching file is encountered.
    Files that match none of the patterns are reported and skipped.
    """
    for infile in args.infile:
        basename = os.path.basename(infile)
        if re.match(r"uniprot_[a-z]+\.dat\.gz", basename):
            print(blue("Importing UniProt file..."))
            from Translatron.Entities.UniProtImporter import importUniprot
            importUniprot(args, infile)
        # The dot is escaped so that only a literal ".bin" suffix matches
        elif re.match(r"d\d{4}\.bin", basename):
            print(blue("Importing MeSH file..."))
            from Translatron.Entities.MeSHImporter import importMeSH
            importMeSH(args, infile)
        elif re.match(r"[a-z][a-z]wiki.+titles.+\.gz", basename):
            print(blue("Importing Wikipedia page title file..."))
            from Translatron.Entities.WikipediaImporter import importWikimediaPagelist
            importWikimediaPagelist(args, infile)
        else:
            print (red("Can't interpret entity input file (uniprot_sprot.dat.gz - UniProt) %s " % basename))
Exemplo n.º 51
0
def downloadCrowdinById(session, crid, lang="de"):
    """
    Fetch a single Crowdin phrase by its ID.

    :param session: session object used to issue the GET request
    :param crid: Crowdin phrase ID
    :param lang: language code; unknown codes print an error and fall back
                 to language ID 11
    :return: tuple ``(msgid, msgstr, comment, filename)``. If the response
             cannot be decoded or lacks the expected fields, the first three
             entries are an error string and ``filename`` is None.
    """
    if lang in languageIDs:
        langId = languageIDs[lang]
    else:  # Fallback -- wont really work
        print(red("Error: Language unknown: {0}".format(lang), bold=True))
        langId = 11  #de
    url = "https://crowdin.com/translation/phrase?id={0}&project_id=10880&target_language_id={1}".format(crid, langId)
    response = session.get(url)
    try:
        jsondata = response.json()["data"]
        msgid = jsondata["translation"]["text"]
        msgstr = jsondata["top_suggestion"]
        comment = jsondata["translation"]["context"]
        # Drop the first character of the path
        filename = jsondata["translation"]["file_path"][1:]
    except Exception:
        # Malformed or unexpected response. Only ordinary errors are
        # handled so that KeyboardInterrupt/SystemExit still propagate.
        errstr = "[Retrieval error while fetching {0}]".format(url)
        return errstr, errstr, errstr, None
    return msgid, msgstr, comment, filename
Exemplo n.º 52
0
 def __init__(self, name, filename, severity=Severity.standard, flags=re.UNICODE):
     """
     Build a rule from a text list file containing one phrase per line.

     Every non-blank line becomes a word-bounded regex in which spaces match
     any run of whitespace; all of them are combined into one alternation
     stored in ``self.regex``. ``self.valid`` is True only if the file exists
     and the regex was compiled; otherwise an error is printed.

     :param name: rule name, passed on to the base class
     :param filename: path of the text list file
     :param severity: rule severity, passed on to the base class
     :param flags: regex flags used when compiling
     """
     super().__init__(name, severity)
     self.filename = filename
     self.valid = False
     # Check if file exists
     if not os.path.isfile(filename):
         print(red("Unable to find text list file %s" % filename, bold=True))
         return
     regexes = set()
     with open(filename) as infile:
         for line in infile:
             phrase = line.strip()
             if not phrase:
                 # A blank line would become r"\b\b", which matches the
                 # empty string at every word boundary.
                 continue
             rgx = phrase.replace(" ", r"\s+")
             # Don't match in the middle of a word
             regexes.add(r"\b{0}\b".format(rgx))
     # Build large regex from all sub-regexes
     self.regex = reCompiler.compile("|".join(regexes), flags=flags)
     self.valid = True
Exemplo n.º 53
0
def run_script():
    """
    Command line entry point of the spider.

    Parses the options, exports the chosen settings through environment
    variables (FETCH_ALL / DUMP_ALL, HOST_FILTER, PAUSE, DEPTH), restores the
    session for the url, fills in missing rules/queue/web and runs a
    SpiderFetcher on it. A recipe.PatternError is written to stderr and ends
    the process with status 1; missing positional arguments show the help.
    """
    (parser, a) = ioutils.init_opts("<url> ['<pattern>'] [options]")
    option_specs = (
        ("--recipe", dict(metavar="<recipe>", dest="recipe", help="Use a spidering recipe")),
        ("--fetch", dict(action="store_true", help="Fetch urls, don't dump")),
        ("--dump", dict(action="store_true", help="Dump urls, don't fetch")),
        ("--host", dict(action="store_true", help="Only spider this host")),
        ("--pause", dict(type="int", metavar="<pause>", dest="pause", help="Pause for x seconds between requests")),
        ("--depth", dict(type="int", metavar="<depth>", dest="depth", help="Spider to this depth")),
    )
    for flag, spec in option_specs:
        a(flag, **spec)
    (opts, args) = ioutils.parse_args(parser)
    try:
        # --fetch wins over --dump when both are given
        if opts.fetch:
            os.environ["FETCH_ALL"] = "1"
        elif opts.dump:
            os.environ["DUMP_ALL"] = "1"
        if opts.host:
            os.environ["HOST_FILTER"] = "1"
        for env_name, setting in (("PAUSE", opts.pause), ("DEPTH", opts.depth)):
            if setting:
                os.environ[env_name] = str(setting)

        url = args[0]
        if not opts.recipe:
            # Without a recipe the pattern is a mandatory second argument
            pattern = args[1]
            rules = recipe.get_recipe(pattern, url)
        else:
            rules = recipe.load_recipe(opts.recipe, url)

        session = Session.restore(url)
        session.rules = rules

        # Fill in whatever the restored session lacks
        if session.queue is None:
            session.queue = recipe.get_queue(url, mode=fetch.Fetcher.SPIDER)
        if session.wb is None:
            session.wb = web.Web(url)

    except recipe.PatternError as e:
        ioutils.write_err(ansicolor.red("%s\n" % e))
        sys.exit(1)
    except IndexError:
        ioutils.opts_help(None, None, None, parser)

    spiderfetcher = SpiderFetcher(session)
    spiderfetcher.main()
Exemplo n.º 54
0
    def parse_person_detail(self, response):
        """
        Parse a persons detail page before creating the person object

        Reads the basic person data from ``response.meta['person']``,
        extracts name, bio data and photo information from the page and
        creates or updates the matching Person. Failures are logged and the
        person is skipped.
        """
        person = response.meta['person']
        self.logger.info(u"Updating Person Detail {}".format(
            green(u"[{}]".format(person['reversed_name']))
        ))

        full_name = PERSON.DETAIL.FULL_NAME.xt(response)
        bio_data = PERSON.DETAIL.BIO.xt(response)

        profile_photo_url = PERSON.DETAIL.PHOTO_URL.xt(response)
        profile_photo_copyright = PERSON.DETAIL.PHOTO_COPYRIGHT.xt(response)

        try:
            person_data = {
                'photo_link': "{}{}".format(BASE_HOST, profile_photo_url),
                'photo_copyright': profile_photo_copyright,
                'full_name': full_name,
                'reversed_name': person['reversed_name'],
                'birthdate': bio_data['birthdate'],
                'birthplace': bio_data['birthplace'],
                'deathdate': bio_data['deathdate'],
                'deathplace': bio_data['deathplace'],
                'occupation': bio_data['occupation']}

            person_item, created_person = Person.objects.update_or_create(
                source_link=person['source_link'],
                parl_id=person['parl_id'],
                defaults=person_data
            )
            person_item.save()

            # Instantiate slug (attribute access only)
            person_item.slug

        except Exception:
            # Log and skip this person instead of dropping into a debugger,
            # which would stall an unattended scraper run.
            self.logger.error(
                red(u"Error saving Person {}".format(full_name)))
            return
Exemplo n.º 55
0
    def print_debug(self):
        """
        Collects and prints a structured debug message containing the title,
        the scraped LLPs (or "Not applicable") and the base URL, framed by
        two bars.
        """
        fields = {
            'bar': cyan(
                '############################################################'),
            'title': red(self.title),
            'llps': self.LLP or "Not applicable",
            'url': self.BASE_URL,
        }
        template = """
    {bar}

    {title}

      Scraping LLPs: {llps}
      Base URL:      {url}

    {bar}
        """
        print(template.format(**fields))
Exemplo n.º 56
0
    def _render_latest_events(self):
        """Render the latest events emitted by the computation.

        Prints an "Events:" header followed by one line per event in
        ``self._events`` and returns ``2 + len(self._events)``.

        TODO(mpetazzoni): render custom events/alert events differently and
        support alert event schema v3.
        """
        print('\nEvents:')

        def maybe_json(v):
            # Inputs may arrive JSON-encoded as a string
            return json.loads(v) if isinstance(v, six.string_types) else v

        for event in self._events:
            ets = self._computation.get_metadata(event.tsid)
            contexts = json.loads(ets.get('sf_detectInputContexts', '{}'))

            inputs = maybe_json(event.properties.get('inputs', '{}'))
            rendered = []
            for k, v in inputs.items():
                # Fall back to the raw input name if no identifier is given
                name = white(contexts[k].get('identifier', k))
                dims = ','.join(
                    u'{0}:{1}'.format(dim_name, dim_value)
                    for dim_name, dim_value in v.get('key', {}).items())
                rendered.append(u'{name} ({key}): {value}'.format(
                    name=name, key=dims, value=v['value']))
            values = ' | '.join(rendered)

            date = tslib.date_from_utc_ts(event.timestamp_ms)
            is_now = event.properties['is']
            if is_now == 'ok':
                mark = green(u'✓')
            else:
                mark = red(u'✗')

            print(u' {mark} {date} [{incident}]: {values}'
                  .format(mark=mark,
                          date=white(self._render_date(date), bold=True),
                          incident=event.properties['incidentId'],
                          values=values))

        return 2 + len(self._events)
Exemplo n.º 57
0
    def parse(self, response):
        self.SCRAPED_COUNTER += 1

        LLP = LegislativePeriod.objects.get(
            roman_numeral=response.url.split('/')[-4])

        # Extract fields
        ts = GENERIC.TIMESTAMP.xt(response)
        title = LAW.TITLE.xt(response)
        parl_id = LAW.PARL_ID.xt(response)
        status = LAW.STATUS.xt(response)

        if not self.IGNORE_TIMESTAMP and not self.has_changes(parl_id, LLP, response.url, ts):
            self.logger.info(
                green(u"[{} of {}] Skipping Law, no changes: {}".format(
                    self.SCRAPED_COUNTER,
                    self.TOTAL_COUNTER,
                    title)))
            return

        # Extract foreign keys
        category = LAW.CATEGORY.xt(response)
        description = LAW.DESCRIPTION.xt(response)

        # Create category if we don't have it yet
        cat, created = Category.objects.get_or_create(title=category)
        if created:
            log.msg(u"Created category {}".format(
                green(u'[{}]'.format(category))))

        # Create and save Law
        law_data = {
            'title': title,
            'status': status,
            'description': description,
            'ts': ts,
            'source_link': response.url,
        }
        law_item, law_created = Law.objects.update_or_create(
            parl_id=parl_id,
            legislative_period=LLP,
            defaults=law_data)

        # Attach foreign keys
        law_item.keywords = self.parse_keywords(response)
        law_item.category = cat
        law_item.documents = self.parse_docs(response)

        law_item.save()

        # Log our progress
        if law_created:
            logtext = u"[{} of {}] Created {} with id {}, LLP {} @ {}"
        else:
            logtext = u"[{} of {}] Updated {} with id {}, LLP {} @ {}"

        logtext = logtext.format(
            self.SCRAPED_COUNTER,
            self.TOTAL_COUNTER,
            red(title),
            cyan(u"[{}]".format(parl_id)),
            green(unicode(LLP)),
            blue(response.url)
        )
        log.msg(logtext, level=log.INFO)

        response.meta['law_item'] = law_item

        # is the tab 'Parlamentarisches Verfahren available?'
        if response.xpath('//h2[@id="tab-ParlamentarischesVerfahren"]'):
            self.parse_parliament_steps(response)

        if response.xpath('//h2[@id="tab-VorparlamentarischesVerfahren"]'):
            self.parse_pre_parliament_steps(response)