def main():
    """Emit every entity derived from ``get_all_persons(_main_url)``.

    The fetched object is handed to ``get_entities`` and each item it
    produces is passed to ``helpers.emit``.
    """
    persons = get_all_persons(_main_url)
    entities = get_entities(persons)

    for record in entities:
        # Validation is currently switched off: helpers.check(record)
        helpers.emit(record)
Exemplo n.º 2
0
def main():
    """Run the scraper: fetch from ``_main_url`` and emit each entity."""
    # get_all_persons() supplies the input that get_entities() iterates over.
    for item in get_entities(get_all_persons(_main_url)):
        # helpers.check(item) is intentionally left disabled here.
        helpers.emit(item)
Exemplo n.º 3
0
def main():
    """Emit the entities obtained from every address listed in ``URLS``.

    Addresses are processed one after another; all entities of one address
    are emitted before the next address is fetched.
    """
    for source_url in URLS:
        persons = get_all_persons(source_url)
        entities = get_entities(persons)

        for record in entities:
            # Validation is currently switched off: helpers.check(record)
            helpers.emit(record)
Exemplo n.º 4
0
def _generate_entities(data):
    """Build one "company" entity per row of ``data`` and emit it.

    ``data`` is read as a flat, indexable sequence in which every three
    consecutive items describe one row:

    * item 0 -- its ``.text`` is a release date in ``%m/%d/%Y`` format;
    * item 1 -- its ``.text`` is the entity name and it contains an ``<a>``
      whose ``href`` becomes the entity URL;
    * item 2 -- contains an ``<a>`` whose ``href`` points at a page that
      lists related documents.

    The items are presumably Selenium web elements (they are queried with
    ``find_element_by_tag_name`` / ``get_attribute``) -- confirm against the
    caller.  Each finished entity is passed to ``helpers.emit``; the function
    itself returns nothing.

    Raises:
        IndexError: if the last row of ``data`` has fewer than three items.
        ValueError: if a date cell does not match ``%m/%d/%Y``.
    """
    for start in range(0, len(data), 3):
        date_cell = data[start]
        name_cell = data[start + 1]
        docs_cell = data[start + 2]

        parsed_date = datetime.strptime(date_cell.text, '%m/%d/%Y')
        release_date = parsed_date.strftime('%Y-%m-%d')
        name = name_cell.text

        # The attribute is called "href"; "/href" is not a valid attribute
        # name, so the previous lookups could not return the link targets.
        url = name_cell.find_element_by_tag_name('a').get_attribute("href")
        href = docs_cell.find_element_by_tag_name('a').get_attribute("href")

        related = []
        if href:
            doc = BeautifulSoup(helpers.fetch_string(href), "html.parser")
            for td in doc.find_all("td", class_='ms-vb'):
                # A cell without a link makes find() return None, and a link
                # may lack an href; skip both cases explicitly instead of
                # relying on an exception handler.
                anchor = td.find('a')
                if anchor is None:
                    continue
                link = anchor.get('href')
                if link is not None:
                    related.append(link)

        fields = [{
            "name": "Release date",
            "value": release_date
        }, {
            "tag": "url",
            "value": url
        }, {
            "name": "Related documents",
            "value": ' '.join(related)
        }]

        # Identifiers are capped at 99 characters.
        my_id = helpers.make_id(name)[:99]

        entity = {
            "_meta": {
                "id": my_id,
                "entity_type": "company"
            },
            "fields": fields,
            "name": name,
        }

        helpers.emit(entity)
Exemplo n.º 5
0
def _generate_entities(data):
    """Emit a "company" entity for each group of three items in ``data``.

    Within every group the first item's ``.text`` is the release date
    (``%m/%d/%Y``), the second item's ``.text`` is the name and its ``<a>``
    supplies the entity URL, and the third item's ``<a>`` links to a page
    listing related documents.  The items are presumably Selenium web
    elements -- confirm against the caller.

    Entities are handed to ``helpers.emit``; nothing is returned.

    Raises:
        IndexError: if the final group holds fewer than three items.
        ValueError: if a date does not match ``%m/%d/%Y``.
    """
    i = 0
    while i < len(data):
        release_date = datetime.strptime(data[i].text, '%m/%d/%Y')
        release_date = release_date.strftime('%Y-%m-%d')
        name = data[i+1].text

        # Ask for "href": "/href" is not a valid attribute name, so the
        # earlier lookups could not return the link targets.
        url = data[i+1].find_element_by_tag_name('a').get_attribute("href")
        href = data[i+2].find_element_by_tag_name('a').get_attribute("href")

        related = []
        if href:
            doc = BeautifulSoup(helpers.fetch_string(href), "html.parser")
            anchors = (td.find('a') for td in doc.find_all("td", class_='ms-vb'))
            for anchor in anchors:
                # find() returns None for cells without a link, and a link
                # may have no href; both are skipped rather than raising.
                if anchor is not None and anchor.get('href') is not None:
                    related.append(anchor.get('href'))

        related_documents = ' '.join(related)
        fields = [{"name": "Release date", "value": release_date},
                  {"tag": "url", "value": url},
                  {"name": "Related documents", "value": related_documents}]
        i += 3

        # Slicing is a no-op for short ids, so no length check is needed.
        my_id = helpers.make_id(name)[:99]

        entity = {
            "_meta": {
                "id": my_id,
                "entity_type": "company"
            },
            "fields": fields,
            "name": name,
        }

        helpers.emit(entity)
Exemplo n.º 6
0
def build_document(member):
    """Build a "person" entity from one member element and emit it.

    ``member`` must offer ``select(css_selector)`` returning elements with
    ``get_text()`` (presumably a BeautifulSoup tag -- confirm against the
    caller).  The heading text is split by ``role_name_splitter``; element 0
    of its result is used as the role and the remaining elements as the name.

    The entity id is the SHA-224 hex digest of the alphanumeric characters of
    ``role_and_name[1] + serve_state``.  The entity is passed to
    ``helpers.emit``; nothing is returned.

    Raises:
        IndexError: if an expected element is missing from ``member``.
    """
    role_and_name = role_name_splitter(member.select("h2 a")[0].get_text())
    years_served = member.select("ul.memberServed li")[0].get_text()

    # Both values live in the same profile table, so query its rows once.
    profile_rows = member.select("div.memberProfile table tr")
    party = profile_rows[2].select("td")[0].get_text()
    serve_state = profile_rows[0].select("td")[0].get_text()

    # hashlib needs bytes on Python 3.  The seed only contains ASCII letters
    # and digits after the substitution, so the digest is unchanged.
    id_seed = re.sub("[^a-zA-Z0-9]", "", role_and_name[1] + serve_state)
    entity_id = hashlib.sha224(id_seed.encode("utf-8")).hexdigest()

    entity = {
        "_meta": {
            "id": entity_id,
            "entity_type": "person"
        },
        "name": " ".join(role_and_name[1:]).strip(),
        "types": ["pep"],
        "fields": [
            {"name": "Comment", "value": "Member of US Congress"},
            {"name": "Role", "value": role_and_name[0]},
            {"name": "Years Served", "value": years_served},
            {"name": "Political Party", "value": party},
            {"name": "State", "value": serve_state}
        ]
    }
    helpers.emit(entity)
Exemplo n.º 7
0
def build_document(member):
    """Emit a "person" entity assembled from the given member element.

    ``member`` is queried with CSS selectors through ``select`` and the
    matched elements are read with ``get_text()`` (presumably BeautifulSoup
    -- confirm against the caller).  ``role_name_splitter`` splits the heading
    into a sequence whose first element is the role and whose remaining
    elements form the name.

    The id is the SHA-224 hex digest of the letters and digits found in
    ``role_and_name[1] + serve_state``.  The entity goes to ``helpers.emit``;
    nothing is returned.

    Raises:
        IndexError: if an expected element is missing from ``member``.
    """
    role_and_name = role_name_splitter(member.select("h2 a")[0].get_text())
    years_served = member.select("ul.memberServed li")[0].get_text()

    # Party and state are cells of the same table; select its rows once.
    rows = member.select("div.memberProfile table tr")
    party = rows[2].select("td")[0].get_text()
    serve_state = rows[0].select("td")[0].get_text()

    # Encode before hashing: Python 3's hashlib rejects str.  Only ASCII
    # letters and digits survive the substitution, so the digest is the same.
    seed = re.sub("[^a-zA-Z0-9]", "", role_and_name[1] + serve_state)
    digest = hashlib.sha224(seed.encode("utf-8")).hexdigest()

    entity = {
        "_meta": {
            "id": digest,
            "entity_type": "person"
        },
        "name": " ".join(role_and_name[1:]).strip(),
        "types": ["pep"],
        "fields": [{
            "name": "Comment",
            "value": "Member of US Congress"
        }, {
            "name": "Role",
            "value": role_and_name[0]
        }, {
            "name": "Years Served",
            "value": years_served
        }, {
            "name": "Political Party",
            "value": party
        }, {
            "name": "State",
            "value": serve_state
        }]
    }
    helpers.emit(entity)
Exemplo n.º 8
0
def emit_meminfo():
    """Run ``free --human`` and emit its text output via ``helpers.emit``."""
    command = ['free', '--human']
    # universal_newlines=True makes check_output return text, not bytes.
    report = subprocess.check_output(command, universal_newlines=True)
    helpers.emit(report)
Exemplo n.º 9
0
def emit_meminfo():
    """Emit the output of the ``free --human`` command as text."""
    memory_report = subprocess.check_output(
        ['free', '--human'],
        universal_newlines=True,  # decode the command output to str
    )
    helpers.emit(memory_report)
Exemplo n.º 10
0
def main():
    """Emit every entity built from the rows fetched at ``_base_url``."""
    rows = get_rows(_base_url)
    entities = get_entities(rows)

    for record in entities:
        # Validation is currently switched off: helpers.check(record)
        helpers.emit(record)
def main():
    """Emit each entity derived from ``get_all_persons(_DOMAIN)``."""
    # The fetched object is consumed directly by get_entities().
    for item in get_entities(get_all_persons(_DOMAIN)):
        helpers.emit(item)
Exemplo n.º 12
0
def main():
    """Emit every entity produced by ``_get_people`` for ``_base_url``.

    ``_get_parties(_base_url)`` is evaluated first and its result is the
    input of ``_get_people``.
    """
    parties = _get_parties(_base_url)
    people = _get_people(parties)

    for person in people:
        # Validation is currently switched off: helpers.check(person)
        helpers.emit(person)
Exemplo n.º 13
0
def main():
    """Emit each entity produced by ``_generate_entities()``."""
    entities = _generate_entities()
    for record in entities:
        helpers.emit(record)
Exemplo n.º 14
0
def main():
    """Run the scraper: parties from ``_base_url``, then people, then emit."""
    # _get_parties() feeds _get_people(), which supplies the entities.
    party_data = _get_parties(_base_url)
    for entry in _get_people(party_data):
        # helpers.check(entry) is intentionally left disabled here.
        helpers.emit(entry)
Exemplo n.º 15
0
def main():
    """Emit the entities built from rows of ``_main_url`` and ``_second_url``.

    Both addresses are passed to ``get_rows`` together as one list.
    """
    sources = [_main_url, _second_url]
    rows = get_rows(sources)

    for record in get_entities(rows):
        helpers.emit(record)
Exemplo n.º 16
0
def main():
    """Run the scraper: fetch rows from ``_base_url`` and emit each entity."""
    # get_rows() supplies the input that get_entities() iterates over.
    for item in get_entities(get_rows(_base_url)):
        # helpers.check(item) is intentionally left disabled here.
        helpers.emit(item)