Exemplo n.º 1
0
def scrape():
    """Scrape an image for every person row from the Google Docs sheet.

    Rows are fanned out across THREAD_COUNT workers; all results are
    written to one shared MultiCSV, which is closed when the pool drains.
    """
    output = MultiCSV()

    def handle(row):
        # Columns are addressed by sheet header name.
        scrape_image(row['Full Name'], row['Image URL'],
                     output, row['Image Credit'])

    threaded(gdocs_persons(), handle, num_threads=THREAD_COUNT)
    output.close()
Exemplo n.º 2
0
def scrape():
    """Thread image scraping across all Google Docs person records.

    Each record supplies the person's name, image URL, and image credit;
    the shared MultiCSV sink is closed once every worker has finished.
    """
    sink = MultiCSV()

    def process(record):
        scrape_image(record['Full Name'], record['Image URL'],
                     sink, record['Image Credit'])

    threaded(gdocs_persons(), process, num_threads=THREAD_COUNT)
    sink.close()
Exemplo n.º 3
0
def scrape():
    """Scrape one image per name produced from the Google Docs sheet."""
    out = MultiCSV()

    def fetch_one(person):
        scrape_image(person, out)

    threaded(make_names_from_gdocs(), fetch_one,
             num_threads=THREAD_COUNT)
    out.close()
Exemplo n.º 4
0
def load():
    """Load organizations and persons from the API into a MultiCSV.

    Fetches the API metadata once, loads organizations first (persons
    receive the resulting org mapping for cross-referencing), and always
    closes the CSV sink so buffered rows are flushed even on failure.
    """
    csv = MultiCSV()
    try:
        # NOTE(review): assumes the API root exposes a 'meta' key —
        # .get() yields None if it is absent; downstream loaders must
        # tolerate that. Confirm against the API schema.
        api_meta = requests.get(INSTANCE_URL).json().get('meta')
        orgs = load_organizations(api_meta, csv)
        load_persons(api_meta, csv, orgs)
    finally:
        # Every other entry point in this file closes its MultiCSV;
        # the original leaked it here. Dead `#orgs = {}` line removed.
        csv.close()
Exemplo n.º 5
0
def scrape(limit):
    """Run a search session capped at *limit*, then print its report.

    The MultiCSV sink is closed before reporting so all rows are flushed.
    """
    session = Searcher(limit)
    rows = MultiCSV()
    session.init_session(rows)
    rows.close()
    session.report()
Exemplo n.º 6
0
def scrape():
    """Scrape an image for each name coming from the Google Docs source."""
    writer = MultiCSV()

    def grab(name):
        scrape_image(name, writer)

    threaded(make_names_from_gdocs(), grab, num_threads=THREAD_COUNT)
    writer.close()
Exemplo n.º 7
0
def scrape(limit):
    """Run a bounded search session and emit its results via MultiCSV."""
    finder = Searcher(limit)
    output = MultiCSV()
    finder.init_session(output)
    output.close()
    # Report after the sink is closed, matching the session lifecycle.
    finder.report()
Exemplo n.º 8
0
def scrape_npos():
    """Scrape every NPO page with 30 workers into a shared MultiCSV."""
    results = MultiCSV()

    def worker(item):
        scrape_npo(results, item)

    threaded(make_urls(), worker, num_threads=30)
    results.close()
Exemplo n.º 9
0
def scrape_ngos():
    """Scrape all NGO pages (3 workers) into a shared MultiCSV."""
    sheet = MultiCSV()

    def visit(target):
        scrape_ngo(sheet, target)

    threaded(make_urls(), visit, num_threads=3)
    sheet.close()
Exemplo n.º 10
0
def scrape_companies():
    """Scrape each company URL with 5 worker threads into one MultiCSV."""
    out = MultiCSV()

    def fetch(target):
        scrape_company(out, target)

    threaded(make_urls(), fetch, num_threads=5)
    out.close()
Exemplo n.º 11
0
def scrape_persons():
    """Scrape every person page (25 workers) into a shared MultiCSV."""
    store = MultiCSV()

    def pull(item):
        scrape_person(store, item)

    threaded(make_urls(), pull, num_threads=25)
    store.close()
Exemplo n.º 12
0
def scrape():
    """Walk the index and scrape each record with 30 worker threads."""
    output = MultiCSV()

    def handle(entry):
        scrape_record(output, entry)

    threaded(scrape_index(), handle, num_threads=30)
    output.close()
Exemplo n.º 13
0
def scrape():
    """Scrape every indexed record concurrently into one MultiCSV."""
    sink = MultiCSV()
    threaded(
        scrape_index(),
        lambda entry: scrape_record(sink, entry),
        num_threads=30,
    )
    sink.close()
Exemplo n.º 14
0
def scrape_contracts():
    """Scrape each contract URL (30 workers) into a shared MultiCSV."""
    rows = MultiCSV()

    def worker(item):
        scrape_contract(rows, item)

    threaded(make_urls2(), worker, num_threads=30)
    rows.close()
Exemplo n.º 15
0
            return

        self.url_graph.add_edge(src_url, dest_url)
        self.url_graph.node[dest_url]['min_distance'] = this_distance
        # if dest_url is within range and it used to be outside range
        if this_distance <= self.degrees and min_distance > self.degrees:
            self.url_to_scrape.add(dest_url)
        # if dest_url's children are within range and used to be outside range
        if this_distance < self.degrees and min_distance + 1 > self.degrees:
            for child_url in self.url_graph[dest_url].keys():
                self.add_edge(dest_url, child_url)


if __name__ == '__main__':
    # Degrees of separation to crawl; defaults to 0 when the CLI
    # argument is missing or not an integer.
    try:
        degrees = int(sys.argv[1])
    except (IndexError, ValueError):
        degrees = 0

    scraper = NetworkScraper(csv=MultiCSV(), thread_count=5, degrees=degrees)
    scraper.start()
    for person in gdocs_persons():
        try:
            scraper.scrape(
                search_term=person['Full Name'],
                start_url=person['WhosWho'],
            )
        except Exception as e:
            # Best-effort: report the failure and move to the next person.
            sys.stderr.write("%s\n" % str(e))
    scraper.finish()
Exemplo n.º 16
0
def scrape_npos():
    """Scrape all NPO URLs concurrently (30 workers) into one MultiCSV."""
    csv_out = MultiCSV()
    threaded(
        make_urls(),
        lambda item: scrape_npo(csv_out, item),
        num_threads=30,
    )
    csv_out.close()
Exemplo n.º 17
0
def scrape():
    """Run a full results-scraper session, writing output to a MultiCSV."""
    scraper = ResultsScraper()
    csv = MultiCSV()
    # NOTE(review): sibling entry points call `searcher.init_session(csv)`
    # directly, but here `.run(...)` is invoked on the attribute — confirm
    # that ResultsScraper.init_session is a task object exposing run(),
    # and not a typo for `scraper.init_session(csv)`.
    scraper.init_session.run(csv)
    csv.close()
    scraper.report()