crawler = Crawler(login, password) crawler.add(sys.argv[2]) # Crawl for profiles with # profile headline contains 'wonderful company' #crawler.add_crawl_from_connections(CrawlConditions({"headline": re.compile(r'wonderful company')})) # profile headline contains 'recruiter', taken into account for depth >= 2 #crawler.add_crawl_from_connections(CrawlConditions({"headline": re.compile(r'recruiter')}, 2)) # ->: means connection # eg.: A (initial profile) -> AA (accountant for wonderful company) [depth=1] -> AAA (accountant for wonderful company) [depth=2] **IGNORED** # -> AB (accountant for wonderful company) [depth=1] -> ABA (recruiter for wonderful company) [depth=2] **OK** # Crawl in order to find someone called Patrick working at 'wonderful company' crawler.add_target_short_profile( CrawlTarget({ "fullname": re.compile(r'patrick'), "headline": re.compile(r'wonderful company') })) # and someone called Charles whose work location is France crawler.add_target_full_profile( CrawlTarget({ "fullname": re.compile(r'charles'), "fmt_location": re.compile(r'france') })) # /!\ NO CAPITAL LETTERS while num_scans < max_profiles and crawler.has_next(): # In case of big sleep if num_scans != 0 and num_scans % long_sleep_every == 0: num_loops = long_sleep_time / 60 for i in range(num_loops):