# Optional crawl condition: only follow connections whose profile headline
# contains 'recruiter'; the condition is taken into account for depth >= 2.
#crawler.add_crawl_from_connections(CrawlConditions({"headline": re.compile(r'recruiter')}, 2))
# "->" means "connection", e.g.:
#   A (initial profile)
#     -> AA (accountant for wonderful company) [depth=1]
#          -> AAA (accountant for wonderful company) [depth=2] **IGNORED**
#     -> AB (accountant for wonderful company) [depth=1]
#          -> ABA (recruiter for wonderful company) [depth=2] **OK**

# Crawl in order to find someone called Patrick working at 'wonderful company'
crawler.add_target_short_profile(
    CrawlTarget({
        "fullname": re.compile(r'patrick'),
        "headline": re.compile(r'wonderful company'),
    }))
# ... and someone called Charles whose work location is France.
crawler.add_target_full_profile(
    CrawlTarget({
        "fullname": re.compile(r'charles'),
        "fmt_location": re.compile(r'france'),
    }))
# WARNING: the patterns above must not contain capital letters
# (presumably profile fields are lower-cased before matching -- confirm
# against the crawler implementation).

# Scan loop: runs until max_profiles iterations have been counted or the
# crawler reports that nothing is left.
# NOTE(review): nothing visible in this loop advances the crawler; presumably
# has_next() or code outside this view does -- confirm.
while num_scans < max_profiles and crawler.has_next():
    # Every long_sleep_every scans (but not before the first one), take a long
    # pause of about long_sleep_time seconds, split into 60-second slices so
    # progress can be printed in between.
    if num_scans != 0 and num_scans % long_sleep_every == 0:
        # Floor division + int() keeps range() working when long_sleep_time is
        # a float or when "/" is true division; any remainder under 60 s is
        # dropped, exactly as with the original integer division.
        num_loops = int(long_sleep_time // 60)
        for i in range(num_loops):
            # Single-argument parenthesised form prints the same text on
            # Python 2 and Python 3.
            print("Waiting... %d/%d" % (i, num_loops))
            time.sleep(60)

    # Short sleep between each profile.
    num_scans += 1
    time.sleep(sleep_time)