def test_search_apa():
    """Regular and JSON search within 1 mile of zip 20071 should return
    (nearly) the same posts; allow a drift of up to 5 ids either way."""
    def by_id(posts):
        return sorted(posts, key=lambda p: p.id)

    sleep()
    regular = by_id(list(craigslist.search(
        'washingtondc', 'apa', postal=20071, search_distance=1,
        type_='regularsearch')))
    sleep()
    via_json = by_id(list(craigslist.search(
        'washingtondc', 'apa', postal=20071, search_distance=1,
        type_='jsonsearch')))

    regular_ids = {p.id for p in regular}
    json_ids = {p.id for p in via_json}
    # instead of assert A == B, lets give it a tolerance of 5
    assert len(regular_ids - json_ids) <= 5
    assert len(json_ids - regular_ids) <= 5

    # save post id and url for use in a later test
    global post_id, post_url
    first = regular[0]
    post_id = first.id
    post_url = first.url
def test_search_apa_with_clusters_or_pages():
    """Consume enough results to force pagination (regular search) and
    cluster expansion (default search)."""
    paged = craigslist.search('washingtondc', 'apa', postal=20071,
                              search_distance=1, type_='regularsearch')
    # drain up to 200 posts to force getting at one more page
    for _ in islice(paged, 0, 200):
        pass

    clustered = craigslist.search('washingtondc', 'apa', postal=20071,
                                  search_distance=1)
    # drain up to 200 posts to force getting at least one cluster
    for _ in islice(clustered, 0, 200):
        pass
def test_search_apa_with_detail():
    """A detailed-posts search should yield at least one post."""
    detailed = craigslist.search('washingtondc', 'apa', postal=20071,
                                 search_distance=1, get_detailed_posts=True)
    post = next(detailed)
def search():
    """
    Search for Craigslist locations and return the result as a JSON string.

    Returns an object with the following structure:
    {
        'result': {
            'location': [item1, item2, ...]
        }
    }

    If the user searched for multiple locations, the object will have a key
    for each location, e.g.:
    {
        'result': {
            'location1': [item1, item2, ...],
            'location2': [item1, item2, ...]
        }
    }
    """
    args = flask.request.args
    locations = args.getlist('location')
    category = args.get('type', None)
    query = args.get('q', None)
    filters = get_filters(args)

    listings = {}
    # only run a search when all three required inputs are present
    if query and locations and category:
        for location in locations:
            # craigslist.search yields posts lazily (it is consumed with
            # next() elsewhere); materialize the generator so flask.jsonify
            # can serialize the results instead of failing on a generator.
            listings[location] = list(
                craigslist.search(location, category, query, filters=filters))
    return flask.jsonify(result=listings)
def test_search_sss():
    """A for-sale ('sss') search with query and filters yields a post."""
    results = craigslist.search('vancouver', 'sss', query='shoes',
                                condition=[10, 20], hasPic=1, max_price=20)
    post = next(results)
def test_search_with_debug_executor():
    """Searching with the debug executor still paginates through results."""
    results = craigslist.search(
        'washingtondc', 'apa', postal=20071, search_distance=1,
        executor_class='craigslist.io.DebugExecutor')
    # drain up to 200 posts to force getting at least one cluster
    for _ in islice(results, 0, 200):
        pass
def test_search_apa():
    """The default search and an explicit regularsearch should agree on the
    first post (dates compared with seconds zeroed out)."""
    regular = craigslist.search('washingtondc', 'apa', postal=20071,
                                search_distance=1, type_='regularsearch')
    first = next(regular)

    default = craigslist.search('washingtondc', 'apa', postal=20071,
                                search_distance=1)
    other = next(default)

    assert first.id == other.id
    assert first.title == other.title
    # one search type reports dates without seconds, so zero them before comparing
    assert arrow.get(first.date) == arrow.get(other.date).replace(second=0)

    # save post id and url for use in a later test
    global post_id, post_url
    post_id = first.id
    post_url = first.url
async def test_search_apa_async():
    """The async search must yield the same posts as the sync search."""
    async_results = craigslist.search_async('washingtondc', 'apa',
                                            postal=20071, search_distance=1)
    async_posts = [p async for p in async_results]

    sync_results = craigslist.search('washingtondc', 'apa', postal=20071,
                                     search_distance=1)
    sync_posts = list(sync_results)

    def by_id(posts):
        return sorted(posts, key=lambda p: p.id)

    assert by_id(async_posts) == by_id(sync_posts)
def cli_search(args):
    """Run a search from parsed CLI arguments and print posts as JSON lines."""
    # TODO: make this a whitelist not a blacklist for starters
    excluded = {
        'verbose', 'command', 'area', 'category', 'detail', 'executor_class',
        'method', 'max_workers', 'limit', 'cache', 'func',
    }
    # keep only truthy argument values that aren't CLI bookkeeping
    params = {k: v for k, v in vars(args).items()
              if v and k not in excluded}
    logging.info('querying with parameters: {}'.format(params))

    # subclass ArgumentParser to make this happen automatically
    # it seems to stop using the `choices` parameter if nargs is defined
    for name, value in params.items():
        if name not in search_arguments:
            continue
        spec = search_arguments[name]
        if (spec.get('nargs') == '*'
                and spec.get('choices') is not None
                and isinstance(value, list)):
            choice_map = spec.get('choices')
            params[name] = [choice_map[item] for item in value]

    executor = make_executor(args.executor_class, args.max_workers)
    posts = itertools.islice(
        search(args.area, args.category, get_detailed_posts=args.detail,
               executor=executor, type_=args.method, **params),
        0, args.limit)
    try:
        for post in posts:
            print(json.dumps(post._asdict()))
            # TODO: this is wrong, if I want to limit, I need to do it
            # in some other way than by using islice
            # maybe I can using the signal library to send a signal?
            # https://docs.python.org/3/library/signal.html
            # executor.shutdown(wait=False)
            # sys.exit()
    except CraigslistException as e:
        print(e, file=sys.stderr)
        sys.exit()
async def test_search_apa_with_detail_async():
    """Detailed async search should match the detailed sync search."""
    sleep()
    async_gen = craigslist.search_async('washingtondc', 'apa', postal=20071,
                                        search_distance=0.1,
                                        get_detailed_posts=True)
    async_posts = [p async for p in async_gen]

    sleep()
    sync_gen = craigslist.search('washingtondc', 'apa', postal=20071,
                                 search_distance=0.1,
                                 get_detailed_posts=True)
    sync_posts = list(sync_gen)

    def by_id(posts):
        return sorted(posts, key=lambda p: p.id)

    assert by_id(async_posts) == by_id(sync_posts)
def main():
    """Print every apartment post within 1 mile of zip 20071."""
    results = craigslist.search('washingtondc', 'apa', postal=20071,
                                search_distance=1)
    for post in results:
        print(post)
def PROCESS_URLS(host, found_urls):
    """Record any not-yet-seen URLs for *host* in the DB and return them.

    found_urls maps url -> date_posted; the returned dict has the same
    shape but contains only the URLs that were new to the DB.
    """
    NEW_URLS = {}
    for url, date_posted in found_urls.items():
        # print() calls (instead of Python 2 print statements) keep this
        # script consistent with the Python 3 code elsewhere in the project
        # while remaining valid Python 2 for these single-argument forms.
        print(url)
        if IS_NEW_TO_DB(host, url, date_posted):
            WRITE_TO_DB(host, url, date_posted)
            NEW_URLS[url] = date_posted
            print("FOUND NEW URL POSTED ON %s: %s" % (date_posted, url))
    SAVE_DB()
    return NEW_URLS


######################
## RUN
MAPPING = {
    # NOTE(review): craigslist.search() is called with no arguments here,
    # unlike every other call site (which passes area/category) -- confirm
    # the intended arguments before relying on this script.
    "Craigslist": craigslist.search(),
}

for host, search_results in MAPPING.items():
    new_urls = PROCESS_URLS(host, search_results)
    if new_urls:
        NOTIFY_OF_NEW_URL(new_urls, host, IS_LIVE_RUN)
    else:
        print("No new URLs")