示例#1
0
def test_search_apa():
    """Regular and JSON searches over the same area should agree (±5 ids)."""

    by_id = lambda items: sorted(items, key=lambda p: p.id)

    sleep()

    regular = craigslist.search('washingtondc',
                                'apa',
                                postal=20071,
                                search_distance=1,
                                type_='regularsearch')
    posts = by_id(list(regular))

    sleep()

    json_gen = craigslist.search('washingtondc',
                                 'apa',
                                 postal=20071,
                                 search_distance=1,
                                 type_='jsonsearch')
    posts2 = by_id(list(json_gen))

    ids_regular = {p.id for p in posts}
    ids_json = {p.id for p in posts2}

    # allow the two backends to differ by up to 5 listings either way
    assert len(ids_regular - ids_json) <= 5
    assert len(ids_json - ids_regular) <= 5

    # stash one post's id/url for a later test to reuse
    global post_id, post_url
    first = posts[0]
    post_id = first.id
    post_url = first.url
示例#2
0
def test_search_apa_with_clusters_or_pages():
    """Consuming 200 results forces pagination / cluster fetching."""
    paged = craigslist.search('washingtondc',
                              'apa',
                              postal=20071,
                              search_distance=1,
                              type_='regularsearch')
    # drain 200 posts so at least one additional page is fetched
    for _ in islice(paged, 200):
        pass

    clustered = craigslist.search('washingtondc',
                                  'apa',
                                  postal=20071,
                                  search_distance=1)
    # drain 200 posts so at least one cluster is fetched
    for _ in islice(clustered, 200):
        pass
示例#3
0
def test_search_apa_with_detail():
    """A detailed search should yield at least one post without raising."""
    detailed = craigslist.search('washingtondc',
                                 'apa',
                                 postal=20071,
                                 search_distance=1,
                                 get_detailed_posts=True)
    first_post = next(detailed)
示例#4
0
def search():
    """
    Search for Craigslist locations and return the result as a JSON string.

    Returns an object with one key per searched location:

       {
        'result': {
          'location1': [item1, item2, ...],
          'location2': [item1, item2, ...]
        }
       }

    If the query, location list, or category is missing, 'result' is an
    empty object.
    """
    args = flask.request.args
    locations = args.getlist('location')
    category = args.get('type', None)
    query = args.get('q', None)
    filters = get_filters(args)

    # only run the search when all three required inputs are present
    listings = {}
    if query and locations and category:
        listings = {
            location: craigslist.search(location, category, query,
                                        filters=filters)
            for location in locations
        }

    return flask.jsonify(result=listings)
示例#5
0
def test_search_sss():
    """For-sale search with a query and filters should yield at least one post."""
    results = craigslist.search('vancouver',
                                'sss',
                                query='shoes',
                                condition=[10, 20],
                                hasPic=1,
                                max_price=20)
    first_post = next(results)
示例#6
0
def test_search_with_debug_executor():
    """Searching through the debug executor should drain cleanly."""
    results = craigslist.search('washingtondc',
                                'apa',
                                postal=20071,
                                search_distance=1,
                                executor_class='craigslist.io.DebugExecutor')
    # drain 200 posts to force fetching at least one cluster
    for _ in islice(results, 200):
        pass
示例#7
0
def test_search_apa():
    """The first post from regular and JSON searches should be the same listing."""
    regular = craigslist.search('washingtondc',
                                'apa',
                                postal=20071,
                                search_distance=1,
                                type_='regularsearch')
    post = next(regular)

    json_search = craigslist.search('washingtondc',
                                    'apa',
                                    postal=20071,
                                    search_distance=1)
    post2 = next(json_search)

    assert post.id == post2.id
    assert post.title == post2.title
    # the JSON backend carries seconds; zero them out before comparing
    assert arrow.get(post.date) == arrow.get(post2.date).replace(second=0)

    # stash one post's id/url for a later test to reuse
    global post_id, post_url
    post_id = post.id
    post_url = post.url
示例#8
0
async def test_search_apa_async():
    """Async and sync searches over the same area should return the same posts."""
    async_gen = craigslist.search_async('washingtondc',
                                        'apa',
                                        postal=20071,
                                        search_distance=1)
    async_posts = [p async for p in async_gen]

    sync_gen = craigslist.search('washingtondc',
                                 'apa',
                                 postal=20071,
                                 search_distance=1)
    sync_posts = list(sync_gen)

    def by_id(items):
        return sorted(items, key=lambda p: p.id)

    assert by_id(async_posts) == by_id(sync_posts)
示例#9
0
        def cli_search(args):
            """Run a search from parsed CLI args and print posts as JSON lines.

            Exits the process with a nonzero status if the search raises a
            CraigslistException.
            """
            # TODO: make this a whitelist not a blacklist for starters
            filter_out_params = [
                'verbose', 'command', 'area', 'category', 'detail',
                'executor_class', 'method', 'max_workers', 'limit', 'cache',
                'func'
            ]
            # keep only truthy args that aren't consumed by the CLI itself
            params = {
                k: v
                for k, v in vars(args).items()
                if v and k not in filter_out_params
            }
            logging.info('querying with parameters: {}'.format(params))

            # argparse stops honoring `choices` when nargs is defined, so map
            # the raw CLI strings back through the declared choices ourselves.
            # TODO: subclass ArgumentParser to make this happen automatically
            for k, v in params.items():
                spec = search_arguments.get(k)
                if (spec is not None
                        and spec.get('nargs') == '*'
                        and spec.get('choices') is not None
                        and isinstance(v, list)):
                    mapping = spec['choices']
                    params[k] = [mapping[x] for x in v]

            executor = make_executor(args.executor_class, args.max_workers)
            posts = itertools.islice(
                search(args.area,
                       args.category,
                       get_detailed_posts=args.detail,
                       executor=executor,
                       type_=args.method,
                       **params), 0, args.limit)
            try:
                for post in posts:
                    print(json.dumps(post._asdict()))
                # TODO: this is wrong, if I want to limit, I need to do it
                # in some other way than by using islice
                # maybe I can using the signal library to send a signal?
                # https://docs.python.org/3/library/signal.html
                # executor.shutdown(wait=False)
                # sys.exit()
            except CraigslistException as e:
                print(e, file=sys.stderr)
                # exit nonzero so shells and callers can detect the failure
                # (previously exited with status 0 even on error)
                sys.exit(1)
示例#10
0
async def test_search_apa_with_detail_async():
    """Detailed async and sync searches should return identical posts."""

    sleep()

    async_gen = craigslist.search_async('washingtondc',
                                        'apa',
                                        postal=20071,
                                        search_distance=0.1,
                                        get_detailed_posts=True)
    async_posts = [p async for p in async_gen]

    sleep()

    sync_gen = craigslist.search('washingtondc',
                                 'apa',
                                 postal=20071,
                                 search_distance=0.1,
                                 get_detailed_posts=True)
    sync_posts = list(sync_gen)

    def by_id(items):
        return sorted(items, key=lambda p: p.id)

    assert by_id(async_posts) == by_id(sync_posts)
示例#11
0
def main():
    """Print every apartment listing within 1 mile of postal code 20071."""
    results = craigslist.search('washingtondc', 'apa',
                                postal=20071, search_distance=1)
    for listing in results:
        print(listing)
示例#12
0
def PROCESS_URLS(host, found_urls):
    """Record any not-yet-seen URLs for *host* in the DB.

    found_urls maps url -> date_posted. Every URL that is new to the DB is
    written to it and collected; the DB is saved once at the end.

    Returns a dict mapping each newly seen url to its post date.
    """
    NEW_URLS = {}

    for url, date_posted in found_urls.items():
        # parenthesized print of a single expression behaves identically
        # under Python 2 (statement) and Python 3 (function call)
        print(url)
        if IS_NEW_TO_DB(host, url, date_posted):
            WRITE_TO_DB(host, url, date_posted)
            NEW_URLS[url] = date_posted
            print("FOUND NEW URL POSTED ON %s: %s" % (date_posted, url))

    SAVE_DB()

    return NEW_URLS


######################
## RUN
# Map each human-readable host name to an iterable of its search results,
# then persist and notify about any URLs we haven't seen before.
# NOTE(review): craigslist.search() is called here with no arguments, but
# elsewhere it is always given at least an area and a category — confirm
# this zero-argument call is intentional and not a stub.

MAPPING = {
    "Craigslist": craigslist.search(),
}

for host, search_results in MAPPING.items():
    new_urls = PROCESS_URLS(host, search_results)

    if len(new_urls) > 0:
        NOTIFY_OF_NEW_URL(new_urls, host, IS_LIVE_RUN)
    else:
        print "No new URLs"