def get_results(i):
    """Fetch one page (up to 25) of job results at start index *i* and
    enqueue each one while more results are still wanted.

    NOTE(review): relies on enclosing/module-scope names ``filters``,
    ``api``, ``new_jobs_queue`` and ``desired_result_count``; this looks
    like a stray copy of the helper nested inside ``get_api_results`` —
    confirm it is still referenced anywhere.
    """
    filters['start'] = i
    # Get up to 25 results, using the provided filters with start index.
    results = json.loads(
        CommonFuncs.convertBytesToString(api.search(**filters)))['results']
    # unfinished_tasks counts puts (task_done is never called here), so the
    # check caps the number of enqueued jobs at desired_result_count.  The
    # condition is re-evaluated per item, matching the original comprehension.
    for job in results:
        if new_jobs_queue.unfinished_tasks < desired_result_count:
            new_jobs_queue.put(job)
def get_api_results(self, desired_result_count=1):
    """Return job json objects from the Indeed publisher API.

    Queries the API for every location listed in the user's job profile,
    collecting results until ``desired_result_count`` jobs are queued.

    Args:
        desired_result_count: maximum number of jobs to return.

    Returns:
        A single job dict when ``desired_result_count == 1`` and a job was
        found; otherwise a list of up to ``desired_result_count`` job dicts
        (empty list when nothing was found).

    Raises:
        ValueError: when the publisher id cannot be loaded from
            ``API_KEYS_PATH``.
    """
    job_profile = CommonFuncs.get_job_profile()
    # Locations to query, taken from the job profile.
    locations = CommonFuncs.get_locations_list(job_profile)
    # Keywords connected by OR, e.g. "(python) or (sql)".
    query_list = CommonFuncs.build_query_string(
        job_profile=job_profile,
        or_delim='or',
        bracket1='(',
        bracket2=')',
        adv_supp=True)
    query_string = query_list[0]
    new_jobs_queue = queue.Queue(maxsize=0)
    new_jobs = None
    limit = '25'           # 25 is the max results per request
    lookback_period = '60' # default lookback period (days)

    # CONNECT TO INDEED API FOR JOB QUERIES.
    # Fix: the original built a ValueError without raising it inside a bare
    # except, then continued with api=None and crashed later; it also leaked
    # the key-file handle.  Raise explicitly and close the file.
    try:
        with open(API_KEYS_PATH, 'r') as key_file:
            client_id = json.load(key_file)
        api = IndeedClient(publisher=client_id['publisher_id'])
    except Exception as err:
        raise ValueError('No publisher id found. Filtering aborted.') from err

    filters = {
        'q': query_string,
        'l': '',
        'userip': "1.2.3.4",
        'useragent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)",
        "raw": "False",
        "sort": "date",
        "radius": job_profile.radius,
        "limit": limit,
        "fromage": lookback_period,
    }

    def _get_results(start):
        """Fetch one page of results at *start* and enqueue jobs while more
        are still wanted; add to queue if new."""
        filters['start'] = start
        results = json.loads(
            CommonFuncs.convertBytesToString(
                api.search(**filters)))['results']
        # unfinished_tasks counts puts (task_done is never called), so this
        # caps the number of queued jobs at desired_result_count.
        for job in results:
            if new_jobs_queue.unfinished_tasks < desired_result_count:
                new_jobs_queue.put(job)

    # FIND NEW JOB JSON OBJECTS USING THE INDEED API.
    for location in locations:  # iterate over each location
        filters['l'] = location
        filters['q'] = query_string
        # Total matches for this location, used to build page start indices.
        result_count = int(
            json.loads(
                CommonFuncs.convertBytesToString(
                    api.search(**filters)))['totalResults'])
        # Page through results 25 at a time until enough jobs are queued.
        for start in (str(i) for i in range(0, result_count, 25)):
            if not new_jobs_queue.unfinished_tasks < desired_result_count:
                break
            _get_results(start)
    new_jobs = list(new_jobs_queue.queue)  # snapshot queued results

    # RETURN JOBS
    if new_jobs:
        if desired_result_count == 1:
            # Just return a single job, not in a list.
            return new_jobs[0]
        # Return up to the requested number; if fewer were found, return all.
        return new_jobs[:desired_result_count]
    return []  # no new jobs found