def search_handler():
    """Parse the requested listing page and store the flats not seen before.

    The raw query string of the current request is handed to
    ``parser.parse_page``. Every parsed entry whose ``'number'`` is already
    present in the ``Flats`` collection is flagged with ``entry['old'] = True``;
    the remaining entries are inserted into ``Flats``.

    Returns:
        A dict with two keys:
        ``'entries'`` -- all parsed entries (already-stored ones carry
        ``'old': True``), and
        ``'next_offset'`` -- the request's ``start`` parameter (0 when absent)
        plus the number of entries parsed from this page.

    Raises:
        KeyError: if a parsed entry has no ``'number'`` key.
        ValueError: if the ``start`` query parameter is not an integer.
    """
    entries = parser.parse_page(request.query_string)

    # A page may list the same number more than once, so remember every
    # position per number; a plain number -> index map would keep only the
    # last position and leave earlier duplicates unflagged (and re-inserted).
    indices_by_number = {}
    for index, entry in enumerate(entries):
        indices_by_number.setdefault(entry['number'], []).append(index)

    # Only the 'number' field is requested: it is all we need to match on.
    selection = Flats.find(
        {'number': {'$in': list(indices_by_number)}},
        fields=['number'],
    )
    for stored in selection:
        for index in indices_by_number[stored['number']]:
            entries[index]['old'] = True

    new_entries = [e for e in entries if not e.get('old')]
    if new_entries:
        # Insert a deep copy so the dicts returned to the caller are not the
        # objects handed to the collection -- presumably because the driver
        # mutates inserted documents (e.g. adds an id); TODO confirm.
        Flats.insert(copy.deepcopy(new_entries))

    next_offset = int(request.query.get('start', 0)) + len(entries)
    return {'entries': entries, 'next_offset': next_offset}
def parse_pages(flats):
    """Walk the listing page by page and insert unseen entries into *flats*.

    Each iteration requests one page via ``parser.parse_page`` using a fixed
    set of search parameters plus the current ``start`` offset, filters the
    result through ``remove_old_entries`` and inserts what is left into
    *flats*. The walk ends at the first page that has no entries left after
    filtering (which includes a page that parsed to nothing at all).

    Args:
        flats: collection object; it is passed to ``remove_old_entries`` and
            must provide ``insert(list_of_entries)``.
    """
    base_query = {
        'object_type': 0,
        'operation': 0,
        'oblast_id': 37,
        'town_id': 99511,
        'view': 'table',
    }
    start = 0
    while True:
        # 'start' is appended after the fixed parameters, page after page.
        query_string = urllib.parse.urlencode(dict(base_query, start=start))
        page = parser.parse_page(query_string)
        total_on_page = len(page)

        # The return value is ignored, so the filtering only takes effect if
        # remove_old_entries mutates the list in place (defined elsewhere --
        # TODO confirm).
        remove_old_entries(page, flats)
        if not page:
            return

        flats.insert(page)
        report = "Parsed {0} entries, added {1} new entries".format(
            total_on_page, len(page)
        )
        print(report)

        # Advance by everything the page contained, not just the new entries.
        start += total_on_page