def get_group_link(event):
    """Return the 'Group link' field of the events row whose 'Event' equals *event*."""
    events_table = Airtable(BASE_KEY, 'events', api_key=API_KEY)
    matched = events_table.match('Event', event)
    return matched['fields']['Group link']
def add_visit(page_id):
    """Increment today's visit counter in the page's Airtable table.

    Creates the day row (Visits=1) when today has no record yet. Any
    failure is captured in the Response object rather than raised.
    """
    result = Response(True)
    try:
        table = Airtable(AIRTABLE_BASE_ID, PAGE_TABLES[int(page_id)])
        today = datetime.date.today().strftime("%d-%m-%Y")
        day_record = table.match('Date', today)
        if not day_record:
            table.insert({'Date': today, 'Visits': 1})
        else:
            bumped = day_record['fields']['Visits'] + 1
            table.update(day_record['id'], {'Visits': bumped}, typecast=True)
    except Exception as exc:
        result.status = False
        result.message = str(exc)
    return result.compose()
def get_user_by_password(password):
    """Fetch the user record whose 'password' field matches; raise UserNotFound otherwise."""
    table = Airtable(BASE_ID, USERS_TABLE_NAME, api_key=API_KEY)
    record = table.match('password', password)
    if not record:
        raise UserNotFound
    logging.info('get_user_by_password')
    return record
class Api():
    """Wrapper over two Airtable tables: a per-principal state row and a timesheet."""

    def __init__(self, *, config=None):
        key = config['airtable_api_key']
        base = config['airtable_base_key']
        # NOTE: attribute spelled 'pricipal' (sic) in the original; kept
        # unchanged so external readers of the attribute keep working.
        self.pricipal = config['principal']
        self.lunch_break = config['lunch_break']
        self.state = Airtable(base, config['table_name_state'], key)
        self.timesheet = Airtable(base, config['table_name_timesheet'], key)

    def get_state(self):
        """Return the state_name stored for this principal."""
        row = self.state.match('state_principal', self.pricipal)
        return row['fields']['state_name']

    def set_state(self, *, state_name=None):
        """Persist *state_name* on this principal's state row."""
        self.state.update_by_field('state_principal', self.pricipal,
                                   {'state_name': state_name})

    def clock_in(self, *, day=None, time=None):
        """Insert a timesheet row for *day* with clock-in time and lunch break."""
        self.timesheet.insert({
            'record_day': day,
            'clock_in_time': time,
            'lunch_break': self.lunch_break,
        })

    def clock_out(self, *, day=None, time=None):
        """Stamp the clock-out time onto the timesheet row for *day*."""
        self.timesheet.update_by_field('record_day', day,
                                       {'clock_out_time': time})
def get_user_by_id_in_media(media, user_id):
    """Fetch the user whose '<media>_id' field equals *user_id*; raise UserNotFound if absent."""
    table = Airtable(BASE_ID, USERS_TABLE_NAME, api_key=API_KEY)
    field = media.name.lower() + '_id'
    record = table.match(field, user_id)
    if not record:
        raise UserNotFound
    logging.info('get_user_by_id_in_media')
    return record
def get_user_by_passwords_hash(passwords_hash):
    """Fetch the user record by its 'passwords_hash' field; raise UserNotFound if absent."""
    table = Airtable(BASE_ID, USERS_TABLE_NAME, api_key=API_KEY)
    record = table.match(field_name='passwords_hash',
                         field_value=passwords_hash)
    if not record:
        raise UserNotFound
    logging.info('get_user_by_passwords_hash')
    return record
def registeragent(agent, username, userid):
    """Register a new agent in the 'member' table.

    Returns False when the agent already exists (duplicate check),
    True after inserting the new row.
    """
    members = Airtable(BASE_KEY, 'member', api_key=API_KEY)
    if members.match('Agent', agent):  # duplicate check
        return False
    members.insert({'Agent': agent, 'User name': username, 'User id': userid})
    return True
def insert_level(agent_userid, level):
    """Set an agent's classification level exactly once.

    Returns False if the member row already carries a level,
    True after writing it.
    """
    members = Airtable(BASE_KEY, 'member', api_key=API_KEY)
    existing = members.match('User id', agent_userid)
    if 'Classification level' in existing['fields']:
        return False
    members.update_by_field('User id', agent_userid,
                            {'Classification level': level})
    return True
def get_event_date(event_code):
    """Return the 'Date' field of the event with *event_code*, or '' if unavailable.

    Fix: the original bare ``except:`` also swallowed SystemExit and
    KeyboardInterrupt; narrowed to ``except Exception`` so interpreter-level
    signals propagate while the best-effort fallback to '' is preserved.
    """
    airtable_client = Airtable('app1TMHqTJRUWnrvB', 'Events',
                               api_key=settings.EVENTS_API_KEY)
    try:
        # match() returns {} when nothing matches, so ['fields'] raises
        # KeyError; network/auth failures raise from the HTTP layer.
        meeting_date = airtable_client.match(
            'Event Code', str(event_code))['fields']['Date']
    except Exception:
        meeting_date = ""
    return meeting_date
def getevent_byorganizer(organizer_userid):
    """Return events organized by the member with *organizer_userid*.

    Reads the member row's 'Events organized' linked-record ids and
    resolves each via getevent_byrecordid.
    """
    airtable = Airtable(BASE_KEY, 'member', api_key=API_KEY)
    # NOTE(review): airtable-python-wrapper's match() builds its own formula
    # from the field/value pair; this extra filterByFormula kwarg may be
    # ignored or conflict with it — confirm against the library version used.
    eventrecordlist = airtable.match(
        'User id', organizer_userid,
        filterByFormula='IS_AFTER({Date}, TODAY())'
    )['fields']['Events organized']
    # getevent_byrecordid returns a sequence; index 0 is presumably the
    # event name — verify against that helper's definition.
    eventList = [getevent_byrecordid(i)[0] for i in eventrecordlist]
    return eventList
def get_generations_counter():
    """Fetch the 'Generations' counter row.

    Returns {'table': <Airtable>, 'counter': <record>} when the row
    exists, otherwise None.
    """
    table = Airtable(AIRTABLE_BASE_ID, 'GenerationsCounter')
    counter_record = table.match('Name', 'Generations')
    if not counter_record:
        return None
    return {'table': table, 'counter': counter_record}
def matcher(x):
    """Resolve *x* against the lookup table.

    'primary' entries resolve to their own name, 'variant' entries to
    their 'use' field; anything else — no match, unknown type, or a
    missing field — resolves to 'Not found'.

    Fixes: the original left ``returner`` unbound (NameError at the
    return) when the record's type was neither 'primary' nor 'variant',
    and used a bare ``except:``.
    """
    table_name = 'xxx'
    base_key = 'xxx'
    airtable = Airtable(base_key, table_name, api_key='xxx')
    results = airtable.match('name', x)
    try:
        record_type = results['fields']['type']
        if record_type == 'primary':
            return results['fields']['name']
        if record_type == 'variant':
            return results['fields']['use']
    except (KeyError, TypeError):
        # No match ({} from match()) or missing fields fall through.
        pass
    return 'Not found'
def joinevent(recordid, event):
    """Add *recordid* to an event's attendee list.

    Returns False when the record is already attending, otherwise the
    new attendee count after the update is written back.
    """
    events = Airtable(BASE_KEY, 'events', api_key=API_KEY)
    fields = events.match('Event', event)['fields']
    attendees = fields.get('Attendee')
    if attendees is None:
        attendees = [recordid]
    elif recordid in attendees:
        return False
    else:
        attendees.append(recordid)
    events.update_by_field('Event', event, {'Attendee': attendees})
    return len(attendees)
def exitevent(recordid, event):
    """Remove *recordid* from an event's attendee list.

    Returns True after a successful removal, False when the record
    was not attending (or the event has no attendees at all).
    """
    events = Airtable(BASE_KEY, 'events', api_key=API_KEY)
    fields = events.match('Event', event)['fields']
    attendees = fields.get('Attendee')
    if not attendees or recordid not in attendees:
        return False
    attendees.remove(recordid)
    events.update_by_field('Event', event, {'Attendee': attendees})
    return True
def getattendee(event):
    """Return (count, attendee listing, codename listing) for *event*.

    When the event has no attendees, returns (0, '', '').
    """
    events = Airtable(BASE_KEY, 'events', api_key=API_KEY)
    fields = events.match('Event', event)['fields']
    if 'Attendee' not in fields:
        return 0, '', ''
    attendees = fields['Attendee']
    agents = getagentinfo_byrecordid(attendees)
    # agents[5]: per-agent "codename + @username" strings, joined one per line;
    # agents[1]: codenames with @.
    return len(attendees), '\n'.join(agents[5]), agents[1]
async def insert_or_update_record(an_record: ATRecord):
    """
    Given an AN record for an already-set context, see if there's an
    existing AT record for the same key. If not, insert it; if so,
    attach the parsed Airtable record to ``an_record.at_match`` so the
    caller can compare/merge the two.
    """
    record_type = MC.get()
    at_key, at_base, at_table, at_typecast = MC.at_connect_info()
    at = Airtable(at_base, at_table, api_key=at_key)
    # match() returns a falsy value when no row carries this key.
    record_dict = at.match(MC.at_key_field(), an_record.key)
    if not record_dict:
        prinl(f"Uploading new {record_type} record.")
        at.insert(an_record.all_fields(), typecast=at_typecast)
        return
    prinlv(f"Retrieved matching Airtable {record_type} record.")
    # from_record presumably returns a falsy value for rows it cannot
    # parse — only attach a valid match. TODO confirm against ATRecord.
    if at_record := ATRecord.from_record(record_dict):
        an_record.at_match = at_record
def getmyevent(userid):
    """Split a member's attended events into (past, future) name lists.

    An event with a negative 'Countdown' counts as past; zero or
    positive counts as upcoming. Members with no 'Events attended'
    yield two empty lists.
    """
    members = Airtable(BASE_KEY, 'member', api_key=API_KEY)
    fields = members.match('User id', userid)['fields']
    if 'Events attended' not in fields:
        return [], []
    records = [getevent_byrecordid(i)[2] for i in fields['Events attended']]
    past = [r['fields']['Event'] for r in records
            if r['fields']['Countdown'] < 0]
    future = [r['fields']['Event'] for r in records
              if r['fields']['Countdown'] >= 0]
    return past, future
def updateAirtable():
    """Write each person's three nearest active centres onto their People row.

    Iterates the people-wanting-course mapping (person -> record id),
    geocodes by the person's first postcode token, and updates the
    matching Airtable row.
    """
    people_table = Airtable(
        os.environ["AIRTABLE_BASE_KEY"],
        "People",
        api_key=os.environ["AIRTABLE_API_KEY"],
    )
    pwc = getPeopleWantingCourse()
    centres = getCentres()
    for person, person_record_id in pwc.items():
        postcode = person.split()[0]
        nearest = postcodenearest(postcode, centres)
        fields = {
            "firstNearestActiveCentreDist": nearest[0][0],
            "firstNearestActiveCentre": [centres[nearest[0][1]]],
            "secondNearestActiveCentreDist": nearest[1][0],
            "secondNearestActiveCentre": [centres[nearest[1][1]]],
            "thirdNearestActiveCentreDist": nearest[2][0],
            "thirdNearestActiveCentre": [centres[nearest[2][1]]],
        }
        matched = people_table.match("Record Id", person_record_id)
        people_table.update(matched["id"], fields)
        print("Updated row" + person_record_id)
class AirtablePipeline(object):
    """
    Stub pipeline to save to AirTable.

    Skips items without a start_time or whose start_time is in the
    past, derives an id and flattened/formatted fields, whitelists the
    result, and upserts it into Airtable keyed on the 'id' field.
    """

    def __init__(self):
        # Base/table come from module-level constants.
        self.airtable = Airtable(AIRTABLE_BASE_KEY, AIRTABLE_DATA_TABLE)

    def process_item(self, item, spider):
        # copy item; airtable-specific munging is happening here that breaks
        # opencivicdata standard
        if item.get('start_time'):
            dt = dateutil.parser.parse(item['start_time'])
            if dt < datetime.datetime.now(dt.tzinfo):
                # Past event: pass through untouched.
                return item
        else:
            # No start_time: nothing to save, pass through untouched.
            return item
        time.sleep(randint(0, 3))  # presumably to avoid rate limiting — confirm
        new_item = item.copy()
        # make id
        new_item['id'] = self._make_id(new_item, spider)
        # flatten location
        new_item['location_url'] = get_key(new_item, 'location.url')
        new_item['location_name'] = get_key(new_item, 'location.name')
        new_item['location_address'] = get_key(new_item, 'location.address')
        new_item['location_latitude'] = get_key(
            new_item, 'location.coordinates.latitude')
        new_item['location_longitude'] = get_key(
            new_item, 'location.coordinates.longitude')
        new_item['all_day'] = 'false'
        new_item['agency_name'] = spider.long_name
        new_item['start_time_formatted'] = self._transform_date(
            new_item['start_time'])
        new_item['end_time_formatted'] = self._transform_date(
            new_item['end_time'])
        # Drop any field Airtable doesn't know about.
        new_item = {k: v for k, v in new_item.items() if k in FIELDS_WHITELIST}
        try:
            self.save_item(new_item, spider)
            return item
        except HTTPError as e:
            spider.logger.error('HTTP error')
            spider.logger.error(e.response.content)
            spider.logger.exception('Original message')
            spider.logger.error(json.dumps(new_item, indent=4, sort_keys=True))
            raise DropItem('Could not save {0}'.format(new_item['id']))
        except Exception as e:
            # NOTE(review): swallows unknown errors and implicitly returns
            # None, which drops the item from later pipelines — confirm intended.
            spider.logger.exception('Unknown error')

    def save_item(self, item, spider):
        """Upsert *item* into Airtable, stamping scrape date fields."""
        now = datetime.datetime.now().isoformat()
        airtable_item = self.airtable.match('id', item['id'])
        if airtable_item:
            # update
            spider.logger.debug('AIRTABLE PIPELINE: Updating {0}'.format(
                item['id']))
            item['scrape_date_updated'] = now
            self.airtable.update(airtable_item['id'], item)
        else:
            # create
            spider.logger.debug('AIRTABLE PIPELINE: Creating {0}'.format(
                item['id']))
            item['scrape_date_updated'] = now
            item['scrape_date_initial'] = now
            self.airtable.insert(item)

    def _make_id(self, item, spider):
        """Build a human-readable unique id from spider and item names/ids."""
        return '{spider_long_name} {item_name} ({spider_name}-{item_id})'.format(
            spider_name=spider.name,
            spider_long_name=spider.long_name,
            item_id=item['id'],
            item_name=item['name'])

    def _transform_date(self, timestring):
        """
        Parse to friendly format for Zapier integration.
        """
        try:
            dt = dateutil.parser.parse(timestring)
        except TypeError:
            # Non-string input (e.g. None): no formatted date available.
            return None
        return dt.strftime('%a %B %d, %Y, %I:%M%p')
def getorganizerid(event):
    """Return the organizer record-id list for the given event."""
    events = Airtable(BASE_KEY, 'events', api_key=API_KEY)
    matched = events.match('Event', event)
    return matched['fields']['Organizer']
def get_request_by_id(request_id):
    """Fetch the request record whose 'id' field equals *request_id*."""
    requests_table = Airtable(BASE_ID, REQUESTS_TABLE_NAME, api_key=API_KEY)
    record = requests_table.match('id', request_id)
    logging.info('get_request_by_id')
    return record
class AirtablePipeline(object):
    """
    Stub pipeline to save to AirTable.

    Skips items without a start_time or with a past start_time, flattens
    location fields, whitelists fields, and upserts into Airtable keyed
    on the 'id' field.

    Bug fix: save_item previously called
    ``update_by_field('id', airtable_item['id'], item)`` — searching the
    'id' *field* for the Airtable record id ('rec...'). That field holds
    ``item['id']``, so the lookup could never match and updates were
    lost. It now updates by ``item['id']``, matching the sibling
    pipeline implementation in this codebase.
    """

    def __init__(self):
        self.airtable = Airtable(AIRTABLE_BASE_KEY, AIRTABLE_DATA_TABLE)

    def process_item(self, item, spider):
        # copy item; airtable-specific munging is happening here that breaks
        # opencivicdata standard
        if item.get('start_time') is None:
            spider.logger.debug(
                'AIRTABLE PIPELINE: Ignoring event without start_time {0}'.
                format(item['id']))
            return item
        dt = dateutil.parser.parse(item['start_time'])
        if dt < datetime.datetime.now(dt.tzinfo):
            spider.logger.debug(
                'AIRTABLE PIPELINE: Ignoring past event {0}'.format(
                    item['id']))
            return item
        time.sleep(randint(0, 3))  # to avoid rate limiting?
        new_item = item.copy()
        # flatten location
        new_item['location_url'] = get_key(new_item, 'location.url')
        new_item['location_name'] = get_key(new_item, 'location.name')
        new_item['location_address'] = get_key(new_item, 'location.address')
        new_item['location_latitude'] = get_key(
            new_item, 'location.coordinates.latitude')
        new_item['location_longitude'] = get_key(
            new_item, 'location.coordinates.longitude')
        new_item[
            'timezone'] = 'America/Chicago'  # TODO have this passed in by the spiders
        new_item['all_day'] = 'false'
        new_item['agency_name'] = spider.long_name
        # Drop any field Airtable doesn't know about.
        new_item = {k: v for k, v in new_item.items() if k in FIELDS_WHITELIST}
        try:
            self.save_item(new_item, spider)
            return item
        except HTTPError as e:
            spider.logger.error('HTTP error')
            spider.logger.error(e.response.content)
            spider.logger.exception('Original message')
            spider.logger.error(json.dumps(new_item, indent=4, sort_keys=True))
            raise DropItem('Could not save {0}'.format(new_item['id']))
        except Exception as e:
            spider.logger.exception('Unknown error')

    def save_item(self, item, spider):
        """Upsert *item* keyed on its 'id' field, stamping scrape dates."""
        now = datetime.datetime.now().isoformat()
        airtable_item = self.airtable.match('id', item['id'])
        if airtable_item:
            spider.logger.debug('AIRTABLE PIPELINE: Updating {0}'.format(
                item['id']))
            item['scrape_date_updated'] = now
            # Fixed: update by the item's own id — the value match() just
            # searched for — not by the Airtable record id.
            self.airtable.update_by_field('id', item['id'], item)
        else:
            spider.logger.debug('AIRTABLE PIPELINE: Creating {0}'.format(
                item['id']))
            item['scrape_date_updated'] = now
            item['scrape_date_initial'] = now
            self.airtable.insert(item)
def copy_to_airtable(self):
    """
    copies the DB to airtables

    Flags the Airtable 'State' table as not-ready, wipes the four mirror
    tables, re-inserts every Pilot/Character/Attribute/AttributeGroup row
    from the local DB session, then flags the state as ready again. Any
    failure is logged and swallowed, leaving the ready flag at 'false'.
    """
    try:
        logger.debug('Starting Airtable copy...')
        # set the ready state to false to indicate that it's not ready
        state_table = Airtable(self.airtable_basekey, 'State',
                               self.airtable_apikey)
        ready_id = state_table.match('key', 'ready')['id']
        fields = {'key': 'ready', 'value': 'false'}
        state_table.replace(ready_id, fields)
        # delete previous table entries
        logger.debug("Deleting previous table entries...")
        pilot_table = Airtable(self.airtable_basekey, 'Pilots',
                               self.airtable_apikey)
        character_table = Airtable(self.airtable_basekey, 'Characters',
                                   self.airtable_apikey)
        attribute_table = Airtable(self.airtable_basekey, 'Attributes',
                                   self.airtable_apikey)
        attribute_groups_table = Airtable(self.airtable_basekey,
                                          'AttributeGroups',
                                          self.airtable_apikey)
        pilot_table.batch_delete(
            [entry['id'] for entry in pilot_table.get_all()])
        character_table.batch_delete(
            [entry['id'] for entry in character_table.get_all()])
        attribute_table.batch_delete(
            [entry['id'] for entry in attribute_table.get_all()])
        attribute_groups_table.batch_delete(
            [entry['id'] for entry in attribute_groups_table.get_all()])
        logger.debug("Previous table entries deleted!")
        # copy pilots table
        logger.debug("Copying Pilots table...")
        # NOTE(review): 'session' is presumably a module-level DB session
        # (e.g. SQLAlchemy) — not visible in this file chunk.
        pilots = session.query(Pilot)
        pilot_records = []
        for pilot in pilots:
            pilot_records.append({
                'id': pilot.id,
                'discord_id': pilot.discord_id,
                'discord_name': pilot.discord_name,
                'discord_discriminator': pilot.discord_discriminator
            })
        pilot_table.batch_insert(pilot_records)
        logger.debug("Pilots table copied!")
        # copy characters table
        logger.debug("Copying Characters table...")
        characters = session.query(Character)
        character_records = []
        for character in characters:
            character_records.append({
                'id': character.id,
                'pilot_id': character.pilot_id,
                'name': character.name
            })
        character_table.batch_insert(character_records)
        logger.debug("Characters table copied!")
        # copy attributes
        logger.debug("Copying Attribute table...")
        attributes = session.query(Attribute)
        attribute_records = []
        for attribute in attributes:
            attribute_records.append({
                'id': attribute.id,
                'attribute_group_id': attribute.attribute_group_id,
                'key': attribute.key,
                'value': attribute.value,
                'friendly_name': attribute.friendly_name
            })
        attribute_table.batch_insert(attribute_records)
        logger.debug("Attribute table copied!")
        # copy attributegroups
        logger.debug("Copying AttributeGroup table...")
        attribute_groups = session.query(AttributeGroup)
        attribute_group_records = []
        for attribute_group in attribute_groups:
            attribute_group_records.append({
                'id': attribute_group.id,
                'pilot_id': attribute_group.pilot_id,
                'name': attribute_group.name,
                'description': attribute_group.description
            })
        attribute_groups_table.batch_insert(attribute_group_records)
        logger.debug("AttributeGroup table copied!")
        # set the ready state to true to indicate that it's ready
        state_table = Airtable(self.airtable_basekey, 'State',
                               self.airtable_apikey)
        ready_id = state_table.match('key', 'ready')['id']
        fields = {'key': 'ready', 'value': 'true'}
        state_table.replace(ready_id, fields)
        logger.debug('Airtable copy complete!')
    except:
        # NOTE(review): bare except — also traps SystemExit/KeyboardInterrupt;
        # consider narrowing to Exception.
        logger.error(
            f"Failed to copy to airtable:\n{traceback.format_exc()}")
class GeocoderPipeline(object):
    """Scrapy pipeline that geocodes items via an Airtable cache and Mapzen."""

    def __init__(self, session=None):
        # An injectable requests session (for tests); a fresh one otherwise.
        if session is None:
            session = requests.Session()
        self.session = session
        self.client = MapzenAPI(os.environ.get('MAPZEN_API_KEY'))
        self.geocode_database = Airtable(AIRTABLE_BASE_KEY,
                                         AIRTABLE_GEOCODE_TABLE)

    def process_item(self, item, spider):
        """
        Geocodes an item by:
        (1) looking in airtable cache
        (2) making a mapzen query and adding the result to the cache
        if (1) is not found

        Mapzen queries are standardized to end with ', Chicago, IL'.
        If something like '5100 Milwaukee Chicago, IL' is not found,
        '5100 Milwaukee Ave., Chicago, IL' and
        '5100 Milwaukee St., Chicago, IL' are also tried.
        """
        # skip geocoding if event is in the past
        if item.get('start_time') is None:
            spider.logger.debug(
                'GEOCODER PIPELINE: Ignoring event without start_time {0}'.
                format(item['id']))
            return item
        # NOTE(review): start_time is used directly as a datetime here
        # (no parsing) — earlier pipelines presumably converted it; confirm.
        dt = item['start_time']
        if dt < datetime.datetime.now(dt.tzinfo):
            spider.logger.debug(
                'GEOCODER PIPELINE: Ignoring past event {0}'.format(
                    item['id']))
            return item
        query = self._get_mapzen_query(item.get('location', {}))
        if not query:
            spider.logger.debug(
                'GEOCODER PIPELINE: Empty query. Not geocoding {0}'.format(
                    item['id']))
            return item
        # First pass: try the cache with each suffix variant.
        for suffix in ['', ' ave.', ' st.']:
            new_query = query.replace(', chicago, il',
                                      '{0}, chicago, il'.format(suffix))
            time.sleep(randint(0, 3))  # to avoid rate limiting?
            updated_item = self._update_fromDB(new_query, item)
            if updated_item:
                spider.logger.debug(
                    'GEOCODER PIPELINE: Geocoded item from airtable cache.')
                return updated_item
        # Second pass: query Mapzen, keeping only plausible Chicago street
        # addresses (contains a digit, ends with 'Chicago, IL, USA').
        bad_addresses = ['Chicago, IL, USA', 'Illinois, USA', '']
        for suffix in ['', ' ave.', ' st.']:
            new_query = query.replace(', chicago, il',
                                      '{0}, chicago, il'.format(suffix))
            geocoded_item = self._geocode(new_query, item, spider)
            address = geocoded_item['location']['address']
            if (address not in bad_addresses) and (address.endswith(
                    'Chicago, IL, USA')) and (self._hasDigit(address)):
                write_item = {
                    'mapzen_query': new_query,
                    'longitude':
                    geocoded_item['location']['coordinates']['longitude'],
                    'latitude':
                    geocoded_item['location']['coordinates']['latitude'],
                    'name': geocoded_item['location']['name'],
                    'address': geocoded_item['location']['address'],
                    'geocode': geocoded_item['geocode'],
                    'community_area': geocoded_item['community_area']
                }
                self._geocodeDB_write(spider, write_item)
                spider.logger.debug(
                    'GEOCODER PIPELINE: Geocoded item from mapzen.')
                return geocoded_item
        # NOTE(review): logger.exception outside an except block logs with
        # no active traceback — logger.error may be the intent.
        spider.logger.exception((
            "GEOCODER PIPELINE: Couldn't geocode using mapzen or airtable cache. "
            "Query: {0}. Item id: {1}").format(query, item['id']))
        return item

    def _geocode(self, query, item, spider):
        """
        Makes a Mapzen query and returns results.

        On any failure, returns {'location': {'address': ''}} so callers
        can test the address field uniformly.
        """
        try:
            geocode = self.client.search(query,
                                         boundary_country='US',
                                         format='keys')
        except ValueError:
            spider.logger.debug(
                ('GEOCODER PIPELINE: Could not geocode, skipping. '
                 'Query: {0}. Item id: {1}').format(query, item['id']))
        except Exception as e:
            spider.logger.info(
                ('GEOCODER PIPELINE: Unknown error when geocoding, skipping. '
                 'Query: {0}. Item id: {1}. Message: {2}').format(
                     query, item['id'], str(e)))
        else:
            new_data = {
                'location': {
                    'coordinates': {
                        'longitude':
                        str(geocode['features'][0]['geometry']['coordinates']
                            [0]),
                        'latitude':
                        str(geocode['features'][0]['geometry']['coordinates']
                            [1])
                    },
                    'name':
                    geocode['geocoding']['query']['parsed_text'].get(
                        'query', ''),
                    'address': geocode['features'][0]['properties']['label'],
                    'url': item.get('location', {
                        'url': ''
                    }).get('url', '')
                },
                'geocode': json.dumps(geocode, indent=4, sort_keys=True),
                'community_area':
                geocode['features'][0]['properties'].get('neighbourhood', '')
            }
            geocoded_item = item.copy()
            geocoded_item.update(new_data)
            return geocoded_item
        # Reached only on the exception paths above.
        return {'location': {'address': ''}}

    def _hasDigit(self, string):
        """
        Returns True if the string contains a digit.
        """
        return any(char.isdigit() for char in string)

    def _get_mapzen_query(self, location_dict):
        """
        Clean and item's location to make a mapzen query.
        All cleaned queries are lowercase and end with ', chicago, il'.
        """
        name = location_dict.get('name', '').strip()
        address = location_dict.get('address', '').strip()
        query = ', '.join([
            name, address
        ]).strip(', ').lower()  # combine '{name}, {address}' and lowercase
        query = query.replace('-', ' ').replace('/',
                                                ' ')  # remove special characters
        query = query.replace('milwukee', 'milwaukee').replace(
            'milwuakee', 'milwaukee')  # fix misspellings
        query = query.replace('n.', 'n. ').replace('s.', 's. ').replace(
            'e.', 'e. ').replace('w.', 'w. ')
        query = re.sub(r' +', ' ', query)  # remove repeated spaces
        query = re.sub(r',* chicago,*( il)* *\d*$', ', chicago, il',
                       query)  # remove zip code, standardize ', chicago, il'
        if not query:
            return ''
        if 'city hall' in query.lower():
            return 'chicago city hall, chicago, il'
        if not query.endswith(', chicago, il'):
            return '{0}, chicago, il'.format(query)
        else:
            return query

    def _update_fromDB(self, query, item):
        """
        Query the geocode database and update item with results.

        Returns {} when the cache has no usable entry (a None fetch makes
        the subscripts below raise, caught by the except).
        """
        fetched_item = self._geocodeDB_fetch(query)
        try:
            new_data = {
                'location': {
                    'coordinates': {
                        'longitude': str(fetched_item['longitude']),
                        'latitude': str(fetched_item['latitude'])
                    },
                    'name': fetched_item.get('name', ''),
                    'address': fetched_item['address'],
                    'url': item.get('location', {
                        'url': ''
                    }).get('url', '')
                },
                'geocode': str(fetched_item.get('geocode', '')),
                'community_area': fetched_item.get('community_area', '')
            }
        except:
            # NOTE(review): bare except — narrows poorly; TypeError/KeyError
            # are the expected failures here.
            return {}
        else:
            updated_item = item.copy()
            updated_item.update(new_data)
            return updated_item

    def _geocodeDB_fetch(self, query):
        """
        Fetch from geocode_database.
        """
        try:
            return self.geocode_database.match('mapzen_query',
                                               query)['fields']
        except:
            # No match (or request failure): treat as a cache miss.
            return None

    def _geocodeDB_write(self, spider, item):
        """
        Write to geocode_database.
        """
        spider.logger.debug('GEOCODER PIPELINE: Caching {0}'.format(
            item['mapzen_query']))
        item['geocode_date_updated'] = datetime.datetime.now().isoformat()
        airtable_item = self.geocode_database.match('mapzen_query',
                                                    item['mapzen_query'])
        if airtable_item:
            self.geocode_database.update_by_field('mapzen_query',
                                                  item['mapzen_query'], item)
        else:
            self.geocode_database.insert(item)
def getagentrecordid_byuserid(userid):
    """Return the Airtable record id of the member with the given user id."""
    members = Airtable(BASE_KEY, 'member', api_key=API_KEY)
    return members.match('User id', userid)['id']
def gather_video_descriptions():
    """Build YouTube metadata payloads for approved, not-yet-posted session videos.

    Walks the 'Session hosting' table, and for each submission id with a
    video column, checks opt-in consent (or manual approval) and merges in
    the submission details before composing the API payload.
    """
    at_orange = Airtable(os.getenv('AT_APP_KEY_ORANGE'), 'Session hosting',
                         os.getenv('AT_API_KEY'))
    at_optin = Airtable(os.getenv('AT_APP_KEY_ORANGE'), 'optin',
                        os.getenv('AT_API_KEY'))
    at_gray = Airtable(os.getenv('AT_APP_KEY_GRAY'), 'submissions',
                       os.getenv('AT_API_KEY'))
    payloads = []
    for row in at_orange.get_all():
        submission_ids = row['fields']['submission_ids'].split(', ')
        for num, submission_id in enumerate(submission_ids):
            col = f'videoid_{num}'
            if col not in row['fields']:
                continue
            if checked(f'posted_{num}', row['fields']):
                # No use in reposting the information
                continue
            matches = at_optin.search('submission_id', submission_id)
            yes = 'I want my presentation video posted to YouTube'
            # Grouping (Python precedence): skip when
            # (not all opted in) OR (no opt-in rows AND not manually approved).
            if not all([match['fields']['choice'] == yes for match in matches]) or len(matches) == 0 and (
                    not checked('manually_approved', row['fields'])):
                # Not yet approved to send out
                continue
            extended_info = at_gray.match('submission_id', submission_id)
            all_info = {**row['fields'], **extended_info['fields']}
            if f'extra_info_{num}' in all_info:
                all_info['extra_info'] = all_info[f'extra_info_{num}']
            else:
                all_info['extra_info'] = ""
            print(all_info['extra_info'])
            payload = {
                'id': row['fields'][col],
                'snippet': {
                    'title': generate_title(all_info),  # Max 100 characters
                    'description': generate_description(all_info),  # Max 5000 bytes
                    'categoryId': "28",
                    'defaultLanguage': 'en',
                },
                'status': {
                    'embeddable': True,
                    'license': 'youtube',
                    'privacyStatus': 'public',
                    'publicStatsViewable': True,
                    'selfDeclaredMadeForKids': False,
                },
                'recordingDetails': {
                    'recordingDate': all_info['starttime']
                }
            }
            payloads.append(payload)
    return payloads
def getattendeeid(event):
    """Return the attendee record-id list for the given event."""
    events = Airtable(BASE_KEY, 'events', api_key=API_KEY)
    matched = events.match('Event', event)
    return matched['fields']['Attendee']
class AirtablePipeline(object):
    """
    Stub pipeline to save to AirTable.

    Skips items without a start_time or with a past start_time, flattens
    location/source fields, normalizes values for Airtable, and upserts
    keyed on the 'id' field.
    """

    def __init__(self):
        self.airtable = Airtable(AIRTABLE_BASE_KEY, AIRTABLE_DATA_TABLE)

    def process_item(self, item, spider):
        # copy item; airtable-specific munging is happening here that breaks
        # opencivicdata standard
        if item.get('start_time') is None:
            spider.logger.debug(
                'AIRTABLE PIPELINE: Ignoring event without start_time {0}'.
                format(item['id']))
            return item
        # start_time is used as a datetime directly here (no parsing) —
        # presumably converted by an earlier pipeline stage; confirm.
        dt = item['start_time']
        if dt < datetime.datetime.now(dt.tzinfo):
            spider.logger.debug(
                'AIRTABLE PIPELINE: Ignoring past event {0}'.format(
                    item['id']))
            return item
        time.sleep(randint(0, 3))  # to avoid rate limiting?
        new_item = item.copy()
        # flatten location
        new_item['location_url'] = get_key(new_item, 'location.url')
        new_item['location_name'] = get_key(new_item, 'location.name')
        new_item['location_address'] = get_key(new_item, 'location.address')
        new_item['location_latitude'] = get_key(
            new_item, 'location.coordinates.latitude')
        new_item['location_longitude'] = get_key(
            new_item, 'location.coordinates.longitude')
        # First source URL, or '' when the item has no sources.
        new_item['url'] = new_item.get('sources', [{
            'url': ''
        }])[0].get('url', '')
        # Keep only known fields, normalizing each value for Airtable.
        new_item = {
            k: self._format_values(k, v)
            for k, v in new_item.items() if k in KEEP_FIELDS
        }
        try:
            self.save_item(new_item, spider)
            return item
        except HTTPError as e:
            spider.logger.error('HTTP error')
            spider.logger.error(e.response.content)
            spider.logger.exception('Original message')
            spider.logger.error(json.dumps(new_item, indent=4, sort_keys=True))
            raise DropItem('Could not save {0}'.format(new_item['id']))
        except Exception as e:
            # NOTE(review): swallows unknown errors and implicitly returns
            # None, which drops the item from later pipelines — confirm intended.
            spider.logger.exception('Unknown error')

    def _format_values(self, k, v):
        """Normalize a field value for Airtable: 'N/A' for empties (except
        the time fields), title-cased location names, ints for bools, and
        UTC ISO strings for datetimes."""
        if ((v is None) or v == '') and (k not in ['start_time', 'end_time']):
            return 'N/A'
        if k == 'location_name':
            return ' '.join([w.capitalize() for w in v.split(' ')])
        if isinstance(v, bool):
            return int(v)
        if isinstance(v, datetime.datetime):
            # converts '2018-10-14T00:00:00-05:00' into '2018-10-14T05:00:00+00:00'
            # as required by the Airtable API
            return v.astimezone(utc).isoformat()
        return v

    def save_item(self, item, spider):
        """Upsert *item* keyed on its 'id' field, stamping scrape dates."""
        now = datetime.datetime.now().isoformat()
        airtable_item = self.airtable.match('id', item['id'])
        if airtable_item:
            spider.logger.debug('AIRTABLE PIPELINE: Updating {0}'.format(
                item['id']))
            item['scrape_date_updated'] = now
            self.airtable.update_by_field('id', item['id'], item)
        else:
            spider.logger.debug('AIRTABLE PIPELINE: Creating {0}'.format(
                item['id']))
            item['scrape_date_updated'] = now
            item['scrape_date_initial'] = now
            self.airtable.insert(item)
def main():
    """Import a deliveries CSV into the Backline Airtable base.

    Each CSV row describes up to three scheduled deliveries sharing the
    same chapter/recipient/location; one 'Deliveries' record is inserted
    per delivery.
    """
    # get auth keys from environment
    airtable_api_key = os.environ['AIRTABLE_API_KEY']
    airtable_base_backline = os.environ['AIRTABLE_BASE_BACKLINE']
    # get arguments
    argparser = init_argparse()
    args = argparser.parse_args()
    # set up Airtable connections
    deliveries_table = Airtable(airtable_base_backline, 'Deliveries',
                                api_key=airtable_api_key)
    chapters_table = Airtable(airtable_base_backline, 'Chapters',
                              api_key=airtable_api_key)
    delivery_locations_table = Airtable(airtable_base_backline,
                                        'Delivery Locations',
                                        api_key=airtable_api_key)
    recipients_table = Airtable(airtable_base_backline, 'Recipients',
                                api_key=airtable_api_key)
    restaurants_table = Airtable(airtable_base_backline, 'Restaurants',
                                 api_key=airtable_api_key)
    # Airtable column names
    airtable_fieldnames = [
        "Chapter", "Floor", "Day of Hospital Contact",
        "Hospital Contact Phone", "Recipient", "Delivery Location", "empty",
        "day_one", "restaurant_one", "meals_number_one", "day_two",
        "restaurant_two", "meals_number_two", "day_three", "restaurant_three",
        "meals_number_three"
    ]
    # read CSV
    with open(args.csv, newline='') as csvfile:
        next(csvfile, None)  # skip csv header
        reader = csv.DictReader(csvfile, fieldnames=airtable_fieldnames)
        for row in reader:
            common = {}
            one = {}
            two = {}
            three = {}
            chapter_id = chapters_table.match('Name', row['Chapter']).get('id')
            for k, v in row.items():
                v = v.strip()
                if not k.startswith(('day', 'restaurant', 'meals_number')):
                    # Shared fields: link Chapter/Recipient/Delivery Location
                    # to their Airtable record ids; copy everything else as-is.
                    if k == 'Chapter':
                        common[k] = [chapter_id or v]
                    elif k == 'Recipient':
                        if v == 'Yale New Haven Hospital - SRC':
                            v = 'Yale New Haven Hospital St Raphael Campus'
                        common[k] = [
                            recipients_table.match('Name', v).get('id')
                        ]
                    elif k == 'Delivery Location':
                        common[k] = [
                            delivery_locations_table.match('Name',
                                                           v).get('id')
                        ]
                    else:
                        common[k] = v
                # NOTE(review): each endswith branch below runs once for EACH
                # of the day_*/restaurant_*/meals_number_* keys of that group
                # (three times per row), redoing the same lookups — confirm
                # this is intentional and that the columns are never empty
                # (int('') would raise ValueError).
                elif k.endswith('one'):
                    # NOTE(review): format_datetime_Y here vs format_datetime_y
                    # below — verify both helpers exist and the casing is intended.
                    one['Delivery Scheduled'] = format_datetime_Y(
                        row['day_one'])
                    if v == 'Roia Restaurant':
                        row["restaurant_one"] = 'ROIA Restaurant'
                    one['Restaurant'] = [
                        restaurants_table.match(
                            'Name', row["restaurant_one"]).get('id')
                    ]
                    one['Number of Meals'] = int(row["meals_number_one"])
                elif k.endswith('two'):
                    # Day two/three reuse day_one's clock time.
                    two['Delivery Scheduled'] = format_datetime_y(
                        row['day_two'] + " " +
                        datetime.strptime(row['day_one'], '%m/%d/%Y %I:%M%p'
                                          ).strftime("%I:%M%p"))
                    if v == 'Roia Restaurant':
                        row["restaurant_two"] = 'ROIA Restaurant'
                    two['Restaurant'] = [
                        restaurants_table.match(
                            'Name', row["restaurant_two"]).get('id')
                    ]
                    two['Number of Meals'] = int(row['meals_number_two'])
                elif k.endswith('three'):
                    three['Delivery Scheduled'] = format_datetime_y(
                        row['day_three'] + " " +
                        datetime.strptime(row['day_one'], '%m/%d/%Y %I:%M%p'
                                          ).strftime("%I:%M%p"))
                    if v == 'Roia Restaurant':
                        row["restaurant_three"] = 'ROIA Restaurant'
                    three['Restaurant'] = [
                        restaurants_table.match(
                            'Name', row["restaurant_three"]).get('id')
                    ]
                    three['Number of Meals'] = int(row['meals_number_three'])
            final_delivery_row_1 = {}
            final_delivery_row_1.update(common)
            final_delivery_row_1.update(one)
            final_delivery_row_1.pop('empty')
            deliveries_table.insert(
                final_delivery_row_1)  # insert delivery_row_1 to Airtable
            final_delivery_row_2 = {}
            final_delivery_row_2.update(common)
            final_delivery_row_2.update(two)
            final_delivery_row_2.pop('empty')
            deliveries_table.insert(
                final_delivery_row_2)  # insert delivery_row_2 to Airtable
            final_delivery_row_3 = {}
            final_delivery_row_3.update(common)
            final_delivery_row_3.update(three)
            final_delivery_row_3.pop('empty')
            deliveries_table.insert(
                final_delivery_row_3)  # insert delivery_row_3 to Airtable