class SWDS:
    """Walk a paginated JSON listing and store every resource not yet saved.

    Each listing page is expected to be a dict with a ``results`` list (rows
    carrying a ``url`` key) and a ``next`` key holding the following page URL
    or a falsy value on the last page.
    """

    def __init__(self):
        self.db_Manager = DbManager()

    def get_json(self, url):
        """Return the decoded JSON body of *url*, or None on a non-200 reply."""
        resp = requests.get(url)
        if resp.status_code != 200:
            return None
        return json.loads(resp.text)

    def get_entity(self, url, obj):
        """Download *url*, load it into a new ``obj`` instance and save it.

        Args:
            url: Address of a single resource.
            obj: Model class; instances must provide ``parse_json(data)``.

        Returns:
            The saved instance, or None when the download failed.
        """
        json_data = self.get_json(url)
        if json_data is None:
            # Nothing to parse; do not persist an empty row.
            return None
        entity = obj()
        entity.parse_json(json_data)
        self.db_Manager.save(entity)
        return entity

    def populate(self, obj, next_url):
        """Store every resource listed from *next_url* onward that is not in the DB.

        Args:
            obj: Model class with a ``url`` column used for the duplicate check.
            next_url: URL of the first listing page.
        """
        while next_url:
            group_data = self.get_json(next_url)
            if group_data is None:
                # A failed page fetch ends the crawl instead of raising on
                # ``None['results']``.
                break
            for row in group_data['results']:
                url = row['url']
                # Separate name: the original rebound ``results`` here while
                # still iterating over it.
                # ``open()`` matches how the other loaders reach the session.
                existing = self.db_Manager.open().query(obj).filter(
                    obj.url == url).all()
                if not existing:
                    self.get_entity(url, obj)
            next_url = group_data['next']
class DCK():
    """Load resources from an API whose listing pages are numbered URLs.

    A listing URL ends in a page number (``.../<page>``); each page is a JSON
    list of rows carrying an ``api`` key with the URL of a single resource.
    """

    def __init__(self):
        self.db_Manager = DbManager()

    def get_json(self, url):
        """Return the decoded JSON body of *url*, or None on a non-200 reply."""
        resp = requests.get(url)
        if resp.status_code != 200:
            return None
        return json.loads(resp.text)

    def get_entity(self, obj, url):
        """Download *url* into a new ``obj`` instance, saving it if unseen.

        Args:
            obj: Model class with an ``api`` column and ``parse_json(data)``.
            url: Address of a single resource.

        Returns:
            The freshly parsed instance (saved only when no row with this
            ``api`` value existed), or None when the download failed.
        """
        results = self.db_Manager.open().query(obj).filter(
            obj.api == url).all()
        json_data = self.get_json(url)
        if json_data is None:
            # Failed download: nothing to parse or store.
            return None
        entity = obj()
        entity.parse_json(json_data)
        if not results:
            self.db_Manager.save(entity)
        return entity

    def find_end(self, endChar, url):
        """Return the index of the last *endChar* in *url*, or None if absent."""
        if not endChar:
            # ``rfind('')`` would report len(url); an empty needle never matches.
            return None
        index = url.rfind(endChar)
        return None if index == -1 else index

    def find_base_url(self, url):
        """Return *url* up to and including its last ``/``, or None without one."""
        index = self.find_end('/', url)
        # Compare against None explicitly: index 0 is a valid position.
        if index is None:
            return None
        return url[:index + 1]

    def populate_table(self, obj, next_url):
        """Store every resource listed on *next_url* and the pages after it.

        Pages are requested as ``<base>2``, ``<base>3``, ... until a request
        fails or returns an empty body.

        Args:
            obj: Model class handed to ``get_entity``.
            next_url: URL of the first listing page.
        """
        base_url = self.find_base_url(next_url)
        if not base_url:
            return
        group_data = self.get_json(next_url)
        page_id = 1
        while group_data:
            print(next_url)
            for result in group_data:
                single_url = result['api']
                print(single_url)
                self.get_entity(obj, single_url)
            page_id += 1
            next_url = base_url + str(page_id)
            group_data = self.get_json(next_url)
class PPDS:
    """Look up stored rows by API URL and fetch JSON with the default headers."""

    def __init__(self):
        self.db = DbManager()

    def get_json(self, url):
        """Log and GET *url* using ``DEFAULT_HEADER``; return the decoded JSON."""
        print('GET\t<{}>'.format(url))
        response = requests.get(url, headers=DEFAULT_HEADER)
        return json.loads(response.text)

    def get_candidates(self, state):
        """Query stored ``Candidate`` rows for the 2014 races of *state*.

        Returns:
            None. The rows are queried but not handed back.
        """
        state_cands_url = (
            'https://api.propublica.org/campaign-finance/v1/2014/races/{}.json'
            .format(state))
        # NOTE(review): the query result is discarded and nothing is returned;
        # this looks unfinished - confirm the intended return value.
        results = self.db.open().query(Candidate).filter(
            Candidate.url == state_cands_url).all()

    def get_person(self, i):
        """Return the stored ``Person`` for id *i*, or None if none is stored.

        Args:
            i: Identifier appended to the person endpoint URL.

        Returns:
            The first matching row, or None on a miss. The original indexed
            the empty result list and raised ``IndexError`` here.
        """
        person_url = 'http://data.coding.kitchen/api/person/{}'.format(i)
        results = self.db.open().query(Person).filter(
            Person.url == person_url).all()
        if results:
            return results[0]
        json_data = self.get_json(person_url)
        person = Person()
        # NOTE(review): ``person`` is never filled from ``json_data`` nor
        # saved; the parse method of Person is not visible here - complete
        # this step and return the new row once confirmed.
        return None
def __init__(self) -> None:
    """Attach a newly constructed ``DbManager`` to the instance as ``db_Manager``."""
    self.db_Manager = DbManager()
class CDC():
    """Load committee, subcommittee, member and congress records from an API.

    URLs are built from the module-level ``API_BASE``; requests are sent with
    ``DEFAULT_HEADER``.
    """

    def __init__(self):
        self.db_Manager = DbManager()

    def get_data(self, url):
        """Return the decoded JSON body of *url*, or None on a non-200 reply.

        The status code of a failed request is printed.
        """
        resp = requests.get(url, headers=DEFAULT_HEADER)
        if resp.status_code != 200:
            print(resp.status_code)
            return None
        return json.loads(resp.text)

    def get_entity(self, obj, url):
        """Download *url* into a new ``obj`` instance, saving it if unseen.

        Args:
            obj: Model class with a ``url`` column and ``parse_json(data)``.
            url: Address of a single resource.

        Returns:
            The parsed instance, or None when parsing the payload failed
            (including a failed download, where the payload is None).
        """
        json_data = self.get_data(url)
        entity = obj()
        try:
            entity.parse_json(json_data)
        except Exception:
            # Narrower than a bare ``except``: Ctrl-C and SystemExit still
            # propagate, while any parsing problem is reported as a bad link.
            print("bad link: {}".format(url))
            return None
        results = self.db_Manager.open().query(obj).filter(
            obj.url == url).all()
        if not results:
            self.db_Manager.save(entity)
            print("new add: {}".format(entity.url))
        else:
            print("already added: {}".format(entity.url))
        return entity

    def _first_result(self, url):
        """Return ``results[0]`` of the payload at *url*, or None if unusable."""
        json_data = self.get_data(url)
        try:
            return json_data['results'][0]
        except (TypeError, KeyError, IndexError):
            # None payload, missing key, or empty result list.
            return None

    def populate_committee(self, congress, chamber):
        """Store all committees and subcommittees of a chamber.

        Subcommittees are loaded first, then the committees themselves.

        Returns:
            ``"Good Link"`` on success, ``"Bad Link"`` when the listing could
            not be read.
        """
        chamber = chamber.lower()
        url = API_BASE + "{}/{}/committees.json".format(congress, chamber)
        results = self._first_result(url)
        if results is None:
            print("Bad Link")
            return "Bad Link"
        committees = results['committees']
        # Collected before any download so a malformed committee row fails
        # before partial data is written.
        committee_list = [c['api_uri'] for c in committees]
        for committee in committees:
            # Missing, None and empty subcommittee lists are all skipped.
            for sc in committee.get('subcommittees') or []:
                pprint(sc)
                self.get_entity(SubCommittee, sc['api_uri'])
        for com_url in committee_list:
            self.get_entity(Committee, com_url)
        return "Good Link"

    def populate_members(self, congress, chamber):
        """Store every member listed for a chamber of a congress.

        Returns:
            ``"Good Link"`` on success, ``"Bad Link"`` when the listing could
            not be read.
        """
        chamber = chamber.lower()
        url = API_BASE + "{}/{}/members.json".format(congress, chamber)
        results = self._first_result(url)
        if results is None:
            return "Bad Link"
        member_list = [m['id'] for m in results['members']]
        for mid in member_list:
            member_url = API_BASE + "members/{}.json".format(mid)
            self.get_entity(Member, member_url)
        return "Good Link"

    def _populate_congress(self, congress, chamber):
        """Store the ``Congress`` record for one chamber's member listing."""
        url = API_BASE + "{}/{}/members.json".format(congress, chamber)
        self.get_entity(Congress, url)

    def populate_house(self, congress):
        """Store the ``Congress`` record built from the house member listing."""
        self._populate_congress(congress, "house")

    def populate_senate(self, congress):
        """Store the ``Congress`` record built from the senate member listing."""
        self._populate_congress(congress, "senate")
from entities import Company, Club, Person, League, City, Department, State, Exchange, Listing, Job, Address
import requests, json
from pprint import pprint
from base import DbManager

# URL template with two positional placeholders.
CK_API = 'http://data.coding.kitchen/api/{}/{}/'
# Module-level DbManager instance used by the functions below.
db = DbManager()


def get_json(url):
    """GET *url*, print the URL, and return the body decoded from JSON."""
    response = requests.get(url)
    print(url)
    return json.loads(response.text)


def get_state(url):
    """Return the ``State`` whose ``api`` column equals *url*, creating it if needed.

    When the lookup does not yield exactly one row, the resource is
    downloaded, parsed into a new ``State`` and saved.
    """
    state = None
    try:
        state = db.open().query(State).filter(State.api == url).one()
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not mistaken for "row not found" and answered with a download.
        state = State()
        json_data = get_json(url)
        state.parse_json(json_data)
        db.save(state)
    return state


def get_city(url):
    city = None
import requests, json
from pprint import pprint
from base import DbManager
from models import Person, Planet, Species, Starship, Vehicle

# Module-level DbManager instance.
DB = DbManager()
# URL template; the placeholder takes the resource collection name.
SWAPI_URL = 'https://swapi.co/api/{}/'


def get_request(url):
    """GET *url* and return the response body decoded from JSON."""
    response = requests.get(url)
    return json.loads(response.text)


def get_count(type):
    """Return the ``count`` field of the collection listing for *type*."""
    response = get_request(SWAPI_URL.format(type))
    count = response['count']
    return count


def get_persons():
    count = get_count('people')
    # Ids are tried from 1 up to the reported count, inclusive.
    for person_id in range(1, count + 1):
        url = SWAPI_URL.format('people') + '{}/'.format(person_id)
        person = None
        try:
            # ``one()`` is used for the lookup by ``api_url``.
            person = DB.open().query(Person).filter(
                Person.api_url == url).one()
        except:
class CKDS:
    """Look up stored rows by their ``data.coding.kitchen`` API URL."""

    def __init__(self):
        self.db = DbManager()

    def get_json(self, url):
        """Log and GET *url*; return the response body decoded from JSON."""
        print('GET\t<{}>'.format(url))
        response = requests.get(url)
        return json.loads(response.text)

    def _get_stored(self, model, kind, i):
        """Return the stored *model* row for ``/api/<kind>/<i>``, or None.

        Shared body of the public getters. On a miss the resource is still
        downloaded, but None is returned; the original getters indexed the
        empty result list and raised ``IndexError`` at that point.
        """
        url = 'http://data.coding.kitchen/api/{}/{}'.format(kind, i)
        results = self.db.open().query(model).filter(model.url == url).all()
        if results:
            return results[0]
        json_data = self.get_json(url)
        entity = model()
        # NOTE(review): ``entity`` is never filled from ``json_data`` nor
        # saved; the models' parse method is not visible here - complete this
        # step and return the new row once confirmed.
        return None

    def get_person(self, i):
        """Return the stored ``Person`` with id *i*, or None."""
        return self._get_stored(Person, 'person', i)

    def get_company(self, i):
        """Return the stored ``Company`` with id *i*, or None."""
        return self._get_stored(Company, 'company', i)

    def get_department(self, i):
        """Return the stored ``Department`` with id *i*, or None."""
        return self._get_stored(Department, 'department', i)

    def get_state(self, i):
        """Return the stored ``State`` with id *i*, or None."""
        return self._get_stored(State, 'state', i)

    def get_city(self, i):
        """Return the stored ``City`` with id *i*, or None."""
        return self._get_stored(City, 'city', i)

    def get_league(self, i):
        """Return the stored ``League`` with id *i*, or None."""
        return self._get_stored(League, 'league', i)

    def get_club(self, i):
        """Return the stored ``Club`` with id *i*, or None."""
        return self._get_stored(Club, 'club', i)

    def get_exchange(self, i):
        """Return the stored ``Exchange`` with id *i*, or None."""
        return self._get_stored(Exchange, 'exchange', i)
for obj_dict in results: persist(obj_class, obj_dict, obj_class.api, 'api') return i = 1 while (True): results = get_json_dict('{}{}'.format(url, i)) if (results is None): break for obj_dict in results: persist(obj_class, obj_dict, obj_class.api, 'api') i = i + 1 ############################# db = DbManager() # db is global, so not passing into populate populate_swapi('https://swapi.co/api/films/', swapi.Film) populate_swapi('https://swapi.co/api/people/', swapi.Person) populate_swapi('https://swapi.co/api/planets/', swapi.Planet) populate_swapi('https://swapi.co/api/species/', swapi.Species) populate_swapi('https://swapi.co/api/starships/', swapi.Starship) populate_swapi('https://swapi.co/api/vehicles/', swapi.Vehicle) populate_dck('http://data.coding.kitchen/api/cities/', dck.City) populate_dck('http://data.coding.kitchen/api/clubs/', dck.Club) populate_dck('http://data.coding.kitchen/api/companies/', dck.Company) populate_dck('http://data.coding.kitchen/api/departments/', dck.Department) populate_dck('http://data.coding.kitchen/api/exchanges/', dck.Exchange) populate_dck('http://data.coding.kitchen/api/leagues/', dck.League) populate_dck('http://data.coding.kitchen/api/people/', dck.Person)
import json, requests
from pprint import pprint
import models as mo
from base import DbManager

# Module-level DbManager instance.
DB_MANAGER = DbManager()
# URL template: first placeholder is the resource key, second the page.
KK_API = 'http://35.153.66.157/api/{}/{}'


def get_request(url):
    """GET *url* and return the response body decoded from JSON."""
    response = requests.get(url)
    return json.loads(response.text)


def get_count_all_item(key, max_pages=9):
    """Count the items the API lists for *key*.

    Pages ``1..max_pages`` are requested in order and their lengths summed;
    counting stops at the first page that cannot be fetched, decoded or
    measured. If that yields zero, the unpaginated URL is tried once.

    Args:
        key: Resource name inserted into ``KK_API``.
        max_pages: Highest page number to request (default 9, the limit that
            was previously hard-coded).

    Returns:
        Total number of items found; 0 when nothing could be read.
    """
    n = 0
    for page in range(1, max_pages + 1):
        url = KK_API.format(key, page)
        try:
            n = n + len(get_request(url))
        except Exception:
            # Best effort: a failing page ends the count. Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit through.
            break
    if n == 0:
        try:
            url = KK_API.format(key, '')
            n = len(get_request(url))
        except Exception:
            # Deliberately best effort: an unreachable endpoint counts as 0.
            pass
    return n
class SWDS:
    """Return stored ``swapi.co`` resources, creating rows that are missing."""

    def __init__(self):
        self.db = DbManager()

    def get_json(self, url):
        """Log and GET *url*; return the response body decoded from JSON."""
        print("GET\t<{}>".format(url))
        response = requests.get(url)
        return json.loads(response.text)

    def _get_or_create(self, model, url, parse):
        """Return the *model* row stored for *url*, downloading it if missing.

        Args:
            model: Model class with a ``url`` column.
            url: Address of the resource.
            parse: Unbound parse method of *model*, called as
                ``parse(entity, json_data)``.

        Returns:
            The stored row; or, for a new row, whatever ``db.save`` returns
            (falling back to the new instance when that is None); or None
            when the payload contains a ``detail`` key.
        """
        results = self.db.open().query(model).filter(model.url == url).all()
        if results:
            return results[0]
        json_data = self.get_json(url)
        if 'detail' in json_data:
            # Payloads carrying ``detail`` are not stored.
            return None
        entity = model()
        parse(entity, json_data)
        saved = self.db.save(entity)
        # Only get_person used to return after saving; every getter now
        # hands the new row back while keeping get_person's previous value.
        return entity if saved is None else saved

    def get_person(self, i):
        """Return the ``Person`` with id *i*, or None if it cannot be loaded."""
        url = 'https://swapi.co/api/people/{}/'.format(i)
        return self._get_or_create(Person, url, Person.parse_person)

    def get_planet(self, i):
        """Return the ``Planet`` with id *i*, or None if it cannot be loaded."""
        url = 'https://swapi.co/api/planets/{}/'.format(i)
        return self._get_or_create(Planet, url, Planet.parse_planet)

    def get_vehicle(self, i):
        """Return the ``Vehicle`` with id *i*, or None if it cannot be loaded."""
        url = 'https://swapi.co/api/vehicles/{}/'.format(i)
        return self._get_or_create(Vehicle, url, Vehicle.parse_vehicle)

    def get_starship(self, i):
        """Return the ``Starship`` with id *i*, or None if it cannot be loaded."""
        url = 'https://swapi.co/api/starships/{}/'.format(i)
        return self._get_or_create(Starship, url, Starship.parse_starship)

    def get_species(self, i):
        """Return the ``Species`` with id *i*, or None if it cannot be loaded."""
        url = 'https://swapi.co/api/species/{}/'.format(i)
        return self._get_or_create(Species, url, Species.parse_species)