def get_version(self):
    # Return version if it is cached.
    if self._version is not None:
        return self._version
    if self.is_https():
        server_cert, server_key, ca = self.get_server_certificate_files()
        client_cert, client_key = self.get_client_certificate_files(ca)
        self._version = utils.http_get(
            'localhost',
            self.port,
            method='POST',
            url='/server/version',
            key_file=client_key,
            cert_file=client_cert)
    else:
        self._version = utils.http_get(
            'localhost', self.port, method='POST', url='/server/version')
    return self._version

def get_version(self):
    # Return version if it is cached.
    if self._version is not None:
        return self._version
    if self.is_https():
        server_cert, server_key, ca = self.get_server_certificate_files()
        client_cert, client_key = self.get_client_certificate_files(ca)
        self._version = utils.http_get(
            'localhost',
            self.port,
            method='POST',
            url='/heartbeat',
            key_file=client_key,
            cert_file=client_cert,
            ca_cert=ca)
    else:
        self._version = utils.http_get(
            'localhost', self.port, method='POST', url='/heartbeat')
    return self._version

def kegg_weblink_pathway(path, dataset_string):
    url = 'www.kegg.jp/kegg-bin/show_pathway?' + dataset_string
    resp = utils.http_get(url, https=True)
    img_url = None
    for line in resp.text.split("\n"):
        line = line.strip()
        if line.startswith("<img src=\"/tmp/mark_pathway"):
            print(line)
            img_url = 'www.kegg.jp' + line.split("\"")[1]
            img_name = img_url.split("/")[-1]
            break
    if img_url is None:
        # No marked pathway image was found in the page.
        return
    img_data = utils.http_get(img_url, https=True).content
    with open(os.path.join(path, img_name), 'wb') as handler:
        handler.write(img_data)

def get_version(self):
    # Return version if it is cached.
    if self._version is not None:
        return self._version
    if self.is_https():
        server_cert, server_key, ca = self.get_server_certificate_files()
        client_cert, client_key = self.get_client_certificate_files(ca)
        common_name = NuclideCertificatesGenerator.get_common_name(server_cert)
        self._version = utils.http_get(
            common_name,
            self.port,
            method='POST',
            url='/heartbeat',
            key_file=client_key,
            cert_file=client_cert,
            ca_cert=ca)
    else:
        self._version = utils.http_get(
            'localhost', self.port, method='POST', url='/heartbeat')
    return self._version

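# The three get_version variants above all assume a utils.http_get helper that
# takes a host, a port, an HTTP method, a URL path, and optional TLS
# client-certificate files. That helper is not shown in this section; the
# sketch below is a hypothetical, minimal Python 3 implementation of the same
# interface using only the standard library. The parameter names mirror the
# call sites above; the fallback behavior when no CA bundle is given is an
# assumption.
import http.client
import ssl


def http_get(host, port, method='GET', url='/', key_file=None,
             cert_file=None, ca_cert=None, timeout=10):
    if cert_file and key_file:
        if ca_cert:
            # Verify the server against the provided CA bundle.
            context = ssl.create_default_context(cafile=ca_cert)
        else:
            # Assumption: with no CA bundle, skip server verification.
            context = ssl.create_default_context()
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        # Present the client certificate to the server.
        context.load_cert_chain(certfile=cert_file, keyfile=key_file)
        conn = http.client.HTTPSConnection(host, port, timeout=timeout,
                                           context=context)
    else:
        conn = http.client.HTTPConnection(host, port, timeout=timeout)
    try:
        conn.request(method, url)
        return conn.getresponse().read().decode('utf-8')
    finally:
        conn.close()
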
def __init__(self, url, attempts=NUMBER_OF_ATTEMPTS, quiet=False):
    self.url = url
    inmate_result = http_get(url, number_attempts=attempts, quiet=quiet)
    self.__inmate_found = inmate_result is not None
    if self.__inmate_found:
        inmate_doc = pq(inmate_result.content)
        self.__columns = inmate_doc('table tr:nth-child(2n) td')

def depth(self, coin):
    """
    :param coin: str
    :type coin: str
    :return: {
        "code": 0,         // status code
        "msg": "Success",  // message
        "data": {
            "asks": [
                ['0.00000210', '12867.7080'],
                ['0.00000209', '123770.4725'],
                ['0.00000208', '162688.6625']
            ],  // asks: sell orders [price, quantity], sorted by price from high to low [str, str]
            "bids": [
                ['0.00000195', '893.0000'],
                ['0.00000193', '22826.6243'],
                ['0.00000192', '90270.4217']
            ],  // bids: buy orders [price, quantity], sorted by price from high to low [str, str]
            "date": 1506047161  // timestamp
        }
    }
    """
    params = 'coin=%(coin)s' % {'coin': coin}
    return http_get(self.__url + BitZMarket.DEPTH_RESOURCE + '?' + params)

def depth(self, symbol):
    # type: (str) -> dict
    """
    :param symbol:
    :return: {
        'bids': [
            # [bid_price, bid_volume]
            # [float, float]
            [0.010104, 1129.16],  # bid_1
            [0.010103, 1000.0],   # bid_2
            [0.010102, 1000.0],   # bid_3
        ],
        'asks': [
            # [ask_price, ask_volume]
            # [float, float]
            [0.0102, 10000.0],    # ask_1
            [0.0106, 2799.02],    # ask_2
            [0.010694, 1000.0]    # ask_3
        ],
        'rawBids': None,
        'rawAsks': None,
        'id': 0,
        'seq': 0
    }
    """
    params = 'symbol=%(symbol)s' % {'symbol': symbol}
    return http_get(
        self.__url + AllCoinMarket.DEPTH_RESOURCE + '?' + params, False)

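# Hypothetical usage of the AllCoin depth() above: fetch the book and compute
# the top-of-book spread. Per the docstring, prices are floats with bids
# sorted high-to-low and asks low-to-high, so the best quote on each side is
# the first element. The market object and symbol are placeholders, and the
# sketch assumes http_get returns the parsed JSON dict shown above.
def best_bid_ask(market, symbol='btc_usdt'):
    book = market.depth(symbol)
    best_bid = book['bids'][0][0]  # highest buy price
    best_ask = book['asks'][0][0]  # lowest sell price
    return best_bid, best_ask, best_ask - best_bid
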
def get_vids(sid="", eid="", category="", vtid=""):
    vids = []
    vid_list_url = get_vid_list_url(sid=sid, category=category, vtid=vtid)
    res = utils.http_get(vid_list_url)
    json_data = json.load(res)
    status = int(json_data['status'])
    while status == 0:  # more to fetch
        vids.extend(scrape_vids(json_data['html']))
        last_id = vids[-1]['id']
        vid_list_url = get_vid_list_url(
            sid=sid, category=category, vtid=vtid, before=last_id)
        res = utils.http_get(vid_list_url)
        json_data = json.load(res)
        status = int(json_data['status'])
    return vids

def login(request):
    data = {'ret_code': 0, 'ret_data': {}}
    try:
        if request.method == 'POST':
            req_data = json.loads(request.body)
        else:
            req_data = request.GET
        jscode = req_data['jscode']
        sess = utils.http_get(config.JSCODE_SESSION_URL + jscode)
        if 'openid' in sess:
            sess_key = utils.save_session(sess)
            if sess_key:
                data['ret_code'] = 1
                data['sess_key'] = sess_key
            else:
                data['msg'] = 'save session error.'
            mark.info('%s\t%s\t%s\tlogin' % (
                sess['openid'], utils.getuip(request),
                time.strftime("%Y-%m-%d %H:%M:%S")))
        else:
            data['msg'] = 'fetch session error'
            logger.error(str(sess))
    except Exception as err:
        data['msg'] = 'program or internet error.'
        logger.error(str(err))
    res = json.dumps(data, ensure_ascii=False)
    return HttpResponse(res, content_type="application/json")

def crawl(self, url):
    # Fetch the novel page.
    html = utils.http_get(url, encode="gbk")
    html = self.parser.to_utf8(html)
    # html = utils.gbk_to_utf8(html)
    novel = self.parser.parse_novel_page(url, html)
    list_url = novel['list_url']
    html = utils.http_get(list_url, encode='gbk')
    html = self.parser.to_utf8(html)
    chapter_list = self.parser.parse_list_page(list_url, html)
    novel['chapter_list'] = chapter_list
    novel['chapters'] = len(chapter_list)
    novel['update_time'] = time.time()
    novel['last_chapter'] = chapter_list[-1]['url']
    return novel

def get_department_list(access_token):
    url = 'https://%s/department/list?' % API_ADDR
    args = {
        'access_token': access_token
    }
    url += urlencode(args)
    return http_get(url)

def _get_encrypt_datas(self, start_date, end_date, keywords):
    """
    :param start_date: datetime.date, e.g. 2018-10-01
    :param end_date: datetime.date, e.g. 2018-10-01
    :param keywords: list, e.g. ['1', '2', '3']
    """
    request_args = {
        'word': json.dumps([[{
            'name': keyword,
            'wordType': 1
        }] for keyword in keywords]),
        'startDate': start_date.strftime('%Y-%m-%d'),
        'endDate': end_date.strftime('%Y-%m-%d'),
        'area': self.area,
    }
    url = 'http://index.baidu.com/api/SearchApi/index?' + urlencode(
        request_args)
    html = utils.http_get(url, self.cookies)
    datas = json.loads(html)
    uniqid = datas['data']['uniqid']
    encrypt_datas = []
    for single_data in datas['data']['userIndexes']:
        encrypt_datas.append(single_data)
    return (encrypt_datas, uniqid)

def get_access_token(corp_id, secret):
    url = 'https://%s/gettoken?' % API_ADDR
    args = {
        'corpid': corp_id,
        'corpsecret': secret
    }
    url += urlencode(args)
    return http_get(url)

def get_user(access_token, userid):
    url = 'https://%s/user/get?' % API_ADDR
    args = {
        'access_token': access_token,
        'userid': userid
    }
    url += urlencode(args)
    return http_get(url)

def get_jsapi_ticket(access_token):
    url = 'https://%s/get_jsapi_ticket?' % API_ADDR
    args = {
        'access_token': access_token,
        'type': 'jsapi'
    }
    url += urlencode(args)
    return http_get(url)

def delete_department(access_token, department_id):
    url = 'https://%s/department/delete?' % API_ADDR
    args = {
        'access_token': access_token,
        'id': department_id
    }
    url += urlencode(args)
    return http_get(url)

def get_user_id(access_token, code):
    url = 'https://%s/user/getuserinfo?' % API_ADDR
    args = {
        'access_token': access_token,
        'code': code
    }
    url += urlencode(args)
    return http_get(url)

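# A hypothetical end-to-end use of the helpers above: exchange a corp id and
# secret for an access token, then resolve an OAuth code to a user id. The
# corp_id, secret, and code values are caller-supplied placeholders, and the
# sketch assumes http_get returns each endpoint's parsed JSON as a dict.
def resolve_user(corp_id, secret, code):
    token_resp = get_access_token(corp_id, secret)
    access_token = token_resp['access_token']  # assumed parsed JSON dict
    return get_user_id(access_token, code)
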
def execute():
    grader_father = os.path.abspath(
        os.path.dirname(os.getcwd()) + os.path.sep + "..")
    data_dir = grader_father + os.path.sep + "data" + os.path.sep + "app"
    logger.debug("downloader starting!")
    while True:
        data = get_app_list()
        for _id, link in data:
            logger.debug("id: %s, link: %s" % (_id, link))
            filename = link.split("/")[-1]
            dst = data_dir + os.path.sep + filename
            if not os.path.exists(dst):
                http_get(link, dst)
                logger.info("Installer Downloaded: " + dst)
            else:
                logger.info("Installer exists: " + dst)
        time.sleep(5)

def get_department_list(access_token, fetch_child=True, parentid=1):
    url = 'https://%s/department/list?' % API_ADDR
    args = {
        'access_token': access_token,
        'fetch_child': fetch_child,
        'id': parentid
    }
    url += urlencode(args)
    return http_get(url)

def get_department_detail_userlist(access_token, department_id, fetch_child=0):
    url = 'https://%s/user/list?' % API_ADDR
    args = {
        'access_token': access_token,
        'department_id': department_id,
        'fetch_child': fetch_child
    }
    url += urlencode(args)
    return http_get(url)

def scrape_season_list():
    season_list = []
    html = utils.http_get("/en/videos/all_videos/")
    soup = BeautifulSoup(html)
    for year in soup.find("select", id="filter-by-year").find_all("option"):
        if year.attrs['value'] != "":
            season_list.append(year.text)
    return season_list

def query(self, type, tag, page_limit=40, page_start=0):
    resp = utils.http_get(
        self.API, {
            'type': type,
            'tag': tag,
            'sort': 'recommend',
            'page_limit': page_limit,
            'page_start': page_start
        })
    return json.loads(resp)['subjects'] if resp else None

def get_balance(self):
    # type: () -> dict
    params = {
        'accesskey': self.__access_key,
        'nonce': str(int(time.time() * 1000))
    }
    params['signature'] = build_exx_sign(params, self.__secret_key)
    param_str = ('accesskey=%(accesskey)s&nonce=%(nonce)s'
                 '&signature=%(signature)s') % params
    return http_get(self.__url + ExxTrade.BALANCE_RESOURCE + '?' + param_str)

def deploy_ci(client, project, install):
    """Deploys a package by downloading the latest CircleCI artifact using
    ci:<user>/<repo>

    Requires the CIRCLECI environment variable be set with an access token
    """
    url = "https://circleci.com/api/v1.1/project/github/%s?circle-token=%s" % (
        project, os.environ['CIRCLECI'])
    build = http_get(url).json()[0]["build_num"]
    url = "https://circleci.com/api/v1.1/project/github/%s/%s/artifacts?circle-token=%s" % (
        project, build, os.environ['CIRCLECI'])
    for file in http_get(url).json():
        if install and file["pretty_path"].endswith("-install.zip"):
            url = file["url"]
        elif not install and file["pretty_path"].endswith("-upgrade.zip"):
            url = file["url"]
    temp = tempfile.NamedTemporaryFile(delete=False)
    download_file(url + "?circle-token=%s" % (os.environ['CIRCLECI']),
                  temp.name)
    client.deploy_package(project + ".zip", temp)

def thumb(self, url):
    content = utils.http_get(url, timeout=self.TIMEOUT)
    if not content:
        return None
    try:
        img = Image.open(StringIO(content))
        img.thumbnail(self.SIZE)
        thumb_data = StringIO()
        img.save(thumb_data, 'JPEG')
        thumb_data.seek(0)
        return thumb_data.getvalue()
    except IOError, e:
        print('PIL error: ' + str(e))
        return None

def scrape(slug):
    episode_list = []
    response = utils.http_get(slug + '?fields=true')
    json_data = json.load(response)
    for ep in json_data['episodes']:
        new_ep = make_ep_obj(ep)
        new_ep.showImage = json_data['show']['image']['showImage']
        episode_list.append(new_ep)
    return sorted(episode_list, key=lambda k: k.get_episodeNumber())

def get_open_orders(self, currency, order_type):
    # type: (str, str) -> list
    params = {
        'accesskey': self.__access_key,
        'currency': currency,
        'nonce': str(int(time.time() * 1000)),
        'pageIndex': '1',
        'type': order_type
    }
    params['signature'] = build_exx_sign(params, self.__secret_key)
    param_str = ('accesskey=%(accesskey)s&currency=%(currency)s'
                 '&nonce=%(nonce)s&pageIndex=%(pageIndex)s&type=%(type)s'
                 '&signature=%(signature)s') % params
    return http_get(self.__url + ExxTrade.OPEN_ORDERS_RESOURCE + '?' + param_str)

def kegg_map_api(ko_list):
    if len(ko_list) == 0:
        return []
    pathway_list = []
    api_url = 'rest.kegg.jp/link/pathway/'
    for ko in ko_list:
        resp = utils.http_get(api_url + ko, https=False)
        if resp.text.startswith(ko):
            for line in resp.text.splitlines():
                path_id = line.split('\tpath:')[1].strip()
                if path_id.startswith('ko'):
                    pathway_list.append(path_id)
        else:
            pathway_list = []
    return pathway_list

def cancel_order(self, currency, order_id):
    # type: (str, str) -> bool
    params = {
        'accesskey': self.__access_key,
        'currency': currency,
        'id': str(order_id),
        'nonce': str(int(time.time() * 1000))
    }
    params['signature'] = build_exx_sign(params, self.__secret_key)
    param_str = ('accesskey=%(accesskey)s&currency=%(currency)s&id=%(id)s'
                 '&nonce=%(nonce)s&signature=%(signature)s') % params
    result = http_get(self.__url + ExxTrade.CANCEL_RESOURCE + '?' + param_str)
    print(result)
    if result['code'] == 100:
        return True
    print(result['message'])
    return False

def get_captcha(self):
    rand = random.random()
    link = self.captcha_URI + str(rand) + '&' + self.session
    print "Downloading captcha image...",
    sys.stdout.flush()
    img = utils.http_get(link, self.booking_URI)
    print 'OK'
    f = open('captcha.png', 'wb')
    f.write(img)
    f.close()
    webbrowser.open('captcha.png')
    return raw_input('Captcha text: ')

def get_department_detail_userlist(access_token, department_id,
                                   offset=None, size=None, order=None):
    url = 'https://%s/user/list?' % API_ADDR
    args = {'access_token': access_token, 'department_id': department_id}
    # Handle the optional pagination parameters.
    if offset is not None:
        args['offset'] = offset
    if size is not None:
        args['size'] = size
    if order is not None:
        args['order'] = order
    url += urlencode(args)
    return http_get(url)

def k_line(self, coin, k_type):
    """
    :param coin:
    :type coin: str
    :param k_type: one of 1m, 5m, 15m, 30m, 1h, 1d
    :type k_type: str
    :return: [[timestamp, open, high, low, close, volume]]
             [[int, float, float, float, float, float]]
        [
            [1517063160000, 2.24e-06, 2.24e-06, 2.24e-06, 2.24e-06, 100.0],
            [1517063220000, 2.24e-06, 2.27e-06, 2.24e-06, 2.24e-06, 1790],
            [1517063280000, 2.24e-06, 2.24e-06, 2.24e-06, 2.24e-06, 100.0]
        ]
    """
    params = 'coin=%(coin)s&type=%(type)s' % {'coin': coin, 'type': k_type}
    ret = http_get(self.__url + BitZMarket.K_LINE_RESOURCE + '?' + params)
    if 'code' in ret and ret['code'] == 0:
        return json.loads(ret['data']['datas']['data'])

def download_chaper(imgs_url, dst_dir):
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    # imgs_url = Parse.get_imgs_url(chp_url)
    for img_url in imgs_url:
        img_name = img_url[img_url.rfind("/") + 1:]
        img_path = os.path.join(dst_dir, img_name)
        if os.path.exists(img_path):
            print "skip:%s" % img_path
            continue
        img_context = http_get(img_url)
        with open(img_path, "wb") as f:
            f.write(img_context)
        print "download %s:%s" % (img_url, img_path)

def order(self, order_type, currency, price, amount):
    # type: (str, str, str, str) -> str
    params = {
        'accesskey': self.__access_key,
        'amount': amount,
        'currency': currency,
        'nonce': str(int(time.time() * 1000)),
        'price': price,
        'type': order_type
    }
    params['signature'] = build_exx_sign(params, self.__secret_key)
    param_str = ('accesskey=%(accesskey)s&amount=%(amount)s'
                 '&currency=%(currency)s&nonce=%(nonce)s&price=%(price)s'
                 '&type=%(type)s&signature=%(signature)s') % params
    result = http_get(self.__url + ExxTrade.ORDER_RESOURCE + '?' + param_str)
    print(result)
    if result['code'] == 100:
        return result['id']
    print(result['message'])
    return None

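# Hypothetical usage of the ExxTrade helpers above: place a limit buy and
# immediately cancel it. The trade object and currency pair are placeholders;
# per the code above, order() returns None when the exchange rejects the
# request and cancel_order() returns a bool.
def place_and_cancel(trade, currency='eth_btc', price='0.030', amount='1'):
    order_id = trade.order('buy', currency, price, amount)
    if order_id is not None:
        return trade.cancel_order(currency, order_id)
    return False
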
def kegg_id_api(operation, identifer):
    if len(identifer) == 0:
        return ''
    if operation == 'conv':
        api_url = 'rest.kegg.jp/conv/genes/'
        query_url = 'ncbi-geneid:' + str(identifer)
    elif operation == 'conv_uniprot':
        api_url = 'rest.kegg.jp/conv/genes/'
        query_url = 'up:' + str(identifer)
    elif operation == 'ko':
        api_url = 'rest.kegg.jp/link/ko/'
        query_url = str(identifer)
    else:
        # Unknown operation: warn and bail out before api_url is used.
        logger.warning('operation ' + operation + ' not found.')
        return []
    kegg_list = []
    resp = utils.http_get(api_url + query_url, https=False)
    if resp.text.startswith(query_url):
        keggs = resp.text.strip().split('\n')
        for kegg in keggs:
            kegg_list.append(kegg.split('\t')[1])
    return kegg_list

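# A hypothetical chain of the KEGG helpers above: map an NCBI gene id to KEGG
# gene ids, link each to KO identifiers, then collect the linked ko pathways.
# The gene id argument and the flattening step are assumptions for
# illustration, not part of the original code.
def pathways_for_gene(ncbi_gene_id):
    kegg_genes = kegg_id_api('conv', ncbi_gene_id)
    ko_lists = [kegg_id_api('ko', gene) for gene in kegg_genes]
    ko_ids = [ko for sub in ko_lists for ko in sub]  # flatten per-gene lists
    return kegg_map_api(ko_ids)
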
def scrape():
    show_list = []
    response = utils.http_get('shows?take=-1')
    json_data = json.load(response)
    for show in json_data['payload']:
        new_show = classes.Show()
        new_show.slug = show['slug']
        new_show.title = show['title']
        new_show.drm = show['drm']
        new_show.episodeCount = show['episodeCount']
        new_show.description = show['description']
        new_show.showImage = show['image']['showImage']
        new_show.genre = show['genre']
        if new_show.drm is True:
            continue
        if new_show.episodeCount > 0:
            show_list.append(new_show)
    return sorted(show_list, key=lambda k: k.get_title())

def depth(self, currency):
    # type: (str) -> dict
    """
    :param currency: bcc_btc, spc_qtum
    :return: dict {
        'timestamp': 1516520422,
        'asks': [                      # [sell_price, sell_quantity] [str, str]
            ['0.008600', '51200.00'],  # ask_3
            ['0.008500', '1187.62'],   # ask_2
            ['0.008479', '4234.88']    # ask_1
        ],
        'bids': [                      # [buy_price, buy_quantity] [str, str]
            ['0.008200', '3508.67'],   # bid_1
            ['0.008125', '6606.25'],   # bid_2
            ['0.008124', '30000.00']   # bid_3
        ]
    }
    """
    params = 'currency=%(currency)s' % {'currency': currency}
    return http_get(self.__url + ExxMarket.DEPTH_RESOURCE + '?' + params)

def crawl_content(self, url):
    html = utils.http_get(url, encode='gbk')
    html = self.parser.to_utf8(html)
    content = self.parser.parse_content_page(url, html)
    return content

def web_index():
    return utils.http_get(config.msg_index_url).split()

#!/usr/bin/env python
import config
import utils


def web_index():
    return utils.http_get(config.msg_index_url).split()


def parse_index(index):
    s = set()
    for f in index:
        s.add(f.strip())
    return s


if __name__ == "__main__":
    try:
        l = open(config.logdir + "/" + config.msg_index)
        local = parse_index(l)
    except IOError:
        local = set()
    web = parse_index(web_index())
    diff_web = web - local
    for d in reversed(sorted(diff_web)):
        print "Getting", d
        msg = utils.http_get(config.base_url + d)
        utils.write_file(config.logdir + "/" + d, msg)
    utils.update_index(config.logdir)

def handle(self, *args, **options):
    """
    Copies all data from each API endpoint of the server's CookCountyJail
    database onto the caller's local machine.

    NOTE: For the time being, cloning the whole database is impossible; the
    'courtdate' and 'housinghistory' API endpoints aren't functional as it
    stands: each will stall for about 4 minutes before giving up with a 504
    error if you try to query them with 'limit=0', which is the default.

    Thus it's recommended to either supply an '--api' flag excluding those
    two APIs (e.g. 'countyinmate, courtlocation, housinglocation'), or to
    supply a '--limit' flag with some large number (e.g. '2000'). For best
    results, you might want to do both, in two separate runs:

        ./manage.py clone_db -a 'countyinmate, courtlocation, housinglocation'

    And then:

        ./manage.py clone_db -a 'courtdate, housinghistory' -l 2000

    You could just see how high you can increase that limit before it fails.
    """
    base_url = "http://cookcountyjail.recoveredfactory.net/api/1.0/"
    suffix = "?format=json&limit=0"
    exclude = []

    # OrderedDict mapping each API to:
    # --> a Django model that can be used to create an object; and,
    # --> a set of identifying filters needed to initialize that object
    api_map = collections.OrderedDict([
        ('countyinmate', (CountyInmate, ['jail_id'])),
        ('courtlocation', (CourtLocation, ['location'])),
        ('housinglocation', (HousingLocation, ['housing_location'])),
        ('courtdate', (CourtDate, ['date', 'inmate', 'location'])),
        ('housinghistory', (HousingHistory, ['inmate', 'housing_location']))
    ])

    # dict mapping an identifying filter to:
    # --> a key to index into our json object with, whose value will be used
    #     to initialize the object we'll create with our foreign model
    # --> an API to use to get a model and initializing filters; these will
    #     be used to create the foreign object
    foreign_map = {
        'inmate': ('inmate_jail_id', 'countyinmate'),
        'location': ('location', 'courtlocation'),
        'housing_location': ('location_id', 'housinglocation')
    }

    # set a limit if necessary
    if options['limit']:
        suffix = '?format=json&limit=%s' % options['limit']

    # build a list of excluded APIs if necessary
    if options['api']:
        for k in api_map.keys():
            if k not in options['api'].split(', '):
                exclude.append(k)

    # iterate over our APIs
    for api in api_map.keys():
        # if the '--api' flag was given but this API wasn't included, skip it.
        if api in exclude:
            continue
        # the model we'll use to re-create all of this API's data
        model = api_map[api][0]
        # the identifying filters we'll need to initialize each object
        identifiers = api_map[api][1]
        # query our server to get all the data of a particular API
        result = http_get("%s%s%s" % (base_url, api, suffix),
                          number_attempts=1, quiet=False,
                          retrieval_msg="retrieving from API:")
        if result:
            # parse the json we get back into a dictionary of keys and values
            json_results = json.loads(result.text)
            # all our data is stored inside the 'objects' attribute
            for json_obj in json_results['objects']:
                # a dict of filters we'll use to initialize our object when
                # we create it. if we have foreign keys to deal with, we'll
                # have to first get_or_create an object based on those
                # foreign keys.
                filters = {}
                # but if there's only one filter, the model doesn't have any
                # foreign keys, so it's a simpler case.
                if len(identifiers) == 1:
                    # just copy the value from the json object into our dict
                    # of filters
                    filters = {identifiers[0]: json_obj[identifiers[0]]}
                else:
                    # otherwise, iterate through the filters
                    for i in identifiers:
                        # if we have defined an attribute as requiring a
                        # foreign-key lookup, we get to work.
                        if i in foreign_map.keys():
                            json_key = foreign_map[i][0]
                            foreign_api = foreign_map[i][1]
                            foreign_model = api_map[foreign_api][0]
                            foreign_key = api_map[foreign_api][1][0]
                            # do a foreign-key lookup; this means doing
                            # get_or_create to instantiate the object before
                            # including it as an attribute of our object;
                            # note that we take the first match if there are
                            # multiple
                            try:
                                foreigner, created = foreign_model.objects.get_or_create(
                                    **{foreign_key: json_obj[json_key]})
                            except MultipleObjectsReturned:
                                foreigner = foreign_model.objects.filter(
                                    **{foreign_key: json_obj[json_key]})[0]
                            # include the result with our list of filters
                            filters[i] = foreigner
                        else:
                            # if an attribute isn't defined in our map above
                            # as a foreign key, it gets pulled from the json
                            # object like normal.
                            filters[i] = json_obj[i]
                # here, we actually create our object, again taking the
                # first available if there are multiple
                try:
                    obj, created = model.objects.get_or_create(**filters)
                except MultipleObjectsReturned:
                    obj = model.objects.filter(**filters)[0]
                # now that we've created our object, take all the remaining
                # attributes from our json object and assign them to our new
                # django object.
                for k, v in json_obj.iteritems():
                    # if the value is None, it stays None; if we already
                    # assigned the key in filters, don't reassign; if the key
                    # is not part of our model, don't bother assigning it
                    # (e.g. 'resource_uri', '_state')
                    if k not in filters.keys() and v is not None and \
                            k in [a for a in obj.__dict__ if not a.startswith('_')]:
                        setattr(obj, k, v)
                # finally, we're done.
                obj.save()

#!/usr/bin/env python
import config
import utils


def web_index():
    return utils.http_get(config.msg_index_url).split()


def parse_index(index):
    s = set()
    for f in index:
        s.add(f.strip())
    return s


if __name__ == "__main__":
    try:
        l = open(config.logdir + "/" + config.msg_index)
        local = parse_index(l)
    except IOError:
        local = set()
    web = parse_index(web_index())
    diff_web = web - local
    for d in reversed(sorted(diff_web)):
        print "Getting", d
        msg = utils.http_get(config.msg_url + d)
        utils.write_file(config.logdir + "/" + d, msg)
    utils.update_index(config.logdir)

def get_department_detail_userlist(access_token, department_id, fetch_child=0):
    url = "https://%s/user/list?" % API_ADDR
    args = {"access_token": access_token,
            "department_id": department_id,
            "fetch_child": fetch_child}
    url += urlencode(args)
    return http_get(url)

def get_department_list(access_token):
    url = 'https://%s/department/list?' % API_ADDR
    args = {'access_token': access_token}
    url += urlencode(args)
    return http_get(url)

def delete_department(access_token, department_id):
    url = 'https://%s/department/delete?' % API_ADDR
    args = {'access_token': access_token, 'id': department_id}
    url += urlencode(args)
    return http_get(url)

def get_user(access_token, userid):
    url = "https://%s/user/get?" % API_ADDR
    args = {"access_token": access_token, "userid": userid}
    url += urlencode(args)
    return http_get(url)

def handle(self, *args, **options):
    """
    Copies all data from each API endpoint of the server's CookCountyJail
    database onto the caller's local machine.

    NOTE: For the time being, cloning the whole database is impossible; the
    'courtdate' and 'housinghistory' API endpoints aren't functional as it
    stands: each will stall for about 4 minutes before giving up with a 504
    error if you try to query them with 'limit=0', which is the default.

    Thus it's recommended to either supply an '--api' flag excluding those
    two APIs (e.g. 'countyinmate, courtlocation, housinglocation'), or to
    supply a '--limit' flag with some large number (e.g. '2000'). For best
    results, you might want to do both, in two separate runs:

        ./manage.py clone_db -a 'countyinmate, courtlocation, housinglocation'

    And then:

        ./manage.py clone_db -a 'courtdate, housinghistory' -l 2000

    You could just see how high you can increase that limit before it fails.
    """
    base_url = "http://cookcountyjail.recoveredfactory.net/api/1.0/"
    suffix = "?format=json&limit=0"
    exclude = []

    # OrderedDict mapping each API to:
    # --> a Django model that can be used to create an object; and,
    # --> a set of identifying filters needed to initialize that object
    api_map = collections.OrderedDict([
        ('countyinmate', (CountyInmate, ['jail_id'])),
        ('courtlocation', (CourtLocation, ['location'])),
        ('housinglocation', (HousingLocation, ['housing_location'])),
        ('courtdate', (CourtDate, ['date', 'inmate', 'location'])),
        ('housinghistory', (HousingHistory, ['inmate', 'housing_location']))
    ])

    # dict mapping an identifying filter to:
    # --> a key to index into our json object with, whose value will be used
    #     to initialize the object we'll create with our foreign model
    # --> an API to use to get a model and initializing filters; these will
    #     be used to create the foreign object
    foreign_map = {
        'inmate': ('inmate_jail_id', 'countyinmate'),
        'location': ('location', 'courtlocation'),
        'housing_location': ('location_id', 'housinglocation')
    }

    # set a limit if necessary
    if options['limit']:
        suffix = '?format=json&limit=%s' % options['limit']

    # build a list of excluded APIs if necessary
    if options['api']:
        for k in api_map.keys():
            if k not in options['api'].split(', '):
                exclude.append(k)

    # iterate over our APIs
    for api in api_map.keys():
        # if the '--api' flag was given but this API wasn't included, skip it.
        if api in exclude:
            continue
        # the model we'll use to re-create all of this API's data
        model = api_map[api][0]
        # the identifying filters we'll need to initialize each object
        identifiers = api_map[api][1]
        # query our server to get all the data of a particular API
        result = http_get("%s%s%s" % (base_url, api, suffix),
                          number_attempts=1, quiet=False,
                          retrieval_msg="retrieving from API:")
        if result:
            # parse the json we get back into a dictionary of keys and values
            json_results = json.loads(result.text)
            # all our data is stored inside the 'objects' attribute
            for json_obj in json_results['objects']:
                # a dict of filters we'll use to initialize our object when
                # we create it. if we have foreign keys to deal with, we'll
                # have to first get_or_create an object based on those
                # foreign keys.
                filters = {}
                # but if there's only one filter, the model doesn't have any
                # foreign keys, so it's a simpler case.
                if len(identifiers) == 1:
                    # just copy the value from the json object into our dict
                    # of filters
                    filters = {identifiers[0]: json_obj[identifiers[0]]}
                else:
                    # otherwise, iterate through the filters
                    for i in identifiers:
                        # if we have defined an attribute as requiring a
                        # foreign-key lookup, we get to work.
                        if i in foreign_map.keys():
                            json_key = foreign_map[i][0]
                            foreign_api = foreign_map[i][1]
                            foreign_model = api_map[foreign_api][0]
                            foreign_key = api_map[foreign_api][1][0]
                            # do a foreign-key lookup; this means doing
                            # get_or_create to instantiate the object before
                            # including it as an attribute of our object;
                            # note that we take the first match if there are
                            # multiple
                            try:
                                foreigner, created = foreign_model.objects.get_or_create(
                                    **{foreign_key: json_obj[json_key]})
                            except MultipleObjectsReturned:
                                foreigner = foreign_model.objects.filter(
                                    **{foreign_key: json_obj[json_key]})[0]
                            # include the result with our list of filters
                            filters[i] = foreigner
                        else:
                            # if an attribute isn't defined in our map above
                            # as a foreign key, it gets pulled from the json
                            # object like normal.
                            filters[i] = json_obj[i]
                # here, we actually create our object, again taking the
                # first available if there are multiple
                try:
                    obj, created = model.objects.get_or_create(**filters)
                except MultipleObjectsReturned:
                    obj = model.objects.filter(**filters)[0]
                # now that we've created our object, take all the remaining
                # attributes from our json object and assign them to our new
                # django object.
                for k, v in json_obj.iteritems():
                    # if the value is None, it stays None; if we already
                    # assigned the key in filters, don't reassign; if the key
                    # is not part of our model, don't bother assigning it
                    # (e.g. 'resource_uri', '_state')
                    if k not in filters.keys() and v is not None and \
                            k in [a for a in obj.__dict__ if not a.startswith('_')]:
                        setattr(obj, k, v)
                # finally, we're done.
                obj.save()

def scrape_ep(slug):
    ep = json.load(utils.http_get(slug))
    new_ep = make_ep_obj(ep)
    return new_ep