Example #1
def getAllPullRequestsForRepo(userAndProject):
	pullRequestRecords = []

	conn = httplib.HTTPSConnection('api.github.com')

	conn.request("GET", '/repos/' + userAndProject + '/pulls?state=open&per_page=100')
	pullRequests = demjson.decode(conn.getresponse().read())

	conn.request("GET", '/repos/' + userAndProject + '/pulls?state=closed&per_page=100')
	pullRequests.extend(demjson.decode(conn.getresponse().read()))

	# print '<div>%s has %d pull requests</div>' % (userAndProject, len(pullRequests))

	# html url
	# https://github.com/mozila/pdf.js/pull/643

	for pullRequest in pullRequests:
		pullRecord = {
			'ts': private_strptime(pullRequest["created_at"][0:19]),
			'kind': 'pull created',
			'category': canonicalizeCategory(userAndProject),
			'user': canonicalizeUsername(pullRequest["user"]["login"]),
			'url': pullRequest["html_url"],
			'title': pullRequest["title"][0:80].replace('\n', ' ')
		}
		pullRequestRecords.append(pullRecord)


	return pullRequestRecords
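
Note that per_page=100 is the most the GitHub API returns in one response, so the snippet above silently drops anything beyond the first hundred pull requests per state. A minimal page-following sketch, assuming the requests library is available (the helper name and the use of requests are not part of the original script):

import requests

def getAllPullRequestPages(userAndProject, state):
	# Walk GitHub's page-numbered pagination until a short page signals the last one.
	pullRequests = []
	page = 1
	while True:
		resp = requests.get('https://api.github.com/repos/' + userAndProject + '/pulls',
			params={'state': state, 'per_page': 100, 'page': page})
		batch = resp.json()
		pullRequests.extend(batch)
		if len(batch) < 100:
			return pullRequests
		page += 1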
Example #2
    def _get_company_as_string(self):
        comp1 = esc_str(self.get_company())
        if not comp1:
            return 'nil'

        comp = copy.deepcopy(self.get_custom('company'))
        ver = self.get_store().get_file_format()
        ## FIXME: This is an egregious design violation, as noted earlier. We
        ## should move all such version specific conversions to pimdb_bb.el
        if ver == '6':
            if comp and len(comp) > 0:
                comp = demjson.decode(comp)
                comp = [chompq(x) for x in comp]
            else:
                comp = []

            comp.insert(0, comp1)
            return unchompq('; '.join(comp))
        elif ver == '7':
            if comp and len(comp) > 0:
                comp = demjson.decode(comp)
                comp.insert(0, unchompq(comp1))
            else:
                comp = [unchompq(comp1)]

            return ('(' + ' '.join(comp) + ')')
Example #3
def get_os_id(params):
    """
    Function to get Open States ID.  Please do not abuse API key.
    """
    apikey = '49c5c72c157d4b37892ddb52c63d06be'
    params['apikey'] = apikey

    os_url = create_os_url(params)
    raw = scraperwiki.scrape(os_url)
    os_data = demjson.decode(raw)
    os_found = len(os_data)
    os_id = ''

    # Use first if any found, if not remove last name
    if os_found > 0:
        os_id = os_data[0]['id']
    else:
        del params['first_name']
        os_url = create_os_url(params)
        raw = scraperwiki.scrape(os_url)
        os_data = demjson.decode(raw)
        os_found = str(len(os_data)) + '-removed-first'
        if len(os_data) > 0:
            os_id = os_data[0]['id']

    return {
        'found': os_found,
        'id': os_id
    }
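
create_os_url is not shown in the snippet; a plausible sketch, assuming it targets the Open States v1 legislator-search endpoint (both the URL and the helper body are guesses from the call sites):

import urllib

def create_os_url(params):
    # Build a legislator-search URL from the dict of query parameters.
    return 'http://openstates.org/api/v1/legislators/?' + urllib.urlencode(params)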
Example #4
 def __getDataDemoFromString__(self,jsonStr): 
     self.crawlerTime = datetime.datetime.now()
     dataDemo = jsonDataDemo()
     items =[]
     variables =[]
     text = demjson.decode(jsonStr)
     dataDemo.__setTableName__('tableName',text.get('tableName'))
     dataDemo.__setPrimaryKey__('primaryKey',text.get('primaryKey'))
     # replace the date/time placeholders with the current time
     startUrl = text.get('URL')
     startUrl = startUrl.replace("##year", str(self.crawlerTime.year))
     startUrl = startUrl.replace("##month", str(self.crawlerTime.month))
     startUrl = startUrl.replace("##day", str(self.crawlerTime.day))
     startUrl = startUrl.replace("##hour", str(self.crawlerTime.hour))
     startUrl = startUrl.replace("##minute", str(self.crawlerTime.minute))
     startUrl = startUrl.replace("##second", str(self.crawlerTime.second))
     
     dataDemo.__setURL__('URL',startUrl)
     
     dataDemo.__setTask__('task',text.get('task'))
     for item in text.get('items'):
         items.append(demjson.decode(demjson.encode(item)))
         dataDemo.__setItems__('items',items)
     for variable in text.get('variables'):
         variables.append(demjson.decode(demjson.encode(variable)))
     dataDemo.__setVariables__('variables',variables)
     
     chirdrenObjs = []
     for chirdren in text.get('chirdren'):
         chirdrenJson = json.dumps(chirdren)
         chirdrenObj = self.__getDataDemoFromString__(chirdrenJson)
         chirdrenObjs.append(chirdrenObj)
     dataDemo.__setChirdren__(chirdrenObjs)
     return dataDemo
Example #5
def get_description(artist_name):
	"""Grabs a bunch of info about the band from Seevl (or last.fm for description if there isn't 
	   one on seevl). Returns a triple: 	
	   ("Description Text", "Genre", [("link_type", "link_url]),..,("link_type", "link_url)] )"""
	#Set up the variables in case everything fails
	artist_description = "We don't have a description or bio for this band, sorry :("
	genre = "Unknown"
	url_list = []	
	try:
		#Set up the headers etc for Seevl.net API and request artist infos
		url = 'http://data.seevl.net/entity/?prefLabel={name}'.format(name=urllib.quote(artist_name))
		headers = { 'Accept' : 'application/json',
		            'X_APP_ID' : SV_ID,
		            'X_APP_KEY' : SV_KEY }
		req = urllib2.Request(url, None, headers)
		response = urllib2.urlopen(req)
		artist_page = response.read()
		artist_info = json.decode(artist_page) #This is a dict with a load of seevl info about artist
		#If seevl doesn't have a description then look for it on last.fm to see if they have one:
		if len(artist_info['results']) == 0:
			try:
				lfm_url = "http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist={artist}&api_key={key}&format=json".format(artist=urllib.quote(artist_name), key=LF_KEY)
				lfm_info = json.decode(urllib2.urlopen(lfm_url).read())
				artist_description = "{text}   {attrib}".format(text=lfm_info['artist']['bio']['summary'].encode('utf-8'), attrib="Description from last.fm")
				lfm_description = True
				#Grab the genre off last.fm too if Seevl doesn't have it
				tag_url = "http://ws.audioscrobbler.com/2.0/?method=artist.gettoptags&artist={artist}&api_key={key}&format=json".format(artist=urllib.quote(artist_name).encode('utf-8'), key=LF_KEY)
				tag_data = json.decode(urllib2.urlopen(tag_url).read())
				genre = tag_data['toptags']['tag'][0]['name'].title()
			except Exception, e: #Fun error handling
				print "Error", e
		else:
Example #6
def get_collaborators(artistName):
   
    collaborators = set ()
    #artistNameQuoted = artistName.replace(" ","+")
    #artistNameQuoted = urllib.quote_plus(artistName)
   
    # Search in the artist field
    #scrape_url = "http://itunes.apple.com/search?term=%s&limit=5000&entity=song&attribute=artistTerm" % artistNameQuoted
    params = {'term' : artistName, 'limit' : 5000, 'entity' : 'song', 'attribute' : 'artistTerm' }
    search_json = demjson.decode(get_json(params))
    
    collaborators.update(get_collaborators_from_json(search_json, artistName))

    # Search in the title field (will fail for artists whose names also appear as unrelated song titles...) 
    #scrape_url = "http://itunes.apple.com/search?term=%s&limit=5000&entity=song&attribute=songTerm" % artistNameQuoted
    params = {'term' : artistName, 'limit' : 5000, 'entity' : 'song', 'attribute' : 'songTerm' }
    search_json = demjson.decode(get_json(params))
    #print search_json
    
    collaborators.update(get_collaborators_from_json(search_json, artistName))
    
    for collaborator in collaborators:
        
        data = {'artist' : artistName, 'collaborator' : collaborator }
        scraperwiki.sqlite.save(unique_keys=['artist', 'collaborator'], data=data)
    return collaborators  
Example #7
 def DELETE(self):
      input_data = web.data()
      data = urlparse.parse_qs(input_data)
      v_ct_fids = db.query("select distinct t.source_fid,t.target_fid,t.type_fid,t.owner,t.family_id from t_ci_relation t where t.family_id=$fid and t.endtime=$endtime",vars={'endtime':ENDTIME,'fid':data['fid'][0]})
      json_en = demjson.encode(v_ct_fids)
      json_de = demjson.decode(json_en)
      v_ct_fid_num = len(json_de)
      if v_ct_fid_num == 0:
          return 2 #there is no records to delete in table T_CI_RELATION
      elif v_ct_fid_num > 1:
          return 3 #there are more than one records to delete in table T_CI_RELATION
      
      v_curtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
      #Notice;if the relation is composition and the target ci exists, we should delete the relative ci
      v_target_fids = db.query("select t.family_id, crt.relation from t_ci t, t_ci_relation_type crt where t.family_id=$target_fid and t.endtime=$endtime and crt.family_id=$type_fid and crt.endtime=$endtime and crt.relation='COMPOSITION'",vars={'endtime':ENDTIME,'target_fid':json_de[0]['TARGET_FID'],'type_fid':json_de[0]['TYPE_FID']})
      target_json_en = demjson.encode(v_target_fids)
      target_json_de = demjson.decode(target_json_en)
      v_target_num = len(target_json_de)
      if v_target_num != 0:
          #delete the existed ci. It will also delete the relative ci_attribute and ci_relation.
          n = webci.fn_delete_ci(json_de[0]['TARGET_FID'], v_curtime, data['change_log'][0])
      else:
          #delete t_ci_relation
          n = fn_delete_cirela(data['fid'][0],v_curtime,data['change_log'][0])
      
      return n
Example #8
    def return_check_requests(cls, account='TIANJINOPERATION', operate_type='1', order_ids=[]):
        """
        退货审核
        :param account:
        :param operate_type: 1:批准,2:二次配送
        :param ids:
        """
        print u'*' * 20 + u'退货审核'

        obj = obj = demjson.decode(RainbowUtil.rainbow_get_return_request(order_nos=order_ids))
        n = 0
        while int(obj['total']) != len(order_ids) and n < tmsBase.retry_times:
            print 'expect %s, actual %s' % (len(order_ids), obj['total'])
            sleep(1)
            obj = demjson.decode(RainbowUtil.rainbow_get_return_request(order_nos=order_ids))
            n += 1

        print 'expect %s, actual %s' % (len(order_ids), obj['total'])

        if int(obj['total']) != len(order_ids):
            print 'expect %s, actual %s' % (len(order_ids), obj['total'])
            raise StandardError(u'>>>>>>>>>>waybills expected to be pending return audit do not match the waybills actually returnable')

        ids = list()
        for item in obj['rows']:
            ids.append(item['id'])

        url = tmsBase.base_url + '/tms/sort/refundOrderCheckController/updateCheckResult.do?operation=' + operate_type
        resp = HttpRequest.post_request(TmsLogin.get_session(account), url, data={'ids[]': ids})
        check_operation_result(resp)
        return resp
Example #9
    def refund_apply(cls, account='ADMIN', order_nos=[]):
        """
        退款申请
        :param account:
        :param ids:
        """
        print u'*' * 20 + u'退款申请'
        # 获取运单id
        order_ids = []
        obj = demjson.decode(RainbowUtil.rainbow_get_refund_apply_info(account=account, order_ids=order_nos))
        for item in obj['rows']:
            order_ids.append(item['id'])
        # apply for the refund
        url = tmsBase.base_url + '/tms/sort/refundOrderController/refundApply.do'
        resp = HttpRequest.post_request(TmsLogin.get_session(account), url, data={'ids[]': order_ids})
        # check_operation_result(resp)
        print resp

        # fetch the refund application number
        obj = demjson.decode(RainbowUtil.rainbow_get_refund_apply_info(account=account, order_ids=order_nos))

        refund_apply_no = obj['rows'][0]['refundApplyNo']
        print u'----------refund application no.: %s' % refund_apply_no

        return refund_apply_no
Example #10
def main(infiles=None, locfile=None, **kwargs):
    locations = {}
    metadata_file = locfile.read()
    match = PATTERN2.finditer(metadata_file)
    for entry in match:
        locations[entry.group(1)] = demjson.decode(entry.group(2))

    tracks = {}
    match = PATTERN3.finditer(metadata_file)
    for entry in match:
        tracks[entry.group(1)] = demjson.decode(entry.group(2)).get('name')

    events = []
    for infile in infiles:
        data = json.load(infile)
        if data is None:
            continue
        events.extend(data['events'])

    for track_id, track_name in tracks.items():
        cal = Calendar()
        cal['dtstart'] = '20180519T080000'
        cal['summary'] = 'OpenStack Summit Vancouver 2018: ' + track_name
        tz = Timezone(TZID='America/Vancouver')
        tz.add_component(TimezoneStandard(DTSTART="20171105T020000",
                                        TZOFFSETFROM="-0700",
                                        TZOFFSETTO="-0800",
                                        RDATE="20181104T020000",
                                        TZNAME="PST"))
        tz.add_component(TimezoneDaylight(DTSTART="20180311T020000",
                                        TZOFFSETFROM="-0800",
                                        TZOFFSETTO="-0700",
                                        TZNAME="PDT"))
        cal.add_component(tz)

        for session in events:
            if track_id != str(session.get('track_id')):
                continue
            timezone_str = session.get('time_zone_id')
            tzinfos = {"UN": gettz(timezone_str)}
            start_datetime_str = session.get('start_datetime')
            start_datetime = parse(start_datetime_str + " UN", tzinfos=tzinfos)
            start_datetime_utc = start_datetime.astimezone(utc)
            end_datetime_str = session.get('end_datetime')
            end_datetime = parse(end_datetime_str + " UN", tzinfos=tzinfos)
            end_datetime_utc = end_datetime.astimezone(utc)
            desc = PATTERN.sub('', session.get('abstract'))
            for pre, post in REPLACE_MAP.items():
                desc = desc.replace(pre, post)

            event = Event()
            event.add('dtstart', start_datetime_utc)
            event.add('dtend', end_datetime_utc)
            event.add('summary', session.get('title'))
            event.add('location', locations.get(str(session.get('location_id')), {}).get('name_nice', ""))
            event.add('description', desc)
            event.add('uid', "%s@openstacksummitboston2017" % session.get('id'))
            cal.add_component(event)
        with open("%s.ics" % PATTERN4.sub("-", track_name), "w") as f:
            f.write(cal.to_ical())
Example #11
 def testObjectNonstringKeys(self):
     self.assertEqual(demjson.decode('{55:55}',strict=False), {55:55})
     self.assertEqual(demjson.decode('{fiftyfive:55}',strict=False), {'fiftyfive':55})
     self.assertRaises(demjson.JSONDecodeError, demjson.decode,
                       '{fiftyfive:55}', strict=True)
     self.assertRaises(demjson.JSONEncodeError, demjson.encode,
                       {55:'fiftyfive'}, strict=True)
     self.assertEqual(demjson.encode({55:55}, strict=False), '{55:55}')
Example #12
def _get_demjson_diagnostics(raw):
    """Get diagnostics string for invalid JSON files from demjson."""
    errstr = None
    try:
        demjson.decode(raw, strict=True)
    except demjson.JSONError as err:
        errstr = err.pretty_description()
    return errstr
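
A usage sketch for the helper above (the file name is made up):

with open('settings.json') as infile:
    errstr = _get_demjson_diagnostics(infile.read())
if errstr:
    print('settings.json is invalid: %s' % errstr)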
Example #13
File: jsoncheck.py Project: fijal/ampify
def validate(content, source_id="<source>"):
    """Return whether the content is valid JSON."""

    try:
        decode(content, strict=True)
    except JSONDecodeError, error:
        print "\nInvalid JSON source: %s" % source_id
        print "\n\t%s\n" % error.pretty_description()
        return False
    return True
Example #14
def choice_budget():
	global chosen, validCities, costDict
	travelcosts = dict()
	latitude = str(loc[0])
	longitude = str(loc[1])
	re = requests.get('https://api.sandbox.amadeus.com/v1.2/airports/nearest-relevant?apikey=' + apikey + '&latitude=' + latitude + '&longitude=' + longitude)
	page = re.text
	page = demjson.decode(page)
	d_code = (page[0])["airport"]
	x = len(countries)
	if international:
		x = 15
	for i in range(x):
			if (1==validCities[i]):
				city = cities[i].replace(" ", "%20")
				print city
				re = requests.get("https://api.sandbox.amadeus.com/v1.2/airports/autocomplete?apikey=" + apikey + "&term={0}".format(city)) 
				page = re.text
				page = demjson.decode(page)
				if page == []:
					validCities[i] = 0
				else:
					a_code = page[0]["value"]
					re = requests.get("https://api.sandbox.amadeus.com/v1.2/flights/low-fare-search?apikey=" + apikey + "&origin="+d_code+"&destination="+a_code+"&departure_date="+str(departDate)+"&return_date="+str(arriveDate))
					page = re.text
					page = demjson.decode(page)
					if ("status" in page):
						validCities[i] = 0
					else:
						results = page["results"]
						price = results[0]["fare"]["total_price"]
						airfare = float(price)

						re = requests.get("https://api.sandbox.amadeus.com/v1.2/hotels/search-airport?apikey=" + apikey + "&location="+a_code+"&check_in="+str(departDate)+"&check_out="+str(arriveDate))
						page = re.text
						page = demjson.decode(page)
						results = page["results"]
						if results == []:
							validCities[i] = 0
						else:
							price = results[0]["total_price"]["amount"]
							stayfare = float(price)
							costDict[cities[i]] = [airfare,stayfare]
							total_cost = airfare+stayfare
							travelcosts[total_cost]= cities[i]
	costs = travelcosts.keys()
	costs.sort()
	costs = budget_helper(costs, budget)
	for i in range(4):
		if i>=len(travelcosts):
			chosen[0]=0
		else:
			chosen[travelcosts[costs[i]]] = costs[i]
	print travelcosts
	print costDict
Example #15
 def testDecodeWhitespace(self):
     self.assertEqual(demjson.decode(' []'), [])
     self.assertEqual(demjson.decode('[] '), [])
     self.assertEqual(demjson.decode(' [ ] '), [])
     self.assertEqual(demjson.decode('\n[]\n'), [])
     self.assertEqual(demjson.decode('\t\r \n[\n\t]\n'), [])
     # Form-feed is not a valid JSON whitespace char
     self.assertRaises(demjson.JSONDecodeError, demjson.decode, '\x0c[]', strict=True)
     # No-break-space is not a valid JSON whitespace char
     self.assertRaises(demjson.JSONDecodeError, demjson.decode, u'\u00a0[]', strict=True)
Example #16
 def testDecodeComments(self):
     self.assertEqual(demjson.decode('//hi\n42', allow_comments=True), 42)
     self.assertEqual(demjson.decode('/*hi*/42', allow_comments=True), 42)
     self.assertEqual(demjson.decode('/*hi//x\n*/42', allow_comments=True), 42)
     self.assertEqual(demjson.decode('"a/*xx*/z"', allow_comments=True), 'a/*xx*/z')
     self.assertRaises(demjson.JSONDecodeError, demjson.decode, \
                       '4/*aa*/2', allow_comments=True)
     self.assertRaises(demjson.JSONDecodeError, demjson.decode, \
                       '//hi/*x\n*/42', allow_comments=True)
     self.assertRaises(demjson.JSONDecodeError, demjson.decode, \
                       '/*hi/*x*/42', allow_comments=True)
Example #17
    def testDecodeStringRawUnicode(self):
        self.assertEqual(demjson.decode('"\xc3\xa0"', encoding='utf-8'), u'\u00e0')

        self.assertEqual(demjson.decode('"\x00\x00\x00\xe0\x00\x00\x00"\x00\x00\x00',
                                        encoding='ucs4le'), u'\u00e0')
        self.assertEqual(demjson.decode('\x00\x00\x00"\x00\x00\x00\xe0\x00\x00\x00"',
                                        encoding='ucs4be'), u'\u00e0')
        self.assertEqual(demjson.decode('\x00\x00\x00"\x00\x00\x00\xe0\x00\x00\x00"',
                                        encoding='utf-32be'), u'\u00e0')
        self.assertEqual(demjson.decode('\x00\x00\xfe\xff\x00\x00\x00"\x00\x00\x00\xe0\x00\x00\x00"',
                                        encoding='ucs4'), u'\u00e0')
Example #18
    def fetch_trades(self, url, apikey, body):
        req = urllib2.Request(url, body,
                              {'Content-Type': 'application/json'})
        resp = urllib2.urlopen(req)
        content = resp.read()

        c = demjson.decode(content)
        if 'error' in c:
            raise CSError(c[u'error'])

        return c
Example #19
def get_preview(artist):
	"""Grabs the preview clip URL from 7Digital for the top song of the artist"""
	try:
		top_song_url = "http://ws.audioscrobbler.com/2.0/?method=artist.gettoptracks&artist={artist}&api_key={key}&format=json".format(artist=urllib.quote(artist), key=LF_KEY)
		song_json = json.decode(urllib2.urlopen(top_song_url).read())
		toptrack = song_json['toptracks']['track'][0]['name']
		en_url = "http://developer.echonest.com/api/v4/song/search?api_key=N6E4NIOVYMTHNDM8J&format=json&results=1&artist={artist}&title={track}&bucket=id:7digital&bucket=audio_summary&bucket=tracks".format(artist=urllib.quote(artist), track=urllib.quote(toptrack))	
		en_json = json.decode(urllib2.urlopen(en_url).read())
		return en_json['response']['songs'][0]['tracks'][0]['preview_url']
	except Exception, e:
		print "Error", e
		return None
Example #20
 def testDecodeSupplementalUnicode(self):
     import sys
     if sys.maxunicode > 65535:
         self.assertEqual(demjson.decode(r'"\udbc8\udf45"'), u'\U00102345')
         self.assertEqual(demjson.decode(r'"\ud800\udc00"'), u'\U00010000')
         self.assertEqual(demjson.decode(r'"\udbff\udfff"'), u'\U0010ffff')
     for bad_case in [r'"\ud801"', r'"\udc02"',
                      r'"\ud801\udbff"', r'"\ud801\ue000"',
                      r'"\ud801\u2345"']:
         self.assertRaises(demjson.JSONDecodeError, demjson.decode, bad_case)
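
For reference, the surrogate-pair arithmetic behind the first assertion, as a quick worked sketch:

hi, lo = 0xDBC8, 0xDF45
cp = 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00)
assert cp == 0x102345  # i.e. u'\U00102345'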
Example #21
def country_from_city(city):#Ran only once to get co-ordinates of each city using POI
	city = city.replace(" ", "%20")
	r = requests.get('https://api.sandbox.amadeus.com/v1.2/points-of-interest/yapq-search-text?apikey=' + apikey + '&city_name={0}'.format(city))
	the_page = r.text
	the_page = demjson.decode(the_page)
	while ("status" in the_page):
		r = requests.get('https://api.sandbox.amadeus.com/v1.2/points-of-interest/yapq-search-text?apikey=' + apikey + '&city_name={0}'.format(city))
		the_page = r.text
		the_page = demjson.decode(the_page)
	l1 = the_page["points_of_interest"]
	i = l1[0]
	location = [(i["location"]).values()[0],(i["location"]).values()[2]]
	return (getplace(location[0],location[1]))[1]
Example #22
    def transform_records(self, uuids_and_insts=None):
        """
        Transforms a set of ISO19139 records into GeoBlacklight JSON.
        Uses iso2geoBL.xsl to perform the transformation.
        """
        inst = self.inst
        for r in self.records:
            if not inst and not uuids_and_insts:
                inst = self.get_inst_for_record(r)
            elif uuids_and_insts:
                inst = uuids_and_insts[r]
            rec = self.records[r].xml
            rec = rec.replace("\n", "")
            root = etree.fromstring(rec)
            record_etree = etree.ElementTree(root)

            result = self.transform(record_etree, institution=self.institutions[inst])

#             if self.collection:
#                 result = self.transform(
#                     record_etree,
#                     institution=self.institutions[inst],
#                     collection=self.collection
#                 )
#             else:
#                 result = self.transform(
#                     record_etree,
#                     institution=self.institutions[inst]
#                 )

            result_u = unicode(result)
#             A dirty hack to avoid XSLT quagmire WRT skipping non-HTTPS links :{}
            result_u = result_u.replace(",}","}").replace("{,", "{")

            try:
                result_json = demjson.decode(result_u)
                if self.md_link:
                    refs =  demjson.decode(result_json["dct_references_s"])
                    refs["http://www.isotc211.org/schemas/2005/gmd/"] = self.OPENGEOMETADATA_URL.format(
                        repo=self.opengeometadata_map[inst],
                        uuid_path=self.get_uuid_path(r))
                    result_json["dct_references_s"] = demjson.encode(refs)
                result_dict = OrderedDict({r: result_json})
                log.debug(result_dict)
                self.record_dicts.update(result_dict)
            except demjson.JSONDecodeError as e:
                log.error("ERROR: {e}".format(e=e))
                log.error(result_u)
Example #23
File: core.py Project: mishley/stoq
    def loads(self, data):
        """
        Wrapper for json library. Load json string as a python dict

        :param str data: json string to load into dict

        :returns: Converted dict
        :rtype: dict

        """

        try:
            return json.decode(data.decode("utf-8"))
        except (AttributeError, UnicodeDecodeError):
            return json.decode(data)
Example #24
    def load_registry(self, fqfn):
        """load registry json file into self.registry.

        Does no validation other than requiring the file to be valid json.

        :param fqfn     - str
        """
        if not isfile(fqfn):
            self._abort("Invalid registry file: %s" % fqfn)

        with open(fqfn) as infile:
            json_str = infile.read()
            try:
                self.registry, reg_errors, reg_stats = demjson.decode(json_str, return_errors=True)
            except demjson.JSONDecodeError as e:
                self.logger.critical("registry json load error: %s", e)
                for err in reg_errors:
                    self.logger.critical(err)
                self._abort("Invalid registry file - could not load/decode")
            else:
                if reg_errors:
                    self.logger.critical("registry json load error")
                    for err in reg_errors:
                        self.logger.critical(err)
                    self._abort("Invalid registry file - json errors discovered during load")
Example #25
 def __init__(self, level, type_, name, pos, **args):
   super(Entity, self).__init__()
   self.level = None # Set by level.add_entity
   level.add_entity(self)
   
   self.type = type_
   self.name = name
   self.pos = pos
   
   self.removed = False
   
   defaults = dictkeys_to_ascii( demjson.decode(pyglet.resource.file("entity/%s.json" % (type_)).read() ) )
   for key, value in args.items():
     defaults[key].update(value)
   print defaults
   
   self.behaviorName = defaults['behavior']['name'] if "behavior" in defaults else "none"
   self.physicsName = defaults['physics']['name'] if "physics" in defaults else "static"
   self.width = defaults['width']
   self.height = defaults['height']
   
   self.vel = euclid.Vector2(0., 0.)
   self.max_vel = euclid.Vector2(7, 25)
   
   self._boundingbox = BoundingBox(euclid.Vector2(-self.width/2, -self.height/2), euclid.Vector2(self.width/2, self.height/2))
   
   self.view_direction = 1
   
   self._state = []
   
   if self.level.game.delegate:
     self.level.game.delegate.initEntity(self, **defaults)
     self.level.game.delegate.entityCreated(self)
Example #26
    def render_POST(self, request, **kwargs):
        """
        :param request:
            body should contain JSON

        Required keys in JSON posted:

        :spider_name: string
            name of spider to be scheduled.

        :request: json object
            request to be scheduled with spider.
            Note: request must contain url for spider.
            It may contain kwargs to scrapy request.

        """
        request_body = request.content.getvalue()
        try:
            request_data = demjson.decode(request_body)
        except ValueError as e:
            message = "Invalid JSON in POST body. {}"
            message.format(e.pretty_description())
            raise Error('400', message=message)

        log.msg("{}".format(request_data))
        spider_data = self.get_required_argument(request_data, "request")
        error_msg = "Missing required key 'url' in 'request' object"
        self.get_required_argument(spider_data, "url", error_msg=error_msg)

        return self.prepare_crawl(request_data, spider_data, **kwargs)
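
get_required_argument is referenced but not shown; a plausible minimal sketch inferred from the call sites (hypothetical, not necessarily the project's actual helper):

    def get_required_argument(self, data, name, error_msg=None):
        # Pull a required key out of the decoded JSON, or fail the request with a 400.
        try:
            return data[name]
        except KeyError:
            raise Error('400', message=error_msg or "Missing required key '{}'".format(name))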
Example #27
    def _snarf_names_from_parse_res (self, pr):
        n = pr['firstname']
        if n and n != 'nil':
            self.set_firstname(unesc_str(chompq(n)))

        n = pr['lastname']
        if n and n != 'nil':
            self.set_lastname(unesc_str(chompq(n)))

        try:
            affix = pr['affix']
            if affix and affix != 'nil':
                str_re = self.get_store().get_str_re()
                affix = re.findall(str_re, affix)
                self.set_suffix(unesc_str(chompq(affix[0])))

                if len(affix) > 1:
                    aff = demjson.encode([unesc_str(chompq(x)) for x in affix[1:]])
                    ## FIXME: Do we need to escape the quotes in json encoding
                    ## as in the except clause?
                    self.add_custom('affix', aff)
        except KeyError, e:
            ## FIXME: There should be a better way to handle the format
            ## differences.... for now we'll put up with the hacks
            affix = self.get_custom('affix')

            if affix:
                affix = demjson.decode(affix)
                if len(affix) > 0:
                    self.set_suffix(affix[0])
                    affix = affix[1:]
                    if len(affix) > 0:
                        aff = demjson.encode(affix)
                        self.add_custom('affix', aff)
Example #28
def get_link(url, cat):
	try:
		url='https://en-ae.wadi.com/api/sawa/v1/u' + url
		#print(url)
		response = requests.get(url)
		data = demjson.decode(response.content)
		item_count=int(data['totalCount'])
		page_count=math.ceil(item_count/30)
		page_count=int(page_count)
		print ("Item count : ")
		print(item_count)
		print("Page Count : ")
		print(page_count)
		count=1
		while page_count >= 0:
			try:
				get_scrape(url + '&page=' + str(count),cat,str(count))
				count=count+1
				page_count=page_count-1
			except Exception as e:
				print(str(e))
				return
	except Exception as e:
		print(str(e))
		return
	return
Example #29
    def validate_file(self, filename):

        if not isfile(filename):
            raise ValueError('Invalid file: %s' % filename)

        try:
            with open(filename) as infile:
                json_str = infile.read()
                try:
                    self.registry, reg_errors, reg_stats = demjson.decode(json_str, return_errors=True)
                except demjson.JSONDecodeError as e:
                    self.logger.critical("registry json validation error: %s", e)
                    for err in reg_errors:
                        self.logger.critical(err)
                    self._abort("Invalid registry file - could not decode")
                else:
                    if reg_errors:
                        self.logger.critical("registry json validation error")
                        for err in reg_errors:
                            self.logger.critical(err)
                        self._abort("Invalid registry file - json errors discovered")
        except IOError:
            self._abort("Invalid registry file - could not open")

        try:
            self.validate()
        except:
            self.logger.critical("registry file validation failed")
            raise
Example #30
def sp_episodes():
    import demjson
    xbmcplugin.addSortMethod(pluginhandle, xbmcplugin.SORT_METHOD_EPISODE)
    url = 'http://www.southparkstudios.com/feeds/full-episode/carousel/'+common.args.url+'/dc400305-d548-4c30-8f05-0f27dc7e0d5c'
    json = common.getURL(url)
    episodes = demjson.decode(json)['season']['episode']
    for episode in episodes:
        title = episode['title']
        description = episode['description'].encode('ascii', 'ignore')
        thumbnail = episode['thumbnail'].replace('width=55','')
        episodeid = episode['id']
        senumber = episode['episodenumber']
        date = episode['airdate'].replace('.','-')
        seasonnumber = senumber[:-2]
        episodenumber = senumber[len(seasonnumber):]
        try:
            season = int(seasonnumber)
            episode = int(episodenumber)
        except:
            season = 0
            episode = 0
        u = sys.argv[0]
        u += '?url="'+urllib.quote_plus(episodeid)+'"'
        u += '&mode="comedy"'
        u += '&sitemode="sp_play"'
        infoLabels={ "Title": title,
                    "Season":season,
                    "Episode":episode,
                    "premiered":date,
                    "Plot":description,
                    "TVShowTitle":"South Park"
                    }
        common.addVideo(u,title,thumbnail,infoLabels=infoLabels)
    common.setView('episodes')
Example #31
    ling_con = MysqlLing()
    ling_request = LingRequest()
    author_list = ling_con.search(
        "select * from toutiao_author where media_id=0")

    if len(author_list) >= 1:
        for author in author_list:
            user_id = author['author_id']
            respond = ling_request.request(base_url.format(user_id))
            s = reg.search(respond.content)
            s1 = reg1.search(respond.content)
            if s and s1:
                content, number = re.subn("\r", "",
                                          str(reg2.search(s.group()).group()))
                content, number = re.subn("\n", "", content)
                user1 = demjson.decode(content)

                content, number = re.subn("\r", "",
                                          str(reg2.search(s1.group()).group()))
                content, number = re.subn("\n", "", content)
                user2 = demjson.decode(content)
                user = dict(user1.items() + user2.items())
                if user['avatarUrl'].find('//') == 0:
                    user['avatarUrl'] = "http:" + user['avatarUrl']
                update(user)
            else:
                # time.sleep(5)
                continue
            # time.sleep(0.5)
Example #32
    def parse_info(self, response):
        origin_shop = response.meta['origin_shop']

        # check whether a captcha page was served
        new_url = response.url
        if new_url.startswith('https://verify.meituan.com/'):
            # this url needs to be retried
            print('captcha detected, retrying')
            url = origin_shop.get('url')
            print('url retried because of the captcha: {}'.format(url))
            request = scrapy.Request(url,
                                     callback=self.parse_info,
                                     dont_filter=True)
            request.meta['origin_shop'] = origin_shop
            yield request
        else:
            # use selenium to extract the latitude/longitude
            url = origin_shop.get('url')
            while True:
                options = webdriver.ChromeOptions()
                options.add_argument('--headless')
                chrome = webdriver.Chrome(chrome_options=options)
                chrome.get(url)

                e = etree.HTML(chrome.page_source)
                try:
                    img_src = e.xpath('//div[@id="map"]/img/@src')[0]
                    lat_lng_str = img_src.split('|')[1]
                    lat_lng_list = lat_lng_str.split(',')
                    lat = lat_lng_list[0]
                    lng = lat_lng_list[1]
                    chrome.quit()
                except:
                    lat = ''
                    lng = ''
                    chrome.quit()
                if lat != '' and lng != '':
                    break

            page_source = etree.HTML(response.text)

            # extract the raw shop info
            shop_info_xpath = '//script[10]'
            try:
                shop_info_tag = page_source.xpath(shop_info_xpath)[0]
            except:
                # some pages use a different layout
                shop_info_tag = page_source.xpath(
                    '//*[@id="top"]/script[1]')[0]

            try:
                shop_info_dict = demjson.decode(
                    shop_info_tag.xpath('./text()')[0].split('shop_config=')
                    [1])

                # parse the shop's id
                item = ShopItem()
                # include the url as the dedup key
                item['sort'] = 'shop'
                # track the data version
                item['version'] = '0'
                item['url'] = origin_shop.get('url')

                item['full_name'] = shop_info_dict.get('fullName')
                item['city_en_name'] = shop_info_dict.get('cityEnName')
                item['address'] = shop_info_dict.get('address')
                item['city_id'] = shop_info_dict.get('cityId')
                # item['shop_lat'] = shop_info_dict.get('shopGlat')
                # item['shop_lng'] = shop_info_dict.get('shopGlng')
                item['shop_lat'] = lat
                item['shop_lng'] = lng
                item['city_lat'] = shop_info_dict.get('cityGlat')
                item['city_lng'] = shop_info_dict.get('cityGlng')
                item['power'] = shop_info_dict.get('power')
                item['shop_power'] = shop_info_dict.get('shopPower')
                item['shop_type'] = shop_info_dict.get('shopType')
                item['shop_group_id'] = shop_info_dict.get('shopGroupId')
                item['main_region_id'] = shop_info_dict.get('mainRegionId')
                item['main_category_name'] = shop_info_dict.get(
                    'mainCategoryName')
                item['main_category_id'] = shop_info_dict.get('mainCategoryId')
                # food
                item['category_url_name'] = shop_info_dict.get(
                    'categoryURLName')
                # e.g. 美食 (food)
                item['category_name'] = shop_info_dict.get('categoryName')

                # there is a textCssVersion; the glyph set is presumably refreshed periodically,
                # so support auto-updating the glyph map
                text_css_version = shop_info_dict.get('textCssVersion')
                # try loading the glyph map first
                text_css_info = load_text_css(text_css_version)
                if text_css_info is None:
                    print('the site glyph set has changed; the css needs re-parsing')
                    # extract the css url
                    css_xpath = '//link[contains(@rel,"stylesheet") and contains(@href, "svgtextcss")]/@href'
                    css_url = 'http:' + page_source.xpath(css_xpath)[0]
                    get_css_text_info(css_url, text_css_version)

                # decode the svg-obfuscated text
                vote_xpath = '//*[@id="reviewCount"]'
                item['vote_total'] = parse_text_svg(vote_xpath, page_source,
                                                    text_css_version)

                # if the shop is closed, the business hours and phone number are gone
                shop_closed_xpath = '//p[@class="shop-closed"]'
                shop_closed_tag = page_source.xpath(shop_closed_xpath)
                if shop_closed_tag != []:
                    # the shop is closed
                    item['is_open'] = False
                else:
                    item['is_open'] = True
                    phone_xpath = '//*[@id="basic-info"]/p'
                    item['phone'] = parse_text_svg(phone_xpath, page_source,
                                                   text_css_version)

                    # business hours
                    bh_xpath = '//*[@id="basic-info"]/div[4]/p[1]/span[2]'
                    item['business_hours'] = parse_text_svg(
                        bh_xpath, page_source, text_css_version)

                # average spend per person
                avg_xpath = '//*[@id="avgPriceTitle"]'
                item['avg_price'] = parse_text_svg(avg_xpath, page_source,
                                                   text_css_version)
                # rating scores
                taste_xpath = '//*[@id="comment_score"]/span[1]'
                item['taste_score'] = parse_text_svg(taste_xpath, page_source,
                                                     text_css_version)
                service_xpath = '//*[@id="comment_score"]/span[2]'
                item['service_score'] = parse_text_svg(service_xpath,
                                                       page_source,
                                                       text_css_version)
                env_xpath = '//*[@id="comment_score"]/span[3]'
                item['env_score'] = parse_text_svg(env_xpath, page_source,
                                                   text_css_version)
                # print(item)
                yield item
            except Exception as e:
                # print(item)
                print(traceback.format_exc(), e)
                print('failed to parse the static info; needs investigation.')
Example #33
#         print "pass--------"
#     print "current letter:" , letter

# list = ['php', 'Python', 'c++']
# print list
# print list[2]

# tup = ('english', 'chinese', 'math')
# print tup
# print tup[1:2]
#
import demjson
data = {}

dic = {'aaa': '11', 'bb': 222}
print dic
for k, v in dic.items():
    data[k] = v
    print k, v

print data

json = demjson.encode(data)

print 'json:', json

dict_2 = demjson.decode(json)
print dict_2

print dict(zip(dict_2.values(), dict_2.keys()))
Example #34
def getjsontime(data):
	data=json.loads(data)
	data=demjson.decode(data['Datapoints'])
	return data[len(data)-1]['timestamp']
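
A usage sketch with a made-up payload; the assumed shape (read off the code) is that data['Datapoints'] is itself a JSON-encoded list of dicts carrying a timestamp field:

import json
sample = json.dumps({'Datapoints': '[{"timestamp": 1500000000}, {"timestamp": 1500000060}]'})
print getjsontime(sample)  # -> 1500000060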
Example #35
def stock_report_fund_hold(symbol: str = "基金持仓",
                           date: str = "20201231") -> pd.DataFrame:
    """
    Eastmoney - Data Center - institutional holdings - fund holdings
    http://data.eastmoney.com/zlsj/2020-12-31-1-2.html
    :param symbol: choice of {"基金持仓", "QFII持仓", "社保持仓", "券商持仓", "保险持仓", "信托持仓"}
    :type symbol: str
    :param date: financial-report release date, one of xxxx-03-31, xxxx-06-30, xxxx-09-30, xxxx-12-31
    :type date: str
    :return: fund holdings data
    :rtype: pandas.DataFrame
    """
    symbol_map = {
        "基金持仓": "1",
        "QFII持仓": "2",
        "社保持仓": "3",
        "券商持仓": "4",
        "保险持仓": "5",
        "信托持仓": "6",
    }
    date = "-".join([date[:4], date[4:6], date[6:]])
    url = "http://data.eastmoney.com/dataapi/zlsj/list"
    params = {
        "tkn": "eastmoney",
        "ReportDate": date,
        "code": "",
        "type": symbol_map[symbol],
        "zjc": "0",
        "sortField": "Count",
        "sortDirec": "1",
        "pageNum": "1",
        "pageSize": "50000",
        "cfg": "jjsjtj",
        "p": "1",
        "pageNo": "1",
    }
    r = requests.get(url, params=params)
    data_text = r.text
    data_json = demjson.decode(data_text[data_text.find("{"):])
    temp_df = pd.DataFrame(data_json["data"])
    temp_df.reset_index(inplace=True)
    temp_df["index"] = list(range(1, len(temp_df) + 1))
    if temp_df.empty:
        return None
    temp_df.columns = [
        "序号",
        "股票代码",
        "股票简称",
        "_",
        "_",
        "_",
        "持有基金家数",
        "持股变化",
        "持股总数",
        "持股市值",
        "持股变动比例",
        "_",
        "持股变动数值",
        "_",
    ]
    temp_df = temp_df[[
        "序号",
        "股票代码",
        "股票简称",
        "持有基金家数",
        "持股总数",
        "持股市值",
        "持股变化",
        "持股变动数值",
        "持股变动比例",
    ]]
    return temp_df
Example #36
"""
import demjson
import tuyapower

# Terminal Color Formatting
bold = "\033[0m\033[97m\033[1m"
subbold = "\033[0m\033[32m"
normal = "\033[97m\033[0m"
dim = "\033[0m\033[97m\033[2m"
alert = "\033[0m\033[91m\033[1m"
alertdim = "\033[0m\033[91m\033[2m"

# Load Device Keys from Tuya JSON file
print("Loading Tuya Keys...")
f = open('devices.json', "r")
data = demjson.decode(f.read())
f.close()
print("    %s%s device keys loaded%s" % (dim, len(data), normal))
print()

print("Scanning network for Tuya devices...")
devices = tuyapower.deviceScan(False, 20)
print("    %s%s devices found%s" % (dim, len(devices), normal))
print()


def getIP(d, gwid):
    for ip in d:
        if (gwid == d[ip]['gwId']):
            return (ip, d[ip]['version'])
    return (0, 0)
Example #37
def covid_19_163(indicator: str = "实时") -> pd.DataFrame:
    """
    NetEase - novel coronavirus (COVID-19) tracker
    https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other&#map_block
    https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other&
    :return: data for the given indicator
    :rtype: pandas.DataFrame
    """
    url = "https://c.m.163.com/ug/api/wuhan/app/data/list-total"
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
    }
    payload = {
        "t": int(time.time() * 1000),
    }
    r = requests.get(url, params=payload, headers=headers)
    data_json = r.json()
    # data info
    url = "https://news.163.com/special/epidemic/"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    data_info_df = pd.DataFrame(
        [
            item.text.strip().split(".")[1]
            for item in soup.find("div", attrs={"class": "data_tip_pop_text"}).find_all(
                "p"
            )
        ]
    )
    data_info_df.columns = ["info"]

    # China: historical point-in-time (per-day) data
    hist_today_df = pd.DataFrame(
        [item["today"] for item in data_json["data"]["chinaDayList"]],
        index=[item["date"] for item in data_json["data"]["chinaDayList"]],
    )

    # China: historical cumulative data
    hist_total_df = pd.DataFrame(
        [item["total"] for item in data_json["data"]["chinaDayList"]],
        index=[item["date"] for item in data_json["data"]["chinaDayList"]],
    )

    # China: real-time totals
    current_df = pd.DataFrame.from_dict(data_json["data"]["chinaTotal"])

    # world: historical point-in-time data
    outside_today_df = pd.DataFrame(
        [item["today"] for item in data_json["data"]["areaTree"]],
        index=[item["name"] for item in data_json["data"]["areaTree"]],
    )

    # world: historical cumulative data
    outside_total_df = pd.DataFrame(
        [item["total"] for item in data_json["data"]["areaTree"]],
        index=[item["name"] for item in data_json["data"]["areaTree"]],
    )

    # point-in-time data for every country and region
    all_world_today_df = pd.DataFrame(
        jsonpath.jsonpath(data_json["data"]["areaTree"], "$..today"),
        index=jsonpath.jsonpath(data_json["data"]["areaTree"], "$..name"),
    )

    # cumulative data for every country and region
    all_world_total_df = pd.DataFrame(
        jsonpath.jsonpath(data_json["data"]["areaTree"], "$..total"),
        index=jsonpath.jsonpath(data_json["data"]["areaTree"], "$..name"),
    )

    # cumulative data for each Chinese region
    area_total_df = pd.DataFrame(
        [item["total"] for item in data_json["data"]["areaTree"][2]["children"]],
        index=[item["name"] for item in data_json["data"]["areaTree"][2]["children"]],
    )

    # point-in-time data for each Chinese region
    area_today_df = pd.DataFrame(
        [item["today"] for item in data_json["data"]["areaTree"][2]["children"]],
        index=[item["name"] for item in data_json["data"]["areaTree"][2]["children"]],
    )

    # academic progress on the epidemic
    url_article = "https://vip.open.163.com/api/cms/topic/list"
    payload_article = {
        "topicid": "00019NGQ",
        "listnum": "1000",
        "liststart": "0",
        "pointstart": "0",
        "pointend": "255",
        "useproperty": "true",
    }
    r_article = requests.get(url_article, params=payload_article)
    article_df = pd.DataFrame(r_article.json()["data"]).iloc[:, 1:]

    # news feed
    url_info = "https://ent.163.com/special/00035080/virus_report_data.js"
    payload_info = {
        "_": int(time.time() * 1000),
        "callback": "callback",
    }
    r_info = requests.get(url_info, params=payload_info, headers=headers)
    data_info_text = r_info.text
    data_info_json = demjson.decode(data_info_text.strip(" callback(")[:-1])

    if indicator == "数据说明":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return data_info_df

    if indicator == "中国实时数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return current_df

    if indicator == "中国历史时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return hist_today_df

    if indicator == "中国历史累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return hist_total_df

    if indicator == "世界历史时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return outside_today_df

    if indicator == "世界历史累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return outside_total_df

    if indicator == "全球所有国家及地区时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return all_world_today_df

    elif indicator == "全球所有国家及地区累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return all_world_total_df

    elif indicator == "中国各地区时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return area_today_df

    elif indicator == "中国各地区累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return area_total_df

    elif indicator == "疫情学术进展":
        return article_df

    elif indicator == "实时资讯新闻播报":
        return pd.DataFrame(data_info_json["list"])

    elif indicator == "实时医院新闻播报":
        return pd.DataFrame(data_info_json["hospital"])

    elif indicator == "前沿知识":
        return pd.DataFrame(data_info_json["papers"])

    elif indicator == "权威发布":
        return pd.DataFrame(data_info_json["power"])

    elif indicator == "境外输入疫情趋势":
        url = "https://c.m.163.com/ug/api/wuhan/app/data/list-by-area-code"
        params = {
            "areaCode": "66",
            "t": round(int(time.time() * 1000))
        }
        r = requests.get(url, params=params, headers=headers)
        data_json = r.json()
        temp_df = pd.DataFrame(data_json["data"]["list"])
        today_list = [item.get("input", 0) for item in temp_df["today"]]
        total_list = [item.get("input", 0) for item in temp_df["total"]]
        result_df = pd.DataFrame([today_list, total_list]).T
        result_df.columns = ["境外输入新增确诊", "境外输入累计确诊"]
        result_df.index = pd.to_datetime(temp_df.date)
        return result_df

    elif indicator == "境外输入确诊病例来源":
        url = "https://c.m.163.com/ug/api/wuhan/app/index/input-data-list"
        params = {
            "t": round(int(time.time() * 1000))
        }
        r = requests.get(url, params=params, headers=headers)
        data_json = r.json()
        temp_df = pd.DataFrame(data_json["data"]["list"])
        del temp_df["page"]
        return temp_df
Example #38

with open('../lib/data.js', 'r') as datajs:
    """
	Abre o arquivo atual de dados e extrai o json de acronimos
	"""
    line_string = ''.join(datajs.readlines())

    data_start = line_string.index('{')
    data_end = line_string.index('};') + 1
    before = line_string[:data_start]
    after = line_string[data_end:]

    data_string = line_string[data_start:data_end].split('\n')
    formatted_data = remove_indentation(data_string)

    data = demjson.decode(formatted_data)

with open('../lib/data.js', 'w') as newdatajs:
    """
	Exporta o arquivo de dados de modo que as siglas estejam ordenadas
	"""
    ordered_data = json.dumps(data,
                              sort_keys=True,
                              indent=4,
                              ensure_ascii=False).encode('utf8')

    newdatajs.write(before)
    newdatajs.write(ordered_data)
    newdatajs.write(after)
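
remove_indentation is not shown; a plausible sketch, inferred from the fact that it receives the list produced by split('\n') and must return a string demjson can parse (hypothetical):

def remove_indentation(lines):
    # Re-join the pre-split lines, stripping each line's leading whitespace.
    return '\n'.join(line.lstrip() for line in lines)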
Example #39
bing_wallpaper_url = 'https://cn.bing.com/HPImageArchive.aspx?format=js&idx=0&n=6&pid=hp&uhd=1&uhdwidth=2880&uhdheight=1620'

headers = {
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.68 Safari/537.36",
    "Accept":
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh-Hans;q=0.9,zh;q=0.8,und;q=0.7",
}

manifest_json = demjson.decode(manifest)
photo_json = demjson.decode(photo)
bing_wallpaper_json = demjson.decode(bing_wallpaper)

manifest_json['version'] = version
try:
    rs = requests.session()
    res = rs.get(bing_wallpaper_url, headers=headers)
    res.encoding = 'utf-8'
    resjson = demjson.decode(res.text)
    i = 1
    for image in resjson['images']:
        image_url = "https://cn.bing.com" + image['url']
        try:
            img_name = 'background-' + str(i) + '.jpg'
            img_r = requests.get(image_url)
Example #40
def stock_zh_a_daily(
    symbol: str = "sh601939",
    start_date: str = "19900101",
    end_date: str = "22001220",
    adjust: str = "",
) -> pd.DataFrame:
    """
    新浪财经-A股-个股的历史行情数据, 大量抓取容易封 IP
    https://finance.sina.com.cn/realstock/company/sh689009/nc.shtml
    :param start_date: 20201103; 开始日期
    :type start_date: str
    :param end_date: 20201103; 结束日期
    :type end_date: str
    :param symbol: sh600000
    :type symbol: str
    :param adjust: 默认为空: 返回不复权的数据; qfq: 返回前复权后的数据; hfq: 返回后复权后的数据; hfq-factor: 返回后复权因子; hfq-factor: 返回前复权因子
    :type adjust: str
    :return: specific data
    :rtype: pandas.DataFrame
    """
    def _fq_factor(method):
        if method == "hfq":
            res = requests.get(zh_sina_a_stock_hfq_url.format(symbol))
            hfq_factor_df = pd.DataFrame(
                eval(res.text.split("=")[1].split("\n")[0])["data"])
            if hfq_factor_df.shape[0] == 0:
                raise ValueError("sina hfq factor not available")
            hfq_factor_df.columns = ["date", "hfq_factor"]
            hfq_factor_df.index = pd.to_datetime(hfq_factor_df.date)
            del hfq_factor_df["date"]
            return hfq_factor_df
        else:
            res = requests.get(zh_sina_a_stock_qfq_url.format(symbol))
            qfq_factor_df = pd.DataFrame(
                eval(res.text.split("=")[1].split("\n")[0])["data"])
            if qfq_factor_df.shape[0] == 0:
                raise ValueError("sina hfq factor not available")
            qfq_factor_df.columns = ["date", "qfq_factor"]
            qfq_factor_df.index = pd.to_datetime(qfq_factor_df.date)
            del qfq_factor_df["date"]
            return qfq_factor_df

    if adjust in ("hfq-factor", "qfq-factor"):
        return _fq_factor(adjust.split("-")[0])

    res = requests.get(zh_sina_a_stock_hist_url.format(symbol))
    js_code = py_mini_racer.MiniRacer()
    js_code.eval(hk_js_decode)
    dict_list = js_code.call("d",
                             res.text.split("=")[1].split(";")[0].replace(
                                 '"', ""))  # 执行js解密代码
    data_df = pd.DataFrame(dict_list)
    data_df.index = pd.to_datetime(data_df["date"])
    del data_df["date"]
    data_df = data_df.astype("float")
    r = requests.get(zh_sina_a_stock_amount_url.format(symbol, symbol))
    amount_data_json = demjson.decode(
        r.text[r.text.find("["):r.text.rfind("]") + 1])
    amount_data_df = pd.DataFrame(amount_data_json)
    amount_data_df.index = pd.to_datetime(amount_data_df.date)
    del amount_data_df["date"]
    temp_df = pd.merge(data_df,
                       amount_data_df,
                       left_index=True,
                       right_index=True,
                       how="outer")
    temp_df.fillna(method="ffill", inplace=True)
    temp_df = temp_df.astype(float)
    temp_df["amount"] = temp_df["amount"] * 10000
    temp_df["turnover"] = temp_df["volume"] / temp_df["amount"]
    temp_df.columns = [
        "open",
        "high",
        "low",
        "close",
        "volume",
        "outstanding_share",
        "turnover",
    ]

    if adjust == "":
        temp_df = temp_df[start_date:end_date]
        temp_df["open"] = round(temp_df["open"], 2)
        temp_df["high"] = round(temp_df["high"], 2)
        temp_df["low"] = round(temp_df["low"], 2)
        temp_df["close"] = round(temp_df["close"], 2)
        temp_df.dropna(inplace=True)
        return temp_df

    if adjust == "hfq":
        res = requests.get(zh_sina_a_stock_hfq_url.format(symbol))
        hfq_factor_df = pd.DataFrame(
            eval(res.text.split("=")[1].split("\n")[0])["data"])
        hfq_factor_df.columns = ["date", "hfq_factor"]
        hfq_factor_df.index = pd.to_datetime(hfq_factor_df.date)
        del hfq_factor_df["date"]

        temp_df = pd.merge(temp_df,
                           hfq_factor_df,
                           left_index=True,
                           right_index=True,
                           how="outer")
        temp_df.fillna(method="ffill", inplace=True)
        temp_df = temp_df.astype(float)
        temp_df["open"] = temp_df["open"] * temp_df["hfq_factor"]
        temp_df["high"] = temp_df["high"] * temp_df["hfq_factor"]
        temp_df["close"] = temp_df["close"] * temp_df["hfq_factor"]
        temp_df["low"] = temp_df["low"] * temp_df["hfq_factor"]
        temp_df.dropna(how="any", inplace=True)
        temp_df = temp_df.iloc[:, :-1]
        temp_df = temp_df[start_date:end_date]
        temp_df["open"] = round(temp_df["open"], 2)
        temp_df["high"] = round(temp_df["high"], 2)
        temp_df["low"] = round(temp_df["low"], 2)
        temp_df["close"] = round(temp_df["close"], 2)
        temp_df.dropna(inplace=True)
        return temp_df

    if adjust == "qfq":
        res = requests.get(zh_sina_a_stock_qfq_url.format(symbol))
        qfq_factor_df = pd.DataFrame(
            eval(res.text.split("=")[1].split("\n")[0])["data"])
        qfq_factor_df.columns = ["date", "qfq_factor"]
        qfq_factor_df.index = pd.to_datetime(qfq_factor_df.date)
        del qfq_factor_df["date"]

        temp_df = pd.merge(temp_df,
                           qfq_factor_df,
                           left_index=True,
                           right_index=True,
                           how="outer")
        temp_df.fillna(method="ffill", inplace=True)
        temp_df = temp_df.astype(float)
        temp_df["open"] = temp_df["open"] / temp_df["qfq_factor"]
        temp_df["high"] = temp_df["high"] / temp_df["qfq_factor"]
        temp_df["close"] = temp_df["close"] / temp_df["qfq_factor"]
        temp_df["low"] = temp_df["low"] / temp_df["qfq_factor"]
        temp_df.dropna(how="any", inplace=True)
        temp_df = temp_df.iloc[:, :-1]
        temp_df = temp_df[start_date:end_date]
        temp_df["open"] = round(temp_df["open"], 2)
        temp_df["high"] = round(temp_df["high"], 2)
        temp_df["low"] = round(temp_df["low"], 2)
        temp_df["close"] = round(temp_df["close"], 2)
        temp_df.dropna(inplace=True)
        return temp_df
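A minimal sketch of the adjustment pattern used above, on made-up frames standing in for the downloaded bars and factor series: outer-merge the factors onto the daily data, forward-fill, then multiply by the hfq factor (or divide by the qfq one).

import pandas as pd

bars = pd.DataFrame(
    {"close": [10.0, 10.2, 10.4]},
    index=pd.to_datetime(["2021-01-04", "2021-01-05", "2021-01-06"]),
)
factor = pd.DataFrame(
    {"hfq_factor": [2.0]}, index=pd.to_datetime(["2021-01-04"])
)

merged = pd.merge(bars, factor, left_index=True, right_index=True, how="outer")
merged.fillna(method="ffill", inplace=True)  # carry the factor forward between ex-dates
merged["close"] = merged["close"] * merged["hfq_factor"]  # hfq: multiply; qfq: divide
print(merged)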
示例#41
0
import sys

import demjson

data1 = {
    "VisitId": "927f2bb2-5b3d-e811-8d9e-000c2918a2b6",
    "ClinicalDiagnosis": "33333"
}

data = {
    "VisitId": "927f2bb2-5b3d-e811-8d9e-000c2918a2b6",
    "ClinicalDiagnosis": "33333",
    "InspectionFromItems": [{
        "InspectionGroupId": "c504b05c-902e-e811-8d9e-000c2918a2b6",
        "ItemName": "骨科组套1",
        "GroupName": "骨科组套1",
        "Quantity": 1
    }, {
        "InspectionGroupId": "039c86f6-f332-e811-8d9e-000c2918a2b6",
        "ItemName": "检验租套测试test1",
        "GroupName": "检验租套测试test1",
        "Quantity": 1
    }]
}

print "bianma:%s" % sys.getdefaultencoding()
print "type:%s" % type(data)
print data
print "******************************111111"
print data1
print demjson.decode(data)
print demjson.decode(data1)
示例#42
0
def covid_19_baidu(indicator: str = "浙江") -> pd.DataFrame:
    """
    百度-新型冠状病毒肺炎-疫情实时大数据报告
    https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1
    :param indicator: 看说明文档
    :type indicator: str
    :return: 指定 indicator 的数据
    :rtype: pandas.DataFrame
    """
    url = "https://huiyan.baidu.com/openapi/v1/migration/rank"
    payload = {
        "type": "move",
        "ak": "kgD2HiDnLdUhwzd3CLuG5AWNfX3fhLYe",
        "adminType": "country",
        "name": "全国",
    }
    r = requests.get(url, params=payload)
    move_in_df = pd.DataFrame(r.json()["result"]["moveInList"])
    move_out_df = pd.DataFrame(r.json()["result"]["moveOutList"])

    url = "https://opendata.baidu.com/api.php"
    payload = {
        "query": "全国",
        "resource_id": "39258",
        "tn": "wisetpl",
        "format": "json",
        "cb": "jsonp_1580470773343_11183",
    }
    r = requests.get(url, params=payload)
    text_data = r.text
    json_data_news = json.loads(
        text_data.strip("/**/jsonp_1580470773343_11183(").rstrip(");")
    )

    url = "https://opendata.baidu.com/data/inner"
    payload = {
        "tn": "reserved_all_res_tn",
        "dspName": "iphone",
        "from_sf": "1",
        "dsp": "iphone",
        "resource_id": "28565",
        "alr": "1",
        "query": "肺炎",
        "cb": "jsonp_1580470773344_83572",
    }
    r = requests.get(url, params=payload)
    json_data = json.loads(r.text[r.text.find("({") + 1 : r.text.rfind(");")])
    spot_report = pd.DataFrame(json_data["Result"][0]["DisplayData"]["result"]["items"])

    # domestic-city
    url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1"
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "lxml")
    temp_soup = str(soup.find(attrs={"id": "captain-config"}))
    data_json = demjson.decode(temp_soup[temp_soup.find("{"): temp_soup.rfind("}")+1])

    big_df = pd.DataFrame()
    for i, p in enumerate(
        jsonpath.jsonpath(data_json["component"][0]["caseList"], "$..area")
    ):
        temp_df = pd.DataFrame(
            jsonpath.jsonpath(data_json["component"][0]["caseList"], "$..subList")[i]
        )
        temp_df["province"] = p
        big_df = big_df.append(temp_df, ignore_index=True)
    domestic_city_df = big_df

    domestic_province_df = pd.DataFrame(data_json["component"][0]["caseList"]).iloc[
        :, :-2
    ]

    big_df = pd.DataFrame()
    for i, p in enumerate(
        jsonpath.jsonpath(data_json["component"][0]["caseOutsideList"], "$..area")
    ):
        temp_df = pd.DataFrame(
            jsonpath.jsonpath(
                data_json["component"][0]["caseOutsideList"], "$..subList"
            )[i]
        )
        temp_df["province"] = p
        big_df = big_df.append(temp_df, ignore_index=True)
    outside_city_df = big_df

    outside_country_df = pd.DataFrame(
        data_json["component"][0]["caseOutsideList"]
    ).iloc[:, :-1]

    big_df = pd.DataFrame()
    for i, p in enumerate(
        jsonpath.jsonpath(data_json["component"][0]["globalList"], "$..area")
    ):
        temp_df = pd.DataFrame(
            jsonpath.jsonpath(data_json["component"][0]["globalList"], "$..subList")[i]
        )
        temp_df["province"] = p
        big_df = big_df.append(temp_df, ignore_index=True)
    global_country_df = big_df

    global_continent_df = pd.DataFrame(data_json["component"][0]["globalList"])[
        ["area", "died", "crued", "confirmed", "confirmedRelative"]
    ]

    if indicator == "热门迁入地":
        return move_in_df
    elif indicator == "热门迁出地":
        return move_out_df
    elif indicator == "今日疫情热搜":
        return pd.DataFrame(json_data_news["data"][0]["list"][0]["item"])
    elif indicator == "防疫知识热搜":
        return pd.DataFrame(json_data_news["data"][0]["list"][1]["item"])
    elif indicator == "热搜谣言粉碎":
        return pd.DataFrame(json_data_news["data"][0]["list"][2]["item"])
    elif indicator == "复工复课热搜":
        return pd.DataFrame(json_data_news["data"][0]["list"][3]["item"])
    elif indicator == "热门人物榜":
        return pd.DataFrame(json_data_news["data"][0]["list"][4]["item"])
    elif indicator == "历史疫情热搜":
        return pd.DataFrame(json_data_news["data"][0]["list"][5]["item"])
    elif indicator == "搜索正能量榜":
        return pd.DataFrame(json_data_news["data"][0]["list"][6]["item"])
    elif indicator == "游戏榜":
        return pd.DataFrame(json_data_news["data"][0]["list"][7]["item"])
    elif indicator == "影视榜":
        return pd.DataFrame(json_data_news["data"][0]["list"][8]["item"])
    elif indicator == "小说榜":
        return pd.DataFrame(json_data_news["data"][0]["list"][9]["item"])
    elif indicator == "疫期飙升榜":
        return pd.DataFrame(json_data_news["data"][0]["list"][10]["item"])
    elif indicator == "实时播报":
        return spot_report
    elif indicator == "中国分省份详情":
        return domestic_province_df
    elif indicator == "中国分城市详情":
        return domestic_city_df
    elif indicator == "国外分国详情":
        return outside_country_df
    elif indicator == "国外分城市详情":
        return outside_city_df
    elif indicator == "全球分洲详情":
        return global_continent_df
    elif indicator == "全球分洲国家详情":
        return global_country_df
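The two opendata.baidu.com calls above strip a JSONP wrapper by hand with find/rfind; the same trick as a tiny reusable helper (the callback name in the sample payload is made up):

import json

def strip_jsonp(text):
    # keep only what sits between the first "(" and the last ")"
    return json.loads(text[text.find("(") + 1 : text.rfind(")")])

sample = '/**/jsonp_1580470773343_11183({"data": [1, 2, 3]});'
print(strip_jsonp(sample))  # {'data': [1, 2, 3]}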
示例#43
0
def choice_budget():
    global chosen, validCities, costDict
    travelcosts = dict()
    latitude = str(loc[0])
    longitude = str(loc[1])
    resp = requests.get(
        'https://api.sandbox.amadeus.com/v1.2/airports/nearest-relevant?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&latitude='
        + latitude + '&longitude=' + longitude)
    page = demjson.decode(resp.text)
    d_code = page[0]["airport"]
    x = len(countries)
    if international:
        x = 15
    for i in range(x):
        if validCities[i] == 1:
            city = cities[i].replace(" ", "%20")
            print city
            resp = requests.get(
                "https://api.sandbox.amadeus.com/v1.2/airports/autocomplete?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&term={0}"
                .format(city))
            page = demjson.decode(resp.text)
            if page == []:
                validCities[i] = 0
            else:
                a_code = page[0]["value"]
                resp = requests.get(
                    "https://api.sandbox.amadeus.com/v1.2/flights/low-fare-search?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&origin="
                    + d_code + "&destination=" + a_code + "&departure_date=" +
                    str(departDate) + "&return_date=" + str(arriveDate))
                page = demjson.decode(resp.text)
                if "status" in page:
                    validCities[i] = 0
                else:
                    results = page["results"]
                    price = results[0]["fare"]["total_price"]
                    airfare = float(price)

                    resp = requests.get(
                        "https://api.sandbox.amadeus.com/v1.2/hotels/search-airport?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&location="
                        + a_code + "&check_in=" + str(departDate) +
                        "&check_out=" + str(arriveDate))
                    page = demjson.decode(resp.text)
                    results = page["results"]
                    if results == []:
                        validCities[i] = 0
                    else:
                        price = results[0]["total_price"]["amount"]
                        stayfare = float(price)
                        costDict[cities[i]] = [airfare, stayfare]
                        total_cost = airfare + stayfare
                        travelcosts[total_cost] = cities[i]
    costs = travelcosts.keys()
    costs.sort()
    costs = budget_helper(costs, budget)
    for i in range(4):
        if i >= len(travelcosts):
            chosen[0] = 0
        else:
            chosen[travelcosts[costs[i]]] = costs[i]
    print travelcosts
    print costDict
示例#44
0
import json

import demjson
import pymongo
import xlrd

# connect to the database
client = pymongo.MongoClient(
    "mongodb://*****:*****@cluster0-shard-00-00-gmjko.mongodb.net:27017/admin?ssl=true&replicaSet=cluster0-shard-00-00-gmjko&authSource=admin"
)
db = client.geokg
col = db.region

data = xlrd.open_workbook("E:/资料/大三下/实训/广府建筑表格/region.xls")
table = data.sheets()[0]
#use the first Excel row as the MongoDB field names
rowstag = table.row_values(0)
nrows = table.nrows
ncols = table.ncols
returnData = {}

for i in range(1, nrows):
    #zip the field names with the row data into a dict and serialize it to JSON
    #returnData[i]=json.dumps(dict(zip(rowstag,table.row_values(i))))
    returnData[i] = json.dumps(
        dict(
            zip(rowstag, [
                table.row_values(i)[0],
                demjson.decode(table.row_values(i)[1])
            ])))
    #round-trip through json to normalize the values
    returnData[i] = json.loads(returnData[i])
    # print()
    print(returnData[i])
    # col.insert(returnData[i])
示例#45
0
def fetch_album(album_url, dlPath):
    try:
        resp = opener.open(album_url)
    except Exception:
        print 'Problem while fetching ' + album_url
        return 0

    content = resp.read()

    regex = r'var EmbedData = (\{(.*?)\});'
    jsValues = re.search(regex, content, flags=re.M | re.S)
    jsString = jsValues.group(1).replace('\\r', ' ').replace(
        '\\n', ' ').decode('utf8').encode('ascii', errors='ignore')
    jsString = jsString.replace("\\\"", "'")
    jsString = re.sub(r'//.[^,]*$', '', jsString, 0, flags=re.M)
    jsString = jsString.replace('\n\n', '').replace('\n',
                                                    ' ').replace('" + "', '')

    try:
        albumNfo = demjson.decode(jsString)
    except Exception:
        print("F**k 124 !!\n\n")
        return 0

    regex = r'var TralbumData = (\{(.*?)\});'
    jsValues = re.search(regex, content, flags=re.M | re.S)

    jsString = jsValues.group(1).replace('\\r', ' ').replace(
        '\\n', ' ').decode('utf8').encode('ascii', errors='ignore')
    jsString = jsString.replace("\\\"", "'")
    jsString = re.sub(r'//.[^,]*$', '', jsString, 0, flags=re.M)
    jsString = jsString.replace('\n\n', '').replace('\n',
                                                    ' ').replace('" + "', '')

    try:
        albumDatas = demjson.decode(jsString)
    except Exception:
        print("F**k 146 !!\n\n")
        pprint(jsString)
        return 0

    albumNfo.update(albumDatas)
    albumNfo['album_art_id'] = albumNfo['art_id']

    if 'album_title' in albumNfo:
        albumTitle = albumNfo['album_title']
    else:
        albumTitle = '_alone_track'

    try:
        dname = os.path.dirname(dlPath + sanitizeFname(albumNfo['artist']) +
                                '/' + sanitizeFname(albumTitle) + '/')
    except Exception:
        print('F**k 163 !!!\n\n')
        pprint(albumNfo)
        return 0

    if not os.path.exists(dname):
        try:
            os.makedirs(dname)
        except OSError:
            pass

    if 'trackinfo' in albumDatas:
        download_album_cover(albumNfo, dname)

        for song in albumDatas['trackinfo']:
            download_song(song, albumNfo, dname)
    else:
        return 0
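The heart of this snippet is lifting a JavaScript object literal (`var EmbedData = {...};`, `var TralbumData = {...};`) out of raw HTML with a regex and handing it to demjson, which tolerates the unquoted keys; a minimal reproduction on a made-up page fragment:

import re

import demjson

html = 'var TralbumData = {artist: "Some Band", current: {title: "LP"}};'
match = re.search(r'var TralbumData = (\{.*?\});', html, flags=re.M | re.S)
album = demjson.decode(match.group(1))
print(album["artist"])  # Some Band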
示例#46
0
    def get_weather_json(self, url):
        self.log(url)
        weather = requests.get(url).text.split('=')[1][:-1]
        self.log(weather)
        weather = demjson.decode(weather)['tqInfo']
        return weather
示例#47
0
    def get_config(cls):
        with open(join(cls.WD, "config.json"), "r") as config_file:
            cls.config = demjson.decode(config_file.read())
        return cls.config
示例#48
0
- what is the hierarchy that is returned?? (are the leaf nodes what is really important?)
- what are the roles of the input params? in particular, what role do the lexicon values play?
"""

import sys, os
import demjson
from urllib import urlopen, urlencode
from std import StdNode

baseurl = 'http://standards.teachersdomain.org/td_standards_json/get_standards_hierarchical/'

params = {
    'jurisdictions': 'NY',
    'lexicon_terms': '121,122',
    'grade_range': 'k-12'
}

data = urlopen(baseurl, urlencode(params))
"""
Returns a JSON formatted ordered 'node set' for a given Jurisdiction, list of Lexicon Term IDs and grade range
      use example: get_standards_hierarchical_json('NY','121,122','k-12')
"""

print "foo"
json = demjson.decode(data.read())
# print json
for tree in json:
    jurisdiction = tree[0]
    node = StdNode(tree[1])
    node.report()
示例#49
0
    def parseData(self, response):
        self.logger.info(response.url)
        data = response.body.decode('gb2312')
        data = demjson.decode(data)
        self.logger.info(len(data))
        
        # {symbol:"sz300711",code:"300711",name:"广哈通信",
        # trade:"19.400",pricechange:"0.210",changepercent:"1.094",
        # buy:"19.390",sell:"19.400",settlement:"19.190",open:"19.190",
        # high:"19.520",low:"18.740",volume:2857915,amount:54821946,
        #ticktime:"15:00:03", 
        #per:40.417,
        #pb:4.974,mktcap:279740.15076,nmc:88562.94,turnoverratio:6.26036}
        
        #symbol: ticker symbol
        #code: stock code
        #name: short name
        #trade: latest price
        #pricechange: price change
        #changepercent: percent change
        #buy: bid price
        #sell: ask price
        #settlement: previous close
        #open: open price
        #high: day high
        #low: day low
        #volume: trading volume
        #amount: turnover amount
        #mktcap: total market cap
        #nmc: float market cap
        #ticktime: tick time
        #pb: price-to-book ratio
        #turnoverratio: turnover ratio

        for each in data:
            item = each
            item['date'] = getLastClosingDate()
            item['type'] = 'DayClosingData'
            # yield item

            # real-time quotes
            # yield scrapy.Request('http://hq.sinajs.cn/list='+ item['symbol'],meta=item,callback= self.parseNewData)
            
            code = item['code']
            symbol = item['symbol']
            # 5-minute bars
            meta = {
                'symbol': symbol,  # ticker
                'scale': '5',      # bar interval in minutes: 5, 15, 30, 60
                'ma': '5',         # moving-average window: 5, 10, 15, 20, 25
                'count': '1023'    # number of bars
            }
            url = Template('http://money.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_MarketData.getKLineData?symbol=${symbol}&scale=${scale}&ma=${ma}&datalen=${count}')
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseMinuteData)
            
            # historical quarterly trades
            meta = {
                'symbol': symbol,  # ticker
                'code': code,      # stock code
                'year': '2018',
                'quarter': '1',    # quarter: 1, 2, 3, 4
            }
            url = Template('http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/${code}.phtml?year=${year}&jidu=${quarter}')
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseQuarterData)

            # historical trade detail API; needs an actual trading day
            meta = {
                'symbol': symbol,  # ticker
                'date': time.strftime("%Y-%m-%d", time.localtime(time.time())),
                'page': 0,
            }
            url = Template('http://market.finance.sina.com.cn/transHis.php?symbol=${symbol}&date=${date}&page=${page}')
            yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseDetailsData)

            # money flow
            url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/MoneyFlow.ssi_ssfx_flzjtj?format=text&daima=" + symbol
            # yield scrapy.Request(url,meta=meta,callback= self.parseCapitalFlow)
            
            #https://blog.csdn.net/woloqun/article/details/80734088
            # financial report data
            url = Template("http://vip.stock.finance.sina.com.cn/corp/go.php/vFD_FinanceSummary/stockid/${code}.phtml?qq-pf-to=pcqq.c2c")
            meta = {
                'code': code,
                'symbol': symbol
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseFinanceSummaryData)
            
            #https://blog.csdn.net/luanpeng825485697/article/details/78442062?locationNum=5&fps=1
            # Tencent stock data
            # intraday (minute-by-minute) chart
            url = Template('http://data.gtimg.cn/flashdata/hushen/minute/${symbol}.js?maxage=${maxage}&${random}')
            meta = {
                'symbol' :symbol,
                'maxage':'110',
                'random':random.random()
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentMinuteData)
            
            # 5-day intraday chart
            url = Template('http://data.gtimg.cn/flashdata/hushen/4day/${tag}/${symbol}.js?maxage=${maxage}&visitDstTime=${visitDstTime}')
            meta = {
                'symbol' :symbol,
                'tag':symbol[0:2],
                'maxage':'110',
                'visitDstTime':1
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentDayData)
            
            # daily K-line
            url = Template('http://data.gtimg.cn/flashdata/hushen/latest/daily/${symbol}.js?maxage=${maxage}&visitDstTime=${visitDstTime}')
            meta = {
                'symbol' :symbol,
                'maxage':'43201',
                'visitDstTime':1
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentDayKData)
            
            # daily K-line for a given year
            url = Template('http://data.gtimg.cn/flashdata/hushen/daily/${year}/${symbol}.js?visitDstTime=${visitDstTime}')
            meta = {
                'symbol' :symbol,
                'year':'2017'[-2:],
                'visitDstTime':1
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentYearDayKData)
            
            # weekly K-line
            url = Template('http://data.gtimg.cn/flashdata/hushen/latest/weekly/${symbol}.js?maxage=${maxage}&visitDstTime=${visitDstTime}')
            meta = {
                'symbol' :symbol,
                'maxage':'43201',
                'visitDstTime':1
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentWeekKData)
            
            # monthly K-line
            url = Template('http://data.gtimg.cn/flashdata/hushen/monthly/${symbol}.js?maxage=${maxage}')
            meta = {
                'symbol' :symbol,
                'maxage':'43201',
                'visitDstTime':1
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentMonthKData)
            
            # trade detail, timeline listing
            url = Template('http://stock.gtimg.cn/data/index.php?appn=detail&action=timeline&c=${symbol}')
            meta = {
                'symbol' :symbol
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentClosingDetailsListData)
            
            # trade detail pages
            url = Template('http://stock.gtimg.cn/data/index.php?appn=detail&action=data&c=${symbol}&p=${page}')
            meta = {
                'symbol' :symbol,
                'page':0,
                'date':'20180413'
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentClosingDetailsData)
            
            # large-order data
            # opt=10, 11, 12, 13: turnover >= 1M, 2M, 5M, 10M CNY respectively
            # opt=1,2,3,4,5,6,7,8: volume >= 100, 200, 300, 400, 500, 800, 1000, 1500, 2000 lots
            url = Template('http://stock.finance.qq.com/sstock/list/view/dadan.php?t=js&c=${symbol}&max=${max}&p=${page}&opt=${opt}&o=${o}')
            meta = {
                'symbol' :symbol,
                'max':80,
                'page':0,
                'opt':10,
                'o':0,
            }
            # yield scrapy.Request(url.substitute(meta),meta=meta,callback= self.parseTencentLargeSingleData)
            
            break

        #data[0]
        #https://hq.sinajs.cn/?_=1554047924366&list=ml_sh600100
        # announcements
        #https://vip.stock.finance.sina.com.cn/api/jsonp.php/var%20noticeData=/CB_AllService.getMemordlistbysymbol?num=8&PaperCode=600100
        #ttps://news.sinajs.cn/rn=1554047925361&maxcnt=20&scnt=20&list=sh600100,gg_sh600100,ntc_sh600100,blog_sh600100,tg_sh600100,lcs_sh600100

        #ttps://vip.stock.finance.sina.com.cn/quotes_service/api/jsonp.php/var%20moneyFlowData=/MoneyFlow.ssi_ssfx_flzjtj?daima=sh600100&gettime=1
        
        #https://finance.sina.com.cn/realstock/company/sh600100/hisdata/klc_kl.js?d=2019_4_1
        return

        node = response.meta['node']
        tag = response.meta['tag']
        count = int(response.meta['count'])
        page = int(response.meta['page'])
        if page * 80 < count:
            param = self.getData_request(node,tag,page + 1,count)
            yield scrapy.Request(param['url'],meta=param['meta'],callback= self.parseData)
示例#50
0
    def _build_demjson_config(self, demjson_string):
        if demjson_string is None:
            return OmegaConf.create()

        demjson_dict = demjson.decode(demjson_string)
        return OmegaConf.create(demjson_dict)
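What the bridge above buys you, sketched with a made-up JS-style config string (assuming demjson and omegaconf are installed):

import demjson
from omegaconf import OmegaConf

cfg = OmegaConf.create(demjson.decode("{model: {lr: 0.1}, epochs: 3}"))
print(cfg.model.lr)  # 0.1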
示例#51
0
def getjsondelay(data):
	data = json.loads(data)
	data = demjson.decode(data['Datapoints'])
	value = float(data[len(data)-1]['avg_rtt'])
	return round(value, 2)
示例#52
0
#140 tex
#141 tor
#142 min
#143 phi
#144 atl
#145 cws
#146 mia
#147 nyy
#158 mil

for team in range(108, 160):
    page_url = Template(
        "http://mlb.mlb.com/lookup/json/named.roster_40.bam?team_id=$team"
    ).substitute(team=team)
    json1 = scraperwiki.scrape(page_url)
    json_decode1 = demjson.decode(json1)

    roster_len = int(json_decode1['roster_40']['queryResults']['totalSize'])
    if roster_len > 0:
        players = json_decode1['roster_40']['queryResults']['row']
        for i in range(0, roster_len):
            record = {}
            record['key'] = players[i]['player_id']
            record['player_id'] = players[i]['player_id']
            record['status_code'] = players[i]['status_code']
            record['primary_position'] = players[i]['primary_position']
            record['position_txt'] = players[i]['position_txt']
            record['name_display_first_last'] = players[i][
                'name_display_first_last']
            record['name_display_last_first'] = players[i][
                'name_display_last_first']
示例#53
0
    def _get_rank_fund_info(self):
        '''
        Fetch the rank_fund data for every fund on 天天基金 (eastmoney fund).
        :return: a list
        '''
        rank_fund_list = []
        for page_num in range(self.page_num_start, self.page_num_end):
            print('Fetching fund info from page {0}...'.format(page_num))
            cookies = {
                'st_pvi':
                '11586003301354',
                'EMFUND1':
                'null',
                'EMFUND0':
                'null',
                'EMFUND2':
                '07-10%2018%3A01%3A38@%23%24%u534E%u6DA6%u5143%u5927%u73B0%u91D1%u901A%u8D27%u5E01B@%23%24002884',
                'EMFUND3':
                '07-10%2018%3A01%3A48@%23%24%u5929%u5F18%u73B0%u91D1%u7BA1%u5BB6%u8D27%u5E01B@%23%24420106',
                'EMFUND4':
                '07-10%2018%3A11%3A53@%23%24%u65B9%u6B63%u5BCC%u90A6%u4FDD%u9669%u4E3B%u9898%u6307%u6570%u5206%u7EA7@%23%24167301',
                'EMFUND5':
                '07-10%2018%3A04%3A32@%23%24%u62DB%u5546%u4E2D%u8BC1%u94F6%u884C%u6307%u6570%u5206%u7EA7@%23%24161723',
                'EMFUND6':
                '07-10%2018%3A05%3A13@%23%24%u5929%u5F18%u4E2D%u8BC1%u94F6%u884C%u6307%u6570C@%23%24001595',
                'EMFUND7':
                '07-10%2018%3A06%3A13@%23%24%u5929%u5F18%u4E2D%u8BC1%u94F6%u884C%u6307%u6570A@%23%24001594',
                'st_si':
                '38764934559714',
                'ASP.NET_SessionId':
                'hqeo1xk5oqgwb0cqzxicytda',
                'EMFUND8':
                '07-11 11:28:55@#$%u7533%u4E07%u83F1%u4FE1%u591A%u7B56%u7565%u7075%u6D3B%u914D%u7F6E%u6DF7%u5408A@%23%24001148',
                'EMFUND9':
                '07-11 11:28:55@#$%u5E7F%u53D1%u751F%u7269%u79D1%u6280%u6307%u6570%28QDII%29@%23%24001092',
            }

            headers = {
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9',
                # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
                'Accept': '*/*',
                # 'Referer': 'http://fund.eastmoney.com/data/fundranking.html',
                'Proxy-Connection': 'keep-alive',
            }

            end_date = str(get_shanghai_time())[:10]
            start_date = str(
                datetime.datetime(year=get_shanghai_time().year - 1,
                                  month=get_shanghai_time().month,
                                  day=get_shanghai_time().day))[:10]
            print('start date: {0}, end date: {1}'.format(start_date, end_date))

            params = (
                ('op', 'ph'),
                ('dt', 'kf'),
                ('ft', 'all'),
                ('rs', ''),
                ('gs', '0'),
                ('sc', 'zzf'),
                ('st', 'desc'),
                ('sd', start_date),  # '2017-07-10'
                ('ed', end_date),  # '2018-07-10'
                ('qdii', ''),
                ('tabSubtype', ',,,,,'),
                ('pi', str(page_num)),  # page number of the rank data
                ('pn', '50'),
                ('dx', '1'),
                # ('v', '0.5290053467389759'),
            )

            url = 'http://fund.eastmoney.com/data/rankhandler.aspx'

            # TODO: plain requests gets a 502 here
            # body = MyRequests.get_url_body(url=url, headers=headers, params=params, cookies=None)
            # print(body)

            # use phantomjs instead
            body = self.my_phantomjs.use_phantomjs_to_get_url_body(
                url=_get_url_contain_params(url, params))

            try:
                body = re.compile('<body>(.*)</body>').findall(body)[0]
                this_page_rank_data = re.compile(r'rankData = (.*);').findall(
                    body)[0]
                # print(this_page_rank_data)
            except IndexError:
                print('IndexError while extracting this_page_rank_data, please check!')
                continue

            # error: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
            # fix: run it through demjson, which tolerates unquoted keys
            this_page_rank_data = demjson.decode(this_page_rank_data).get(
                'datas', {})
            # pprint(this_page_rank_data)
            if this_page_rank_data == {}:
                return []

            for item in this_page_rank_data:
                _i = item.split(',')
                rank_fund_list.append({
                    '基金代码': _i[0],
                    '基金简称': _i[1],
                    '当天日期': _i[3],
                    '单位净值': _i[4],
                    '累计净值': _i[5],
                    '日增长率': _i[6],
                    '近1周': _i[7],
                    '近1月': _i[8],
                    '近3月': _i[9],
                    '近6月': _i[10],
                    '近1年': _i[11],
                    '近2年': _i[12],
                    '近3年': _i[13],
                    '今年来': _i[14],
                    '成立来': _i[15],
                    '手续费': _i[20],
                })

            sleep(2.5)

        print('\nDone fetching!\n')

        # pprint(rank_fund_list)

        return rank_fund_list
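The error/fix comments above are the whole reason demjson keeps appearing in these examples: rankData is a JavaScript object literal with unquoted keys, which json.loads rejects. A made-up illustration:

import json

import demjson

js_literal = '{datas: ["000001,fund A", "000002,fund B"], allRecords: 2}'
try:
    json.loads(js_literal)
except ValueError as err:
    print("json.loads fails:", err)
print(demjson.decode(js_literal))  # {'datas': [...], 'allRecords': 2}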
示例#54
0
# Name:        navitia2OSM.py
#
# Author:      @nlehuby - noemie.lehuby(at)gmail.com
#
# Created:     04/06/2014
# Licence:     WTFPL
#-------------------------------------------------------------------------------

import requests
import demjson
import smtplib

url = "http://taginfo.openstreetmap.org/api/4/key/values?key=brewery:note"
appel_taginfo = requests.get(url)

data_tag = demjson.decode(appel_taginfo.content)

if data_tag['total'] == 0:
    print "No results, nothing new ..."
else:
    print "There are results!"
    FROM = '*****@*****.**'
    TO = ['*****@*****.**']
    SUBJECT = "Something new on taginfo!"
    TEXT = "There are " + str(data_tag['total']) + " new results: " + url
    message = """From: %s\nTo: %s\nSubject: %s\n\n%s""" % (
        FROM, ", ".join(TO), SUBJECT, TEXT)
    smtp = smtplib.SMTP()
    smtp.connect('serveur', 587)
    smtp.starttls()
    smtp.login('login', 'password')
示例#55
0
def getjsonvalue(data):
	data = json.loads(data)
	data = demjson.decode(data['Datapoints'])
	value = float(data[len(data)-1]['Value'])
	return round(value, 2)
示例#56
0
def air_quality_hist(city: str = "杭州",
                     period: str = "day",
                     start_date: str = "2019-03-27",
                     end_date: str = "2020-04-27") -> pd.DataFrame:
    """
    真气网-空气历史数据
    https://www.zq12369.com/
    :param city: 调用 air_city_list 接口获取所有城市列表
    :type city: str
    :param period: "hour": 每小时一个数据, 由于数据量比较大, 下载较慢; "day": 每天一个数据; "month": 每个月一个数据
    :type period: str
    :param start_date: e.g., "2019-03-27"
    :type start_date: str
    :param end_date: e.g., ""2020-03-27""
    :type end_date: str
    :return: 指定城市和数据频率下在指定时间段内的空气质量数据
    :rtype: pandas.DataFrame
    """
    url = "https://www.zq12369.com/api/newzhenqiapi.php"
    file_data = _get_file_content(file_name="outcrypto.js")
    out = execjs.compile(file_data)
    appId = "4f0e3a273d547ce6b7147bfa7ceb4b6e"
    method = "CETCITYPERIOD"
    timestamp = execjs.eval("timestamp = new Date().getTime()")
    p_text = json.dumps(
        {
            "city": city,
            "endTime": f"{end_date} 23:45:39",
            "startTime": f"{start_date} 00:00:00",
            "type": period.upper(),
        },
        ensure_ascii=False,
        indent=None,
    ).replace(' "', '"')
    secret = out.call("hex_md5",
                      appId + method + str(timestamp) + "WEB" + p_text)
    payload = {
        "appId": "4f0e3a273d547ce6b7147bfa7ceb4b6e",
        "method": "CETCITYPERIOD",
        "timestamp": int(timestamp),
        "clienttype": "WEB",
        "object": {
            "city": city,
            "type": period.upper(),
            "startTime": f"{start_date} 00:00:00",
            "endTime": f"{end_date} 23:45:39",
        },
        "secret": secret,
    }
    need = (json.dumps(payload,
                       ensure_ascii=False,
                       indent=None,
                       sort_keys=False).replace(' "', '"').replace(
                           "\\", "").replace('p": ',
                                             'p":').replace('t": ', 't":'))

    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"
    }
    params = {"param": out.call("AES.encrypt", need)}
    r = requests.post(url, data=params, headers=headers)
    temp_text = out.call("decryptData", r.text)
    data_json = demjson.decode(out.call("b.decode", temp_text))
    temp_df = pd.DataFrame(data_json["result"]["data"]["rows"])
    temp_df.index = temp_df["time"]
    del temp_df["time"]
    temp_df = temp_df.astype(float, errors="ignore")
    return temp_df
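The secret above is just an MD5 hex digest over the concatenated request fields; assuming `hex_md5` in outcrypto.js is plain MD5, the same value can be computed without execjs (a sketch under that assumption):

import hashlib
import json
import time

app_id = "4f0e3a273d547ce6b7147bfa7ceb4b6e"
method = "CETCITYPERIOD"
timestamp = int(time.time() * 1000)  # new Date().getTime() is in milliseconds
p_text = json.dumps(
    {
        "city": "杭州",
        "endTime": "2020-04-27 23:45:39",
        "startTime": "2019-03-27 00:00:00",
        "type": "DAY",
    },
    ensure_ascii=False,
    indent=None,
).replace(' "', '"')

# assumed equivalent of out.call("hex_md5", appId + method + timestamp + "WEB" + p_text)
secret = hashlib.md5(
    (app_id + method + str(timestamp) + "WEB" + p_text).encode("utf-8")
).hexdigest()
print(secret)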
示例#57
0
def get_fbpage_informaton(page_source):
    '''Fetch the detailed information of a Facebook fan page.
    Args:
        page_source: HTML source of the fan page
    Returns:
        dict with fbid, official website, name, type, lat/lng,
        blue verification badge flag, and like count
    '''

    lat = ''
    lng = ''
    fbid = ''
    type = ''
    offical_website = ''
    name = ''
    blue_verification_badge = False
    likes = 0

    soup = bs(page_source, 'html.parser')

    type_elm = soup.select_one(
        'img[src="https://static.xx.fbcdn.net/rsrc.php/v3/y7/r/3OfQvJdYD_W.png"]'
    ).parent
    if type_elm:
        type = type_elm.text.split(' · ')

    offical_website_elm = soup.select_one(
        'img[src="https://static.xx.fbcdn.net/rsrc.php/v3/yN/r/aE7VLFYMYdl.png"]'
    ).parent
    if offical_website_elm:
        offical_website = offical_website_elm.text

    blue_verification_badge_elm = soup.select_one(
        'img[src="https://static.xx.fbcdn.net/rsrc.php/v3/yN/r/ZRwcHdL-Tur.png"]'
    )
    if blue_verification_badge_elm:
        blue_verification_badge = True

    likes_elm = soup.select_one(
        '[style="font-size: 14px;font-weight: 400;line-height: 16px;color: #606770"]'
    )
    if likes_elm:
        likes = re.search(re.compile(r"(\d+)"),
                          likes_elm.text.replace(',', '')).groups()[0]

    name_elm = soup.select_one('[data-sigil="MBackNavBarClick"]')
    if name_elm:
        name = name_elm.text.replace(' - 首頁', '')

    fbid_elm = soup.select_one('a[rel = "async"]')
    if fbid_elm:
        fbid = fatch_fbid(fbid_elm['href'])

    geog_elm = soup.select_one('.profileMapTile')
    if geog_elm:
        landscape_url = demjson.decode(geog_elm['data-store'])['landscapeURL']

        lat = re.findall("\d+\.\d+", landscape_url)[0]
        lng = re.findall("\d+\.\d+", landscape_url)[1]

    data = {
        'fbid': fbid,
        'offical_website': offical_website,
        'name': name,
        'type': type,
        'lat': lat,
        'lng': lng,
        'blue_verification_badge': blue_verification_badge,
        'likes': likes,
    }

    return data
示例#58
0
	def json(self, api, *args):
		url = api % args
		return demjson.decode(self.downloader(url))
示例#59
0
def stock_report_fund_hold_detail(symbol: str = "005827",
                                  date: str = "20201231") -> pd.DataFrame:
    """
    东方财富网-数据中心-主力数据-基金持仓-明细
    http://data.eastmoney.com/zlsj/ccjj/2020-12-31-008286.html
    :param symbol: 基金代码
    :type symbol: str
    :param date: 财报发布日期, xxxx-03-31, xxxx-06-30, xxxx-09-30, xxxx-12-31
    :type date: str
    :return: 基金持仓-明细数据
    :rtype: pandas.DataFrame
    """
    date = "-".join([date[:4], date[4:6], date[6:]])
    url = "http://datainterface3.eastmoney.com/EM_DataCenter_V3/api/ZLCCMX/GetZLCCMX"
    params = {
        "js": "datatable8848106",
        "tkn": "eastmoney",
        "SHType": "1",
        "SHCode": symbol,
        "SCode": "",
        "ReportDate": date,
        "sortField": "SCode",
        "sortDirec": "1",
        "pageNum": "1",
        "pageSize": "500",
        "cfg": "ZLCCMX",
        "_": "1611579153269",
    }
    r = requests.get(url, params=params)
    data_text = r.text
    data_json = demjson.decode(data_text[data_text.find("{"):-1])
    temp_df = pd.DataFrame(data_json["Data"][0])
    temp_df = temp_df["Data"].str.split("|", expand=True)
    temp_df.reset_index(inplace=True)
    temp_df["index"] = range(1, len(temp_df) + 1)
    temp_df.columns = [
        "序号",
        "股票代码",
        "股票简称",
        "_",
        "_",
        "_",
        "_",
        "_",
        "_",
        "_",
        "持股数",
        "持股市值",
        "占总股本比例",
        "占流通股本比例",
    ]
    temp_df = temp_df[[
        "序号",
        "股票代码",
        "股票简称",
        "持股数",
        "持股市值",
        "占总股本比例",
        "占流通股本比例",
    ]]
    return temp_df
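A quick usage sketch with the docstring's own example values (hedged: the endpoint and its token may have changed since this was written):

fund_hold_df = stock_report_fund_hold_detail(symbol="005827", date="20201231")
print(fund_hold_df.head())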
示例#60
0
def json_decode(s):
    return demjson.decode(s)