def main():
    if 'STEAM_API_KEY' in os.environ:
        api_key = os.environ['STEAM_API_KEY']
    else:
        api_key = ''
    if sys.argv[1] == 'fetch':
        if 'STEAM_API_KEY' not in os.environ:
            print("STEAM_API_KEY environment variable not set")
            return
        fetch('matches/', api_key)
    elif sys.argv[1] == 'compile':
        compile_data('matches/', 'train.csv', 'test.csv')
    elif sys.argv[1] == 'train':
        iterations = [int(x) for x in sys.argv[2].split(',')]
        train('train.csv', 'test.csv', 'ml.model', iterations)
    elif sys.argv[1] == 'predict':
        predict(sys.argv[2])
    elif sys.argv[1] == 'coefs':
        coefs(sys.argv[2])
    elif sys.argv[1] == 'winrates':
        get_winrates()
    elif sys.argv[1] == 'heroes':
        save_hero_data('data/heroes.json', api_key, 'data/winrates.json')
def get_cn_questions():
    form_data = {
        'operationName': 'allQuestions',
        'query': 'query allQuestions {\n allQuestionsBeta {\n ...questionSummaryFields\n __typename\n }\n}\n\nfragment questionSummaryFields on QuestionNode {\n title\n titleSlug\n translatedTitle\n questionId\n questionFrontendId\n status\n difficulty\n isPaidOnly\n categoryTitle\n __typename\n}\n',
        'variables': {}
    }
    detail_data = {
        'operationName': 'questionData',
        'query': 'query questionData($titleSlug: String!) {\n question(titleSlug: $titleSlug) {\n questionId\n questionFrontendId\n boundTopicId\n title\n titleSlug\n content\n translatedTitle\n translatedContent\n isPaidOnly\n difficulty\n likes\n dislikes\n isLiked\n similarQuestions\n contributors {\n username\n profileUrl\n avatarUrl\n __typename\n }\n langToValidPlayground\n topicTags {\n name\n slug\n translatedName\n __typename\n }\n companyTagStats\n codeSnippets {\n lang\n langSlug\n code\n __typename\n }\n stats\n hints\n solution {\n id\n canSeeDetail\n __typename\n }\n status\n sampleTestCase\n metaData\n judgerAvailable\n judgeType\n mysqlSchemas\n enableRunCode\n envInfo\n book {\n id\n bookName\n pressName\n source\n shortDescription\n fullDescription\n bookImgUrl\n pressImgUrl\n productUrl\n __typename\n }\n isSubscribed\n isDailyQuestion\n dailyRecordStatus\n editorType\n ugcQuestionId\n __typename\n }\n}\n',
        'variables': {
            'titleSlug': ''
        }
    }
    resp = fetch(url=cn_graphql_url, method=Req.POST, headers=async_headers, data=json.dumps(form_data))
    if resp is None:
        return
    print(resp.text)
    res = resp.json()
    questions = res['data']['allQuestionsBeta']
    final_res = dict()
    for q in questions:
        qfid = q['questionFrontendId']
        qfid_zero = str(q['questionFrontendId']).zfill(4)
        title = q['title'].strip()
        cn_title = q['translatedTitle'].strip()
        title_slug = q['titleSlug'].strip()
        difficulty = q['difficulty']
        pre = no_dict[str(int(qfid) // 100)]
        folder_name = '{}/{}.{}'.format(pre, qfid_zero, title).replace('?', '').replace(':', '')
        link = 'https://leetcode-cn.com/problems/{}'.format(title_slug)
        detail_data['variables']['titleSlug'] = title_slug
        print(detail_data)
        detail_resp = fetch(cn_graphql_url, method=Req.POST, headers=async_headers, data=json.dumps(detail_data))
        if detail_resp is None:
            continue
        detail_res = detail_resp.json()
        question_content = detail_res['data']['question']['translatedContent']
        file_dir = '{}'.format(folder_name)
        if not os.path.isdir(file_dir):
            os.makedirs(file_dir)
        print(file_dir)
        with open(file_dir + '/README.md', 'w', encoding='utf-8') as f:
            content = template.format(qfid + '. ' + cn_title, link, question_content).replace('\u200b', '').replace('\ufe48', '')
            f.write(content)
            f.flush()
def main(mode: str):
    """
    Main program. Either fetches new articles from RSS feeds or analyzes them using Textrazor.

    :param mode: One of 'fetch' or 'analyze'.
    :return: Nothing, only prints messages regarding success or failure.
    """
    # time-related
    now = datetime.now()

    # configuration, database connection
    conf = Config()
    db = DatabaseClient(conf.database.host, conf.database.database, conf.database.user,
                        conf.database.password, conf.database.port)

    # fetch / analyze articles
    if mode == "fetch":
        fetch(conf, db, now.isoformat())
    elif mode == "analyze":
        analyze(conf, db, now)
    else:
        raise ValueError("--mode must be one of 'fetch', 'analyze'.")

    # commit any changes to database
    db.connection.commit()
def main(ids):
    Args = collections.namedtuple('Args', ['id', 'paperdir'])
    paperdir = os.path.expanduser("~/Papers")
    with open('good.txt', 'w') as g, open('bad.txt', 'w') as b:
        for id in ids:
            args = Args(id, paperdir)
            try:
                fetch(None, args)
            except FetchError as e:
                f = b
                print(e.msg)
                print("failed", id)
            else:
                f = g
                print("succeeded", id)
            f.write(id)
            f.write('\n')
            time.sleep(30)
    return
def makeCurlAndAIAmovie(start, end_or_span, folder, movieFile='out.mp4', downscale_factor=16):
    """Make movie of curl images next to AIA images from datetime START to
    datetime or timedelta END_OR_SPAN. Save all frames and MOVIEFILE in FOLDER.
    """
    if not os.path.isdir(folder):
        os.makedirs(folder)
    downscale_factors = (downscale_factor, downscale_factor)
    dimension = IMAGE_DIM // downscale_factor
    df_curl = fetch(start=start, end_or_span=end_or_span, parse_dates=['DATE__OBS'], **fetch_args)
    print('Fetched HMI data, {} images total.'.format(df_curl.shape[0]))
    first_date = df_curl.at[0, 'DATE__OBS']
    offset = first_date - start
    df_aia = fetch('aia.lev1_euv_12s', start=first_date, end_or_span=end_or_span + offset,
                   wavelengths=171, cadence=timedelta(minutes=12), keys=['DATE__OBS'],
                   segments='image', df=True, parse_dates=['DATE__OBS'])
    print('Fetched AIA data, {} images total.'.format(df_aia.shape[0]))
    font = ImageFont.truetype('/Library/Fonts/Arial.ttf', 12)
    cmap_aia = sunpy.cm.get_cmap('sdoaia171')
    cmap_aia.set_bad()  # Set masked values to black color
    norm_aia = matplotlib.colors.LogNorm(vmin=10, vmax=6000, clip=True)
    curl0 = downscale_local_mean(calculateCurl(df_curl, 0), downscale_factors)
    norm_curl = matplotlib.colors.SymLogNorm(1, vmin=np.nanmin(curl0), vmax=np.nanmax(curl0))
    start = datetime.now()
    for i in range(df_curl.shape[0]):
        curl = downscale_local_mean(calculateCurl(df_curl, i), downscale_factors)
        curl_img = misc.toimage(matplotlib.cm.viridis(norm_curl(np.fliplr(curl))))
        ImageDraw.Draw(curl_img).text((2, 2), 'Curl {:%Y-%m-%d\n%H:%M:%S}'.format(
            df_curl.at[i, 'DATE__OBS']), font=font)
        f = fits.open(df_aia.at[i, 'image'])
        f[1].verify('silentfix')
        data = np.flipud(downscale_local_mean(f[1].data, downscale_factors))
        f.close()
        aia_img = misc.toimage(cmap_aia(norm_aia(data)))
        ImageDraw.Draw(aia_img).text((2, 2), 'AIA 171 {:%Y-%m-%d\n%H:%M:%S}'.format(
            df_aia.at[i, 'DATE__OBS']), font=font)
        image = Image.new('RGB', (2 * dimension, dimension))
        image.paste(curl_img, (0, 0))
        image.paste(aia_img, (dimension, 0))
        image.save(os.path.join(folder, 'frame_{:04d}.png'.format(i)))
        printTimeInfo(start, i + 1, df_curl.shape[0])
    print('\nMaking movie with ffmpeg')
    cmd = 'ffmpeg -y -r 30 -i {} -c:v libx264 -pix_fmt yuv420p {}'.format(
        os.path.join(folder, 'frame_%04d.png'), os.path.join(folder, movieFile))
    subprocess.call(cmd.split())
def synoptic_glob():
    day = datetime(2010, 5, 13)  # series start date
    series_approx_end = datetime.utcnow() - timedelta(days=24)  # series lags by ~24d
    image_cache = np.zeros((512, 512))  # placeholder to add 1600 images to for sum
    norm = mov_img.colors.LogNorm(1)  # log color norm for 1600
    client = hek.HEKClient()  # client for querying flare data
    while day < series_approx_end:
        print(day)
        # fetch directory location for the desired day's data
        r = fetch.fetch('aia.fits_web', useJSOC2=True, start=day, segments='Images')
        day_dir = r[0]
        for hour in range(24):
            hour_dir = os.path.join(day_dir, 'H{}/'.format(str(hour).zfill(4)))
            # step through data at 12 minute cadence
            for minute in range(0, 60, 12):
                fname = 'AIA{}_{}_1600.fits'.format(
                    day.strftime('%Y%m%d'), str(hour * 100 + minute).zfill(4))
                image_path = os.path.join(hour_dir, fname)
                try:
                    amap = mov_img.Map(image_path)
                except ValueError:
                    print(image_path)
                    raise ValueError('File not found ^^^')
                data = mov_img.downscale_local_mean(amap.data, (2, 2))
                image_cache += data
            if hour % 6 == 0:
                # produce image
                t = day + timedelta(hours=hour)
                ch3 = np.flipud(norm(image_cache))
                ch2 = np.flipud(norm(data))
                image_cache = np.zeros((512, 512))
                r = fetch.fetch('hmi.M_720s', start=t, end_or_span=timedelta(minutes=12),
                                segments='magnetogram',
                                keys=['rsun_obs', 'cdelt1', 'quality', 'date__obs'])
                if not no_images(r):
                    ch1 = mov_img.process_hmi(r[0][-1], float(r[0][0]), float(r[0][1]),
                                              downscale=(8, 8), single_channel=True)
                    result = client.query(hek.attrs.Time(t, t + timedelta(days=1)),
                                          hek.attrs.EventType('FL'))
                    cls = get_max_flare(result)
                    out_path = '/Users/pauly/repos/aia/data/glob/{}/{}'.format(cls, r[0][-2])
                    img = mov_img.misc.toimage(np.array([ch1, ch2, ch3]))
                    img.save('{}.jpg'.format(out_path))
        day += timedelta(days=1)
def make_hmi(start=datetime(2010, 5, 1), r=[]):
    """Fetch data, then filter based on quality
    """
    if len(r) == 0:
        end = datetime.utcnow()
        delta = timedelta(days=30)
        while end - start > delta:
            r.extend(
                fetch.fetch('hmi.M_720s', start=start, end_or_span=delta,
                            segments='magnetogram', cadence=timedelta(hours=6),
                            keys=['rsun_obs', 'cdelt1', 'quality', 'date__obs']))
            start += delta
        r.extend(
            fetch.fetch('hmi.M_720s', start=start, end_or_span=datetime.utcnow(),
                        segments='magnetogram', cadence=timedelta(hours=6),
                        keys=['rsun_obs', 'cdelt1', 'quality', 'date__obs']))
    print('original query length:', len(r))
    r = [sub for sub in r if sub[2] == '0x00000000']
    print('filtered query length:', len(r))
    client = hek.HEKClient()
    for i, sub in enumerate(r):
        try:
            path = sub[-1]
            hdr = getFitsHdr(path)
            tstart = parse_time(sub[3])
            tend = tstart + timedelta(days=1)
            result = client.query(hek.attrs.Time(tstart, tend), hek.attrs.EventType('FL'))
            cls = get_max_flare(result)
            out_path = '/Users/pauly/repos/aia/data/hmi/{}/{}'.format(
                cls, tstart.strftime('%Y-%m-%dT%H:%M:%S'))
            img = mov_img.process_hmi(path, float(sub[0]), float(sub[1]), downscale=(8, 8))
            # save image and its symmetries
            img.save('{}_1.jpg'.format(out_path))
            ImageOps.flip(img).save('{}_2.jpg'.format(out_path))
            ImageOps.mirror(img).save('{}_3.jpg'.format(out_path))
            ImageOps.flip(ImageOps.mirror(img)).save('{}_4.jpg'.format(out_path))
            print('\r{}%'.format(int(100 * i / len(r))), end='')
        except:
            continue
def fetch_libs():
    for name, lib in libs.items():
        if name == 'openjpeg':
            filename = check_hash(fetch(lib['url']), lib['hash'])
            for compiler in compilers.values():
                if not os.path.exists(os.path.join(build_dir, lib['dir'] + compiler['inc_dir'])):
                    extract(filename, build_dir)
                    os.rename(os.path.join(build_dir, lib['dir']),
                              os.path.join(build_dir, lib['dir'] + compiler['inc_dir']))
        else:
            extract(check_hash(fetch(lib['url']), lib['hash']), build_dir)
def gen_img(title, channel, views, time):
    with open('common.txt') as f:
        common = f.read().splitlines()
    args = []
    for word in title.lower().split(' '):
        word = re.sub(r'[^a-z0-9]', '', word)
        if len(word) <= 2 or word in common:
            continue
        args.append(word)
    fetch(args)
    im = Image.open('thumb.jpg')
    draw = ImageDraw.Draw(im)
    im = im.convert('RGB')
    im = im.resize((640, 360))
    draw = ImageDraw.Draw(im)
    draw.rectangle([550, 310, 630, 350], fill='black')
    fnt = ImageFont.truetype('Arial Bold', 27)
    draw.text((555, 315), time, font=fnt, fill='white')
    im.save('thumb.jpg')
    im = Image.open('out.png')
    draw = ImageDraw.Draw(im)
    draw.rectangle([0, 0, 1250, 380], fill='#F1F1F1')
    draw.rectangle([10, 10, 640, 360], fill='white')
    thumb = Image.open('thumb.jpg')
    im.paste(thumb, (10, 10))
    im.save('out.png')
    txt = ImageText('out.png')
    _, y = txt.write_text_box((670, -40), title, box_width=550,
                              font_filename='Arial', font_size=55, color='black')
    txt.save('out.png')
    fnt = ImageFont.truetype('Arial', 40)
    im = Image.open('out.png')
    draw = ImageDraw.Draw(im)
    draw.text((675, y + 30), channel, font=fnt, fill='grey')
    draw.text((675, y + 85), views, font=fnt, fill='grey')
    im.save('out.png')
    print('Image generated!')
def main():
    # check if the folder named documents exists
    if not os.path.isdir(ROOTDIR):
        print("Downloading PDF files...")
        fetch()
        print("Download finished")
    writer = Writer()
    for page in get_all_references_pages(ROOTDIR):
        writer.addPage(page)
    with open("references.pdf", "wb") as output:
        writer.write(output)
def fetch_main(prog, options):
    desc = 'fetch an exploit'
    parser = argparse.ArgumentParser(description=desc, prog=prog)
    parser.add_argument("--issue", metavar="NO", required=True,
                        help="specify the issue number")
    add_team(parser)
    add_conf(parser)
    add_token(parser, False)
    args = parser.parse_args(options)
    config = load_config(args.conf)
    fetch(args.team, args.issue, config, args.token)
def __init__(self):
    self.db = db_connect.get_db()
    events = []
    test = Portfolio('test')  # Comment me out
    self.portfolios.append(test)  # Comment me out
    # main = Portfolio()  # Uncomment Me
    # self.portfolios.append(main)  # Uncomment Me
    for p in self.portfolios:
        self.tickers.extend(p.getTickers())
    if 'current' not in self.db.collection_names():
        fetch.fetch(self.tickers)
def fetch_all(project):
    try:
        os.makedirs("project_dbs")
    except:
        pass
    try:
        os.makedirs("Report Output/" + project['projectId'])
    except:
        pass
    try:
        fetch(project['projectId'])
    except Exception as e:
        print("Error fetching ", project['projectId'])
        logging.exception(e)
def make_aia(start=datetime(2010, 5, 1), r=[]):
    """Fetch data, then filter based on quality
    """
    if len(r) == 0:
        end = datetime.utcnow()
        delta = timedelta(days=30)
        while end - start > delta:
            r.extend(
                fetch.fetch('aia.lev1_euv_12s', start=start, end_or_span=delta,
                            wavelengths=171, segments='image', cadence=timedelta(hours=6)))
            start += delta
        r.extend(
            fetch.fetch('aia.lev1_euv_12s', start=start, end_or_span=datetime.utcnow(),
                        wavelengths=171, segments='image', cadence=timedelta(hours=6)))
    print('original query length:', len(r))
    r = [path for path in r if getFitsHdr(path)['quality'] == 0]
    print('filtered query length:', len(r))
    client = hek.HEKClient()
    for i, path in enumerate(r):
        try:
            hdr = getFitsHdr(path)
            tstart = parse_time(hdr['date-obs'])
            tend = tstart + timedelta(days=1)
            result = client.query(hek.attrs.Time(tstart, tend), hek.attrs.EventType('FL'))
            cls = get_max_flare(result)
            out_path = '/Users/pauly/repos/aia/data/aia/{}/{}'.format(
                cls, tstart.strftime('%Y-%m-%dT%H:%M:%S'))
            img = mov_img.process_img(path, downscale=(8, 8))
            # save image and its symmetries
            img.save('{}_1.jpg'.format(out_path))
            ImageOps.flip(img).save('{}_2.jpg'.format(out_path))
            ImageOps.mirror(img).save('{}_3.jpg'.format(out_path))
            ImageOps.flip(ImageOps.mirror(img)).save('{}_4.jpg'.format(out_path))
            print('\r{}%'.format(int(100 * i / len(r))), end='')
        except:
            continue
def main():
    parser = argparse.ArgumentParser(description='Run Spark DSE flow')
    parser.add_argument('-n', '--firstN', type=int, default=3)
    args = parser.parse_args()

    # Download all SPD matrices if not already present
    # get matrix list and print it
    # if no matrices specified, use the default criteria
    systems = fetch.fetch().getSpdLinearSystems()

    # add solution information
    add_solutions(systems)

    systems_without_solutions = systems.select(lambda x: not x.hasSol)
    if systems_without_solutions:
        utils.warn('Some systems do not have solutions')
        print systems_without_solutions

    systems_with_solutions = systems.select(lambda x: x.hasSol)
    if not systems_with_solutions.matrixList:
        utils.warn('No system has an expected solution')
        make_benchmark(systems_without_solutions)
        run_benchmark(systems_without_solutions.head(args.firstN))
        return

    make_benchmark(systems_with_solutions)
    run_benchmark(systems_with_solutions.head(args.firstN))
def ReadRestaurants(url):
    """Gets the details for restaurants on an UrbanSpoon page.
    Uses the built-in google map to extract the locations of each restaurant."""
    page = fetch(url)
    soup = BeautifulSoup(page[1])
    ## Get the details for the restaurants on this table by going into their page
    rest_table = soup.findAll("table", id="r-t")[0]  # r-t is the id of the restaurants
    rests = rest_table.findAll("div", "t")  # class "t" holds the restaurant entries
    links_to_rests = [r.findAll("a", href=True)[0] for r in rests]  # valid entries have a link in the href
    ## go get the restaurants
    rest_dicts = [ParseRestaurant(x['href']) for x in links_to_rests]
    ## Get the lat/lons out of the map on the link page
    scripts = soup.findAll("script")
    map_script = scripts[-4]  ## No idea if it will always be the 4th-last one
    # regex for lat, lon pairs in text
    lat_lon_re = re.compile("-?[0-9]{1,2}.[0-9]+, -?[0-9]{1,3}.[0-9]+")
    # list of them. they will be in alphabetical order, same as the rest dicts
    rest_lat_lons = [x.split(", ") for x in lat_lon_re.findall(map_script.text)]
    # which means we can just zip them up and append the locations to the restaurant dict
    for restaurant, location in zip(rest_dicts, rest_lat_lons):
        restaurant["lat"] = float(location[0])
        restaurant["lon"] = float(location[1])
    return rest_dicts
def spider(SpiderGlobalVariable):
    if SpiderGlobalVariable.spider_use_gevent:
        import gevent
    while True:
        if SpiderGlobalVariable.spider_urlnode_queue.qsize() > 0:
            _, node = SpiderGlobalVariable.spider_urlnode_queue.get()
            html = fetch(node.url, SpiderGlobalVariable.spider_model,
                         SpiderGlobalVariable.fetch_time_interval,
                         SpiderGlobalVariable.random_agent)
            if len(html) < 10:
                pass
            html_node = HtmlNode(node.url, html, timestamp(), node.depth)
            SpiderGlobalVariable.htmlnode_queue.put(html_node)
            SpiderGlobalVariable.total_count += 1
            if SpiderGlobalVariable.print_all:
                msg = "[Url] %s Depth: %s Found: %s Remaining: %s Html: %s" % (
                    node.url, str(node.depth), str(SpiderGlobalVariable.total_count),
                    str(SpiderGlobalVariable.spider_urlnode_queue.qsize()), str(len(html)))
                spider_logger.info(msg)
            else:
                msg = "[Url] %s Depth: %s Found: %s Remaining: %s Html: %s" % (
                    node.url, str(node.depth), str(SpiderGlobalVariable.total_count),
                    str(SpiderGlobalVariable.spider_urlnode_queue.qsize()), str(len(html)))
                console_width = getTerminalSize()[0] - 0
                if len(msg) - console_width > 0:
                    msg = msg[:console_width]
                    sys.stdout.write('\r' + msg)
                    sys.stdout.flush()
                else:
                    sys.stdout.write('\r' + msg + ' ' * (console_width - len(msg)))
                    sys.stdout.flush()
            if SpiderGlobalVariable.spider_use_gevent:
                gevent.sleep(0)
        else:
            if SpiderGlobalVariable.spider_use_gevent:
                gevent.sleep(0)
            else:
                time.sleep(5)
            SpiderGlobalVariable.exit_flag_count += 1
def run(YEAR, DAY, p1_fn, p2_fn, force=False, fake_time=False, D=False,
        run_samples=True, samples_only=False):
    if run_samples:
        for fname, data in sorted(get_samples(YEAR, DAY)):
            print(fname)
            print(p1_fn(data))
            print(p2_fn(data))
    target = get_target(YEAR, DAY, fake=fake_time)
    fmt_str = '%(asctime)-15s %(filename)8s:%(lineno)-3d %(message)s'
    log.basicConfig(level=log.DEBUG, format=fmt_str)
    now = time.time()
    left = target - now
    if left > 0:
        log.debug("Target: {} Now: {}".format(target, now))
        log.debug("Seconds Left: {}".format(left))
    if samples_only:
        return
    v = fetch(YEAR, DAY, log, wait_until=target, force=force)
    if D:
        print(v)
    print('part_1: {}'.format(p1_fn(v)))
    print('part_2: {}'.format(p2_fn(v)))
def get_players(self, position: Position, week: Week, real: bool) -> List[PlayerStats]:
    if real:
        return []
    stats_to_return = []
    page = fetch(self._fetch_url(position, week))
    html = BeautifulSoup(page, "html.parser")
    players = html.select("table#data tr")
    for player in players:
        name_cell = player.select("td.player-label")
        if not name_cell:
            # Probably a header
            continue
        name = name_cell[0].select("a.player-name")[0].text
        points_raw = player.select("td")[-1].text
        try:
            points = float(points_raw)
        except ValueError:
            points = 0.0
        if points == 0:
            break
        p = PlayerStats(
            name=name,
            position=position,
            week=week,
            points=points,
            real=real,
            source=self.source(),
        )
        stats_to_return.append(p)
    return stats_to_return
def login_baidu():
    url = 'https://passport.baidu.com/v2/api/?login'
    header = {
        'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
        'charset': 'utf-8',
        'token': 'cb43da0cb6c17aa9f58a44369d624d67',
        'tpl': 'netdisk',
        'apiver': 'v3',
        'tt': timestamp(),
        'codestring': '',
        'isPhone': 'false',
        'safeflg': '0',
        'u': 'http://pan.baidu.com/',
        'quick_user': '******',
        'logintype': 'basicLogin',
        'username': '******',
        'password': '******',
        'verifycode': '',
        'mem_pass': '******',
        'ppui_logintime': '10423',
        'callback': 'parent.bd__pcbs__dikal3'
    }
    print url
    return fetch(url, header, None, True)
def current():
    rv = fetch()
    data = {'Light': 4095 - int(rv['light']),
            'Temp': str(rv['temp']) + ' C',
            'Pressure': str(rv['pressure'] / 100) + ' hPa',
            'Humidity': str(rv['hum']) + '%'}
    return render_template('current.html', items=data.items())
def main():
    result = fetch(sheetId, rangeName='Data!A1:R57')
    values = result.get('values', [])
    for systemdata in values[1:]:
        name = systemdata[2]
        x, y, z = tuple(systemdata[-4:-1])
        coords = '(%s, %s, %s)' % (x, y, z)
        print('%s %s' % (name.rjust(20), coords.ljust(5)))
def google(keyword, page=1, language='lang_en', country='countryUS',
           proxy=None, ua=None, cookie=None, verbose=True):
    from fetch import fetch
    url = "http://www.google.com/search?q=%s&hl=en&lr=%s&cr=%s" % (keyword, language, country)
    if page > 1:
        url = "%s&start=%d" % (url, 10 * (page - 1))
    return fetch(url, proxy, ua, cookie, verbose)
def posttoSlack(slack_url, app_token, access_token, packetm, packetn, count):
    # for debugging token, check if valid
    debug_url = "https://graph.facebook.com/debug_token?input_token=" + access_token + "&access_token=" + app_token
    prev_packetm = packetm
    prev_packetn = packetn
    try:
        response = requests.get(debug_url)
        data = json.loads(response.text)
        # validation of token
        if 'error' in data.keys():
            packetm = "access_token not valid"
        else:
            # fetch messages data
            try:
                data = fetch(access_token)
            except:
                packetm = "Couldn't Fetch"
            # parse fetched message data
            try:
                packetm = parse(data)
            except:
                packetm = "Couldn't Parse"
            # change this if you need to get updated for notifications more frequently
            if count % 6 == 0:
                # fetch notifications data
                try:
                    data = fetchnotif(access_token)
                except:
                    packetn = "Couldn't Fetch Notif"
                # parse notifications data
                try:
                    packetn = parsenotif(data)
                except:
                    packetn = "Couldn't Parse Notif"
    except:
        packetm = "Couldn't Debug"
    # prepare packet with messages and notifications
    if count % 6 == 0:
        # post to slack if not the same as earlier
        if packetn + packetm != prev_packetn + prev_packetm:
            payload = {"text": packetn + packetm}
            response = requests.post(slack_url, json.dumps(payload))
    # prepare packet with messages only
    else:
        # post to slack if not the same as earlier
        if packetm != prev_packetm:
            payload = {"text": packetm}
            response = requests.post(slack_url, json.dumps(payload))
    return packetm, packetn
def verify_google_scopes(self):
    """Verify that if we are running on Google that our scopes are valid."""
    if not is_google_instance():
        return
    if not self.verify_true_false('providers.google.enabled'):
        return
    if not self.__bindings.get('providers.google.enabled'):
        return

    result = fetch(GOOGLE_INSTANCE_METADATA_URL + '/service-accounts/', google=True)
    service_accounts = result.content if result.ok() else ''

    required_scopes = [GOOGLE_OAUTH_URL + '/compute']
    found_scopes = []

    for account in filter(bool, service_accounts.split('\n')):
        if account[-1] == '/':
            # Strip off trailing '/' so we can take the basename.
            account = account[0:-1]
        result = fetch(os.path.join(GOOGLE_INSTANCE_METADATA_URL, 'service-accounts',
                                    os.path.basename(account), 'scopes'),
                       google=True)

        # cloud-platform scope implies all the other scopes.
        have = str(result.content)
        if have.find('https://www.googleapis.com/auth/cloud-platform') >= 0:
            found_scopes.extend(required_scopes)

        for scope in required_scopes:
            if have.find(scope) >= 0:
                found_scopes.append(scope)

    for scope in required_scopes:
        if scope not in found_scopes:
            self.__errors.append(
                'Missing required scope "{scope}".'.format(scope=scope))
def get_players(self, fake_position: Position, week: Week, real: bool) -> List[PlayerStats]:
    if fake_position != Position.QUARTERBACK or real:
        return []
    done = False
    page_num = 0
    stats_to_return = []
    while not done:
        page = fetch(self._fetch_url(week, page_num=page_num))
        html = BeautifulSoup(page, "html.parser")
        players = html.select("tr.pncPlayerRow")
        orig_len = len(stats_to_return)
        for player in players:
            name = player.select("td.playertablePlayerName a")[0].text
            position_raw = player.select("td.playertablePlayerName")[0].text
            points_raw = player.select("td.playertableStat.sortedCell")[0].text
            try:
                points = float(points_raw)
            except ValueError:
                points = 0.0
            position = None
            for part in position_raw.strip().split()[::-1]:
                try:
                    position = Position.from_short_str(part)
                    break
                except ValueError:
                    continue
            if not position:
                continue
            if points == 0:
                done = True
                break
            p = PlayerStats(
                name=name,
                position=position,
                week=week,
                points=points,
                real=real,
                source=self.source(),
            )
            stats_to_return.append(p)
        page_num += 1
        if len(stats_to_return) == orig_len:
            break
    return stats_to_return
def do_fetch(self, id):
    """Fetch a paper from ADS given the id

    iota> fetch id
    """
    self.args.id = id
    try:
        sexps = fetch(self.database, self.args)
    except FetchError as e:
        logging.error(e.msg)
    else:
        self.print_sexp(sexps)
def download_and_unpack():
    # If the tarfile is around then we might fix the problem by unpacking it.
    unpacked_stat, msg = unpack_test_tar()
    if unpacked_stat is None:
        if msg:
            sys.stdout.write(msg)
        # There is no tarfile or it is broken, so try to download it and then
        # try unpacking again.
        sys.stdout.write("Trying to download %r..." % test_tar_file)
        fetch.fetch(["ccc-gistemp-test-2009-12-28.tar"], ".")
        unpacked_stat, msg = unpack_test_tar()
        if unpacked_stat != 0:
            sys.stderr.write(msg)
            return 1
    elif unpacked_stat > 0:
        sys.stderr.write(msg)
        return 1
    return 0
def worker(pid, oklog, faillog, path, url, thumb, down, extract, metadata, post):
    '''Process to be run by main.py

    Arguments taken:
    'pid' -> Number to put next to terminal output
    'oklog' -> Dictionary to put successful urls into
    'faillog' -> List to put failed urls into
    'path' -> Path to create zipfiles in [str]
    'url' -> Link to site to zip [str]
    'thumb' -> Url to thumbnail (if page doesn't have any images) [str]
    'down' -> Url to file to download
    'metadata' -> Dict of metadata to include in zipfile
    'post' -> Function to run after download to clean content

    Logs start time, checks url, downloads url, checks for extra files,
    downloads them if necessary, and zips everything up.
    '''
    start = time.time()
    http = re.compile('^http')
    md5 = hash_url(url)
    print("[PID:%s] Processing '%s'" % (pid, url))
    if check_url(url, 5):
        meta = fetch(url, path, md5, extract, metadata)
        if thumb:
            if http.match(thumb):
                if check_url(thumb):
                    fetch_file(thumb, path, md5, 'thumbnail')
                else:
                    print("\nThumbnail url check failed!\n%s\n" % thumb)
                    faillog.append(thumb)
        if down:
            if http.match(down):
                if check_url(down):
                    down_name = re.search("([^/.]*\.pdf)", down).group(0)[:-4]
                    fetch_file(down, path, md5, down_name)
                else:
                    print("\nDownload file url check failed!\n%s\n" % down)
                    faillog.append(down)
    else:
        faillog.append(url)
        print("\nUrl check failed!\n'%s'\n" % url)
    if post:
        if post == 'storybook':
            print('Storybook is disabled from the move! Fix it before calling')
        elif post == 'news24':
            news24(md5, path)
    zip_up(md5, path)
    finished = time.time() - start
    oklog.append(url)
    shutil.rmtree(join(path, md5))
    print('[PID: %s] Completed successfully in %ss.' % (pid, finished))
def main():
    statsd = StatsClient(host='localhost', port=8125, prefix=None, maxudpsize=512)
    queue = get_SQSqueue()
    db_connection = get_dynamoDBconnection()
    while True:
        messages = fetch(items=MAX_ITEMS, queue=queue, statsd=statsd)
        for message in messages:
            if apply2(message=message, connection=db_connection, statsd=statsd):
                pass
def do_GET(self):
    self.send_response(200)
    self.send_header('Content-type', 'text/plain')
    self.end_headers()
    if self.path == "/favicon.ico":
        return
    x = 0
    if self.path != "/":
        x = float(self.path[1:])
    execute(x, 10)
    y = fetch(x)
    self.wfile.write(str(y).encode())
def extract_libs():
    for name, lib in libs.items():
        filename = fetch(lib["url"])
        if name == "openjpeg":
            for compiler in all_compilers():
                if not os.path.exists(
                    os.path.join(build_dir, lib["dir"] + compiler["inc_dir"])
                ):
                    extract(filename, build_dir)
                    os.rename(
                        os.path.join(build_dir, lib["dir"]),
                        os.path.join(build_dir, lib["dir"] + compiler["inc_dir"]),
                    )
        else:
            extract(filename, build_dir)
def extract_libs():
    for name, lib in libs.items():
        filename = lib['filename']
        if not os.path.exists(filename):
            filename = fetch(lib['url'])
        if name == 'openjpeg':
            for compiler in compilers.values():
                if not os.path.exists(os.path.join(build_dir, lib['dir'] + compiler['inc_dir'])):
                    extract(filename, build_dir)
                    os.rename(os.path.join(build_dir, lib['dir']),
                              os.path.join(build_dir, lib['dir'] + compiler['inc_dir']))
        else:
            extract(filename, build_dir)
def get(self):
    page = self.get_argument('p', 1)
    try:
        page = int(page)
    except ValueError:
        page = 1
    results = fetch.fetch(page=page)
    self.finish({
        'page': page,
        'rows': [
            {
                'id': id,
                'title': entry.media.title.text,
                'url': entry.media.player.url
            }
            for id, entry in results.iteritems()
        ]
    })
def get_lcof_questions():
    """Fetch the LCOF ("剑指 Offer") problems."""
    resp = fetch(url=lcof_problems_url, headers=async_headers)
    if resp is None:
        return None
    res = resp.json()
    questions = res['stat_status_pairs']
    for question in questions:
        fe_qid = question['stat']['frontend_question_id']
        qno = fe_qid.replace('面试题', '').strip()
        title = question['stat']['question__title'].replace(' LCOF', '').strip()
        link = problems_url + question['stat']['question__title_slug']
        git_link = '/lcof/{}/README.md'.format(quote(fe_qid + '. ' + title))
        col1 = '[{}]({})'.format(qno, link)
        col2 = '[{}]({})'.format(title, git_link)
        col3 = difficulties.get(str(question['difficulty']['level']))
        yield [col1, col2, col3]
def get_all_questions():
    """Fetch all problems."""
    cn_res = get_cn_questions()
    resp = fetch(url=all_problems_url, headers=async_headers)
    if resp is None:
        return
    res = resp.json()
    questions = res['stat_status_pairs']
    for question in questions:
        qid = str(question['stat']['question_id']).zfill(4)
        title = question['stat']['question__title']
        link = problems_url + question['stat']['question__title_slug']
        git_link = '/solution/{}/README.md'.format(qid + '.' + quote(title))
        cn_title = cn_res.get(qid) or title
        col1 = '[{}]({})'.format(qid, link)
        col2 = '[{}]({})'.format(cn_title, git_link)
        col3 = difficulties.get(str(question['difficulty']['level']))
        yield [col1, col2, col3]
def evaluate_truncations(**kwargs):
    # init inputs
    data = kwargs['data']
    word = kwargs['word']
    method = kwargs['method']

    # setup truncator
    trunc_input_data = fetch.fetch(method, data)
    Truncator = get_truncator(truncator_name)
    t = Truncator(trunc_input_data)

    # get truncations
    truncations = t.get_truncations(word)

    # get other stats
    evaluator = TruncatorEvaluator()
    # run the following line if you want to. commented out just bc its slow
    # out = evaluator.test_file(
    #     path.join(OUT_DIR, "out.csv"),
    #     "../fonts/Monaco.dfont"
    # )
    return truncations
def FillSubsequentPages(base):
    ## Get the restaurants for this page
    rests = ReadRestaurants(base)
    ## Are there any pages after this?
    page = fetch(base)
    soup = BeautifulSoup(page[1])
    try:
        next_page_tag = [pg for pg in soup.findAll('a') if "next page" in pg.text][0]
    except IndexError:
        next_page_tag = False
    ## If so:
    if next_page_tag:
        ## Tell it what the proper URL is
        next_page_url = "http://www.urbanspoon.com" + next_page_tag['href']
        ## And get the restaurants on that page too
        rests = rests + FillSubsequentPages(next_page_url)
        return rests
    else:
        return rests
def main(args):
    filename = os.getcwd() + '/.patchesrc'
    if os.access(filename, os.R_OK):
        raise Exception('Configuration file %s already exists' % filename)
    ini = RawConfigParser()
    if args.url:
        ini.add_section('fetch')
        ini.set('fetch', 'url', args.url[0])
    with open(filename, 'w') as fp:
        ini.write(fp)
    if args.url:
        config.setup(filename)
        return fetch.fetch()
    return 0
def getFile(fileName, url, cacheTime=7 * 24 * 3600):
    '''Returns the path of fileName on disk. If the file doesn't exist it is
    fetched, and if it is older than cacheTime it is replaced with a new copy.
    '''
    filePath = os.path.join('data', fileName)
    if os.path.isfile(filePath):
        info = os.stat(filePath)
        if (time.time() - info.st_mtime) < cacheTime:
            return filePath
    print 'Fetching url: ', url
    urlData = fetch(url)
    if urlData['responseCode'] != 200:
        raise Exception("Couldn't fetch url: ", url)
    f = open(filePath, 'wb')
    f.write(urlData['html'])
    f.close()
    return filePath
def ParseRestaurant(rest_page_url):
    """Parses an UrbanSpoon restaurant page for the name, rating, and address of
    the restaurant. Text values are encoded to UTF8 from unicode for writing to
    CSV post-hoc. Note that HTML class names etc. may change."""
    page = fetch(rest_page_url)
    try:
        soup = BeautifulSoup(page[1])
        name = soup.findAll('h1', 'page-title fn org')[0].text.encode("utf8")
        print "looking at %s" % name
        ## Address info
        street_addr = soup.findAll('span', 'street-address')[0].text.encode("utf8")
        suburb = soup.findAll('span', 'locality')[0].text.encode("utf8")
        postcode = soup.findAll('a', 'quiet-link postal-code')[0].text.encode("utf8")
        ## Number of votes
        try:
            num_voted = int(soup.findAll('div', id='num-votes')[0].text.split(" ")[0])
        except ValueError:
            num_voted = 0
        ## Rating - if the restaurant hasn't been rated, there isn't a span of that name
        ## And if there is, there might be something Weird
        try:
            rating = soup.findAll('span', 'percent-text rating average')[0].text
            rating_as_num = int(rating[:-1])  # knock off the % sign
        except (ValueError, IndexError):
            rating_as_num = 0
        rest_details = {"name": name,
                        "street": street_addr,
                        "suburb": suburb,
                        "postcode": postcode,
                        "number_voted": num_voted,
                        "rating": rating_as_num}
        return rest_details
    except IndexError:
        return {}
def find_match(urls, param):
    result = []
    for url in urls:
        content = fetch(url)
        param2 = ''
        if type(param) == type(''):
            param1 = param
        elif type(param) == type([]):
            param1 = param[0]
            param2 = param[1]
        # find the regular match url
        temp = re.findall(param1, content, re.S | re.M | re.I)
        for i in temp:
            result.append(urllib.unquote(urlparse.urljoin(url, i)))
        # find the "brother" match url
        if param2:
            temp = re.findall(param2, content, re.S | re.M | re.I)
            for i in temp:
                next_url = urllib.unquote(urlparse.urljoin(url, i))
                result.extend(find_match([next_url], param))
    return result
def worker():
    item_list = fetch()
    conn = DB()
    rdd = setup_rdd()
    readability_list = []
    for item in item_list:
        mark = add_to_readability(rdd, item[1])
        print item[0], item[1], make_link(mark.article.id)
        conn.insert_db((item[0], item[1], make_link(mark.article.id),
                        date.today() - timedelta(days=1)))
        article = rdd.get_article(mark.article.id)
        readability_list.append({'title': item[0],
                                 'link': make_link(mark.article.id),
                                 'content': article.content})
    send_mail(readability_list)
def post(self):
    uid = self.request.get("uid")
    fetch(uid)
from fetch import fetch
import os

if __name__ == '__main__':
    for version in ['2.7.10', '3.3.5', '3.4.3']:
        for platform in ['', '.amd64']:
            for extension in ['', '.asc']:
                fetch('https://www.python.org/ftp/python/%s/python-%s%s.msi%s'
                      % (version, version, platform, extension))
    # find pip, if it's not in the path!
    os.system('pip install virtualenv')
# each line of sci_site is "<category name>\t<feed url>"; the Chinese category
# names are kept as-is because they are written into the output path below
sci_site = u"""首页头条	http://www.sciencenet.cn/xml/news.aspx?di=0
首页要闻	http://www.sciencenet.cn/xml/news.aspx?di=1
频道要闻	http://www.sciencenet.cn/xml/news.aspx?di=3
一般新闻	http://www.sciencenet.cn/xml/news.aspx?di=4
通知公告	http://www.sciencenet.cn/xml/news.aspx?di=5
新闻评论	http://www.sciencenet.cn/xml/news.aspx?di=6
国际快讯	http://www.sciencenet.cn/xml/news.aspx?di=7
热门论文	http://www.sciencenet.cn/xml/news.aspx?di=8
人才高教	http://www.sciencenet.cn/xml/news.aspx?di=9
生命科学	http://www.sciencenet.cn/xml/field.aspx?di=3
前沿交叉	http://www.sciencenet.cn/xml/field.aspx?di=4
政策管理	http://www.sciencenet.cn/xml/field.aspx?di=5
医药健康	http://www.sciencenet.cn/xml/field.aspx?di=6
基础科学	http://www.sciencenet.cn/xml/field.aspx?di=7
工程技术	http://www.sciencenet.cn/xml/field.aspx?di=8
信息科学	http://www.sciencenet.cn/xml/field.aspx?di=9
资源环境	http://www.sciencenet.cn/xml/field.aspx?di=10
所有文章	http://www.sciencenet.cn/xml/blog.aspx?di=0
学术教育	http://www.sciencenet.cn/xml/blog.aspx?di=1
政策争鸣	http://www.sciencenet.cn/xml/blog.aspx?di=2
历史人文	http://www.sciencenet.cn/xml/blog.aspx?di=3
娱乐生活	http://www.sciencenet.cn/xml/blog.aspx?di=4
人物纪事	http://www.sciencenet.cn/xml/blog.aspx?di=5
图片百科	http://www.sciencenet.cn/xml/blog.aspx?di=6"""

import socket
socket.setdefaulttimeout(10)

for i in sci_site.split('\n'):
    i = i.split('\t')
    fetch(i[1].strip(), local_saver("www.sciencenet.cn.txt", "科学网/" + i[0]))
def go(url='http://www.kiees.com/'):
    # deal-discovery site ("发现值得买")
    page = fetch(url)
    info = extract(page)
    insert(info)
def __init__(self):
    self.fetch = fetch()
    Command.__init__(self)
def exec_fetch(args):
    print('fetch.py ' + ' '.join(args))
    sys.exit(fetch.fetch(args))
def grab(num):
    files = fetch.fetch(num)
    return jsonify({'files': files})
time = []
try:
    for i in xrange(1, 10):
        grade = trs[i].contents[23].text
        high = trs[i].contents[7].text
        weight = trs[i].contents[9].text
        time.append([grade, high, weight])
except IndexError, e:
    pass
data = [stuid, name, sex, time]
return data  # return the data in JSON format

if __name__ == '__main__':
    import fetch
    fetchtool = fetch.fetch(proxy="goagent")
    data = fetchtool.fetchPage("0843041063")
    for i in data:
        try:
            if i + ' ':
                print i
                continue
        except TypeError:
            pass
        for j in i:
            for k in j:
                print k
def api(owner, repo):
    data = fetch(owner, repo)
    return jsonify(meta=dict(status=200, message='OK'),
                   data=sorted(data, key=lambda r: r['count'], reverse=True))
# encoding=utf8
import re
import urllib

from fetch import fetch

result = ''
for i in range(8):
    url = "http://licai.taobao.com/json/show_buyer_list.html?bid_page=%d&item_id=35979593061&seller_id=1759036930&page_size=%d"
    url = url % (i + 1, 100)
    content = fetch(url)
    data = re.findall(r'''<tr>
<td.*?>([^<]+)<span.*?>([^<]+)</span></td>
<td.*?><span>([^<]+)</span><span.*?></span></td>
<td.*?>([\d\.]+).*?</td>
<td.*?>([^<]+)</td>
<td.*?>([^<]+)</td>
</tr>''', content, re.I | re.M | re.S)
    print i + 1, len(data), "records"
    if not data:
        break
    for i in data:
        line = ','.join(i)
        result += line + '\n'

w = open('taobao_licai_jijin.txt', 'w')
w.write(result)
w.close()
#!/bin/python
from fetch import fetch

FILE = 'faster_rcnn_models.tgz'
URL = 'http://people.eecs.berkeley.edu/~rbg/faster-rcnn-data/%s' % FILE
CHECKSUM = 'ac116844f66aefe29587214272054668'

fetch(FILE, URL, CHECKSUM)