def __fetch_daily_returns(self):
    fetcher = Fetcher(list(self.stockList))
    fetcher.fetch_history(self.startDate, self.endDate)
    self.df = fetcher.get_dataframe('Adj_Close')
    globalStats = GlobalStats(self.df)
    return globalStats.get_daily_returns()

def fetch(agfid, u_ip, d_ip, r_ip, a_ip, u_port, d_port, r_port, a_port,
          temp_file_dir, device_name):
    logging.basicConfig(
        filename="fetcher.log",
        filemode="w",
        format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
        datefmt="%d-%m-%Y %H:%M:%S",
        level=logging.DEBUG)
    logging.info('_r=true' + ';agfid=' + agfid + ';a_port=' + str(a_port) +
                 ';u_port=' + str(u_port) + ';r_port=' + str(r_port) +
                 ';temp_file_dir=' + temp_file_dir +
                 ';device_name=' + device_name + ';')
    params = {
        "rnode_ip": r_ip,
        "unode_ip": u_ip,
        "dnode_ip": d_ip,
        "unode_port": str(u_port),
        "rnode_port": str(r_port),
        "dnode_port": str(d_port),
        "adc_ip": a_ip,
        "adc_port": str(a_port)
    }
    fetcher = Fetcher(params)
    err = fetcher.start(agfid=agfid, device_name=device_name,
                        temp_file_dir=temp_file_dir)
    if err is not None:
        click.echo("_r=false;message=" + err + ";")
        logging.error("_r=false;message=" + err + ";")
        if fetcher.ffmpeg is not None:
            fetcher.ffmpeg.kill()

def fetch_prices(self):
    # Round the current timestamp up to the next 6-hour boundary.
    t = 6 * 3600 * int(time.time() / (6 * 3600)) + 6 * 3600
    js = Fetcher(json.loads).fetch(
        URL_PRICES.format(self.id, DATE_START.strftime("%Y-%m-%d"), t))
    self.rawdata = js['data']
    self.btc_series = [(datetime.strptime(k.split("T")[0], "%Y-%m-%d"), v['BTC'][0])
                       for k, v in self.rawdata.items()]
    if self.data:
        self.btc_series.append(
            (datetime.now(), self.data["quote"]["BTC"]["price"]))
    series_fill_zeroes(self.btc_series)
    normalize(self, "btc_series")
    self.usd_series = [(datetime.strptime(k.split("T")[0], "%Y-%m-%d"), v['USD'][0])
                       for k, v in self.rawdata.items()]
    if self.data:
        self.usd_series.append(
            (datetime.now(), self.data["quote"]["USD"]["price"]))
    series_fill_zeroes(self.usd_series)
    normalize(self, "usd_series")
    self.supply = []
    try:
        # Derive supply from the USD figures, rounded to the nearest 10.
        self.supply = [(datetime.strptime(k.split("T")[0], "%Y-%m-%d"),
                        10 * round(0.1 * div0(v['USD'][2], v['USD'][0])))
                       for k, v in self.rawdata.items()]
    except Exception:
        pass
    series_fill_zeroes(self.supply)
    normalize(self, "supply")

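# NOTE: a minimal, hypothetical sketch of the Fetcher interface that the
# market-data snippets in this collection appear to assume: the constructor
# takes a parser callable (e.g. json.loads) and fetch() applies it to the
# downloaded body. The class name, the 'requests' dependency, and the retry
# count are illustrative assumptions, not the project's actual code.
import requests


class ParsingFetcher:
    def __init__(self, parser=lambda text: text, retries=3, timeout=10):
        self.parser = parser
        self.retries = retries
        self.timeout = timeout

    def fetch(self, url, data=None):
        # Retry transient HTTP failures, then hand the response body to the parser.
        last_error = None
        for _ in range(self.retries):
            try:
                response = requests.get(url, params=data, timeout=self.timeout)
                response.raise_for_status()
                return self.parser(response.text)
            except requests.RequestException as error:
                last_error = error
        raise last_error


# Usage mirroring the snippets above and below: ParsingFetcher(json.loads).fetch(some_url)
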
def main(argv):
    # myBrowser = Browser()
    isLogin = False
    myFetcher = Fetcher()
    username = ""
    password = ""
    domain = ""
    try:
        opts, args = getopt.getopt(argv, "u:p:d:", ["login", "domain="])
    except getopt.GetoptError:
        print("(mandatory)\n-d <domain> or --domain <domain>"
              "\n(optional)\n--login followed by\n -u <username> -p <password>")
        exit()
    for opt, arg in opts:
        if opt == "--login":
            isLogin = True
        elif opt == "-u":
            username = arg
        elif opt == "-p":
            password = arg
        elif opt in ("-d", "--domain"):
            domain = arg
    if isLogin:
        myFetcher.setCredentials(username, password)
        myFetcher.login(domain)
    print(myFetcher.getCookies())

def __init__(self, config_parser):
    # Connect to engine
    database_path = get_from_config_parser(config_parser, 'Database', 'path', 'database')
    database_debug = get_boolean_from_config_parser(config_parser, 'Database', 'debug', False)
    dir = os.path.dirname(database_path)
    if not os.path.exists(dir):
        mkdir(dir)
    sys.stderr.write('Connecting to database at "%s"\n' % database_path)
    self._engine = create_engine('sqlite:///%s' % database_path, echo=database_debug)
    # Start session
    Session = sessionmaker(bind=self._engine)
    self._session = Session()
    # Initialize feed storage
    self._feed_storage = FeedStorage(self._engine, self._session)
    # Initialize item storage
    self._item_storage = ItemStorage(self._engine, self._session)
    # A list of subprocess.Popen processes that will be maintained
    # by the Coffer object.
    self._external_processes = []
    # File storage (data dump)
    file_storage_path = get_from_config_parser(config_parser, 'FileStorage', 'path', 'datadump')
    max_block_size = get_int_from_config_parser(config_parser, 'FileStorage', 'max-block-size',
                                                file_storage.DEFAULT_MAX_BLOCK_SIZE)
    bzip2_path = get_from_config_parser(config_parser, 'FileStorage', 'bzip2-path', '/usr/bin/bzip2')
    self._file_storage = FileStorage(self._external_processes, file_storage_path,
                                     max_block_size, bzip2_path)
    # Content fetcher configuration
    self._fetcher = Fetcher(config_parser)

def __init__(self, target_url):
    self.url = target_url
    self.logger = logging.getLogger('SaveService')
    self.fetcher = Fetcher()
    self.getter = Getter()
    self.url_info = {}
    self.client = MongoHelper(config.store_db)

def crawl_category(self):
    fetcher = Fetcher()
    kk = yield fetcher.fetch(
        "http://www.carters.com/%s?startRow=0&sz=all" % self.slug)
    page = kk.body
    self._process(page)

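# NOTE: a hypothetical sketch of the coroutine-style Fetcher that the crawler
# snippets assume (`resp = yield fetcher.fetch(url)` returning an object with a
# .body attribute). The use of Tornado's AsyncHTTPClient, the class name, and
# the form-encoding of POST data are assumptions for illustration only.
from urllib.parse import urlencode

from tornado import gen
from tornado.httpclient import AsyncHTTPClient


class AsyncFetcher:
    def __init__(self):
        self.client = AsyncHTTPClient()

    @gen.coroutine
    def fetch(self, url, method="GET", data=None):
        # Form-encode POST payloads; Tornado's HTTPResponse exposes .body.
        body = urlencode(data) if data is not None else None
        response = yield self.client.fetch(url, method=method, body=body)
        raise gen.Return(response)
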
def main():
    fm = FileManager()
    fe = Fetcher()
    folder_dir = fm.get_folder_dir()
    df = [fe.statistics(), fe.coutries_data(), fe.countries_historical_data()]
    filenames = [
        'statistics', 'all_country_data', 'all_country_historical_data'
    ]
    for i in range(3):
        filename = '{}/{}.csv'.format(folder_dir, filenames[i])
        df[i].to_csv(filename, index=False)
        print('{} is created successfully.'.format(filename))
    if len(argv) > 1:
        country = argv[1]
        df = [fe.coutry_data(country), fe.country_historical_data(country)]
        filenames = [
            '{}__country_data'.format(country),
            '{}__country_historical_data'.format(country)
        ]
        for i in range(2):
            filename = '{}/{}.csv'.format(folder_dir, filenames[i])
            df[i].to_csv(filename, index=False)
            print('{} is created successfully.'.format(filename))

def __init__(self, packageName, debug):
    # Variables for tests
    self.logger = logging.getLogger("tmLogger")
    if debug:
        self.logger.setLevel(logging.DEBUG)
    # Use the package name as a directory name
    self.packageName = "%s/" % packageName
    # Assign directories to be worked on
    self.workDir = "/tmp/testManager"
    self.repo = urlparse.urljoin("http://cekirdek.pardus.org.tr/~serbulent/test_guides/",
                                 self.packageName)
    self.saveDir = os.path.join("/tmp/testManager", self.packageName)
    self.filesDir = os.path.join(self.saveDir, "files")
    self.configFile = os.path.join(self.saveDir, "testProcess.conf")
    # Create test directories
    for item in (self.workDir, self.saveDir, self.filesDir):
        if not os.path.isdir(item):
            try:
                os.mkdir(item)
                debugMsg = "%s created" % item
                self.logger.debug(debugMsg)
            except OSError:
                errorMsg = "An error occurred when creating directory %s" % item
                self.logger.error(errorMsg)
    # Read package configuration
    self.fetcher = Fetcher(debug)
    url = urlparse.urljoin(self.repo, "testProcess.conf")
    self.fetcher.download(url, self.configFile)
    cfr = ConfReader(self.configFile)
    self.params = cfr.read()
    self.fetchFiles()

def main(args, session):
    logging.info('Deleting existing xeno-canto recordings')
    session.query(Recording).filter(Recording.source == 'xc').delete()
    fetcher = Fetcher(cache_group='xc_api',
                      pool_size=args.recording_load_jobs,
                      clear_cache=args.clear_recordings_cache)
    query = XcQuery({'nr': f'{args.start_xc_id}-{args.end_xc_id}'}, fetcher)
    first_page = query.fetch_page(1)
    num_pages = first_page['numPages']
    num_recordings = int(first_page['numRecordings'])
    logging.info(f'Found {num_pages} pages, {num_recordings} recordings')
    with multiprocessing.pool.ThreadPool(args.recording_load_jobs) as pool:
        for page in progress.percent(
                itertools.chain([first_page],
                                pool.imap(query.fetch_page, range(2, num_pages + 1))),
                num_pages):
            try:
                # Allow replacements in case the API shifts pages around
                # (it seems to do that, probably when new recordings are
                # added during the run).
                recordings = [_parse_recording(r) for r in page['recordings']]
                session.bulk_save_objects_with_replace(recordings)
            except Exception:
                logging.error(
                    f'Error parsing page:\n{json.dumps(page, indent=" ")}',
                    exc_info=True)
                raise

def serve_user_podcast(username):
    limit = request.args.get("limit")
    fetcher = Fetcher(request.environ['YOUTUBERSS_CONFIG'])
    print("What the f**k is going on here")
    print(request.environ['YOUTUBERSS_CONFIG'])
    podcast, upload_playlist = fetcher.get_user_data(username)
    return serve(fetcher, podcast, upload_playlist, limit)

def __init__(self, host: str, path: str, timestamp=datetime.datetime.now(),
             spreadsheet_id=SPREADSHEET_ID):
    self.host = host
    self.path = path
    self.renderer = SheetsRenderer(spreadsheet_id)
    self.fetcher: Fetcher = Fetcher(s, self.host)
    self.listing_cache: ListingCache = ListingCache("/tmp/cache-%s" % host, self.fetcher)
    self.emailer = Emailer(self.host, "/tmp/email_log")
    self.timestamp = timestamp

def _get_category_page(self):
    fetcher = Fetcher()
    ret = yield fetcher.fetch('http://www.6pm.com/%s' % self.slug)
    body = PQ(ret.body)
    foo = body('.last a')[0].get('href')
    max_page = int(re.findall(r'-page(\d+)', foo)[0])
    for i in range(max_page):
        self._crawl_category_page(i)

def schedule_fetcher(self, cycle=FETCHER_CYCLE):
    """Fetch proxies on a fixed schedule."""
    fetcher = Fetcher()
    while True:
        print('Starting to fetch proxies')
        fetcher.run()
        time.sleep(cycle)

def init(self):
    self.series = []
    if self.sub is not None:
        s = Fetcher(lambda text: json.loads(text)['subscriberCountTimeSeries']).fetch(
            URL_SUBS.format(self.sub, datetime.now().strftime("%Y_%m_%d")))
        for a in s:
            day = datetime(1970, 1, 1) + timedelta(days=a['utcDay'])
            if day >= datetime(2018, 1, 1):
                self.series.append((day, a['count']))
        series_fill_zeroes(self.series)
        normalize(self, "series")

def load(self, loadRosters=False):
    """
    Calls MLB.com server and loads all team information

    Arguments:
    loadRosters : If true, rosters will automatically be loaded (more HTTP requests!)
    """
    f = Fetcher(Fetcher.MLB_LEAGUE_URL)
    for item in f.fetch():
        t = team.Team(item)
        if loadRosters:
            t.loadRoster()
        self.teams[t['team_code']] = t

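# NOTE: a hypothetical sketch, assuming the MLB snippets' Fetcher stores named
# URL templates (MLB_LEAGUE_URL, MLB_ROSTER_URL, MLB_SCHEDULE_URL) and formats
# them with the keyword arguments passed to the constructor, with fetch()
# returning parsed JSON or the raw body when called with True. The class name,
# placeholder URL, and urllib usage are illustrative, not the project's code.
import json
import urllib.request


class TemplateFetcher:
    MLB_ROSTER_URL = 'https://example.com/roster?team_id={team_id}'  # placeholder template

    def __init__(self, url_template, **kwargs):
        # Resolve the template into a concrete request URL.
        self.url = url_template.format(**kwargs)

    def fetch(self, raw=False):
        with urllib.request.urlopen(self.url) as response:
            body = response.read().decode('utf-8')
        return body if raw else json.loads(body)


# Usage mirroring loadRoster below: TemplateFetcher(TemplateFetcher.MLB_ROSTER_URL, team_id=121).fetch()
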
def init(self):
    self.series = []
    if self.sub:
        s = Fetcher(lambda text: json.loads(text)['message']['total']['data']).fetch(
            URL_SUBS,
            data={
                'reddit0': self.sub,
                '_': datetime.now().strftime("%Y_%m_%d")
            })
        for a in s:
            day = datetime.strptime(a['y'], '%Y-%m-%d')
            if day >= DATE_START:
                self.series.append((day, int(a['a'])))
        series_fill_zeroes(self.series)
        normalize(self, "series")

def database_init(connector):
    """This method takes the needed actions to create and populate the DB."""
    # We use the fetcher to create and then populate all DB tables
    fetcher = Fetcher()
    fetcher.create_table(connector)
    fetcher.populate_categories(connector)
    # We populate the products table category by category
    for cat in CATEGORIES:
        fetcher.create_crits("categories", "contains", cat, 1000, "unique_scans_n")
        fetcher.request()
        fetcher.populate_products(cat, connector)
    fetcher.populate_tags(connector)
    fetcher.populate_products_has_tags(connector)
    print("Congratulations, you have initialized the DB!\nYou must now "
          "launch the program without options.")

def __init__(self, data_dir, seed_urls, similarity_method):
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    self.fetcher = Fetcher(data_dir)
    if similarity_method == "cosine":
        self.similarity = Cosine_Similarity()
    elif similarity_method == "jaccard":
        self.similarity = Jaccard_Similarity()
    else:
        self.similarity = None
    self.K = max(len(seed_urls) // 2, 10)
    self.host = set()
    self.update_seeds(seed_urls)

def _crawl_url(self, url):
    fetcher = Fetcher()
    ret = yield fetcher.fetch(url)
    body = PQ(ret.body)
    products = body('a.product')
    data = []
    for product in products:
        foo = PQ(product)
        origin_price = re.findall(r'\$([\d\.]+)', foo('.discount').text())
        if origin_price:
            origin_price = origin_price[0]
        sales_price = foo('.price-6pm').text().replace('$', '').strip()
        if not origin_price and not sales_price:
            continue
        title = '[%s] %s' % (foo('.brandName').text(), foo('.productName').text())
        data.append({
            'image': foo('.productImg').attr('src'),
            'link': parse_url('http://www.6pm.com' + foo('a').attr('href')),
            'title': title,
            'original_price': origin_price or sales_price,
            'sales_price': sales_price
        })
    data = {
        'website': '6pm',
        'currency': 'USD',
        'country': 'USA',
        'store_id': self.store_id,
        'data': json.dumps(data)
    }
    data.update(self._extra_kwargs)
    q = yield fetcher.fetch(
        'http://127.0.0.1:8000/ezlookup/deal/?key=998998998',
        method="POST",
        data=data)

def loadRoster(self):
    """
    Calls MLB.com servers to obtain the complete roster for the team.

    If call fails, '_error' property is set.
    """
    f = Fetcher(Fetcher.MLB_ROSTER_URL, team_id=self['team_id'])
    j = f.fetch()
    if 'roster_40' not in j:
        self._error = "ERROR on %s: key roster_40 not found (cannot load 40 man roster)" % (
            f.url)
        return False
    parent = j['roster_40']['queryResults']
    if parent['totalSize'] > 0:
        for record in parent['row']:
            player_id = record['player_id']
            self.roster[player_id] = player.Player(player_id)

def command_line_runner():
    args = docopt.docopt(__doc__, version=__version__)
    if args["--clear-cache"]:
        if caching._clear_cache():
            exit("Cache cleared successfully.")
        else:
            exit("Clearing cache failed.")
    if args["--max-number"]:
        try:
            args["--max-number"] = int(args["--max-number"])
        except ValueError:
            exit(_yellow("--max-number value should be a number!"))
    fetcher = Fetcher(args)
    selected_pkg = fetcher.user_confirm()
    PkgbuildReview(selected_pkg, args)
    DiffReview(selected_pkg, args)

def run(self):
    logger.info('%s :: agent starting' % skyline_app)
    pid = getpid()
    # Start the workers
    for i in range(settings.VISTA_WORKER_PROCESSES):
        if i == 0:
            logger.info('%s :: agent :: starting Worker 1' % skyline_app)
            Worker(pid).start()
        else:
            logger.info('%s :: agent :: starting Worker %s' % (skyline_app, str(i)))
            Worker(pid).start()
    # Start the fetcher
    logger.info('%s :: agent :: starting Fetcher' % skyline_app)
    Fetcher(pid).start()
    while 1:
        sleep(100)

def __init__(self, master):
    self.master = master
    self.ram = None
    self.fetcher = Fetcher()
    master.title('BoostPack v2 Installer')
    master.geometry('500x500')
    master.iconbitmap('assets/output_onlinepngtools_ZGQ_icon.ico')
    img = PhotoImage(file='assets/install.png')
    self.install = Button(master, image=img, command=self.install, borderwidth=0)
    self.install.image = img
    self.install.pack()
    img = PhotoImage(file='assets/uninstall.png')
    self.uninstall = Button(master, image=img, command=self.uninstall, borderwidth=0)
    self.uninstall.image = img
    self.uninstall.pack()
    img = PhotoImage(file='assets/update.png')
    self.update = Button(master, image=img, command=self.update_patcher, borderwidth=0)
    self.update.image = img
    self.update.pack()
    self.txt = Label(master, text='Ram')
    self.txt.pack()
    self.ram = Scale(master, from_=1, to=self.fetcher.get_ram, orient=HORIZONTAL,
                     command=self.set_ram)
    self.ram.pack()

def begin():
    try:
        f = Fetcher()
        fetch_result = f.fetch(kjxjr=False)
        p = Parser()
        parse_result = p.parse()
        print(parse_result)
        db = DB()
        db.insert(parse_result)
        r = Report()
        today = arrow.now()
        if today.format('dddd') == 'Friday':
            to_addr = [('*****@*****.**', '张磊'), ('*****@*****.**', '张永泉')]
            r.send_report(to_addr=to_addr)
        else:
            to_addr = [('*****@*****.**', '张磊'), ]
            r.send_report(to_addr=to_addr)
    except Exception as e:
        print(e)

def run(args):
    try:
        # Init fetcher and check the season range
        season = args.season
        fetcher = Fetcher()
        _, latest_season = fetcher.get_latest_season()
        if season not in range(1, int(latest_season) + 1):
            raise Exception('Season out of range!')
        # Init parser and uploader
        gameParser = JeopardyParser()
        uploader = SeasonAPI(FirebaseAPI())
        if args.upload:
            engine = GeventEngine(fetcher, gameParser, uploader)
            engine.process_season(season)
        elif args.delete:
            uploader.delete_season(season)
    except Exception:
        logger.error('Task failed! Error: %s', traceback.format_exc())
        raise SystemExit

def init(self):
    d = Fetcher(_get_data_from_coinpage).fetch(URL_COINPAGE.format(self.coin))
    self.id = list(d['props']['initialState']['cryptocurrency']['info']['data'].keys())[0]
    self.info = d['props']['initialState']['cryptocurrency']['info']['data'][self.id]
    if self.rank is None:
        self.rank = d['props']['initialState']['cryptocurrency']['quotesLatest']['data'][self.id]['cmc_rank']
    self.max_supply = self.data.get("max_supply", 0)
    if not self.max_supply:
        self.max_supply = self.data.get("total_supply", 0)
    if not self.max_supply:
        self.max_supply = 0
    self.circ_supply = self.data.get("circulating_supply", 0)
    if not self.circ_supply:
        self.circ_supply = 0
    self.supply_rel = 0
    if self.circ_supply and self.max_supply:
        self.supply_rel = div0(self.circ_supply, self.max_supply, z=lambda x: 0)
    self.sub = None
    try:
        self.sub = self.info['urls']['reddit'][0].split("/")[-1]
    except Exception:
        log.debug("sub = None")
    self.twt = None
    try:
        self.twt = self.info['urls']['twitter'][0].split("/")[-1]
    except Exception:
        log.debug("twt = None")

def __init__(self, year, month, day=None):
    """
    Constructor

    Arguments:
    year: The... year!
    month: The... month!
    day: The... day! (or None for all days of the month)

    Schedule is a standard dictionary: each day is a key in the format of
    'YYYY-MM-DD', each value a list of game dictionaries.
    """
    days = []
    if day is None:
        for d in range(1, calendar.mdays[month] + 1):
            days.append(datetime.date(year, month, d))
    else:
        days.append(datetime.date(year, month, day))
    for d in days:
        key = d.strftime("%Y-%m-%d")
        if key not in self.keys():
            self[key] = []
        f = Fetcher(Fetcher.MLB_SCHEDULE_URL, date=d.strftime("%Y%m%d"))
        try:
            content = f.fetch(True)
            if len(content) == 0:
                continue
            # The feed is almost-JSON; massage it into something json.loads accepts.
            content = re.sub(r'\t+', '\t', content)
            content = content.replace('"', '\\"')
            content = content.replace("'", "\"")
            content = re.sub(r'\t([\w,_]+):\s', r'"\1":', content)
            obj = json.loads(content)
            self[key] = obj
        except ValueError as e:
            print("ERROR %s on %s" % (e, f.url))
            pass

def main(args, _session):
    output_dir = args.map_tiles_output_dir
    logging.info('Deleting existing map tiles')
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    tiles = []
    for z in range(0, args.max_zoom_level + 1):
        n = 2**z
        for x in range(n):
            for y in range(n):
                tiles.append({'z': z, 'x': x, 'y': y})
    logging.info(
        f'Largest zoom level: {args.max_zoom_level} ({256 * 2**args.max_zoom_level} pixels)'
    )
    logging.info(f'Fetching and optimizing {len(tiles)} map tiles')
    fetcher = Fetcher('map_tiles', pool_size=1)
    tile_format = '{z}_{x}_{y}.png'
    orig_data_size = 0
    opt_data_size = 0
    os.makedirs(output_dir, exist_ok=True)
    for tile in progress.percent(tiles):
        data = fetcher.fetch_cached(args.map_tiles_url_format.format(**tile))
        output_file = os.path.join(output_dir, tile_format.format(**tile))
        with open(output_file, 'wb') as f:
            f.write(data)
        subprocess.run(['optipng', '-quiet', output_file], check=True)
        orig_data_size += len(data)
        opt_data_size += os.path.getsize(output_file)
    side = 256 * 2**args.max_zoom_level
    logging.info(
        f'Total size of map tiles: {orig_data_size} bytes originally, '
        f'{opt_data_size} bytes after optipng'
    )

def __init__(self, symbol, start, end=date.today(), data_len=5, scale="D"):
    usecols = ["open", "high", "low", "close", "volume"]
    self.__data_df = Fetcher().fetch(symbol, start, end)[usecols]
    if scale != "D":
        # Aggregate the OHLCV bars to the requested frequency and drop the
        # (usually incomplete) last bar.
        self.__data_df = self.__data_df.resample(scale).agg({
            "open": 'first',
            "high": 'max',
            "low": 'min',
            "close": 'last',
            "volume": 'sum',
        })[:-1]
    self.__data_df_norm = self.__data_df.copy()
    self.__data_df_norm['open'] = MinMaxScaler().fit_transform(
        self.__data_df.open.values.reshape(-1, 1))
    self.__data_df_norm['high'] = MinMaxScaler().fit_transform(
        self.__data_df.high.values.reshape(-1, 1))
    self.__data_df_norm['low'] = MinMaxScaler().fit_transform(
        self.__data_df.low.values.reshape(-1, 1))
    self.__data_df_norm['close'] = MinMaxScaler().fit_transform(
        self.__data_df.close.values.reshape(-1, 1))
    self.__data_df_norm['volume'] = MinMaxScaler().fit_transform(
        self.__data_df.volume.values.reshape(-1, 1))
    self.data_len = data_len