def fetch_monthly(self):
    month_ago = (datetime.datetime.now() - datetime.timedelta(days=30))
    unixtime_month_ago = month_ago.strftime('%s')
    start = datetime.datetime.now()
    print(f"Last searched date: {month_ago}")
    running = True
    # Since this API responds with JSONs of a maximum size of 1000 images, we need to
    # make several requests to gather images from the entire month; with every request
    # I check the last image in it and start from that date on the next request, until
    # I have all images necessary.
    while running:
        # range as in FROM later TO earlier, dates are in UnixTime
        url = (
            f'http://api.deckchair.com/v1/camera/{self.cam_id}'
            f'/images?from={unixtime_month_ago}&to={start.strftime("%s")}')
        cam_imgs = request_to_json(url)
        start = convert_to_datetime(cam_imgs[0]['taken'])
        end = convert_to_datetime(cam_imgs[-1]['taken'])
        print(f"{start} - {end}", url, f"({len(cam_imgs)} images found)")
        for img in cam_imgs:
            img_date = convert_to_datetime(img['taken'])
            if img_date <= month_ago:
                running = False
                break
            else:
                img_url = f'http://api.deckchair.com/v1/viewer/image/{img["_id"]}'
                yield img_date, requests.get(img_url).content
        start = end
def fetch_latest(self):
    cam_imgs = request_to_json(
        f'http://api.deckchair.com/v1/camera/{self.cam_id}/images')
    img_id = cam_imgs[0]['_id']
    img_date = cam_imgs[0]['taken']
    print(datetime.datetime.now(), convert_to_datetime(img_date))
    url = f'http://api.deckchair.com/v1/viewer/image/{img_id}'
    return requests.get(url).content, convert_to_datetime(img_date)
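# A minimal usage sketch for the two fetchers above, assuming they are methods of a
# camera-wrapper class (called `Camera` here purely for illustration) whose
# constructor sets self.cam_id; the cam_id value is hypothetical. It saves every
# image from the last 30 days to disk and then prints the timestamp of the newest one.
camera = Camera(cam_id='example-camera-id')  # hypothetical wrapper and id
for taken_at, img_bytes in camera.fetch_monthly():
    with open(f"{taken_at:%Y%m%d_%H%M%S}.jpg", 'wb') as f:
        f.write(img_bytes)

latest_bytes, latest_taken_at = camera.fetch_latest()
print(f"Latest frame taken at {latest_taken_at}")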
def build_daily_trading():
    sql_create_table = '''
        DROP TABLE IF EXISTS daily_price;
        CREATE TABLE daily_price (
            id INT PRIMARY KEY AUTO_INCREMENT,
            ticker_id INT NOT NULL,
            ticker VARCHAR(32),
            date_time DATETIME,
            open_price DOUBLE,
            high_price DOUBLE,
            low_price DOUBLE,
            close_price DOUBLE,
            volume DOUBLE,
            amount DOUBLE,
            free_turnover_ratio DOUBLE,
            trade_status VARCHAR(32),
            maxup_or_maxdown INT,
            last_updated_datetime DATETIME
        ) ENGINE = InnoDB
    '''
    cur.execute(sql_create_table)

    sql_select_ticker_id = '''
        SELECT id, ticker, ipo_date FROM symbols
    '''
    cur.execute(sql_select_ticker_id)
    data = cur.fetchall()

    column_str = '''
        open_price, high_price, low_price, close_price, volume, amount,
        free_turnover_ratio, trade_status, maxup_or_maxdown, ticker,
        date_time, ticker_id, last_updated_datetime
    '''
    # One comma-separated "%s" placeholder per inserted column (13 in total).
    insert_str = ('%s,' * 13)[:-1]
    final_str = '''
        INSERT INTO daily_price (%s) VALUES (%s)
    ''' % (column_str, insert_str)

    total_length = len(data)
    i = 0
    for idx, ticker, ipo_date in data:
        i += 1
        now = dt.datetime.now()
        start_date = ipo_date.strftime('%Y%m%d')
        end_date = now.strftime('%Y%m%d')
        ticker_data = get_stock_daily_price(ticker, start_date, end_date)
        ticker_data['ticker_id'] = idx
        ticker_data['last_updated_datetime'] = now
        ticker_data['date_time'] = convert_to_datetime(
            ticker_data['date_time'])
        ticker_data['last_updated_datetime'] = convert_to_datetime(
            ticker_data['last_updated_datetime'])
        ticker_data = ticker_data.values.tolist()
        cur.executemany(final_str, ticker_data)
        print('%s: stock data successfully inserted into the database' % ticker)
        print('%s/%s' % (i, total_length))
def get_reverse_trade_price(order, binance_client):
    order_time = int(order['updateTime'])
    start_date = str(convert_to_datetime(order_time).date())
    end_date = str(convert_to_datetime(order_time).date())
    # TODO: patch this to take in price_df from an S3 bucket or GitHub repository.
    moment_price_df = pd.DataFrame(binance_client.get_historical_klines(
        "BTCUSDT", binance_client.KLINE_INTERVAL_30MINUTE, start_date, end_date))
    # [Potential bug] iloc[-1] assumes at least one kline opens before order_time.
    closest_price = moment_price_df[moment_price_df[0] < order_time].iloc[-1, 4]
    return closest_price
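# A hedged sketch of one way to address the "[Potential bug]" note above: if the
# order timestamp falls before the first kline of the day, the boolean mask is empty
# and iloc[-1] raises an IndexError. Names mirror the function above; the fallback
# behaviour (returning None) is an assumption, not part of the original code.
def get_reverse_trade_price_safe(order, binance_client):
    order_time = int(order['updateTime'])
    day = str(convert_to_datetime(order_time).date())
    klines = binance_client.get_historical_klines(
        "BTCUSDT", binance_client.KLINE_INTERVAL_30MINUTE, day, day)
    moment_price_df = pd.DataFrame(klines)
    earlier = moment_price_df[moment_price_df[0] < order_time]
    if earlier.empty:
        return None  # no kline opened before the order; the caller decides what to do
    return earlier.iloc[-1, 4]  # column 4 of a Binance kline is the close price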
def object_from_dict(cls, data):
    user = User(name=data['name'])
    fields = ['type', 'bio', 'logo', 'display_name']
    for field in fields:
        setattr(user, field, data[field])
    user.id = data['_id']
    user.created_at = convert_to_datetime(data['created_at'])
    user.updated_at = convert_to_datetime(data['updated_at'])
    return user
def object_from_dict(cls, data):
    team = Team(name=data['name'])
    fields = ['info', 'display_name', 'logo', 'banner', 'background']
    for field in fields:
        setattr(team, field, data[field])
    team.id = data['_id']
    team.created_at = convert_to_datetime(data['created_at'])
    team.updated_at = convert_to_datetime(data['updated_at'])
    return team
def __init__(self, elem_lst):
    self.app_no = elem_lst[0]      # unique ID of the accepted work order
    self.handle_id = elem_lst[1]   # work-order handling number
    self.comm_no = elem_lst[2]     # contact number
    self.req_begin_date = None     # time the request was submitted
    self.req_begin_date = utils.convert_to_datetime(
        '01_S_COMM_REC.TSV', elem_lst[3])
    self.req_finish_date = None    # time the service request finished
    self.req_finish_date = utils.convert_to_datetime(
        '01_S_COMM_REC.TSV', elem_lst[4])
    self.org_no = elem_lst[5]      # power supply organization
    self.busi_type_code = elem_lst[6]       # accepted business type
    self.wkst_busi_type_code = elem_lst[7]  # work-order business type
def object_from_dict(cls, data):
    channel = Channel(name=data['name'])
    fields = [
        'mature', 'status', 'broadcaster_language', 'display_name', 'game',
        'delay', 'language', 'logo', 'banner', 'video_banner', 'background',
        'profile_banner', 'profile_banner_background_color', 'partner',
        'url', 'views', 'followers'
    ]
    for field in fields:
        setattr(channel, field, data[field])
    channel.id = data['_id']
    channel.created_at = convert_to_datetime(data['created_at'])
    channel.updated_at = convert_to_datetime(data['updated_at'])
    return channel
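# A minimal usage sketch for the object_from_dict constructors above, assuming each
# is decorated as a @classmethod on its model (the decorators are not shown in these
# snippets) and that `fetch_channel_json` is a hypothetical helper returning the
# decoded API payload for one channel.
data = fetch_channel_json('some_channel')
channel = Channel.object_from_dict(data)
print(channel.display_name, channel.followers, channel.created_at)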
def print_episode(text, episode, lang, type_episode):
    if episode:
        date = convert_to_datetime(episode['airstamp'])
        episode['airtime'] = date.strftime("%H:%M")
        if lang == 'es':
            date = convert_to_timezone(date)
            episode['airdate'] = date.strftime("%d-%m-%Y")
            episode['airtime'] = date.strftime("%H:%M")
        text += translations[type_episode][lang].format(**episode)
        if episode['summary']:
            summary = remove_tag(episode['summary'])
            if lang != 'en':
                try:
                    summary = config.gs.translate(summary, lang)
                except urllib.error.HTTPError:
                    pass
            text += summary
        if type_episode == 'next_episode':
            date_str = episode['airdate'] + ' ' + episode['airtime']
            date_time = datetime.datetime.strptime(date_str, "%d-%m-%Y %H:%M")
            delta = date_time - datetime.datetime.now()
            if delta.days:
                text += translations['left'][lang].format(days=delta.days)
        text += '\n \n'
    return text
def __init__(self, elem_lst):
    self.app_no = elem_lst[0]   # unique ID of the accepted work order
    self._id = elem_lst[1]      # unique work-order ID
    self.busi_type_code = elem_lst[2]    # business type code: "010 003 001 007 015 005 018 009 008 006"
    self.urban_rural_flag = elem_lst[3]  # urban/rural category flag
    self.org_no = elem_lst[4]   # power supply organization code
    self.handle_time = None     # work-order acceptance time
    self.handle_time = utils.convert_to_datetime(
        "00_ARC_S_95598_WKST_*.TSV", elem_lst[5])
    self.accept_content = None  # content of the accepted work order
    self.accept_content = utils.gbk2utf(elem_lst[6])
    self.handle_opinion = elem_lst[7]  # acceptance remarks
    self.calling_no = elem_lst[8]      # caller number
    self.elec_type = elem_lst[9]       # electricity usage category
    if len(elem_lst[9]) == 0:
        self.elec_type = "null"
    self.cust_no = elem_lst[10]      # customer number (field required in the submitted results)
    self.prov_org_no = elem_lst[11]  # power supply org code of the provincial (municipal) company
    self.city_org_no = elem_lst[12]  # power supply org code of the city (district) company
    self.poweroff_ids = []           # power-off IDs extracted from accept_content
def build_trading_days():
    ''' '''
    today = dt.date.today().strftime('%Y-%m-%d')
    start_date = '1991-01-01'
    end_date = today
    last_updated_date = end_date

    sql_create_table = '''
        DROP TABLE IF EXISTS tradedates;
        CREATE TABLE tradedates (
            id INT PRIMARY KEY AUTO_INCREMENT,
            date_time DATETIME,
            last_updated_date DATETIME
        ) ENGINE = InnoDB
    '''
    cur.execute(sql_create_table)

    trading_days = get_trading_days(start_date, end_date).tolist()
    trading_days = list(convert_to_datetime(trading_days))
    schema_list = []
    for each in trading_days:
        schema_list.append([each, last_updated_date])

    column_str = 'date_time,last_updated_date'
    insert_str = ('%s,' * 2)[:-1]
    final_str = '''
        INSERT INTO tradedates (%s) VALUES (%s)
    ''' % (column_str, insert_str)
    cur.executemany(final_str, schema_list)
    con.commit()
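# The two builder functions above pass both pandas columns and plain lists of date
# strings to convert_to_datetime, so a minimal sketch of a compatible helper could
# simply delegate to pandas. This is an assumption about that utility for
# illustration only, not its actual implementation.
import pandas as pd

def convert_to_datetime(values):
    # pd.to_datetime accepts scalars, lists and Series alike and returns
    # datetime-like objects that MySQL drivers can serialize.
    return pd.to_datetime(values)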
def object_from_dict(cls, data):
    follow = Follow()
    follow.notifications = data['notifications']
    follow.created_at = convert_to_datetime(data['created_at'])
    follow.user = User.object_from_dict(data['user'])
    return follow
def fetch_todays(self):
    today = datetime.date.today()
    print(today)
    url = f'http://api.deckchair.com/v1/camera/{self.cam_id}/images?from={today.strftime("%s")}'
    cam_imgs = request_to_json(url)
    for img in cam_imgs:
        # print(img['taken'])
        url = f'http://api.deckchair.com/v1/viewer/image/{img["_id"]}'
        yield convert_to_datetime(img['taken']), requests.get(url).content
def read_tasks(file_to_read):
    tasks = None
    try:
        with open(file_to_read, "r") as f:
            tasks = json.load(f)
    except IOError:
        print(f"Couldn't read {file_to_read} or {file_to_read} does not exist!")
    except Exception as e:
        print("EXCEPTION OCCURRED!")
        print(e)
    return sorted(convert_to_datetime(tasks), key=lambda d: d["start"])
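# A hedged sketch of the input read_tasks expects: a JSON list of task objects, each
# carrying at least a "start" field that convert_to_datetime turns into something
# sortable. The filename, the extra "title" field, and the date format shown here
# are illustrative assumptions.
#
# tasks.json:
# [
#   {"title": "standup", "start": "2021-03-01 09:30"},
#   {"title": "review",  "start": "2021-03-01 14:00"}
# ]
tasks = read_tasks("tasks.json")
for task in tasks:
    print(task["start"], task["title"])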
def __init__(self, elem_lst):
    self.cons_no = elem_lst[0]  # customer number
    self.org_no = elem_lst[1]   # power supply organization code
    self.charge_ym = None       # billing year and month
    self.charge_ym = utils.convert_to_date_YM('12_A_PAY_FLOW.TSV', elem_lst[2])
    self.charge_date = None     # payment date
    self.charge_date = utils.convert_to_datetime('12_A_PAY_FLOW.TSV', elem_lst[3])
    self.pay_mode = utils.convert_to_int(elem_lst[4])  # payment method
def __init__(self, elem_lst):
    self.poweroff_id = elem_lst[0]  # unique ID of the power-outage record
    self.app_no = elem_lst[1]       # application number
    self.start_time = None          # outage start time
    self.start_time = utils.convert_to_datetime('02_S_REGION_OUTRAGE.TSV', elem_lst[3])
    self.stop_date = None           # outage end time
    self.stop_date = utils.convert_to_datetime('02_S_REGION_OUTRAGE.TSV', elem_lst[4])
    self.org_no = elem_lst[5]       # power supply organization of the outage
    self.type_code = elem_lst[6]    # outage type
    self.poweroff_reason = elem_lst[8]  # outage reason
    self.power_time = None          # time power was restored on site
    self.power_time = utils.convert_to_datetime('02_S_REGION_OUTRAGE.TSV', elem_lst[31])
    self.poweroff_area = elem_lst[7]   # outage range
    self.poweroff_scope = elem_lst[2]  # outage area
def object_from_dict(cls, data):
    video = Video(title=data['title'])
    fields = [
        'status', 'tag_list', 'description', 'title', 'url', 'views',
        'length', 'game', 'broadcast_id', 'broadcast_type', 'preview',
        'resolutions', 'fps'
    ]
    for field in fields:
        setattr(video, field, data[field])
    video.id = data['_id']
    video.recorded_at = convert_to_datetime(data['recorded_at'])
    return video
def __init__(self, elem_lst):
    self.app_no = elem_lst[0]    # unique ID of the accepted work order
    self.oversee_time = None     # time of the reminder / supervision request
    self.oversee_time = utils.convert_to_datetime('03_S_INFO_OVERSEE.TSV', elem_lst[1])
    self.cust_no = elem_lst[2]   # customer number
    self.cust_name = elem_lst[3] # name of the calling customer
    self.oversee_reason = None   # reason for the reminder
    self.oversee_reason = utils.gbk2utf(elem_lst[4])
    self.oversee_content = None  # content of the reminder
    self.oversee_content = utils.gbk2utf(elem_lst[5])
    self.oversee_app_no = elem_lst[6]      # number of the work order being urged
    self.org_or_dept = elem_lst[7]         # unit or department being urged
    self.app_busi_type_code = elem_lst[8]  # type of the work order being urged
    self.org_no = elem_lst[9]              # power supply organization
def update():
    try:
        db = utils.load_pickle(DB_PATH)
        last_update = sorted(db['date'])[-1]
    except:
        utils.download(DB_URL, DB_PATH)
        db = utils.load_pickle(DB_PATH)
        last_update = sorted(db['date'])[-1]

    # query arxiv api
    n_added = 0
    indx = 0
    while indx < MAX_ITER:
        url = BASE_URL + QUERY_FMT.format(DEF_QUERY, indx, RESULTS_PER_ITER)
        try:
            with urllib.request.urlopen(url, timeout=5.0) as resp:
                response = resp.read()
        except TimeoutError:
            continue

        response = feedparser.parse(response)
        for entry in response.entries:
            e = utils.encode_feedparser_dict(entry)
            paper_url = utils.parse_arxiv_url(e["link"])
            date = e["published"]
            date = utils.convert_to_datetime(date)

            # content already in database
            if paper_url in db["url"]:
                if date <= last_update:
                    indx = MAX_ITER
                    break
                else:
                    continue

            # retrieve and clean some text
            title = e["title"]
            title = utils.rem_tex_fmt(title)
            authors = ", ".join(f"{n['name']}" for n in e["authors"])
            abstract = e["summary"]
            abstract = utils.rem_tex_fmt(abstract)
            other_urls = utils.extract_url(abstract)
            journal = e["arxiv_journal_ref"] if "arxiv_journal_ref" in e else ""
            journal = utils.rem_tex_fmt(journal)

            db["date"].append(date)
            db["url"].append(paper_url)
            db["title"].append(title)
            db["authors"].append(authors)
            db["abstract"].append(abstract)
            db["journal"].append(journal)
            db["other_urls"].append(other_urls)
            n_added += 1

        if len(response.entries) == 0:
            utils.progress_bar(indx / MAX_ITER,
                               status="API not responding. retrying...")
        if indx == MAX_ITER:
            utils.progress_bar(1)
        else:
            indx += 100
            utils.progress_bar(indx / MAX_ITER,
                               status=f"Fetching papers from {date}...")
        time.sleep(WAIT_TIME)

    print(f"{n_added} papers added to database")

    # re-sort every column of the database by date
    indx = list(np.argsort(db["date"]))
    db["date"] = list(np.array(db["date"])[indx])
    db["url"] = list(np.array(db["url"])[indx])
    db["title"] = list(np.array(db["title"])[indx])
    db["authors"] = list(np.array(db["authors"])[indx])
    db["abstract"] = list(np.array(db["abstract"])[indx])
    db["journal"] = list(np.array(db["journal"])[indx])
    db["other_urls"] = list(np.array(db["other_urls"])[indx])
    utils.save_pickle(DB_PATH, db)

    # rebuild the BM25 index over title + abstract tokens
    tkn_corpus = []
    for indx in range(len(db["url"])):
        title = db["title"][indx].lower()
        abstract = utils.filter_abstract(db["abstract"][indx].lower())
        tkn_corpus.append((title + " " + abstract).split(" "))
    bm25 = BM25Okapi(tkn_corpus)
    utils.save_pickle(CACHE_BM25, bm25)
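# A short sketch of how the cached BM25 index built above might be queried, assuming
# the same utils.load_pickle helper and the same pickle paths; the `search` function
# and the example query are illustrative, not part of the original module.
# rank_bm25's BM25Okapi exposes get_scores(tokenized_query), which is all the lookup
# needs.
def search(query, n_results=10):
    db = utils.load_pickle(DB_PATH)
    bm25 = utils.load_pickle(CACHE_BM25)
    scores = bm25.get_scores(query.lower().split(" "))
    best = np.argsort(scores)[::-1][:n_results]  # highest-scoring papers first
    return [(db["title"][i], db["url"][i]) for i in best]

for title, url in search("graph neural networks"):
    print(title, url)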
def date_to_timestamp(date):
    return utils.convert_to_datetime(date)
def test_convert_to_datetime(self):
    expected = datetime.datetime(2011, 11, 11, 0, 0)
    actual = convert_to_datetime('11/11/2011')
    self.assertEqual(actual, expected)
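# A minimal sketch of a convert_to_datetime that would satisfy the test above; the
# format string is an assumption, since '11/11/2011' does not disambiguate day-first
# from month-first, so '%d/%m/%Y' here is illustrative rather than the original choice.
import datetime

def convert_to_datetime(date_string):
    return datetime.datetime.strptime(date_string, '%d/%m/%Y')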