def get_avg_coll_rate():
    """
    Return the average number of collisions per bucket for the duration
    type requested in ``num_coll_time[0]`` (hour / day / month / year).

    Averages over 24 hours, 7 days, 12 months, or the years in
    [utils.START_YEAR, utils.CURRENT_YEAR] respectively. Reads counts from
    the module-level Redis client ``r`` for the module-level ``city``.

    :return: average collision count (float under true division)
    :raises ValueError: if the duration type is not one of the four known ones
    """
    duration = num_coll_time[0]

    def _count(key):
        # A missing key simply means zero recorded collisions.
        return int(r.get(key) if r.exists(key) else 0)

    if duration == utils.HOUR:
        total = sum(_count(utils.get_key(utils.TIME, city, utils.HOUR, i))
                    for i in range(24))
        return total / 24
    elif duration == utils.DAY:
        total = sum(_count(utils.get_key(utils.TIME, city, utils.DAY,
                                         utils.get_day(i)))
                    for i in range(7))
        return total / 7
    elif duration == utils.MONTH:
        total = sum(_count(utils.get_key(utils.TIME, city, utils.MONTH,
                                         utils.get_month(i)))
                    for i in range(1, 13))
        return total / 12
    elif duration == utils.YEAR:
        n_years = utils.CURRENT_YEAR - utils.START_YEAR + 1
        total = sum(_count(utils.get_key(utils.TIME, city, utils.YEAR, y))
                    for y in range(utils.START_YEAR, utils.CURRENT_YEAR + 1))
        return total / n_years
    # BUG FIX: original referenced the undefined name `avg_coll_rate_type`
    # here, which raised NameError instead of the intended ValueError.
    raise ValueError('Invalid type of duration: ', duration)
def process_datetime(date_str, time_str):
    """
    Interpret *date_str* (ISO date) plus *time_str* ("HH:MM") as US/Eastern
    local time and convert to UTC.

    :return: [year, month, weekday, hour]; month and weekday are mapped via
             utils.get_month / utils.get_day when the components validate,
             otherwise returned as raw integers.
    """
    time_parts = time_str.split(':')
    parsed = dateutil.parser.isoparse(date_str)
    parsed = parsed.replace(hour=int(time_parts[0]), minute=int(time_parts[1]))

    eastern = pytz.timezone('US/Eastern')
    utc_dt = eastern.localize(parsed).astimezone(pytz.utc)

    year = utc_dt.year
    month = utc_dt.month
    weekday = utc_dt.weekday()
    hour = utc_dt.hour

    components_ok = (
        year and isinstance(year, int)
        and month >= 0 and isinstance(month, int)
        and weekday >= 0 and isinstance(weekday, int)
        and hour >= 0 and isinstance(hour, int)
    )
    if components_ok:
        month = utils.get_month(month)
        weekday = utils.get_day(weekday)
    return [year, month, weekday, hour]
def generate_month(year):
    """
    Build a one-time reply keyboard listing the months available for *year*.

    :param year: key passed to utils.get_month to fetch the month entries
                 (each entry's first element is the button label)
    :return: a ReplyKeyboardMarkup with one column for fewer than three
             months, two columns otherwise
    """
    months = utils.get_month(year)
    buttons = [KeyboardButton(text=entry[0]) for entry in months]
    # Original duplicated the whole ReplyKeyboardMarkup call in both branches
    # and pre-initialized the result with a meaningless `range(0)`.
    columns = 1 if len(buttons) < 3 else 2
    return ReplyKeyboardMarkup(build_menu(buttons, columns),
                               resize_keyboard=True,
                               one_time_keyboard=True)
def find_date(line):
    """
    Extract a date from *line* and return it formatted as 'YYYY-MM-DD'.

    Two formats are recognized, tried in order:
      1. Korean: "2020년 3월 5..."
      2. Export header: "------ Monday, March 5, 2020 ------"

    :return: the formatted date string, or False when no date is found
    """
    # Raw strings: the originals used plain strings with \d / \w escapes,
    # which are deprecated invalid escape sequences.
    match = re.search(r"(\d{4})년 (\d{1,2})월 (\d{1,2})", line)
    if match:
        y = int(match.group(1))
        m = int(match.group(2))
        d = int(match.group(3))
        return "%4d-%02d-%02d" % (y, m, d)

    match = re.search(r"------ (\w+), (\w+) (\d{1,2}), (\d{4}) ------", line)
    if match:
        y = int(match.group(4))
        m = get_month(match.group(2))  # month name -> number (helper defined elsewhere)
        d = int(match.group(3))
        return "%4d-%02d-%02d" % (y, m, d)

    return False
def extract_text_and_label(self):
    """
    Populate self.Y (labels) and self.time (timestamps) from the JSON
    tweets in self.corpus_dict_batch.

    For each tweet that has a "text" field: strips URLs from the text,
    appends a 0/1 label to self.Y (the explicit "ifFluRelated" flag when
    present, otherwise keyword filtering), and appends a
    (year, month, day) tuple parsed from "created_at" to self.time.
    """
    # Hoisted out of the loop — the original recompiled it per tweet.
    # The original pattern also ended in '|', an empty alternative that
    # matched the empty string everywhere; that is a no-op for sub() and
    # has been dropped.
    url_pattern = re.compile(r'https://[a-zA-Z0-9.?/&=:]*', re.S)

    for tweet in self.corpus_dict_batch:
        if "text" not in tweet:
            continue
        text = url_pattern.sub("", tweet["text"])

        # to check out which language is used, choose English only
        # if utils.lang_detect(text) != "en":
        #     continue
        # self.text_array.append(text)

        # Label: the explicit flag wins; otherwise fall back to keyword
        # filtering on the cleaned text.
        if "ifFluRelated" in tweet:
            self.Y.append(1 if tweet["ifFluRelated"] else 0)
        else:
            self.Y.append(self.add_label_by_keyword_filtering(text))

        # "created_at" fields split as [weekday, month-name, day, ...],
        # with the year in field 5 — presumably the Twitter timestamp
        # format; confirm against the corpus.
        parts = tweet["created_at"].split()
        day = int(parts[2])
        month = utils.get_month(parts[1])
        year = int(parts[5])
        self.time.append((year, month, day))

    print(self.Y)
def get_num_coll_time():
    """
    Return the collision count stored for the single bucket requested in
    ``num_coll_time`` (a [duration_type, value] pair).

    Reads from the module-level Redis client ``r`` for the module-level
    ``city``.

    :return: the raw Redis value for the bucket key (None if absent)
    :raises ValueError: if the duration type is not hour/day/month/year
    """
    duration = num_coll_time[0]
    value = num_coll_time[1]

    if duration == utils.HOUR:
        key = utils.get_key(utils.TIME, city, utils.HOUR, int(value))
    elif duration == utils.DAY:
        key = utils.get_key(utils.TIME, city, utils.DAY,
                            utils.get_day(int(value)))
    elif duration == utils.MONTH:
        key = utils.get_key(utils.TIME, city, utils.MONTH,
                            utils.get_month(int(value)))
    elif duration == utils.YEAR:
        # BUG FIX: original built this key with utils.HOUR instead of
        # utils.YEAR, so yearly queries silently read hourly buckets.
        key = utils.get_key(utils.TIME, city, utils.YEAR, int(value))
    else:
        raise ValueError('Invalid type of duration: ', duration)

    return r.get(key)
#! /usr/bin/env python # -*- coding: utf-8 -*- import utils # 活动列表时间类型 TIME_TYPE_TODAY = '1' TIME_TYPE_TOMORROW = '2' TIME_TYPE_WEEKS = '3' TIME_TYPE_MONTH = '4' # 时间类型映射 LIMIT_TIME = { TIME_TYPE_TODAY: utils.get_today(), TIME_TYPE_TOMORROW: utils.get_tomorrow(), TIME_TYPE_WEEKS: utils.get_week(), TIME_TYPE_MONTH: utils.get_month() }
import concurrent.futures from utils import get_year, get_month from utils import get_submitter_info, get_country_from_domain info = [] year_low = 2020 month_low = 8 print("Reading data...") with open("data/arxiv.json") as f: for line in f: data = json.loads(line) year = get_year(data["versions"]) month = get_month(data["versions"]) if year >= year_low and month >= month_low: info.append((data["submitter"], data["title"])) print("Done. Number of articles:", len(info)) def task(id, info_slice): print(f"Worker {id} started") already_seen_domains = {} for submitter, title in info_slice: country = None try: submitter_info = get_submitter_info(submitter, title) domain = submitter_info.email.split("@")[1]
import utils

cur_mysql = utils.mysql_conn()

# Business-line id from the command line.
line_id = sys.argv[1]
# Form-ID filter clause for the given business line.
form_id = utils.get_line(cur_mysql,line_id)
# Count yesterday's completed (state=3) form records from the master table.
# NOTE(review): form_id is interpolated directly into the SQL string —
# SQL-injection risk if it can ever contain untrusted text; verify upstream.
total = cur_mysql.execute('select id from col_summary where to_days(now())-to_days(finish_date)<=1 and state=3 and %s' % form_id)
#print total
# Current month id.
m = utils.get_month(cur_mysql)
# Fetch this month's running total, if a row already exists.
mm = mt = 0
cnt = cur_mysql.execute('select m from year_total where id=%s and line_id=%s' % (m,line_id))
if cnt>0:
    v = cur_mysql.fetchone()
    mm = int(v[0])
# Accumulate yesterday's count into the monthly totals.
# `total` is the row count returned by execute().
#print(">>> 1: %d,%d <<<" % (mm,mt))
mm += total
mt += total
cur_mysql.execute('update year_total set m=%d where id=%s and line_id=%s' % (mm,m,line_id));
cur_mysql.close()
#encoding=UTF-8 # # 2015.12.10 by shenwei @GuiYang # ============================== # 在每月1日00:00时,须清除本月的累计业绩 # import MySQLdb import os import sys from subprocess import Popen, PIPE import utils cur_mysql = utils.mysql_conn() #清除本月累计业绩 m = utils.get_month(cur_mysql) cur_mysql.execute('update year_total set m=0 where id=%s' % m) cur_mysql.close() # # Eof #
import sys import os import pandas as pd import calendar root_path = os.path.dirname(os.path.realpath(__file__)) + '/../' sys.path.append(root_path + 'scripts/') import utils if len(sys.argv) < 4: print("usage: ./dump_glassnode.py date endpoint metric") quit() dt = sys.argv[1] endpoint = sys.argv[2] metric = sys.argv[3] yy = int(utils.get_year(dt)) mm = int(utils.get_month(dt)) dd = int(utils.get_day(dt)) sd = datetime.datetime(yy, mm, dd, 0, 0) ed = sd + datetime.timedelta(1) print(dt) sd = calendar.timegm(sd.timetuple()) ed = calendar.timegm(ed.timetuple()) rics = pd.read_csv(root_path + '/data/compo/crypto/ftx.txt') rics = list(rics.ric.unique()) #rics = ['btc', 'eth'] data_path = root_path + 'data/glassnode/{}/{}/'.format(endpoint, metric) os.makedirs(data_path, 0o777, exist_ok=True) df = pd.DataFrame() for ric in rics:
idx = sys.argv[3]

# NOTE(review): sd/ed are expected to be 'YYYYMMDD' strings assigned earlier
# (outside this chunk) — confirm against the full script.
sd = datetime.datetime.strptime(sd, '%Y%m%d')
ed = datetime.datetime.strptime(ed, '%Y%m%d')

# Instrument universe for the index.
rics = pd.read_csv(root_path + '/data/compo/crypto/{}.txt'.format(idx))
rics = list(rics.ric.unique())
#rics = ['BTC']

funding_path = root_path + 'data/funding/{}/'.format(idx)
os.makedirs(funding_path, 0o777, exist_ok=True)

s = Session()
ts = int(time.time() * 1000)

# Iterate day by day over the inclusive range [sd, ed].
for d in [sd + datetime.timedelta(x) for x in range((ed - sd).days + 1)]:
    d = d.strftime('%Y%m%d')
    df = pd.DataFrame()
    # Day window [00:00, 23:59] converted to UTC epoch seconds.
    st = datetime.datetime(int(utils.get_year(d)), int(utils.get_month(d)),
                           int(utils.get_day(d)), 0, 0)
    et = datetime.datetime(int(utils.get_year(d)), int(utils.get_month(d)),
                           int(utils.get_day(d)), 23, 59)
    print('{} {}'.format(st, et))
    st = calendar.timegm(st.timetuple())
    et = calendar.timegm(et.timetuple())
    for ric in rics:
        print(ric)
        # Funding-rate request for this instrument and day window.
        request = Request(
            'GET',
            'https://ftx.com/api/funding_rates?start_time={}&end_time={}&future={}-PERP'
            .format(st, et, ric))
        prepared = request.prepare()
        # NOTE(review): chunk is truncated here — the signed request is
        # presumably sent further down, outside this view.
        signature_payload = f'{ts}{prepared.method}{prepared.path_url}'.encode(
        )
import sys
import os
# BUG FIX: datetime.datetime(...) and time.time() are used below but neither
# module was imported, which raised NameError at runtime.
import datetime
import time
import calendar

import pandas as pd

root_path = os.path.dirname(os.path.realpath(__file__)) + '/../../'
sys.path.append(root_path + 'scripts/')
import utils

# BUG FIX: three positional args (argv[1..3]) are read below, but the
# original guarded `len(sys.argv) < 3` (IndexError with only two args) and
# its usage text listed just two arguments.
if len(sys.argv) < 4:
    print("usage: ./dump_erd_crypto.py start_date end_date idx")
    quit()

sd = sys.argv[1]
ed = sys.argv[2]
idx = sys.argv[3]

# Convert 'YYYYMMDD' strings to UTC epoch seconds at midnight.
sd = datetime.datetime(int(utils.get_year(sd)), int(utils.get_month(sd)),
                       int(utils.get_day(sd)), 0, 0)
ed = datetime.datetime(int(utils.get_year(ed)), int(utils.get_month(ed)),
                       int(utils.get_day(ed)), 0, 0)
print('{} {}'.format(sd, ed))
sd = calendar.timegm(sd.timetuple())
ed = calendar.timegm(ed.timetuple())
#sd = int(time.mktime(sd.timetuple()))
#ed = int(time.mktime(ed.timetuple()))

# Instrument universe for the index.
rics = pd.read_csv(root_path + '/data/compo/crypto/{}.txt'.format(idx))
rics = list(rics.ric.unique())

erd_path = root_path + 'data/erd/{}/'.format(idx)

# NOTE(review): Session is presumably requests.Session — the import is not
# visible in this chunk; confirm `from requests import Session` exists.
s = Session()
ts = int(time.time() * 1000)
df = pd.DataFrame()