def update_daily_detection_mach(self):
    """Backfill per-day mach-o/plist detection counts into the dashboard DB.

    Walks every day between the earliest and latest detection rows already in
    the dashboard DB, re-reads the per-type counts from the attribute DB, and
    updates the dashboard row for that day.
    """
    logging.info("Start update_daily_detection_mach")
    self.db_dashboard.connectDB()
    self.db_attribute.connectDB()
    try:
        earliest = self.db_dashboard.get_date_of_earliest_row_of_detection()
        logging.debug("get_date_of_earliest_row_of_detection_mach %s" % earliest)
        latest_day = self.db_dashboard.get_date_of_latest_row_of_detection()
        logging.debug("get_date_of_latest_row_of_detection_mach %s" % latest_day)
        for current_day in util.date_range(earliest, latest_day):
            day = util.datetime_to_string(current_day)
            logging.debug("get_detection_by_day_mach:%s" % day)
            detection_list = self.db_attribute.get_detection_by_day_mach(day)
            mach_o_count = plist_count = 0
            for detection in detection_list:
                if detection['type'] == 'mach-o':
                    mach_o_count = detection['num']
                elif detection['type'] == 'plist':
                    plist_count = detection['num']
            # update_detection
            self.db_dashboard.update_detection_by_day(day, mach_o_count, plist_count)
    except Exception as e:
        logging.exception("update_daily_detection_mach error: %s" % e)
    self.db_dashboard.close()
    # BUG FIX: close the attribute DB opened above; the original closed
    # self.db_pattern (never opened here) and leaked db_attribute.
    # Also removed stray debug print statements (one was Py2-only syntax).
    self.db_attribute.close()
def update_new_virus(self):
    """Record viruses first seen since the dashboard's last recorded day.

    For each day after the latest dashboard row up to today, fetches that
    day's viruses from the pattern DB and inserts only the ones not already
    known (case-insensitive), keeping the known set up to date as it goes.
    """
    logging.info("Start update_new_virus")
    self.db_dashboard.connectDB()
    self.db_pattern.connectDB()
    try:
        latest_day = self.db_dashboard.get_date_of_latest_row()
        logging.debug("get_date_of_latest_row: %s" % latest_day)
        known_viruses = self.db_dashboard.get_all_viruses()
        logging.debug("get_all_viruses count: %s" % len(known_viruses))
        start_day = latest_day + datetime.timedelta(1)
        for day in util.date_range(start_day, self.today):
            todays_viruses = self.db_pattern.get_viruses_by_day(day)
            logging.debug("get_viruses_by_day (%s) count: %s" % (day, len(todays_viruses)))
            # Case-insensitive membership: known_viruses stores lowercase names.
            new_viruses = [v for v in todays_viruses if v.lower() not in known_viruses]
            self.db_dashboard.insert_viruses_by_day(new_viruses, day)
            logging.debug("insert_viruses count: %s" % len(new_viruses))
            for v in new_viruses:
                known_viruses.append(v.lower())
            logging.debug("update all_viruses count: %s" % len(known_viruses))
    except Exception as e:
        logging.exception("update_new_virus error: %s" % e)
    self.db_dashboard.close()
    self.db_pattern.close()
def __read_db(self, start_date, end_date):
    """Read OHLCV price data between two dates from the key/value store.

    Returns seven parallel lists: (times, open, high, low, close, volume,
    adj_close). Days with no stored data, or stored as the sentinel '0',
    are skipped.
    """
    db = self.__open_db()
    # Renamed locals: the original shadowed the builtin `open` (and used
    # other builtin-like names) for its accumulator lists.
    times = []
    opens = []
    highs = []
    lows = []
    closes = []
    volumes = []
    adj_closes = []
    for d in util.date_range(start_date, end_date):
        try:
            data = db[d.isoformat()]
        except KeyError:  # narrowed from a bare except: only a missing key is expected
            data = '0'
        if data != '0':
            fields = data.split()
            times.append(d)
            opens.append(float(fields[0]))
            highs.append(float(fields[1]))
            lows.append(float(fields[2]))
            closes.append(float(fields[3]))
            volumes.append(float(fields[4]))
            adj_closes.append(float(fields[5]))
    return times, opens, highs, lows, closes, volumes, adj_closes
def update_daily_crc(self):
    """Store daily CRC pattern statistics per pattern version and flag.

    For each day after the latest CRC dashboard row, queries count/added/
    modified/dropped numbers from the pattern DB for every (version, flag)
    combination and inserts one dashboard row per combination.
    """
    logging.info("Start update_daily_crc")
    self.db_dashboard.connectDB()
    self.db_pattern.connectDB()
    try:
        latest_day = self.db_dashboard.get_date_of_latest_row_of_crc()
        logging.info("get_date_of_latest_row_of_crc: %s" % latest_day)
        version_list = ["android_global_light", "mobile_internal_full", "mobilegateway_global"]
        flag_list = ["0", "1"]
        start_day = latest_day + datetime.timedelta(1)
        for current_day in util.date_range(start_day, self.today):
            day = util.datetime_to_string(current_day)
            logging.info("get_crc_by_day:%s" % day)
            for version in version_list:
                for flag in flag_list:
                    crc_count = self.db_pattern.get_crc_count(day, version, flag)
                    crc_added = self.db_pattern.get_crc_added_by_day(day, version, flag)
                    crc_modified = self.db_pattern.get_crc_modified_by_day(day, version, flag)
                    crc_dropped = self.db_pattern.get_crc_dropped_by_day(day, version, flag)
                    self.db_dashboard.insert_crc_by_day(day, version, flag, crc_added,
                                                        crc_modified, crc_dropped, crc_count)
                    logging.debug("insert finished, [%s] [%s] flag[%s] crc_added[%s] crc_modified[%s] "
                                  "crc_dropped[%s] crc_count[%s]"
                                  % (day, version, flag, crc_added, crc_modified,
                                     crc_dropped, crc_count))
    except Exception as e:
        logging.exception("update_daily_crc error: %s" % e)
    self.db_dashboard.close()
    self.db_pattern.close()
def update_daily_malware_report(self):
    """Insert daily malware / high-risk apk and sample counts into the dashboard.

    Processes every day after the latest daily-malware-report row up to today.
    """
    logging.info("Start update_daily_malware_report")
    self.db_dashboard.connectDB()
    self.db_attribute.connectDB()
    try:
        latest_day = self.db_dashboard.get_date_of_latest_daily_malware_report()
        logging.debug("get_date_of_latest_row_of_sample: %s" % latest_day)
        first_missing_day = latest_day + datetime.timedelta(1)
        for current_day in util.date_range(first_missing_day, self.today):
            day = util.datetime_to_string(current_day)
            logging.debug("get_sample_by_day:%s" % day)
            malware_apk = self.db_attribute.get_daily_malware_apk_number(day)
            malware_sample = self.db_attribute.get_daily_malware_sample_number(day)
            high_risk_apk = self.db_attribute.get_daily_high_risk_apk_number(day)
            high_risk_sample = self.db_attribute.get_daily_high_risk_sample_number(day)
            logging.info(
                "malware_apk =%s high_risk_apk = %s malware_sample = %s high_risk_sample = %s"
                % (malware_apk, high_risk_apk, malware_sample, high_risk_sample))
            self.db_dashboard.insert_daily_malware_report(day, malware_apk, high_risk_apk,
                                                          malware_sample, high_risk_sample)
    except Exception as e:
        logging.exception("update_daily_malware_report error: %s" % e)
    self.db_dashboard.close()
    self.db_attribute.close()
def get_weather(self, province, city, spell):
    """Scrape monthly weather-history pages for one city and store daily rows.

    For each month in the configured range, fetches the history page for the
    city's pinyin `spell`, parses the table rows, and inserts one record per
    day into the `weather_list` table.
    """
    months = date_range(self.start_time, self.end_time)
    for month in months:
        page_url = self.history_url % (spell, month)
        print(page_url)
        rows = get_soup(page_url).find(name='div', id='content').find_all(name='tr')
        # Drop the table header row.
        del rows[0]
        for row in rows:
            cells = row.find_all(name='td')
            # The date is embedded in the first cell's link target.
            date = cells[0].find(name='a').get('href').split('.')[0].split('/')[-1]
            date = get_all(date)
            state = get_all(cells[1].get_text())
            temperature = get_all(cells[2].get_text())
            wind = get_all(cells[3].get_text())
            print(province, city, date, state, temperature, wind)
            sql = 'INSERT INTO weather_list(weather_date, province, city, spell, state, temperature, wind) ' \
                  'values (%s, %s, %s, %s, %s, %s, %s)'
            self.mysql.insert(sql=sql,
                              params=[date, province, city, spell, state, temperature, wind])
def __init__(self, strategy_label, enter_signal, exit_signal, risk_cap, starting_cash,
             chart_functions, chart_functions_new, symbol_blacklist, trading_symbols,
             trading_start, trading_end, trading_type, trading_commission, always_plot):
    """Set up a trading strategy: normalized signals, symbol list, portfolio,
    and empty result containers.

    `enter_signal`, `exit_signal` and `risk_cap` are expression strings whose
    internal whitespace runs are collapsed to single spaces.
    """
    self.strategy_label = strategy_label
    # Raw string fixes the invalid-escape-sequence regex literal ('(\s\s*)').
    whitespace = re.compile(r'(\s\s*)')
    self.enter_signal = whitespace.sub(' ', enter_signal)
    self.exit_signal = whitespace.sub(' ', exit_signal)
    self.risk_cap = whitespace.sub(' ', risk_cap)
    self.starting_cash = starting_cash
    self.chart_functions = chart_functions
    self.chart_functions_new = chart_functions_new
    self.trading_start = util.convert_to_datetime(trading_start)
    self.trading_end = util.convert_to_datetime(trading_end)
    # Load symbol history 100 days early — presumably indicator warm-up; confirm.
    symbol_start = self.trading_start - datetime.timedelta(100)
    self.trading_symbols = sl.SymbolList(trading_symbols, symbol_blacklist,
                                         symbol_start, self.trading_end)
    self.trading_symbols_label = trading_symbols
    self.trading_type = trading_type
    self.trading_commission = trading_commission
    self.always_plot = always_plot
    # NOTE: removed a Py2-only `print self.daterange` debug statement here.
    self.daterange = util.date_range(self.trading_start, self.trading_end)
    # Portfolio to manage the trading transactions of the strategy
    self.portfolio = portfolio.Portfolio(self.starting_cash, self.trading_start,
                                         self.trading_commission)
    # time series used to store when we are in or out of the market
    self.in_n_out = None
    self.trading_days = None
    # time series to store the performance of the current trading strategy
    self.performance_index = {}
    self.stock_chart = {}
    # dictionary with time series of computed indicators
    self.indicators = {}
def get_holidays_between(instance, start_date, end_date):
    """Collect holidays between two dates from a holiday-calendar mapping.

    `instance` maps dates to a ', '-joined string of holiday names. Returns
    a DataFrame with one row per (holiday, date) pair, columns
    ['holiday', 'ds'].
    """
    found = []
    for day in date_range(start_date, end_date, exclusive=False):
        if day not in instance:
            continue
        for name in instance.get(day).split(', '):
            found.append([name, day])
    return pd.DataFrame(found, columns=['holiday', 'ds'])
def update_daily_detection(self):
    """Insert per-file-type daily detection counts into the dashboard DB.

    Processes every day after the latest detection dashboard row up to today,
    tallying counts by file type from the attribute DB.
    """
    logging.info("Start update_daily_detection")
    self.db_dashboard.connectDB()
    self.db_attribute.connectDB()
    try:
        latest_day = self.db_dashboard.get_date_of_latest_row_of_detection()
        logging.debug("get_date_of_latest_row_of_detection %s" % latest_day)
        # Table replaces the original 12-branch elif chain; order matches the
        # positional argument order of insert_detection_by_day below.
        known_types = ('apk', 'ar', 'cod', 'deb', 'dex', 'elf',
                       'ipa', 'other', 'sis', 'zip', 'mach-o', 'plist')
        for current_day in util.date_range(latest_day + datetime.timedelta(1), self.today):
            day = util.datetime_to_string(current_day)
            logging.debug("get_detection_by_day:%s" % day)
            detection_list = self.db_attribute.get_detection_by_day(day)
            counts = dict.fromkeys(known_types, 0)
            for detection in detection_list:
                if detection['type'] in counts:
                    counts[detection['type']] = detection['num']
            # insert_detection
            self.db_dashboard.insert_detection_by_day(
                day, counts['apk'], counts['ar'], counts['cod'], counts['deb'],
                counts['dex'], counts['elf'], counts['ipa'], counts['other'],
                counts['sis'], counts['zip'], counts['mach-o'], counts['plist'])
    except Exception as e:
        logging.exception("update_daily_detection error: %s" % e)
    self.db_dashboard.close()
    # BUG FIX: close the attribute DB opened above; the original closed
    # self.db_pattern (never opened here) and leaked db_attribute.
    self.db_attribute.close()
def analyse(path, filter_fn, field_name, print_csv=False): data = load_data(path, filter_fn) occurrences = data['days'] day_of_cycle = data['day_of_cycle'] weekdays = data['weekdays'] day_of_cycle_total = sum([day_of_cycle[x] for x in day_of_cycle]) if len(occurrences) == 0: print "No tags found. Are you sure '%s' is the correct tag?" % tag return deltas = [] for d in xrange(len(occurrences)-1): delta = occurrences[d+1] - occurrences[d] if delta.days > 2: deltas.append(delta.days) if print_csv: print "date,%s" % field_name for d in date_range(occurrences[0], occurrences[len(occurrences)-1]): if d in occurrences: print str(d) + ",1" else: print str(d) + ",0" return print "===============" print "Day of cycle distribution" previous = None for k in sorted(day_of_cycle.keys()): if previous: if k - previous > 1: print ".\n." previous = k print ("Day %s:" % k).ljust(10), str(day_of_cycle[k]).ljust(4), round(day_of_cycle[k] / float(day_of_cycle_total), 2) print "===============" print "Weekday distribution" for k in sorted(weekdays.keys()): print weekday_from_int(k).ljust(5), weekdays[k] print "===============" print "Total amount of days with %s: " % field_name, len(occurrences) print "Average amount of days between %s: " % field_name, average(deltas) print "Std dev: ", std_dev(deltas) print "Last day with %s: " % field_name, occurrences[len(occurrences)-1] print "Days between today and last day with %s: " % field_name, (datetime.datetime.today().date() - occurrences[len(occurrences)-1].date()).days print "==============="
def get_x_values(self):
    """Return the x-axis values (dates) for the chart.

    When `timescale_divisions` is set (e.g. "2 weeks"), parse it into a
    relativedelta step; otherwise use the scale division computed by
    _x_range(). Raises ValueError if timescale_divisions does not match
    "<number> <unit>".
    """
    x_min, x_max, scale_division = self._x_range()
    if self.timescale_divisions:
        # Raw string fixes the invalid escape sequences in '(\d+) ?(\w+)'.
        pattern = re.compile(r'(\d+) ?(\w+)')
        m = pattern.match(self.timescale_divisions)
        if not m:
            # BUG FIX: `raise ValueError, "..."` is Python-2-only syntax;
            # the call form works on both 2 and 3.
            raise ValueError("Invalid timescale_divisions: %s" % self.timescale_divisions)
        magnitude = int(m.group(1))
        units = m.group(2)
        parameter = self.lookup_relativedelta_parameter(units)
        scale_division = relativedelta(**{parameter: magnitude})
    return date_range(x_min, x_max, scale_division)
def __insert_symbol(self, start_date, end_date, db):
    """Download historical prices for self.symbol and store them in db.

    Each trading day's fields are stored space-joined under the ISO date key;
    non-trading days in the range get the sentinel value '0'. Returns None
    without writing when the download contains only the header row.
    """
    # Renamed from `list`, which shadowed the builtin.
    rows = self.__get_historical_prices(self.symbol, start_date, end_date)
    if len(rows) == 1:  # header row only: no trade data available
        return None
    last_trade_date = rows[1][0]
    days_without_trade = util.date_range(start_date, last_trade_date)
    # Fill the store with data for trading days (rows[0] is the header).
    for i in range(1, len(rows)):
        trade_date = rows[i][0]
        days_without_trade.remove(trade_date)
        # Space-join all fields after the date (keeps the trailing space the
        # original concatenation produced).
        data = ''.join(str(field) + ' ' for field in rows[i][1:])
        db[trade_date.isoformat()] = data
    # Mark the remaining non-trading days with the '0' sentinel.
    for day in days_without_trade:
        db[day.isoformat()] = '0'
def load_data(path, filter_fn):
    """Load tracker JSON and compute per-day stats. (Python 2 module.)

    Returns a dict with:
      - "days": parsed datetimes of entries matching filter_fn
      - "day_of_cycle": {day-of-cycle: count of matching entries}
      - "weekdays": {0..6: count of matching entries per weekday}

    day_of_cycle resets to 1 on a non-spotting 'period' entry when the
    counter is 0 or already past day 15, and keeps ticking once started.
    """
    with open(path) as f:
        data_sorted = sorted(json.loads(f.read())['data'], key=lambda x: x['day'])
    # Index entries by their YYYY-MM-DD prefix.
    data = {x['day'][:10]: x for x in data_sorted}
    days = []
    day_of_cycle_dict = {}
    day_of_cycle = 0
    start_date = parse(data_sorted[0]['day'])
    end_date = parse(data_sorted[len(data_sorted) - 1]['day'])
    dates = [x.date().strftime('%Y-%m-%d') for x in date_range(start_date, end_date)]
    weekdays = {x: 0 for x in xrange(0, 7)}
    for d in dates:
        try:
            i = data[d]
        except KeyError:  # narrowed from bare except: day has no entry
            if day_of_cycle != 0:
                day_of_cycle += 1
            continue
        if 'period' in i:
            if (day_of_cycle > 15 or day_of_cycle == 0) and i['period'] != 'spotting':
                day_of_cycle = 1
        if filter_fn(i):
            days.append(parse(i['day']))
            weekdays[parse(i['day']).weekday()] += 1
            if day_of_cycle != 0:
                if not day_of_cycle_dict.get(day_of_cycle):
                    day_of_cycle_dict[day_of_cycle] = 1
                else:
                    day_of_cycle_dict[day_of_cycle] += 1
        if day_of_cycle != 0:
            day_of_cycle += 1
    return {"days": days, "day_of_cycle": day_of_cycle_dict, "weekdays": weekdays}
def update_top10_family(self):
    """Insert daily top-family lists (malware vs adware) into the dashboard DB.

    Splits each day's detections into adware families (per the adware family
    list) and non-GEN malware families, then stores both lists with their
    totals.
    """
    logging.info("Start update_top10_family")
    self.db_dashboard.connectDB()
    self.db_attribute.connectDB()
    try:
        latest_day = self.db_dashboard.get_date_of_latest_row_of_top10_family()
        logging.debug("get_date_of_latest_row_of_top10_family: %s" % latest_day)
        for current_day in util.date_range(latest_day + datetime.timedelta(1), self.today):
            day = util.datetime_to_string(current_day)
            logging.debug("get_sample_by_day:%s" % day)
            detection_list = self.db_attribute.get_detection_list_by_day(day)
            adware_family_list = self.db_attribute.get_adware_family_list()
            malware_list = list()
            adware_list = list()
            malware_count = 0
            adware_count = 0
            # NOTE(review): adware_count accumulates over ALL detections
            # (not just adware) and malware_count is never incremented —
            # this looks suspicious but is preserved as-is; confirm intent
            # against the dashboard consumers before changing.
            for detection in detection_list:
                adware_count += detection['num']
                if detection['family_name'] in adware_family_list:
                    adware_list.append((detection['family_name'], detection['num']))
                elif detection['family_name'] != 'GEN':
                    malware_list.append((detection['family_name'], detection['num']))
            # insert
            self.db_dashboard.insert_top10_family_by_day(day, "malware", malware_count, malware_list)
            self.db_dashboard.insert_top10_family_by_day(day, "adware", adware_count, adware_list)
    except Exception as e:
        logging.exception("update_top10_family error: %s" % e)
    self.db_dashboard.close()
    # BUG FIX: close the attribute DB opened above; the original closed
    # self.db_pattern (never opened here) and leaked db_attribute.
    self.db_attribute.close()
def __iter__(self):
    """Iterate the key space from min_key to max_key inclusive.

    Supports exact-int keys (lazy integer range) and exact-date keys
    (util.date_range). Raises TypeError for any other key type.
    """
    if type(self.min_key) is int:
        return iter(xrange(self.min_key, self.max_key + 1))
    if type(self.min_key) is datetime.date:
        return util.date_range(self.min_key, self.max_key)
    # BUG FIX: the original fell off the end and implicitly returned None,
    # which surfaces as an opaque "iter() returned non-iterator" TypeError
    # at the call site; raise an explicit, descriptive error instead.
    raise TypeError("unsupported key type: %r" % type(self.min_key))
def build_r_0_arr(self):
    """Returns an array of the reproduction numbers (R) for each day.

    Each element in the array represents a single day in the simulation.
    For example, if self.first_date is 2020-03-01 and self.projection_end_date
    is 2020-09-01, then R_0_ARR[10] would be the R value on 2020-03-11.

    Full description at: https://covid19-projections.com/about/#effective-reproduction-number-r
    and https://covid19-projections.com/model-details/#modeling-the-r-value

    We use three different R values: R0, post-mitigation R, and reopen R.
    We use an inverse logistic/sigmoid function to smooth the transition between
    the three R values.
    """
    reopen_r = self.get_reopen_r()
    assert 0.5 <= self.LOCKDOWN_FATIGUE <= 1.5, self.LOCKDOWN_FATIGUE
    # Shift the configured reopen date by the per-run and default offsets.
    reopen_date_shift = self.REOPEN_DATE + datetime.timedelta(
        days=int(self.REOPEN_SHIFT_DAYS) + DEFAULT_REOPEN_SHIFT_DAYS)
    fatigue_idx = self.inflection_day_idx + DAYS_UNTIL_LOCKDOWN_FATIGUE
    reopen_idx = self.get_day_idx_from_date(reopen_date_shift)
    # Midpoint between lockdown inflection and reopen: where we switch from
    # the lockdown sigmoid to the reopen sigmoid below.
    lockdown_reopen_midpoint_idx = (self.inflection_day_idx + reopen_idx) // 2
    NUMERATOR_CONST = 6
    # Steeper reopen inflection -> shorter transition to post-reopen R.
    days_until_post_reopen = int(
        np.rint(NUMERATOR_CONST / self.REOPEN_INFLECTION))
    assert 10 <= days_until_post_reopen <= 80, days_until_post_reopen
    post_reopen_midpoint_idx = reopen_idx + days_until_post_reopen
    post_reopen_idx = reopen_idx + days_until_post_reopen * 2
    # Fall effects begin ramping 30 days before the nominal fall start date.
    fall_start_idx = self.get_day_idx_from_date(FALL_START_DATE_NORTH) - 30
    # Sigmoid transitions between the R regimes:
    # initial R0 -> lockdown R.
    sig_lockdown = get_transition_sigmoid(
        self.inflection_day_idx,
        self.rate_of_inflection,
        self.INITIAL_R_0,
        self.LOCKDOWN_R_0,
    )
    # Multiplicative lockdown-fatigue adjustment (0 -> LOCKDOWN_FATIGUE-1).
    sig_fatigue = get_transition_sigmoid(fatigue_idx,
                                         0.2,
                                         0,
                                         self.LOCKDOWN_FATIGUE - 1,
                                         check_values=False)
    # Fatigued lockdown R -> reopen R.
    sig_reopen = get_transition_sigmoid(
        reopen_idx,
        self.REOPEN_INFLECTION,
        self.LOCKDOWN_R_0 * self.LOCKDOWN_FATIGUE,
        reopen_r,
    )
    # Reopen R -> post-reopen equilibrium R (never above reopen R).
    sig_post_reopen = get_transition_sigmoid(
        post_reopen_idx,
        self.REOPEN_INFLECTION,
        reopen_r,
        min(reopen_r, self.post_reopen_equilibrium_r),
    )
    dates = util.date_range(self.first_date, self.projection_end_date)
    assert len(dates) == self.N
    R_0_ARR = [self.INITIAL_R_0]
    for day_idx in range(1, self.N):
        if day_idx < lockdown_reopen_midpoint_idx:
            # Pre-reopen regime: lockdown sigmoid, optionally scaled by fatigue.
            r_t = sig_lockdown(day_idx)
            if abs(self.LOCKDOWN_FATIGUE - 1) > 1e-9:
                r_t *= 1 + sig_fatigue(day_idx)
        elif day_idx > post_reopen_midpoint_idx:
            r_t = sig_post_reopen(day_idx)
        else:
            r_t = sig_reopen(day_idx)
        if day_idx > fall_start_idx:
            # Compounding fall multiplier, clamped to [0.9, 1.5].
            fall_r_mult = max(
                0.9,
                min(1.5, self.fall_r_multiplier**(day_idx - fall_start_idx)))
            assert 0.9 <= fall_r_mult <= 1.5, fall_r_mult
            r_t *= fall_r_mult
        # Make sure R is stable
        # if day_idx > reopen_idx and abs(r_t / R_0_ARR[-1] - 1) > 0.2:
        #     assert (
        #         False
        #     ), f"{str(self)} - R changed too quickly: {day_idx} {R_0_ARR[-1]} -> {r_t} {R_0_ARR}"
        R_0_ARR.append(r_t)
    assert len(R_0_ARR) == self.N
    self.reopen_idx = reopen_idx
    return R_0_ARR
def test_date_range(date_cls):
    """Exercise date_range for the class supplied by the date_cls fixture.

    Covers: empty results (start == end, start past end, in both directions),
    custom positive steps (timedelta and relativedelta), and descending
    iteration (asc=False) with negative steps. End dates are exclusive.
    """
    # Empty ranges with the default 1-day ascending step.
    assert list(date_range(date_cls(2019, 1, 1), date_cls(2019, 1, 1))) == []
    assert list(date_range(date_cls(2019, 1, 2), date_cls(2019, 1, 1))) == []
    assert list(
        date_range(date_cls(2019, 1, 1), date_cls(2019, 1, 1),
                   relativedelta(months=1))) == []
    # Empty ranges with descending iteration and negative steps.
    assert list(
        date_range(date_cls(2019, 1, 1), date_cls(2019, 1, 1),
                   timedelta(days=-1), asc=False)) == []
    assert list(
        date_range(date_cls(2019, 1, 1), date_cls(2019, 1, 2),
                   timedelta(days=-1), asc=False)) == []
    assert list(
        date_range(date_cls(2019, 1, 1), date_cls(2019, 1, 1),
                   relativedelta(months=-1), asc=False)) == []
    # Ascending across a month boundary; the end date is excluded.
    assert list(date_range(date_cls(2019, 1, 31), date_cls(
        2019, 2, 2))) == [date_cls(2019, 1, 31), date_cls(2019, 2, 1)]
    # 2-day step: only the start fits before the exclusive end.
    assert list(
        date_range(date_cls(2019, 1, 31), date_cls(2019, 2, 2),
                   timedelta(days=2))) == [date_cls(2019, 1, 31)]
    # Descending variants of the two cases above.
    assert list(
        date_range(
            date_cls(2019, 2, 1), date_cls(2019, 1, 30), timedelta(days=-1),
            asc=False)) == [date_cls(2019, 2, 1), date_cls(2019, 1, 31)]
    assert list(
        date_range(date_cls(2019, 2, 2), date_cls(2019, 1, 31),
                   timedelta(days=-2), asc=False)) == [date_cls(2019, 2, 2)]
    # Month-sized relativedelta steps, ascending and descending.
    assert list(
        date_range(date_cls(2019, 1, 1), date_cls(2019, 3, 1),
                   relativedelta(months=1))) == [
                       date_cls(2019, 1, 1), date_cls(2019, 2, 1)
                   ]
    assert list(
        date_range(date_cls(2019, 3, 1), date_cls(2019, 1, 1),
                   relativedelta(months=-1),
                   asc=False)) == [date_cls(2019, 3, 1), date_cls(2019, 2, 1)]