def build(self, dao):
    """Flatten the DAO's rows into one feed entry per configured column.

    The first cell of every row is used as the statement date.  The cells
    at the positions listed in ``self.value_index_list`` are paired, by
    position, with the names in ``self.column_name_list``.

    Args:
        dao: Object providing ``get_stock_symbol()``, ``get_period()`` and
            ``get_row_list()``.

    Returns:
        A tuple of dicts with the keys ``release_date``, ``stock_symbol``,
        ``stmt_date``, ``account``, ``account_order`` (1-based), ``value``
        and ``period``.
    """
    symbol = dao.get_stock_symbol()
    today = DateUtils().now_date()
    period = dao.get_period()
    accounts = self.column_name_list

    entries = []
    for row in dao.get_row_list():
        stmt_date = row[0]
        values = [row[index] for index in self.value_index_list]
        for position, account in enumerate(accounts):
            entries.append({
                'release_date': today,
                'stock_symbol': symbol,
                'stmt_date': stmt_date,
                'account': account,
                'account_order': position + 1,
                'value': values[position],
                'period': period,
            })
    return tuple(entries)
def __build_tuple(self, dao):
    """Flatten the DAO's rows into one feed entry per value cell.

    The first cell of every row is the statement date; each remaining cell
    becomes an entry whose account name is looked up at the same position
    in ``dao.get_column_name_list()``.

    Args:
        dao: Object providing ``get_stock_symbol()``,
            ``get_column_name_list()``, ``get_period()`` and
            ``get_row_list()``.

    Returns:
        A tuple of dicts with the keys ``release_date``, ``stock_symbol``,
        ``stmt_date``, ``account``, ``account_order``, ``value`` and
        ``period``.
    """
    symbol = dao.get_stock_symbol()
    today = DateUtils().now_date()
    accounts = dao.get_column_name_list()
    period = dao.get_period()

    entries = []
    for row in dao.get_row_list():
        stmt_date = row[0]
        for position, cell in enumerate(row):
            if position == 0:
                # Cell 0 is the statement date itself, not an account value.
                continue
            entries.append({
                'release_date': today,
                'stock_symbol': symbol,
                'stmt_date': stmt_date,
                'account': accounts[position],
                'account_order': position,
                'value': cell,
                'period': period,
            })
    return tuple(entries)
def build(self, dao):
    """Flatten an account-per-row table into one feed entry per value cell.

    Here each row describes one account: cell 0 is the account name and the
    remaining cells are values whose statement dates are found at the same
    position in ``dao.get_column_name_list()``.  Empty rows are skipped but
    still count towards ``account_order``.

    Args:
        dao: Object providing ``get_stock_symbol()``,
            ``get_column_name_list()``, ``get_period()`` and
            ``get_row_list()``.

    Returns:
        A tuple of dicts with the keys ``release_date``, ``stock_symbol``,
        ``stmt_date``, ``account``, ``account_order`` (1-based row number),
        ``value`` and ``period``.
    """
    symbol = dao.get_stock_symbol()
    today = DateUtils().now_date()
    stmt_dates = dao.get_column_name_list()
    period = dao.get_period()

    entries = []
    for order, row in enumerate(dao.get_row_list(), start=1):
        if not row:
            continue
        account = row[0]
        for column in range(1, len(row)):
            entries.append({
                'release_date': today,
                'stock_symbol': symbol,
                'stmt_date': stmt_dates[column],
                'account': account,
                'account_order': order,
                'value': row[column],
                'period': period,
            })
    return tuple(entries)
def __init__(self):
    """Attach a fresh ``DateUtils`` helper to the instance."""
    helper = DateUtils()
    self.date_utils = helper
class DateBuilder():
    """Parse localized date strings into ``datetime.date`` objects.

    The following layouts are tried in order; the first one that matches the
    whole string wins:

    1. ``YYYY年M月D日``
    2. ``YYY年M月D日`` (2-3 digit year, treated as ROC era)
    3. ``YYYY/M/D``
    4. ``YYY/M/D`` (ROC era)
    5. ``YYY`` (ROC era year, mapped to December 31st)
    6. ``YYY.NQ`` (ROC era year and quarter, mapped to the quarter's end)
    7. ``YYY/MM`` (ROC era year and month, mapped to the month's last day)
    8. ``YYYY-M-D``
    9. ``民國YYY年M月`` (ROC era year and month, mapped to the month's last day)

    ROC era years are converted by adding 1911.
    """

    # Added to a 2-3 digit year that is expected to be in the ROC era.
    _ROC_YEAR_OFFSET = 1911

    # Last (month, day) of each calendar quarter.
    _QUARTER_END = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}

    def __init__(self):
        self.date_utils = DateUtils()

    def build(self, local_string):
        """Convert ``local_string`` to a date.

        Args:
            local_string: Text in one of the layouts listed on the class.

        Returns:
            The corresponding ``datetime.date``.

        Raises:
            AttributeError: If no layout matches.  The type is kept for
                backward compatibility: the previous implementation failed
                with the AttributeError of ``None.group``.
            ValueError: If a layout matches but the numbers do not form a
                valid date (e.g. month 13 or quarter 5).
        """
        for pattern, convert in self.__rules():
            match = re.search(pattern, local_string)
            if match is not None:
                return convert(match)
        raise AttributeError(
            'unrecognized date string: {!r}'.format(local_string))

    def __rules(self):
        """Return the ordered ``(pattern, converter)`` pairs tried by build.

        Patterns holding CJK text stay ``u''`` literals with doubled
        backslashes (same value as before, no invalid escape sequences);
        the ASCII-only ones are raw strings.
        """
        return (
            (u'^(\\d{4})年(\\d+)月(\\d+)日$', self.__from_ymd),
            (u'^(\\d{2,3})年(\\d+)月(\\d+)日$', self.__from_roc_ymd),
            (r'^(\d{4})/(\d+)/(\d+)$', self.__from_ymd),
            (r'^(\d{2,3})/(\d+)/(\d+)$', self.__from_roc_ymd),
            (r'^(\d{2,3})$', self.__from_roc_year),
            (r'^(\d{2,3})\.(\d{1})Q$', self.__from_roc_quarter),
            (r'^(\d{2,3})/0?(\d{1,2})$', self.__from_roc_month),
            (r'^(\d{4})-(\d+)-(\d+)$', self.__from_ymd),
            (u'^民國(\\d{2,3})年(\\d+)月$', self.__from_roc_month),
        )

    def __from_ymd(self, match):
        """Build a date from (year, month, day) groups."""
        year, month, day = (int(text) for text in match.groups())
        return datetime.date(year, month, day)

    def __from_roc_ymd(self, match):
        """Build a date from (ROC year, month, day) groups."""
        year, month, day = (int(text) for text in match.groups())
        return datetime.date(year + self._ROC_YEAR_OFFSET, month, day)

    def __from_roc_year(self, match):
        """Map a lone ROC year to December 31st of that year."""
        year = int(match.group(1)) + self._ROC_YEAR_OFFSET
        return datetime.date(year, 12, 31)

    def __from_roc_quarter(self, match):
        """Map (ROC year, quarter) groups to the quarter's last day."""
        year = int(match.group(1)) + self._ROC_YEAR_OFFSET
        end_quarter = int(match.group(2))
        return self.__from_year_quarter_to_date(year, end_quarter)

    def __from_roc_month(self, match):
        """Map (ROC year, month) groups to the month's last day."""
        year = int(match.group(1)) + self._ROC_YEAR_OFFSET
        month = int(match.group(2))
        day = self.date_utils.get_last_day_of_month(year, month)
        return datetime.date(year, month, day)

    def __from_year_quarter_to_date(self, year, quarter):
        """Return the last day of ``quarter`` (1-4) in ``year``.

        Raises:
            ValueError: If ``quarter`` is not 1, 2, 3 or 4.  Previously this
                case silently returned ``None``.
        """
        try:
            month, day = self._QUARTER_END[quarter]
        except KeyError:
            raise ValueError(
                'quarter must be in 1..4, got {!r}'.format(quarter))
        return datetime.date(year, month, day)
class TimeSeries(object):
    """Date-ordered list of ``(stmt_date, value)`` pairs with arithmetic helpers.

    The pairs are sorted on construction.  Every transforming method returns
    a new ``TimeSeries`` and leaves the receiver untouched.
    """

    @staticmethod
    def create(records):
        """Create a time series from records.

        If there is more than one record for the same ``stmt_date``, the one
        that compares greatest as a ``(release_date, value)`` tuple -- i.e.
        the latest release date -- is used.  Statement dates whose chosen
        value is falsy (``None``, ``0``, ...) are left out.

        Args:
            records: An iterable of records (release_date, stmt_date, value).

        Returns:
            A sorted time series (ordered by stmt_date).

            For example, if records are
            [
                (datetime.date(2015, 9, 30), datetime.date(2002, 12, 31), 1),
                (datetime.date(2015, 9, 30), datetime.date(2001, 12, 31), 2),
                (datetime.date(2015, 8, 31), datetime.date(2001, 12, 31), 3),
            ],
            the returned time series is
            [
                (datetime.date(2001, 12, 31), 2),
                (datetime.date(2002, 12, 31), 1),
            ].
        """
        group = {}
        for release_date, stmt_date, value in records:
            group.setdefault(stmt_date, []).append((release_date, value))
        time_series = []
        for stmt_date, candidates in group.items():
            _, value = max(candidates)
            if value:
                time_series.append((stmt_date, value))
        return TimeSeries(time_series)

    def __init__(self, time_series):
        """Store a sorted copy of ``time_series``.

        Args:
            time_series: Iterable of ``(stmt_date, value)`` pairs.
        """
        self.logger = logging.getLogger(__name__)
        self.time_series = sorted(time_series)
        self.date_utils = DateUtils()

    def get(self):
        """Return the underlying sorted list of ``(stmt_date, value)`` pairs."""
        return self.time_series

    def get_map(self):
        """Return a ``{stmt_date: value}`` dict (last pair wins on duplicates)."""
        return dict(self.time_series)

    def scalar(self, c):
        """Return a new series with every value converted to float and multiplied by ``c``."""
        return TimeSeries(
            [(stmt_date, c * float(value))
             for stmt_date, value in self.time_series])

    def get_inverse(self):
        """Return a new series of ``1 / value``; zero values are logged and skipped."""
        output = []
        for stmt_date, value in self.time_series:
            try:
                output.append((stmt_date, 1.0 / float(value)))
            except ZeroDivisionError as e:
                self.logger.error(e)
        return TimeSeries(output)

    def get_average(self):
        """Return the moving average over a window of two entries."""
        return self.get_moving_average(2)

    def get_moving_average(self, n):
        """Return the trailing moving average over up to ``n`` entries.

        The window for entry ``i`` covers entries ``max(i - n + 1, 0)`` to
        ``i`` inclusive, so the first entries average over fewer values.  An
        empty window (``n <= 0``) is logged and the entry is skipped.
        """
        output = []
        for i, (stmt_date, _) in enumerate(self.time_series):
            window = [
                value
                for _, value in self.time_series[max(i - n + 1, 0):i + 1]
            ]
            try:
                z = float(sum(window)) / float(len(window))
                output.append((stmt_date, z))
            except ZeroDivisionError as e:
                self.logger.error(e)
        return TimeSeries(output)

    def execute_binary_operation(self, operator, other_time_series):
        """Combine two series date by date.

        Args:
            operator: Callable taking two floats (this value, other value).
            other_time_series: The ``TimeSeries`` providing the right operand.

        Returns:
            A new series holding ``operator(a, b)`` for every statement date
            present in both series.  Dates for which the operator raises
            ``ZeroDivisionError`` are logged and skipped.
        """
        output = []
        other_map = other_time_series.get_map()
        for stmt_date, value in self.time_series:
            if stmt_date in other_map:
                try:
                    z = operator(float(value), float(other_map[stmt_date]))
                    output.append((stmt_date, z))
                except ZeroDivisionError as e:
                    self.logger.error(e)
        return TimeSeries(output)

    def __add__(self, other_time_series):
        return self.execute_binary_operation(operator.add, other_time_series)

    def __sub__(self, other_time_series):
        return self.execute_binary_operation(operator.sub, other_time_series)

    def __truediv__(self, other_time_series):
        return self.execute_binary_operation(
            operator.truediv, other_time_series)

    # Python 2 dispatches ``/`` to __div__ unless true division is enabled;
    # Python 3 only ever looks for __truediv__.  Support both.
    __div__ = __truediv__

    def __mul__(self, other_time_series):
        return self.execute_binary_operation(operator.mul, other_time_series)

    def accumulate(self):
        """Return the running total of the values (values are added as-is)."""
        output = []
        accumulated_value = 0.0
        for stmt_date, value in self.time_series:
            accumulated_value += value
            output.append((stmt_date, accumulated_value))
        return TimeSeries(output)

    def accumulate_annually(self):
        """Return the running total of the values, restarting whenever the year changes."""
        if not self.time_series:
            return TimeSeries([])
        output = []
        current_year = self.time_series[0][0].year
        accumulated_value = 0.0
        for stmt_date, value in self.time_series:
            if stmt_date.year != current_year:
                current_year = stmt_date.year
                accumulated_value = 0.0
            accumulated_value += value
            output.append((stmt_date, accumulated_value))
        return TimeSeries(output)

    def get_yoy(self):
        """Return the year-over-year growth rate of every entry.

        The comparison date comes from
        ``date_utils.get_last_date_of_month_in_prev_year``.  Entries without
        a value at that date are skipped; a zero previous value is logged
        and skipped.
        """
        output = []
        time_series_map = self.get_map()
        for stmt_date, value in self.time_series:
            prev_stmt_date = \
                self.date_utils.get_last_date_of_month_in_prev_year(stmt_date)
            if prev_stmt_date in time_series_map:
                try:
                    prev_value = float(time_series_map[prev_stmt_date])
                    yoy = (float(value) - prev_value) / prev_value
                    output.append((stmt_date, yoy))
                except ZeroDivisionError as e:
                    self.logger.error(e)
        return TimeSeries(output)

    def shift(self):
        """Return a series where each date (except the first) carries the previous entry's value."""
        output = [
            (stmt_date, prev_value)
            for (_, prev_value), (stmt_date, _)
            in zip(self.time_series, self.time_series[1:])
        ]
        return TimeSeries(output)

    def group_by_period(self, period):
        """Group by quarter (``'Q'``) or year (``'Y'``); other periods yield ``None``."""
        if period == 'Q':
            return self.group_by_quarter()
        elif period == 'Y':
            return self.group_by_year()

    def group_by_quarter(self):
        """Return a series of ``(quarter end date, TimeSeries of that quarter)``."""
        return self.__group_by(self.date_utils.get_last_date_of_quarter)

    def group_by_year(self):
        """Return a series of ``(year end date, TimeSeries of that year)``."""
        return self.__group_by(self.date_utils.get_last_date_of_year)

    def __group_by(self, key_of):
        """Bucket the entries by ``key_of(stmt_date)`` into nested series."""
        group_map = {}
        for stmt_date, value in self.time_series:
            group_map.setdefault(key_of(stmt_date), []).append(
                (stmt_date, value))
        return TimeSeries(
            [(key, TimeSeries(entries)) for key, entries in group_map.items()])

    def get_max_by_period(self, period):
        """Return the maximum value of every period group (see group_by_period)."""
        output = []
        for key, group_value in self.group_by_period(period).get():
            output.append((key, max(group_value.get_map().values())))
        return TimeSeries(output)

    def get_min_by_period(self, period):
        """Return the minimum value of every period group (see group_by_period)."""
        output = []
        for key, group_value in self.group_by_period(period).get():
            output.append((key, min(group_value.get_map().values())))
        return TimeSeries(output)

    def annualize(self, period):
        """Scale quarterly (``'Q'``) data by 4; yearly (``'Y'``) data is returned as-is.

        Any other period yields ``None``.
        """
        if period == 'Q':
            return self.scalar(4)
        elif period == 'Y':
            return self
def __init__(self, time_series):
    """Store a sorted copy of ``time_series`` plus logging and date helpers.

    Args:
        time_series: Iterable of entries; a sorted list of them is kept in
            ``self.time_series``.
    """
    ordered = list(time_series)
    ordered.sort()
    self.logger = logging.getLogger(__name__)
    self.time_series = ordered
    self.date_utils = DateUtils()
class TimeSeries(object):
    """Date-ordered list of ``(stmt_date, value)`` pairs with arithmetic helpers.

    The pairs are sorted on construction.  Every transforming method returns
    a new ``TimeSeries`` and leaves the receiver untouched.
    """

    @staticmethod
    def create(records):
        """Create a time series from records.

        If there is more than one record for the same ``stmt_date``, the one
        that compares greatest as a ``(release_date, value)`` tuple -- i.e.
        the latest release date -- is used.  Statement dates whose chosen
        value is falsy (``None``, ``0``, ...) are left out.

        Args:
            records: An iterable of records (release_date, stmt_date, value).

        Returns:
            A sorted time series (ordered by stmt_date).

            For example, if records are
            [
                (datetime.date(2015, 9, 30), datetime.date(2002, 12, 31), 1),
                (datetime.date(2015, 9, 30), datetime.date(2001, 12, 31), 2),
                (datetime.date(2015, 8, 31), datetime.date(2001, 12, 31), 3),
            ],
            the returned time series is
            [
                (datetime.date(2001, 12, 31), 2),
                (datetime.date(2002, 12, 31), 1),
            ].
        """
        group = {}
        for release_date, stmt_date, value in records:
            group.setdefault(stmt_date, []).append((release_date, value))
        time_series = []
        for stmt_date, candidates in group.items():
            _, value = max(candidates)
            if value:
                time_series.append((stmt_date, value))
        return TimeSeries(time_series)

    def __init__(self, time_series):
        """Store a sorted copy of ``time_series``.

        Args:
            time_series: Iterable of ``(stmt_date, value)`` pairs.
        """
        self.logger = logging.getLogger(__name__)
        self.time_series = sorted(time_series)
        self.date_utils = DateUtils()

    def get(self):
        """Return the underlying sorted list of ``(stmt_date, value)`` pairs."""
        return self.time_series

    def get_map(self):
        """Return a ``{stmt_date: value}`` dict (last pair wins on duplicates)."""
        return dict(self.time_series)

    def scalar(self, c):
        """Return a new series with every value converted to float and multiplied by ``c``."""
        return TimeSeries(
            [(stmt_date, c * float(value))
             for stmt_date, value in self.time_series])

    def get_inverse(self):
        """Return a new series of ``1 / value``; zero values are logged and skipped."""
        output = []
        for stmt_date, value in self.time_series:
            try:
                output.append((stmt_date, 1.0 / float(value)))
            except ZeroDivisionError as e:
                self.logger.error(e)
        return TimeSeries(output)

    def get_average(self):
        """Return the moving average over a window of two entries."""
        return self.get_moving_average(2)

    def get_moving_average(self, n):
        """Return the trailing moving average over up to ``n`` entries.

        The window for entry ``i`` covers entries ``max(i - n + 1, 0)`` to
        ``i`` inclusive, so the first entries average over fewer values.  An
        empty window (``n <= 0``) is logged and the entry is skipped.
        """
        output = []
        for i, (stmt_date, _) in enumerate(self.time_series):
            window = [
                value
                for _, value in self.time_series[max(i - n + 1, 0):i + 1]
            ]
            try:
                z = float(sum(window)) / float(len(window))
                output.append((stmt_date, z))
            except ZeroDivisionError as e:
                self.logger.error(e)
        return TimeSeries(output)

    def execute_binary_operation(self, operator, other_time_series):
        """Combine two series date by date.

        Args:
            operator: Callable taking two floats (this value, other value).
            other_time_series: The ``TimeSeries`` providing the right operand.

        Returns:
            A new series holding ``operator(a, b)`` for every statement date
            present in both series.  Dates for which the operator raises
            ``ZeroDivisionError`` are logged and skipped.
        """
        output = []
        other_map = other_time_series.get_map()
        for stmt_date, value in self.time_series:
            if stmt_date in other_map:
                try:
                    z = operator(float(value), float(other_map[stmt_date]))
                    output.append((stmt_date, z))
                except ZeroDivisionError as e:
                    self.logger.error(e)
        return TimeSeries(output)

    def __add__(self, other_time_series):
        return self.execute_binary_operation(operator.add, other_time_series)

    def __sub__(self, other_time_series):
        return self.execute_binary_operation(operator.sub, other_time_series)

    def __truediv__(self, other_time_series):
        return self.execute_binary_operation(
            operator.truediv, other_time_series)

    # Python 2 dispatches ``/`` to __div__ unless true division is enabled;
    # Python 3 only ever looks for __truediv__.  Support both.
    __div__ = __truediv__

    def __mul__(self, other_time_series):
        return self.execute_binary_operation(operator.mul, other_time_series)

    def accumulate(self):
        """Return the running total of the values (values are added as-is)."""
        output = []
        accumulated_value = 0.0
        for stmt_date, value in self.time_series:
            accumulated_value += value
            output.append((stmt_date, accumulated_value))
        return TimeSeries(output)

    def accumulate_annually(self):
        """Return the running total of the values, restarting whenever the year changes."""
        if not self.time_series:
            return TimeSeries([])
        output = []
        current_year = self.time_series[0][0].year
        accumulated_value = 0.0
        for stmt_date, value in self.time_series:
            if stmt_date.year != current_year:
                current_year = stmt_date.year
                accumulated_value = 0.0
            accumulated_value += value
            output.append((stmt_date, accumulated_value))
        return TimeSeries(output)

    def get_yoy(self):
        """Return the year-over-year growth rate of every entry.

        The comparison date comes from
        ``date_utils.get_last_date_of_month_in_prev_year``.  Entries without
        a value at that date are skipped; a zero previous value is logged
        and skipped.
        """
        output = []
        time_series_map = self.get_map()
        for stmt_date, value in self.time_series:
            prev_stmt_date = \
                self.date_utils.get_last_date_of_month_in_prev_year(stmt_date)
            if prev_stmt_date in time_series_map:
                try:
                    prev_value = float(time_series_map[prev_stmt_date])
                    yoy = (float(value) - prev_value) / prev_value
                    output.append((stmt_date, yoy))
                except ZeroDivisionError as e:
                    self.logger.error(e)
        return TimeSeries(output)

    def shift(self):
        """Return a series where each date (except the first) carries the previous entry's value."""
        output = [
            (stmt_date, prev_value)
            for (_, prev_value), (stmt_date, _)
            in zip(self.time_series, self.time_series[1:])
        ]
        return TimeSeries(output)

    def group_by_period(self, period):
        """Group by quarter (``'Q'``) or year (``'Y'``); other periods yield ``None``."""
        if period == 'Q':
            return self.group_by_quarter()
        elif period == 'Y':
            return self.group_by_year()

    def group_by_quarter(self):
        """Return a series of ``(quarter end date, TimeSeries of that quarter)``."""
        return self.__group_by(self.date_utils.get_last_date_of_quarter)

    def group_by_year(self):
        """Return a series of ``(year end date, TimeSeries of that year)``."""
        return self.__group_by(self.date_utils.get_last_date_of_year)

    def __group_by(self, key_of):
        """Bucket the entries by ``key_of(stmt_date)`` into nested series."""
        group_map = {}
        for stmt_date, value in self.time_series:
            group_map.setdefault(key_of(stmt_date), []).append(
                (stmt_date, value))
        return TimeSeries(
            [(key, TimeSeries(entries)) for key, entries in group_map.items()])

    def get_max_by_period(self, period):
        """Return the maximum value of every period group (see group_by_period)."""
        output = []
        for key, group_value in self.group_by_period(period).get():
            output.append((key, max(group_value.get_map().values())))
        return TimeSeries(output)

    def get_min_by_period(self, period):
        """Return the minimum value of every period group (see group_by_period)."""
        output = []
        for key, group_value in self.group_by_period(period).get():
            output.append((key, min(group_value.get_map().values())))
        return TimeSeries(output)

    def annualize(self, period):
        """Scale quarterly (``'Q'``) data by 4; yearly (``'Y'``) data is returned as-is.

        Any other period yields ``None``.
        """
        if period == 'Q':
            return self.scalar(4)
        elif period == 'Y':
            return self
def get_date_list_by_quarter(self, begin_date, end_date):
    """Wrap every date from ``DateUtils.range_date_by_quarter`` in a dict.

    Args:
        begin_date: Passed through as the start of the range.
        end_date: Passed through as the end of the range.

    Returns:
        A list of ``{'date': <date>}`` dicts, one per date yielded for the
        range, in the same order.
    """
    quarter_dates = DateUtils().range_date_by_quarter(begin_date, end_date)
    return [{'date': quarter_date} for quarter_date in quarter_dates]
def get_now_date(self):
    """Return whatever ``DateUtils().now_date()`` reports."""
    date_utils = DateUtils()
    return date_utils.now_date()
def setUp(self):
    """Give every test its own ``DateUtils`` instance."""
    fixture = DateUtils()
    self.date_utils = fixture
class DateUtilsTest(unittest.TestCase):
    """Unit tests for the date helpers exposed by ``DateUtils``."""

    def setUp(self):
        self.date_utils = DateUtils()

    def tearDown(self):
        self.date_utils = None

    def _assert_each(self, method, cases):
        """Check ``method(date(*given)) == date(*wanted)`` for every case, in order."""
        for given, wanted in cases:
            actual = method(datetime.date(*given))
            expected = datetime.date(*wanted)
            self.assertEqual(actual, expected)

    def test_get_last_date_of_month(self):
        self._assert_each(self.date_utils.get_last_date_of_month, [
            ((2010, 1, 1), (2010, 1, 31)),
        ])

    def test_get_last_date_of_prev_month(self):
        self._assert_each(self.date_utils.get_last_date_of_prev_month, [
            ((2010, 1, 1), (2009, 12, 31)),
        ])

    def test_get_last_date_of_quarter(self):
        # First and last day of each quarter map to that quarter's end.
        self._assert_each(self.date_utils.get_last_date_of_quarter, [
            ((2010, 1, 1), (2010, 3, 31)),
            ((2010, 3, 31), (2010, 3, 31)),
            ((2010, 4, 1), (2010, 6, 30)),
            ((2010, 6, 30), (2010, 6, 30)),
            ((2010, 7, 1), (2010, 9, 30)),
            ((2010, 9, 30), (2010, 9, 30)),
            ((2010, 10, 1), (2010, 12, 31)),
            ((2010, 12, 31), (2010, 12, 31)),
        ])

    def test_get_last_date_of_next_quarter(self):
        # First and last day of each quarter map to the following quarter's
        # end, rolling over into the next year for Q4.
        self._assert_each(self.date_utils.get_last_date_of_next_quarter, [
            ((2010, 1, 1), (2010, 6, 30)),
            ((2010, 3, 31), (2010, 6, 30)),
            ((2010, 4, 1), (2010, 9, 30)),
            ((2010, 6, 30), (2010, 9, 30)),
            ((2010, 7, 1), (2010, 12, 31)),
            ((2010, 9, 30), (2010, 12, 31)),
            ((2010, 10, 1), (2011, 3, 31)),
            ((2010, 12, 31), (2011, 3, 31)),
        ])

    def test_range_date_by_month(self):
        begin = datetime.date(2010, 9, 1)
        end = datetime.date(2011, 3, 15)
        expected = [
            datetime.date(2010, 9, 30),
            datetime.date(2010, 10, 31),
            datetime.date(2010, 11, 30),
            datetime.date(2010, 12, 31),
            datetime.date(2011, 1, 31),
            datetime.date(2011, 2, 28),
            datetime.date(2011, 3, 31),
        ]
        self.assertEqual(
            self.date_utils.range_date_by_month(begin, end), expected)

    def test_range_date_by_quarter(self):
        begin = datetime.date(2010, 9, 1)
        end = datetime.date(2011, 3, 15)
        expected = [
            datetime.date(2010, 9, 30),
            datetime.date(2010, 12, 31),
            datetime.date(2011, 3, 31),
        ]
        self.assertEqual(
            self.date_utils.range_date_by_quarter(begin, end), expected)

    def test_get_last_date_of_month_in_prev_year(self):
        # The second case lands on a leap-year February.
        self._assert_each(
            self.date_utils.get_last_date_of_month_in_prev_year, [
                ((2010, 10, 31), (2009, 10, 31)),
                ((2013, 2, 28), (2012, 2, 29)),
            ])
class DateUtilsTest(unittest.TestCase):
    """Unit tests for the date helpers exposed by ``DateUtils``."""

    def setUp(self):
        self.date_utils = DateUtils()

    def tearDown(self):
        self.date_utils = None

    def _assert_each(self, method, cases):
        """Check ``method(date(*given)) == date(*wanted)`` for every case, in order."""
        for given, wanted in cases:
            actual = method(datetime.date(*given))
            expected = datetime.date(*wanted)
            self.assertEqual(actual, expected)

    def test_get_last_date_of_month(self):
        self._assert_each(self.date_utils.get_last_date_of_month, [
            ((2010, 1, 1), (2010, 1, 31)),
        ])

    def test_get_last_date_of_prev_month(self):
        self._assert_each(self.date_utils.get_last_date_of_prev_month, [
            ((2010, 1, 1), (2009, 12, 31)),
        ])

    def test_get_last_date_of_quarter(self):
        # First and last day of each quarter map to that quarter's end.
        self._assert_each(self.date_utils.get_last_date_of_quarter, [
            ((2010, 1, 1), (2010, 3, 31)),
            ((2010, 3, 31), (2010, 3, 31)),
            ((2010, 4, 1), (2010, 6, 30)),
            ((2010, 6, 30), (2010, 6, 30)),
            ((2010, 7, 1), (2010, 9, 30)),
            ((2010, 9, 30), (2010, 9, 30)),
            ((2010, 10, 1), (2010, 12, 31)),
            ((2010, 12, 31), (2010, 12, 31)),
        ])

    def test_get_last_date_of_next_quarter(self):
        # First and last day of each quarter map to the following quarter's
        # end, rolling over into the next year for Q4.
        self._assert_each(self.date_utils.get_last_date_of_next_quarter, [
            ((2010, 1, 1), (2010, 6, 30)),
            ((2010, 3, 31), (2010, 6, 30)),
            ((2010, 4, 1), (2010, 9, 30)),
            ((2010, 6, 30), (2010, 9, 30)),
            ((2010, 7, 1), (2010, 12, 31)),
            ((2010, 9, 30), (2010, 12, 31)),
            ((2010, 10, 1), (2011, 3, 31)),
            ((2010, 12, 31), (2011, 3, 31)),
        ])

    def test_range_date_by_month(self):
        begin = datetime.date(2010, 9, 1)
        end = datetime.date(2011, 3, 15)
        expected = [
            datetime.date(2010, 9, 30),
            datetime.date(2010, 10, 31),
            datetime.date(2010, 11, 30),
            datetime.date(2010, 12, 31),
            datetime.date(2011, 1, 31),
            datetime.date(2011, 2, 28),
            datetime.date(2011, 3, 31),
        ]
        self.assertEqual(
            self.date_utils.range_date_by_month(begin, end), expected)

    def test_range_date_by_quarter(self):
        begin = datetime.date(2010, 9, 1)
        end = datetime.date(2011, 3, 15)
        expected = [
            datetime.date(2010, 9, 30),
            datetime.date(2010, 12, 31),
            datetime.date(2011, 3, 31),
        ]
        self.assertEqual(
            self.date_utils.range_date_by_quarter(begin, end), expected)

    def test_get_last_date_of_month_in_prev_year(self):
        # The second case lands on a leap-year February.
        self._assert_each(
            self.date_utils.get_last_date_of_month_in_prev_year, [
                ((2010, 10, 31), (2009, 10, 31)),
                ((2013, 2, 28), (2012, 2, 29)),
            ])