Exemplo n.º 1
0
 def testLastFridayInThisMonth(self):
     # day=31 combined with weekday=FR(-1) is applied to self.today; the
     # result is expected to be Friday 2003-09-26.
     last_friday = self.today + relativedelta(day=31, weekday=FR(-1))
     expected = date(2003, 9, 26)
     self.assertEqual(last_friday, expected)
Exemplo n.º 2
0
class TestStringifiedDAGs(unittest.TestCase):
    """Unit tests for stringified DAGs."""
    def setUp(self):
        """Patch ``BaseHook.get_connection`` for the duration of one test.

        The patch is undone through ``addCleanup`` so the mocked connection
        does not leak into tests that run later in the same process.
        """
        super().setUp()
        connection = Connection(
            extra=('{'
                   '"project_id": "mock", '
                   '"location": "mock", '
                   '"instance": "mock", '
                   '"database_type": "postgres", '
                   '"use_proxy": "False", '
                   '"use_ssl": "False"'
                   '}'))
        patcher = mock.patch.object(BaseHook,
                                    'get_connection',
                                    new=mock.Mock(return_value=connection))
        patcher.start()
        self.addCleanup(patcher.stop)
        self.maxDiff = None  # pylint: disable=invalid-name

    def test_serialization(self):
        """Serialization and deserialization should work for every DAG and Operator."""
        dags = collect_dags()
        serialized_dags = {}
        for dag in dags.values():
            serialized = SerializedDAG.to_dict(dag)
            SerializedDAG.validate_schema(serialized)
            serialized_dags[dag.dag_id] = serialized

        # Compares with the ground truth of JSON string.
        self.validate_serialized_dag(serialized_dags['simple_dag'],
                                     serialized_simple_dag_ground_truth)

    def validate_serialized_dag(self, json_dag, ground_truth_dag):
        """Verify serialized DAGs match the ground truth.

        Note: both dictionaries are modified in place (``fileloc`` of
        ``json_dag`` is cleared and the "tasks" lists are re-sorted).
        """
        self.assertEqual(json_dag['dag']['fileloc'].split('/')[-1],
                         'test_dag_serialization.py')
        # The check above covers the file name, so drop the value before the
        # full dictionary comparison.
        json_dag['dag']['fileloc'] = None

        def sorted_serialized_dag(dag_dict: dict):
            """
            Sorts the "tasks" list in the serialised dag python dictionary
            This is needed as the order of tasks should not matter but assertEqual
            would fail if the order of tasks list changes in dag dictionary
            """
            dag_dict["dag"]["tasks"] = sorted(dag_dict["dag"]["tasks"],
                                              key=lambda x: sorted(x.keys()))
            return dag_dict

        self.assertEqual(sorted_serialized_dag(ground_truth_dag),
                         sorted_serialized_dag(json_dag))

    def test_deserialization(self):
        """A serialized DAG can be deserialized in another process."""
        queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=serialize_subprocess,
                                       args=(queue, ))
        proc.daemon = True
        proc.start()

        stringified_dags = {}
        # The loop stops on a ``None`` item read from the queue.
        while True:
            v = queue.get()
            if v is None:
                break
            dag = SerializedDAG.from_json(v)
            self.assertIsInstance(dag, DAG)
            stringified_dags[dag.dag_id] = dag

        dags = collect_dags()
        self.assertEqual(set(stringified_dags.keys()), set(dags.keys()))

        # Verify deserialized DAGs.
        example_skip_dag = stringified_dags['example_skip_dag']
        skip_operator_1_task = example_skip_dag.task_dict['skip_operator_1']
        self.validate_deserialized_task(skip_operator_1_task,
                                        'DummySkipOperator', '#e8b7e4', '#000')

        # Verify that the DAG object has 'full_filepath' attribute
        # and is equal to fileloc
        self.assertTrue(hasattr(example_skip_dag, 'full_filepath'))
        self.assertEqual(example_skip_dag.full_filepath,
                         example_skip_dag.fileloc)

        example_subdag_operator = stringified_dags['example_subdag_operator']
        section_1_task = example_subdag_operator.task_dict['section-1']
        self.validate_deserialized_task(section_1_task,
                                        SubDagOperator.__name__,
                                        SubDagOperator.ui_color,
                                        SubDagOperator.ui_fgcolor)

    def validate_deserialized_task(self, task, task_type, ui_color,
                                   ui_fgcolor):
        """Verify non-airflow operators are casted to BaseOperator."""
        self.assertIsInstance(task, SerializedBaseOperator)
        # Verify the original operator class is recorded for UI.
        self.assertEqual(task.task_type, task_type)
        self.assertEqual(task.ui_color, ui_color)
        self.assertEqual(task.ui_fgcolor, ui_fgcolor)

        # Check that for Deserialised task, task.subdag is None for all other Operators
        # except for the SubDagOperator where task.subdag is an instance of DAG object
        if task.task_type == "SubDagOperator":
            self.assertIsNotNone(task.subdag)
            self.assertIsInstance(task.subdag, DAG)
        else:
            self.assertIsNone(task.subdag)

    @parameterized.expand([
        (datetime(2019, 8, 1), None, datetime(2019, 8, 1)),
        (datetime(2019, 8, 1), datetime(2019, 8, 2), datetime(2019, 8, 2)),
        (datetime(2019, 8, 1), datetime(2019, 7, 30), datetime(2019, 8, 1)),
    ])
    def test_deserialization_start_date(self, dag_start_date, task_start_date,
                                        expected_task_start_date):
        """A task's start_date is serialized only when later than the DAG's."""
        dag = DAG(dag_id='simple_dag', start_date=dag_start_date)
        BaseOperator(task_id='simple_task',
                     dag=dag,
                     start_date=task_start_date)

        serialized_dag = SerializedDAG.to_dict(dag)
        serialized_task = serialized_dag["dag"]["tasks"][0]
        if not task_start_date or dag_start_date >= task_start_date:
            # If dag.start_date > task.start_date -> task.start_date=dag.start_date
            # because of the logic in dag.add_task()
            self.assertNotIn("start_date", serialized_task)
        else:
            self.assertIn("start_date", serialized_task)

        dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = dag.task_dict["simple_task"]
        self.assertEqual(simple_task.start_date, expected_task_start_date)

    @parameterized.expand([
        (datetime(2019, 8, 1), None, datetime(2019, 8, 1)),
        (datetime(2019, 8, 1), datetime(2019, 8, 2), datetime(2019, 8, 1)),
        (datetime(2019, 8, 1), datetime(2019, 7, 30), datetime(2019, 7, 30)),
    ])
    def test_deserialization_end_date(self, dag_end_date, task_end_date,
                                      expected_task_end_date):
        """A task's end_date is serialized only when earlier than the DAG's."""
        dag = DAG(dag_id='simple_dag',
                  start_date=datetime(2019, 8, 1),
                  end_date=dag_end_date)
        BaseOperator(task_id='simple_task', dag=dag, end_date=task_end_date)

        serialized_dag = SerializedDAG.to_dict(dag)
        serialized_task = serialized_dag["dag"]["tasks"][0]
        if not task_end_date or dag_end_date <= task_end_date:
            # If dag.end_date < task.end_date -> task.end_date=dag.end_date
            # because of the logic in dag.add_task()
            self.assertNotIn("end_date", serialized_task)
        else:
            self.assertIn("end_date", serialized_task)

        dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = dag.task_dict["simple_task"]
        self.assertEqual(simple_task.end_date, expected_task_end_date)

    @parameterized.expand([
        (None, None),
        ("@weekly", "@weekly"),
        ({
            "__type": "timedelta",
            "__var": 86400.0
        }, timedelta(days=1)),
    ])
    def test_deserialization_schedule_interval(self,
                                               serialized_schedule_interval,
                                               expected):
        """A hand-written serialized DAG yields the expected schedule_interval."""
        serialized = {
            "__version": 1,
            "dag": {
                "default_args": {
                    "__type": "dict",
                    "__var": {}
                },
                "params": {},
                "_dag_id": "simple_dag",
                "fileloc": __file__,
                "tasks": [],
                "timezone": "UTC",
                "schedule_interval": serialized_schedule_interval,
            },
        }

        SerializedDAG.validate_schema(serialized)

        dag = SerializedDAG.from_dict(serialized)

        self.assertEqual(dag.schedule_interval, expected)

    @parameterized.expand([
        (relativedelta(days=-1), {
            "__type": "relativedelta",
            "__var": {
                "days": -1
            }
        }),
        (relativedelta(month=1, days=-1), {
            "__type": "relativedelta",
            "__var": {
                "month": 1,
                "days": -1
            }
        }),
        # Every friday
        (relativedelta(weekday=FR), {
            "__type": "relativedelta",
            "__var": {
                "weekday": [4]
            }
        }),
        # Every second friday
        (relativedelta(weekday=FR(2)), {
            "__type": "relativedelta",
            "__var": {
                "weekday": [4, 2]
            }
        })
    ])
    def test_roundtrip_relativedelta(self, val, expected):
        """relativedelta values serialize to the expected dict and round-trip."""
        serialized = SerializedDAG._serialize(val)
        self.assertDictEqual(serialized, expected)

        round_tripped = SerializedDAG._deserialize(serialized)
        self.assertEqual(val, round_tripped)

    def test_extra_serialized_field_and_operator_links(self):
        """
        Assert extra field exists & OperatorLinks defined in Plugins and inbuilt Operator Links.

        This test also depends on GoogleLink() registered as a plugin
        in tests/plugins/test_plugin.py

        The function tests that if extra operator links are registered in plugin
        in ``operator_extra_links`` and the same is also defined in
        the Operator in ``BaseOperator.operator_extra_links``, it has the correct
        extra link.
        """
        test_date = datetime(2019, 8, 1)
        dag = DAG(dag_id='simple_dag', start_date=test_date)
        CustomOperator(task_id='simple_task', dag=dag, bash_command="true")

        serialized_dag = SerializedDAG.to_dict(dag)
        self.assertIn("bash_command", serialized_dag["dag"]["tasks"][0])

        dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = dag.task_dict["simple_task"]
        self.assertEqual(simple_task.bash_command, "true")

        #########################################################
        # Verify Operator Links work with Serialized Operator
        #########################################################
        # Check Serialized version of operator link only contains the inbuilt Op Link
        self.assertEqual(
            serialized_dag["dag"]["tasks"][0]["_operator_extra_links"], [{
                'tests.test_utils.mock_operators.CustomOpLink': {}
            }])

        # Test all the extra_links are set
        self.assertCountEqual(simple_task.extra_links,
                              ['Google Custom', 'airflow', 'github', 'google'])

        ti = TaskInstance(task=simple_task, execution_date=test_date)
        ti.xcom_push('search_query', "dummy_value_1")

        # Test Deserialized inbuilt link
        custom_inbuilt_link = simple_task.get_extra_links(
            test_date, CustomOpLink.name)
        self.assertEqual(
            'http://google.com/custom_base_link?search=dummy_value_1',
            custom_inbuilt_link)

        # Test Deserialized link registered via Airflow Plugin
        google_link_from_plugin = simple_task.get_extra_links(
            test_date, GoogleLink.name)
        self.assertEqual("https://www.google.com", google_link_from_plugin)

    def test_extra_serialized_field_and_multiple_operator_links(self):
        """
        Assert extra field exists & OperatorLinks defined in Plugins and inbuilt Operator Links.

        This test also depends on GoogleLink() registered as a plugin
        in tests/plugins/test_plugin.py

        The function tests that if extra operator links are registered in plugin
        in ``operator_extra_links`` and the same is also defined in
        the Operator in ``BaseOperator.operator_extra_links``, it has the correct
        extra link. Here ``bash_command`` is a two-element list and two indexed
        inbuilt links are expected.
        """
        test_date = datetime(2019, 8, 1)
        dag = DAG(dag_id='simple_dag', start_date=test_date)
        CustomOperator(task_id='simple_task',
                       dag=dag,
                       bash_command=["echo", "true"])

        serialized_dag = SerializedDAG.to_dict(dag)
        self.assertIn("bash_command", serialized_dag["dag"]["tasks"][0])

        dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = dag.task_dict["simple_task"]
        self.assertEqual(simple_task.bash_command, ["echo", "true"])

        #########################################################
        # Verify Operator Links work with Serialized Operator
        #########################################################
        # Check Serialized version of operator link only contains the inbuilt Op Link
        self.assertEqual(
            serialized_dag["dag"]["tasks"][0]["_operator_extra_links"], [
                {
                    'tests.test_utils.mock_operators.CustomBaseIndexOpLink': {
                        'index': 0
                    }
                },
                {
                    'tests.test_utils.mock_operators.CustomBaseIndexOpLink': {
                        'index': 1
                    }
                },
            ])

        # Test all the extra_links are set
        self.assertCountEqual(simple_task.extra_links, [
            'BigQuery Console #1', 'BigQuery Console #2', 'airflow', 'github',
            'google'
        ])

        ti = TaskInstance(task=simple_task, execution_date=test_date)
        ti.xcom_push('search_query', ["dummy_value_1", "dummy_value_2"])

        # Test Deserialized inbuilt link #1
        custom_inbuilt_link = simple_task.get_extra_links(
            test_date, "BigQuery Console #1")
        self.assertEqual(
            'https://console.cloud.google.com/bigquery?j=dummy_value_1',
            custom_inbuilt_link)

        # Test Deserialized inbuilt link #2
        custom_inbuilt_link = simple_task.get_extra_links(
            test_date, "BigQuery Console #2")
        self.assertEqual(
            'https://console.cloud.google.com/bigquery?j=dummy_value_2',
            custom_inbuilt_link)

        # Test Deserialized link registered via Airflow Plugin
        google_link_from_plugin = simple_task.get_extra_links(
            test_date, GoogleLink.name)
        self.assertEqual("https://www.google.com", google_link_from_plugin)
Exemplo n.º 3
0
    def _populate(self, year):
        """Fill this holiday mapping with the entries for ``year``.

        Holiday names are stored as Hungarian strings. Fixed-date holidays
        are gated on year ranges; movable ones are offsets from
        ``easter(year)``. Statement order is kept deliberately, since two
        entries may land on the same date.
        """
        # New years
        new_year = date(year, JAN, 1)
        self._add_with_observed_day_off(new_year, "Újév", since=2014)

        # National Day
        if year >= 1989 or 1945 <= year <= 1950:
            national_day_march = date(year, MAR, 15)
            self._add_with_observed_day_off(national_day_march,
                                            "Nemzeti ünnep")

        # Soviet era
        if 1950 <= year <= 1989:
            # Proclamation of Soviet socialist governing system
            proclamation_day = date(year, MAR, 21)
            self[proclamation_day] = "A Tanácsköztársaság kikiáltásának ünnepe"
            # Liberation Day
            liberation_day = date(year, APR, 4)
            self[liberation_day] = "A felszabadulás ünnepe"
            # Memorial day of The Great October Soviet Socialist Revolution
            if year != 1956 and year != 1989:
                revolution_day = date(year, NOV, 7)
                self[revolution_day] = (
                    "A nagy októberi szocialista forradalom ünnepe")

        easter_sunday = easter(year)

        # Good Friday
        if year >= 2017:
            good_friday = easter_sunday + rd(weekday=FR(-1))
            self[good_friday] = "Nagypéntek"

        # Easter
        self[easter_sunday] = "Húsvét"

        # Second easter day
        if year != 1955:
            easter_monday = easter_sunday + rd(days=1)
            self[easter_monday] = "Húsvét Hétfő"

        # Pentecost
        pentecost = easter_sunday + rd(days=49)
        self[pentecost] = "Pünkösd"

        # Pentecost monday
        if year >= 1992 or year <= 1952:
            pentecost_monday = easter_sunday + rd(days=50)
            self[pentecost_monday] = "Pünkösdhétfő"

        # International Workers' Day
        if year >= 1946:
            self._add_with_observed_day_off(date(year, MAY, 1),
                                            "A Munka ünnepe")
        if 1950 <= year <= 1953:
            self[date(year, MAY, 2)] = "A Munka ünnepe"

        # State Foundation Day (1771-????, 1891-)
        august_20 = date(year, AUG, 20)
        if 1950 <= year < 1990:
            self[august_20] = "A kenyér ünnepe"
        else:
            self._add_with_observed_day_off(august_20,
                                            "Az államalapítás ünnepe")

        # National Day
        if year >= 1991:
            self._add_with_observed_day_off(date(year, OCT, 23),
                                            "Nemzeti ünnep")

        # All Saints' Day
        if year >= 1999:
            self._add_with_observed_day_off(date(year, NOV, 1),
                                            "Mindenszentek")

        # Christmas Eve is not endorsed officially
        # but nowadays it is usually a day off work
        if self.observed and year >= 2010:
            christmas_eve = date(year, DEC, 24)
            if christmas_eve.weekday() not in WEEKEND:
                self[christmas_eve] = "Szenteste"

        # First christmas
        self[date(year, DEC, 25)] = "Karácsony"

        # Second christmas
        if year != 1955:
            self._add_with_observed_day_off(
                date(year, DEC, 26),
                "Karácsony másnapja",
                since=2013,
                before=False,
                after=True,
            )

        # New Year's Eve
        if self.observed and year >= 2014:
            new_years_eve = date(year, DEC, 31)
            if new_years_eve.weekday() == MON:
                self[new_years_eve] = "Szilveszter"
def main():
    """Build per-margin ROAS and sales volume and append them to BigQuery.

    Steps:
      1. Pull Analytics data one day at a time for the previous week.
      2. Pull the AdWords report and sum cost per date and GID.
      3. Load product margins from the XML feed.
      4. Join the three sources, convert cost to the target currency, and
         aggregate ROAS and sales volume per margin and date.
      5. Append the result to a BigQuery table via ``DataFrame.to_gbq``.

    NOTE: the SQL statements are run through ``pandasql.sqldf(sql, locals())``,
    so the local names ``dfAnalytics``, ``dfAdwords`` and ``dfXML`` are part
    of the queries and must not be renamed.
    """

    #MAIN IDEA:
    #Identify the sales volume and ROAS per margin and display as a bubble chart in data studio
    c = CurrencyConverter()

    print('------------Product Margins------------')
    #analytics dates to pull individually
    #NEED TO REWORK THIS TO HAVE LESS API REQUESTS
    today = date.today()
    # MO(-2): the script runs every Monday, so we look two Mondays back;
    # the remaining weekdays use their most recent occurrence.
    weekdays = (MO(-2), TU(-1), WE(-1), TH(-1), FR(-1), SA(-1), SU(-1))
    weekArr = [today + relativedelta(weekday=wd) for wd in weekdays]

    #pull for each day of the week - avoids data sampling this way but takes a bit longer
    csvChunks = []
    for day in weekArr:
        csvChunks.append(analytics_pull.main(str(day)))
        print("Pulled date: " + str(day))
    analyticsCSV = "".join(csvChunks)
    dfAnalytics = csv_parser.parseToCSV(analyticsCSV, "ANL")

    dfAnalytics['date'] = dfAnalytics.apply(
        match_maker.CheckDateFormatAnalytics, axis=1)
    # .copy() so the column assignment below writes to an independent frame
    # instead of a filtered slice (avoids chained-assignment problems).
    dfAnalytics = dfAnalytics[dfAnalytics.date != -1].copy()
    dfAnalytics['date'] = dfAnalytics.apply(formatDate, axis=1)

    print('-----Analytics Dataframe-----')

    #adwords
    adwordsCSV = adwords_pull.get_raw_report()

    dfAdwords = csv_parser.parseToCSV(adwordsCSV, "ADW")

    #REMOVE THE SPACE AFTER THE GID - WILL VARY DEPENDING ON CLIENTS
    # Keep only the text before the first space of every GID.
    GIDnew = [gid.partition(" ")[0] for gid in dfAdwords['GID']]

    dfAdwords = dfAdwords.drop(columns="GID")
    dfAdwords.loc[:, "GID"] = GIDnew

    print('-----Adwords Dataframe-----')

    sql = '''
    SELECT dfAdwords.Date, SUM(dfAdwords.Cost) AS Cost, dfAdwords.GID
    FROM dfAdwords
    GROUP BY dfAdwords.Date, dfAdwords.GID
    '''
    dfAdwords = pandasql.sqldf(sql, locals())

    print('-----Grouped Adwords Dataframe-----')

    #XML
    dfXML = process_xml.main(margins=True)

    # Lower-case both join keys so the SKU/GID match is case-insensitive.
    dfXML['GID'] = dfXML['GID'].str.lower()
    dfAnalytics['productSku'] = dfAnalytics['productSku'].str.lower()

    print('-----Default XML Dataframe-----')

    sql = '''
    SELECT dfXML.Margin, dfXML.GID, dfXML.Brand
    FROM dfXML
    GROUP BY dfXML.Margin, dfXML.GID, dfXML.Brand
    '''

    dfXML = pandasql.sqldf(sql, locals())

    print('-----Grouped XML Dataframe-----')

    sql = '''
    SELECT dfXML.Margin, dfXML.Brand, dfAnalytics.date, dfAnalytics.itemQuantity, dfAnalytics.productSku,
    dfAnalytics.productName, dfAnalytics.itemRevenue
    FROM dfAnalytics
    INNER JOIN dfXML ON dfXML.GID = dfAnalytics.productSku
    '''
    dfXML = pandasql.sqldf(sql, locals())

    print('-----Joined XML Dataframe-----')
    print(dfXML)

    sql = '''
    SELECT dfXML.Margin, dfXML.Brand, dfXML.date, dfXML.itemQuantity, dfXML.productSku,
    dfXML.productName, dfXML.itemRevenue, dfAdwords.Cost
    FROM dfXML
    INNER JOIN dfAdwords ON dfXML.Date = dfAdwords.Date AND dfXML.productSku = dfAdwords.GID
    '''

    dfXML = pandasql.sqldf(sql, locals())
    print('-----Joined XML Dataframe-----')
    print(dfXML)

    newCost = []
    newROAS = []
    for _, dfXMLRow in dfXML.iterrows():
        # Cost is divided by 1,000,000 before conversion
        # (presumably reported in micros - confirm against the AdWords report).
        newVal = c.convert(dfXMLRow['Cost'] / 1000000, 'USD', 'YOUR_CURRENCY')
        newCost.append(newVal)
        # Guard against division by zero (or negative cost).
        if float(newVal) <= 0:
            newROAS.append(0)
        else:
            newROAS.append(float(dfXMLRow['itemRevenue']) / float(newVal))

    dfXML.loc[:, "newCost"] = newCost
    dfXML.loc[:, "newROAS"] = newROAS

    print('-----Final XML Dataframe-----')

    sql = '''
    SELECT 
    date,
    Margin,
    (SUM(CAST(itemRevenue as float64)) / sum(CAST(newCost as float64))) as ROAS,
    SUM(CAST(itemQuantity as int64)) as salesVolume
    FROM dfXML
    GROUP BY Margin, date
    '''
    dfXML = pandasql.sqldf(sql, locals())

    dfXML.to_gbq('DATASET_NAME.TABLE_NAME',
                 project_id='YOUR_PROJECT_ID',
                 chunksize=None,
                 if_exists='append',
                 table_schema=None,
                 location='LOCATION',
                 progress_bar=True,
                 credentials=google_auth.getCreds())
Exemplo n.º 5
0
def main():
    """Build an example Numerai Signals submission from end-of-day price data.

    Steps:
      1. Load the eligible ticker universe and the ticker map, and intersect
         the mapped tickers with the EOD ticker list.
      2. Download the price data and derive RSI/SMA indicators per ticker.
      3. Bin each indicator per date, add lagged bins and day-over-day
         (signed and absolute) differences as features.
      4. Merge with the Numerai targets, train a gradient boosting model and
         predict on validation and live (last Friday, else Thursday) rows.
      5. Write ``example_quandl_signal_upload.csv``.
    """
    # -----Tickers and mapping-----
    napi = numerapi.SignalsAPI()
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name="bloomberg_ticker")

    ticker_map = pd.read_csv(
        "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv"
    )
    ticker_map = ticker_map[ticker_map.bloomberg_ticker.isin(eligible_tickers)]

    yfinance_tickers = ticker_map["yahoo"]

    eod_tickers = pd.read_csv(
        "https://s3.amazonaws.com/quandl-production-static/end_of_day_us_stocks/ticker_list.csv"
    )
    print(f"Number of eligible tickers : {len(eligible_tickers)}")

    common_tickers = np.intersect1d(yfinance_tickers.values.astype(str),
                                    eod_tickers["Ticker"].values.astype(str))
    print(
        f"Number of tickers common between EOD and Bloomberg: {len(common_tickers)}"
    )

    # downloads the whole dataset as zip and read data (takes around 1.5min)
    full_data = download_full_and_load(ticker_map,
                                       common_tickers,
                                       f_name="full_EOD.zip")

    # Building a custom feature
    full_data["day_chg"] = full_data["close"] / full_data["open"] - 1
    gc.collect()

    # -----Feature engineering-----
    ticker_groups = full_data.groupby("bloomberg_ticker")

    # RSI
    full_data["close_RSI_14"] = ticker_groups["close"].transform(
        lambda x: RSI(x, 14))
    full_data["close_RSI_21"] = ticker_groups["close"].transform(
        lambda x: RSI(x, 21))
    full_data["day_chg_RSI_14"] = ticker_groups["day_chg"].transform(
        lambda x: RSI(x, 14))
    full_data["day_chg_RSI_21"] = ticker_groups["day_chg"].transform(
        lambda x: RSI(x, 21))

    # SMA
    full_data["close_SMA_14"] = ticker_groups["close"].transform(
        lambda x: x.rolling(14).mean())
    full_data["close_SMA_21"] = ticker_groups["close"].transform(
        lambda x: x.rolling(21).mean())

    indicators = [
        "close_RSI_14", "close_RSI_21", "day_chg_RSI_14", "close_SMA_14",
        "close_SMA_21", "day_chg_RSI_21"
    ]

    full_data.dropna(axis=0, inplace=True)
    del ticker_groups

    # -----Feature engineering: Quintile-----
    # Grouped by the index value (the later merge on "date" after
    # reset_index suggests the index holds dates - confirm against
    # download_full_and_load).
    date_groups = full_data.groupby(full_data.index)
    print("Quintiling...")
    for indicator in indicators:
        # NOTE: despite the "_quintile" suffix, values are cut into up to
        # 100 bins.
        full_data[f"{indicator}_quintile"] = (
            date_groups[indicator].transform(lambda group: pd.qcut(
                group, 100, labels=False, duplicates="drop")).astype(
                    np.float16))
        gc.collect()

    del date_groups
    gc.collect()

    # -----Feature engineering: Quintile lag-----
    # NOTE(review): grouped by "ticker" here but by "bloomberg_ticker" above;
    # both columns are expected from download_full_and_load - confirm.
    ticker_groups = full_data.groupby("ticker")
    # create lagged features, lag 0 is that day's value, lag 1 is yesterday's value, etc
    print("Calculating lag...")
    num_days = 5
    for indicator in indicators:
        for day in range(num_days + 1):
            full_data[f"{indicator}_quintile_lag_{day}"] = ticker_groups[
                f"{indicator}_quintile"].transform(
                    lambda group: group.shift(day))

        gc.collect()

    full_data.dropna(axis=0, inplace=True)

    del ticker_groups
    gc.collect()
    print("Calculating changes in lag...")
    # create the signed and the absolute difference of the lagged features
    # (change in quintile by day) for every lag step
    for indicator in indicators:
        for day in range(num_days):
            lag_change = (full_data[f"{indicator}_quintile_lag_{day}"] -
                          full_data[f"{indicator}_quintile_lag_{day + 1}"])
            full_data[f"{indicator}_diff_{day}"] = lag_change.astype(
                np.float16)
            full_data[f"{indicator}_abs_diff_{day}"] = np.abs(
                lag_change).astype(np.float16)
            gc.collect()

    TARGET_NAME = "target"
    PREDICTION_NAME = "signal"

    # read in Signals targets
    numerai_targets = "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_train_val_bbg.csv"
    targets = pd.read_csv(numerai_targets)
    targets["date"] = pd.to_datetime(targets["friday_date"], format="%Y%m%d")

    # merge our feature data with Numerai targets
    ML_data = pd.merge(full_data.reset_index(),
                       targets,
                       on=["date", "bloomberg_ticker"]).set_index("date")
    print(f"Number of eras in data: {len(ML_data.index.unique())}")

    # for training and testing we want clean, complete data only
    ML_data.dropna(inplace=True)
    ML_data = ML_data[ML_data.index.weekday ==
                      4]  # ensure we have only fridays
    ML_data = ML_data[ML_data.index.value_counts() >
                      200]  # drop eras with under 200 observations per era
    # every column whose name mentions "lag" or "diff" is a model feature;
    # any() keeps each column at most once
    feature_names = [
        f for f in ML_data.columns if any(y in f for y in ["lag", "diff"])
    ]
    print(f"Using {len(feature_names)} features")

    last_friday = datetime.now() + relativedelta(weekday=FR(-1))
    date_string = last_friday.strftime("%Y-%m-%d")

    try:
        live_data = full_data.loc[date_string].copy()
    except KeyError as e:
        print(f"No ticker on {e}")
        live_data = full_data.iloc[:0].copy()
    live_data.dropna(subset=feature_names, inplace=True)

    # get data from the day before, for markets that were closed
    # on the most recent friday
    last_thursday = last_friday - timedelta(days=1)
    thursday_date_string = last_thursday.strftime("%Y-%m-%d")
    try:
        thursday_data = full_data.loc[thursday_date_string]
    except KeyError as e:
        # same fallback as for friday: continue with an empty frame
        print(f"No ticker on {e}")
        thursday_data = full_data.iloc[:0]
    # Only select tickers than aren't already present in live_data
    thursday_data = thursday_data[~thursday_data.ticker.isin(live_data.ticker.
                                                             values)].copy()
    thursday_data.dropna(subset=feature_names, inplace=True)

    live_data = pd.concat([live_data, thursday_data])

    # train test split
    train_data = ML_data[ML_data["data_type"] == "train"].copy()
    test_data = ML_data[ML_data["data_type"] == "validation"].copy()

    # scale all features by 1/100
    train_data[feature_names] /= 100.0
    test_data[feature_names] /= 100.0
    live_data[feature_names] /= 100.0

    del ML_data
    gc.collect()

    # train model
    print("Training model...")
    model = GradientBoostingRegressor(n_estimators=50)
    model.fit(train_data[feature_names], train_data[TARGET_NAME])
    print("Model trained.")

    # predict test data
    train_data[PREDICTION_NAME] = model.predict(train_data[feature_names])
    test_data[PREDICTION_NAME] = model.predict(test_data[feature_names])

    print(f"Number of live tickers to submit: {len(live_data)}")
    live_data[PREDICTION_NAME] = model.predict(live_data[feature_names])

    # prepare and writeout example file
    diagnostic_df = pd.concat([test_data, live_data])
    # live rows have no friday_date / data_type yet: fill them in
    diagnostic_df["friday_date"] = diagnostic_df.friday_date.fillna(
        last_friday.strftime("%Y%m%d")).astype(int)
    diagnostic_df["data_type"] = diagnostic_df.data_type.fillna("live")
    diagnostic_df[["bloomberg_ticker", "friday_date", "data_type",
                   "signal"]].reset_index(drop=True).to_csv(
                       "example_quandl_signal_upload.csv", index=False)
    print(
        "Example submission completed. Upload to signals.numer.ai for scores and live submission"
    )
Exemplo n.º 6
0
def main():
    """Create example_signal_upload.csv for validation and live data submission.

    The script runs these steps in order:

    1. Read the eligible ticker universe and the yahoo/bloomberg ticker map.
    2. Download adjusted close prices from Yahoo Finance in chunks.
    3. Build RSI quintile features, their lags and the lag differences.
    4. Merge the features with the historical targets and fit a
       ``GradientBoostingRegressor`` on the rows whose ``data_type`` is
       ``train``.
    5. Predict the ``validation`` rows and the live rows (most recent Friday,
       completed with Thursday rows for tickers that have no Friday row).
    6. Write ticker, date, data type and signal to
       ``example_signal_upload.csv``.
    """
    napi = numerapi.SignalsAPI()

    # read in list of active Signals tickers which can change slightly era to era
    eligible_tickers = pd.Series(napi.ticker_universe(), name='numerai_ticker')
    print(f"Number of eligible tickers: {len(eligible_tickers)}")

    # read in yahoo to numerai ticker map, still a work in progress, h/t wsouza
    ticker_map = pd.read_csv(
        'https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv'
    )
    print(f"Number of tickers in map: {len(ticker_map)}")

    # map eligible numerai tickers to yahoo finance tickers
    yfinance_tickers = eligible_tickers.map(
        dict(zip(ticker_map['bloomberg_ticker'],
                 ticker_map['yahoo']))).dropna()
    numerai_tickers = ticker_map['bloomberg_ticker']
    print(f'Number of eligible, mapped tickers: {len(yfinance_tickers)}')

    # download data
    n = 1000  # chunk row size
    chunk_df = [
        yfinance_tickers.iloc[i:i + n]
        for i in range(0, len(yfinance_tickers), n)
    ]

    concat_dfs = []
    print("Downloading data...")
    for df in chunk_df:
        try:
            # set threads = True for faster performance, but tickers will fail, script may hang
            # set threads = False for slower performance, but more tickers will succeed
            temp_df = yfinance.download(df.str.cat(sep=' '),
                                        start='2005-12-01',
                                        threads=False)
            temp_df = temp_df['Adj Close'].stack().reset_index()
            concat_dfs.append(temp_df)
        except Exception as exc:  # e.g. simplejson.errors.JSONDecodeError
            # Downloading stays best effort: a failed chunk is skipped, but it
            # is reported, and KeyboardInterrupt/SystemExit are no longer
            # swallowed the way a bare ``except:`` would.
            print(f"Skipping a chunk of {len(df)} tickers: {exc!r}")

    full_data = pd.concat(concat_dfs)

    # properly position and clean raw data, after taking adjusted close only
    full_data.columns = ['date', 'ticker', 'price']
    full_data.set_index('date', inplace=True)
    # convert yahoo finance tickers back to numerai tickers
    full_data['numerai_ticker'] = full_data.ticker.map(
        dict(zip(ticker_map['yahoo'], numerai_tickers)))
    print('Data downloaded.')
    print(
        f"Number of tickers with data: {len(full_data.numerai_ticker.unique())}"
    )

    ticker_groups = full_data.groupby('ticker')
    full_data['RSI'] = ticker_groups['price'].transform(lambda x: RSI(x))

    # group by era (date) and create quintile labels within each era, useful for learning relative ranking
    date_groups = full_data.groupby(full_data.index)
    full_data['RSI_quintile'] = date_groups['RSI'].transform(
        lambda group: pd.qcut(group, 5, labels=False, duplicates='drop'))
    full_data.dropna(inplace=True)

    # create lagged features grouped by ticker
    ticker_groups = full_data.groupby('ticker')
    num_days = 5
    # lag 0 is that day's value, lag 1 is yesterday's value, etc
    for day in range(num_days + 1):
        full_data[f'RSI_quintile_lag_{day}'] = ticker_groups[
            'RSI_quintile'].transform(lambda group: group.shift(day))

    # create difference of the lagged features and absolute difference of the lagged features (change in RSI quintile by day)
    for day in range(num_days):
        full_data[f'RSI_diff_{day}'] = full_data[
            f'RSI_quintile_lag_{day}'] - full_data[
                f'RSI_quintile_lag_{day + 1}']
        full_data[f'RSI_abs_diff_{day}'] = np.abs(
            full_data[f'RSI_quintile_lag_{day}'] -
            full_data[f'RSI_quintile_lag_{day + 1}'])

    # define column names of features, target, and prediction
    feature_names = [f'RSI_quintile_lag_{num}' for num in range(num_days)] + [
        f'RSI_diff_{num}' for num in range(num_days)
    ] + [f'RSI_abs_diff_{num}' for num in range(num_days)]
    print(f'Features for training:\n {feature_names}')

    TARGET_NAME = 'target'
    PREDICTION_NAME = 'signal'

    # read in Signals targets, downloading them first when no local copy exists
    try:
        targets = pd.read_csv('historical_targets.csv')
    except FileNotFoundError:
        napi.download_validation_data(dest_filename='historical_targets.csv')
        targets = pd.read_csv('historical_targets.csv')
    targets['date'] = pd.to_datetime(targets['friday_date'], format='%Y%m%d')

    targets.rename(columns={"bloomberg_ticker": "numerai_ticker"},
                   inplace=True)

    # merge our feature data with Numerai targets
    ML_data = pd.merge(full_data.reset_index(),
                       targets,
                       on=['date', 'numerai_ticker']).set_index('date')
    # print(f'Number of eras in data: {len(ML_data.index.unique())}')

    # for training and testing we want clean, complete data only
    ML_data.dropna(inplace=True)
    ML_data = ML_data[ML_data.index.weekday ==
                      4]  # ensure we have only fridays
    ML_data = ML_data[ML_data.index.value_counts() >
                      50]  # drop eras with under 50 observations per era

    # train test split; copies are taken because a prediction column is
    # assigned to test_data below and that must not target a slice of ML_data
    train_data = ML_data[ML_data['data_type'] == 'train'].copy()
    test_data = ML_data[ML_data['data_type'] == 'validation'].copy()

    # train model
    print("Training model...")
    model = GradientBoostingRegressor(subsample=0.1)
    model.fit(train_data[feature_names], train_data[TARGET_NAME])
    print("Model trained.")

    # predict test data
    test_data[PREDICTION_NAME] = model.predict(test_data[feature_names])

    # predict live data
    # choose data as of most recent friday
    last_friday = datetime.now() + relativedelta(weekday=FR(-1))
    date_string = last_friday.strftime('%Y-%m-%d')

    try:
        live_data = full_data.loc[date_string].copy()
    except KeyError as e:
        print(f"No ticker on {e}")
        live_data = full_data.iloc[:0].copy()
    live_data.dropna(subset=feature_names, inplace=True)

    # get data from the day before, for markets that were closed
    # on the most recent friday
    last_thursday = last_friday - timedelta(days=1)
    thursday_date_string = last_thursday.strftime('%Y-%m-%d')
    try:
        thursday_data = full_data.loc[thursday_date_string]
    except KeyError as e:
        # Same fallback as for Friday: continue with an empty frame.
        print(f"No ticker on {e}")
        thursday_data = full_data.iloc[:0]
    # Only select tickers that aren't already present in live_data
    thursday_data = thursday_data[~thursday_data.ticker.isin(live_data.ticker.
                                                             values)].copy()
    thursday_data.dropna(subset=feature_names, inplace=True)

    live_data = pd.concat([live_data, thursday_data])

    print(f"Number of live tickers to submit: {len(live_data)}")
    live_data[PREDICTION_NAME] = model.predict(live_data[feature_names])

    # prepare and writeout example file
    diagnostic_df = pd.concat([test_data, live_data])
    # live rows have no friday_date / data_type yet, so fill them in here
    diagnostic_df['friday_date'] = diagnostic_df.friday_date.fillna(
        last_friday.strftime('%Y%m%d')).astype(int)
    diagnostic_df['data_type'] = diagnostic_df.data_type.fillna('live')
    diagnostic_df[['numerai_ticker', 'friday_date', 'data_type',
                   'signal']].reset_index(drop=True).to_csv(
                       'example_signal_upload.csv', index=False)
    print(
        'Example submission completed. Upload to signals.numer.ai for scores and live submission'
    )
Exemplo n.º 7
0
    def _populate(self, year):
        """Add the holidays of ``year`` to this mapping.

        Every entry is written as ``self[date] = name``.  The fixed-date
        holidays other than Christmas, and Easter Day itself, are left out
        when ``self.observed`` is false and the date lands on a ``WEEKEND``
        day.  Carnival, Holy Thursday, Holy Friday and Christmas are always
        added.  National Sovereignty Day is only added from 2010 onwards, and
        the 12 October holiday changes its name in 2010.
        """

        def wanted(day):
            # Same test as "not (observance disabled and weekend date)".
            return self.observed or day.weekday() not in WEEKEND

        def add_fixed(month, day_of_month, label):
            # Store a fixed-date holiday, subject to the weekend rule above.
            fixed = date(year, month, day_of_month)
            if wanted(fixed):
                self[fixed] = label

        # New Year's Day
        add_fixed(JAN, 1, "Año Nuevo [New Year's Day]")

        # Carnival days: 48 and 47 days before Easter
        easter_day = easter(year)
        carnival = "Día de Carnaval [Carnival's Day]"
        for days_before in (48, 47):
            self[easter_day - rd(days=days_before)] = carnival

        # Memory's National Day for the Truth and Justice
        add_fixed(MAR, 24,
                  ("Día Nacional de la Memoria por la Verdad y la Justicia "
                   "[Memory's National Day for the Truth and Justice]"))

        # Holy Week: the Thursday and Friday preceding Easter are always added
        holy_thursday = easter_day + rd(weekday=TH(-1))
        self[holy_thursday] = (
            "Semana Santa (Jueves Santo)  [Holy day (Holy Thursday)]")
        holy_friday = easter_day + rd(weekday=FR(-1))
        self[holy_friday] = (
            "Semana Santa (Viernes Santo)  [Holy day (Holy Friday)]")

        # Easter Day itself follows the weekend rule
        if wanted(easter_day):
            self[easter_day] = "Día de Pascuas [Easter Day]"

        # Veterans Day and the Fallen in the Malvinas War
        add_fixed(APR, 2, ("Día del Veterano y de los Caidos "
                           "en la Guerra de Malvinas [Veterans"
                           " Day and the Fallen in the"
                           " Malvinas War]"))

        # Labor Day
        add_fixed(MAY, 1, "Día del Trabajo [Labour Day]")

        # May Revolution Day
        add_fixed(MAY, 25,
                  "Día de la Revolucion de Mayo [May Revolution Day]")

        # Day Pass to the Immortality of General Martín Miguel de Güemes.
        add_fixed(JUN, 17,
                  ("Día Pase a la Inmortalidad "
                   "del General Martín Miguel de Güemes [Day Pass "
                   "to the Immortality of General Martín Miguel de Güemes]"))

        # Day Pass to the Immortality of General D. Manuel Belgrano.
        add_fixed(JUN, 20,
                  ("Día Pase a la Inmortalidad "
                   "del General D. Manuel Belgrano [Day Pass "
                   "to the Immortality of General D. Manuel Belgrano]"))

        # Independence Day
        add_fixed(JUL, 9, "Día de la Independencia [Independence Day]")

        # Day Pass to the Immortality of General D. José de San Martin
        add_fixed(AUG, 17,
                  ("Día Pase a la Inmortalidad "
                   "del General D. José de San Martin [Day Pass "
                   "to the Immortality of General D. José de San Martin]"))

        # Respect for Cultural Diversity Day or Columbus day: the name
        # depends on whether the year is before 2010
        october_12 = date(year, OCT, 12)
        if wanted(october_12):
            if year < 2010:
                self[october_12] = "Día de la Raza [Columbus day]"
            else:
                self[october_12] = ("Día del Respeto a la Diversidad"
                                    " Cultural [Respect for"
                                    " Cultural Diversity Day]")

        # National Sovereignty Day, only from 2010 onwards
        november_20 = date(year, NOV, 20)
        if wanted(november_20) and year >= 2010:
            self[november_20] = (
                "Día Nacional de la Soberanía [National Sovereignty Day]")

        # Immaculate Conception
        add_fixed(DEC, 8, ("La Inmaculada Concepción"
                           " [Immaculate Conception]"))

        # Christmas is added regardless of the weekend rule
        self[date(year, DEC, 25)] = "Navidad [Christmas]"
Exemplo n.º 8
0
	def loadbars(self, now, is_closed):
		"""Download the missing price bars of every OHLC table and store them.

		For each table in ``self.ohlc_tables`` the newest stored timestamp is
		looked up, the bars between that point and an upper bound derived from
		``now`` are requested through ``self.get_bars_between``, and the
		returned bars are added to ``self.db.session`` and committed.  A table
		whose newest stored day already equals the upper bound is skipped.

		Args:
			now: current datetime; ``weekday()``, ``replace()`` and
				subtraction of a ``timedelta`` are used on it.
			is_closed: truthy when the market is currently closed.
		"""
		# get latest date in db.
		# if not yesterday, get bars between the latest day and yesterday
		# there's really no need to loop this.
		# simply get as much as possible and insert it.
		for table in self.ohlc_tables:
			# cp is currencypair: the first six characters of the table name
			cp = table.__tablename__[:6]
			yesterday = now - timedelta(hours=24)

			# date.weekday() returns the day of the week as an integer,
			# where Monday is 0 and Sunday is 6.

			if is_closed and now.weekday() == 4:
				# if the market is closed but its still friday, we want bars up to today; there won't be any partial bars
				yesterday = now

			elif is_closed and now.weekday() == 5:
				# if the market is closed and its saturday we want bars up to the previous friday. this block is here just for consistency
				yesterday = now + relativedelta(weekday=FR(-1))

			elif now.weekday() == 0:
				# Author's stated intent: on a sunday, even when the market is
				# open, use price bars from the previous friday, because a
				# saturday bar will never occur.
				# NOTE(review): weekday() == 0 is Monday, not Sunday (6) —
				# confirm which day is meant before changing the condition.
				yesterday = now + relativedelta(weekday=FR(-1))

			# TODO!: forex market is also closed on christmas and new year's day, work in logic for that

			latest_local_ts = table.query\
			.order_by(
				table.timestamp.desc()
			)\
			.first()

			if latest_local_ts is None:
				# Empty table: start from the epoch to fetch as much as possible.
				latest = dt.datetime(1970,1,1)

			# you can avoid all this converting back and forth if you just load the db with either tz unaware utc times or straight up timestamps

			else:
				# Compare at day granularity: drop the time of day.
				latest = latest_local_ts.timestamp\
				.replace(hour=0,minute=0,second=0,microsecond=0)
			yesterday = yesterday.replace(tzinfo=None)

			if latest == yesterday:
				# This table is up to date; move on to the next one.  (A
				# ``break`` here would also skip every remaining table.)
				continue
			# use yesterday to not get a partial bar
			latest_ts = int(latest.timestamp())
			yesterday_ts = int(yesterday.timestamp())
			market_id = market_ids[cp]
			price_type = price_types[cp]
			bars, status_code = self.get_bars_between(
				latest_ts,
				yesterday_ts,
				market_id,
				price_type
			)
			self.check_error(bars, status_code)
			# One table row per returned bar.  BarDate is sliced with [6:-2]
			# before conversion — presumably a "/Date(<number>)/" wrapper;
			# confirm against the API response format.
			rows = [
				table(
					timestamp=self.convert_wcf_notz(
						int(bar['BarDate'][6:-2])
					),
					open=bar['Open'],
					high=bar['High'],
					low=bar['Low'],
					close=bar['Close']
				)
				for bar in bars['PriceBars']
			]
			self.db.session.add_all(rows)
			self.db.session.commit()
Exemplo n.º 9
0
    def _populate(self, year):
        """Add the holidays of ``year`` to this mapping.

        Every entry is written as ``self[date] = name``.  The fixed-date
        holidays other than Christmas, and Easter Day itself, are left out
        when ``self.observed`` is false and the date lands on a ``WEEKEND``
        day.  Carnival, Holy Thursday, Holy Friday and Christmas are always
        added.
        """

        def wanted(day):
            # Same test as "not (observance disabled and weekend date)".
            return self.observed or day.weekday() not in WEEKEND

        def add_fixed(month, day_of_month, label):
            # Store a fixed-date holiday, subject to the weekend rule above.
            fixed = date(year, month, day_of_month)
            if wanted(fixed):
                self[fixed] = label

        # New Year's Day
        add_fixed(JAN, 1, "Año Nuevo [New Year's Day]")

        # Carnival days: 48 and 47 days before Easter
        # (original author's note: review this day for future cases)
        easter_day = easter(year)
        carnival = "Día de Carnaval [Carnival's Day]"
        for days_before in (48, 47):
            self[easter_day - rd(days=days_before)] = carnival

        # Three Kings' Day - holiday commemorating the arrival of the
        # Magi to Jesus
        add_fixed(JAN, 6, "Día de Reyes")

        # Holy Week: the Thursday and Friday preceding Easter are always added
        holy_thursday = easter_day + rd(weekday=TH(-1))
        self[holy_thursday] = (
            "Semana Santa (Jueves Santo)  [Holy day (Holy Thursday)]")
        holy_friday = easter_day + rd(weekday=FR(-1))
        self[holy_friday] = (
            "Semana Santa (Viernes Santo)  [Holy day (Holy Friday)]")

        # Easter Day itself follows the weekend rule
        if wanted(easter_day):
            self[easter_day] = "Día de Pascuas [Easter Day]"

        # Landing of the 33 Orientals on Agraciada beach
        add_fixed(APR, 19, ("Desembarco de los 33 Orientales "
                            "Landing of the 33 Orientals"
                            " Aterrissagem dos 33 Orientais"
                            " Sbarco dei 33 orientali"))

        # Workers' Day
        add_fixed(MAY, 1, "Día del Trabajo [Labour Day]")

        # Battle of Las Piedras
        add_fixed(MAY, 17, "Batalla de las Piedras [Battle of the stones]")

        # Birthday of José Gervasio Artigas
        add_fixed(JUN, 19, "Natalicio de José Gervacio Artigas ")

        # Oath of the Constitution
        add_fixed(JUL, 18, "Jura de la constitución ")

        # Declaration of Independence
        add_fixed(AUG, 25, "Día de la Independencia [Independence Day]")

        # Respect for Cultural Diversity Day or Columbus day: the name
        # depends on whether the year is before 2010
        # NOTE(review): stored on 11 October here — confirm the intended date.
        october_11 = date(year, OCT, 11)
        if wanted(october_11):
            if year < 2010:
                self[october_11] = "Día de la Raza [Columbus day]"
            else:
                self[october_11] = ("Día del Respeto a la Diversidad"
                                    " Cultural [Respect for"
                                    " Cultural Diversity Day]")

        # All Souls' Day
        add_fixed(NOV, 2, "Día de los difuntos")

        # Christmas is added regardless of the weekend rule
        self[date(year, DEC, 25)] = "Navidad [Christmas]"
Exemplo n.º 10
0
    def _populate(self, year):
        """Add the holidays of ``year`` to this mapping.

        Three groups of entries are written as ``self[date] = name``:

        * Fixed-date holidays.  Labour Day, Battle of Boyacá and Christmas are
          always stored; the others are skipped when ``self.observed`` is true
          and the date falls on a ``WEEKEND`` day.
        * Monday-shifted holidays (the original comments call these the
          Emiliani Law holidays).  They stay on their own date when it is a
          Monday or ``self.observed`` is false; otherwise they are stored on
          the following Monday with ``"(Observed)"`` appended to the name.
        * Easter-based holidays.  Maundy Thursday and Good Friday are always
          stored; Ascension, Corpus Christi and Sacred Heart (39, 60 and 68
          days after Easter) follow the Monday-shift rule.
        """

        def add_unless_weekend(month, day_of_month, label):
            # Skipped only when observance is on and the date is a weekend.
            fixed = date(year, month, day_of_month)
            if not (self.observed and fixed.weekday() in WEEKEND):
                self[fixed] = label

        def add_monday_shifted(actual, label):
            # Keep the real date on Mondays or when observance is off;
            # otherwise move the entry to the following Monday.
            if actual.weekday() == MON or not self.observed:
                self[actual] = label
            else:
                self[actual + rd(weekday=MO)] = label + "(Observed)"

        # Fixed date holidays!

        # New Year's Day
        add_unless_weekend(JAN, 1, "Año Nuevo [New Year's Day]")

        # Labor Day
        self[date(year, MAY, 1)] = "Día del Trabajo [Labour Day]"

        # Independence Day
        add_unless_weekend(JUL, 20,
                           "Día de la Independencia [Independence Day]")

        # Battle of Boyaca
        self[date(year, AUG, 7)] = "Batalla de Boyacá [Battle of Boyacá]"

        # Immaculate Conception
        add_unless_weekend(DEC, 8, ("La Inmaculada Concepción"
                                    " [Immaculate Conception]"))

        # Christmas
        self[date(year, DEC, 25)] = "Navidad [Christmas]"

        # Emiliani Law holidays!
        # Unless they fall on a Monday they are observed the following monday
        monday_shifted = (
            # Epiphany
            (JAN, 6, "Día de los Reyes Magos [Epiphany]"),
            # Saint Joseph's Day
            (MAR, 19, "Día de San José [Saint Joseph's Day]"),
            # Saint Peter and Saint Paul's Day
            (JUN, 29, "San Pedro y San Pablo [Saint Peter and Saint Paul]"),
            # Assumption of Mary
            (AUG, 15, "La Asunción [Assumption of Mary]"),
            # Columbus Day
            (OCT, 12, "Día de la Raza [Columbus Day]"),
            # All Saints' Day
            (NOV, 1, "Día de Todos los Santos [All Saint's Day]"),
            # Independence of Cartagena
            (NOV, 11,
             "Independencia de Cartagena [Independence of Cartagena]"),
        )
        for month, day_of_month, label in monday_shifted:
            add_monday_shifted(date(year, month, day_of_month), label)

        # Holidays based on Easter
        easter_day = easter(year)

        # Maundy Thursday
        self[easter_day + rd(weekday=TH(-1))] = (
            "Jueves Santo [Maundy Thursday]")

        # Good Friday
        self[easter_day + rd(weekday=FR(-1))] = "Viernes Santo [Good Friday]"

        # Holidays based on Easter but are observed the following monday
        # (unless they occur on a monday)
        after_easter = (
            # Ascension of Jesus
            (39, "Ascensión del señor [Ascension of Jesus]"),
            # Corpus Christi
            (60, "Corpus Christi [Corpus Christi]"),
            # Sacred Heart
            (68, "Sagrado Corazón [Sacred Heart]"),
        )
        for days_after, label in after_easter:
            add_monday_shifted(easter_day + rd(days=+days_after), label)
Exemplo n.º 11
0
    def _populate(self, year):
        # New Year's Day
        if year > 1870:
            name = "New Year's Day"
            self[date(year, JAN, 1)] = name
            if self.observed and date(year, JAN, 1).weekday() == SUN:
                self[date(year, JAN, 1) + rd(days=+1)] = name + " (Observed)"
            elif self.observed and date(year, JAN, 1).weekday() == SAT:
                # Add Dec 31st from the previous year without triggering
                # the entire year to be added
                expand = self.expand
                self.expand = False
                self[date(year, JAN, 1) + rd(days=-1)] = name + " (Observed)"
                self.expand = expand
            # The next year's observed New Year's Day can be in this year
            # when it falls on a Friday (Jan 1st is a Saturday)
            if self.observed and date(year, DEC, 31).weekday() == FRI:
                self[date(year, DEC, 31)] = name + " (Observed)"

        # Epiphany
        if self.state == "PR":
            self[date(year, JAN, 6)] = "Epiphany"

        # Three King's Day
        if self.state == "VI":
            self[date(year, JAN, 6)] = "Three King's Day"

        # Lee Jackson Day
        name = "Lee Jackson Day"
        if self.state == "VA":
            if 2000 <= year <= 2020:
                dt = (date(year, JAN, 1) + rd(weekday=MO(+3)) +
                      rd(weekday=FR(-1)))
                self[dt] = name
            elif 1983 <= year <= 2020:
                self[date(year, JAN, 1) + rd(weekday=MO(+3))] = name
            elif 1889 <= year <= 2020:
                self[date(year, JAN, 19)] = name

        # Inauguration Day
        if self.state in ("DC", "LA", "MD", "VA") and year >= 1789:
            name = "Inauguration Day"
            if (year - 1789) % 4 == 0 and year >= 1937:
                self[date(year, JAN, 20)] = name
                if date(year, JAN, 20).weekday() == SUN:
                    self[date(year, JAN, 21)] = name + " (Observed)"
            elif (year - 1789) % 4 == 0:
                self[date(year, MAR, 4)] = name
                if date(year, MAR, 4).weekday() == SUN:
                    self[date(year, MAR, 5)] = name + " (Observed)"

        # Martin Luther King Jr. Day
        if year >= 1986:
            name = "Martin Luther King Jr. Day"
            if self.state == "AL":
                name = "Robert E. Lee/Martin Luther King Birthday"
            elif (self.state == "MS") or ((self.state == "AR") and
                                          (year <= 2017)):
                name = ("Dr. Martin Luther King Jr. "
                        "and Robert E. Lee's Birthdays")
            elif self.state in ("AZ", "NH"):
                name = "Dr. Martin Luther King Jr./Civil Rights Day"
            elif self.state == "GA" and year < 2012:
                name = "Robert E. Lee's Birthday"
            elif self.state == "ID" and year >= 2006:
                name = "Martin Luther King Jr. - Idaho Human Rights Day"
            self[date(year, JAN, 1) + rd(weekday=MO(+3))] = name

        # Lincoln's Birthday
        name = "Lincoln's Birthday"
        if (self.state in ("CT", "IL", "IA", "NJ", "NY")
                and year >= 1971) or (self.state == "CA"
                                      and 1971 <= year <= 2009):
            self[date(year, FEB, 12)] = name
            if self.observed and date(year, FEB, 12).weekday() == SAT:
                self[date(year, FEB, 11)] = name + " (Observed)"
            elif self.observed and date(year, FEB, 12).weekday() == SUN:
                self[date(year, FEB, 13)] = name + " (Observed)"

        # Susan B. Anthony Day
        if ((self.state == "CA" and year >= 2014)
                or (self.state == "FL" and year >= 2011)
                or (self.state == "NY" and year >= 2004)
                or (self.state == "WI" and year >= 1976)):
            self[date(year, FEB, 15)] = "Susan B. Anthony Day"

        # Washington's Birthday
        name = "Washington's Birthday"
        if self.state == "AL":
            name = "George Washington/Thomas Jefferson Birthday"
        elif self.state == "AR":
            name = "George Washington's Birthday and Daisy Gatson Bates Day"
        elif self.state in ("PR", "VI"):
            name = "Presidents' Day"
        if self.state not in ("DE", "FL", "GA", "NM", "PR"):
            if year > 1970:
                self[date(year, FEB, 1) + rd(weekday=MO(+3))] = name
            elif year >= 1879:
                self[date(year, FEB, 22)] = name
        elif self.state == "GA":
            if date(year, DEC, 24).weekday() != WED:
                self[date(year, DEC, 24)] = name
            else:
                self[date(year, DEC, 26)] = name
        elif self.state in ("PR", "VI"):
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = name

        # Mardi Gras
        if self.state == "LA" and year >= 1857:
            self[easter(year) + rd(days=-47)] = "Mardi Gras"

        # Guam Discovery Day
        if self.state == "GU" and year >= 1970:
            self[date(year, MAR, 1) + rd(weekday=MO)] = "Guam Discovery Day"

        # Casimir Pulaski Day
        if self.state == "IL" and year >= 1978:
            self[date(year, MAR, 1) + rd(weekday=MO)] = "Casimir Pulaski Day"

        # Texas Independence Day
        if self.state == "TX" and year >= 1874:
            self[date(year, MAR, 2)] = "Texas Independence Day"

        # Town Meeting Day
        if self.state == "VT" and year >= 1800:
            self[date(year, MAR, 1) + rd(weekday=TU)] = "Town Meeting Day"

        # Evacuation Day
        if self.state == "MA" and year >= 1901:
            name = "Evacuation Day"
            self[date(year, MAR, 17)] = name
            if date(year, MAR, 17).weekday() in WEEKEND:
                self[date(year, MAR, 17) + rd(weekday=MO)] = (name +
                                                              " (Observed)")

        # Emancipation Day
        if self.state == "PR":
            self[date(year, MAR, 22)] = "Emancipation Day"
            if self.observed and date(year, MAR, 22).weekday() == SUN:
                self[date(year, MAR, 23)] = "Emancipation Day (Observed)"

        # Prince Jonah Kuhio Kalanianaole Day
        if self.state == "HI" and year >= 1949:
            name = "Prince Jonah Kuhio Kalanianaole Day"
            self[date(year, MAR, 26)] = name
            if self.observed and date(year, MAR, 26).weekday() == SAT:
                self[date(year, MAR, 25)] = name + " (Observed)"
            elif self.observed and date(year, MAR, 26).weekday() == SUN:
                self[date(year, MAR, 27)] = name + " (Observed)"

        # Steward's Day
        name = "Steward's Day"
        if self.state == "AK" and year >= 1955:
            self[date(year, APR, 1) + rd(days=-1, weekday=MO(-1))] = name
        elif self.state == "AK" and year >= 1918:
            self[date(year, MAR, 30)] = name

        # César Chávez Day
        name = "César Chávez Day"
        if self.state == "CA" and year >= 1995:
            self[date(year, MAR, 31)] = name
            if self.observed and date(year, MAR, 31).weekday() == SUN:
                self[date(year, APR, 1)] = name + " (Observed)"
        elif self.state == "TX" and year >= 2000:
            self[date(year, MAR, 31)] = name

        # Transfer Day
        if self.state == "VI":
            self[date(year, MAR, 31)] = "Transfer Day"

        # Emancipation Day
        if self.state == "DC" and year >= 2005:
            name = "Emancipation Day"
            self[date(year, APR, 16)] = name
            if self.observed and date(year, APR, 16).weekday() == SAT:
                self[date(year, APR, 15)] = name + " (Observed)"
            elif self.observed and date(year, APR, 16).weekday() == SUN:
                self[date(year, APR, 17)] = name + " (Observed)"

        # Patriots' Day
        if self.state in ("ME", "MA") and year >= 1969:
            self[date(year, APR, 1) + rd(weekday=MO(+3))] = "Patriots' Day"
        elif self.state in ("ME", "MA") and year >= 1894:
            self[date(year, APR, 19)] = "Patriots' Day"

        # Holy Thursday
        if self.state == "VI":
            self[easter(year) + rd(weekday=TH(-1))] = "Holy Thursday"

        # Good Friday
        if self.state in (
                "CT",
                "DE",
                "GU",
                "IN",
                "KY",
                "LA",
                "NJ",
                "NC",
                "PR",
                "TN",
                "TX",
                "VI",
        ):
            self[easter(year) + rd(weekday=FR(-1))] = "Good Friday"

        # Easter Monday
        if self.state == "VI":
            self[easter(year) + rd(weekday=MO)] = "Easter Monday"

        # Confederate Memorial Day
        name = "Confederate Memorial Day"
        if self.state in ("AL", "GA", "MS", "SC") and year >= 1866:
            if self.state == "GA" and year >= 2016:
                name = "State Holiday"
            if self.state == "GA" and year == 2020:
                self[date(year, APR, 10)] = name
            else:
                self[date(year, APR, 1) + rd(weekday=MO(+4))] = name
        elif self.state == "TX" and year >= 1931:
            self[date(year, JAN, 19)] = name

        # San Jacinto Day
        if self.state == "TX" and year >= 1875:
            self[date(year, APR, 21)] = "San Jacinto Day"

        # Arbor Day
        if self.state == "NE" and year >= 1989:
            self[date(year, APR, 30) + rd(weekday=FR(-1))] = "Arbor Day"
        elif self.state == "NE" and year >= 1875:
            self[date(year, APR, 22)] = "Arbor Day"

        # Primary Election Day
        if self.state == "IN" and ((year >= 2006 and year % 2 == 0)
                                   or year >= 2015):
            dt = date(year, MAY, 1) + rd(weekday=MO)
            self[dt + rd(days=+1)] = "Primary Election Day"

        # Truman Day
        if self.state == "MO" and year >= 1949:
            name = "Truman Day"
            self[date(year, MAY, 8)] = name
            if self.observed and date(year, MAY, 8).weekday() == SAT:
                self[date(year, MAY, 7)] = name + " (Observed)"
            elif self.observed and date(year, MAY, 8).weekday() == SUN:
                self[date(year, MAY, 10)] = name + " (Observed)"

        # Memorial Day
        if year > 1970:
            self[date(year, MAY, 31) + rd(weekday=MO(-1))] = "Memorial Day"
        elif year >= 1888:
            self[date(year, MAY, 30)] = "Memorial Day"

        # Juneteenth Day
        if year > 2020:
            self[date(year, JUN, 19)] = "Juneteenth National Independence Day"
            if self.observed and date(year, JUN, 19).weekday() == SAT:
                self[date(year, JUN, 18)] = name + " (Observed)"
            elif self.observed and date(year, JUN, 19).weekday() == SUN:
                self[date(year, JUN, 20)] = name + " (Observed)"

            if self.observed and date(year, JUN, 19).weekday() == SAT:
                self[date(year, JUN, 18)] = name + " (Observed)"
            elif self.observed and date(year, JUN, 19).weekday() == SUN:
                self[date(year, JUN, 20)] = name + " (Observed)"

        # Jefferson Davis Birthday
        name = "Jefferson Davis Birthday"
        if self.state == "AL" and year >= 1890:
            self[date(year, JUN, 1) + rd(weekday=MO)] = name

        # Kamehameha Day
        if self.state == "HI" and year >= 1872:
            self[date(year, JUN, 11)] = "Kamehameha Day"
            if self.observed and year >= 2011:
                if date(year, JUN, 11).weekday() == SAT:
                    self[date(year, JUN, 10)] = "Kamehameha Day (Observed)"
                elif date(year, JUN, 11).weekday() == SUN:
                    self[date(year, JUN, 12)] = "Kamehameha Day (Observed)"
        # Emancipation Day In Texas
        if self.state == "TX" and year >= 1980:
            self[date(year, JUN, 19)] = "Emancipation Day In Texas"

        # West Virginia Day
        name = "West Virginia Day"
        if self.state == "WV" and year >= 1927:
            self[date(year, JUN, 20)] = name
            if self.observed and date(year, JUN, 20).weekday() == SAT:
                self[date(year, JUN, 19)] = name + " (Observed)"
            elif self.observed and date(year, JUN, 20).weekday() == SUN:
                self[date(year, JUN, 21)] = name + " (Observed)"

        # Emancipation Day in US Virgin Islands
        if self.state == "VI":
            self[date(year, JUL, 3)] = "Emancipation Day"

        # Independence Day
        if year > 1870:
            name = "Independence Day"
            self[date(year, JUL, 4)] = name
            if self.observed and date(year, JUL, 4).weekday() == SAT:
                self[date(year, JUL, 4) + rd(days=-1)] = name + " (Observed)"
            elif self.observed and date(year, JUL, 4).weekday() == SUN:
                self[date(year, JUL, 4) + rd(days=+1)] = name + " (Observed)"

        # Liberation Day (Guam)
        if self.state == "GU" and year >= 1945:
            self[date(year, JUL, 21)] = "Liberation Day (Guam)"

        # Pioneer Day
        if self.state == "UT" and year >= 1849:
            name = "Pioneer Day"
            self[date(year, JUL, 24)] = name
            if self.observed and date(year, JUL, 24).weekday() == SAT:
                self[date(year, JUL, 24) + rd(days=-1)] = name + " (Observed)"
            elif self.observed and date(year, JUL, 24).weekday() == SUN:
                self[date(year, JUL, 24) + rd(days=+1)] = name + " (Observed)"

        # Constitution Day
        if self.state == "PR":
            self[date(year, JUL, 25)] = "Constitution Day"
            if self.observed and date(year, JUL, 25).weekday() == SUN:
                self[date(year, JUL, 26)] = "Constitution Day (Observed)"

        # Victory Day
        if self.state == "RI" and year >= 1948:
            self[date(year, AUG, 1) + rd(weekday=MO(+2))] = "Victory Day"

        # Statehood Day (Hawaii)
        if self.state == "HI" and year >= 1959:
            self[date(year, AUG, 1) + rd(weekday=FR(+3))] = "Statehood Day"

        # Bennington Battle Day
        if self.state == "VT" and year >= 1778:
            name = "Bennington Battle Day"
            self[date(year, AUG, 16)] = name
            if self.observed and date(year, AUG, 16).weekday() == SAT:
                self[date(year, AUG, 15)] = name + " (Observed)"
            elif self.observed and date(year, AUG, 16).weekday() == SUN:
                self[date(year, AUG, 17)] = name + " (Observed)"

        # Lyndon Baines Johnson Day
        if self.state == "TX" and year >= 1973:
            self[date(year, AUG, 27)] = "Lyndon Baines Johnson Day"

        # Labor Day
        if year >= 1894:
            self[date(year, SEP, 1) + rd(weekday=MO)] = "Labor Day"

        # Columbus Day
        if self.state not in ("AK", "AR", "DE", "FL", "HI", "NV"):
            if self.state == "SD":
                name = "Native American Day"
            elif self.state == "VI":
                name = "Columbus Day and Puerto Rico Friendship Day"
            else:
                name = "Columbus Day"
            if year >= 1970:
                self[date(year, OCT, 1) + rd(weekday=MO(+2))] = name
            elif year >= 1937:
                self[date(year, OCT, 12)] = name

        # Alaska Day
        if self.state == "AK" and year >= 1867:
            name = "Alaska Day"
            self[date(year, OCT, 18)] = name
            if self.observed and date(year, OCT, 18).weekday() == SAT:
                self[date(year, OCT, 18) + rd(days=-1)] = name + " (Observed)"
            elif self.observed and date(year, OCT, 18).weekday() == SUN:
                self[date(year, OCT, 18) + rd(days=+1)] = name + " (Observed)"

        # Nevada Day
        if self.state == "NV" and year >= 1933:
            dt = date(year, OCT, 31)
            if year >= 2000:
                dt += rd(weekday=FR(-1))
            self[dt] = "Nevada Day"
            if self.observed and dt.weekday() == SAT:
                self[dt + rd(days=-1)] = "Nevada Day (Observed)"
            elif self.observed and dt.weekday() == SUN:
                self[dt + rd(days=+1)] = "Nevada Day (Observed)"

        # Liberty Day
        if self.state == "VI":
            self[date(year, NOV, 1)] = "Liberty Day"

        # Election Day
        if (self.state in ("DE", "HI", "IL", "IN", "LA", "MT", "NH", "NJ",
                           "NY", "WV") and year >= 2008
                and year % 2 == 0) or (self.state in ("IN", "NY")
                                       and year >= 2015):
            dt = date(year, NOV, 1) + rd(weekday=MO)
            self[dt + rd(days=+1)] = "Election Day"

        # All Souls' Day
        if self.state == "GU":
            self[date(year, NOV, 2)] = "All Souls' Day"

        # Veterans Day
        if year > 1953:
            name = "Veterans Day"
        else:
            name = "Armistice Day"
        if 1978 > year > 1970:
            self[date(year, OCT, 1) + rd(weekday=MO(+4))] = name
        elif year >= 1938:
            self[date(year, NOV, 11)] = name
            if self.observed and date(year, NOV, 11).weekday() == SAT:
                self[date(year, NOV, 11) + rd(days=-1)] = name + " (Observed)"
            elif self.observed and date(year, NOV, 11).weekday() == SUN:
                self[date(year, NOV, 11) + rd(days=+1)] = name + " (Observed)"

        # Discovery Day
        if self.state == "PR":
            self[date(year, NOV, 19)] = "Discovery Day"
            if self.observed and date(year, NOV, 19).weekday() == SUN:
                self[date(year, NOV, 20)] = "Discovery Day (Observed)"

        # Thanksgiving
        if year > 1870:
            self[date(year, NOV, 1) + rd(weekday=TH(+4))] = "Thanksgiving"

        # Day After Thanksgiving
        # Friday After Thanksgiving
        # Lincoln's Birthday
        # American Indian Heritage Day
        # Family Day
        # New Mexico Presidents' Day
        if ((self.state in ("CA", "DE", "FL", "NH", "NC", "OK", "TX", "WV")
             and year >= 1975) or (self.state == "IN" and year >= 2010)
                or (self.state == "MD" and year >= 2008)
                or self.state in ("NV", "NM")):
            if self.state in ("CA", "DE", "NH", "NC", "OK", "WV"):
                name = "Day After Thanksgiving"
            elif self.state in ("FL", "TX"):
                name = "Friday After Thanksgiving"
            elif self.state == "IN":
                name = "Lincoln's Birthday"
            elif self.state == "MD" and year >= 2008:
                name = "American Indian Heritage Day"
            elif self.state == "NV":
                name = "Family Day"
            elif self.state == "NM":
                name = "Presidents' Day"
            dt = date(year, NOV, 1) + rd(weekday=TH(+4))
            self[dt + rd(days=+1)] = name

        # Robert E. Lee's Birthday
        if self.state == "GA" and year >= 1986:
            if year >= 2016:
                name = "State Holiday"
            else:
                name = "Robert E. Lee's Birthday"
            self[date(year, NOV, 29) + rd(weekday=FR(-1))] = name

        # Lady of Camarin Day
        if self.state == "GU":
            self[date(year, DEC, 8)] = "Lady of Camarin Day"

        # Christmas Eve
        if (self.state == "AS"
                or (self.state in ("KS", "MI", "NC") and year >= 2013)
                or (self.state == "TX" and year >= 1981)
                or (self.state == "WI" and year >= 2012)):
            name = "Christmas Eve"
            self[date(year, DEC, 24)] = name
            name = name + " (Observed)"
            # If on Friday, observed on Thursday
            if self.observed and date(year, DEC, 24).weekday() == FRI:
                self[date(year, DEC, 24) + rd(days=-1)] = name
            # If on Saturday or Sunday, observed on Friday
            elif self.observed and date(year, DEC, 24).weekday() in WEEKEND:
                self[date(year, DEC, 24) + rd(weekday=FR(-1))] = name

        # Christmas Day
        if year > 1870:
            name = "Christmas Day"
            self[date(year, DEC, 25)] = "Christmas Day"
            if self.observed and date(year, DEC, 25).weekday() == SAT:
                self[date(year, DEC, 25) + rd(days=-1)] = name + " (Observed)"
            elif self.observed and date(year, DEC, 25).weekday() == SUN:
                self[date(year, DEC, 25) + rd(days=+1)] = name + " (Observed)"

        # Day After Christmas
        if self.state == "NC" and year >= 2013:
            name = "Day After Christmas"
            self[date(year, DEC, 26)] = name
            name = name + " (Observed)"
            # If on Saturday or Sunday, observed on Monday
            if self.observed and date(year, DEC, 26).weekday() in WEEKEND:
                self[date(year, DEC, 26) + rd(weekday=MO)] = name
            # If on Monday, observed on Tuesday
            elif self.observed and date(year, DEC, 26).weekday() == MON:
                self[date(year, DEC, 26) + rd(days=+1)] = name
        elif self.state == "TX" and year >= 1981:
            self[date(year, DEC, 26)] = "Day After Christmas"
        elif self.state == "VI":
            self[date(year, DEC, 26)] = "Christmas Second Day"

        # New Year's Eve
        if (self.state in ("KY", "MI") and year >= 2013) or (self.state == "WI"
                                                             and year >= 2012):
            name = "New Year's Eve"
            self[date(year, DEC, 31)] = name
            if self.observed and date(year, DEC, 31).weekday() == SAT:
                self[date(year, DEC, 30)] = name + " (Observed)"
Exemplo n.º 12
0
        plt.show()

    # spearman scores by era
    train_era_scores = train_data.groupby(train_data.index).apply(score)
    test_era_scores = test_data.groupby(test_data.index).apply(score)

    #train scores, in-sample and will be significantly overfit
    run_analytics(train_era_scores)

    #test scores, out of sample
    run_analytics(test_era_scores)

    # Prediction on live data

    # choose data as of most recent friday
    last_friday = datetime.now() + relativedelta(weekday=FR(-1))
    date_string = last_friday.strftime('%Y-%m-%d')

    live_data = full_df.loc[date_string].copy()
    live_data.dropna(subset=feature_names, inplace=True)

    live_data

    print(f"Number of live tickers to submit: {len(live_data)}")
    live_data[PREDICTION_NAME] = model.predict(live_data[feature_names])

    live_data[PREDICTION_NAME].hist()

    # You can simply run this without any modification
    diagnostic_df = pd.concat([test_data, live_data])
    diagnostic_df['friday_date'] = diagnostic_df.friday_date.fillna(
Exemplo n.º 13
0
def get_last_friday():
    """Return the most recent Friday relative to today's date.

    ``FR(-1)`` asks ``relativedelta`` for the first Friday found when looking
    backwards from the starting day; per the dateutil documentation the
    starting day itself counts, so on a Friday this returns today.
    """
    today = date.today()
    back_to_friday = relativedelta(weekday=FR(-1))
    return today + back_to_friday
Exemplo n.º 14
0
class TestStringifiedDAGs(unittest.TestCase):
    """Unit tests for stringified DAGs."""
    def setUp(self):
        """Stub ``BaseHook.get_connection`` for the duration of a single test.

        Every call to ``BaseHook.get_connection`` returns one canned
        ``Connection`` whose ``extra`` JSON carries mock settings. The stub is
        installed with ``mock.patch.object`` and removed again through
        ``addCleanup`` so that it does not leak into tests that run later in
        the same process (a plain attribute assignment would never be undone).
        """
        super().setUp()
        mock_connection = Connection(
            extra=('{'
                   '"project_id": "mock", '
                   '"location": "mock", '
                   '"instance": "mock", '
                   '"database_type": "postgres", '
                   '"use_proxy": "False", '
                   '"use_ssl": "False"'
                   '}'))
        patcher = mock.patch.object(
            BaseHook,
            'get_connection',
            new=mock.Mock(return_value=mock_connection))
        patcher.start()
        # Registered right after start() so the original attribute is restored
        # even when the test body fails.
        self.addCleanup(patcher.stop)
        # Show full diffs when the large serialized dictionaries differ.
        self.maxDiff = None  # pylint: disable=invalid-name

    def test_serialization(self):
        """Every collected DAG serializes to a schema-valid dictionary.

        The serialized form of ``simple_dag`` is additionally compared with
        the ground-truth dictionary.
        """
        serialized_by_id = {}
        for collected in collect_dags().values():
            as_dict = SerializedDAG.to_dict(collected)
            SerializedDAG.validate_schema(as_dict)
            serialized_by_id[collected.dag_id] = as_dict

        # Compare simple_dag against its ground-truth serialization.
        expected = serialized_simple_dag_ground_truth
        self.validate_serialized_dag(serialized_by_id['simple_dag'], expected)

    def validate_serialized_dag(self, json_dag, ground_truth_dag):
        """Verify that a serialized DAG dictionary matches the ground truth.

        Only the basename of ``json_dag['dag']['fileloc']`` is checked; the
        entry is then overwritten with ``None`` so the comparison below does
        not depend on the full path.

        Note that both arguments are modified in place: ``json_dag`` loses its
        ``fileloc`` value and both dictionaries get their "tasks" list sorted.

        :param json_dag: serialized DAG dictionary under test
        :param ground_truth_dag: expected serialized DAG dictionary
        """
        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the file name is not the expected one.
        self.assertEqual(json_dag['dag']['fileloc'].split('/')[-1],
                         'test_dag_serialization.py')
        json_dag['dag']['fileloc'] = None

        def sorted_serialized_dag(dag_dict: dict):
            """
            Sort the "tasks" list of a serialized DAG dictionary in place.

            The order of tasks should not matter, but assertEqual would fail
            if the two lists were ordered differently. Tasks are ordered by
            their sorted key names.
            """
            dag_dict["dag"]["tasks"] = sorted(dag_dict["dag"]["tasks"],
                                              key=lambda x: sorted(x.keys()))
            return dag_dict

        self.assertEqual(sorted_serialized_dag(ground_truth_dag),
                         sorted_serialized_dag(json_dag))

    def test_deserialization(self):
        """A serialized DAG can be deserialized in another process.

        A daemon subprocess running ``serialize_subprocess`` feeds JSON strings
        through a multiprocessing queue; ``None`` marks the end of the stream.
        Each string is deserialized here and compared with the DAG of the same
        id returned by ``collect_dags()``.
        """
        queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=serialize_subprocess,
                                       args=(queue, ))
        proc.daemon = True
        proc.start()

        stringified_dags = {}
        # iter(callable, sentinel) keeps calling queue.get() until it yields None.
        for v in iter(queue.get, None):
            dag = SerializedDAG.from_json(v)
            self.assertIsInstance(dag, DAG)
            stringified_dags[dag.dag_id] = dag

        dags = collect_dags()
        # assertEqual prints the differing ids, unlike assertTrue(a == b).
        self.assertEqual(set(stringified_dags.keys()), set(dags.keys()))

        # Verify deserialized DAGs.
        for dag_id in stringified_dags:
            self.validate_deserialized_dag(stringified_dags[dag_id],
                                           dags[dag_id])

        example_skip_dag = stringified_dags['example_skip_dag']
        skip_operator_1_task = example_skip_dag.task_dict['skip_operator_1']
        self.validate_deserialized_task(skip_operator_1_task,
                                        'DummySkipOperator', '#e8b7e4', '#000')

        # Verify that the DAG object has 'full_filepath' attribute
        # and is equal to fileloc
        self.assertTrue(hasattr(example_skip_dag, 'full_filepath'))
        self.assertEqual(example_skip_dag.full_filepath,
                         example_skip_dag.fileloc)

        example_subdag_operator = stringified_dags['example_subdag_operator']
        section_1_task = example_subdag_operator.task_dict['section-1']
        self.validate_deserialized_task(section_1_task,
                                        SubDagOperator.__name__,
                                        SubDagOperator.ui_color,
                                        SubDagOperator.ui_fgcolor)

    def validate_deserialized_dag(self, serialized_dag, dag):
        """
        Verify that a deserialized DAG agrees with the original DAG.

        A fixed list of attributes is read from both objects and compared one
        by one; the failure message names the attribute that differs.

        :param serialized_dag: DAG obtained by deserialization
        :param dag: the original, non-serialized DAG
        """
        fields_to_check = (
            "task_ids", "params", "fileloc", "max_active_runs", "concurrency",
            "is_paused_upon_creation", "doc_md", "safe_dag_id", "is_subdag",
            "catchup", "description", "start_date", "end_date", "parent_dag",
            "template_searchpath",
        )

        for field in fields_to_check:
            self.assertEqual(
                getattr(serialized_dag, field),
                getattr(dag, field),
                f"DAG field {field!r} differs after deserialization")

    def validate_deserialized_task(self, task, task_type, ui_color,
                                   ui_fgcolor):
        """Verify a deserialized task is a SerializedBaseOperator with the expected UI data.

        :param task: task taken from a deserialized DAG
        :param task_type: expected value of ``task.task_type``
        :param ui_color: expected value of ``task.ui_color``
        :param ui_fgcolor: expected value of ``task.ui_fgcolor``
        """
        self.assertIsInstance(task, SerializedBaseOperator)
        # Verify the original operator class is recorded for UI.
        # assertEqual shows both values on failure, unlike assertTrue(a == b).
        self.assertEqual(task.task_type, task_type)
        self.assertEqual(task.ui_color, ui_color)
        self.assertEqual(task.ui_fgcolor, ui_fgcolor)

        # For a deserialized task, task.subdag is None for every operator
        # except SubDagOperator, where it is a DAG instance.
        if task.task_type == "SubDagOperator":
            self.assertIsNotNone(task.subdag)
            self.assertIsInstance(task.subdag, DAG)
        else:
            self.assertIsNone(task.subdag)
        self.assertEqual({}, task.params)
        self.assertEqual({}, task.executor_config)

    @parameterized.expand([
        (datetime(2019, 8, 1), None, datetime(2019, 8, 1)),
        (datetime(2019, 8, 1), datetime(2019, 8, 2), datetime(2019, 8, 2)),
        (datetime(2019, 8, 1), datetime(2019, 7, 30), datetime(2019, 8, 1)),
    ])
    def test_deserialization_start_date(self, dag_start_date, task_start_date,
                                        expected_task_start_date):
        """A task's start_date is serialized only when it is later than the DAG's.

        After the round trip the task must report ``expected_task_start_date``.
        """
        source_dag = DAG(dag_id='simple_dag', start_date=dag_start_date)
        BaseOperator(task_id='simple_task',
                     dag=source_dag,
                     start_date=task_start_date)

        as_dict = SerializedDAG.to_dict(source_dag)
        task_entry = as_dict["dag"]["tasks"][0]
        if task_start_date and dag_start_date < task_start_date:
            self.assertIn("start_date", task_entry)
        else:
            # No task start_date, or one not later than the DAG's: per the
            # original note, dag.add_task() makes the task use the DAG's
            # start_date, so nothing task-specific is expected in the output.
            self.assertNotIn("start_date", task_entry)

        restored_dag = SerializedDAG.from_dict(as_dict)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(restored_task.start_date, expected_task_start_date)

    @parameterized.expand([
        (datetime(2019, 8, 1), None, datetime(2019, 8, 1)),
        (datetime(2019, 8, 1), datetime(2019, 8, 2), datetime(2019, 8, 1)),
        (datetime(2019, 8, 1), datetime(2019, 7, 30), datetime(2019, 7, 30)),
    ])
    def test_deserialization_end_date(self, dag_end_date, task_end_date,
                                      expected_task_end_date):
        """A task's end_date is serialized only when it is earlier than the DAG's.

        After the round trip the task must report ``expected_task_end_date``.
        """
        source_dag = DAG(dag_id='simple_dag',
                         start_date=datetime(2019, 8, 1),
                         end_date=dag_end_date)
        BaseOperator(task_id='simple_task',
                     dag=source_dag,
                     end_date=task_end_date)

        as_dict = SerializedDAG.to_dict(source_dag)
        task_entry = as_dict["dag"]["tasks"][0]
        if task_end_date and dag_end_date > task_end_date:
            self.assertIn("end_date", task_entry)
        else:
            # No task end_date, or one not earlier than the DAG's: per the
            # original note, dag.add_task() makes the task use the DAG's
            # end_date, so nothing task-specific is expected in the output.
            self.assertNotIn("end_date", task_entry)

        restored_dag = SerializedDAG.from_dict(as_dict)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(restored_task.end_date, expected_task_end_date)

    @parameterized.expand([
        (None, None),
        ("@weekly", "@weekly"),
        ({"__type": "timedelta", "__var": 86400.0}, timedelta(days=1)),
    ])
    def test_deserialization_schedule_interval(self,
                                               serialized_schedule_interval,
                                               expected):
        """A serialized ``schedule_interval`` is restored to the expected value.

        A minimal serialized DAG dictionary is built by hand, validated against
        the schema and then deserialized.
        """
        dag_payload = {
            "default_args": {"__type": "dict", "__var": {}},
            "_dag_id": "simple_dag",
            "fileloc": __file__,
            "tasks": [],
            "timezone": "UTC",
            "schedule_interval": serialized_schedule_interval,
        }
        serialized = {"__version": 1, "dag": dag_payload}

        SerializedDAG.validate_schema(serialized)

        restored_dag = SerializedDAG.from_dict(serialized)
        self.assertEqual(restored_dag.schedule_interval, expected)

    @parameterized.expand([
        (relativedelta(days=-1),
         {"__type": "relativedelta", "__var": {"days": -1}}),
        (relativedelta(month=1, days=-1),
         {"__type": "relativedelta", "__var": {"month": 1, "days": -1}}),
        # Every Friday
        (relativedelta(weekday=FR),
         {"__type": "relativedelta", "__var": {"weekday": [4]}}),
        # Every second Friday
        (relativedelta(weekday=FR(2)),
         {"__type": "relativedelta", "__var": {"weekday": [4, 2]}}),
    ])
    def test_roundtrip_relativedelta(self, val, expected):
        """A ``relativedelta`` serializes to ``expected`` and deserializes back to an equal value."""
        encoded = SerializedDAG._serialize(val)
        self.assertDictEqual(encoded, expected)

        decoded = SerializedDAG._deserialize(encoded)
        self.assertEqual(val, decoded)

    @parameterized.expand([
        (None, {}),
        ({"param_1": "value_1"}, {"param_1": "value_1"}),
    ])
    def test_dag_params_roundtrip(self, val, expected_val):
        """
        DAG-level params survive serialization on both the DAG and its task.

        "params" appears in the serialized DAG only when a truthy value was given.
        """
        source_dag = DAG(dag_id='simple_dag', params=val)
        BaseOperator(task_id='simple_task',
                     dag=source_dag,
                     start_date=datetime(2019, 8, 1))

        as_dict = SerializedDAG.to_dict(source_dag)
        check_membership = self.assertIn if val else self.assertNotIn
        check_membership("params", as_dict["dag"])

        restored_dag = SerializedDAG.from_dict(as_dict)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(expected_val, restored_dag.params)
        self.assertEqual(expected_val, restored_task.params)

    @parameterized.expand([
        (None, {}),
        ({"param_1": "value_1"}, {"param_1": "value_1"}),
    ])
    def test_task_params_roundtrip(self, val, expected_val):
        """
        Task-level params survive serialization of the owning DAG.

        "params" appears in the serialized task only when a truthy value was given.
        """
        source_dag = DAG(dag_id='simple_dag')
        BaseOperator(task_id='simple_task',
                     dag=source_dag,
                     params=val,
                     start_date=datetime(2019, 8, 1))

        as_dict = SerializedDAG.to_dict(source_dag)
        check_membership = self.assertIn if val else self.assertNotIn
        check_membership("params", as_dict["dag"]["tasks"][0])

        restored_dag = SerializedDAG.from_dict(as_dict)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(expected_val, restored_task.params)

    def test_extra_serialized_field_and_operator_links(self):
        """
        Assert that an extra serialized field and the operator links survive a round trip.

        A ``CustomOperator`` with a string ``bash_command`` is serialized and
        deserialized. The test then checks the extra field, the single built-in
        ``CustomOpLink`` entry in the serialized task, the names of all extra
        links, and the URLs produced for the built-in link and the plugin link.

        This test also depends on GoogleLink() being registered as a plugin
        in tests/plugins/test_plugin.py

        It verifies that when extra operator links are registered in a plugin
        via ``operator_extra_links`` and are also defined on the operator in
        ``BaseOperator.operator_extra_links``, the task has the correct
        extra link.
        """
        test_date = datetime(2019, 8, 1)
        dag = DAG(dag_id='simple_dag', start_date=test_date)
        CustomOperator(task_id='simple_task', dag=dag, bash_command="true")

        serialized_dag = SerializedDAG.to_dict(dag)
        self.assertIn("bash_command", serialized_dag["dag"]["tasks"][0])

        # From here on ``dag`` is the deserialized copy.
        dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = dag.task_dict["simple_task"]
        self.assertEqual(getattr(simple_task, "bash_command"), "true")

        #########################################################
        # Verify Operator Links work with Serialized Operator
        #########################################################
        # Check Serialized version of operator link only contains the inbuilt Op Link
        self.assertEqual(
            serialized_dag["dag"]["tasks"][0]["_operator_extra_links"], [{
                'tests.test_utils.mock_operators.CustomOpLink': {}
            }])

        # Test all the extra_links are set
        self.assertCountEqual(simple_task.extra_links,
                              ['Google Custom', 'airflow', 'github', 'google'])

        # The pushed value shows up in the expected URL asserted below.
        # NOTE(review): presumably CustomOpLink reads 'search_query' from XCom -
        # confirm in tests/test_utils/mock_operators.py.
        ti = TaskInstance(task=simple_task, execution_date=test_date)
        ti.xcom_push('search_query', "dummy_value_1")

        # Test Deserialized inbuilt link
        custom_inbuilt_link = simple_task.get_extra_links(
            test_date, CustomOpLink.name)
        self.assertEqual(
            'http://google.com/custom_base_link?search=dummy_value_1',
            custom_inbuilt_link)

        # Test Deserialized link registered via Airflow Plugin
        google_link_from_plugin = simple_task.get_extra_links(
            test_date, GoogleLink.name)
        self.assertEqual("https://www.google.com", google_link_from_plugin)

    def test_extra_serialized_field_and_multiple_operator_links(self):
        """
        Assert that an extra serialized field and several operator links survive a round trip.

        Unlike the single-link test, ``bash_command`` is a two-element list.
        The serialized task is then expected to carry two
        ``CustomBaseIndexOpLink`` entries (index 0 and 1), exposed on the task
        as 'BigQuery Console #1' and 'BigQuery Console #2'.

        This test also depends on GoogleLink() being registered as a plugin
        in tests/plugins/test_plugin.py

        It verifies that when extra operator links are registered in a plugin
        via ``operator_extra_links`` and are also defined on the operator in
        ``BaseOperator.operator_extra_links``, the task has the correct
        extra links.
        """
        test_date = datetime(2019, 8, 1)
        dag = DAG(dag_id='simple_dag', start_date=test_date)
        CustomOperator(task_id='simple_task',
                       dag=dag,
                       bash_command=["echo", "true"])

        serialized_dag = SerializedDAG.to_dict(dag)
        self.assertIn("bash_command", serialized_dag["dag"]["tasks"][0])

        # From here on ``dag`` is the deserialized copy.
        dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = dag.task_dict["simple_task"]
        self.assertEqual(getattr(simple_task, "bash_command"),
                         ["echo", "true"])

        #########################################################
        # Verify Operator Links work with Serialized Operator
        #########################################################
        # Check Serialized version of operator link only contains the inbuilt Op Link
        self.assertEqual(
            serialized_dag["dag"]["tasks"][0]["_operator_extra_links"], [
                {
                    'tests.test_utils.mock_operators.CustomBaseIndexOpLink': {
                        'index': 0
                    }
                },
                {
                    'tests.test_utils.mock_operators.CustomBaseIndexOpLink': {
                        'index': 1
                    }
                },
            ])

        # Test all the extra_links are set
        self.assertCountEqual(simple_task.extra_links, [
            'BigQuery Console #1', 'BigQuery Console #2', 'airflow', 'github',
            'google'
        ])

        # Two values are pushed; each one shows up in one of the expected URLs below.
        # NOTE(review): presumably each indexed link picks the XCom list element
        # at its own index - confirm in tests/test_utils/mock_operators.py.
        ti = TaskInstance(task=simple_task, execution_date=test_date)
        ti.xcom_push('search_query', ["dummy_value_1", "dummy_value_2"])

        # Test Deserialized inbuilt link #1
        custom_inbuilt_link = simple_task.get_extra_links(
            test_date, "BigQuery Console #1")
        self.assertEqual(
            'https://console.cloud.google.com/bigquery?j=dummy_value_1',
            custom_inbuilt_link)

        # Test Deserialized inbuilt link #2
        custom_inbuilt_link = simple_task.get_extra_links(
            test_date, "BigQuery Console #2")
        self.assertEqual(
            'https://console.cloud.google.com/bigquery?j=dummy_value_2',
            custom_inbuilt_link)

        # Test Deserialized link registered via Airflow Plugin
        google_link_from_plugin = simple_task.get_extra_links(
            test_date, GoogleLink.name)
        self.assertEqual("https://www.google.com", google_link_from_plugin)

    def test_dag_serialized_fields_with_schema(self):
        """
        The schema's DAG properties and ``DAG.get_serialized_fields()`` must agree.

        Additional properties are disabled on DAGs, so every serialized field
        has to be listed in the schema definition.
        """
        schema_properties: dict = (
            load_dag_schema_dict()["definitions"]["dag"]["properties"])

        # Keys added manually during serialization; they are left out of the
        # comparison with get_serialized_fields().
        manually_added: set = {"is_subdag", "tasks"}
        expected_fields: set = set(schema_properties) - manually_added
        self.assertEqual(set(DAG.get_serialized_fields()), expected_fields)

    def test_no_new_fields_added_to_base_operator(self):
        """
        Guard against fields being added to BaseOperator unnoticed.

        The instance ``__dict__`` of a freshly created ``BaseOperator`` is
        compared with a hard-coded snapshot of attribute names and default
        values. When the comparison fails, the long message below reminds the
        author to update the snapshot and the serialization schema.
        """
        base_operator = BaseOperator(task_id="10")
        # Instance attributes only; class-level attributes are not in __dict__.
        fields = base_operator.__dict__
        self.assertEqual(
            {
                '_dag': None,
                '_downstream_task_ids': set(),
                '_inlets': [],
                '_log': base_operator.log,
                '_outlets': [],
                '_upstream_task_ids': set(),
                'depends_on_past': False,
                'do_xcom_push': True,
                'email': None,
                'email_on_failure': True,
                'email_on_retry': True,
                'end_date': None,
                'execution_timeout': None,
                'executor_config': {},
                'inlets': [],
                'max_retry_delay': None,
                'on_execute_callback': None,
                'on_failure_callback': None,
                'on_retry_callback': None,
                'on_success_callback': None,
                'outlets': [],
                'owner': 'airflow',
                'params': {},
                'pool': 'default_pool',
                'pool_slots': 1,
                'priority_weight': 1,
                'queue': 'default',
                'resources': None,
                'retries': 0,
                'retry_delay': timedelta(0, 300),
                'retry_exponential_backoff': False,
                'run_as_user': None,
                'sla': None,
                'start_date': None,
                'subdag': None,
                'task_concurrency': None,
                'task_id': '10',
                'trigger_rule': 'all_success',
                'wait_for_downstream': False,
                'weight_rule': 'downstream'
            }, fields, """
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

     ACTION NEEDED! PLEASE READ THIS CAREFULLY AND CORRECT TESTS CAREFULLY

 Some fields were added to the BaseOperator! Please add them to the list above and make sure that
 you add support for DAG serialization - you should add the field to
 `airflow/serialization/schema.json` - they should have correct type defined there.

 Note that we do not support versioning yet so you should only add optional fields to BaseOperator.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                         """)
Exemplo n.º 15
0
 def handle(self) -> date:
     """Return ``self.result`` plus ``relativedelta(weekday=FR(self.number))``."""
     anchor = self.result
     return anchor + relativedelta(weekday=FR(self.number))
    def _populate(self, year):
        """
        Add the Australian public holidays for ``year`` to this calendar.

        Nationwide holidays are always added; state and territory holidays
        are chosen by ``self.prov``. Substitute "(Observed)" days are only
        added when ``self.observed`` is true.

        :param year: calendar year to populate.
        """
        # ACT:  Holidays Act 1958
        # NSW:  Public Holidays Act 2010
        # NT:   Public Holidays Act 2013
        # QLD:  Holidays Act 1983
        # SA:   Holidays Act 1910
        # TAS:  Statutory Holidays Act 2000
        # VIC:  Public Holidays Act 1993
        # WA:   Public and Bank Holidays Act 1972

        # TODO do more research on history of Aus holidays

        # New Year's Day
        name = "New Year's Day"
        jan1 = date(year, JAN, 1)
        self[jan1] = name
        if self.observed and jan1.weekday() in WEEKEND:
            self[jan1 + rd(weekday=MO)] = name + " (Observed)"

        # Australia Day
        jan26 = date(year, JAN, 26)
        if year >= 1935:
            if self.prov == 'NSW' and year < 1946:
                name = "Anniversary Day"
            else:
                name = "Australia Day"
            self[jan26] = name
            if self.observed and year >= 1946 and jan26.weekday() in WEEKEND:
                self[jan26 + rd(weekday=MO)] = name + " (Observed)"
        elif year >= 1888 and self.prov != 'SA':
            name = "Anniversary Day"
            self[jan26] = name

        # Adelaide Cup
        if self.prov == 'SA':
            name = "Adelaide Cup"
            if year >= 2006:
                # subject to proclamation ?!?!
                self[date(year, MAR, 1) + rd(weekday=MO(+2))] = name
            else:
                self[date(year, MAR, 1) + rd(weekday=MO(+3))] = name

        # Canberra Day
        # Info from https://www.timeanddate.com/holidays/australia/canberra-day
        # and https://en.wikipedia.org/wiki/Canberra_Day
        if self.prov == 'ACT' and year >= 1913:
            name = "Canberra Day"
            if 1913 <= year <= 1957:
                self[date(year, MAR, 12)] = name
            elif 1958 <= year <= 2007:
                self[date(year, MAR, 1) + rd(weekday=MO(+3))] = name
            elif year >= 2008 and year != 2012:
                self[date(year, MAR, 1) + rd(weekday=MO(+2))] = name
            elif year == 2012:
                self[date(year, MAR, 12)] = name

        # Easter
        self[easter(year) + rd(weekday=FR(-1))] = "Good Friday"
        if self.prov in ('ACT', 'NSW', 'NT', 'QLD', 'SA', 'VIC'):
            self[easter(year) + rd(weekday=SA(-1))] = "Easter Saturday"
        if self.prov in ('ACT', 'NSW', 'QLD', 'VIC'):
            self[easter(year)] = "Easter Sunday"
        self[easter(year) + rd(weekday=MO)] = "Easter Monday"

        # Anzac Day
        if year > 1920:
            name = "Anzac Day"
            apr25 = date(year, APR, 25)
            self[apr25] = name
            if self.observed:
                if apr25.weekday() == SAT and self.prov in ('WA', 'NT'):
                    self[apr25 + rd(weekday=MO)] = name + " (Observed)"
                elif (apr25.weekday() == SUN
                      and self.prov in ('ACT', 'QLD', 'SA', 'WA', 'NT')):
                    self[apr25 + rd(weekday=MO)] = name + " (Observed)"

        # Western Australia Day
        if self.prov == 'WA' and year > 1832:
            if year >= 2015:
                name = "Western Australia Day"
            else:
                name = "Foundation Day"
            self[date(year, JUN, 1) + rd(weekday=MO(+1))] = name

        # Sovereign's Birthday
        # The name is decided once here and must not be reset below:
        # Elizabeth II acceded in 1952, so 1936-1951 stay "King's Birthday".
        if year >= 1952:
            name = "Queen's Birthday"
        elif year > 1901:
            name = "King's Birthday"
        if year >= 1936:
            if self.prov == 'QLD':
                if year == 2012:
                    self[date(year, JUN, 11)] = "Queen's Diamond Jubilee"
                if year < 2016 and year != 2012:
                    dt = date(year, JUN, 1) + rd(weekday=MO(+2))
                    self[dt] = name
                else:
                    dt = date(year, OCT, 1) + rd(weekday=MO)
                    self[dt] = name
            elif self.prov == 'WA':
                # by proclamation ?!?!
                self[date(year, OCT, 1) + rd(weekday=MO(-1))] = name
            elif self.prov in ('NSW', 'VIC', 'ACT', 'SA', 'NT', 'TAS'):
                dt = date(year, JUN, 1) + rd(weekday=MO(+2))
                self[dt] = name
        elif year > 1911:
            self[date(year, JUN, 3)] = name  # George V
        elif year > 1901:
            self[date(year, NOV, 9)] = name  # Edward VII

        # Picnic Day
        if self.prov == 'NT':
            name = "Picnic Day"
            self[date(year, AUG, 1) + rd(weekday=MO)] = name

        # Bank Holiday
        if self.prov == 'NSW':
            if year >= 1912:
                name = "Bank Holiday"
                self[date(year, AUG, 1) + rd(weekday=MO)] = name

        # Labour Day
        name = "Labour Day"
        if self.prov in ('NSW', 'ACT', 'SA'):
            self[date(year, OCT, 1) + rd(weekday=MO)] = name
        elif self.prov == 'WA':
            self[date(year, MAR, 1) + rd(weekday=MO)] = name
        elif self.prov == 'VIC':
            self[date(year, MAR, 1) + rd(weekday=MO(+2))] = name
        elif self.prov == 'QLD':
            if 2013 <= year <= 2015:
                self[date(year, OCT, 1) + rd(weekday=MO)] = name
            else:
                self[date(year, MAY, 1) + rd(weekday=MO)] = name
        elif self.prov == 'NT':
            name = "May Day"
            self[date(year, MAY, 1) + rd(weekday=MO)] = name
        elif self.prov == 'TAS':
            name = "Eight Hours Day"
            self[date(year, MAR, 1) + rd(weekday=MO(+2))] = name

        # Family & Community Day
        if self.prov == 'ACT':
            name = "Family & Community Day"
            if 2007 <= year <= 2009:
                self[date(year, NOV, 1) + rd(weekday=TU)] = name
            elif year == 2010:
                # first Monday of the September/October school holidays
                # moved to the second Monday if this falls on Labour day
                # TODO need a formula for the ACT school holidays then
                # http://www.cmd.act.gov.au/communication/holidays
                self[date(year, SEP, 26)] = name
            elif year == 2011:
                self[date(year, OCT, 10)] = name
            elif year == 2012:
                self[date(year, OCT, 8)] = name
            elif year == 2013:
                self[date(year, SEP, 30)] = name
            elif year == 2014:
                self[date(year, SEP, 29)] = name
            elif year == 2015:
                self[date(year, SEP, 28)] = name
            elif year == 2016:
                self[date(year, SEP, 26)] = name
            elif year == 2017:
                self[date(year, SEP, 25)] = name

        # Reconciliation Day
        if self.prov == 'ACT':
            name = "Reconciliation Day"
            if year >= 2018:
                self[date(year, MAY, 27) + rd(weekday=MO)] = name

        if self.prov == 'VIC':
            # Grand Final Day
            if year == 2020:
                # Rescheduled due to COVID-19
                self[date(year, OCT, 23)] = "Grand Final Day"
            elif year >= 2015:
                self[date(year, SEP, 24) + rd(weekday=FR)] = "Grand Final Day"

            # Melbourne Cup
            self[date(year, NOV, 1) + rd(weekday=TU)] = "Melbourne Cup"

        # The Royal Queensland Show (Ekka)
        # The Show starts on the first Friday of August - providing this is
        # not prior to the 5th - in which case it will begin on the second
        # Friday. The Wednesday during the show is a public holiday.
        if self.prov == 'QLD':
            name = "The Royal Queensland Show"
            if year == 2020:
                self[date(year, AUG, 14)] = name
            else:
                show_start = date(year, AUG, 5) + rd(weekday=FR)
                self[show_start + rd(weekday=WE)] = name

        # Christmas Day
        name = "Christmas Day"
        dec25 = date(year, DEC, 25)
        self[dec25] = name
        if self.observed and dec25.weekday() in WEEKEND:
            self[date(year, DEC, 27)] = name + " (Observed)"

        # Boxing Day
        if self.prov == 'SA':
            name = "Proclamation Day"
        else:
            name = "Boxing Day"
        dec26 = date(year, DEC, 26)
        self[dec26] = name
        if self.observed and dec26.weekday() in WEEKEND:
            self[date(year, DEC, 28)] = name + " (Observed)"
Exemplo n.º 17
0
    def _populate(self, year):
        """
        Add Ecuador's public holidays for ``year`` to this calendar.

        For years after 2015 (Law 858 / Reform Law to the LOSEP, R.O. # 906)
        the movable fixed-date holidays are shifted as follows:

        * Saturday or Tuesday -> the previous day (Friday or Monday);
        * Sunday -> the following Monday;
        * Wednesday or Thursday -> the Friday of that week.

        Parameters
        ----------
        year : int
            Calendar year to populate.

        Returns
        -------
        None
            Holidays are stored on ``self``, keyed by date.
        """
        one_day = datetime.timedelta(days=1)

        def rest_day(holiday):
            """Return the date on which ``holiday`` is actually taken."""
            if year <= 2015:
                return holiday
            weekday = holiday.weekday()
            if weekday in (5, 1):  # Saturday / Tuesday -> previous day
                return holiday - one_day
            if weekday == 6:  # Sunday -> following Monday
                return holiday + one_day
            if weekday in (2, 3):  # Wednesday / Thursday -> Friday
                return holiday + rd(weekday=FR)
            return holiday

        # New Year's Day
        self[datetime.date(year, JAN, 1)] = "Año Nuevo [New Year's Day]"

        # Christmas
        self[datetime.date(year, DEC, 25)] = "Navidad [Christmas]"

        # Holy Week
        easter_day = easter(year)
        self[easter_day + rd(weekday=FR(-1))] = "Semana Santa (Viernes Santo) [Good Friday)]"
        self[easter_day] = "Día de Pascuas [Easter Day]"

        # Carnival: the Monday and Tuesday right before the 46 days of Lent
        total_lent_days = 46
        self[easter_day - datetime.timedelta(days=total_lent_days + 2)] = "Lunes de carnaval [Carnival of Monday)]"
        self[easter_day - datetime.timedelta(days=total_lent_days + 1)] = "Martes de carnaval [Tuesday of Carnival)]"

        # Labor day
        name = "Día Nacional del Trabajo [Labour Day]"
        self[rest_day(datetime.date(year, MAY, 1))] = name

        # Pichincha battle, the rules are the same as the labor day
        name = "Batalla del Pichincha [Pichincha Battle]"
        self[rest_day(datetime.date(year, MAY, 24))] = name

        # First Cry of Independence, the rules are the same as the labor day
        name = "Primer Grito de la Independencia [First Cry of Independence]"
        self[rest_day(datetime.date(year, AUG, 10))] = name

        # Guayaquil's independence, the rules are the same as the labor day
        name = "Independencia de Guayaquil [Guayaquil's Independence]"
        self[rest_day(datetime.date(year, OCT, 9))] = name

        # Day of the Dead and
        namedd = "Día de los difuntos [Day of the Dead]"
        # Independence of Cuenca
        nameic = "Independencia de Cuenca [Independence of Cuenca]"
        # (Law 858/Reform Law to the LOSEP (in force since December 21, 2016 /R.O # 906))
        # For national and/or local holidays that coincide on continuous days,
        # the following rules will apply:
        # NOTE(review): unlike the holidays above, these rules are applied to
        # every year, not only after 2015 - confirm this is intended.
        nov2 = datetime.date(year, NOV, 2)
        nov3 = datetime.date(year, NOV, 3)
        nov3_weekday = nov3.weekday()
        if nov2.weekday() == 5 and nov3_weekday == 6:
            self[nov2 - one_day] = namedd
            self[nov3 + one_day] = nameic
        elif nov3_weekday == 2:
            self[nov2] = namedd
            self[nov3 - datetime.timedelta(days=2)] = nameic
        elif nov3_weekday == 3:
            self[nov3] = nameic
            self[nov2 + datetime.timedelta(days=2)] = namedd
        elif nov3_weekday == 5:
            self[nov2] = namedd
            self[nov3 - datetime.timedelta(days=2)] = nameic
        elif nov3_weekday == 0:
            self[nov3] = nameic
            self[nov2 + datetime.timedelta(days=2)] = namedd
        else:
            self[nov2] = namedd
            self[nov3] = nameic

        # Foundation of Quito, applies only to Pichincha province,
        # the rules are the same as the labor day
        name = "Fundación de Quito [Foundation of Quito]"
        # Compare for equality: `in ("EC-P")` would be a substring test
        # against a plain string, not membership in a tuple.
        if self.prov == "EC-P":
            self[rest_day(datetime.date(year, DEC, 6))] = name
Exemplo n.º 18
0
    def _populate(self, year):
        """
        Register Paraguay's public holidays for ``year``.

        When ``self.observed`` is false, the guarded holidays below are left
        out if they land on a weekend. Patriots Day and Peace in Chaco Day are
        moved to the following Monday when their weekday is ``WED`` or later.
        """

        def is_listed(day):
            # Dropped only when observance is off and the day hits a weekend.
            return self.observed or day.weekday() not in WEEKEND

        def add_fixed(day, label):
            # Holiday that always stays on its calendar date.
            if is_listed(day):
                self[day] = label

        def add_movable(day, label):
            # Holiday pushed to the next Monday from WED onwards.
            if not is_listed(day):
                return
            if day.weekday() >= WED:
                self[day + rd(weekday=MO(+1))] = label
            else:
                self[day] = label

        # New Year's Day
        add_fixed(date(year, JAN, 1), "Año Nuevo [New Year's Day]")

        # Patriots day
        add_movable(date(year, MAR, 1),
                    "Día de los Héroes de la Patria[Patriots Day]")

        # Holy Week
        easter_day = easter(year)
        self[easter_day + rd(weekday=TH(-1))] = \
            "Semana Santa (Jueves Santo)  [Holy day (Holy Thursday)]"
        self[easter_day + rd(weekday=FR(-1))] = \
            "Semana Santa (Viernes Santo)  [Holy day (Holy Friday)]"
        add_fixed(easter_day, 'Día de Pascuas [Easter Day]')

        # Labor Day
        add_fixed(date(year, MAY, 1), "Día de los Trabajadores [Labour Day]")

        # Independence Day
        add_fixed(date(year, MAY, 15),
                  "Día de la Independencia Nacional [Independence Day]")

        # Peace in Chaco Day.
        add_movable(date(year, JUN, 12),
                    "Día de la Paz del Chaco [Peace in Chaco Day]")

        # Asuncion Fundation's Day
        add_fixed(date(year, AUG, 15),
                  "Día de la Fundación de Asunción [Asuncion Fundation's Day]")

        # Boqueron's Battle
        add_fixed(date(year, SEP, 29),
                  "Batalla de Boquerón [Boqueron's Battle]")

        # Caacupe Virgin Day
        add_fixed(date(year, DEC, 8),
                  "Día de la Virgen de Caacupé [Caacupe Virgin Day]")

        # Christmas is added unconditionally
        self[date(year, DEC, 25)] = "Navidad [Christmas]"
Exemplo n.º 19
0
    def _populate(self, year):
        """
        Add the Canadian public holidays for ``year`` to this calendar.

        Holidays are selected by ``self.prov`` and by the first year each one
        applies. Substitute "(Observed)" entries are only added when
        ``self.observed`` is true.

        :param year: calendar year to populate.
        """
        # New Year's Day
        if year >= 1867:
            name = "New Year's Day"
            self[date(year, JAN, 1)] = name
            if self.observed and date(year, JAN, 1).weekday() == SUN:
                self[date(year, JAN, 1) + rd(days=+1)] = name + " (Observed)"
            elif self.observed and date(year, JAN, 1).weekday() == SAT:
                # Add Dec 31st from the previous year without triggering
                # the entire year to be added
                expand = self.expand
                self.expand = False
                self[date(year, JAN, 1) + rd(days=-1)] = name + " (Observed)"
                self.expand = expand
            # The next year's observed New Year's Day can be in this year
            # when it falls on a Friday (Jan 1st is a Saturday)
            if self.observed and date(year, DEC, 31).weekday() == FRI:
                self[date(year, DEC, 31)] = name + " (Observed)"

        # Family Day / Louis Riel Day (MB) / Islander Day (PE)
        # / Heritage Day (NS, YT)
        # The branches overlap on purpose; the first matching one wins.
        if self.prov in ("AB", "SK", "ON") and year >= 2008:
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov in ("AB", "SK") and year >= 2007:
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov == "AB" and year >= 1990:
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov == "NB" and year >= 2018:
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov == "BC":
            if year >= 2013 and year <= 2018:
                self[date(year, FEB, 1) + rd(weekday=MO(+2))] = "Family Day"
            elif year > 2018:
                self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov == "MB" and year >= 2008:
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Louis Riel Day"
        elif self.prov == "PE" and year >= 2010:
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Islander Day"
        elif self.prov == "PE" and year == 2009:
            self[date(year, FEB, 1) + rd(weekday=MO(+2))] = "Islander Day"
        elif self.prov == "NS" and year >= 2015:
            # http://novascotia.ca/lae/employmentrights/NovaScotiaHeritageDay.asp
            self[date(year, FEB, 1) + rd(weekday=MO(+3))] = "Heritage Day"
        elif self.prov == "YT":
            # start date?
            # http://heritageyukon.ca/programs/heritage-day
            # https://en.wikipedia.org/wiki/Family_Day_(Canada)#Yukon_Heritage_Day
            # Friday before the last Sunday in February
            dt = date(year, MAR, 1) + rd(weekday=SU(-1)) + rd(weekday=FR(-1))
            self[dt] = "Heritage Day"

        # St. Patrick's Day
        if self.prov == "NL" and year >= 1900:
            dt = date(year, MAR, 17)
            # Nearest Monday to March 17
            dt1 = date(year, MAR, 17) + rd(weekday=MO(-1))
            dt2 = date(year, MAR, 17) + rd(weekday=MO(+1))
            # `<=`: a tie between the two Mondays goes to the later one
            if dt2 - dt <= dt - dt1:
                self[dt2] = "St. Patrick's Day"
            else:
                self[dt1] = "St. Patrick's Day"

        # Good Friday
        if self.prov != "QC" and year >= 1867:
            self[easter(year) + rd(weekday=FR(-1))] = "Good Friday"

        # Easter Monday
        if self.prov == "QC" and year >= 1867:
            self[easter(year) + rd(weekday=MO)] = "Easter Monday"

        # St. George's Day
        if self.prov == "NL" and year == 2010:
            # 4/26 is the Monday closer to 4/23 in 2010
            # but the holiday was observed on 4/19
            self[date(2010, 4, 19)] = "St. George's Day"
        elif self.prov == "NL" and year >= 1990:
            dt = date(year, APR, 23)
            # Nearest Monday to April 23
            dt1 = dt + rd(weekday=MO(-1))
            dt2 = dt + rd(weekday=MO(+1))
            # strict `<` here: a tie goes to the earlier Monday
            if dt2 - dt < dt - dt1:
                self[dt2] = "St. George's Day"
            else:
                self[dt1] = "St. George's Day"

        # Victoria Day / National Patriots' Day (QC)
        if self.prov not in ("NB", "NS", "PE", "NL", "QC") and year >= 1953:
            self[date(year, MAY, 24) + rd(weekday=MO(-1))] = "Victoria Day"
        elif self.prov == "QC" and year >= 1953:
            name = "National Patriots' Day"
            self[date(year, MAY, 24) + rd(weekday=MO(-1))] = name

        # National Aboriginal Day
        if self.prov == "NT" and year >= 1996:
            self[date(year, JUN, 21)] = "National Aboriginal Day"

        # St. Jean Baptiste Day
        if self.prov == "QC" and year >= 1925:
            self[date(year, JUN, 24)] = "St. Jean Baptiste Day"
            if self.observed and date(year, JUN, 24).weekday() == SUN:
                self[date(year, JUN, 25)] = "St. Jean Baptiste Day (Observed)"

        # Discovery Day
        if self.prov == "NL" and year >= 1997:
            dt = date(year, JUN, 24)
            # Nearest Monday to June 24
            dt1 = dt + rd(weekday=MO(-1))
            dt2 = dt + rd(weekday=MO(+1))
            # `<=`: a tie between the two Mondays goes to the later one
            if dt2 - dt <= dt - dt1:
                self[dt2] = "Discovery Day"
            else:
                self[dt1] = "Discovery Day"
        elif self.prov == "YT" and year >= 1912:
            self[date(year, AUG, 1) + rd(weekday=MO(+3))] = "Discovery Day"

        # Canada Day / Memorial Day (NL)
        if self.prov != "NL" and year >= 1867:
            if year >= 1983:
                name = "Canada Day"
            else:
                name = "Dominion Day"
            self[date(year, JUL, 1)] = name
            if (
                year >= 1879
                and self.observed
                and date(year, JUL, 1).weekday() in WEEKEND
            ):
                self[date(year, JUL, 1) + rd(weekday=MO)] = (
                    name + " (Observed)"
                )
        elif year >= 1867:
            # Only reached for NL: same dates, different name from 1983 on
            if year >= 1983:
                name = "Memorial Day"
            else:
                name = "Dominion Day"
            self[date(year, JUL, 1)] = name
            if (
                year >= 1879
                and self.observed
                and date(year, JUL, 1).weekday() in WEEKEND
            ):
                self[date(year, JUL, 1) + rd(weekday=MO)] = (
                    name + " (Observed)"
                )

        # Nunavut Day
        if self.prov == "NU" and year >= 2001:
            self[date(year, JUL, 9)] = "Nunavut Day"
            if self.observed and date(year, JUL, 9).weekday() == SUN:
                self[date(year, JUL, 10)] = "Nunavut Day (Observed)"
        elif self.prov == "NU" and year == 2000:
            self[date(2000, 4, 1)] = "Nunavut Day"

        # Civic Holiday
        if self.prov in ("ON", "MB", "NT") and year >= 1900:
            self[date(year, AUG, 1) + rd(weekday=MO)] = "Civic Holiday"
        elif self.prov == "AB" and year >= 1974:
            # https://en.wikipedia.org/wiki/Civic_Holiday#Alberta
            self[date(year, AUG, 1) + rd(weekday=MO)] = "Heritage Day"
        elif self.prov == "BC" and year >= 1974:
            # https://en.wikipedia.org/wiki/Civic_Holiday
            self[date(year, AUG, 1) + rd(weekday=MO)] = "British Columbia Day"
        elif self.prov == "NB" and year >= 1900:
            # https://en.wikipedia.org/wiki/Civic_Holiday
            self[date(year, AUG, 1) + rd(weekday=MO)] = "New Brunswick Day"
        elif self.prov == "SK" and year >= 1900:
            # https://en.wikipedia.org/wiki/Civic_Holiday
            self[date(year, AUG, 1) + rd(weekday=MO)] = "Saskatchewan Day"

        # Labour Day
        if year >= 1894:
            self[date(year, SEP, 1) + rd(weekday=MO)] = "Labour Day"

        # Thanksgiving
        if self.prov not in ("NB", "NS", "PE", "NL") and year >= 1931:
            if year == 1935:
                # in 1935, Canadian Thanksgiving was moved due to the General
                # Election falling on the second Monday of October
                # https://books.google.ca/books?id=KcwlQsmheG4C&pg=RA1-PA1940&lpg=RA1-PA1940&dq=canada+thanksgiving+1935&source=bl&ots=j4qYrcfGuY&sig=gxXeAQfXVsOF9fOwjSMswPHJPpM&hl=en&sa=X&ved=0ahUKEwjO0f3J2PjOAhVS4mMKHRzKBLAQ6AEIRDAG#v=onepage&q=canada%20thanksgiving%201935&f=false
                self[date(1935, 10, 25)] = "Thanksgiving"
            else:
                self[date(year, OCT, 1) + rd(weekday=MO(+2))] = "Thanksgiving"

        # Remembrance Day
        # ON and QC appear in `provinces` but in neither branch, so they get
        # no Remembrance Day entry here.
        name = "Remembrance Day"
        provinces = ("ON", "QC", "NS", "NL", "NT", "PE", "SK")
        if self.prov not in provinces and year >= 1931:
            self[date(year, NOV, 11)] = name
        elif self.prov in ("NS", "NL", "NT", "PE", "SK") and year >= 1931:
            self[date(year, NOV, 11)] = name
            if self.observed and date(year, NOV, 11).weekday() == SUN:
                name = name + " (Observed)"
                self[date(year, NOV, 11) + rd(weekday=MO)] = name

        # Christmas Day
        if year >= 1867:
            self[date(year, DEC, 25)] = "Christmas Day"
            if self.observed and date(year, DEC, 25).weekday() == SAT:
                self[date(year, DEC, 24)] = "Christmas Day (Observed)"
            elif self.observed and date(year, DEC, 25).weekday() == SUN:
                self[date(year, DEC, 26)] = "Christmas Day (Observed)"

        # Boxing Day
        # When an observed entry is added, Dec 26 itself is not added as
        # "Boxing Day"; only the final `else` adds the plain entry.
        if year >= 1867:
            name = "Boxing Day"
            name_observed = name + " (Observed)"
            if self.observed and date(year, DEC, 26).weekday() in WEEKEND:
                self[date(year, DEC, 26) + rd(weekday=MO)] = name_observed
            elif self.observed and date(year, DEC, 26).weekday() == 0:
                # weekday() == 0 is Monday
                self[date(year, DEC, 27)] = name_observed
            else:
                self[date(year, DEC, 26)] = name
Exemplo n.º 20
0
		os.mkdir(save_path)
	
	# 1. get PMI image
	url = "https://tradingeconomics.com/united-states/business-confidence"
	soup = getSoup(url)

	imgs = soup.find_all('img')
	img_url = imgs[0]['src']

	today = datetime.now()

	pmi_nm = "PMI_image" #+ today_str
	importImgFromURL(save_path, img_url, pmi_nm)

	# 2. S&P500 image	
	last_friday = today + relativedelta(weekday=FR(-1))
	last_friday_str = last_friday.strftime("%m%d%y")

	pdf_url = "https://www.factset.com/hubfs/Website/Resources%20Section/Research%20Desk/Earnings%20Insight/EarningsInsight_" + last_friday_str + "A.pdf"
	headers = {'Referer': pdf_url,
		   'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
	save_name = "12fwd_" + last_friday_str
	
	pdfDownload(save_path, pdf_url, headers, save_name)

	file_logic = not os.path.isfile( save_path + save_name )
	
	i = 0
	while file_logic:
		i += 1
		last_friday = today + relativedelta(weekday=FR(-i))
Exemplo n.º 21
0
    def _populate(self, year):
        """Register the Chilean holidays of ``year`` on this calendar.

        Each holiday is stored as ``self[<date>] = <name>``.  Nationwide
        holidays are written first, in calendar order; the two regional
        holidays, selected through ``self.state``, are written last.
        """
        patrias = "Fiestas Patrias [Holiday]"

        def floating_monday(day):
            """Return the date on which a floating holiday is kept (Law 19.668).

            Monday to Thursday resolve through ``MO(-1)``, Friday through
            ``MO``; Saturday and Sunday are returned unchanged.
            """
            weekday = day.weekday()
            if weekday <= THU:
                return day + rd(day, weekday=MO(-1))
            if weekday == FRI:
                return day + rd(weekday=MO)
            return day

        # New Year's Day (Law 2.977)
        new_year = date(year, JAN, 1)
        self[new_year] = "Año Nuevo [New Year's Day]"
        # Day after, if it's a Sunday (Law 20.983)
        if year > 2016 and new_year.weekday() == SUN:
            self[date(year, JAN, 2)] = patrias

        # Holy Week (Law 2.977)
        easter_day = easter(year)
        self[easter_day + rd(weekday=FR(-1))] = (
            "Semana Santa (Viernes Santo) [Good Friday)]")
        self[easter_day + rd(weekday=SA(-1))] = (
            "Semana Santa (Sábado Santo) [Good Saturday)]")
        self[easter_day] = "Día de Pascuas [Easter Day]"

        # Labor Day (Law 2.200, renamed with Law 18.018)
        self[date(year, MAY, 1)] = "Día Nacional del Trabajo [Labour Day]"

        # Naval Glories Day (Law 2.977)
        self[date(year, MAY, 21)] = "Día de las Glorias Navales [Navy Day]"

        # National Day of Indigenous Peoples: June 21 in 2021, June 19 after
        if year >= 2021:
            indigenous_day = 21 if year == 2021 else 19
            self[date(year, JUN, indigenous_day)] = (
                "Día Nacional de los Pueblos Indígenas")

        # Saint Peter and Saint Paul (Law 18.432)
        # NOTE(review): confirm that 2020 is the intended first year for the
        # floating Monday rule (Law 19.668) used here and for October 12.
        peter_paul = "San Pedro y San Pablo [Saint Peter and Saint Paul]"
        jun29 = date(year, JUN, 29)
        if year < 2020:
            self[jun29] = peter_paul
        else:
            self[floating_monday(jun29)] = peter_paul

        # Day of Virgin of Carmen (Law 20.148)
        if year > 2006:
            self[date(year, JUL, 16)] = (
                "Virgen del Carmen [Our Lady of Mount Carmel]")

        # Day of Assumption of the Virgin (Law 2.977)
        self[date(year, AUG, 15)] = (
            "Asunción de la Virgen [Assumption of Mary]")

        # Extra national holiday on September 17:
        #  - Friday preceding a Saturday Independence Day (Law 20.983)
        #  - Monday preceding a Tuesday Independence Day (Law 20.215)
        sep18 = date(year, SEP, 18)
        sep18_weekday = sep18.weekday()
        if ((year > 2016 and sep18_weekday == SAT)
                or (year > 2007 and sep18_weekday == TUE)):
            self[date(year, SEP, 17)] = patrias

        # Independence Day (Law 2.977)
        self[sep18] = "Día de la Independencia [Independence Day]"

        # Day of Glories of the Army of Chile (Law 2.977)
        sep19 = date(year, SEP, 19)
        self[sep19] = "Día de las Glorias del Ejército [Army Day]"

        # National Holiday Friday following Army Day (Law 20.215)
        if year > 2007 and sep19.weekday() == THU:
            self[date(year, SEP, 20)] = patrias

        # Day of the Meeting of Two Worlds (Law 3.810)
        oct12 = date(year, OCT, 12)
        if year < 2010:
            self[oct12] = "Día de la Raza [Columbus day]"
        elif year < 2020:
            self[oct12] = ("Día del Respeto a la Diversidad"
                           " [Day of the Meeting "
                           " of Two Worlds]")
        else:
            # floating Monday holiday (Law 19.668)
            self[floating_monday(oct12)] = (
                "Día del Descubrimiento de dos Mundos [Columbus Day]")

        # National Day of the Evangelical and Protestant Churches (Law 20.299)
        if year > 2007:
            self[date(year, OCT, 31)] = (
                "Día Nacional de las Iglesias Evangélicas y Protestantes "
                " [Reformation Day]")

        # All Saints Day (Law 2.977)
        self[date(year, NOV, 1)] = "Día de Todos los Santos [All Saints Day]"

        # Immaculate Conception (Law 2.977)
        self[date(year, DEC, 8)] = ("La Inmaculada Concepción"
                                    " [Immaculate Conception]")

        # Christmas (Law 2.977)
        self[date(year, DEC, 25)] = "Navidad [Christmas]"

        # Arica y Parinacota region (Law 20.663)
        if self.state == "AP" and year >= 2020:
            self[date(year, JUN, 7)] = ("Asalto y Toma del Morro de Arica"
                                        " [Assault and Capture of Cape Arica]")

        # Ñuble region (Law 20.678)
        if self.state == "NB" and year >= 2014:
            self[date(year, AUG, 20)] = (
                "Nacimiento del Prócer de la Independencia"
                " (Chillán y Chillán Viejo)"
                " [Nativity of Bernardo O'Higgins]")
Exemplo n.º 22
0
    def _populate(self, year):
        """Register the New Zealand holidays of ``year`` on this calendar.

        Nothing is added for years before 1894.  National holidays are stored
        first (``self[<date>] = <name>``), followed by the anniversary day of
        the province named by ``self.prov`` when it matches one of the codes
        or names handled below.  "(Observed)" entries are only added when
        ``self.observed`` is truthy.
        """
        # Legislation behind the dates below:
        # Bank Holidays Act 1873
        # The Employment of Females Act 1873
        # Factories Act 1894
        # Industrial Conciliation and Arbitration Act 1894
        # Labour Day Act 1899
        # Anzac Day Act 1920, 1949, 1956
        # New Zealand Day Act 1973
        # Waitangi Day Act 1960, 1976
        # Sovereign's Birthday Observance Act 1937, 1952
        # Holidays Act 1981, 2003
        if year < 1894:
            return

        observed_tag = " (Observed)"

        def nearest_monday(day):
            """Apply ``MO(-1)`` to Tue-Thu dates and ``MO`` to all others."""
            if day.weekday() in (TUE, WED, THU):
                return day + rd(weekday=MO(-1))
            return day + rd(weekday=MO)

        def add_fixed(month, day_of_month, label, observed_day_of_month):
            """Store a fixed-date holiday plus its fixed weekend make-up day."""
            actual = date(year, month, day_of_month)
            self[actual] = label
            if self.observed and actual.weekday() in WEEKEND:
                self[date(year, month, observed_day_of_month)] = (
                    label + observed_tag)

        def add_mondayised(actual, label):
            """Store a holiday that moves to Monday off a weekend from 2014."""
            self[actual] = label
            if self.observed and year >= 2014 and actual.weekday() in WEEKEND:
                self[actual + rd(weekday=MO)] = label + observed_tag

        # New Year's Day and the day after it
        add_fixed(JAN, 1, "New Year's Day", 3)
        add_fixed(JAN, 2, "Day after New Year's Day", 4)

        # Waitangi Day (called New Zealand Day up to and including 1976)
        if year > 1973:
            waitangi = "Waitangi Day" if year > 1976 else "New Zealand Day"
            add_mondayised(date(year, FEB, 6), waitangi)

        # Easter
        self[easter(year) + rd(weekday=FR(-1))] = "Good Friday"
        self[easter(year) + rd(weekday=MO)] = "Easter Monday"

        # Anzac Day
        if year > 1920:
            add_mondayised(date(year, APR, 25), "Anzac Day")

        # Sovereign's Birthday (nothing is recorded up to and including 1901)
        if year > 1901:
            if year >= 1952:
                sovereign = "Queen's Birthday"
            else:
                sovereign = "King's Birthday"

            if year == 1952:
                birthday = date(year, JUN, 2)  # Elizabeth II
            elif year > 1937:
                # EII & GVI
                birthday = date(year, JUN, 1) + rd(weekday=MO(+1))
            elif year == 1937:
                birthday = date(year, JUN, 9)  # George VI
            elif year == 1936:
                birthday = date(year, JUN, 23)  # Edward VIII
            elif year > 1911:
                birthday = date(year, JUN, 3)  # George V
            else:
                # http://paperspast.natlib.govt.nz/cgi-bin/paperspast?a=d&d=NZH19091110.2.67
                birthday = date(year, NOV, 9)  # Edward VII
            self[birthday] = sovereign

        # Labour Day
        if year >= 1910:
            self[date(year, OCT, 1) + rd(weekday=MO(+4))] = "Labour Day"
        elif year > 1899:
            self[date(year, OCT, 1) + rd(weekday=WE(+2))] = "Labour Day"

        # Christmas Day and Boxing Day
        add_fixed(DEC, 25, "Christmas Day", 27)
        add_fixed(DEC, 26, "Boxing Day", 28)

        # Province Anniversary Day
        prov = self.prov
        if prov in ('NTL', 'Northland', 'AUK', 'Auckland'):
            if 1963 < year <= 1973 and prov in ('NTL', 'Northland'):
                self[nearest_monday(date(year, FEB, 6))] = "Waitangi Day"
            else:
                self[nearest_monday(date(year, JAN, 29))] = (
                    "Auckland Anniversary Day")

        elif prov in ('TKI', 'Taranaki', 'New Plymouth'):
            self[date(year, MAR, 1) + rd(weekday=MO(+2))] = (
                "Taranaki Anniversary Day")

        elif prov in ('HKB', "Hawke's Bay"):
            # Friday before Labour Day
            labour_day = date(year, OCT, 1) + rd(weekday=MO(+4))
            self[labour_day + rd(weekday=FR(-1))] = (
                "Hawke's Bay Anniversary Day")

        elif prov in ('WGN', 'Wellington'):
            self[nearest_monday(date(year, JAN, 22))] = (
                "Wellington Anniversary Day")

        elif prov in ('MBH', 'Marlborough'):
            # One week after Labour Day
            labour_day = date(year, OCT, 1) + rd(weekday=MO(+4))
            self[labour_day + rd(weeks=1)] = "Marlborough Anniversary Day"

        elif prov in ('NSN', 'Nelson'):
            self[nearest_monday(date(year, FEB, 1))] = "Nelson Anniversary Day"

        elif prov in ('CAN', 'Canterbury'):
            # Second Friday counted from the first Tuesday of November
            first_tuesday = date(year, NOV, 1) + rd(weekday=TU)
            self[first_tuesday + rd(weekday=FR(+2))] = (
                "Canterbury Anniversary Day")

        elif prov in ('STC', 'South Canterbury'):
            self[date(year, SEP, 1) + rd(weekday=MO(4))] = (
                "South Canterbury Anniversary Day")

        elif prov in ('WTL', 'Westland'):
            # Observance varies?!?!
            westland = "Westland Anniversary Day"
            dec1 = date(year, DEC, 1)
            if year == 2005:  # special case?!?!
                self[date(year, DEC, 5)] = westland
            else:
                self[nearest_monday(dec1)] = westland

        elif prov in ('OTA', 'Otago'):
            # there is no easily determined single day of local observance?!?!
            otago_day = nearest_monday(date(year, MAR, 23))
            if otago_day == easter(year) + rd(weekday=MO):
                # Avoid Easter Monday
                otago_day += rd(days=1)
            self[otago_day] = "Otago Anniversary Day"

        elif prov in ('STL', 'Southland'):
            southland = "Southland Anniversary Day"
            jan17 = date(year, JAN, 17)
            if year > 2011:
                self[easter(year) + rd(weekday=TU)] = southland
            else:
                self[nearest_monday(jan17)] = southland

        elif prov in ('CIT', 'Chatham Islands'):
            self[nearest_monday(date(year, NOV, 30))] = (
                "Chatham Islands Anniversary Day")
Exemplo n.º 23
0
    def _populate(self, year):
        """Register the Canadian holidays of ``year`` on this calendar.

        Each holiday is stored as ``self[<date>] = <name>``.  Which holidays
        apply depends on the province code in ``self.prov`` and on the first
        year checked for each holiday below.  "(Observed)" entries are only
        added when ``self.observed`` is truthy.
        """
        # New Year's Day
        if year >= 1867:
            name = "New Year's Day"
            self[date(year, 1, 1)] = name
            if self.observed and date(year, 1, 1).weekday() == 6:
                self[date(year, 1, 1) + rd(days=+1)] = name + " (Observed)"
            # The next year's observed New Year's Day can be in this year
            # when it falls on a Friday (Jan 1st is a Saturday)
            if self.observed and date(year, 12, 31).weekday() == 4:
                self[date(year, 12, 31)] = name + " (Observed)"

        # Islander Day
        if self.prov == 'PE' and year >= 2010:
            self[date(year, 2, 1) + rd(weekday=MO(+3))] = "Islander Day"
        elif self.prov == 'PE' and year == 2009:
            self[date(year, 2, 1) + rd(weekday=MO(+2))] = "Islander Day"

        # Family Day / Louis Riel Day (MB)
        if self.prov in ('AB', 'SK', 'ON') and year >= 2008:
            self[date(year, 2, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov in ('AB', 'SK') and year >= 2007:
            self[date(year, 2, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov == 'AB' and year >= 1990:
            self[date(year, 2, 1) + rd(weekday=MO(+3))] = "Family Day"
        elif self.prov == 'BC' and year >= 2013:
            self[date(year, 2, 1) + rd(weekday=MO(+2))] = "Family Day"
        elif self.prov == 'MB' and year >= 2008:
            self[date(year, 2, 1) + rd(weekday=MO(+3))] = "Louis Riel Day"

        # St. Patrick's Day
        if self.prov == 'NL' and year >= 1900:
            dt = date(year, 3, 17)
            # Nearest Monday to March 17
            dt1 = date(year, 3, 17) + rd(weekday=MO(-1))
            dt2 = date(year, 3, 17) + rd(weekday=MO(+1))
            if dt2 - dt <= dt - dt1:
                self[dt2] = "St. Patrick's Day"
            else:
                self[dt1] = "St. Patrick's Day"

        # Good Friday
        if self.prov != 'QC' and year >= 1867:
            self[easter(year) + rd(weekday=FR(-1))] = "Good Friday"

        # Easter Monday
        if self.prov == 'QC' and year >= 1867:
            self[easter(year) + rd(weekday=MO)] = "Easter Monday"

        # St. George's Day
        if self.prov == 'NL' and year == 2010:
            # 4/26 is the Monday closer to 4/23 in 2010
            # but the holiday was observed on 4/19? Crazy Newfies!
            self[date(2010, 4, 19)] = "St. George's Day"
        elif self.prov == 'NL' and year >= 1990:
            dt = date(year, 4, 23)
            # Nearest Monday to April 23
            dt1 = dt + rd(weekday=MO(-1))
            dt2 = dt + rd(weekday=MO(+1))
            if dt2 - dt < dt - dt1:
                self[dt2] = "St. George's Day"
            else:
                self[dt1] = "St. George's Day"

        # Victoria Day / National Patriotes Day (QC)
        if self.prov not in ('NB', 'NS', 'PE', 'NL', 'QC') and year >= 1953:
            self[date(year, 5, 24) + rd(weekday=MO(-1))] = "Victoria Day"
        elif self.prov == 'QC' and year >= 1953:
            name = "National Patriotes Day"
            self[date(year, 5, 24) + rd(weekday=MO(-1))] = name

        # National Aboriginal Day
        if self.prov == 'NT' and year >= 1996:
            self[date(year, 6, 21)] = "National Aboriginal Day"

        # St. Jean Baptiste Day
        if self.prov == 'QC' and year >= 1925:
            self[date(year, 6, 24)] = "St. Jean Baptiste Day"
            if self.observed and date(year, 6, 24).weekday() == 6:
                self[date(year, 6, 25)] = "St. Jean Baptiste Day (Observed)"

        # Discovery Day
        if self.prov == 'NL' and year >= 1997:
            dt = date(year, 6, 24)
            # Nearest Monday to June 24
            dt1 = dt + rd(weekday=MO(-1))
            dt2 = dt + rd(weekday=MO(+1))
            if dt2 - dt <= dt - dt1:
                self[dt2] = "Discovery Day"
            else:
                self[dt1] = "Discovery Day"
        elif self.prov == 'YU' and year >= 1912:
            self[date(year, 8, 1) + rd(weekday=MO(+3))] = "Discovery Day"

        # Canada Day / Memorial Day (NL)
        if self.prov != 'NL' and year >= 1867:
            name = "Canada Day"
            self[date(year, 7, 1)] = name
            if self.observed and date(year, 7, 1).weekday() in (5, 6):
                self[date(year, 7, 1) + rd(weekday=MO)] = name + " (Observed)"
        elif year >= 1867:
            name = "Memorial Day"
            self[date(year, 7, 1)] = name
            if self.observed and date(year, 7, 1).weekday() in (5, 6):
                self[date(year, 7, 1) + rd(weekday=MO)] = name + " (Observed)"

        # Nunavut Day
        if self.prov == 'NU' and year >= 2001:
            self[date(year, 7, 9)] = "Nunavut Day"
            if self.observed and date(year, 7, 9).weekday() == 6:
                self[date(year, 7, 10)] = "Nunavut Day (Observed)"
        elif self.prov == 'NU' and year == 2000:
            self[date(2000, 4, 1)] = "Nunavut Day"

        # Civic Holiday
        if self.prov in ('SK', 'ON', 'MB', 'NT') and year >= 1900:
            self[date(year, 8, 1) + rd(weekday=MO)] = "Civic Holiday"
        # Compare for equality: ``self.prov in ('BC')`` is a substring test
        # against the string 'BC' (the parentheses do not make a tuple), so it
        # also matched 'B', 'C' and '' and raised TypeError for a None prov.
        elif self.prov == 'BC' and year >= 1974:
            self[date(year, 8, 1) + rd(weekday=MO)] = "British Columbia Day"

        # Labour Day
        if year >= 1894:
            self[date(year, 9, 1) + rd(weekday=MO)] = "Labour Day"

        # Thanksgiving
        if self.prov not in ('NB', 'NS', 'PE', 'NL') and year >= 1931:
            self[date(year, 10, 1) + rd(weekday=MO(+2))] = "Thanksgiving"

        # Remembrance Day (not added for ON and QC; only the second group of
        # provinces gets a Monday "(Observed)" entry for a Sunday Nov 11)
        name = "Remembrance Day"
        provinces = ('ON', 'QC', 'NS', 'NL', 'NT', 'PE', 'SK')
        if self.prov not in provinces and year >= 1931:
            self[date(year, 11, 11)] = name
        elif self.prov in ('NS', 'NL', 'NT', 'PE', 'SK') and year >= 1931:
            self[date(year, 11, 11)] = name
            if self.observed and date(year, 11, 11).weekday() == 6:
                name = name + " (Observed)"
                self[date(year, 11, 11) + rd(weekday=MO)] = name

        # Christmas Day
        if year >= 1867:
            self[date(year, 12, 25)] = "Christmas Day"
            if self.observed and date(year, 12, 25).weekday() == 5:
                self[date(year, 12, 24)] = "Christmas Day (Observed)"
            elif self.observed and date(year, 12, 25).weekday() == 6:
                self[date(year, 12, 26)] = "Christmas Day (Observed)"

        # Boxing Day: when an observed date applies, only the observed entry
        # is stored (December 26 itself is not added under this name).
        if year >= 1867:
            name = "Boxing Day"
            name_observed = name + " (Observed)"
            if self.observed and date(year, 12, 26).weekday() in (5, 6):
                self[date(year, 12, 26) + rd(weekday=MO)] = name_observed
            elif self.observed and date(year, 12, 26).weekday() == 0:
                self[date(year, 12, 27)] = name_observed
            else:
                self[date(year, 12, 26)] = name
Exemplo n.º 24
0
class TestStringifiedDAGs(unittest.TestCase):
    """Unit tests for stringified DAGs."""
    def setUp(self):
        """Stub ``BaseHook.get_connection`` for the duration of one test."""
        super().setUp()
        # Patch instead of assigning so the real method is restored after each
        # test; a plain assignment would leak the mock into every test that
        # runs later in the same process.
        patcher = mock.patch.object(
            BaseHook, 'get_connection',
            mock.Mock(return_value=Connection(
                extra=('{'
                       '"project_id": "mock", '
                       '"location": "mock", '
                       '"instance": "mock", '
                       '"database_type": "postgres", '
                       '"use_proxy": "False", '
                       '"use_ssl": "False"'
                       '}'))))
        patcher.start()
        self.addCleanup(patcher.stop)
        self.maxDiff = None  # pylint: disable=invalid-name

    def test_serialization(self):
        """Serialization and deserialization should work for every DAG and Operator."""
        dags = collect_dags()
        serialized_dags = {}
        for _, v in dags.items():
            dag = SerializedDAG.to_dict(v)
            SerializedDAG.validate_schema(dag)
            serialized_dags[v.dag_id] = dag

        # Compares with the ground truth of JSON string.
        self.validate_serialized_dag(serialized_dags['simple_dag'],
                                     serialized_simple_dag_ground_truth)

    def validate_serialized_dag(self, json_dag, ground_truth_dag):
        """Verify serialized DAGs match the ground truth.

        Both dictionaries are modified in place: ``fileloc`` of ``json_dag``
        is reset to ``None`` and the task lists of both are re-sorted.
        """
        self.assertEqual(json_dag['dag']['fileloc'].split('/')[-1],
                         'test_dag_serialization.py')
        json_dag['dag']['fileloc'] = None

        def sorted_serialized_dag(dag_dict: dict):
            """
            Sorts the "tasks" list in the serialised dag python dictionary
            This is needed as the order of tasks should not matter but assertEqual
            would fail if the order of tasks list changes in dag dictionary
            """
            dag_dict["dag"]["tasks"] = sorted(dag_dict["dag"]["tasks"],
                                              key=lambda x: sorted(x.keys()))
            return dag_dict

        self.assertEqual(sorted_serialized_dag(ground_truth_dag),
                         sorted_serialized_dag(json_dag))

    def test_deserialization(self):
        """A serialized DAG can be deserialized in another process."""
        queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=serialize_subprocess,
                                       args=(queue, ))
        proc.daemon = True
        proc.start()

        stringified_dags = {}
        # The loop stops at the first ``None`` read from the queue.
        while True:
            v = queue.get()
            if v is None:
                break
            dag = SerializedDAG.from_json(v)
            self.assertIsInstance(dag, DAG)
            stringified_dags[dag.dag_id] = dag

        dags = collect_dags()
        self.assertEqual(set(stringified_dags.keys()), set(dags.keys()))

        # Verify deserialized DAGs.
        example_skip_dag = stringified_dags['example_skip_dag']
        skip_operator_1_task = example_skip_dag.task_dict['skip_operator_1']
        self.validate_deserialized_task(skip_operator_1_task,
                                        'DummySkipOperator', '#e8b7e4', '#000')

        # Verify that the DAG object has 'full_filepath' attribute
        # and is equal to fileloc
        self.assertTrue(hasattr(example_skip_dag, 'full_filepath'))
        self.assertEqual(example_skip_dag.full_filepath,
                         example_skip_dag.fileloc)

        example_subdag_operator = stringified_dags['example_subdag_operator']
        section_1_task = example_subdag_operator.task_dict['section-1']
        self.validate_deserialized_task(section_1_task,
                                        SubDagOperator.__name__,
                                        SubDagOperator.ui_color,
                                        SubDagOperator.ui_fgcolor)

        simple_dag = stringified_dags['simple_dag']
        custom_task = simple_dag.task_dict['custom_task']
        self.validate_operator_extra_links(custom_task)

    def validate_deserialized_task(self, task, task_type, ui_color,
                                   ui_fgcolor):
        """Verify non-airflow operators are casted to BaseOperator."""
        self.assertIsInstance(task, SerializedBaseOperator)
        # Verify the original operator class is recorded for UI.
        self.assertEqual(task.task_type, task_type)
        self.assertEqual(task.ui_color, ui_color)
        self.assertEqual(task.ui_fgcolor, ui_fgcolor)

        # Check that for Deserialised task, task.subdag is None for all other Operators
        # except for the SubDagOperator where task.subdag is an instance of DAG object
        if task.task_type == "SubDagOperator":
            self.assertIsNotNone(task.subdag)
            self.assertIsInstance(task.subdag, DAG)
        else:
            self.assertIsNone(task.subdag)

    def validate_operator_extra_links(self, task):
        """
        This tests also depends on GoogleLink() registered as a plugin
        in tests/plugins/test_plugin.py

        The function tests that if extra operator links are registered in plugin
        in ``operator_extra_links`` and the same is also defined in
        the Operator in ``BaseOperator.operator_extra_links``, it has the correct
        extra link.
        """
        self.assertEqual(
            task.operator_extra_link_dict[GoogleLink.name].get_link(
                task, datetime(2019, 8, 1)), "https://www.google.com")

    @parameterized.expand([
        (None, None),
        ("@weekly", "@weekly"),
        ({
            "__type": "timedelta",
            "__var": 86400.0
        }, timedelta(days=1)),
    ])
    def test_deserialization_schedule_interval(self,
                                               serialized_schedule_interval,
                                               expected):
        """A serialized ``schedule_interval`` deserializes to ``expected``."""
        serialized = {
            "__version": 1,
            "dag": {
                "default_args": {
                    "__type": "dict",
                    "__var": {}
                },
                "params": {},
                "_dag_id": "simple_dag",
                "fileloc": __file__,
                "tasks": [],
                "timezone": "UTC",
                "schedule_interval": serialized_schedule_interval,
            },
        }

        SerializedDAG.validate_schema(serialized)

        dag = SerializedDAG.from_dict(serialized)

        self.assertEqual(dag.schedule_interval, expected)

    @parameterized.expand([
        (relativedelta(days=-1), {
            "__type": "relativedelta",
            "__var": {
                "days": -1
            }
        }),
        (relativedelta(month=1, days=-1), {
            "__type": "relativedelta",
            "__var": {
                "month": 1,
                "days": -1
            }
        }),
        # Every friday
        (relativedelta(weekday=FR), {
            "__type": "relativedelta",
            "__var": {
                "weekday": [4]
            }
        }),
        # Every second friday
        (relativedelta(weekday=FR(2)), {
            "__type": "relativedelta",
            "__var": {
                "weekday": [4, 2]
            }
        })
    ])
    def test_roundtrip_relativedelta(self, val, expected):
        """``val`` serializes to ``expected`` and deserializes back to ``val``."""
        serialized = SerializedDAG._serialize(val)
        self.assertDictEqual(serialized, expected)

        round_tripped = SerializedDAG._deserialize(serialized)
        self.assertEqual(val, round_tripped)
Exemplo n.º 25
0
def main():
    """Train the example model and write a Signals submission CSV.

    The steps are: fetch the eligible ticker universe and the public
    Bloomberg/Yahoo ticker map, load price data through ``load_data``, derive
    features with ``generate_featues``, merge them with the published
    targets, fit a ``GradientBoostingRegressor`` on the rows whose
    ``data_type`` is "train", predict the "validation" rows and the rows
    dated last Friday, and write the result to
    ``example_signal_alphavantage.csv``.  Progress is reported with ``print``.
    """
    napi = numerapi.SignalsAPI()

    # Numerai Universe
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name="bloomberg_ticker")
    print(f"Number of eligible tickers : {len(eligible_tickers)}")

    ticker_map = pd.read_csv(
        "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv"
    )

    # ----- Yahoo <-> Bloomberg mapping -----
    yfinance_tickers = eligible_tickers.map(
        dict(zip(ticker_map["bloomberg_ticker"],
                 ticker_map["yahoo"]))).dropna()
    bloomberg_tickers = ticker_map["bloomberg_ticker"]
    print(f"Number of eligible, mapped tickers: {len(yfinance_tickers)}")

    # Every Yahoo ticker present in the map is loaded, not only the
    # eligible ones counted above.
    tickers = ticker_map.yahoo.dropna().values

    # ----- Raw data loading and formatting -----
    print(f"using tickers: {len(tickers)}")
    full_data = load_data(tickers,
                          "full_data.csv",
                          threads=LOAD_DATA_IN_PARALLEL)

    full_data["bloomberg_ticker"] = full_data.ticker.map(
        dict(zip(ticker_map["yahoo"], bloomberg_tickers)))

    full_data = full_data[[
        "bloomberg_ticker", "open", "high", "low", "close", "adjusted close"
    ]].sort_index(ascending=True)
    full_data.dropna(inplace=True, axis=0)

    # ----- Merging targets -----
    url = "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_train_val_bbg.csv"
    targets = pd.read_csv(url)

    targets["target"] = targets["target"].astype(np.float16)
    targets["date"] = pd.to_datetime(targets["friday_date"], format="%Y%m%d")
    gc.collect()

    # ----- Generate and select features -----
    full_data = generate_featues(full_data)
    feature_names = [f for f in full_data.columns if "quintile" in f]

    ML_data = pd.merge(
        full_data.reset_index(),
        targets,
        on=["date", "bloomberg_ticker"],
    ).set_index("date")
    print(f"Number of eras in data: {len(ML_data.index.unique())}")

    # Keep Friday rows only (weekday 4), then filter on per-date row counts.
    ML_data = ML_data[ML_data.index.weekday == 4]
    ML_data = ML_data[ML_data.index.value_counts() > 200]

    # ----- Train test split -----
    # Copy the subsets: prediction columns are added to them below, and
    # assigning into a slice of ML_data triggers pandas' chained-assignment
    # warning.
    train_data = ML_data[ML_data["data_type"] == "train"].copy()
    test_data = ML_data[ML_data["data_type"] == "validation"].copy()

    corrs = train_data.groupby(train_data.index).apply(
        lambda x: x[feature_names + [TARGET_NAME]].corr()[TARGET_NAME])
    mean_corr = corrs[feature_names].mean(0)
    print(mean_corr)

    last_friday = datetime.now() + relativedelta(weekday=FR(-1))
    print(last_friday)
    date_string = last_friday.strftime("%Y-%m-%d")

    try:
        live_data = full_data.loc[date_string].copy()
    except KeyError as e:
        print(f"No ticker on {e}")
        live_data = full_data.iloc[:0].copy()
    live_data.dropna(subset=feature_names, inplace=True)
    print(len(live_data))
    # ----- Train model -----
    print("Training model...")
    model = GradientBoostingRegressor()
    model.fit(train_data[feature_names], train_data[TARGET_NAME])
    print("Model trained.")

    # ----- Predict test data -----
    train_data[PREDICTION_NAME] = model.predict(train_data[feature_names])
    test_data[PREDICTION_NAME] = model.predict(test_data[feature_names])
    if live_data.empty:
        # scikit-learn estimators reject input with zero samples, so the
        # "no data for last Friday" fallback above must skip prediction
        # instead of crashing here.
        live_data[PREDICTION_NAME] = np.array([], dtype=float)
    else:
        live_data[PREDICTION_NAME] = model.predict(live_data[feature_names])

    diagnostic_df = pd.concat([test_data, live_data])
    # Rows that came from live_data have no friday_date/data_type yet.
    diagnostic_df["friday_date"] = diagnostic_df.friday_date.fillna(
        last_friday.strftime("%Y%m%d")).astype(int)
    diagnostic_df["data_type"] = diagnostic_df.data_type.fillna("live")
    diagnostic_df[["bloomberg_ticker", "friday_date", "data_type",
                   "signal"]].reset_index(drop=True).to_csv(
                       "example_signal_alphavantage.csv", index=False)
    print(
        "Submission saved to example_signal_alphavantage.csv. Upload to signals.numer.ai for scores and diagnostics"
    )
Exemplo n.º 26
0
                       className='nav-item nav-link btn btn-outline-success'),
                html.A('Ticks',
                       href='/apps/ticks',
                       className='nav-item nav-link btn btn-outline-success'),
            ]),
        html.Div([
            html.Label("Expiry Date:",
                       htmlFor="date-picker",
                       className='form-check-label'),
            html.Div([
                dcc.DatePickerSingle(
                    id='date-picker',
                    min_date_allowed=dt(2018, 6, 15),
                    max_date_allowed=dt(2030, 12, 31),
                    initial_visible_month=dt.now(),
                    date=dt.now() + relativedelta(weekday=FR(+1)),
                ),
            ],
                     className='form-check'),
        ],
                 className='form-check-inline col-auto'),
        dcc.Graph(style={
            'height': 300,
        }, id='my-timestamp-graph'),
        dcc.Interval(
            id='interval-component',
            interval=600 * 1000,  # in milliseconds
            n_intervals=0)
    ])

Exemplo n.º 27
0
import pandas.util.testing as tm
import pandas as pd
from dateutil.relativedelta import relativedelta, FR
import matplotlib.pyplot as plt

# Build a demo frame of time series on a business-day index; tm.N sets the
# number of rows the pandas testing helper generates.
tm.N = 10
df = tm.makeTimeDataFrame(freq='B')  # 'B': business-day frequency
print(df)

# Re-index the frame with business days counted from the most recent Friday
# (FR(-1) resolves to today when today is already a Friday).
last_friday = pd.Timestamp.now().date() + relativedelta(weekday=FR(-1))
df.index = pd.date_range(start=last_friday, periods=len(df.index), freq='B')
print(df)

# Plot every column with point markers and display the figure.
df.plot(figsize=(10, 6), marker='o')
plt.show()
Exemplo n.º 28
0
    def _country_specific(self, year):
        """Add the United Kingdom specific holidays for ``year``.

        Entries are written into ``self`` keyed by date. ``self.state``
        selects the regional variants (Scotland is excluded from Easter Monday
        and the Late Summer bank holiday; the "UK" value gets a suffix naming
        the regions that observe them) and ``self.observed`` controls whether
        the substitute Boxing Day is added.

        :param year: calendar year to populate.
        """
        # Good Friday: the last Friday on or before the date returned by easter().
        self[easter(year) + rd(weekday=FR(-1))] = "Good Friday"

        # Easter Monday
        if self.state != "Scotland":
            name = "Easter Monday"
            if self.state == "UK":
                name += " [England/Wales/Northern Ireland]"
            self[easter(year) + rd(weekday=MO)] = name

        # May Day bank holiday (first Monday in May)
        if year >= 1978:
            name = "May Day"
            if year == 2020:
                # Moved to Friday to mark 75th anniversary of VE Day.
                self[date(year, MAY, 8)] = name
            else:
                # rd(weekday=MO) yields the first Monday on or after the start
                # date; in 1995 the search starts on 8 May instead of 1 May.
                start = date(year, MAY, 8 if year == 1995 else 1)
                self[start + rd(weekday=MO)] = name

        # Spring bank holiday (last Monday in May)
        name = "Spring Bank Holiday"
        if year == 2012:
            self[date(year, JUN, 4)] = name
        elif year == 2022:
            self[date(year, JUN, 2)] = name
        elif year >= 1971:
            self[date(year, MAY, 31) + rd(weekday=MO(-1))] = name

        # Late Summer bank holiday (last Monday in August)
        # NOTE: compare with != rather than `not in ("Scotland")`; the latter
        # is a substring test against a plain string, not tuple membership.
        if self.state != "Scotland" and year >= 1971:
            name = "Late Summer Bank Holiday"
            if self.state == "UK":
                name += " [England/Wales/Northern Ireland]"
            self[date(year, AUG, 31) + rd(weekday=MO(-1))] = name

        # Boxing Day
        name = "Boxing Day"
        boxing_day = date(year, DEC, 26)
        self[boxing_day] = name
        # A Saturday or a Sunday Boxing Day is observed on 28 December.
        if self.observed and boxing_day.weekday() in (SAT, SUN):
            self[date(year, DEC, 28)] = name + " (Observed)"

        # Special holidays: one-off dates keyed by year.
        special_holidays = {
            1977: (JUN, 7, "Silver Jubilee of Elizabeth II"),
            1981: (JUL, 29, "Wedding of Charles and Diana"),
            1999: (DEC, 31, "Millennium Celebrations"),
            2002: (JUN, 3, "Golden Jubilee of Elizabeth II"),
            2011: (APR, 29, "Wedding of William and Catherine"),
            2012: (JUN, 5, "Diamond Jubilee of Elizabeth II"),
            2022: (JUN, 3, "Platinum Jubilee of Elizabeth II"),
        }
        if year in special_holidays:
            month, day, name = special_holidays[year]
            self[date(year, month, day)] = name
Exemplo n.º 29
0
class TestStringifiedDAGs(unittest.TestCase):
    """Unit tests for stringified DAGs."""

    def setUp(self):
        """Stub ``BaseHook.get_connection`` for the duration of each test.

        The stub returns a ``Connection`` whose ``extra`` JSON holds mock
        values. It is installed through ``mock.patch.object`` and removed by a
        cleanup callback, so the replacement does not leak into tests that run
        after this class.
        """
        super().setUp()
        connection_extra = (
            '{'
            '"project_id": "mock", '
            '"location": "mock", '
            '"instance": "mock", '
            '"database_type": "postgres", '
            '"use_proxy": "False", '
            '"use_ssl": "False"'
            '}'
        )
        patcher = mock.patch.object(
            BaseHook,
            'get_connection',
            new=mock.Mock(return_value=Connection(extra=connection_extra)),
        )
        patcher.start()
        # Restore the real method even when the test fails or errors.
        self.addCleanup(patcher.stop)
        self.maxDiff = None  # pylint: disable=invalid-name

    def test_serialization(self):
        """Every collected DAG serializes to a schema-valid dict; 'simple_dag' matches the ground truth."""
        serialized_by_id = {}
        for source_dag in collect_dags().values():
            as_dict = SerializedDAG.to_dict(source_dag)
            SerializedDAG.validate_schema(as_dict)
            serialized_by_id[source_dag.dag_id] = as_dict

        # Compare against the expected serialized form of the simple DAG.
        simple_dag = serialized_by_id['simple_dag']
        self.validate_serialized_dag(simple_dag, serialized_simple_dag_ground_truth)

    def validate_serialized_dag(self, json_dag, ground_truth_dag):
        """Verify that a serialized DAG dict matches the ground truth.

        Both dictionaries are modified in place: ``fileloc`` of ``json_dag``
        is reset to ``None`` once its file name has been checked, and the
        order-insensitive lists of both dicts are sorted before comparing.

        :param json_dag: serialized DAG dictionary under test.
        :param ground_truth_dag: expected serialized DAG dictionary.
        """
        # assertEqual (instead of assertTrue on ==) reports the actual file
        # name when the check fails.
        self.assertEqual(
            json_dag['dag']['fileloc'].split('/')[-1], 'test_dag_serialization.py')
        json_dag['dag']['fileloc'] = None

        def sorted_serialized_dag(dag_dict: dict):
            """
            Sorts the "tasks" list and "access_control" permissions in the
            serialised dag python dictionary. This is needed as the order of
            items should not matter but assertEqual would fail if the order of
            items changes in the dag dictionary
            """
            dag_section = dag_dict["dag"]
            dag_section["tasks"] = sorted(dag_section["tasks"],
                                          key=lambda x: sorted(x.keys()))
            role_permissions = dag_section["_access_control"]["__var"]["test_role"]
            role_permissions["__var"] = sorted(role_permissions["__var"])
            return dag_dict

        assert sorted_serialized_dag(ground_truth_dag) == sorted_serialized_dag(json_dag)

    def test_deserialization_across_process(self):
        """A serialized DAG can be deserialized in another process.

        A daemon subprocess runs ``serialize_subprocess`` and feeds items
        through a queue; each item is deserialized here and compared with the
        DAG collected in this process.
        """

        # Since we need to parse the dags twice here (once in the subprocess,
        # and once here to get a DAG to compare to) we don't want to load all
        # dags.
        queue = multiprocessing.Queue()
        proc = multiprocessing.Process(
            target=serialize_subprocess, args=(queue, "airflow/example_dags"))
        proc.daemon = True
        proc.start()

        stringified_dags = {}
        while True:
            payload = queue.get()
            # A ``None`` item marks the end of the stream.
            if payload is None:
                break
            dag = SerializedDAG.from_json(payload)
            # assertIsInstance reports the offending type on failure.
            self.assertIsInstance(dag, DAG)
            stringified_dags[dag.dag_id] = dag

        dags = collect_dags("airflow/example_dags")
        assert set(stringified_dags.keys()) == set(dags.keys())

        # Verify deserialized DAGs.
        for dag_id, stringified_dag in stringified_dags.items():
            self.validate_deserialized_dag(stringified_dag, dags[dag_id])

    def test_roundtrip_provider_example_dags(self):
        """Provider example DAGs keep their fields across a JSON round trip."""
        provider_dags = collect_dags([
            "airflow/providers/*/example_dags",
            "airflow/providers/*/*/example_dags",
        ])

        # Serialize each DAG to JSON, read it back and compare with the source.
        for source_dag in provider_dags.values():
            as_json = SerializedDAG.to_json(source_dag)
            round_tripped = SerializedDAG.from_json(as_json)
            self.validate_deserialized_dag(round_tripped, source_dag)

    def validate_deserialized_dag(self, serialized_dag, dag):
        """
        Compare a deserialized DAG with the DAG it was produced from.

        Every serialized field is compared directly, except for a few that are
        checked by hand: ``timezone`` (by name), ``default_args`` (entry by
        entry, callables only by presence) and ``_task_group`` (not compared
        here). Each task is then checked with ``validate_deserialized_task``.
        """
        manually_checked = {
            # Doesn't implement __eq__ properly. Check manually
            'timezone',

            # Need to check fields in it, to exclude functions
            'default_args',
            "_task_group"
        }
        for field in dag.get_serialized_fields() - manually_checked:
            assert getattr(serialized_dag, field) == getattr(dag, field), \
                f'{dag.dag_id}.{field} does not match'

        for key, value in (dag.default_args or {}).items():
            if callable(value):
                # Check we stored _something_.
                assert key in serialized_dag.default_args
                continue
            assert value == serialized_dag.default_args[key], \
                f'{dag.dag_id}.default_args[{key}] does not match'

        assert serialized_dag.timezone.name == dag.timezone.name

        for task_id in dag.task_ids:
            self.validate_deserialized_task(serialized_dag.get_task(task_id), dag.get_task(task_id))

        # The deserialized DAG must expose 'full_filepath', equal to the
        # original DAG's fileloc.
        assert serialized_dag.full_filepath == dag.fileloc

    def validate_deserialized_task(self, serialized_task, task):
        """
        Check that a deserialized task mirrors the operator it came from.

        ``serialized_task`` must be a ``SerializedBaseOperator`` while ``task``
        must be a regular ``BaseOperator``; their serialized fields have to
        agree, with a handful of fields compared separately below.
        """
        assert isinstance(serialized_task, SerializedBaseOperator)
        assert not isinstance(task, SerializedBaseOperator)
        assert isinstance(task, BaseOperator)

        not_compared_directly = {
            # Checked separately
            '_task_type', 'subdag',

            # Type is excluded, so don't check it
            '_log',

            # List vs tuple. Check separately
            'template_fields',

            # We store the string, real dag has the actual code
            'on_failure_callback', 'on_success_callback', 'on_retry_callback',

            # Checked separately
            'resources',
        }
        comparable_fields = task.get_serialized_fields() - not_compared_directly

        assert serialized_task.task_type == task.task_type
        assert set(serialized_task.template_fields) == set(task.template_fields)

        assert serialized_task.upstream_task_ids == task.upstream_task_ids
        assert serialized_task.downstream_task_ids == task.downstream_task_ids

        for field in comparable_fields:
            assert getattr(serialized_task, field) == getattr(task, field), \
                f'{task.dag.dag_id}.{task.task_id}.{field} does not match'

        if serialized_task.resources is not None:
            assert serialized_task.resources == task.resources
        else:
            # A missing value on the serialized side matches None or an empty list.
            assert task.resources is None or task.resources == []

        # Only a SubDagOperator carries a subdag (a DAG instance) after
        # deserialization; for every other operator it must be None.
        if task.task_type != "SubDagOperator":
            assert serialized_task.subdag is None
        else:
            assert serialized_task.subdag is not None
            assert isinstance(serialized_task.subdag, DAG)

    @parameterized.expand([
        (datetime(2019, 8, 1, tzinfo=timezone.utc), None, datetime(2019, 8, 1, tzinfo=timezone.utc)),
        (datetime(2019, 8, 1, tzinfo=timezone.utc), datetime(2019, 8, 2, tzinfo=timezone.utc),
         datetime(2019, 8, 2, tzinfo=timezone.utc)),
        (datetime(2019, 8, 1, tzinfo=timezone.utc), datetime(2019, 7, 30, tzinfo=timezone.utc),
         datetime(2019, 8, 1, tzinfo=timezone.utc)),
    ])
    def test_deserialization_start_date(self,
                                        dag_start_date,
                                        task_start_date,
                                        expected_task_start_date):
        """A task's start_date is serialized only when it is later than the DAG's start_date."""
        dag = DAG(dag_id='simple_dag', start_date=dag_start_date)
        BaseOperator(task_id='simple_task', dag=dag, start_date=task_start_date)

        serialized_dag = SerializedDAG.to_dict(dag)
        serialized_task = serialized_dag["dag"]["tasks"][0]
        if task_start_date and dag_start_date < task_start_date:
            self.assertIn("start_date", serialized_task)
        else:
            # If dag.start_date > task.start_date -> task.start_date=dag.start_date
            # because of the logic in dag.add_task()
            self.assertNotIn("start_date", serialized_task)

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(restored_task.start_date, expected_task_start_date)

    @parameterized.expand([
        (datetime(2019, 8, 1, tzinfo=timezone.utc), None, datetime(2019, 8, 1, tzinfo=timezone.utc)),
        (datetime(2019, 8, 1, tzinfo=timezone.utc), datetime(2019, 8, 2, tzinfo=timezone.utc),
         datetime(2019, 8, 1, tzinfo=timezone.utc)),
        (datetime(2019, 8, 1, tzinfo=timezone.utc), datetime(2019, 7, 30, tzinfo=timezone.utc),
         datetime(2019, 7, 30, tzinfo=timezone.utc)),
    ])
    def test_deserialization_end_date(self,
                                      dag_end_date,
                                      task_end_date,
                                      expected_task_end_date):
        """A task's end_date is serialized only when it is earlier than the DAG's end_date."""
        dag = DAG(dag_id='simple_dag', start_date=datetime(2019, 8, 1),
                  end_date=dag_end_date)
        BaseOperator(task_id='simple_task', dag=dag, end_date=task_end_date)

        serialized_dag = SerializedDAG.to_dict(dag)
        serialized_task = serialized_dag["dag"]["tasks"][0]
        if task_end_date and dag_end_date > task_end_date:
            self.assertIn("end_date", serialized_task)
        else:
            # If dag.end_date < task.end_date -> task.end_date=dag.end_date
            # because of the logic in dag.add_task()
            self.assertNotIn("end_date", serialized_task)

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(restored_task.end_date, expected_task_end_date)

    @parameterized.expand([
        (None, None, None),
        ("@weekly", "@weekly", "0 0 * * 0"),
        ("@once", "@once", None),
        ({"__type": "timedelta", "__var": 86400.0}, timedelta(days=1), timedelta(days=1)),
    ])
    def test_deserialization_schedule_interval(
        self, serialized_schedule_interval, expected_schedule_interval, expected_n_schedule_interval
    ):
        """A serialized schedule_interval is restored together with its normalized form."""
        dag_payload = {
            "default_args": {"__type": "dict", "__var": {}},
            "_dag_id": "simple_dag",
            "fileloc": __file__,
            "tasks": [],
            "timezone": "UTC",
            "schedule_interval": serialized_schedule_interval,
        }
        serialized = {"__version": 1, "dag": dag_payload}

        # The hand-written payload must itself satisfy the serialization schema.
        SerializedDAG.validate_schema(serialized)

        restored_dag = SerializedDAG.from_dict(serialized)

        self.assertEqual(restored_dag.schedule_interval, expected_schedule_interval)
        self.assertEqual(restored_dag.normalized_schedule_interval, expected_n_schedule_interval)

    @parameterized.expand([
        (relativedelta(days=-1), {"__type": "relativedelta", "__var": {"days": -1}}),
        (relativedelta(month=1, days=-1), {"__type": "relativedelta", "__var": {"month": 1, "days": -1}}),
        # Every friday
        (relativedelta(weekday=FR), {"__type": "relativedelta", "__var": {"weekday": [4]}}),
        # Every second friday
        (relativedelta(weekday=FR(2)), {"__type": "relativedelta", "__var": {"weekday": [4, 2]}})
    ])
    def test_roundtrip_relativedelta(self, val, expected):
        """A relativedelta serializes to the expected dict and deserializes to an equal value."""
        encoded = SerializedDAG._serialize(val)
        self.assertDictEqual(encoded, expected)

        decoded = SerializedDAG._deserialize(encoded)
        self.assertEqual(val, decoded)

    @parameterized.expand([
        (None, {}),
        ({"param_1": "value_1"}, {"param_1": "value_1"}),
    ])
    def test_dag_params_roundtrip(self, val, expected_val):
        """
        DAG-level params survive a round trip and are visible on both the
        deserialized DAG and its task; empty params are left out of the
        serialized form.
        """
        dag = DAG(dag_id='simple_dag', params=val)
        BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019, 8, 1))

        serialized_dag = SerializedDAG.to_dict(dag)
        check_presence = self.assertIn if val else self.assertNotIn
        check_presence("params", serialized_dag["dag"])

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(expected_val, restored_dag.params)
        self.assertEqual(expected_val, restored_task.params)

    @parameterized.expand([
        (None, {}),
        ({"param_1": "value_1"}, {"param_1": "value_1"}),
    ])
    def test_task_params_roundtrip(self, val, expected_val):
        """
        Task-level params survive a round trip on the deserialized task; empty
        params are left out of the serialized task.
        """
        dag = DAG(dag_id='simple_dag')
        BaseOperator(task_id='simple_task', dag=dag, params=val,
                     start_date=datetime(2019, 8, 1))

        serialized_dag = SerializedDAG.to_dict(dag)
        check_presence = self.assertIn if val else self.assertNotIn
        check_presence("params", serialized_dag["dag"]["tasks"][0])

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        restored_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(expected_val, restored_task.params)

    def test_extra_serialized_field_and_operator_links(self):
        """
        Assert extra field exists & OperatorLinks defined in Plugins and inbuilt Operator Links.

        This test also depends on GoogleLink() registered as a plugin
        in tests/plugins/test_plugin.py

        It checks that when extra operator links are registered in a plugin
        via ``operator_extra_links`` and also defined on the Operator in
        ``BaseOperator.operator_extra_links``, the deserialized task resolves
        the correct extra link.
        """
        test_date = datetime(2019, 8, 1)
        dag = DAG(dag_id='simple_dag', start_date=test_date)
        CustomOperator(task_id='simple_task', dag=dag, bash_command="true")

        serialized_dag = SerializedDAG.to_dict(dag)
        self.assertIn("bash_command", serialized_dag["dag"]["tasks"][0])

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(simple_task.bash_command, "true")

        #########################################################
        # Verify Operator Links work with Serialized Operator
        #########################################################
        # The serialized operator links only contain the inbuilt Op Link
        serialized_links = serialized_dag["dag"]["tasks"][0]["_operator_extra_links"]
        self.assertEqual(
            serialized_links,
            [{'tests.test_utils.mock_operators.CustomOpLink': {}}]
        )

        # All the extra_links are set
        self.assertCountEqual(simple_task.extra_links, ['Google Custom', 'airflow', 'github', 'google'])

        task_instance = TaskInstance(task=simple_task, execution_date=test_date)
        task_instance.xcom_push('search_query', "dummy_value_1")

        # Deserialized inbuilt link
        inbuilt_link = simple_task.get_extra_links(test_date, CustomOpLink.name)
        self.assertEqual('http://google.com/custom_base_link?search=dummy_value_1', inbuilt_link)

        # Deserialized link registered via Airflow Plugin
        plugin_link = simple_task.get_extra_links(test_date, GoogleLink.name)
        self.assertEqual("https://www.google.com", plugin_link)

    def test_extra_serialized_field_and_multiple_operator_links(self):
        """
        Assert extra field exists & OperatorLinks defined in Plugins and inbuilt Operator Links.

        This test also depends on GoogleLink() registered as a plugin
        in tests/plugins/test_plugin.py

        Same scenario as the single-link test, but ``bash_command`` is a list,
        so the serialized operator carries one indexed inbuilt link per entry.
        """
        test_date = datetime(2019, 8, 1)
        dag = DAG(dag_id='simple_dag', start_date=test_date)
        CustomOperator(task_id='simple_task', dag=dag, bash_command=["echo", "true"])

        serialized_dag = SerializedDAG.to_dict(dag)
        self.assertIn("bash_command", serialized_dag["dag"]["tasks"][0])

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = restored_dag.task_dict["simple_task"]
        self.assertEqual(simple_task.bash_command, ["echo", "true"])

        #########################################################
        # Verify Operator Links work with Serialized Operator
        #########################################################
        # The serialized operator links only contain the inbuilt Op Links
        serialized_links = serialized_dag["dag"]["tasks"][0]["_operator_extra_links"]
        self.assertEqual(
            serialized_links,
            [
                {'tests.test_utils.mock_operators.CustomBaseIndexOpLink': {'index': 0}},
                {'tests.test_utils.mock_operators.CustomBaseIndexOpLink': {'index': 1}},
            ]
        )

        # All the extra_links are set
        self.assertCountEqual(simple_task.extra_links, [
            'BigQuery Console #1', 'BigQuery Console #2', 'airflow', 'github', 'google'])

        task_instance = TaskInstance(task=simple_task, execution_date=test_date)
        task_instance.xcom_push('search_query', ["dummy_value_1", "dummy_value_2"])

        # Deserialized inbuilt link #1
        first_link = simple_task.get_extra_links(test_date, "BigQuery Console #1")
        self.assertEqual('https://console.cloud.google.com/bigquery?j=dummy_value_1', first_link)

        # Deserialized inbuilt link #2
        second_link = simple_task.get_extra_links(test_date, "BigQuery Console #2")
        self.assertEqual('https://console.cloud.google.com/bigquery?j=dummy_value_2', second_link)

        # Deserialized link registered via Airflow Plugin
        plugin_link = simple_task.get_extra_links(test_date, GoogleLink.name)
        self.assertEqual("https://www.google.com", plugin_link)

    class ClassWithCustomAttributes:
        """
        Class for testing purpose: allows to create objects with custom attributes in one single statement.

        Every keyword argument becomes an instance attribute. Equality compares
        the attribute dictionaries of both objects. Because ``__eq__`` is
        overridden without ``__hash__``, instances are unhashable.
        """

        def __init__(self, **kwargs):
            for key, value in kwargs.items():
                setattr(self, key, value)

        def __str__(self):
            return f"{self.__class__.__name__}({self.__dict__})"

        def __repr__(self):
            return self.__str__()

        def __eq__(self, other):
            # Objects without a ``__dict__`` (ints, strings, None, ...) cannot
            # be compared attribute-wise. Returning NotImplemented lets Python
            # fall back to its default comparison instead of raising
            # AttributeError.
            try:
                other_attributes = other.__dict__
            except AttributeError:
                return NotImplemented
            return self.__dict__ == other_attributes

        def __ne__(self, other):
            outcome = self.__eq__(other)
            # Propagate NotImplemented untouched; negating it would be wrong.
            if outcome is NotImplemented:
                return NotImplemented
            return not outcome

    @parameterized.expand([
        (None, None),
        ([], []),
        ({}, {}),
        ("{{ task.task_id }}", "{{ task.task_id }}"),
        # The list must be passed as BOTH the field and the expected value.
        # A bare parenthesised list is unpacked into two string arguments and
        # merely repeats the plain-string case above.
        (
            ["{{ task.task_id }}", "{{ task.task_id }}"],
            ["{{ task.task_id }}", "{{ task.task_id }}"],
        ),
        ({"foo": "{{ task.task_id }}"}, {"foo": "{{ task.task_id }}"}),
        ({"foo": {"bar": "{{ task.task_id }}"}}, {"foo": {"bar": "{{ task.task_id }}"}}),
        (
            [{"foo1": {"bar": "{{ task.task_id }}"}}, {"foo2": {"bar": "{{ task.task_id }}"}}],
            [{"foo1": {"bar": "{{ task.task_id }}"}}, {"foo2": {"bar": "{{ task.task_id }}"}}],
        ),
        (
            {"foo": {"bar": {"{{ task.task_id }}": ["sar"]}}},
            {"foo": {"bar": {"{{ task.task_id }}": ["sar"]}}}),
        (
            ClassWithCustomAttributes(
                att1="{{ task.task_id }}", att2="{{ task.task_id }}", template_fields=["att1"]),
            "ClassWithCustomAttributes("
            "{'att1': '{{ task.task_id }}', 'att2': '{{ task.task_id }}', 'template_fields': ['att1']})",
        ),
        (
            ClassWithCustomAttributes(nested1=ClassWithCustomAttributes(att1="{{ task.task_id }}",
                                                                        att2="{{ task.task_id }}",
                                                                        template_fields=["att1"]),
                                      nested2=ClassWithCustomAttributes(att3="{{ task.task_id }}",
                                                                        att4="{{ task.task_id }}",
                                                                        template_fields=["att3"]),
                                      template_fields=["nested1"]),
            "ClassWithCustomAttributes("
            "{'nested1': ClassWithCustomAttributes({'att1': '{{ task.task_id }}', "
            "'att2': '{{ task.task_id }}', 'template_fields': ['att1']}), "
            "'nested2': ClassWithCustomAttributes({'att3': '{{ task.task_id }}', "
            "'att4': '{{ task.task_id }}', 'template_fields': ['att3']}), 'template_fields': ['nested1']})",
        ),
    ])
    def test_templated_fields_exist_in_serialized_dag(self, templated_field, expected_field):
        """
        Test that templated_fields exists for all Operators in Serialized DAG

        Since we don't want to inflate arbitrary python objects (it poses a RCE/security risk etc.)
        we want check that non-"basic" objects are turned in to strings after deserializing.

        :param templated_field: value assigned to the operator's ``bash_command``.
        :param expected_field: value expected on the deserialized task.
        """

        dag = DAG("test_serialized_template_fields", start_date=datetime(2019, 8, 1))
        with dag:
            BashOperator(task_id="test", bash_command=templated_field)

        serialized_dag = SerializedDAG.to_dict(dag)
        deserialized_dag = SerializedDAG.from_dict(serialized_dag)
        deserialized_test_task = deserialized_dag.task_dict["test"]
        self.assertEqual(expected_field, deserialized_test_task.bash_command)

    def test_dag_serialized_fields_with_schema(self):
        """
        Additional Properties are disabled on DAGs, so every key returned by
        DAG.get_serialized_fields must be listed in the schema definition
        (and the other way round, apart from the manually added keys).
        """
        schema_properties: dict = load_dag_schema_dict()["definitions"]["dag"]["properties"]

        # Keys that serialization adds by hand are not reported by get_serialized_fields
        manually_added: set = {"is_subdag", "tasks"}
        schema_fields: set = set(schema_properties) - manually_added
        self.assertEqual(set(DAG.get_serialized_fields()), schema_fields)

    def test_operator_subclass_changing_base_defaults(self):
        """A subclass overriding a BaseOperator default keeps its own value after a round trip."""
        assert BaseOperator(task_id='dummy').do_xcom_push is True, \
            "Precondition check! If this fails the test won't make sense"

        class MyOperator(BaseOperator):
            """Operator whose ``do_xcom_push`` defaults to False instead of True."""

            def __init__(self, do_xcom_push=False, **kwargs):
                super().__init__(**kwargs)
                self.do_xcom_push = do_xcom_push

        original_op = MyOperator(task_id='dummy')
        assert original_op.do_xcom_push is False

        serialized_blob = SerializedBaseOperator.serialize_operator(original_op)
        restored_op = SerializedBaseOperator.deserialize_operator(serialized_blob)

        assert restored_op.do_xcom_push is False

    def test_no_new_fields_added_to_base_operator(self):
        """
        This test verifies that there are no new fields added to BaseOperator. And reminds that
        tests should be added for it.
        """
        base_operator = BaseOperator(task_id="10")
        fields = base_operator.__dict__
        self.assertEqual({'_BaseOperator__instantiated': True,
                          '_dag': None,
                          '_downstream_task_ids': set(),
                          '_inlets': [],
                          '_log': base_operator.log,
                          '_outlets': [],
                          '_upstream_task_ids': set(),
                          'depends_on_past': False,
                          'do_xcom_push': True,
                          'email': None,
                          'email_on_failure': True,
                          'email_on_retry': True,
                          'end_date': None,
                          'execution_timeout': None,
                          'executor_config': {},
                          'inlets': [],
                          'label': '10',
                          'max_retry_delay': None,
                          'on_execute_callback': None,
                          'on_failure_callback': None,
                          'on_retry_callback': None,
                          'on_success_callback': None,
                          'outlets': [],
                          'owner': 'airflow',
                          'params': {},
                          'pool': 'default_pool',
                          'pool_slots': 1,
                          'priority_weight': 1,
                          'queue': 'default',
                          'resources': None,
                          'retries': 0,
                          'retry_delay': timedelta(0, 300),
                          'retry_exponential_backoff': False,
                          'run_as_user': None,
                          'sla': None,
                          'start_date': None,
                          'subdag': None,
                          'task_concurrency': None,
                          'task_id': '10',
                          'trigger_rule': 'all_success',
                          'wait_for_downstream': False,
                          'weight_rule': 'downstream'}, fields,
                         """
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

     ACTION NEEDED! PLEASE READ THIS CAREFULLY AND CORRECT TESTS CAREFULLY

 Some fields were added to the BaseOperator! Please add them to the list above and make sure that
 you add support for DAG serialization - you should add the field to
 `airflow/serialization/schema.json` - they should have correct type defined there.

 Note that we do not support versioning yet so you should only add optional fields to BaseOperator.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                         """
                         )

    def test_task_group_serialization(self):
        """
        Test TaskGroup serialization/deserialization.

        Builds a DAG with nested task groups, validates its serialized form
        against the schema, and checks that every operator reachable from the
        deserialized task group serializes like its round-tripped original.
        """
        from airflow.operators.dummy_operator import DummyOperator
        from airflow.utils.task_group import TaskGroup

        execution_date = datetime(2020, 1, 1)
        with DAG("test_task_group_serialization", start_date=execution_date) as dag:
            task1 = DummyOperator(task_id="task1")
            with TaskGroup("group234") as group234:
                DummyOperator(task_id="task2")

                with TaskGroup("group34") as group34:
                    DummyOperator(task_id="task3")
                    DummyOperator(task_id="task4")

            task5 = DummyOperator(task_id="task5")
            task1 >> group234
            group34 >> task5

        SerializedDAG.validate_schema(SerializedDAG.to_dict(dag))
        from_json = SerializedDAG.from_json(SerializedDAG.to_json(dag))
        self.validate_deserialized_dag(from_json, dag)

        serialized_dag = SerializedDAG.deserialize_dag(SerializedDAG.serialize_dag(dag))

        root_children = serialized_dag.task_group.children
        assert root_children
        assert serialized_dag.task_group.children.keys() == dag.task_group.children.keys()

        def check_task_group(node):
            """Walk ``node`` recursively and verify every operator found in it."""
            try:
                children = node.children.values()
            except AttributeError:
                # No children available: treat the node as an operator and
                # compare it with the round-tripped original task.
                source_task = dag.get_task(node.task_id)
                first_pass = SerializedBaseOperator.serialize_operator(source_task)
                rebuilt_task = SerializedBaseOperator.deserialize_operator(first_pass)
                expected_dict = SerializedBaseOperator.serialize_operator(rebuilt_task)
                assert node
                assert SerializedBaseOperator.serialize_operator(node) == expected_dict
            else:
                for child in children:
                    check_task_group(child)

        check_task_group(serialized_dag.task_group)
Exemplo n.º 30
0
    def _populate(self, year):
        """Add the Malaysian public holidays of ``year`` to this mapping.

        Holidays are stored as ``self[date] = name``.  ``self.state`` selects
        which state-specific holidays are added in addition to the nationwide
        ones.  The method works in this order:

        1. Nationwide holidays, plus the Sarawak-specific ones.
        2. A pass over everything stored so far for ``year``: each holiday
           falling on a Sunday is tagged ``" [Sunday]"`` and the next date
           that is not yet a holiday receives an ``" [In lieu]"`` entry.
        3. Holidays added after that pass (Pesta Kaamatan, election days,
           Awal Muharram, Labour Day and the state holidays); the pass above
           does not create in-lieu days for these.

        For the movable feasts, gazetted dates are taken from the tables below
        when ``year`` is listed; otherwise an estimated date is computed and
        the holiday name is marked ``*estimated``.

        :param year: Gregorian year to populate.
        """

        # New Year's Day
        if self.state not in ("JHR", "KDH", "KTN", "PLS", "TRG"):
            self[date(year, JAN, 1)] = "New Year's Day"

        # Birthday of the Prophet Muhammad (s.a.w.).
        # a.k.a. Hari Keputeraan Nabi Muhammad (Sabah Act)
        for hol_date in self.my_islamic_to_gre(year, 3, 12):
            self[
                hol_date
            ] = "Maulidur Rasul (Birthday of the Prophet Muhammad)"

        # Hari Kebangsaan or National Day.
        self[date(year, AUG, 31)] = "National Day"

        # Chinese New Year (one day in the States of Kelantan and Terengganu,
        # two days in the other States).
        hol_date = self.cnls.lunar_n_y_date(year)
        self[hol_date] = "Chinese New Year"
        # The second day of Chinese New Year is not a federal holiday in
        # Kelantan and Terengganu. However, it is gazetted as a state holiday
        # in both states, effectively making it a nationwide holiday.
        self[hol_date + rd(days=+1)] = "Chinese New Year Holiday"

        # Wesak Day.
        # Date of observance is announced yearly
        # https://en.wikipedia.org/wiki/Vesak#Dates_of_observance
        dates_obs = {
            2001: (MAY, 7),
            2002: (MAY, 27),
            2003: (MAY, 15),
            2004: (JUN, 2),
            2005: (MAY, 23),
            2006: (MAY, 12),
            2007: (MAY, 31),
            2008: (MAY, 19),
            2009: (MAY, 9),
            2010: (MAY, 28),
            2011: (MAY, 17),
            2012: (MAY, 5),
            2013: (MAY, 24),
            2014: (MAY, 13),
            2015: (JUN, 1),
            2016: (MAY, 20),
            2017: (MAY, 10),
            2018: (MAY, 29),
            2019: (MAY, 19),
            2020: (MAY, 7),
            2021: (MAY, 26),
            2022: (MAY, 15),
        }
        if year in dates_obs:
            hol_date = date(year, *dates_obs[year])
            self[hol_date] = "Vesak Day"
        else:
            hol_date = self.cnls.vesak_may_date(year)
            self[hol_date] = "Vesak Day* (*estimated; ~10% chance +/- 1 day)"

        # Birthday of [His Majesty] the Yang di-Pertuan Agong.
        # Up to 2017: first Saturday of June; 2018: fixed 9 September;
        # afterwards: first Monday of June.
        if year <= 2017:
            hol_date = rrule(
                MONTHLY,
                dtstart=date(year, JUN, 1),
                count=1,
                bysetpos=1,
                byweekday=SA,
            )[0]
        elif year == 2018:
            hol_date = date(2018, SEP, 9)
        else:
            hol_date = rrule(
                MONTHLY,
                dtstart=date(year, JUN, 1),
                count=1,
                bysetpos=1,
                byweekday=MO,
            )[0]
        self[hol_date] = "Birthday of SPB Yang di-Pertuan Agong"

        # Hari Raya Puasa (2 days).
        # aka Eid al-Fitr;
        # exact date of observance is announced yearly
        # NOTE(review): the "Hari Raya Aidilfitri" table further below covers
        # the same festival but lists 2001 as DEC 16 -- confirm which is right.
        dates_obs = {
            2001: [(DEC, 17)],
            2002: [(DEC, 6)],
            2003: [(NOV, 25)],
            2004: [(NOV, 14)],
            2005: [(NOV, 3)],
            2006: [(OCT, 24)],
            2007: [(OCT, 13)],
            2008: [(OCT, 1)],
            2009: [(SEP, 20)],
            2010: [(SEP, 10)],
            2011: [(AUG, 30)],
            2012: [(AUG, 19)],
            2013: [(AUG, 8)],
            2014: [(JUL, 28)],
            2015: [(JUL, 17)],
            2016: [(JUL, 6)],
            2017: [(JUN, 25)],
            2018: [(JUN, 15)],
            2019: [(JUN, 5)],
            2020: [(MAY, 24)],
            2021: [(MAY, 13)],
            2022: [(MAY, 2)],
        }
        if year in dates_obs:
            for date_obs in dates_obs[year]:
                hol_date = date(year, *date_obs)
                self[hol_date] = "Hari Raya Puasa"
                self[hol_date + rd(days=+1)] = "Second day of Hari Raya Puasa"
        else:
            for date_obs in islamic_to_gre(year, 10, 1):
                hol_date = date_obs
                self[hol_date] = "Hari Raya Puasa* (*estimated)"
                self[hol_date + rd(days=+1)] = (
                    "Second day of Hari Raya Puasa*" " (*estimated)"
                )

        # Hari Raya Haji and Arafat Day.
        # Date of observance is announced yearly.
        dates_obs = {
            2001: [(MAR, 6)],
            2002: [(FEB, 23)],
            2003: [(FEB, 12)],
            2004: [(FEB, 1)],
            2005: [(JAN, 21)],
            2006: [(JAN, 10)],
            2007: [(DEC, 20)],
            2008: [(DEC, 8)],
            2009: [(NOV, 27)],
            2010: [(NOV, 17)],
            2011: [(NOV, 6)],
            2012: [(OCT, 26)],
            2013: [(OCT, 15)],
            2014: [(OCT, 5)],
            2015: [(SEP, 24)],
            2016: [(SEP, 12)],
            2017: [(SEP, 1)],
            2018: [(AUG, 22)],
            2019: [(AUG, 11)],
            2020: [(JUL, 31)],
            2021: [(JUL, 20)],
            2022: [(JUL, 9)],
        }
        if year in dates_obs:
            for date_obs in dates_obs[year]:
                hol_date = date(year, *date_obs)
                self[hol_date] = "Hari Raya Haji"
                if self.state == "TRG":
                    # Arafat Day is one day before Eid al-Adha
                    self[hol_date - rd(days=1)] = "Arafat Day"
                if self.state in ("KDH", "KTN", "PLS", "TRG"):
                    # Second day
                    self[hol_date + rd(days=1)] = "Hari Raya Haji Holiday"
        else:
            for date_obs in islamic_to_gre(year, 12, 10):
                hol_date = date_obs
                self[hol_date] = "Hari Raya Haji* (*estimated)"
                if self.state == "TRG":
                    # Arafat Day is one day before Eid al-Adha
                    self[hol_date - rd(days=1)] = "Arafat Day* (*estimated)"
                if self.state in ("KDH", "KTN", "PLS", "TRG"):
                    # Second day
                    self[
                        hol_date + rd(days=1)
                    ] = "Hari Raya Haji Holiday* (*estimated)"

        # Deepavali.
        # aka Diwali;
        # date of observance is announced yearly
        # Not added for Sarawak.
        if self.state != "SWK":
            dates_obs = {
                2001: (NOV, 14),
                2002: (NOV, 3),
                2003: (OCT, 23),
                2004: (NOV, 11),
                2005: (NOV, 1),
                2006: (OCT, 21),
                2007: (NOV, 8),
                2008: (OCT, 27),
                2009: (OCT, 17),
                2010: (NOV, 5),
                2011: (OCT, 26),
                2012: (NOV, 13),
                2013: (NOV, 2),
                2014: (OCT, 22),
                2015: (NOV, 10),
                2016: (OCT, 29),
                2017: (OCT, 18),
                2018: (NOV, 6),
                2019: (OCT, 27),
                2020: (NOV, 14),
                2021: (NOV, 4),
                2022: (NOV, 24),
            }
            if year in dates_obs:
                hol_date = date(year, *dates_obs[year])
                self[hol_date] = "Deepavali"
            else:
                hol_date = self.cnls.s_diwali_date(year)
                self[hol_date] = "Deepavali* (*estimated; rarely on day after)"

        # Christmas day.
        self[date(year, DEC, 25)] = "Christmas Day"

        # Malaysia Day.
        self[date(year, SEP, 16)] = "Malaysia Day"

        # ---------------------------------------------------------#
        # Holidays from the Sarawak Ordinance (not included above) #
        # ---------------------------------------------------------#
        if self.state == "SWK":
            # Dayak Festival Day (the first day of June) and the following day.
            self[date(year, JUN, 1)] = "Gawai Dayak"
            self[date(year, JUN, 2)] = "Gawai Dayak (Second day)"

            # The first day of May (Worker's Celebration Day) is added
            # further below as "Labour Day".

            # Birthday of Tuan Yang Terutama Yang di-Pertua Negeri Sarawak (the
            # second Saturday of October).
            second_sat_oct = rrule(
                MONTHLY,
                dtstart=date(year, OCT, 1),
                count=1,
                bysetpos=2,
                byweekday=SA,
            )[0]
            self[second_sat_oct] = "Birthday of the Governor of Sarawak"

            # Sarawak Independence Day
            if year > 2016:
                self[date(year, JUL, 22)] = "Sarawak Day"

        # Check for holidays that fall on a Sunday and
        # implement Section 3 of Malaysian Holidays Act:
        # "if any day specified in the Schedule falls on
        # Sunday then the day following shall be a public
        # holiday and if such day is already a public holiday,
        # then the day following shall be a public holiday"
        # A snapshot of the items is iterated because the loop body inserts
        # new entries into ``self``.
        for (hol_date, hol_name) in list(self.items()):
            if hol_date.year == year and hol_date.weekday() == SUN:
                self[hol_date] += " [Sunday]"
                in_lieu_date = hol_date + rd(days=+1)
                # Skip forward past days that are already holidays.
                while in_lieu_date in self:
                    in_lieu_date += rd(days=+1)
                self[in_lieu_date] = hol_name + " [In lieu]"

        # The last two days in May (Pesta Kaamatan).
        # (Sarawak Act)
        # NOTE(review): the code applies this to LBN and SBH only, so the
        # "Sarawak Act" attribution above looks wrong -- confirm.
        # Day following a Sunday is not a holiday
        if self.state in ("LBN", "SBH"):
            self[date(year, MAY, 30)] = "Pesta Kaamatan"
            self[date(year, MAY, 31)] = "Pesta Kaamatan (Second day)"

        # ------------------------------#
        # Other holidays (decrees etc.) #
        # ------------------------------#

        # Malaysia General Election Holiday.
        dates_obs = {
            # The years 1955 1959 1995 seems to have the elections
            # one weekday but I am not sure if they were marked as
            # holidays.
            1999: (NOV, 29),
            2018: (MAY, 9),
        }
        if year in dates_obs:
            self[
                date(year, *dates_obs[year])
            ] = "Malaysia General Election Holiday"

        # Awal Muharram.
        for hol_date in self.my_islamic_to_gre(year, 1, 1):
            self[hol_date] = "Awal Muharram (Hijri New Year)"

        # Labour Day.
        self[date(year, MAY, 1)] = "Labour Day"

        # ---------------------------------#
        # State holidays (multiple states) #
        # ---------------------------------#

        # 1 January (or the following day if the 1 January should fall on a
        # weekly holiday in any State or in the Federal Territory).
        if self.state in (
            "KUL",
            "LBN",
            "MLK",
            "NSN",
            "PHG",
            "PNG",
            "PRK",
            "PJY",
            "SBH",
            "SWK",
            "SGR",
        ):
            hol_date = date(year, JAN, 1)
            self[hol_date] = "New Year's Day"
            # Test the weekday of the date: comparing the date object itself
            # with SUN is never true, which left this branch unreachable.
            if hol_date.weekday() == SUN:
                self[hol_date] += " [Sunday]"
                self[date(year, JAN, 2)] = "New Year's Day [In lieu]"

        # Isra and Mi'raj.
        if self.state in ("KDH", "NSN", "PLS", "TRG"):
            for hol_date in islamic_to_gre(year, 7, 27):
                self[hol_date] = "Isra and Mi'raj"

        # Beginning of Ramadan.
        # NOTE(review): "Begining" is misspelled in the stored name; it is
        # kept unchanged here because callers may match on the exact string.
        if self.state in ("JHR", "KDH", "MLK"):
            for hol_date in islamic_to_gre(year, 9, 1):
                self[hol_date] = "Begining of Ramadan"

        # Nuzul Al-Quran Day.
        # Only added when a state is set and it is not one of those listed.
        if self.state and self.state not in (
            "JHR",
            "KDH",
            "MLK",
            "NSN",
            "SBH",
            "SWK",
        ):
            for hol_date in islamic_to_gre(year, 9, 17):
                self[hol_date] = "Nuzul Al-Quran Day"

        # Hari Raya Aidilfitri.
        # aka Eid al-Fitr;
        # date of observance is announced yearly
        # NOTE(review): the "Hari Raya Puasa" table above covers the same
        # festival but lists 2001 as DEC 17 -- confirm which is right.
        dates_obs = {
            2001: [(DEC, 16)],
            2002: [(DEC, 6)],
            2003: [(NOV, 25)],
            2004: [(NOV, 14)],
            2005: [(NOV, 3)],
            2006: [(OCT, 24)],
            2007: [(OCT, 13)],
            2008: [(OCT, 1)],
            2009: [(SEP, 20)],
            2010: [(SEP, 10)],
            2011: [(AUG, 30)],
            2012: [(AUG, 19)],
            2013: [(AUG, 8)],
            2014: [(JUL, 28)],
            2015: [(JUL, 17)],
            2016: [(JUL, 6)],
            2017: [(JUN, 25)],
            2018: [(JUN, 15)],
            2019: [(JUN, 5)],
            2020: [(MAY, 24)],
            2021: [(MAY, 13)],
            2022: [(MAY, 2)],
        }
        if year in dates_obs:
            for date_obs in dates_obs[year]:
                hol_date = date(year, *date_obs)
                self[hol_date] = "Hari Raya Aidilfitri"
                hol_date += rd(days=+1)
                self[hol_date] = "Hari Raya Aidilfitri Holiday"
        else:
            for date_obs in islamic_to_gre(year, 10, 1):
                hol_date = date_obs
                self[hol_date] = "Hari Raya Aidilfitri* (*estimated)"
                hol_date += rd(days=+1)
                self[hol_date] = "Hari Raya Aidilfitri Holiday* (*estimated)"

        # Good Friday.
        if self.state in ("SBH", "SWK"):
            # Last Friday on or before Easter Sunday.
            self[easter(year) + rd(weekday=FR(-1))] = "Good Friday"

        # Thaipusam.
        # An annual Hindu festival observed on the day of the first full moon
        # during the Tamil month of Thai
        if self.state in ("JHR", "KUL", "NSN", "PJY", "PNG", "PRK", "SGR"):
            dates_obs = {
                2018: [(JAN, 31)],
                2019: [(JAN, 21)],
                2020: [(FEB, 8)],
                2021: [(JAN, 28)],
                2022: [(JAN, 18)],
                2023: [(FEB, 4)],
                2024: [(JAN, 25)],
                2025: [(FEB, 11)],
                2026: [(FEB, 1)],
                2027: [(JAN, 22)],
            }
            if year in dates_obs:
                for date_obs in dates_obs[year]:
                    hol_date = date(year, *date_obs)
                    self[hol_date] = "Thaipusam"
            else:
                hol_date = self.cnls.thaipusam_date(year)
                self[hol_date] = "Thaipusam* (*estimated)"

        # Federal Territory Day.
        if self.state in ("KUL", "LBN", "PJY"):
            if year > 1973:
                self[date(year, FEB, 1)] = "Federal Territory Day"

        # State holidays (single state)
        # -----------------------------

        if self.state == "JHR":
            if year > 2014:
                self[date(year, MAR, 23)] = "Birthday of the Sultan of Johor"
            for date_obs in islamic_to_gre(year, 2, 6):
                self[date_obs] = "Hari Hol of Sultan Iskandar of Johor"

        elif self.state == "KDH":
            # Third Sunday of June.
            third_sun_jun = rrule(
                MONTHLY,
                dtstart=date(year, JUN, 1),
                count=1,
                bysetpos=3,
                byweekday=SU,
            )[0]
            self[third_sun_jun] = "Birthday of The Sultan of Kedah"

        elif self.state == "KTN":
            self[date(year, NOV, 11)] = "Birthday of the Sultan of Kelantan"
            self[
                date(year, NOV, 12)
            ] = "Birthday of the Sultan of Kelantan Holiday"

        elif self.state == "MLK":
            self[
                date(year, APR, 15)
            ] = "Declaration of Malacca as a Historical City in Melaka"
            self[
                date(year, AUG, 24)
            ] = "Birthday of the Governor of the State of Melaka"

        elif self.state == "NSN":
            self[
                date(year, JAN, 14)
            ] = "Birthday of the Sultan of Negeri Sembilan"

        elif self.state == "PHG":
            self[date(year, MAY, 22)] = "Hari Hol of Pahang"
            self[date(year, JUL, 30)] = "Birthday of the Sultan of Pahang"

        elif self.state == "PNG":
            self[date(year, JUL, 7)] = "George Town Heritage Day"
            # Second Saturday of July.
            second_sat_jul = rrule(
                MONTHLY,
                dtstart=date(year, JUL, 1),
                count=1,
                bysetpos=2,
                byweekday=SA,
            )[0]
            self[second_sat_jul] = "Birthday of the Governor of Penang"

        elif self.state == "PRK":
            if year > 2016:
                # First Friday of November.
                first_fri_nov = rrule(
                    MONTHLY,
                    dtstart=date(year, NOV, 1),
                    count=1,
                    bysetpos=1,
                    byweekday=FR,
                )[0]
                self[first_fri_nov] = "Birthday of the Sultan of Perak"
            else:
                # This Holiday used to be on 27th until 2017
                # https://www.officeholidays.com/holidays/malaysia/birthday-of-the-sultan-of-perak  # noqa: E501
                self[date(year, NOV, 27)] = "Birthday of the Sultan of Perak"

        elif self.state == "PLS":
            self[date(year, JUL, 17)] = "Birthday of The Raja of Perlis"

        elif self.state == "SGR":
            self[date(year, DEC, 11)] = "Birthday of The Sultan of Selangor"

        elif self.state == "SBH":
            # First Saturday of October.
            first_sat_oct = rrule(
                MONTHLY,
                dtstart=date(year, OCT, 1),
                count=1,
                bysetpos=1,
                byweekday=SA,
            )[0]
            self[first_sat_oct] = "Birthday of the Governor of Sabah"
            if year > 2018:
                self[date(year, DEC, 24)] = "Christmas Eve"

        elif self.state == "TRG":
            self[
                date(year, MAR, 4)
            ] = "Anniversary of the Installation of the Sultan of Terengganu"
            self[date(year, APR, 26)] = "Birthday of the Sultan of Terengganu"