def test_transform_raises_MissingDatSetError_when_one_dataset_given(self):
     """
     Transform is built from the datasets extracted from a single file;
     calling transform_data on it must raise MissingDatasetError.
     """
     single_file_datasets = Extract.from_files(
         Constants._NYT_DATA_GOOD).get_datasets()
     transformer = Transform(single_file_datasets)
     with self.assertRaises(MissingDatasetError):
         transformer.transform_data()
 def test_extract_from_two_urls_returns_two_datasets(self):
     """
     Extracting from the two configured URLs should yield two datasets.
     """
     extractor = Extract.from_urls(Constants._JH_URL, Constants._NYT_URL)
     datasets = extractor.get_datasets()
     # Use the TestCase assertion rather than a bare ``assert``: bare asserts
     # are stripped under ``python -O`` (the test would pass vacuously) and
     # assertEqual reports both values on failure.
     self.assertEqual(len(datasets), 2)
 def test_transform_raises_InvalidDatasetError_when_column_is_missing(self):
     """
     A dataset lacking a required column must make transform_data raise
     InvalidDatasetError.
     """
     datasets = Extract.from_files(
         Constants._NYT_DATA_MISSING_COLUMN).get_datasets()
     transformer = Transform(datasets)
     with self.assertRaises(InvalidDatasetError):
         transformer.transform_data()
 def test_transform_raises_InvalidDatasetError_when_date_cannot_be_parsed(
         self):
     """
     A dataset containing a date that cannot be parsed must make
     transform_data raise InvalidDatasetError.
     """
     datasets = Extract.from_files(
         Constants._NYT_DATA_BAD_DATE,
         Constants._JH_DATA_GOOD,
     ).get_datasets()
     transformer = Transform(datasets)
     with self.assertRaises(InvalidDatasetError):
         transformer.transform_data()
    def test_extract_from_two_files_returns_two_datasets(self):
        """
        Loading two local files should produce two datasets.

        File loading is also relied on by other tests.
        """
        datasets = Extract.from_files(
            Constants._JH_DATA_GOOD,
            Constants._NYT_DATA_GOOD,
        ).get_datasets()
        dataset_count = len(datasets)
        assert dataset_count == 2
示例#6
0
def extract(options):
    """
    Construct an ``Extract("map", ...)`` from the entries in ``options``.

    The optional "url" and "ref" entries are read from ``options``; a missing
    key is treated as None. When the url is truthy it is passed to
    ``get_mapping`` and the result becomes the map path, otherwise the map
    path is None. The constructed ``Extract`` object is not returned.
    """
    url = options["url"] if "url" in options else None
    ref = options["ref"] if "ref" in options else None

    # Only resolve a mapping when a non-empty url was supplied.
    map_path = get_mapping(url) if url else None

    Extract("map", map_path, 16, ref)
    def test_transform_with_valid_data_returns_correct_date_range(self):
        """
        Merging two valid datasets should keep only the dates that appear
        in both inputs.

        JH input covers 2020-01-22 through 2020-02-03
        NYT input covers 2020-01-21 through 2020-02-03
        so the merged output should span 2020-01-22 through 2020-02-03
        """
        expected_range = (date(2020, 1, 22), date(2020, 2, 3))

        datasets = Extract.from_files(Constants._NYT_DATA_GOOD,
                                      Constants._JH_DATA_GOOD).get_datasets()
        merged_data = Transform(datasets).transform_data()

        earliest = min(row['date'] for row in merged_data)
        latest = max(row['date'] for row in merged_data)

        assert (earliest, latest) == expected_range
示例#8
0
 def start_extract_map(self):
     """
     Call ``destroy()`` on ``self.root``, then construct ``Extract("dw")``.

     NOTE(review): ``self.root`` is presumably the GUI root window - confirm.
     """
     root = self.root
     root.destroy()
     Extract("dw")
示例#9
0
from src.extract import Extract
from src.report import Report

def main():
    """
    Ingest the events read from ``../input/input.txt`` via ``Extract``, then
    call ``Report().TopXSimpleLTVCustomers(10)``.
    """
    # Ingest data
    extractor = Extract()
    events = extractor.get_events_data_from_file("../input/input.txt")
    extractor.ingest(events)

    # Run report
    Report().TopXSimpleLTVCustomers(10)


if __name__ == "__main__":
    main()

    #
    # week = dt.strftime("%U")
    # year = dt.strftime("%Y")
    #
    # # 53rd week of previous year will be combined with 0th week of next year
    # if week == "53":
    #     week = '00'
    #     temp_year = int(year) + 1
    #     year = str(temp_year)
    #
    # weekly_visit_key = year + '-' + week
    # print (weekly_visit_key)
    #
    #
    #
    # # d = "2017-01-06T12:45:52.041Z"
    # # r = datetime.datetime.strptime(d + '-0', "%Y-%M-%DT%H-%M-W%W-%w")
    # # print(r)
示例#10
0
from src.extract import Extract

if __name__ == "__main__":
    # Script entry point: build an Extract instance and call its boot() method.
    extractor = Extract()
    extractor.boot()