import unittest

# DateInterval is assumed to be importable from the same dateInterval module used by the driver script below.
from dateInterval import DateInterval


class TestDateInterval(unittest.TestCase):

    def setUp(self):
        # Build a fresh four-day interval before every test.
        self.interval_start = "2015-12-29"
        self.interval_end = "2016-01-02"
        self.interval_code = "unit_test"
        self.expected_days_in_interval = 4
        self.testDateIntervalInstance = DateInterval(
            self.interval_start,
            self.interval_end,
            self.interval_code
        )  # type: DateInterval

    def tearDown(self):
        # Release the fixtures so no state leaks between tests.
        self.testDateIntervalInstance = None
        self.interval_start = None
        self.interval_end = None
        self.interval_code = None
        self.expected_days_in_interval = None

    def test_init(self):
        # The constructor should store the start date, end date, and code unchanged.
        test_start_date = self.testDateIntervalInstance.start_date
        self.assertEqual(test_start_date, self.interval_start)

        test_end_date = self.testDateIntervalInstance.end_date
        self.assertEqual(test_end_date, self.interval_end)

        test_code = self.testDateIntervalInstance.code
        self.assertEqual(test_code, self.interval_code)

    def test_isDateInInterval(self):
        # Dates on or between the interval boundaries are reported as inside the interval.
        dates_that_are_in_interval = ["2015-12-30", "2015-12-29", "2015-12-31", "2016-01-01", "2016-01-02"]
        for date_string in dates_that_are_in_interval:
            result = self.testDateIntervalInstance.isDateInInterval(date_string)
            self.assertTrue(result)

        # Dates before the start or after the end are reported as outside the interval.
        dates_not_in_interval = ["2015-12-28", "2015-11-30", "2014-12-31", "2016-01-03", "2015-01-01"]
        for date_string in dates_not_in_interval:
            result = self.testDateIntervalInstance.isDateInInterval(date_string)
            self.assertFalse(result)

    def test_getNumberOfDaysInInterval(self):
        # The configured interval should span the expected number of days.
        test_number_of_days = self.testDateIntervalInstance.getNumberOfDaysInInterval()
        self.assertEqual(test_number_of_days, self.expected_days_in_interval)
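To run the tests directly, the test module can be given a standard unittest entry point. This is a minimal sketch and assumes the file is executed as a script with no custom test runner in use.

if __name__ == '__main__':
    # Discover and run every test method defined in this module.
    unittest.main()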
from pyspark import SparkContext
from pyspark.sql import HiveContext, Row

from stockRdd import StockRdd
from dateInterval import DateInterval, DateIntervalManager

spark_url = "spark://10.211.55.4:7077"
spark_context_name = "Find Down Stocks"
included_python_files_package = ['/var/machine_learning/stocks/python/stocks_python.zip']
mysql_url = "jdbc:mysql://10.211.55.4:3306/stocks?user=parallels&password=dellc123"
data_files = "file:///var/data/stocks/historical_data/*.csv"

# Connect to the standalone cluster and ship the zipped project code to the workers.
sc = SparkContext(spark_url, spark_context_name, pyFiles=included_python_files_package)
sqlContext = HiveContext(sc)

# Load the raw historical CSV files and drop duplicate lines.
sample_data_rdd = sc.textFile(data_files).distinct()

# Build one interval per day for the past year, ending yesterday.
yesterday_date = DateInterval.getYesterdayDate()
dailyDateIntervalDictionaryToCalculateFor = DateIntervalManager.createDailyIntervalDictionaryForPastYear(yesterday_date)

# Only keep symbols that have data points for at least 4/7 of the days in the look-back window.
number_of_days_in_dictionary = dailyDateIntervalDictionaryToCalculateFor.getNumberOfDaysInDictionary()
minimum_number_of_days = int((4.0 / 7.0) * float(number_of_days_in_dictionary))

# Closures that carry the date-interval dictionary out to the worker tasks.
mapStockCsvToKeyValueClosure = StockRdd.getMapStockCsvToKeyValueForDatesInDictionaryClosure(dailyDateIntervalDictionaryToCalculateFor)
symbol_creation_function_closure = StockRdd.getSymbolDataInstanceForDateDictionaryDataPointsClosure(dailyDateIntervalDictionaryToCalculateFor, yesterday_date)

# Map each CSV line to a (symbol, data) pair, drop lines outside the date window, combine the
# data per symbol, sort it and compute deltas, then keep only symbols with enough days of data.
symbol_down_stocks_data_filtered = sample_data_rdd.map(mapStockCsvToKeyValueClosure)\
    .filter(lambda line: line is not None)\
    .reduceByKey(lambda a, b: a + b)\
    .map(lambda symbol_tuple: (symbol_tuple[0], StockRdd.sort_and_compute_deltas(list(symbol_tuple[1]))))\
    .filter(lambda symbol_tuple: len(list(symbol_tuple[1])) > minimum_number_of_days)\