Пример #1
0
    def test_write_points_from_dataframe_with_float_nan(self):
        """Check that a NaN float cell is posted as a JSON null."""
        epoch = pd.Timestamp('1970-01-01 00:00+00:00')
        frame = pd.DataFrame(
            data=[[1, float("NaN"), 1.0], [2, 2, 2.0]],
            index=[epoch, epoch + timedelta(hours=1)],
            columns=["column_one", "column_two", "column_three"])
        # The NaN of the first row is expected to come back as None once the
        # posted JSON body is decoded.
        expected = [{
            "points": [[1, None, 1.0, 0], [2, 2, 2.0, 3600]],
            "name": "foo",
            "columns": ["column_one", "column_two", "column_three", "time"],
        }]

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')
            client.write_points({"foo": frame})

            sent = json.loads(mocker.last_request.body)
            self.assertListEqual(sent, expected)
    def test_write_points_from_dataframe_with_period_index(self):
        """Write a frame indexed by two daily periods and check the body."""
        first_day = pd.Period('1970-01-01')
        second_day = pd.Period('1970-01-02')
        frame = pd.DataFrame(
            data=[["1", 1, 1.0], ["2", 2, 2.0]],
            index=[first_day, second_day],
            columns=["column_one", "column_two", "column_three"])
        # The two periods are expected as time values 0 and 86400.
        expected = [{
            "points": [["1", 1, 1.0, 0], ["2", 2, 2.0, 86400]],
            "name": "foo",
            "columns": ["column_one", "column_two", "column_three", "time"],
        }]

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')
            client.write_points({"foo": frame})

            sent = json.loads(mocker.last_request.body)
            self.assertListEqual(sent, expected)
    def test_write_points_from_dataframe_with_float_nan(self):
        """Post a frame containing one NaN and compare the decoded body."""
        start = pd.Timestamp('1970-01-01 00:00+00:00')
        rows = [[1, float("NaN"), 1.0], [2, 2, 2.0]]
        stamps = [start, start + timedelta(hours=1)]
        frame = pd.DataFrame(data=rows,
                             index=stamps,
                             columns=["column_one", "column_two",
                                      "column_three"])

        # NaN is expected to be decoded as None in the first point.
        expected_points = [
            [1, None, 1.0, 0],
            [2, 2, 2.0, 3600],
        ]
        expected = [{
            "points": expected_points,
            "name": "foo",
            "columns": ["column_one", "column_two", "column_three", "time"],
        }]

        with requests_mock.Mocker() as mock:
            mock.register_uri(requests_mock.POST,
                              "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')
            client.write_points({"foo": frame})

            self.assertListEqual(json.loads(mock.last_request.body), expected)
Пример #4
0
    def test_write_points_from_dataframe_with_numeric_column_names(self):
        """Write a frame whose columns were left at their numeric defaults."""
        epoch = pd.Timestamp('1970-01-01 00:00+00:00')
        # No ``columns`` argument: the frame gets numeric column labels.
        frame = pd.DataFrame(
            data=[["1", 1, 1.0], ["2", 2, 2.0]],
            index=[epoch, epoch + timedelta(hours=1)])
        # The labels are expected in the body as the strings '0', '1', '2'.
        expected = [{
            "points": [["1", 1, 1.0, 0], ["2", 2, 2.0, 3600]],
            "name": "foo",
            "columns": ['0', '1', '2', "time"],
        }]

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')
            client.write_points({"foo": frame})

            sent = json.loads(mocker.last_request.body)
            self.assertListEqual(sent, expected)
Пример #5
0
    def test_write_points_from_dataframe_with_period_index(self):
        """Post a period-indexed frame and compare the decoded request body."""
        periods = [pd.Period('1970-01-01'), pd.Period('1970-01-02')]
        rows = [["1", 1, 1.0], ["2", 2, 2.0]]
        frame = pd.DataFrame(data=rows,
                             index=periods,
                             columns=["column_one", "column_two",
                                      "column_three"])

        # Expected time values for the two periods: 0 and 86400.
        expected_points = [
            ["1", 1, 1.0, 0],
            ["2", 2, 2.0, 86400],
        ]
        expected = [{
            "points": expected_points,
            "name": "foo",
            "columns": ["column_one", "column_two", "column_three", "time"],
        }]

        with requests_mock.Mocker() as mock:
            mock.register_uri(requests_mock.POST,
                              "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')
            client.write_points({"foo": frame})

            self.assertListEqual(json.loads(mock.last_request.body), expected)
Пример #6
0
 def test_query_multiple_time_series(self):
     """Query three mocked series at once and compare one frame per series."""
     stat_columns = ['mean', 'min', 'max', 'stddev']
     # Statistic values per series, in stat_columns order.
     stats = [
         ('series1', [323048, 323048, 323048, 0]),
         ('series2', [-2.8233, -2.8503, -2.7832, 0.0173]),
         ('series3', [-0.01220, -0.01220, -0.01220, 0]),
     ]

     # Mocked server answer: each series has a single point at time 0.
     data = []
     for series_name, values in stats:
         data.append({
             "name": series_name,
             "columns": ["time"] + stat_columns,
             "points": [[0] + values],
         })

     # Expected frames: the time column becomes the index.
     dataframes = {}
     for series_name, values in stats:
         dataframes[series_name] = pd.DataFrame(
             data=[values],
             index=pd.to_datetime([0], unit='s', utc=True),
             columns=stat_columns)

     with _mocked_session('get', 200, data):
         cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
         result = cli.query("""select mean(value), min(value), max(value),
             stddev(value) from series1, series2, series3""")
         assert dataframes.keys() == result.keys()
         for series_name in dataframes.keys():
             assert_frame_equal(dataframes[series_name], result[series_name])
Пример #7
0
 def _search_db(self,series_name):
     '''
     Return the first database of self.db_list whose series list contains
     series_name, or None when no database lists it
     '''
     for candidate in self.db_list:
         # A separate client is opened per candidate database.
         client = DataFrameClient(self.url, self.port, self.user, self.password, candidate)
         listed = client.get_list_series()
         if series_name not in listed:
             continue
         return candidate

     return None
Пример #8
0
 def test_list_series(self):
     """Check that get_list_series returns the names from the mocked reply."""
     payload = [
         {
             'columns': ['time', 'name'],
             'name': 'list_series_result',
             'points': [[0, 'seriesA'], [0, 'seriesB']],
         },
     ]
     with _mocked_session('get', 200, payload):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         names = client.get_list_series()
         assert names == ['seriesA', 'seriesB']
    def test_write_points_from_dataframe_fails_without_time_index(self):
        """Writing a frame without a time-based index must be rejected."""
        # No ``index`` argument: the frame keeps the default integer index.
        dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                                 columns=["column_one", "column_two",
                                          "column_three"])

        with requests_mock.Mocker() as m:
            m.register_uri(requests_mock.POST,
                           "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            # Assert the failure explicitly; without this the expected
            # exception would surface as a test error instead of a pass.
            # NOTE(review): TypeError is what the influxdb08 DataFrameClient
            # is expected to raise here - confirm against the client in use.
            with self.assertRaises(TypeError):
                cli.write_points({"foo": dataframe})
Пример #10
0
 def test_list_series(self):
     """Return the series names contained in the mocked list reply."""
     reply_points = [[0, 'seriesA'], [0, 'seriesB']]
     payload = [{
         'columns': ['time', 'name'],
         'name': 'list_series_result',
         'points': reply_points,
     }]
     with _mocked_session('get', 200, payload):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         names = client.get_list_series()
         self.assertEqual(names, ['seriesA', 'seriesB'])
    def test_write_points_from_dataframe_fails_with_series(self):
        """Writing a pandas Series instead of a DataFrame must be rejected."""
        now = pd.Timestamp('1970-01-01 00:00+00:00')
        # Deliberately a Series, although the variable is named dataframe.
        dataframe = pd.Series(data=[1.0, 2.0],
                              index=[now, now + timedelta(hours=1)])

        with requests_mock.Mocker() as m:
            m.register_uri(requests_mock.POST,
                           "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            # Assert the failure explicitly; without this the expected
            # exception would surface as a test error instead of a pass.
            # NOTE(review): TypeError is what the influxdb08 DataFrameClient
            # is expected to raise here - confirm against the client in use.
            with self.assertRaises(TypeError):
                cli.write_points({"foo": dataframe})
Пример #12
0
    def test_write_points_from_dataframe_fails_with_series(self):
        """A pandas Series passed to write_points must raise."""
        now = pd.Timestamp('1970-01-01 00:00+00:00')
        # Deliberately a Series, although the variable is named dataframe.
        dataframe = pd.Series(data=[1.0, 2.0],
                              index=[now, now + timedelta(hours=1)])

        with requests_mock.Mocker() as m:
            m.register_uri(requests_mock.POST,
                           "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            # The test name promises a failure, so state it as an assertion
            # rather than letting the exception escape the test method.
            # NOTE(review): TypeError is what the influxdb08 DataFrameClient
            # is expected to raise here - confirm against the client in use.
            with self.assertRaises(TypeError):
                cli.write_points({"foo": dataframe})
Пример #13
0
    def test_write_points_from_dataframe_fails_without_time_index(self):
        """A frame with the default integer index must raise on write."""
        # No ``index`` argument: the frame keeps the default integer index.
        dataframe = pd.DataFrame(
            data=[["1", 1, 1.0], ["2", 2, 2.0]],
            columns=["column_one", "column_two", "column_three"])

        with requests_mock.Mocker() as m:
            m.register_uri(requests_mock.POST,
                           "http://localhost:8086/db/db/series")

            cli = DataFrameClient(database='db')
            # The test name promises a failure, so state it as an assertion
            # rather than letting the exception escape the test method.
            # NOTE(review): TypeError is what the influxdb08 DataFrameClient
            # is expected to raise here - confirm against the client in use.
            with self.assertRaises(TypeError):
                cli.write_points({"foo": dataframe})
Пример #14
0
    def test_write_points_from_dataframe_in_batches(self):
        """Write a two-row frame with batch_size=1 and expect True back."""
        epoch = pd.Timestamp('1970-01-01 00:00+00:00')
        rows = [["1", 1, 1.0], ["2", 2, 2.0]]
        stamps = [epoch, epoch + timedelta(hours=1)]
        frame = pd.DataFrame(data=rows,
                             index=stamps,
                             columns=["column_one", "column_two",
                                      "column_three"])
        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')
            outcome = client.write_points({"foo": frame}, batch_size=1)
            assert outcome is True
    def test_write_points_from_dataframe_in_batches(self):
        """Write a two-row frame one row per batch; the call must be truthy."""
        start = pd.Timestamp('1970-01-01 00:00+00:00')
        frame = pd.DataFrame(
            data=[["1", 1, 1.0], ["2", 2, 2.0]],
            index=[start, start + timedelta(hours=1)],
            columns=["column_one", "column_two", "column_three"])
        with requests_mock.Mocker() as mock:
            mock.register_uri(requests_mock.POST,
                              "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')
            written = client.write_points({"foo": frame}, batch_size=1)
            self.assertTrue(written)
 def test_list_series(self):
     """List the series of a mocked database and compare the names."""
     series_reply = {
         'columns': ['time', 'name'],
         'name': 'list_series_result',
         'points': [[0, 'seriesA'], [0, 'seriesB']],
     }
     with _mocked_session('get', 200, [series_reply]):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         names = client.get_list_series()
         self.assertEqual(names, ['seriesA', 'seriesB'])
Пример #17
0
    def test_write_points_from_dataframe_with_time_precision(self):
        """Post the same frame with 's', 'm' and 'u' precision and compare
        the time value written for the second row each time."""
        epoch = pd.Timestamp('1970-01-01 00:00+00:00')
        frame = pd.DataFrame(
            data=[["1", 1, 1.0], ["2", 2, 2.0]],
            index=[epoch, epoch + timedelta(hours=1)],
            columns=["column_one", "column_two", "column_three"])

        def expected_body(second_row_time):
            # Only the time of the second row differs between precisions.
            return [{
                "points": [["1", 1, 1.0, 0],
                           ["2", 2, 2.0, second_row_time]],
                "name": "foo",
                "columns": ["column_one", "column_two", "column_three",
                            "time"],
            }]

        # (time_precision argument, expected time of the second row)
        cases = (
            ('s', 3600),
            ('m', 3600 * 1000),
            ('u', 3600 * 1000000),
        )

        with requests_mock.Mocker() as mocker:
            mocker.register_uri(requests_mock.POST,
                                "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')

            for precision, second_row_time in cases:
                client.write_points({"foo": frame}, time_precision=precision)
                sent = json.loads(mocker.last_request.body)
                self.assertListEqual(sent, expected_body(second_row_time))
Пример #18
0
 def test_query_into_dataframe(self):
     """Query a mocked series and compare the frame returned by the client."""
     payload = [{
         "name": "foo",
         "columns": ["time", "sequence_number", "column_one"],
         "points": [[3600, 16, 2], [3600, 15, 1], [0, 14, 2], [0, 13, 1]],
     }]
     # The expected frame is ordered by time, then sequence_number,
     # both ascending - the reverse of the mocked reply.
     stamps = pd.to_datetime([0, 0, 3600, 3600], unit='s', utc=True)
     expected = pd.DataFrame(data=[[13, 1], [14, 2], [15, 1], [16, 2]],
                             index=stamps,
                             columns=['sequence_number', 'column_one'])
     with _mocked_session('get', 200, payload):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         actual = client.query('select column_one from foo;')
         assert_frame_equal(expected, actual)
Пример #19
0
    def test_datetime_to_epoch(self):
        """Check _datetime_to_epoch with and without an explicit precision."""
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        moment = pd.Timestamp('2013-01-01 00:00:00.000+00:00')

        # (keyword arguments, expected value); the first entry passes no
        # time_precision at all.
        cases = [
            ({}, 1356998400.0),
            ({'time_precision': 's'}, 1356998400.0),
            ({'time_precision': 'm'}, 1356998400000.0),
            ({'time_precision': 'ms'}, 1356998400000.0),
            ({'time_precision': 'u'}, 1356998400000000.0),
        ]
        for kwargs, expected in cases:
            converted = client._datetime_to_epoch(moment, **kwargs)
            self.assertEqual(converted, expected)
Пример #20
0
 def __init__(self, db_name=None):
     '''
     Store the connection settings and open a DataFrameClient
     ------
     db_name: str, optional
         database handed to switch_database on the new client
     ------
     '''
     self.url = 'localhost'
     self.port = 8086
     self.user = '******'
     self.password = '******'
     self.db_list = ['FRED', 'Quandl', 'Econ', 'ChinaData']

     self.db = DataFrameClient(self.url, self.port, self.user, self.password)
     # Always define the attribute (None when no name was given) so that
     # reading self.db_name later cannot raise AttributeError.
     self.db_name = db_name
     self.db.switch_database(db_name)
 def test_query_into_dataframe(self):
     """Turn a mocked query reply into a frame and compare it."""
     reply_points = [[3600, 16, 2], [3600, 15, 1], [0, 14, 2], [0, 13, 1]]
     payload = [{
         "name": "foo",
         "columns": ["time", "sequence_number", "column_one"],
         "points": reply_points,
     }]
     # Expected rows: ascending by time first, then by sequence_number.
     expected_rows = [[13, 1], [14, 2], [15, 1], [16, 2]]
     expected_index = pd.to_datetime([0, 0, 3600, 3600],
                                     unit='s', utc=True)
     expected = pd.DataFrame(data=expected_rows,
                             index=expected_index,
                             columns=['sequence_number', 'column_one'])
     with _mocked_session('get', 200, payload):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         actual = client.query('select column_one from foo;')
         assert_frame_equal(expected, actual)
Пример #22
0
 def test_query_multiple_time_series(self):
     """Query several mocked series and compare one frame per series."""
     reply_columns = ["time", "mean", "min", "max", "stddev"]
     frame_columns = ['mean', 'min', 'max', 'stddev']
     # Statistic values per series, in frame_columns order.
     stats = [
         ('series1', [323048, 323048, 323048, 0]),
         ('series2', [-2.8233, -2.8503, -2.7832, 0.0173]),
         ('series3', [-0.01220, -0.01220, -0.01220, 0]),
     ]

     # Mocked reply: one point at time 0 for every series.
     data = [
         {"name": name, "columns": list(reply_columns),
          "points": [[0] + values]}
         for name, values in stats
     ]

     # Expected result: the time column moves into the index.
     dataframes = {}
     for name, values in stats:
         dataframes[name] = pd.DataFrame(
             data=[values],
             index=pd.to_datetime([0], unit='s', utc=True),
             columns=frame_columns)

     with _mocked_session('get', 200, data):
         cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
         result = cli.query("""select mean(value), min(value), max(value),
             stddev(value) from series1, series2, series3""")
         self.assertEqual(dataframes.keys(), result.keys())
         for name in dataframes.keys():
             assert_frame_equal(dataframes[name], result[name])
Пример #23
0
    def test_write_points_from_dataframe_with_time_precision(self):
        """Write one frame with each time precision and compare the bodies."""
        start = pd.Timestamp('1970-01-01 00:00+00:00')
        frame = pd.DataFrame(
            data=[["1", 1, 1.0], ["2", 2, 2.0]],
            index=[start, start + timedelta(hours=1)],
            columns=["column_one", "column_two", "column_three"])

        # Body expected for second precision; the other precisions differ
        # only in the time value of the second row.
        expected_s = [{
            "points": [["1", 1, 1.0, 0], ["2", 2, 2.0, 3600]],
            "name": "foo",
            "columns": ["column_one", "column_two", "column_three", "time"],
        }]

        expected_ms = copy.deepcopy(expected_s)
        expected_ms[0]["points"][1][-1] = 3600 * 1000

        expected_us = copy.deepcopy(expected_s)
        expected_us[0]["points"][1][-1] = 3600 * 1000000

        cases = [('s', expected_s), ('m', expected_ms), ('u', expected_us)]

        with requests_mock.Mocker() as mock:
            mock.register_uri(requests_mock.POST,
                              "http://localhost:8086/db/db/series")

            client = DataFrameClient(database='db')

            for precision, expected in cases:
                client.write_points({"foo": frame}, time_precision=precision)
                self.assertListEqual(json.loads(mock.last_request.body),
                                     expected)
Пример #24
0
 def test_query_with_empty_result(self):
     """An empty mocked reply must come back from query as an empty list."""
     no_series = []
     with _mocked_session('get', 200, no_series):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         outcome = client.query('select column_one from foo;')
         assert outcome == []
Пример #25
0
class InfluxDB(object):
    '''
    Connect to influxdb and pull/write data

    Series are fetched with query(); interpret() evaluates arithmetic
    expressions built from series names, numbers, the operators ^ * / + -,
    parentheses and the functions lag, mlag, avg and anticum.
    '''
    def __init__(self, db_name=None):
        '''
        Store the connection settings and open a DataFrameClient
        ------
        db_name: str, optional
            database handed to switch_database on the new client
        ------
        '''
        self.url = 'localhost'
        self.port = 8086
        self.user = '******'
        self.password = '******'
        self.db_list = ['FRED', 'Quandl', 'Econ', 'ChinaData']

        self.db = DataFrameClient(self.url, self.port, self.user, self.password)
        # Always define the attribute (None when no name was given) so that
        # reading self.db_name later cannot raise AttributeError.
        self.db_name = db_name
        self.db.switch_database(db_name)

    def _search_db(self, series_name):
        '''
        Search the db name for a series name
        ------
        return the first entry of self.db_list whose series list contains
        series_name, or None when no database lists it
        ------
        '''
        for db in self.db_list:
            temp_db = DataFrameClient(self.url, self.port, self.user, self.password, db)
            if series_name in temp_db.get_list_series():
                return db

        return None

    def query(self, series_name, db_name=None):
        '''
        Query a particular series
        ------
        series_name: str
            name of the series, e.g. "CPI_US"
        db_name: str, optional
            database holding the series; when omitted the databases in
            self.db_list are searched with _search_db
        ------
        return a pandas DataFrame with NaN representing missing values
        ------
        '''
        if db_name is None:
            db_name = self._search_db(series_name)
        self.db.switch_database(db_name)
        # NOTE(review): the statement is built by string formatting; only
        # pass trusted series names.
        results = self.db.query('SELECT * FROM %s' % series_name)

        # .str.contains yields NaN for non-string cells, so this is true as
        # soon as at least one value is a string; the string '.' is then
        # turned into a missing value before the float conversion.
        if(results['value'].str.contains('.').isnull().sum()!=len(results)):
            results.loc[results['value']=='.','value'] = None

        return results.astype(float)

    def _is_num(self, s):
        '''
        Determine if a string is a number
        '''
        try:
            float(s)
            return True
        except ValueError:
            return False

    def _include(self, expression_list, c):
        '''
        check whether expression_list include basic operator c, and give the index of first occurrence
        ------
        c: str
            the basic operator or parentheses c
        ------
        return the index of the first string item equal to c, -1 if absent
        ------
        '''
        for index, item in enumerate(expression_list):
            # Only strings are compared: the list also holds numbers and
            # data frames, which must not be tested with ==.
            if isinstance(item, str) and item == c:
                return index

        return -1

    def _get_index(self, expression_list, op_1, op_2):
        '''
        Get the index of first occurrence of op_1 OR op_2, assume expression_list includes op_1 OR op_2
        '''
        index_1 = self._include(expression_list, op_1)
        index_2 = self._include(expression_list, op_2)

        # When one operator is absent (-1) the other index wins; otherwise
        # the leftmost of the two.
        if min(index_1, index_2) == -1:
            return max(index_1, index_2)
        return min(index_1, index_2)

    def _close_parentheses(self, expression):
        '''
        Find the closing parentheses to the first opening (
        ------
        expression: str
            str contains an opening (
        ------
        return the index of the matching ), -1 if there is none
        ------
        '''
        layer = 0
        for i, char in enumerate(expression):
            if char == '(':
                layer = layer + 1
            elif char == ')':
                layer = layer - 1
                if layer == 0:
                    return i
        return -1

    def _function_call_end(self, expression, start, functions):
        '''
        Return the index of the ) closing the function call that begins at
        position start, or -1 when no known function call begins there
        ------
        raise ValueError when the call is never closed
        ------
        '''
        for func in functions:
            # Requiring the opening ( keeps series whose names merely start
            # with a function name (e.g. "lag_x") from being taken as calls.
            if expression.startswith(func + '(', start):
                close = self._close_parentheses(expression[start:])
                if close == -1:
                    raise ValueError('unmatched parentheses, check the expression')
                return start + close
        return -1

    def _break_expression(self, expression, operators, functions):
        '''
        Break the expression into logical components
        ------
        expression: str
            the expression, blanks are ignored
        operators: list of str
            single character operators and parentheses
        functions: list of str
            known function names
        ------
        return a list of tokens: operators, complete function calls
        (kept as one token including their arguments) and the operands
        between them
        ------
        '''
        expression = expression.replace(' ', '')
        #empty string
        if len(expression) == 0:
            return [expression]

        tokens = []
        current = ''
        pos = 0
        while pos < len(expression):
            # A function call can only begin where an operand begins, i.e.
            # at the start or right after an operator. Scanning left to
            # right also keeps calls nested inside another call's arguments
            # within the outer token.
            if current == '':
                call_end = self._function_call_end(expression, pos, functions)
                if call_end != -1:
                    tokens.append(expression[pos:call_end + 1])
                    pos = call_end + 1
                    continue

            char = expression[pos]
            if char in operators:
                if current != '':
                    tokens.append(current)
                tokens.append(char)
                current = ''
            else:
                current = current + char
            pos += 1

        if current != '':
            tokens.append(current)

        return tokens

    def _load_series(self, series_expr):
        '''
        Load the series argument of a function: first as a stored series
        name, and when that fails as a nested expression
        '''
        try:
            return self.query(series_expr)
        except Exception:
            # Deliberate best effort: any query failure means the argument
            # is treated as an expression. KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            return self.interpret(series_expr)

    def _parse_periods(self, text):
        '''
        Convert the period argument of lag/mlag to a number, as an int when
        it has no fractional part
        '''
        periods = float(text)
        if periods.is_integer():
            return int(periods)
        return periods

    def _eval_func(self, func):
        '''
        Evaluate the function
        ------
        func: str
            a complete call such as "lag(cpi_us,12)". The text between the
            first ( and the last comma is the series (a series name or a
            nested expression); the text between the last comma and the
            last ) is the second argument
        ------
        supported functions
            lag(*series*,i): lag the time series by i periods
            mlag(*series*,i): shift the time stamp by i months
            avg(*series*,freq): average of the series resampled at freq
            anticum(*series*,freq): reverse the cumulative common to China
                stats, freq is M or Q
        ------
        raise ValueError for an unknown function and for an anticum
        frequency other than M or Q
        ------
        '''
        function = func[:func.index('(')]
        if function not in ('lag', 'mlag', 'avg', 'anticum'):
            message = '%s not defined' % func
            raise ValueError(message)

        index_start = func.index('(')
        index_mid = func.rindex(',')
        index_end = func.rindex(')')
        series_expr = func[index_start+1:index_mid]
        argument = func[index_mid+1:index_end]

        if function == 'anticum':
            # Validate before loading anything; previously an unsupported
            # frequency left freq_m unbound and failed inside the loop.
            if argument == 'M':
                freq_m = 1
            elif argument == 'Q':
                freq_m = 3
            else:
                raise ValueError('anticum frequency must be M or Q, got %s' % argument)

        series = self._load_series(series_expr)

        if function == 'lag':
            return series.shift(self._parse_periods(argument))

        if function == 'mlag':
            periods = self._parse_periods(argument)
            return series.tshift(periods,freq='M').tshift(1,freq='D')

        if function == 'avg':
            freq = argument
            series = series.resample(freq, how='mean')
            if 'M' in freq:
                return series.tshift(-1,freq='M').tshift(1,freq='D')
            return series

        # anticum: subtract the value found freq_m months earlier, except
        # for rows whose month equals freq_m, which are kept unchanged.
        new_series = {}
        for stamp, row in series.iterrows():
            if stamp.month != freq_m:
                prev_date = stamp + DateOffset(months=-1*freq_m)
                prev_date = prev_date.to_period('M').to_timestamp('M')
                monthly = row['value']-series.loc[prev_date]
                new_series[stamp] = monthly['value']
            else:
                new_series[stamp] = row['value']
        return DataFrame({'value':Series(new_series)})

    def _convert_expressions(self, expression, operators):
        '''
        Convert expressions to data series after they are broken down
        ------
        expression: list of str
            tokens from _break_expression; empty strings are removed from
            this list in place
        ------
        return a list where function calls and series names are replaced by
        their data, numbers by floats, and operators are kept as strings
        ------
        '''
        expression[:] = [item for item in expression if item != '']
        converted_results = []
        for item in expression:
            if '(' in item and ')' in item:
                converted_results.append(self._eval_func(item))
            elif item in operators:
                converted_results.append(item)
            elif self._is_num(item):
                converted_results.append(float(item))
            else:
                converted_results.append(self.query(item))

        return converted_results

    def _calculate(self, expression_list):
        '''
        Calculate a list of expression elements
        ------
        expression_list: list
            operands and operator strings without parentheses
        ------
        return a one element list holding the result; ^ is applied first,
        then * and /, then + and -, each from left to right
        ------
        raise ValueError when no operator is found or an operator has no
        operand on one side
        ------
        '''
        e_list = expression_list
        if len(e_list) == 1:
            return e_list

        if self._include(e_list, '^') != -1:
            position = self._include(e_list, '^')
        elif self._include(e_list, '*') != -1 or self._include(e_list, '/') != -1:
            position = self._get_index(e_list, '*', '/')
        elif self._include(e_list, '+') != -1 or self._include(e_list, '-') != -1:
            position = self._get_index(e_list, '+', '-')
        else:
            raise ValueError('cannot recognize operators in the expression')

        # An operator at either end has no operand on one side. Without
        # this check a leading operator silently used the last element
        # (index -1) as its left operand.
        if position == 0 or position == len(e_list) - 1:
            raise ValueError('operator %s is missing an operand' % e_list[position])

        operator = e_list[position]
        left = e_list[position-1]
        right = e_list[position+1]
        if operator == '^':
            eval_result = pow(left, right)
        elif operator == '*':
            eval_result = left * right
        elif operator == '/':
            eval_result = left / right
        elif operator == '+':
            eval_result = left + right
        else:
            eval_result = left - right
        return self._calculate(e_list[:position-1] + [eval_result] + e_list[position+2:])

    def _parentheses(self, expression_list):
        '''
        Iterate through parentheses, always interpret the first closing parentheses )
        ------
        return a one element list holding the value of the whole expression
        ------
        raise ValueError on unmatched parentheses
        ------
        '''
        e_list = expression_list
        has_open = self._include(e_list, '(') != -1
        has_close = self._include(e_list, ')') != -1
        if has_open and has_close:
            close_index = self._include(e_list, ')')
            # Search backwards from the first ) for the ( that opens it.
            sub = e_list[:close_index][::-1]
            open_index = close_index - self._include(sub, '(') - 1
            eval_result = self._calculate(e_list[(open_index+1):close_index])
            new_list = e_list[:open_index] + eval_result + e_list[close_index+1:]
            return self._parentheses(new_list)

        if has_open or has_close:
            raise ValueError('unmatched parentheses, check the expression')
        return self._calculate(e_list)

    def interpret(self, expression):
        '''
        Interpret an expression
        ------
        expression: str
            e.g. "(cpi_us/lag(cpi_us,12)-1)*100"
        ------
        return the evaluated result without the rows whose value is missing
        ------
        '''
        operators = ['^','+','-','*','/','(',')']
        functions = ['lag', 'mlag', 'avg', 'anticum']
        broken_expression = self._break_expression(expression, operators, functions)
        interp_expression = self._convert_expressions(broken_expression, operators)
        results = self._parentheses(interp_expression)[0]

        results = results.loc[isnull(results['value']) != True]
        return results
Пример #26
0
 def test_query_with_empty_result(self):
     """Query against an empty mocked reply and expect an empty list."""
     empty_reply = []
     with _mocked_session('get', 200, empty_reply):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         outcome = client.query('select column_one from foo;')
         self.assertEqual(outcome, [])
 def test_query_with_empty_result(self):
     """The client must hand back [] when the mocked server sends []."""
     with _mocked_session('get', 200, []):
         client = DataFrameClient('host', 8086, 'username', 'password', 'db')
         statement = 'select column_one from foo;'
         outcome = client.query(statement)
         self.assertEqual(outcome, [])
Пример #28
0
'''
Created on Jul 8, 2015

@author: shaunz
'''

from QuandlAPI import QuandlAPI
from influxdb.influxdb08 import DataFrameClient
from QuandlTicker import Quandl_ticker_list
import timeit

quandl = QuandlAPI()
df = DataFrameClient('localhost', 8086, 'root', 'root')
# Create the target database when it is not listed yet.
if {'name': 'Quandl'} not in df.get_list_database():
    df.create_database('Quandl')

df.switch_database('Quandl')

# Delete every existing series before reloading.
for series in df.get_list_series():
    df.delete_series(series)

start = timeit.default_timer()
for item in Quandl_ticker_list:
    # item[0] is used as the series name; item[1..3] are passed to the
    # Quandl lookup.
    results = quandl.get_series(item[1], item[2], item[3])
    # The string 'NaN' is stored as '.'.
    results = results.replace(to_replace='NaN', value='.')
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(item)
    df.write_points({item[0]: results})
print('total time in seconds: %.2f' % (timeit.default_timer() - start))
Пример #29
0
                 [ 'cpi_us_education_yoy', '(cpi_us_education/lag(cpi_us_education,12)-1)*100' ],
                 [ 'cpi_us_communication_yoy', '(cpi_us_communication/lag(cpi_us_communication,12)-1)*100' ],
                 
                 [ 'wage_us_yoy', '(wage_us_weekly_nonsupervisory/lag(wage_us_weekly_nonsupervisory,12)-1)*100' ],
                 [ 'wti_yoy', '(avg(wti_spot,M)/lag(avg(wti_spot,M),12)-1)*100' ],
                 [ 'zillow_median_sale_price_yoy', '(zillow_median_sale_price/lag(zillow_median_sale_price,12)-1)*100' ],
                 [ 'ng_yoy', '(ng_hh_spot/lag(ng_hh_spot,12)-1)*100' ],
                 [ 'gdp_us_yoy', '(gdp_us/lag(gdp_us,4)-1)*100' ],
                 ]

# Derived series built on top of the *_yoy series.
# NOTE(review): processed_list is not consumed in the visible part of this
# script - confirm it is used further down.
processed_list = [
    ['wage_lag_15m', 'mlag(wage_us_yoy,15)'],
    ['zillow_median_sale_price_15m', 'mlag(zillow_median_sale_price_yoy,15)'],
]

df = DataFrameClient('localhost', 8086, 'root', 'root')
# Create the target database when it is not listed yet.
if {'name': 'Econ'} not in df.get_list_database():
    df.create_database('Econ')
df.switch_database('Econ')

#add all items in interested list
start = timeit.default_timer()
for item in interest_list:
    # item[0] is the series name, item[1] the expression that computes it.
    # An existing series of the same name is replaced.
    if item[0] in df.get_list_series():
        df.delete_series(item[0])
    results = influx_Fred.interpret(item[1])
    # The string 'NaN' is stored as '.'.
    results = results.replace(to_replace='NaN', value='.')
    results = DataFrame({'value': results['value']})
    df.write_points({item[0]: results})
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(item)
print('total time in seconds: %.2f' % (timeit.default_timer() - start))
Пример #30
0
'''
Created on Jul 2, 2015

@author: shaunz
'''
from influxdb.influxdb08 import DataFrameClient
from pandas import DataFrame
from FredAPI import FredLink
from FredLink.FredTicker import Fred_ticker_list
import timeit

fred = FredLink()
df = DataFrameClient('localhost', 8086, 'root', 'root')
# Create the target database when it is not listed yet.
if {'name': 'FRED'} not in df.get_list_database():
    df.create_database('FRED')

df.switch_database('FRED')

# Delete every existing series before reloading.
for series in df.get_list_series():
    df.delete_series(series)

start = timeit.default_timer()
for item in Fred_ticker_list:
    # item[0] is used as the series name, item[1] is passed to the FRED
    # lookup.
    results = fred.get_series(item[1])
    # The string 'NaN' is stored as '.'.
    results = results.replace(to_replace='NaN', value='.')
    data = DataFrame({'value': results})
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(item)
    df.write_points({item[0]: data})
print('total time in seconds: %.2f' % (timeit.default_timer() - start))