def test_write_points_from_dataframe_with_float_nan(self):
    """Test write points from dataframe with NaN float."""
    epoch = pd.Timestamp('1970-01-01 00:00+00:00')
    one_hour_later = epoch + timedelta(hours=1)
    frame = pd.DataFrame(
        data=[[1, float("NaN"), 1.0], [2, 2, 2.0]],
        index=[epoch, one_hour_later],
        columns=["column_one", "column_two", "column_three"],
    )
    # The NaN cell of the first row is expected as null in the request body.
    expected = [{
        "points": [[1, None, 1.0, 0], [2, 2, 2.0, 3600]],
        "name": "foo",
        "columns": ["column_one", "column_two", "column_three", "time"],
    }]

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(requests_mock.POST,
                            "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        client.write_points({"foo": frame})

        sent = json.loads(mocker.last_request.body)
        self.assertListEqual(sent, expected)
def test_write_points_from_dataframe_with_period_index(self):
    """Write a frame indexed by two consecutive daily periods."""
    period_index = [pd.Period('1970-01-01'), pd.Period('1970-01-02')]
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=period_index,
        columns=["column_one", "column_two", "column_three"],
    )
    expected = [{
        "points": [["1", 1, 1.0, 0], ["2", 2, 2.0, 86400]],
        "name": "foo",
        "columns": ["column_one", "column_two", "column_three", "time"],
    }]

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(requests_mock.POST,
                            "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        client.write_points({"foo": frame})

        sent = json.loads(mocker.last_request.body)
        self.assertListEqual(sent, expected)
def test_write_points_from_dataframe_with_float_nan(self):
    """Write a frame containing a NaN and compare the posted JSON body."""
    value_columns = ["column_one", "column_two", "column_three"]
    start = pd.Timestamp('1970-01-01 00:00+00:00')

    frame = pd.DataFrame(data=[[1, float("NaN"), 1.0], [2, 2, 2.0]],
                         index=[start, start + timedelta(hours=1)],
                         columns=value_columns)

    # Expected body: the NaN cell shows up as None after json.loads.
    expected_series = {
        "points": [
            [1, None, 1.0, 0],
            [2, 2, 2.0, 3600],
        ],
        "name": "foo",
        "columns": value_columns + ["time"],
    }

    with requests_mock.Mocker() as http:
        http.register_uri(requests_mock.POST,
                          "http://localhost:8086/db/db/series")
        DataFrameClient(database='db').write_points({"foo": frame})
        self.assertListEqual(json.loads(http.last_request.body),
                             [expected_series])
def test_write_points_from_dataframe_with_numeric_column_names(self):
    """Test write points from dataframe with numeric columns."""
    first_stamp = pd.Timestamp('1970-01-01 00:00+00:00')
    second_stamp = first_stamp + timedelta(hours=1)

    # No columns are passed, so the frame carries numeric column names;
    # the expected body lists them as the strings '0', '1', '2'.
    frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                         index=[first_stamp, second_stamp])
    expected = [{
        "points": [["1", 1, 1.0, 0], ["2", 2, 2.0, 3600]],
        "name": "foo",
        "columns": ['0', '1', '2', "time"],
    }]

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(requests_mock.POST,
                            "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        client.write_points({"foo": frame})

        sent = json.loads(mocker.last_request.body)
        self.assertListEqual(sent, expected)
def test_write_points_from_dataframe_with_period_index(self):
    """Test write points from dataframe with period index."""
    value_columns = ["column_one", "column_two", "column_three"]
    frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                         index=[pd.Period('1970-01-01'),
                                pd.Period('1970-01-02')],
                         columns=value_columns)

    # The second period is one day after the first, hence 86400 seconds.
    expected_series = {
        "points": [
            ["1", 1, 1.0, 0],
            ["2", 2, 2.0, 86400],
        ],
        "name": "foo",
        "columns": value_columns + ["time"],
    }

    with requests_mock.Mocker() as http:
        http.register_uri(requests_mock.POST,
                          "http://localhost:8086/db/db/series")
        DataFrameClient(database='db').write_points({"foo": frame})
        self.assertListEqual(json.loads(http.last_request.body),
                             [expected_series])
def test_query_multiple_time_series(self):
    """Query three series in one statement and compare every frame."""
    response_columns = ["time", "mean", "min", "max", "stddev"]
    response = [
        {"name": "series1",
         "columns": list(response_columns),
         "points": [[0, 323048, 323048, 323048, 0]]},
        {"name": "series2",
         "columns": list(response_columns),
         "points": [[0, -2.8233, -2.8503, -2.7832, 0.0173]]},
        {"name": "series3",
         "columns": list(response_columns),
         "points": [[0, -0.01220, -0.01220, -0.01220, 0]]},
    ]

    def expected_frame(row):
        # One row at epoch 0 (UTC), without the "time" column.
        return pd.DataFrame(data=[row],
                            index=pd.to_datetime([0], unit='s', utc=True),
                            columns=['mean', 'min', 'max', 'stddev'])

    expected = {
        'series1': expected_frame([323048, 323048, 323048, 0]),
        'series2': expected_frame([-2.8233, -2.8503, -2.7832, 0.0173]),
        'series3': expected_frame([-0.01220, -0.01220, -0.01220, 0]),
    }

    with _mocked_session('get', 200, response):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        result = client.query("""select mean(value), min(value), max(value), stddev(value) from series1, series2, series3""")
        assert expected.keys() == result.keys()
        for series_name in expected.keys():
            assert_frame_equal(expected[series_name], result[series_name])
def _search_db(self,series_name):
    '''
    Look through every database in self.db_list for a series name
    ------
    series_name: str
    name of the series to look for
    ------
    return the name of the first database listing the series, or None
    ------
    '''
    for candidate in self.db_list:
        # one client per candidate database
        client = DataFrameClient(self.url, self.port, self.user, self.password, candidate)
        known_series = client.get_list_series()
        if series_name in known_series:
            return candidate
    return None
def test_list_series(self):
    """Check that get_list_series returns the names from the response."""
    payload = [{
        'columns': ['time', 'name'],
        'name': 'list_series_result',
        'points': [[0, 'seriesA'], [0, 'seriesB']],
    }]

    with _mocked_session('get', 200, payload):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        listed = client.get_list_series()
        assert listed == ['seriesA', 'seriesB']
def test_write_points_from_dataframe_fails_without_time_index(self):
    """Write a frame built without a time index to the mocked endpoint.

    NOTE(review): the test name says the write should fail, but no
    expected-failure assertion is visible in this block -- presumably an
    expected-exception decorator lives outside of it; confirm.
    """
    rows = [["1", 1, 1.0], ["2", 2, 2.0]]
    frame = pd.DataFrame(
        data=rows,
        columns=["column_one", "column_two", "column_three"],
    )

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(requests_mock.POST,
                            "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        client.write_points({"foo": frame})
def test_list_series(self):
    """Test list of series for dataframe object."""
    series_points = [[0, 'seriesA'], [0, 'seriesB']]
    payload = [{
        'columns': ['time', 'name'],
        'name': 'list_series_result',
        'points': series_points,
    }]

    with _mocked_session('get', 200, payload):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        listed = client.get_list_series()
        self.assertEqual(listed, ['seriesA', 'seriesB'])
def test_write_points_from_dataframe_fails_with_series(self):
    """Pass a pandas Series instead of a DataFrame to write_points.

    NOTE(review): the test name says the write should fail, but no
    expected-failure assertion is visible in this block -- presumably an
    expected-exception decorator lives outside of it; confirm.
    """
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    timestamps = [start, start + timedelta(hours=1)]
    series = pd.Series(data=[1.0, 2.0], index=timestamps)

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(requests_mock.POST,
                            "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        client.write_points({"foo": series})
def test_write_points_from_dataframe_fails_without_time_index(self):
    """Write a frame that was created without any index argument.

    NOTE(review): the test name says the write should fail, but no
    expected-failure assertion is visible in this block -- presumably an
    expected-exception decorator lives outside of it; confirm.
    """
    column_names = ["column_one", "column_two", "column_three"]
    frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                         columns=column_names)

    with requests_mock.Mocker() as http:
        http.register_uri(requests_mock.POST,
                          "http://localhost:8086/db/db/series")
        DataFrameClient(database='db').write_points({"foo": frame})
def test_write_points_from_dataframe_in_batches(self):
    """Write a two-row frame with batch_size=1 and expect True back."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    timestamps = [start, start + timedelta(hours=1)]
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=timestamps,
        columns=["column_one", "column_two", "column_three"],
    )

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(requests_mock.POST,
                            "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        assert client.write_points({"foo": frame}, batch_size=1) is True
def test_write_points_from_dataframe_in_batches(self):
    """Write a two-row frame one row per batch and check the result."""
    epoch = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                         index=[epoch, epoch + timedelta(hours=1)],
                         columns=["column_one", "column_two",
                                  "column_three"])

    with requests_mock.Mocker() as http:
        http.register_uri(requests_mock.POST,
                          "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        outcome = client.write_points({"foo": frame}, batch_size=1)
        self.assertTrue(outcome)
def test_list_series(self):
    """List the series names contained in a mocked list-series response."""
    list_series_result = {
        'columns': ['time', 'name'],
        'name': 'list_series_result',
        'points': [[0, 'seriesA'], [0, 'seriesB']],
    }

    with _mocked_session('get', 200, [list_series_result]):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        self.assertEqual(client.get_list_series(), ['seriesA', 'seriesB'])
def test_write_points_from_dataframe_with_time_precision(self):
    """Write the same frame with 's', 'm' and 'u' time precision."""
    start = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[start, start + timedelta(hours=1)],
        columns=["column_one", "column_two", "column_three"],
    )

    expected_s = [{
        "points": [["1", 1, 1.0, 0], ["2", 2, 2.0, 3600]],
        "name": "foo",
        "columns": ["column_one", "column_two", "column_three", "time"],
    }]
    # Only the time of the second row differs between precisions.
    expected_ms = copy.deepcopy(expected_s)
    expected_ms[0]["points"][1][-1] = 3600 * 1000
    expected_us = copy.deepcopy(expected_s)
    expected_us[0]["points"][1][-1] = 3600 * 1000000

    cases = (('s', expected_s), ('m', expected_ms), ('u', expected_us))

    with requests_mock.Mocker() as mocker:
        mocker.register_uri(requests_mock.POST,
                            "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')
        for precision, expected in cases:
            client.write_points({"foo": frame}, time_precision=precision)
            self.assertListEqual(json.loads(mocker.last_request.body),
                                 expected)
def test_query_into_dataframe(self):
    """Query one series and compare the result with the expected frame."""
    response = [{
        "name": "foo",
        "columns": ["time", "sequence_number", "column_one"],
        "points": [
            [3600, 16, 2],
            [3600, 15, 1],
            [0, 14, 2],
            [0, 13, 1],
        ],
    }]

    # expected frame: ascending by time first, then by sequence_number
    expected_index = pd.to_datetime([0, 0, 3600, 3600], unit='s', utc=True)
    expected = pd.DataFrame(data=[[13, 1], [14, 2], [15, 1], [16, 2]],
                            index=expected_index,
                            columns=['sequence_number', 'column_one'])

    with _mocked_session('get', 200, response):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        actual = client.query('select column_one from foo;')
        assert_frame_equal(expected, actual)
def test_datetime_to_epoch(self):
    """Test convert datetime to epoch."""
    timestamp = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
    client = DataFrameClient('host', 8086, 'username', 'password', 'db')

    # (keyword arguments, expected epoch) -- checked in this order
    cases = [
        ({}, 1356998400.0),
        ({'time_precision': 's'}, 1356998400.0),
        ({'time_precision': 'm'}, 1356998400000.0),
        ({'time_precision': 'ms'}, 1356998400000.0),
        ({'time_precision': 'u'}, 1356998400000000.0),
    ]
    for kwargs, expected in cases:
        self.assertEqual(client._datetime_to_epoch(timestamp, **kwargs),
                         expected)
def __init__(self, db_name=None):
    '''
    Store the connection settings and open a DataFrameClient
    ------
    db_name: str, optional
    database to switch to right away; no database is selected when omitted
    ------
    '''
    self.url = 'localhost'
    self.port = 8086
    self.user = '******'
    self.password = '******'
    # databases that are searched when a series is not tied to one
    self.db_list = [ 'FRED', 'Quandl', 'Econ', 'ChinaData' ]
    self.db = DataFrameClient(self.url, self.port, self.user, self.password)
    # always define the attribute so reading it never raises AttributeError
    self.db_name = db_name
    if db_name is not None:
        self.db.switch_database(db_name)
def test_query_into_dataframe(self):
    """Turn a mocked single-series response into a dataframe."""
    raw_points = [[3600, 16, 2], [3600, 15, 1], [0, 14, 2], [0, 13, 1]]
    response = [{
        "name": "foo",
        "columns": ["time", "sequence_number", "column_one"],
        "points": raw_points,
    }]

    # The expected frame is sorted ascending by time first, then by
    # sequence_number.
    expected = pd.DataFrame(
        data=[[13, 1], [14, 2], [15, 1], [16, 2]],
        index=pd.to_datetime([0, 0, 3600, 3600], unit='s', utc=True),
        columns=['sequence_number', 'column_one'],
    )

    with _mocked_session('get', 200, response):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        assert_frame_equal(expected,
                           client.query('select column_one from foo;'))
def test_query_multiple_time_series(self):
    """Test query for multiple time series."""
    stat_columns = ['mean', 'min', 'max', 'stddev']
    # (series name, [mean, min, max, stddev]) in response order
    series_rows = [
        ('series1', [323048, 323048, 323048, 0]),
        ('series2', [-2.8233, -2.8503, -2.7832, 0.0173]),
        ('series3', [-0.01220, -0.01220, -0.01220, 0]),
    ]

    # Mocked response: every series has a single point at time 0.
    response = []
    for series_name, row in series_rows:
        response.append({
            "name": series_name,
            "columns": ["time", "mean", "min", "max", "stddev"],
            "points": [[0] + row],
        })

    # Expected frames: the same rows indexed by the epoch (UTC).
    expected = {}
    for series_name, row in series_rows:
        expected[series_name] = pd.DataFrame(
            data=[row],
            index=pd.to_datetime([0], unit='s', utc=True),
            columns=list(stat_columns))

    with _mocked_session('get', 200, response):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        result = client.query("""select mean(value), min(value), max(value), stddev(value) from series1, series2, series3""")
        self.assertEqual(expected.keys(), result.keys())
        for series_name in expected.keys():
            assert_frame_equal(expected[series_name], result[series_name])
def test_write_points_from_dataframe_with_time_precision(self):
    """Test write points from dataframe with time precision."""
    epoch = pd.Timestamp('1970-01-01 00:00+00:00')
    frame = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                         index=[epoch, epoch + timedelta(hours=1)],
                         columns=["column_one", "column_two",
                                  "column_three"])

    in_seconds = [{
        "points": [
            ["1", 1, 1.0, 0],
            ["2", 2, 2.0, 3600],
        ],
        "name": "foo",
        "columns": ["column_one", "column_two", "column_three", "time"],
    }]

    # Same body, with the second row's time scaled to the finer unit.
    in_millis = copy.deepcopy(in_seconds)
    in_millis[0]["points"][1][-1] = 3600 * 1000

    in_micros = copy.deepcopy(in_seconds)
    in_micros[0]["points"][1][-1] = 3600 * 1000000

    with requests_mock.Mocker() as http:
        http.register_uri(requests_mock.POST,
                          "http://localhost:8086/db/db/series")
        client = DataFrameClient(database='db')

        client.write_points({"foo": frame}, time_precision='s')
        self.assertListEqual(json.loads(http.last_request.body), in_seconds)

        client.write_points({"foo": frame}, time_precision='m')
        self.assertListEqual(json.loads(http.last_request.body), in_millis)

        client.write_points({"foo": frame}, time_precision='u')
        self.assertListEqual(json.loads(http.last_request.body), in_micros)
def test_query_with_empty_result(self):
    """Query against an empty mocked response and expect an empty list."""
    empty_response = []
    with _mocked_session('get', 200, empty_response):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        outcome = client.query('select column_one from foo;')
        assert outcome == []
class InfluxDB(object):
    '''
    Connect to influxdb and pull/write data

    Besides plain series queries the class evaluates small arithmetic
    expressions over series, e.g. "(cpi/lag(cpi,12)-1)*100", see interpret()
    '''

    def __init__(self, db_name=None):
        '''
        Store the connection settings and open a DataFrameClient
        ------
        db_name: str, optional
        database to switch to right away; no database is selected when omitted
        ------
        '''
        self.url = 'localhost'
        self.port = 8086
        self.user = '******'
        self.password = '******'
        # databases that are searched when a series is not tied to one
        self.db_list = [ 'FRED', 'Quandl', 'Econ', 'ChinaData' ]
        self.db = DataFrameClient(self.url, self.port, self.user, self.password)
        # always define the attribute so reading it never raises AttributeError
        self.db_name = db_name
        if db_name is not None:
            self.db.switch_database(db_name)

    def _search_db(self,series_name):
        '''
        Search the db name for a series name
        ------
        return the first database of self.db_list listing the series, or None
        ------
        '''
        for db in self.db_list:
            temp_db = DataFrameClient(self.url, self.port, self.user, self.password, db)
            if series_name in temp_db.get_list_series():
                return db
        return None

    def query(self,series_name,db_name=None):
        '''
        Query a particular series
        ------
        series_name: str
        name of the series, e.g. "CPI_US"
        db_name: str, optional
        database holding the series; every database of self.db_list is
        searched when omitted
        ------
        return a pandas DataFrame with NaN representing missing values
        ------
        '''
        if db_name is None:
            db_name = self._search_db(series_name)
        self.db.switch_database(db_name)
        results = self.db.query('SELECT * FROM %s' % series_name)
        # NOTE(review): presumably this detects a string-typed value column so
        # that the '.' placeholder written by the loaders becomes a missing
        # value before the float cast -- confirm
        if(results['value'].str.contains('.').isnull().sum()!=len(results)):
            results.loc[results['value']=='.','value'] = None
        return results.astype(float)

    def _is_num(self, s):
        '''
        Determine if a string is a number
        '''
        try:
            float(s)
            return True
        except ValueError:
            return False

    def _include(self,expression_list,c):
        '''
        check whether expression_list include basic operator c, and give the index of first occurrence
        ------
        c: str
        the basic operator or parentheses c
        ------
        return the index of the first occurrence, -1 when c is absent
        ------
        '''
        for index, item in enumerate(expression_list):
            # only strings can be operators; operands (numbers, data frames)
            # are never compared against c
            if isinstance(item, str) and item == c:
                return index
        return -1

    def _get_index(self,expression_list, op_1, op_2):
        '''
        Get the index of first occurrence of op_1 OR op_2, assume expression_list includes op_1 OR op_2
        '''
        index_1 = self._include(expression_list, op_1)
        index_2 = self._include(expression_list, op_2)
        if min(index_1, index_2) == -1:
            # one operator is absent: the other index is the answer
            return max(index_1, index_2)
        return min(index_1, index_2)

    def _close_parentheses(self,expression):
        '''
        Find the closing parentheses to the first opening (
        ------
        expression: str
        str contains an opening (
        ------
        return the index of the matching ), -1 when it is never closed
        ------
        '''
        layer = 0
        for i, char in enumerate(expression):
            if char == '(':
                layer = layer + 1
            elif char == ')':
                layer = layer - 1
                if layer == 0:
                    return i
        return -1

    def _find_function_call(self, expression, operators, functions):
        '''
        Locate a function call such as "lag(...)" inside expression
        ------
        return (start, end) where end is the index of the closing ) of the
        call, or None when expression holds no call
        raise ValueError when a call is opened but never closed
        ------
        '''
        for func in functions:
            token = func + '('
            start = expression.find(token)
            while start != -1:
                # a call starts the expression or follows an operator; any
                # other match is part of a longer name ("lag(" within "mlag(")
                if start == 0 or expression[start-1] in operators:
                    offset = self._close_parentheses(expression[start:])
                    if offset == -1:
                        raise ValueError('unmatched parentheses, check the expression')
                    return start, start + offset
                start = expression.find(token, start + 1)
        return None

    def _break_expression(self, expression, operators, functions):
        '''
        Break the expression into logical components
        ------
        return a list of str: operators, numbers, series names and whole
        function calls; it may hold empty strings around function calls
        raise ValueError when a function call is never closed
        ------
        '''
        expression = expression.replace(' ','')
        #empty string
        if len(expression) == 0:
            return [expression]
        # interpret functions: a call stays one component, the text on
        # both sides of it is broken down recursively
        call = self._find_function_call(expression, operators, functions)
        if call is not None:
            func_start, func_end = call
            return (self._break_expression(expression[:func_start], operators, functions)
                    + [expression[func_start:func_end+1]]
                    + self._break_expression(expression[func_end+1:], operators, functions))
        # then deal with the time series, operators and numbers
        results = []
        current_expression = ''
        for char in expression:
            if char in operators:
                results.append(current_expression)
                results.append(char)
                current_expression = ''
            else:
                current_expression = current_expression + char
        results.append(current_expression)
        return [item for item in results if item != '']

    def _eval_func(self, func):
        '''
        Evaluate the function
        ------
        func: str
        a call "name(series,argument)" where name is lag, mlag, avg or
        anticum and series is a series name or a nested expression
        ------
        raise ValueError for an unknown function, a call without comma or an
        anticum frequency other than M or Q
        ------
        '''
        index_start = func.index('(')
        function = func[:index_start]
        if function not in ('lag', 'mlag', 'avg', 'anticum'):
            message = '%s not defined' % func
            raise ValueError(message)
        # the argument follows the LAST comma, so the series part may itself
        # be a call containing commas
        index_mid = func.rindex(',')
        index_end = func.rindex(')')
        series_part = func[index_start+1:index_mid]
        argument = func[index_mid+1:index_end]
        try:
            series = self.query(series_part)
        except Exception:
            # not a stored series: evaluate it as an expression instead
            series = self.interpret(series_part)

        if function == 'lag':
            #lag the time series by a number of periods. lag(*series*,i) where i is number of period
            return series.shift(float(argument))
        if function == 'mlag':
            # shift the time stamp by a number of months. mlag(*series*,i) where i is number of months
            return series.tshift(float(argument),freq='M').tshift(1,freq='D')
        if function == 'avg':
            #taking the average of time series
            series = series.resample(argument, how='mean')
            if 'M' in argument:
                return series.tshift(-1,freq='M').tshift(1,freq='D')
            return series
        #anticum: reverse the cumulative common to China stats
        if argument == 'M':
            freq_m = 1
        elif argument == 'Q':
            freq_m = 3
        else:
            raise ValueError('%s not defined: frequency must be M or Q' % func)
        new_series = {}
        for item in series.iterrows():
            if item[0].month != freq_m:
                # subtract the cumulative value of the previous period
                prev_date = item[0] + DateOffset(months=-1*freq_m)
                prev_date = prev_date.to_period('M').to_timestamp('M')
                monthly = item[1]['value']-series.loc[prev_date]
                new_series[item[0]] = monthly['value']
            else:
                # first period of the year: the value is kept as it is
                new_series[item[0]] = item[1]['value']
        return DataFrame({'value':Series(new_series)})

    def _convert_expressions(self, expression, operators):
        '''
        Convert expressions to data series after they are broken down
        ------
        expression: list of str
        components from _break_expression; empty strings are removed in place
        ------
        return a list with operators kept as str, numbers as float and
        function calls / series names replaced by their data
        ------
        '''
        expression[:] = [item for item in expression if item != '']
        converted_results = []
        for item in expression:
            if '(' in item and ')' in item:
                converted_results.append(self._eval_func(item))
            elif item in operators:
                converted_results.append(item)
            elif self._is_num(item):
                converted_results.append(float(item))
            else:
                converted_results.append(self.query(item))
        return converted_results

    def _calculate(self, expression_list):
        '''
        Calculate a list of expression elements
        ------
        expression_list: list
        operands and operator strings without parentheses
        ------
        return a list holding the single result; ^ is applied first, then
        * and /, then + and -, each from left to right
        raise ValueError when no operator is found or an operator lacks an
        operand on one side
        ------
        '''
        e_list = expression_list
        if len(e_list) == 1:
            return e_list
        if self._include(e_list, '^') != -1:
            position = self._include(e_list, '^')
        elif self._include(e_list, '*') != -1 or self._include(e_list, '/') != -1:
            position = self._get_index(e_list, '*', '/')
        elif self._include(e_list, '+') != -1 or self._include(e_list, '-') != -1:
            position = self._get_index(e_list, '+', '-')
        else:
            raise ValueError('cannot recognize operators in the expression')
        # a leading operator would silently wrap around to the last element
        # and a trailing one would run past the end of the list
        if position == 0 or position == len(e_list) - 1:
            raise ValueError('operator %s is missing an operand' % e_list[position])
        left = e_list[position-1]
        operator = e_list[position]
        right = e_list[position+1]
        if operator == '^':
            eval_result = pow(left, right)
        elif operator == '*':
            eval_result = left * right
        elif operator == '/':
            eval_result = left / right
        elif operator == '+':
            eval_result = left + right
        else:
            eval_result = left - right
        return self._calculate(e_list[:position-1] + [eval_result] + e_list[position+2:])

    def _parentheses(self, expression_list):
        '''
        Iterate through parentheses, always interpret the first closing parentheses )
        ------
        return a list holding the single result
        raise ValueError when the parentheses do not match
        ------
        '''
        e_list = expression_list
        has_open = self._include(e_list, '(') != -1
        has_close = self._include(e_list, ')') != -1
        if has_open and has_close:
            close_index = self._include(e_list, ')')
            # the nearest ( before the first ) opens the innermost group
            sub = e_list[:close_index][::-1]
            open_index = close_index - self._include(sub, '(') - 1
            eval_result = self._calculate(e_list[(open_index+1):close_index])
            new_list = e_list[:open_index] + eval_result + e_list[close_index+1:]
            return self._parentheses(new_list)
        if has_open or has_close:
            raise ValueError('unmatched parentheses, check the expression')
        return self._calculate(e_list)

    def interpret(self, expression):
        '''
        Interpret an expression
        ------
        expression: str
        arithmetic over series names, numbers and the functions lag, mlag,
        avg and anticum, e.g. "(gdp_us/lag(gdp_us,4)-1)*100"
        ------
        return the result without the rows whose value is missing
        ------
        '''
        operators = ['^','+','-','*','/','(',')']
        functions = ['lag', 'mlag', 'avg', 'anticum']
        broken_expression = self._break_expression(expression, operators, functions)
        interp_expression = self._convert_expressions(broken_expression, operators)
        results = self._parentheses(interp_expression)[0]
        results = results.loc[~isnull(results['value'])]
        return results
def test_query_with_empty_result(self):
    """Test query with empty results."""
    no_series = []
    with _mocked_session('get', 200, no_series):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        outcome = client.query('select column_one from foo;')
        self.assertEqual(outcome, [])
def test_query_with_empty_result(self):
    """Run a query whose mocked response holds no series at all."""
    with _mocked_session('get', 200, []):
        client = DataFrameClient('host', 8086, 'username', 'password', 'db')
        self.assertEqual(client.query('select column_one from foo;'), [])
'''
Created on Jul 8, 2015

Reload every series of Quandl_ticker_list into the 'Quandl' influxdb database

@author: shaunz
'''
from QuandlAPI import QuandlAPI
from influxdb.influxdb08 import DataFrameClient
from QuandlTicker import Quandl_ticker_list
import timeit

quandl = QuandlAPI()
df = DataFrameClient('localhost', 8086, 'root', 'root')

# create the database on the first run, then select it
if {'name':'Quandl'} not in df.get_list_database():
    df.create_database('Quandl')
df.switch_database('Quandl')

# full reload: drop every series that is currently stored
for series in df.get_list_series():
    df.delete_series(series)

start = timeit.default_timer()
for item in Quandl_ticker_list:
    # item[0] is the series name written to influxdb; item[1:4] are passed
    # through to QuandlAPI.get_series
    results = quandl.get_series(item[1],item[2],item[3])
    # '.' is stored in place of 'NaN' values
    results = results.replace(to_replace='NaN',value='.')
    # print(...) with one argument behaves the same on Python 2 and 3
    print(item)
    df.write_points({item[0]:results})
print('total time in seconds: %.2f' % (timeit.default_timer() - start))
[ 'cpi_us_education_yoy', '(cpi_us_education/lag(cpi_us_education,12)-1)*100' ], [ 'cpi_us_communication_yoy', '(cpi_us_communication/lag(cpi_us_communication,12)-1)*100' ], [ 'wage_us_yoy', '(wage_us_weekly_nonsupervisory/lag(wage_us_weekly_nonsupervisory,12)-1)*100' ], [ 'wti_yoy', '(avg(wti_spot,M)/lag(avg(wti_spot,M),12)-1)*100' ], [ 'zillow_median_sale_price_yoy', '(zillow_median_sale_price/lag(zillow_median_sale_price,12)-1)*100' ], [ 'ng_yoy', '(ng_hh_spot/lag(ng_hh_spot,12)-1)*100' ], [ 'gdp_us_yoy', '(gdp_us/lag(gdp_us,4)-1)*100' ], ] processed_list = [ [ 'wage_lag_15m', 'mlag(wage_us_yoy,15)' ], [ 'zillow_median_sale_price_15m', 'mlag(zillow_median_sale_price_yoy,15)' ], ] df = DataFrameClient('localhost',8086,'root','root') if({'name':'Econ'} not in df.get_list_database()): df.create_database('Econ') df.switch_database('Econ') #add all items in interested list start = timeit.default_timer() for item in interest_list: if item[0] in df.get_list_series(): df.delete_series(item[0]) results = influx_Fred.interpret(item[1]) results = results.replace(to_replace='NaN',value='.') results = DataFrame({'value':results['value']}) df.write_points({item[0]:results}) print item print 'total time in seconds: %.2f' % (timeit.default_timer() - start)
'''
Created on Jul 2, 2015

Reload every series of Fred_ticker_list into the 'FRED' influxdb database

@author: shaunz
'''
from influxdb.influxdb08 import DataFrameClient
from pandas import DataFrame
from FredAPI import FredLink
from FredLink.FredTicker import Fred_ticker_list
import timeit

fred = FredLink()
df = DataFrameClient('localhost', 8086, 'root', 'root')

# create the database on the first run, then select it
if {'name':'FRED'} not in df.get_list_database():
    df.create_database('FRED')
df.switch_database('FRED')

# full reload: drop every series that is currently stored
for series in df.get_list_series():
    df.delete_series(series)

start = timeit.default_timer()
for item in Fred_ticker_list:
    # item[0] is the series name written to influxdb, item[1] the ticker
    # handed to FredLink.get_series
    results = fred.get_series(item[1])
    # '.' is stored in place of 'NaN' values
    results = results.replace(to_replace='NaN',value='.')
    data = DataFrame({'value': results})
    # print(...) with one argument behaves the same on Python 2 and 3
    print(item)
    df.write_points({item[0]:data})
print('total time in seconds: %.2f' % (timeit.default_timer() - start))