def test_fromRecords_toRecords(self):
    # structured array
    K = 10

    recs = np.zeros(K, dtype="O,O,f8,f8")
    # list(...) keeps this working on Python 3, where range objects cannot be repeated with *
    recs["f0"] = list(range(K // 2)) * 2
    recs["f1"] = np.arange(K) / (K // 2)
    recs["f2"] = np.arange(K) * 2
    recs["f3"] = np.arange(K)

    lp = LongPanel.fromRecords(recs, "f0", "f1")
    self.assertEqual(len(lp.items), 2)

    lp = LongPanel.fromRecords(recs, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)

    torecs = lp.toRecords()
    self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

    # DataFrame
    df = DataFrame.from_records(recs)
    lp = LongPanel.fromRecords(df, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)

    # dict of arrays
    series = DataFrame.from_records(recs)._series
    lp = LongPanel.fromRecords(series, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)
    self.assert_("f2" in series)

    self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)), 0, 1)
def get_data_with_countries(year_of_color=1990, stat_code='WNTI_%', palette=None): # nopep8 if not palette: palette = WATER_COLOR_RANGE # Get the countries data frame countries = Country.objects.exclude(boundary='') countries = countries.filter(region__in=[1, 2, 3, 6, 7]) # Africa only countries = countries.values('name', 'boundary', 'id') countries_df = DataFrame.from_records(countries) countries_df['xs'], countries_df['ys'] = build_coords_lists(countries_df['boundary']) # nopep8 # Get the stats for access to water stats = StatValue.objects.filter(description__code=stat_code) stats = stats.values('value', 'year', 'country_id') stats_df = DataFrame.from_records(stats, coerce_float=True) # Pivot it before merging pivot_df = stats_df.pivot(columns='year', index='country_id', values='value') # nopep8 pivot_df['id'] = pivot_df.index # Merge the countries and stats together merged_df = merge(countries_df, pivot_df, how='left') merged_df = merged_df.fillna(value=-99) # Color it colored_df = update_active_data(merged_df, year_of_color, palette) # Otherwise things are sad! colored_df.columns = colored_df.columns.astype('str') return colored_df
def main(args):
    if len(args) != 3:
        usage()
    in_scores_filename = args[0]
    out_scores_filename = get_out_filename(in_scores_filename)
    if os.path.exists(out_scores_filename):
        # use str.format; applying % to a {0}-style template raises TypeError
        print >> sys.stderr, 'Error: output file "{0}" already exists'.format(out_scores_filename)
        usage()
    in_texts_filename = args[1]
    out_texts_filename = get_out_filename(in_texts_filename)
    if os.path.exists(out_texts_filename):
        print >> sys.stderr, 'Error: output file "{0}" already exists'.format(out_texts_filename)
        usage()
    n_pages = int(args[2])
    if n_pages > 1000:
        print 'Note: num. pages capped at 1000'
        n_pages = 1000
    from_date = find_from_date(in_scores_filename, in_texts_filename)
    scores, texts = get_questions(n_pages, from_date)

    scores_df = DataFrame.from_records(scores)
    scores_df = scores_df.set_index('question_id')
    # to_csv returns None, so don't rebind the DataFrame to its result
    scores_df.to_csv(out_scores_filename, encoding='UTF-8')

    texts_df = DataFrame.from_records(texts)
    texts_df = texts_df.set_index('question_id', verify_integrity=True)
    texts_df.to_csv(out_texts_filename, encoding='UTF-8')
def convert_to_dataframe(array): def get_nonscalar_columns(array): first_row = array[0] bad_cols = np.array([x.ndim != 0 for x in first_row]) col_names = np.array(array.dtype.names) bad_names = col_names[bad_cols] if not bad_names.size == 0: warnings.warn("Ignored the following non-scalar branches: {bad_names}" .format(bad_names=", ".join(bad_names)), UserWarning) return list(bad_names) nonscalar_columns = get_nonscalar_columns(array) indices = list(filter(lambda x: x.startswith('__index__') and x not in nonscalar_columns, array.dtype.names)) if len(indices) == 0: df = DataFrame.from_records(array, exclude=nonscalar_columns) elif len(indices) == 1: # We store the index under the __index__* branch, where # * is the name of the index df = DataFrame.from_records(array, index=indices[0], exclude=nonscalar_columns) index_name = indices[0][len('__index__'):] if not index_name: # None means the index has no name index_name = None df.index.name = index_name else: raise ValueError("More than one index found in file") return df
def test_empty_with_nrows_chunksize(self): # see gh-9535 expected = DataFrame([], columns=['foo', 'bar']) result = self.read_csv(StringIO('foo,bar\n'), nrows=10) tm.assert_frame_equal(result, expected) result = next(iter(self.read_csv( StringIO('foo,bar\n'), chunksize=10))) tm.assert_frame_equal(result, expected) with tm.assert_produces_warning( FutureWarning, check_stacklevel=False): result = self.read_csv(StringIO('foo,bar\n'), nrows=10, as_recarray=True) result = DataFrame(result[2], columns=result[1], index=result[0]) tm.assert_frame_equal(DataFrame.from_records( result), expected, check_index_type=False) with tm.assert_produces_warning( FutureWarning, check_stacklevel=False): result = next(iter(self.read_csv(StringIO('foo,bar\n'), chunksize=10, as_recarray=True))) result = DataFrame(result[2], columns=result[1], index=result[0]) tm.assert_frame_equal(DataFrame.from_records(result), expected, check_index_type=False)
def get_pf_items(self): """Returns a tuple of 4 elements which can be used for further processing with ``pyfolio`` returns, positions, transactions, gross_leverage Because the objects are meant to be used as direct input to ``pyfolio`` this method makes a local import of ``pandas`` to convert the internal *backtrader* results to *pandas DataFrames* which is the expected input by, for example, ``pyfolio.create_full_tear_sheet`` The method will break if ``pandas`` is not installed """ # keep import local to avoid disturbing installations with no pandas import pandas from pandas import DataFrame as DF # # Returns cols = ["index", "return"] returns = DF.from_records(iteritems(self.rets["returns"]), index=cols[0], columns=cols) returns.index = pandas.to_datetime(returns.index) returns.index = returns.index.tz_localize("UTC") rets = returns["return"] # # Positions pss = self.rets["positions"] ps = [[k] + v for k, v in iteritems(pss)] cols = ps.pop(0) # headers are in the first entry positions = DF.from_records(ps, index=cols[0], columns=cols) positions.index = pandas.to_datetime(positions.index) positions.index = positions.index.tz_localize("UTC") # # Transactions txss = self.rets["transactions"] txs = list() # The transactions have a common key (date) and can potentially happend # for several assets. The dictionary has a single key and a list of # lists. Each sublist contains the fields of a transaction # Hence the double loop to undo the list indirection for k, v in iteritems(txss): for v2 in v: txs.append([k] + v2) cols = txs.pop(0) # headers are in the first entry transactions = DF.from_records(txs, index=cols[0], columns=cols) transactions.index = pandas.to_datetime(transactions.index) transactions.index = transactions.index.tz_localize("UTC") # Gross Leverage cols = ["index", "gross_lev"] gross_lev = DF.from_records(iteritems(self.rets["gross_lev"]), index=cols[0], columns=cols) gross_lev.index = pandas.to_datetime(gross_lev.index) gross_lev.index = gross_lev.index.tz_localize("UTC") glev = gross_lev["gross_lev"] # Return all together return rets, positions, transactions, glev
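# A minimal usage sketch for the analyzer above (an assumption, not part of the
# original source): it presumes this is backtrader's PyFolio analyzer registered
# under the name 'pyfolio', with data feeds and a strategy configured elsewhere.
# pyfolio.create_full_tear_sheet is the consumer named in the docstring.
import backtrader as bt
import pyfolio

cerebro = bt.Cerebro()
# ... add data feeds and a strategy here ...
cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')
strat = cerebro.run()[0]

# get_pf_items() hands back the four objects pyfolio expects
rets, positions, transactions, gross_lev = strat.analyzers.pyfolio.get_pf_items()
pyfolio.create_full_tear_sheet(rets,
                               positions=positions,
                               transactions=transactions)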
def test_fromRecords_toRecords(self):
    # structured array
    K = 10

    recs = np.zeros(K, dtype='O,O,f8,f8')
    # list(...) keeps this working on Python 3, where range objects cannot be repeated with *
    recs['f0'] = list(range(K // 2)) * 2
    recs['f1'] = np.arange(K) / (K // 2)
    recs['f2'] = np.arange(K) * 2
    recs['f3'] = np.arange(K)

    lp = LongPanel.fromRecords(recs, 'f0', 'f1')
    self.assertEqual(len(lp.items), 2)

    lp = LongPanel.fromRecords(recs, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)

    torecs = lp.toRecords()
    self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

    # DataFrame
    df = DataFrame.from_records(recs)
    lp = LongPanel.fromRecords(df, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)

    # dict of arrays
    series = DataFrame.from_records(recs)._series
    lp = LongPanel.fromRecords(series, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)
    self.assert_('f2' in series)

    self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)), 0, 1)
def test_multiindex_dtype(self): df1 = DataFrame.from_records( {'a': [1, 2], 'b': [2.1, 1.5], 'c': ['l1', 'l2']}, index=['a', 'b']) df2 = DataFrame.from_records( {'a': [1.0, 2.0], 'b': [2.1, 1.5], 'c': ['l1', 'l2']}, index=['a', 'b']) self._assert_not_equal(df1, df2, check_index_type=True)
def stepfunction(dblstart, dblend, dbllaunch, dbltimenow, dblinterval=1 / 12):
    case1 = dbltimenow[(dbltimenow + dblinterval) <= dbllaunch]
    case2 = dbltimenow[dbltimenow >= dbllaunch]

    step_function_case1 = DataFrame.from_records(np.zeros([1, case1.shape[0]]),
                                                 index=None, exclude=None, columns=case1)
    step_function_case2 = DataFrame.from_records(np.zeros([1, case2.shape[0]]),
                                                 index=None, exclude=None, columns=case2)

    step_function_case1[:] = dblstart * dblinterval
    step_function_case2[:] = dblend * dblinterval

    step_function = step_function_case1.add(step_function_case2, fill_value=0)
    return step_function
def convert_to_dataframe(array): """ Creates a DataFrame from a structured array. Currently, this creates a copy of the data. """ if 'index' in array.dtype.names: df = DataFrame.from_records(array, index='index') else: df = DataFrame.from_records(array) return df
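# Illustrative check for convert_to_dataframe above (added example, not part of
# the original source): a structured array containing an 'index' field should
# come back with that field promoted to the DataFrame index.
import numpy as np

arr = np.array([(0, 1.5), (1, 2.5), (2, 3.5)],
               dtype=[('index', 'i8'), ('value', 'f8')])
df = convert_to_dataframe(arr)
assert list(df.index) == [0, 1, 2]
assert list(df.columns) == ['value']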
def test_stata_writer_pandas(): buf = BytesIO() dta = macrodata.load().data dtype = dta.dtype #as of 0.9.0 pandas only supports i8 and f8 dta = dta.astype(np.dtype([('year', 'i8'), ('quarter', 'i8')] + dtype.descr[2:])) dta = DataFrame.from_records(dta) writer = StataWriter(buf, dta) writer.write_file() buf.seek(0) dta2 = genfromdta(buf) ptesting.assert_frame_equal(dta.reset_index(), DataFrame.from_records(dta2))
def init_class_fixtures(cls): super(ClosesAndVolumes, cls).init_class_fixtures() cls.first_asset_start = min(cls.equity_info.start_date) cls.last_asset_end = max(cls.equity_info.end_date) cls.assets = cls.asset_finder.retrieve_all(cls.asset_finder.sids) cls.trading_day = cls.trading_calendar.day # Add a split for 'A' on its second date. cls.split_asset = cls.assets[0] cls.split_date = cls.split_asset.start_date + cls.trading_day cls.split_ratio = 0.5 cls.adjustments = DataFrame.from_records([ { 'sid': cls.split_asset.sid, 'value': cls.split_ratio, 'kind': MULTIPLY, 'start_date': Timestamp('NaT'), 'end_date': cls.split_date, 'apply_date': cls.split_date, } ]) cls.default_sim_params = SimulationParameters( start_session=cls.first_asset_start, end_session=cls.last_asset_end, trading_calendar=cls.trading_calendar, emission_rate='daily', data_frequency='daily', )
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True): """ Parameters ---------- data : str Name of dataset to fetch. baseurl : str The base URL to the stata datasets. as_df : bool If True, returns a `pandas.DataFrame` Returns ------- dta : Record Array A record array containing the Stata dataset. Examples -------- >>> dta = webuse('auto') Notes ----- Make sure baseurl has trailing forward slash. Doesn't do any error checking in response URLs. """ # lazy imports from statsmodels.iolib import genfromdta url = urljoin(baseurl, data+'.dta') dta = urlopen(url) dta = StringIO(dta.read()) # make it truly file-like if as_df: # could make this faster if we don't process dta twice? return DataFrame.from_records(genfromdta(dta)) else: return genfromdta(dta)
def platform_expression(accession, require_age=True, require_gender=False, limit=None): genes = fetch_genes(9606) query = """ SELECT sample.id, sample.age, sample.gender, expression.data FROM expression INNER JOIN sample ON expression.sample_id=sample.id INNER JOIN platform ON sample.platform_id=platform.id WHERE platform.accession=%s""" if require_age: query += "\nAND sample.age IS NOT NULL" if require_gender: query += "\nAND sample.gender IS NOT NULL" if limit: query += "\tLIMIT " + str(limit) c.execute(query, (accession,)) samples, age, gender, expression = zip(*c) X = DataFrame.from_records(list(expression), index=samples, columns=genes.index) X.index.name = "Sample ID" X.columns.name = "Gene ID" P = DataFrame({"age": age, "gender": gender}, index=samples) P.index.name = "Sample" return X, P
def getAdjClosePrices(tickers, startdate, enddate): """ returns a ready to use pandas DataFrame and a Series with the startDate """ Session = orm.sessionmaker(bind=db.GetEngine()) session = Session() conn = db.GetEngine().connect() # Query conn.execute("""CREATE TEMP TABLE Tickers (Cd Text)""") conn.execute("""INSERT INTO Tickers VALUES(?)""", zip(tickers)) result = conn.execute("""SELECT ts.Cd, Date, AdjClose FROM TimeSeries ts INNER JOIN Tickers t ON ts.Cd = t.Cd WHERE ts.Date >= ? AND ts.Date <= ?""", (startdate, enddate)) rows = result.fetchall() # Create a pandas DataFrame pricesRaw = DataFrame.from_records(rows, columns=['Cd', 'Date', 'AdjClose']) # Convert Date strings into datetime so pandas can do time series stuff pricesRaw.Date = pd.to_datetime(pricesRaw.Date) seriesbegin = pricesRaw[['Cd','Date']].groupby('Cd').min() # Pivot DataFrame prices = pricesRaw.pivot(index='Date', columns='Cd', values='AdjClose') # Close DB and Cursor conn.close() return prices, seriesbegin
def get_xueqiu_stocks( self , stockTypeList = ['sha','shb','sza','szb'] , columns = CON.CONST_XUEQIU_QUOTE_ORDER_COLUMN ): stock_xueqiu = None for stockType in stockTypeList: print( "正在从雪球获取:{}".format(C.EX_NAME[stockType]) ) page = 1 while True: response = self.session.get( CON.URL_XUEQIU_QUOTE_ORDER(page,columns,stockType) , headers = CON.HEADERS_XUEQIU ).json() df = DataFrame.from_records(response["data"], columns=response["column"]) if stock_xueqiu is None: stock_xueqiu = df else: stock_xueqiu = stock_xueqiu.append(df) if df.size==0: break page += 1 self.stock_xueqiu = stock_xueqiu return stock_xueqiu
def test_adjust_purchase(self): ohlcs = np.array([ (1216915200, 24.889999389648438, 25.450000762939453, 24.709999084472656, 25.0, 486284.0, 1216462208.0) ], dtype=Day.DTYPE) dividends = np.array([ (1058313600, 0.0, 0.0, 0.0, 0.11999999731779099), (1084233600, 0.20000000298023224, 0.0, 0.0, 0.09200000017881393), (1119225600, 0.5, 0.0, 0.0, 0.10999999940395355), (1140739200, 0.08589000254869461, 0.0, 0.0, 0.0), (1150416000, 0.0, 0.0, 0.0, 0.07999999821186066), (1158796800, 0.0, 0.0, 0.0, 0.18000000715255737), (1183507200, 0.0, 0.0, 0.0, 0.11999999731779099), (1217203200, 0.0, 0.0, 0.0, 0.2800000011920929), (1246579200, 0.30000001192092896, 0.0, 0.0, 0.10000000149011612), (1268611200, 0.0, 0.12999999523162842, 8.850000381469727, 0.0), (1277942400, 0.0, 0.0, 0.0, 0.20999999344348907), (1307664000, 0.0, 0.0, 0.0, 0.28999999165534973) ], dtype=self.dtype) index = np.array([datetime.datetime.fromtimestamp(v) for v in ohlcs['time']], dtype=object) y = DataFrame.from_records(ohlcs, index=index, exclude=['time']) y['adjclose'] = y['close'] for div in dividends: d = Dividend(div) d.adjust(y) adjclose = y.xs(datetime.datetime(2008, 7, 25))['adjclose'] self.assertTrue(self.floatEqual(adjclose, 17.28))
def get_history_data(self, code, year, season):
    """
    Sina historical adjusted-price (fuquan) data API
    """
    res = self.session.get(url=URL_HISTORY_DATA(code, year, season))
    if res.status_code == 200:
        pattern_data = r'<div align="center">([\d\.]+)</div>'
        data = re.findall(pattern_data, res.text)
        records = util.slice_list(step=7, data_list=data)
        print(records)
        df = DataFrame.from_records(
            records,
            columns=[
                'open', 'high', 'close', 'low', 'volume', 'amount',
                'restoration_factor'
            ]
        )
        pattern_date = r'date=([\d]{4}-[\d]{2}-[\d]{2})'
        date = re.findall(pattern_date, res.text)
        df["date"] = date
        return df
    else:
        self.logger.debug("Status Code: {}".format(res.status_code))
        return False
def get_kline(self, symbol, period = '1day', fqType = 'normal', begin = None, end = None, dataframe = True): if end is None: end = util.time_now() if isinstance(begin, str): begin = util.date_to_timestamp( begin ) if isinstance(end, str): end = util.date_to_timestamp( end ) try: response = self.session.get( URL_XUEQIU_KLINE( symbol = symbol, period = period, fqType = fqType, begin = begin, end = end ) , headers = HEADERS_XUEQIU , timeout = 3 ) kline = response.json() time.sleep(0.5) except Exception as e: self.logger.warning("{}".format(e)) self.logger.info(response.text) time.sleep(3) return None if kline["success"]=='true': if dataframe: if kline["chartlist"] is not None: df = DataFrame.from_records( kline["chartlist"] ) df["time"] = pandas.to_datetime( df["time"] ) df["time"] += timedelta(hours=8) df["symbol"] = symbol return df else: return DataFrame() else: return kline["chartlist"] else: return None
def make_equity_info(cls): cls.equity_info = ret = DataFrame.from_records([ { 'sid': 1, 'symbol': 'A', 'start_date': cls.dates[10], 'end_date': cls.dates[13], 'exchange': 'TEST', }, { 'sid': 2, 'symbol': 'B', 'start_date': cls.dates[11], 'end_date': cls.dates[14], 'exchange': 'TEST', }, { 'sid': 3, 'symbol': 'C', 'start_date': cls.dates[12], 'end_date': cls.dates[15], 'exchange': 'TEST', }, ]) return ret
def test_datetime_roundtrip(): dta = np.array([(1, datetime(2010, 1, 1), 2), (2, datetime(2010, 2, 1), 3), (4, datetime(2010, 3, 1), 5)], dtype=[('var1', float), ('var2', object), ('var3', float)]) buf = BytesIO() with pytest.warns(FutureWarning): writer = StataWriter(buf, dta, {"var2" : "tm"}) writer.write_file() buf.seek(0) with pytest.warns(FutureWarning): dta2 = genfromdta(buf) assert_equal(dta, dta2) dta = DataFrame.from_records(dta) buf = BytesIO() with pytest.warns(FutureWarning): writer = StataWriter(buf, dta, {"var2" : "tm"}) writer.write_file() buf.seek(0) with pytest.warns(FutureWarning): dta2 = genfromdta(buf, pandas=True) ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
def _status_table(bot: Bot, update: Update) -> None: """ Handler for /status table. Returns the current TradeThread status in table format :param bot: telegram bot :param update: message update :return: None """ # Fetch open trade trades = Trade.query.filter(Trade.is_open.is_(True)).all() if get_state() != State.RUNNING: send_msg('*Status:* `trader is not running`', bot=bot) elif not trades: send_msg('*Status:* `no active order`', bot=bot) else: trades_list = [] for trade in trades: # calculate profit and send message to user current_rate = exchange.get_ticker(trade.pair, False)['bid'] trades_list.append([ trade.id, trade.pair, shorten_date(arrow.get(trade.open_date).humanize(only_distance=True)), '{:.2f}%'.format(100 * trade.calc_profit_percent(current_rate)) ]) columns = ['ID', 'Pair', 'Since', 'Profit'] df_statuses = DataFrame.from_records(trades_list, columns=columns) df_statuses = df_statuses.set_index(columns[0]) message = tabulate(df_statuses, headers='keys', tablefmt='simple') message = "<pre>{}</pre>".format(message) send_msg(message, parse_mode=ParseMode.HTML)
def tissue_expression_training_set(taxon_id=9606, limit=200): c.execute(""" SELECT sample_term.sample_id, expression.data, sample_term.term_id, sample_term.probability FROM sample_term INNER JOIN term ON term.id=sample_term.term_id INNER JOIN ontology ON ontology.id=term.ontology_id INNER JOIN sample ON sample.id=sample_term.sample_id INNER JOIN expression ON expression.sample_id=sample.id INNER JOIN platform ON sample.platform_id=platform.id INNER JOIN taxon ON platform.taxon_id=taxon.id WHERE ontology.namespace='BTO' AND sample_term.probability=1 AND taxon.id=%s ORDER BY random() LIMIT %s""", (taxon_id, limit)) samples, data, tissues, values = zip(*c) T = coo_to_df(zip(samples, tissues, values)) T.index.name = "Sample ID" T.columns.name = "Term ID" c.execute("""SELECT id FROM gene WHERE gene.taxon_id=%s ORDER BY id""", (taxon_id,)) X = DataFrame.from_records(list(data), index=samples, columns=[e[0] for e in c]) return X,T
def to_dataframe(self, flatten=False): from pandas import DataFrame if flatten: records, columns = self.itertriples(), ['path', 'name', 'id'] else: records, columns = self.iteritems(), ['path', 'members'] return DataFrame.from_records(records, columns=columns)
def digest_npbinary(self, file_name, **kwargs): try: from numpy import load except Exception: raise ImportError('Numpy is missing!') return DataFrame.from_records(load(file_name))
def create_adjustment_reader(cls, tempdir): dbpath = tempdir.getpath('adjustments.sqlite') writer = SQLiteAdjustmentWriter(dbpath, cls.env.trading_days, MockDailyBarSpotReader()) splits = DataFrame.from_records([ { 'effective_date': str_to_seconds('2014-06-09'), 'ratio': (1 / 7.0), 'sid': cls.AAPL, } ]) mergers = DataFrame( { # Hackery to make the dtypes correct on an empty frame. 'effective_date': array([], dtype=int), 'ratio': array([], dtype=float), 'sid': array([], dtype=int), }, index=DatetimeIndex([]), columns=['effective_date', 'ratio', 'sid'], ) dividends = DataFrame({ 'sid': array([], dtype=uint32), 'amount': array([], dtype=float64), 'record_date': array([], dtype='datetime64[ns]'), 'ex_date': array([], dtype='datetime64[ns]'), 'declared_date': array([], dtype='datetime64[ns]'), 'pay_date': array([], dtype='datetime64[ns]'), }) writer.write(splits, mergers, dividends) return SQLiteAdjustmentReader(dbpath)
def generate_dataframe(self, start_date=None, end_date=None): """ """ first_series_point = CurrencyPrices.objects.filter(currency=self)[0] last_series_point = CurrencyPrices.objects.filter(currency=self).reverse()[0] if start_date == None: start_date = first_series_point.date else: start_date = max(first_series_point.date, start_date) # Get a one day lag so the change wont be null temp_start_date = start_date - timedelta(days=3) if end_date == None: end_date = last_series_point.date else: end_date = min(last_series_point.date, end_date) currency_date = CurrencyPrices.objects.filter(currency=self, date__gte=temp_start_date, date__lte=end_date).values_list('date', 'ask_price', 'bid_price') currency_data_array = np.core.records.fromrecords(currency_date, names=['DATE', "ASK", "BID"]) df = DataFrame.from_records(currency_data_array, index='DATE') df = df.astype(float) df['MID'] = (df['ASK'] + df['BID']) / 2.0 df['CHANGE'] = df['MID'].pct_change() required_dates = date_range(start_date,end_date) df = df.reindex(required_dates) return df
def adjust(y, divs): """Return fully adjusted OHLCs data base on dividends Paramaters: y: numpy divs: numpy of dividends Return: DataFrame objects """ index = DatetimeIndex([datetime.datetime.fromtimestamp(v) for v in y['time']]) y = DataFrame.from_records(y, index=index, exclude=['time']) y['adjclose'] = y['close'] for div in divs: d = Dividend(div) d.adjust(y) factor = y['adjclose'] / y['close'] frame = y.copy() frame['open'] = frame['open'] * factor frame['high'] = frame['high'] * factor frame['low'] = frame['low'] * factor frame['close'] = frame['close'] * factor frame['volume'] = frame['volume'] * (1 / factor) return frame
def generate_dataframe(self, symbols=None, date_index=None):
    """ Generate a dataframe consisting of the currency prices (specified by symbols)
        from the start to end date """
    # Set defaults if necessary
    if symbols is None:
        symbols = Currency.objects.all().values_list('symbol')

    try:
        assert date_index is not None
        # check the index itself is non-empty, not the result of comparing it to 0
        assert len(date_index) > 0
    except:
        start_date = date(2005, 1, 1)
        end_date = date.today()
        date_index = date_range(start_date, end_date)

    currency_price_data = CurrencyPrices.objects.filter(
        currency__symbol__in=symbols,
        date__in=date_index.tolist()).values_list('date', 'currency__symbol', 'ask_price')

    try:
        # Generate numpy array from queryset data
        forex_data_array = np.core.records.fromrecords(
            currency_price_data, names=['date', 'symbol', 'ask_price'])
    except IndexError:
        # If there is no data, generate an empty array
        forex_data_array = np.core.records.fromrecords(
            [(date(1900, 1, 1), "", 0)], names=['date', 'symbol', 'ask_price'])

    df = DataFrame.from_records(forex_data_array, index='date')

    # Create pivot table
    df['date'] = df.index
    df = df.pivot(index='date', columns='symbol', values='ask_price')

    return df
def dataframe(self, table, limit=None, offset=None): """ create a pandas dataframe from a table or query Parameters ---------- table : table a table in this database or a query limit: integer an integer limit on the query offset: integer an offset for the query """ from pandas import DataFrame if isinstance(table, basestring): table = getattr(self, table) records = table._table.select() if not limit is None: records = records.limit(limit) if not offset is None: records = records.offset(offset) records = list(records.execute()) cols = [c.name for c in table._table.columns] return DataFrame.from_records(records, columns=cols)
def generate_periodic_breakdown_stats(trade_list: List, period: str) -> List[Dict[str, Any]]: results = DataFrame.from_records(trade_list) if len(results) == 0: return [] results['close_date'] = to_datetime(results['close_date'], utc=True) resample_period = _get_resample_from_period(period) resampled = results.resample(resample_period, on='close_date') stats = [] for name, day in resampled: profit_abs = day['profit_abs'].sum().round(10) wins = sum(day['profit_abs'] > 0) draws = sum(day['profit_abs'] == 0) loses = sum(day['profit_abs'] < 0) stats.append({ 'date': name.strftime('%d/%m/%Y'), 'profit_abs': profit_abs, 'wins': wins, 'draws': draws, 'loses': loses }) return stats
def parse_output(output): """ output: console output from netkit-srl returns: DataFrame with class probabilities """ del output[-1] dic_list = [] fold = '' for line in output: if '#' in line: fold = line.replace('#', '') else: entries = line.split() pred_dic = {} for entry in entries[1:len(entries)]: tmp = entry.split(':') pred_dic[tmp[0]] = float(tmp[1]) pred_dic['id'] = entries[0] pred_dic['fold'] = fold dic_list.append(pred_dic) return DataFrame.from_records(dic_list, index='id')
def error_analysis(self): for option in ["Unweighted", "Weighted"]: self.weighted = option self._all_error() self._group_error() # Print of errors per config # self._config_error() if self.template_error is True: self._template_error() self.errors = DataFrame.from_records(self.errors) self.errors = self.errors.set_index([ "Group", "Weighting", "Subsystem", ]).sort_index() if config.sections[ "CALCULATOR"].calculator == "LAMMPSSNAP" and config.sections[ "BISPECTRUM"].bzeroflag: self._offset()
def movement_interval(
        train_on=['training1', 'training2', 'training3', 'training4'],
        predict_on=['validation1_lab', 'validation2_lab', 'validation3_lab']):
    window_shift = 5
    window_length = 40
    print('aggregated_skeletion_win')
    X_win = aggregated_skeletion_win(
        predict_on, agg_functions=['median', 'var', 'min', 'max'],
        window_shift=window_shift, window_length=window_length)
    X_win = X_win.fillna(0)
    print('train rf model')
    X, y = aggregated_skeletion(file_names=train_on,
                                agg_functions=['median', 'var', 'min', 'max'])
    X = X.fillna(0)
    y = np.array([gesture_to_id[gest] for gest in y])

    clf = ExtraTreesClassifier(n_estimators=1500, random_state=0, n_jobs=-1)
    clf.fit(X, y)
    del X
    del y
    print('rf predict')
    y_pred = clf.predict_proba(X_win)

    df_out = pd.concat([
        DataFrame.from_records(X_win.index.values.tolist(),
                               columns=['sample_id', 'frame']),
        DataFrame(y_pred)], axis=1)
    df_out['movement'] = np.array(np.argmax(y_pred, axis=1) != 0, dtype=int)
    # adjust for sliding window size
    df_out.frame = df_out.frame + 20
    return df_out
def data(self) -> DataFrame: if self.__data is None or len(self) != self.__length: records = [{ "time": packet.time, "x": packet.coordinates[0], "y": packet.coordinates[1], "altitude": packet.coordinates[2], } for packet in self.packets] for index, packet_delta in enumerate( numpy.insert(numpy.diff(self.packets), 0, Distance(0, 0, 0, self.crs))): records[index].update({ "interval": packet_delta.interval, "overground_distance": packet_delta.overground, "ascent": packet_delta.ascent, "ground_speed": packet_delta.ground_speed, "ascent_rate": packet_delta.ascent_rate, }) self.__data = DataFrame.from_records(records) return self.__data
def get_quotation(self, symbol=None, symbolSet=None, dataframe=True, threadNum=3): if 'quotation' in self.__dict__.keys(): del(self.quotation) # Cut symbolList symbolList = list(symbolSet) threads = [] symbolListSlice = util.slice_list(num=threadNum, data_list=symbolList) for symbolList in symbolListSlice: loop = asyncio.new_event_loop() symbolsList = util.slice_list(step=50, data_list=symbolList) tasks = [self.get_quotation_task( symbols=symbols) for symbols in symbolsList] t = threading.Thread(target=util.thread_loop, args=(loop, tasks)) threads.append(t) for t in threads: t.start() for t in threads: t.join() if dataframe: self.quotation = DataFrame.from_records(self.quotation).T return(self.quotation)
def test_filter_denies(): headers = HeaderConstraints(srcIps='1.1.1.1') with patch.object(bfq, 'searchFilters', create=True) as mock_search_filters: # Test success mock_search_filters.return_value = MockQuestion() assert_filter_denies('filter', headers) mock_search_filters.assert_called_with(filters='filter', headers=headers, action='permit') # Test failure; also test that startLocation is passed through mock_df = DataFrame.from_records([{'Flow': 'found', 'More': 'data'}]) mock_search_filters.return_value = MockQuestion( MockTableAnswer(mock_df)) with pytest.raises(BatfishAssertException) as excinfo: assert_filter_denies('filter', headers, startLocation='Ethernet1') # Ensure found answer is printed assert mock_df.to_string() in str(excinfo.value) mock_search_filters.assert_called_with(filters='filter', headers=headers, startLocation='Ethernet1', action='permit')
def create_adjustment_reader(cls, tempdir): dbpath = tempdir.getpath('adjustments.sqlite') writer = SQLiteAdjustmentWriter(dbpath, cls.env.trading_days, MockDailyBarSpotReader()) splits = DataFrame.from_records([ { 'effective_date': str_to_seconds('2014-06-09'), 'ratio': (1 / 7.0), 'sid': cls.AAPL, } ]) mergers = create_empty_splits_mergers_frame() dividends = DataFrame({ 'sid': array([], dtype=uint32), 'amount': array([], dtype=float64), 'record_date': array([], dtype='datetime64[ns]'), 'ex_date': array([], dtype='datetime64[ns]'), 'declared_date': array([], dtype='datetime64[ns]'), 'pay_date': array([], dtype='datetime64[ns]'), }) writer.write(splits, mergers, dividends) return SQLiteAdjustmentReader(dbpath)
def _rpc_status_table(self) -> DataFrame: trades = Trade.query.filter(Trade.is_open.is_(True)).all() if self._freqtrade.state != State.RUNNING: raise RPCException('*Status:* `trader is not running`') elif not trades: raise RPCException('*Status:* `no active order`') else: trades_list = [] for trade in trades: # calculate profit and send message to user current_rate = self._freqtrade.exchange.get_ticker(trade.pair, False)['bid'] trades_list.append([ trade.id, trade.pair, shorten_date(arrow.get(trade.open_date).humanize(only_distance=True)), '{:.2f}%'.format(100 * trade.calc_profit_percent(current_rate)) ]) columns = ['ID', 'Pair', 'Since', 'Profit'] df_statuses = DataFrame.from_records(trades_list, columns=columns) df_statuses = df_statuses.set_index(columns[0]) return df_statuses
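# Rendering sketch for the table returned by _rpc_status_table above: the
# Telegram handler earlier in this collection formats the same frame with
# tabulate. A standalone illustration with placeholder trade values:
from pandas import DataFrame
from tabulate import tabulate

df_statuses = DataFrame.from_records(
    [[1, 'BTC/USDT', '2 hours ago', '1.25%']],
    columns=['ID', 'Pair', 'Since', 'Profit']).set_index('ID')
print(tabulate(df_statuses, headers='keys', tablefmt='simple'))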
def create_adjustment_reader(cls, tempdir): dbpath = tempdir.getpath('adjustments.sqlite') writer = SQLiteAdjustmentWriter(dbpath) splits = DataFrame.from_records([{ 'effective_date': str_to_seconds('2014-06-09'), 'ratio': (1 / 7.0), 'sid': cls.AAPL, }]) mergers = dividends = DataFrame( { # Hackery to make the dtypes correct on an empty frame. 'effective_date': array([], dtype=int), 'ratio': array([], dtype=float), 'sid': array([], dtype=int), }, index=DatetimeIndex([], tz='UTC'), columns=['effective_date', 'ratio', 'sid'], ) writer.write(splits, mergers, dividends) return SQLiteAdjustmentReader(dbpath)
def read_html( html: str, selector: str = "table", table_index: int = 0, skiprows: int = 0, header: bool = False, parser: Callable = None, ) -> DataFrame: """ Parse an HTML table into a DataFrame """ parser = parser if parser is not None else _default_html_cell_parser # Fetch table and read its rows page = BeautifulSoup(html, "lxml") table = page.select(selector)[table_index] rows = [_get_html_columns(row) for row in table.find_all("tr")] # Adjust for rowspan > 1 for idx_row, row in enumerate(rows): for idx_cell, cell in enumerate(row): rowspan = int(cell.attrs.get("rowspan", 1)) cell.attrs["rowspan"] = 1 # reset to prevent cascading for offset in range(1, rowspan): rows[idx_row + offset].insert(idx_cell, cell) # Get text within table cells and build dataframe records = [] for row_idx, row in enumerate(rows[skiprows:]): records.append([ parser(elem, row_idx, col_idx) for col_idx, elem in enumerate(row) ]) data = DataFrame.from_records(records) # Parse header if requested if header: data.columns = data.iloc[0] data = data.drop(data.index[0]) return data
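# A small usage sketch for read_html above (illustrative input only; it assumes
# the referenced helpers _get_html_columns and _default_html_cell_parser extract
# plain cell text): a header row plus a rowspan cell that the rowspan-expansion
# loop copies down into the second data row.
html = """
<table>
  <tr><th>city</th><th>year</th><th>count</th></tr>
  <tr><td rowspan="2">Oslo</td><td>2020</td><td>10</td></tr>
  <tr><td>2021</td><td>12</td></tr>
</table>
"""
df = read_html(html, header=True)
# Expected: 2 rows x 3 columns, with "Oslo" repeated in the first column
print(df)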
def collect_training_data(self, labelled_data: List[Dict], id_field: str, query_model: Query, number_additional_docs: int, relevant_score: int = 1, default_score: int = 0, **kwargs) -> DataFrame: """ Collect training data based on a set of labelled data. :param labelled_data: Labelled data containing query, query_id and relevant ids. :param id_field: The Vespa field representing the document id. :param query_model: Query model. :param number_additional_docs: Number of additional documents to retrieve for each relevant document. :param relevant_score: Score to assign to relevant documents. Default to 1. :param default_score: Score to assign to the additional documents that are not relevant. Default to 0. :param kwargs: Extra keyword arguments to be included in the Vespa Query. :return: DataFrame containing document id (document_id), query id (query_id), scores (relevant) and vespa rank features returned by the Query model RankProfile used. """ training_data = [] for query_data in labelled_data: for doc_data in query_data["relevant_docs"]: training_data_point = self.collect_training_data_point( query=query_data["query"], query_id=query_data["query_id"], relevant_id=doc_data["id"], id_field=id_field, query_model=query_model, number_additional_docs=number_additional_docs, relevant_score=doc_data.get("score", relevant_score), default_score=default_score, **kwargs) training_data.extend(training_data_point) training_data = DataFrame.from_records(training_data) return training_data
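# Shape of the labelled_data argument expected by collect_training_data above,
# reconstructed from how the method indexes it (query, query_id, relevant_docs
# entries with an id and an optional per-document score); values are placeholders.
labelled_data = [
    {
        "query_id": 0,
        "query": "what is the best winter jacket",
        "relevant_docs": [
            {"id": "doc-17"},              # falls back to relevant_score
            {"id": "doc-42", "score": 2},  # explicit score overrides it
        ],
    },
]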
def parse(self, sources: Dict[str, str], aux: Dict[str, DataFrame], **parse_opts) -> DataFrame: with open(sources[0], "r") as fd: cases = json.load(fd)["Data"] # {"ConfirmDate":"2021-01-09 00:00:00","No":"9876","Age":66,"Gender":"\u0e0a","GenderEn":"Male","Nation":"Thailand","NationEn":"Thailand","Province":"\u0e2d","ProvinceId":72,"District":"\u0e44","ProvinceEn":"Ang Thong","Detail":null,"StatQuarantine":1} cases = table_rename( DataFrame.from_records(cases), { "ConfirmDate": "date_new_confirmed", "Age": "age", "GenderEn": "sex", "ProvinceEn": "match_string", }, drop=True, ) # Convert dates to ISO format for col in cases.columns: if col.startswith("date_"): cases[col] = cases[col].str.slice(0, 10) # Parse age and sex fields cases["sex"] = cases["sex"].str.lower().apply({"male": "male", "female": "female"}.get) cases["age"] = cases["age"].fillna("age_unknown") cases["sex"] = cases["sex"].fillna("sex_unknown") # Convert to time series data data = convert_cases_to_time_series(cases, ["match_string"]) # Aggregate by country level country = aggregate_admin_level(data, ["date", "age", "sex"], "country") country["key"] = "TH" # Add country code and return data data["country_code"] = "TH" data = data[data["match_string"] != "Unknown"] return concat([country, data])
def get_statistics( self, resolution_list: Optional[List[str]] = None) -> DatetimeStats: """ Args: resolution_list: Returns: """ if resolution_list is None: resolution_list = [ DatetimeResolution.YEAR, DatetimeResolution.MONTH ] resolution_list = sorted(resolution_list) _time_df = DataFrame.from_records(self._time_records) group_count_dict = _time_df.groupby(by=resolution_list).groups frequency_count = dict() group_dict = frequency_count for groups, index in group_count_dict.items(): if len(groups) != len(resolution_list): raise InconsistentSize(column_A='group title', column_B='time resolution', length_A=len(groups), length_B=len(resolution_list)) for group in groups[:-1]: if group not in group_dict.keys(): group_dict[group] = dict() group_dict = group_dict[group] group_dict[groups[-1]] = len(index) group_dict = frequency_count stats = DatetimeStats(frequency_count=frequency_count, resolution_list=resolution_list) return stats
def parse_dataframes( self, dataframes: Dict[str, DataFrame], aux: Dict[str, DataFrame], **parse_opts ) -> DataFrame: # Parse the data as a list of records records = [] for _, row in dataframes[0].iterrows(): row = row.to_dict() # Parse the start and end dates date_start = row["Effective Date"][:10] date_end = row["Valid Through Date"][:10] # Convert column name and delete unnecessary columns row["subregion1_name"] = row["Jurisdictions"] del row["Jurisdictions"] del row["Effective Date"] del row["Valid Through Date"] # Insert a record for each date in the range for date in date_range(date_start, date_end): record = {} record["date"] = date non_numeric_columns = ("date", "subregion1_name") for col, val in row.items(): if col in non_numeric_columns: record[col] = val else: record[_convert_column_name(col)] = safe_int_cast(val) records.append(record) # Convert to DataFrame and add metadata for matching data = DataFrame.from_records(records) data["country_code"] = "US" data["subregion2_code"] = None data["locality_code"] = None return data
def generate_dataframe(self, symbols=None, date_index=None):
    """ Generate a dataframe consisting of the currency prices (specified by symbols)
        from the start to end date """
    # Set defaults if necessary
    if symbols is None:
        symbols = Currency.objects.all().values_list('symbol')

    try:
        assert date_index is not None
        # check the index itself is non-empty, not the result of comparing it to 0
        assert len(date_index) > 0
    except:
        start_date = date(2005, 1, 1)
        end_date = date.today()
        date_index = date_range(start_date, end_date)

    currency_price_data = CurrencyPrices.objects.filter(
        currency__symbol__in=symbols,
        date__in=date_index.tolist()).values_list('date', 'currency__symbol',
                                                  'ask_price')

    try:
        # Generate numpy array from queryset data
        forex_data_array = np.core.records.fromrecords(
            currency_price_data, names=['date', 'symbol', 'ask_price'])
    except IndexError:
        # If there is no data, generate an empty array
        forex_data_array = np.core.records.fromrecords(
            [(date(1900, 1, 1), "", 0)], names=['date', 'symbol', 'ask_price'])

    df = DataFrame.from_records(forex_data_array, index='date')

    # Create pivot table
    df['date'] = df.index
    df = df.pivot(index='date', columns='symbol', values='ask_price')

    return df
def save_annotation_batches(self, batches, batch_size, existing_ids, existing_batches): """ Save annotation batches in .csv files. Don't save queries that have been already saved. Args: batches: int, number of batches to create. batch_size: int, size of the batches. existing_ids: set, ids in the existing annotation batches. existing_batches: set, indexes of the existing annotation batches. """ all_ids = set(self.queries.keys()) ids = all_ids.difference(existing_ids) self.print("Removing %i existing queries from the %i total queries; %i remaining queries." % (len(existing_ids), len(all_ids), len(ids))) batches_ids = choice(a=sorted(ids), size=batches * batch_size, replace=False) batches_ids = np_split(batches_ids, batches) starting_idx = max(existing_batches) + 1 if existing_batches else 0 for batch in range(batches): batch_ids = batches_ids[batch] data = [self.queries[id_].to_html() for id_ in batch_ids] df = DataFrame.from_records(data=data) batch_idx = starting_idx + batch batch_idx = "0" + str(batch_idx) if 0 <= batch_idx < 10 else str(batch_idx) file_name = self.results_path + "queries/batch_" + batch_idx + ".csv" if self.save: df.to_csv(file_name, index=False) self.print("batch_%s saved at %s." % (batch_idx, file_name)) else: self.print("Not saving %s (not in save mode)." % file_name)
def CreateGraph(dist=1000): # import the data dataset = "https://api.bsmsa.eu/ext/api/bsm/gbfs/v2/en/station_information" bicing = DataFrame.from_records(pd.read_json(dataset)['data']['stations'], index='station_id') G = nx.Graph() # add all the nodes in the graph for st in bicing.itertuples(): G.add_node((st.lon, st.lat), id=st.Index) # calculates the bounding box of the coordinates given the graph minx, miny, maxy, maxx = bbox(G) # calculates the width and height of the bbox width = haversine((miny, minx), (maxy, minx)) height = haversine((miny, minx), (miny, maxx)) # how many columns and rows the matrix has columns = int((width // (dist / 1000.0)) + 1) rows = int((height // (dist / 1000.0)) + 1) # x [0] lon # y [1] lat # creates a matrix of lists matriz = [] for i in range(rows): matriz.append([]) for j in range(columns): matriz[i].append([]) # sorts out every node in the matrix for node in G.nodes(): x = int( haversine((node[1], minx), (node[1], node[0])) / (dist / 1000.0)) y = int( haversine((maxy, node[0]), (node[1], node[0])) / (dist / 1000.0)) matriz[x][y].append(node) # given the matrix, it compares with the possible edges G = compare(G, rows, columns, matriz, dist) return G
def print(self): # There is schema available in meta field print("---- SCHEMA ----") # Print schema as dict print(self.avro_reader.meta) # Or load it into json schema_json = json.loads( self.avro_reader.meta.get('avro.schema').decode('utf-8')) print('Avro schema [{}]: {}.{}'.format(schema_json['type'], schema_json['namespace'], schema_json['name'])) print("---- CODEC ----") print(self.avro_reader.meta.get('avro.codec')) for field in schema_json['fields']: print('{}:{}'.format(field['name'], field['type'])) records = [record for record in self.avro_reader] print("---- AVRO RECORDS ----") print(records) print("---- PANDAS DATAFRAME ----") print(DataFrame.from_records(records))
def get_history_data(self, code, year, season):
    """
    Sina historical adjusted-price (fuquan) data API
    """
    res = self.session.get(url=URL_HISTORY_DATA(code, year, season))
    if res.status_code == 200:
        pattern_data = r'<div align="center">([\d\.]+)</div>'
        data = re.findall(pattern_data, res.text)
        records = util.slice_list(step=7, data_list=data)
        print(records)
        df = DataFrame.from_records(records, columns=[
            'open', 'high', 'close', 'low', 'volume', 'amount',
            'restoration_factor'
        ])
        pattern_date = r'date=([\d]{4}-[\d]{2}-[\d]{2})'
        date = re.findall(pattern_date, res.text)
        df["date"] = date
        return df
    else:
        self.logger.debug("Status Code: {}".format(res.status_code))
        return False
def read_and_format(file_path): data = DataFrame.from_records(gen_records(file_path)) # I've encountered files without time values, which kinda precludes # us creating an ActivityData instance. if 'time' in data: timestamps = data.pop('time') timeoffsets = timestamps - timestamps[0] data = ActivityData(data).astype('float64') data._finish_up(column_spec=COLUMN_SPEC, start=timestamps[0], timeoffsets=timeoffsets) # We should be able to rely on always having lon and lat columns, so # may as well append a distance column. data[special_columns.Distance.colname] = data.haversine().cumsum() else: data = data.astype('float64', copy=False) return data
def test_calculate_stopbuy_and_stoploss(self): #file = test_filepath + 'test_calculate_stopbuy_and_stoploss_Ok.csv' labels = [] for key, value in GlobalVariables.get_stock_data_labels_dict().items(): labels.append(value) data = [ ('2016-10-07', 23.58, 23.65, 23.37, 23.48, 43000), ('2016-10-10', 23.62, 23.88, 23.55, 24.0, 44000), ('2016-10-11', 23.62, 30.0, 23.01, 23.16, 45000), ('2016-10-12', 23.16, 23.5, 23.11, 23.3, 46000)] df = DataFrame.from_records(data, columns=labels) stock_data_container = StockDataContainer("Apple Inc.", "AAPL", "") stock_data_container.set_historical_stock_data(df) sl, sb = calculate_stopbuy_and_stoploss(stock_data_container.historical_stock_data()) # previous calculation with latest value should now be True self.assertEqual(np.math.isclose(sb, 23.6175, abs_tol=0.001), True) # =23,5*1.005 self.assertEqual(np.math.isclose(sl, 22.9089, abs_tol=0.001), True) # =23,5*1.005*0.97 # real calculation with real 52 w high value --> False self.assertEqual(np.math.isclose(sb, 30.15, abs_tol=0.001), False) # =30*1.005 self.assertEqual(np.math.isclose(sl, 29.2455, abs_tol=0.001), False) # =30*1.005*0.97
def fetch_DataFrame(query, database=PARAMS.get("database", ""), attach=False): '''Fetch query results and returns them as a pandas dataframe''' try: cc = database.cursor() except AttributeError: dbhandle = sqlite3.connect(database) cc = dbhandle.cursor() if attach: db_execute(cc, attach) sqlresult = cc.execute(query).fetchall() cc.close() # see http://pandas.pydata.org/pandas-docs/dev/generated/ # pandas.DataFrame.from_records.html#pandas.DataFrame.from_records # this method is design to handle sql_records with proper type # conversion field_names = [d[0] for d in cc.description] pandas_DataFrame = DataFrame.from_records(sqlresult, columns=field_names) return pandas_DataFrame
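# Usage sketch for fetch_DataFrame above (assumption: the database argument is a
# path to an sqlite file, so the sqlite3 fallback branch is taken; the table and
# column names are illustrative).
df = fetch_DataFrame("SELECT gene_id, counts FROM expression LIMIT 10",
                     database="csvdb")
print(df.head())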
def merge_report_files_to_csv(): records_list_from_xml = parse_xml_file_to_csv() df_xml = DataFrame.from_records(records_list_from_xml) df_csv = pd.read_csv(csv_file_path) df_json=pd.read_json(json_file_path) # convert epoch time to time string of timezone 'Canada/Atlantic' df_json['request-time'] = df_json['request-time'].apply( lambda d: datetime.fromtimestamp(int(d/1000)).astimezone(timezone('Canada/Atlantic')) .strftime('%Y-%m-%d %H:%M:%S %Z')) df = pd.concat((df_csv, df_xml, df_json), sort=False, ignore_index=True) # filter out the records with packets-serviced 0 and sort the data by request-time df = df[df['packets-serviced'].astype(int) != 0] df = df.sort_values('request-time', ascending=True) # print out the summary file summary = df.groupby('service-guid').size() df_summary = DataFrame(summary, columns=['number of records']) df_summary.to_csv(summary_file_path, index=True) return df
def test_no_unestablished_bgp_sessions_no_session(): """Confirm no-unestablished-bgp-sessions assert passes and fails as expected when not specifying a session.""" with patch.object(bfq, 'bgpSessionStatus', create=True) as bgpSessionStatus: # Test success bgpSessionStatus.return_value = MockQuestion() assert_no_unestablished_bgp_sessions(nodes='nodes', remote_nodes='remote_nodes') bgpSessionStatus.assert_called_with(nodes='nodes', remote_nodes='remote_nodes', status="NOT_ESTABLISHED") # Test failure mock_df = DataFrame.from_records([{'Session': 'found', 'More': 'data'}]) bgpSessionStatus.return_value = MockQuestion( MockTableAnswer(mock_df)) with pytest.raises(BatfishAssertException) as excinfo: assert_no_unestablished_bgp_sessions(nodes='nodes', remote_nodes='remote_nodes') # Ensure found answer is printed assert mock_df.to_string() in str(excinfo.value) bgpSessionStatus.assert_called_with(nodes='nodes', remote_nodes='remote_nodes', status="NOT_ESTABLISHED")
def test_no_incompatible_ospf_sessions_no_session(): """Confirm no-incompatible-ospf-sessions assert passes and fails as expected when not specifying a session.""" with patch.object(bfq, 'ospfSessionCompatibility', create=True) as ospfSessionCompatibility: # Test success ospfSessionCompatibility.return_value = MockQuestion() assert_no_incompatible_ospf_sessions(nodes='nodes', remote_nodes='remote_nodes') ospfSessionCompatibility.assert_called_with(nodes='nodes', remote_nodes='remote_nodes', statuses=UNESTABLISHED_OSPF_SESSION_STATUS_SPEC) # Test failure mock_df = DataFrame.from_records([{'Session': 'found', 'More': 'data'}]) ospfSessionCompatibility.return_value = MockQuestion( MockTableAnswer(mock_df)) with pytest.raises(BatfishAssertException) as excinfo: assert_no_incompatible_ospf_sessions(nodes='nodes', remote_nodes='remote_nodes') # Ensure found answer is printed assert mock_df.to_string() in str(excinfo.value) ospfSessionCompatibility.assert_called_with(nodes='nodes', remote_nodes='remote_nodes', statuses=UNESTABLISHED_OSPF_SESSION_STATUS_SPEC)
def _process_response_rows_for_bigquery(self, rows: Sequence[dict], methods: Sequence[dict], table_reference: TableReference): rows_dataframe = DataFrame.from_records(rows) rows_dataframe['date'] = rows_dataframe['date'].apply( lambda x: x.date()) job_config = LoadJobConfig() job_config.write_disposition = WriteDisposition.WRITE_APPEND job_config.time_partitioning = TimePartitioning( type_=TimePartitioningType.DAY, field='date') job_config.schema = [ self._get_schema_for_field(column, methods) for column in list(rows_dataframe.columns.values) ] try: load_job = self.bigquery.client.load_table_from_dataframe( rows_dataframe, table_reference, job_config=job_config) load_job.result() except BadRequest as error: print(error.errors)
def print_table(self): if XXX in self.teams: self.teams.pop() table = [i.group_records for i in self.teams] self.df = DataFrame.from_records(table) self.df[6] = self.df[4] - self.df[5] self.df[7] = self.df[1] * 3 + self.df[2] global pts_columns pts_columns = ['Team', 'W', 'D', 'L', 'GF', 'GA', 'GD', 'Pts'] self.df.columns = pts_columns self.df = self.df.sort_values(by=['Pts', 'GD', 'GF'], ascending=False) print(self.df) self.rank_list = self.df['Team'].tolist()
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True): """ Download and return an example dataset from Stata. Parameters ---------- data : str Name of dataset to fetch. baseurl : str The base URL to the stata datasets. as_df : bool If True, returns a `pandas.DataFrame` Returns ------- dta : Record Array A record array containing the Stata dataset. Examples -------- >>> dta = webuse('auto') Notes ----- Make sure baseurl has trailing forward slash. Doesn't do any error checking in response URLs. """ # lazy imports from statsmodels.iolib import genfromdta url = urljoin(baseurl, data + '.dta') dta = urlopen(url) dta = BytesIO(dta.read()) # make it truly file-like if as_df: # could make this faster if we don't process dta twice? return DataFrame.from_records(genfromdta(dta)) else: return genfromdta(dta)
def test_from_records_bad_index_column(self): df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"]) # should pass df1 = DataFrame.from_records(df, index=["C"]) tm.assert_index_equal(df1.index, Index(df.C)) df1 = DataFrame.from_records(df, index="C") tm.assert_index_equal(df1.index, Index(df.C)) # should fail msg = "|".join([ r"Length of values \(10\) does not match length of index \(1\)", ]) with pytest.raises(ValueError, match=msg): DataFrame.from_records(df, index=[2]) with pytest.raises(KeyError, match=r"^2$"): DataFrame.from_records(df, index=2)