def __init__(self, num=False, plot=True):
    exp = array([1 / 99.] * 100)
    DataFrame.__init__(self, {'Expected': exp,
                              'Last_2_Dig': _lt_(num=num)})
    self.set_index('Last_2_Dig', inplace=True)
    if plot:
        plot_expected(self, -2)
def __init__(self, *args, **kwargs): """ Initializing with a SensitiveFrame as input without specifying metadata will reset metadata to default. Metadeta will be removed if it does not correspond to a SensitiveFrame column. """ DataFrame.__init__(self, *args, **kwargs)
def __init__(self, state_fips, year=2014, read_from='api', key=None,
             variables=None, filepath=None, impute_emp=True):
    dtypes = {
        'NAICS2012': str,
        'county': str,
        'state': str
    }
    if read_from == 'api':
        if not key:
            raise ValueError('If reading from Census API, must provide API key in the key parameter')
        baseurl = 'http://api.census.gov/data/{}/cbp'.format(year)
        if variables:
            cbp_vars = ','.join(variables)
        else:
            cbp_vars = 'EMP,EMP_F,ESTAB,NAICS2012,NAICS2012_TTL,GEO_TTL'
        url = '{baseurl}?get={cbp_vars}&for=county:*&in=state:{state_fips}&key={key}'.format(**locals())
        try:
            req = requests.get(url)
            req.raise_for_status()
        except requests.exceptions.HTTPError as err:
            print(err)
            sys.exit(1)
        results = pd.read_json(req.text, orient='index', dtype=dtypes).T
        results.columns = results.iloc[0]
        results.drop(results.index[0], inplace=True)
        cbp_dtypes = {
            'EMP': int,
            'ESTAB': int
        }
        for variable in cbp_dtypes.keys():
            results[variable] = results[variable].astype(cbp_dtypes[variable])
        data = results
        if impute_emp:
            data['EMP'] = data.apply(
                lambda x: emp_imputation[x.EMP_F] if x.EMP_F else x.EMP,
                axis=1)
            data.drop('EMP_F', axis=1, inplace=True)
    elif read_from == 'csv':
        if not filepath:
            raise ValueError('If reading from CSV file, must provide path to file in filepath parameter')
        data = pd.read_csv(filepath_or_buffer=filepath, dtype=dtypes)
    else:
        raise ValueError('Valid options for the read_from parameter are "api" and "csv".')
    DataFrame.__init__(self, data=data)
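# A minimal sketch, not from the source, of the request URL the 'api' branch
# above assembles. The year, state FIPS code, and key below are illustrative
# placeholders only.
year = 2014
state_fips = '06'
key = 'MY_CENSUS_KEY'
cbp_vars = 'EMP,EMP_F,ESTAB,NAICS2012,NAICS2012_TTL,GEO_TTL'
baseurl = 'http://api.census.gov/data/{}/cbp'.format(year)
url = '{baseurl}?get={cbp_vars}&for=county:*&in=state:{state_fips}&key={key}'.format(**locals())
# url == 'http://api.census.gov/data/2014/cbp?get=EMP,EMP_F,ESTAB,NAICS2012,'
#        'NAICS2012_TTL,GEO_TTL&for=county:*&in=state:06&key=MY_CENSUS_KEY'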
def __init__(self, data=None, index=None, columns=None, dtype=None,
             copy=False, sampling_rate=1.0):
    """Construct dataframe-like object."""
    DataFrame.__init__(self, data=data, index=index, columns=columns,
                       dtype=dtype, copy=copy)
    if sampling_rate is not None:
        self.index = np.arange(0, len(self) * sampling_rate, sampling_rate)
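# A worked example, not from the source, of the index construction above:
import numpy as np

sampling_rate = 0.5
n_rows = 4
index = np.arange(0, n_rows * sampling_rate, sampling_rate)
# index -> array([0. , 0.5, 1. , 1.5]); row i is stamped at i * sampling_rate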
def __init__(self, x, y, d=0.025, verbose=False, xml=None, **kwargs):
    if not xml:
        xml = osm_xml_download(x, y, d, verbose)
    osm = etree.fromstring(xml.encode('utf-8'))
    # one Series of lat/lon per OSM <node>, named by the node id
    data = [Series({key: value for key, value in node.attrib.items()
                    if key in ['lat', 'lon']},
                   name=node.attrib['id'])
            for node in osm.findall('node')]
    DataFrame.__init__(self, data, dtype=float, **kwargs)
    self.street_setter(osm.findall('way'))
    self.index = self.index.astype(int)
def __init__(self, plot=True):
    a = arange(10, 100)
    Expe = log10(1 + (1. / a))
    Sec_Dig = array(list(range(10)) * 9)
    df = DataFrame({'Expected': Expe, 'Sec_Dig': Sec_Dig})
    DataFrame.__init__(self, df.groupby('Sec_Dig').sum())
    if plot:
        plot_expected(self, 22)
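# A worked check, not from the source: the groupby-sum above yields Benford's
# second-digit distribution, P(d2) = sum over d1=1..9 of log10(1 + 1/(10*d1 + d2)).
import numpy as np

a = np.arange(10, 100)
expected = np.log10(1 + 1. / a)
sec = a % 10                      # second digit of each two-digit number
p = np.array([expected[sec == d].sum() for d in range(10)])
# p[0] ~ 0.120, p[9] ~ 0.085, and p.sum() == 1.0 up to floating-point error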
def __init__(self, digs, plot=True):
    _check_digs_(digs)
    dig_name = f'First_{digs}_Dig'
    Dig = arange(10 ** (digs - 1), 10 ** digs)
    Exp = log10(1 + (1. / Dig))
    DataFrame.__init__(self, {'Expected': Exp}, index=Dig)
    self.index.names = [dig_name]
    if plot:
        plot_expected(self, digs)
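# A worked example, not from the source, of the formula above for digs=1
# (Benford's first-digit law): Expected[d] = log10(1 + 1/d).
import numpy as np

dig = np.arange(1, 10)
expected = np.log10(1 + 1. / dig)
# expected[0] ~ 0.301 (digit 1), expected[1] ~ 0.176 (digit 2),
# expected[8] ~ 0.046 (digit 9); the nine proportions sum to 1.
# For digs=2 the same formula runs over the two-digit leading values 10..99.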
def __init__(self, data=None, index=None, columns=_col_names, dtype=None,
             copy=False, tick_id=None, unit_amount=None):
    # print tick_id, dates, data
    if tick_id is None:
        raise ValueError("tick_id must be set")
    if columns is None:
        columns = _col_names
    DataFrame.__init__(self, data, index, columns, dtype, copy)
    self.tick_id = tick_id
    self.sort(inplace=True)
    self.unit_amount = unit_amount
    self.fix_split()
def __init__(self, plot=True, save_plot=None, save_plot_kwargs=None):
    exp, sec_digs = _gen_second_digits_()
    DataFrame.__init__(self, {'Expected': exp, 'Sec_Dig': sec_digs})
    self.set_index("Sec_Dig", inplace=True)
    if plot:
        plot_expected(self, 22, save_plot=save_plot,
                      save_plot_kwargs=save_plot_kwargs)
def __init__(self, digs, plot=True, save_plot=None, save_plot_kwargs=None):
    _check_digs_(digs)
    dig_name = f'First_{digs}_Dig'
    exp_array, dig_array = _gen_first_digits_(digs)
    DataFrame.__init__(self, {'Expected': exp_array}, index=dig_array)
    self.index.names = [dig_name]
    if plot:
        plot_expected(self, digs, save_plot=save_plot,
                      save_plot_kwargs=save_plot_kwargs)
def __init__(self, num=False, plot=True, save_plot=None, save_plot_kwargs=None):
    exp, l2d = _gen_last_two_digits_(num=num)
    DataFrame.__init__(self, {'Expected': exp, 'Last_2_Dig': l2d})
    self.set_index('Last_2_Dig', inplace=True)
    if plot:
        plot_expected(self, -2, save_plot=save_plot,
                      save_plot_kwargs=save_plot_kwargs)
def __init__(self, data=None, index=None, columns=None, dtype=None,
             copy: bool = False, start_date: datetime = START_DATE,
             end_date: datetime = None, include_holding: bool = False,
             include_finance: bool = False, include_managed: bool = False,
             include_suspended: bool = False):
    if not end_date:
        end_date = datetime.today()
    if data is None:
        data, self.benchmarks, self.factors = download_latest_data(
            download_company_data=True)
        if not include_holding:
            data = data.loc[~data[HOLDING], :]
        if not include_finance:
            data = data.loc[data[FN_GUIDE_SECTOR] != '금융', :]
        if not include_managed:
            data = data.loc[~data[IS_MANAGED], :]
        if not include_suspended:
            data = data.loc[~data[IS_SUSPENDED], :]
        data = data.loc[(start_date <= data[DATE]) & (data[DATE] <= end_date), :]
    else:
        _, self.benchmarks, self.factors = download_latest_data(
            download_company_data=False)
    self.benchmarks = self.benchmarks.loc[
        (start_date <= self.benchmarks[DATE])
        & (self.benchmarks[DATE] <= end_date), :]
    self.factors = self.factors.loc[
        (start_date <= self.factors.index)
        & (self.factors.index <= end_date), :]
    DataFrame.__init__(self=self, data=data, index=index, columns=columns,
                       dtype=dtype, copy=copy)
def __init__(self, data=None, date_column=None, *args, **kwargs) -> None:
    DataFrame.__init__(self, data, *args, **kwargs)

    if self.columns.nlevels > 1:
        # For now, I admit,
        # there is a lot of work to do to support MultiIndex dataframes
        raise ValueError(
            'stock-pandas does not support dataframes with MultiIndex columns'  # noqa:E501
        )

    if isinstance(data, StockDataFrame):
        copy_stock_metas(data, self)
    else:
        init_stock_metas(self)

    if date_column:
        self[date_column] = to_datetime(self[date_column])
        self.set_index(date_column, inplace=True)
def __init__(self, r2labmap, columns=None):
    # the map object essentially carries the coordinate
    # system that you wish to use
    # for weird reasons related to the pandas implementation,
    # we can't seem to do this:
    # self.r2labmap = r2labmap
    # as it triggers infinite recursion (go figure...)
    # turns out we don't absolutely need this apparently,
    # so let's proceed this way for now
    if columns is None:
        columns = dict()
    # ditto
    # self.columns = columns
    all_columns = ['x', 'y'] + list(columns.keys())
    DataFrame.__init__(self, index=r2labmap.indexes(), columns=all_columns)
    # fill in the grid coordinates, plus any extra per-node column values
    for node_id, (gridx, gridy) in r2labmap.iterate_nodes():
        self.loc[node_id, 'x'] = gridx
        self.loc[node_id, 'y'] = gridy
        for column, value in columns.items():
            self.loc[node_id, column] = value
def __init__(self, data=None, index=None, columns=None, dtype=None,
             copy: bool = False, start_date: str = START_DATE,
             end_date: str = None, include_holding: bool = False,
             include_finance: bool = False, include_managed: bool = False,
             include_suspended: bool = False):
    try:
        datetime.strptime(start_date, '%Y-%m-%d')
    except ValueError:
        raise ValueError("Incorrect data format, start_date should be YYYY-MM-DD")
    if not end_date:
        end_date = datetime.today().strftime('%Y-%m-%d')
    try:
        datetime.strptime(end_date, '%Y-%m-%d')
    except ValueError:
        raise ValueError("Incorrect data format, end_date should be YYYY-MM-DD")
    if data is None:
        data, self.benchmarks, self.factors = download_latest_data(
            download_company_data=True)
        if not include_holding:
            data = data.loc[~data[HOLDING], :]
        if not include_finance:
            data = data.loc[data[FN_GUIDE_SECTOR] != '금융', :]
        if not include_managed:
            data = data.loc[~data[IS_MANAGED], :]
        if not include_suspended:
            data = data.loc[~data[IS_SUSPENDED], :]
        data = data.loc[(start_date <= data[DATE]) & (data[DATE] <= end_date), :]
    else:
        _, self.benchmarks, self.factors = download_latest_data(
            download_company_data=False)
    DataFrame.__init__(self=self, data=data, index=index, columns=columns,
                       dtype=dtype, copy=copy)
def __init__(self, data=None, index=None, columns=None, dtype=None,
             copy=False, categories=None):
    """
    A DataFrame with category information.

    Parameters
    ----------
    categories : list of columns
        Column names whose values are categorical.
    """
    DataFrame.__init__(self, data=data, index=index, columns=columns,
                       dtype=dtype, copy=copy)
    self.separator = '_'
    self._categories = categories or []
def __init__(self, *args, **kwargs):
    skip = kwargs.get('skiplines', 1)
    times = kwargs.get('readtime', slice(0, None))
    name = kwargs.get('name', 'None')
    symb = kwargs.get('symb', 'o')
    files = kwargs.get('search_files', None)
    properties = kwargs.get('properties', None)
    lines = kwargs.get('maxlines', 0)
    search = kwargs.get('search_pattern', FPNUMBER)
    folder = kwargs.get('folder', None)
    plot_properties = kwargs.get('plot_properties', PlotProperties())
    show_func = kwargs.get('show_func', None)
    validate = kwargs.get('validate', True)
    preHooks = kwargs.get('preHooks', None)
    exclude = kwargs.get('exclude', [" "])
    # FIXME
    times_stride = kwargs.get('times_stride', 1)
    times_range = kwargs.get('times_range', "all")
    # FIXME implement strides
    times_slice = times_range
    keys = ['skiplines', 'readtime', 'preHooks', 'name', 'symb',
            'search_files', 'properties', 'maxlines', 'search_pattern',
            'folder', 'plot_properties', 'show_func', 'exclude',
            'times_stride', 'times_range']
    for k in keys:
        if k in kwargs:
            kwargs.pop(k)
    # TODO explain what happens here
    if folder is None:
        # super(FoamFrame, self).__init__(*args, **kwargs)
        DataFrame.__init__(self, *args, **kwargs)
    else:
        if preHooks:
            for hook in preHooks:
                hook.execute()
        if (folder in case_data_base) and Database:
            print("re-importing", end=" ")
        else:
            print("importing", end=" ")
        print(name + ": ", end="")
        origins, data = import_foam_folder(
            path=folder,
            search=search,
            files=files,
            skiplines=skip,
            maxlines=lines,
            skiptimes=times,
            exclude=exclude,
            times_slice=times_slice)
        try:
            DataFrame.__init__(self, data)
        except Exception as e:
            print(e)
        self.properties = Props(
            origins, name, plot_properties, folder, symb, show_func)
        if validate and Database:
            self.validate_origins(folder, origins)
        # register to database
        if Database:
            case_data_base.sync()
def __init__(self, *args, **kwargs):
    DataFrame.__init__(self, *args, **kwargs)
def __init__(self, *args, **kwargs):
    skip = kwargs.get('skiplines', 1)
    times = kwargs.get('readtime', slice(0, None))
    name = kwargs.get('name', 'None')
    symb = kwargs.get('symb', 'o')
    files = kwargs.get('search_files', None)
    properties = kwargs.get('properties', None)
    lines = kwargs.get('maxlines', 0)
    search = kwargs.get('search_pattern', FPNUMBER)
    folder = kwargs.get('folder', None)
    plot_properties = kwargs.get('plot_properties', PlotProperties())
    show_func = kwargs.get('show_func', None)
    validate = kwargs.get('validate', True)
    preHooks = kwargs.get('preHooks', None)
    exclude = kwargs.get('exclude', [" "])
    # FIXME
    times_stride = kwargs.get('times_stride', 1)
    times_range = kwargs.get('times_range', "all")
    # FIXME implement strides
    times_slice = times_range
    keys = ['skiplines', 'readtime', 'preHooks', 'name', 'symb',
            'search_files', 'properties', 'maxlines', 'search_pattern',
            'folder', 'plot_properties', 'show_func', 'exclude',
            'times_stride', 'times_range']
    for k in keys:
        if k in kwargs:
            kwargs.pop(k)
    # TODO explain what happens here
    if folder is None:
        # super(FoamFrame, self).__init__(*args, **kwargs)
        DataFrame.__init__(self, *args, **kwargs)
    else:
        if preHooks:
            for hook in preHooks:
                hook.execute()
        if (folder in case_data_base) and Database:
            print("re-importing", end=" ")
        else:
            print("importing", end=" ")
        print(name + ": ", end="")
        origins, data = import_foam_folder(
            path=folder,
            search=search,
            files=files,
            skiplines=skip,
            maxlines=lines,
            skiptimes=times,
            exclude=exclude,
            times_slice=times_slice)
        try:
            DataFrame.__init__(self, data)
        except Exception as e:
            print(e)
        self.properties = Props(origins, name, plot_properties, folder,
                                symb, show_func)
        if validate and Database:
            self.validate_origins(folder, origins)
        # register to database
        if Database:
            case_data_base.sync()
def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False):
    DataFrame.__init__(self=self, data=data, index=index, columns=columns,
                       dtype=dtype, copy=copy)
def __init__(self, sourcefilename, sourceDataFrame, teamname, teamno):
    DataFrame.__init__(self, sourceDataFrame)
    self.SN = sourcefilename[0:15]
    self.data = sourceDataFrame
    self.teamname = teamname
    self.teamno = teamno
    self.home = 0 if self.teamno == 1 else 1
    self.oppteamname = sourcefilename[9:12] if self.teamno == 2 else sourcefilename[12:15]
    self.oppteamno = 2 if self.teamno == 1 else 1
    selector = sourceDataFrame[sourceDataFrame["team"] == teamname]
    self.FGM = calculate_FGM(selector)
    self.FGA = calculate_FGA(selector)
    self.FGP = float(self.FGM) / self.FGA if self.FGA != 0 else 0
    self.B3M = calculate_3M(selector)
    self.B3A = calculate_3A(selector)
    self.B3P = float(self.B3M) / self.B3A if self.B3A != 0 else 0
    self.FTM = calculate_FTM(selector)
    self.FTA = calculate_FTA(selector)
    self.FTP = float(self.FTM) / self.FTA if self.FTA != 0 else 0
    self.P = int(score_calculate(selector))
    self.ORB = calculate_ORB(selector)
    self.DRB = calculate_DRB(selector)
    self.TRB = self.ORB + self.DRB
    self.TOV = calculate_TOV(selector)
    self.TOVP = (
        self.TOV / (self.FGA + self.FTA * 0.44 + self.TOV)
        if (self.FGA + self.FTA * 0.44 + self.TOV) != 0
        else 0
    )
    self.AST = calculate_AST(selector)
    self.F = calculate_F(selector)
    # the code here produces rows that aid the calculation of time intervals
    _set_dummies(self, self.teamname)
    combination_starter = self._combinaiton_reference()
    self.totalcombi = combination_starter.etype.count()
    # this calculates, for every row, what the combination number is
    counter = 0
    combinations = combination_starter.index
    for identifier in combinations:
        counter += 1
        self.data.loc[identifier:, "combination_number"] = counter
    # this calculates the time remaining of each row; note that overtime is
    # considered here
    self["time remaining"] = 0
    end_period = self.data.tail(1).period.item()
    self.data.loc[self.data.period <= 4, "time remaining"] = (end_period - 4) * 5 * 60
    if end_period > 4:
        self.data.loc[self.data.period > 4, "time remaining"] = \
            (end_period - self.data.period) * 5 * 60
    self.data.loc[self.data.period <= 4, "time remaining"] += \
        (4 - self.data.period) * 12 * 60
    self.data.loc[:, "time"] = pd.to_datetime(self.data.time, format="%M:%S")
    self.data.loc[:, "time remaining"] = (
        self.data.loc[:, "time remaining"]
        + pd.DatetimeIndex(self.data.time).minute * 60
        + pd.DatetimeIndex(self.data.time).second
    )
    self.MP = self.data.head(1).loc[:, "time remaining"].item() / 60
    oppselector = sourceDataFrame[sourceDataFrame["team"] == self.oppteamname]
    self.ORBP = float(self.ORB) / (self.ORB + calculate_DRB(oppselector))
    self.DRBP = float(self.DRB) / (self.DRB + calculate_ORB(oppselector))
    self.TRBP = float(self.TRB) / (self.TRB + calculate_DRB(oppselector) + calculate_ORB(oppselector))
    self.PTD = int(self.P - score_calculate(oppselector))
    self.STL = calculate_STL(oppselector)
    self.BLK = calculate_BLK(oppselector)
    self.interval = self.data.head(1).loc[:, "time remaining"].item()
    nameindex = detect_team(self.teamno)
    self.players = pd.Series(self.data[nameindex].values.ravel()).unique()
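# A worked check of the "time remaining" arithmetic above (a sketch, not from
# the source, assuming the overtime branch applies only to rows with
# period > 4): for a game ending in regulation (end_period == 4), a row in
# period 2 with the clock at 5:30 gets
#     (4 - 4) * 5 * 60       # no overtime seconds remaining
#   + (4 - 2) * 12 * 60      # two full quarters still to play = 1440 s
#   + 5 * 60 + 30            # clock time left in the current quarter = 330 s
#   = 1770 seconds remaining.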
def __init__(self, *args, **kwargs):
    skip = kwargs.get('skiplines', 1)
    times = kwargs.get('skiptimes', 1)
    name = kwargs.get('name', 'None')
    symb = kwargs.get('symb', 'o')
    files = kwargs.get('search_files', None)
    properties = kwargs.get('properties', None)
    lines = kwargs.get('maxlines', 0)
    search = kwargs.get('search_pattern', io.FPNUMBER)
    folder = kwargs.get('folder', None)
    plot_properties = kwargs.get('plot_properties', PlotProperties())
    show_func = kwargs.get('show_func', None)
    validate = kwargs.get('validate', True)
    preHooks = kwargs.get('preHooks', None)
    keys = ['skiplines', 'skiptimes', 'preHooks', 'name', 'symb',
            'search_files', 'properties', 'maxlines', 'search_pattern',
            'folder', 'plot_properties', 'show_func']
    for k in keys:
        try:
            kwargs.pop(k)
        except KeyError:
            pass
    # TODO explain what happens here
    if folder is None:
        # super(FoamFrame, self).__init__(*args, **kwargs)
        DataFrame.__init__(self, *args, **kwargs)
    else:
        if preHooks:
            for hook in preHooks:
                hook.execute()
        if case_data_base.has_key(folder) and Database:
            print "re-importing",
        else:
            print "importing",
        print name + ": ",
        origins, data = io.import_foam_folder(
            path=folder,
            search=search,
            files=files,
            skiplines=skip,
            maxlines=lines,
            skiptimes=times,
        )
        DataFrame.__init__(self, data)
        self.properties = Props(
            origins,
            name,
            plot_properties,
            folder,
            # FIXME fix it for read logs
            data.index.levels[0],
            symb,
            show_func)
        if validate and Database:
            self.validate_origins(folder, origins)
        # register to database
        if Database:
            case_data_base.sync()
def __init__(self, df):
    DataFrame.__init__(self, df)
    self.colIndxDict = dict(
        zip(self.columns.values, np.arange(len(self.columns))))
    print('col dict', self.colIndxDict)
    self.rawData = df.values
def __init__(
    self,
    data=None,
    # from_constructor: Optional[bool] = bool,
    date_col: Optional[str] = None,
    to_datetime_kwargs: dict = {},
    time_frame: TimeFrameArg = None,
    cumulators: Optional[Cumulators] = None,
    source: Optional['MetaDataFrame'] = None,
    *args,
    **kwargs
) -> None:
    """
    Creates a stock data frame

    Args:
        data (ndarray, Iterable, dict, DataFrame, StockDataFrame): data
        date_col (:obj:`str`, optional): If set, then the column named
            `date_col` will be converted and set as the DateTimeIndex of
            the data frame
        to_datetime_kwargs (dict): the keyword arguments to be passed to
            `pandas.to_datetime()`. It only takes effect if `date_col` is
            specified.
        time_frame (str, TimeFrame): defines the time frame of the stock
        source (:obj:`StockDataFrame`, optional): the source to copy meta
            data from if the source is a StockDataFrame. Defaults to `data`
        *args: other pandas.DataFrame arguments
        **kwargs: other pandas.DataFrame keyword arguments
    """
    DataFrame.__init__(self, data, *args, **kwargs)

    if self.columns.nlevels > 1:
        # For now, I admit,
        # there is a lot of work to do to support MultiIndex dataframes
        raise ValueError(
            'stock-pandas does not support dataframes with MultiIndex columns'
        )

    if source is None:
        source = data

    is_meta_frame = isinstance(source, MetaDataFrame)

    if is_meta_frame:
        copy_stock_metas(source, self, data is not None)
    else:
        init_stock_metas(self)

    if (
        not is_meta_frame
        and date_col is None
        and time_frame is None
    ):
        # Cases
        # 1. StockDataFrame(dataframe)
        # 2. created by self._constructor(new_data).__finalize__(self)
        # we will update cumulator data in __finalize__
        return

    # Cases
    # 1. StockDataFrame(stockdataframe)
    # 2. StockDataFrame(dataframe, date_col='time')
    self._cumulator.update(
        self,
        source,
        date_col=date_col,
        to_datetime_kwargs=to_datetime_kwargs,
        time_frame=time_frame,
        cumulators=cumulators
    )
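# A minimal usage sketch based on the docstring above (the 'time' column name
# and the date format are illustrative assumptions, not mandated by the source):
#
#     sdf = StockDataFrame(ohlc_df, date_col='time',
#                          to_datetime_kwargs={'format': '%Y-%m-%d'})
#
# Here the 'time' column is parsed with pandas.to_datetime and becomes the
# DateTimeIndex of the frame; omitting date_col leaves the original index
# untouched.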
def __init__(self, filename):
    DataFrame.__init__(self, data=BaseballDataFrame._create_df(filename))
    self.filepath = getcwd() + '/baseball-data/core/' + filename
def __init__(self, data, *args, **kwargs):
    DataFrame.__init__(self, data, *args, **kwargs)