def truncate(self, before=None, after=None, copy=True):
    """Truncate a sorted DataFrame / Series before and/or after some
    particular dates.

    Parameters
    ----------
    before : date
        Truncate all rows before this date
    after : date
        Truncate all rows after this date
    copy : boolean, default True
        Return a copy of the truncated data

    Returns
    -------
    truncated : type of caller
    """
    from pandas.tseries.tools import to_datetime
    before = to_datetime(before)
    after = to_datetime(after)

    if before is not None and after is not None:
        if before > after:
            # BUG FIX: the original interpolated (before, after), which
            # rendered the message with the operands reversed
            raise AssertionError('Truncate: %s must be after %s'
                                 % (after, before))

    result = self.ix[before:after]

    # MultiIndex slicing keeps the full index; trim it explicitly
    if isinstance(self.index, MultiIndex):
        result.index = self.index.truncate(before, after)

    if copy:
        result = result.copy()

    return result
def truncate(self, before=None, after=None, copy=True):
    """Truncate a sorted DataFrame / Series before and/or after some
    particular dates.

    Parameters
    ----------
    before : date
        Truncate all rows before this date
    after : date
        Truncate all rows after this date
    copy : boolean, default True
        Return a copy of the truncated data

    Returns
    -------
    truncated : type of caller
    """
    from pandas.tseries.tools import to_datetime
    before = to_datetime(before)
    after = to_datetime(after)

    if before is not None and after is not None:
        # BUG FIX: the bare 'assert' is stripped under python -O; raise
        # the same exception type explicitly, with a helpful message
        if before > after:
            raise AssertionError('Truncate: %s must be after %s'
                                 % (after, before))

    result = self.ix[before:after]

    # MultiIndex slicing keeps the full index; trim it explicitly
    if isinstance(self.index, MultiIndex):
        result.index = self.index.truncate(before, after)

    if copy:
        result = result.copy()

    return result
def generate_range(start=None, end=None, periods=None, offset=BDay(),
                   time_rule=None):
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments

    Parameters
    ----------
    start : datetime (default None)
    end : datetime (default None)
    periods : int, optional

    Note
    ----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
    satisfy start <= date <= end.

    Returns
    -------
    dates : generator object
    """
    # NOTE(review): the default offset=BDay() is evaluated once at def
    # time and shared across calls -- presumably DateOffset is immutable,
    # so this is safe; confirm
    if time_rule is not None:
        from pandas.tseries.frequencies import get_offset
        # 'time_rule' is a legacy alias: resolve it and override 'offset'
        offset = get_offset(time_rule)

    start = to_datetime(start)
    end = to_datetime(end)

    # Snap both endpoints onto the offset grid so iteration lands exactly
    if start and not offset.onOffset(start):
        start = offset.rollforward(start)

    if end and not offset.onOffset(end):
        end = offset.rollback(end)

    # Rolling may have moved 'end' before 'start': yield nothing.
    # NOTE(review): if only one endpoint is None this comparison can raise
    # on Python 3 -- presumably callers always pass a valid combination
    if periods is None and end < start:
        end = None
        periods = 0

    # Derive the missing endpoint from the other one plus 'periods'
    if end is None:
        end = start + (periods - 1) * offset

    if start is None:
        start = end - (periods - 1) * offset

    cur = start
    next_date = cur
    while cur <= end:
        yield cur

        # faster than cur + offset
        next_date = offset.apply(cur)
        # guard against offsets that do not advance (would loop forever)
        if next_date <= cur:
            raise ValueError('Offset %s did not increment date' % offset)
        cur = next_date
def _handle_date_column(col, format=None):
    """Convert *col* to datetimes, honouring an optional format spec.

    A dict format is forwarded verbatim as to_datetime keyword arguments;
    an epoch-unit code selects unit-based parsing; numeric columns default
    to seconds-since-epoch; everything else is parsed as date strings.
    """
    # Dict spec: the caller supplies the to_datetime kwargs directly.
    if isinstance(format, dict):
        return to_datetime(col, **format)

    # Epoch-unit codes get unit-based parsing.
    if format in ('D', 's', 'ms', 'us', 'ns'):
        return to_datetime(col, coerce=True, unit=format)

    kind = col.dtype.type
    if issubclass(kind, np.floating) or issubclass(kind, np.integer):
        # parse dates as timestamp; default to seconds unless told otherwise
        unit = 's' if format is None else format
        return to_datetime(col, coerce=True, unit=unit)

    return to_datetime(col, coerce=True, format=format)
def test_combine_first_dt64(self):
    """combine_first on datetime64 Series: dt64+dt64 stays datetime64,
    dt64+object yields a mixed-type result."""
    from pandas.tseries.tools import to_datetime

    # Both inputs datetime64 -> combined result is datetime64
    left = to_datetime(Series(["2010", np.NaN]))
    right = to_datetime(Series([np.NaN, "2011"]))
    combined = left.combine_first(right)
    expected = to_datetime(Series(['2010', '2011']))
    assert_series_equal(combined, expected)

    # datetime64 filled from a plain object Series -> mixed values
    left = to_datetime(Series(["2010", np.NaN]))
    right = Series([np.NaN, "2011"])
    combined = left.combine_first(right)
    expected = Series([datetime(2010, 1, 1), '2011'])
    assert_series_equal(combined, expected)
def _generate_regular_range(start, end, periods, offset):
    """Build a fixed-frequency run of datetime64 values.

    Tick (fixed-duration) offsets are generated arithmetically on i8
    nanosecond values; irregular offsets fall back to the slower
    generate_range() iterator.
    """
    if isinstance(offset, Tick):
        stride = offset.nanos
        if periods is None:
            b = Timestamp(start).value
            e = Timestamp(end).value
            # round the right endpoint up so 'end' itself is included
            e += stride - e % stride
        elif start is not None:
            b = Timestamp(start).value
            e = b + periods * stride
        elif end is not None:
            # arange excludes the stop value: step one stride past 'end'
            e = Timestamp(end).value + stride
            b = e - periods * stride
        else:
            raise NotImplementedError

        data = np.arange(b, e, stride, dtype=np.int64)
        data = data.view(_NS_DTYPE)
    else:
        # Non-tick offsets: materialize via the generator, then convert.
        if isinstance(start, Timestamp):
            start = start.to_pydatetime()

        if isinstance(end, Timestamp):
            end = end.to_pydatetime()

        xdr = generate_range(start=start, end=end, periods=periods,
                             offset=offset)
        dates = list(xdr)
        # utc = len(dates) > 0 and dates[0].tzinfo is not None
        # NOTE(review): the utc flag above is disabled, so tz-aware inputs
        # are converted here without utc=True -- confirm this is intended
        data = tools.to_datetime(dates)

    return data
def convert(values, unit, axis):
    """Convert datetime-like *values* into matplotlib float ordinals.

    Scalars (datetime/date/time, numbers, strings) and sequences are
    supported; anything unparseable is returned unchanged.
    """
    # FIX: removed unused function-scope import of DatetimeIndex

    def try_parse(values):
        # Best effort: fall back to the raw input if parsing fails.
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (com.is_integer(values) or com.is_float(values)):
        # already numeric -- assume it is a matplotlib ordinal
        return values
    elif isinstance(values, basestring):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            values = np.array(values, dtype='O')

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            pass

    return values
def _generate_regular_range(start, end, periods, offset):
    """Build a fixed-frequency run of datetime64 values.

    Tick (fixed-duration) offsets are generated arithmetically on i8
    nanosecond values; irregular offsets fall back to the slower
    generate_range() iterator.
    """
    if isinstance(offset, Tick):
        stride = offset.nanos
        if periods is None:
            b = Timestamp(start).value
            e = Timestamp(end).value
            # round the right endpoint up so 'end' itself is included
            e += stride - e % stride
        elif start is not None:
            b = Timestamp(start).value
            e = b + periods * stride
        elif end is not None:
            # arange excludes the stop value: step one stride past 'end'
            e = Timestamp(end).value + stride
            b = e - periods * stride
        else:
            raise NotImplementedError

        data = np.arange(b, e, stride, dtype=np.int64)
        data = data.view(_NS_DTYPE)
    else:
        # Non-tick offsets: materialize via the generator, then convert.
        xdr = generate_range(start=start, end=end, periods=periods,
                             offset=offset)

        dates = list(xdr)
        # preserve tz-awareness: route through UTC when inputs carry tzinfo
        utc = len(dates) > 0 and dates[0].tzinfo is not None
        data = tools.to_datetime(dates, utc=utc)

    return data
def convert(values, unit, axis):
    """Convert datetime-like *values* into matplotlib float ordinals.

    Handles scalars (datetime/date/time, numbers, strings) as well as
    sequences; values that cannot be interpreted are passed through.
    """
    def _parse_or_passthrough(v):
        # Best effort: hand the input back untouched when parsing fails.
        try:
            return _dt_to_float_ordinal(tools.to_datetime(v))
        except Exception:
            return v

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)

    if isinstance(values, pydt.time):
        return dates.date2num(values)

    if com.is_integer(values) or com.is_float(values):
        # already numeric -- assume it is a matplotlib ordinal
        return values

    if isinstance(values, compat.string_types):
        return _parse_or_passthrough(values)

    if isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            # numeric arrays are taken to be ordinals already
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(v) for v in values]
        except Exception:
            pass

    return values
def convert(values, unit, axis):
    """Convert datetime-like *values* into matplotlib float ordinals.

    Scalars (datetime/date/time, numbers, strings) and sequences are
    supported; anything unparseable is returned unchanged.
    """
    # FIX: removed unused function-scope import of DatetimeIndex

    def try_parse(values):
        # Best effort: fall back to the raw input if parsing fails.
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif com.is_integer(values) or com.is_float(values):
        # already numeric -- assume it is a matplotlib ordinal
        return values
    elif isinstance(values, str):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            values = np.array(values, dtype="O")

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            pass

    return values
def get_config(self, name, group=None, date=None, version=None):
    """
    Get the configurations for the given item on the given date and
    version

    Parameters
    ----------
    name : str
        Data item name
    group : object, default None
        Data item group, if any
    date : str or datetime, default None
        Configuration date for the item. Use latest date if None
    version : int, str, datetime, default None
        Version hash or datetime

    Returns
    -------
    conf : dict
    """
    # Default to the most recent configuration date for this item.
    effective_date = date if date is not None else self.latest_date(name)
    effective_date = tools.to_datetime(effective_date)

    # With no explicit version, the date doubles as the version key.
    effective_version = version if version is not None else effective_date

    vobj = self.get_version(effective_version)
    item = vobj.get_item(name, group, effective_date)
    if item is None:
        raise ValueError(
            'Parameters were not found for %s (version %s) for %s'
            % (name, effective_version, effective_date))
    return item
def _str_to_dt_array(arr, offset=None):
    """Parse an array of date strings into a datetime array.

    Each element goes through parse_time_string (honouring *offset* for
    ambiguous strings); the results are normalized via tools.to_datetime.
    """
    def _parse_one(value):
        # parse_time_string returns a tuple; index 0 is the parsed datetime
        return parse_time_string(value, offset)[0]

    as_objects = np.asarray(arr, dtype=object)
    parsed = _algos.arrmap_object(as_objects, _parse_one)
    return tools.to_datetime(parsed)
def _cached_range(cls, start=None, end=None, periods=None, offset=None,
                  name=None):
    """Slice a date range out of a per-offset cached master range.

    The first request for a given offset generates one large range
    (_CACHE_START.._CACHE_END) and memoizes it; later calls only locate
    the requested window inside the cached index.
    """
    if start is not None:
        start = Timestamp(start)
    if end is not None:
        end = Timestamp(end)

    if offset is None:
        raise Exception('Must provide a DateOffset!')

    drc = _daterange_cache
    if offset not in _daterange_cache:
        # Cache miss: build and memoize the master range for this offset.
        xdr = generate_range(offset=offset, start=_CACHE_START,
                             end=_CACHE_END)

        arr = tools.to_datetime(list(xdr), box=False)

        cachedRange = arr.view(DatetimeIndex)
        cachedRange.offset = offset
        cachedRange.tz = None
        cachedRange.name = None
        drc[offset] = cachedRange
    else:
        cachedRange = drc[offset]

    if start is None:
        # NOTE(review): assert used for argument validation -- stripped
        # under python -O
        assert (isinstance(end, Timestamp))

        end = offset.rollback(end)

        endLoc = cachedRange.get_loc(end) + 1
        startLoc = endLoc - periods
    elif end is None:
        assert (isinstance(start, Timestamp))

        start = offset.rollforward(start)

        startLoc = cachedRange.get_loc(start)
        endLoc = startLoc + periods
    else:
        # Both endpoints given: snap each onto the offset grid first.
        if not offset.onOffset(start):
            start = offset.rollforward(start)

        if not offset.onOffset(end):
            end = offset.rollback(end)

        startLoc = cachedRange.get_loc(start)
        endLoc = cachedRange.get_loc(end) + 1

    indexSlice = cachedRange[startLoc:endLoc]
    indexSlice.name = name
    indexSlice.offset = offset

    return indexSlice
def time2num(d):
    """Convert a time (or parseable time string) to a float ordinal.

    Non-time, non-string inputs pass through unchanged.
    """
    if isinstance(d, pydt.time):
        return _to_ordinalf(d)

    if isinstance(d, compat.string_types):
        stamp = tools.to_datetime(d)
        if not isinstance(stamp, datetime):
            raise ValueError('Could not parse time %s' % d)
        # only the time-of-day component is meaningful here
        return _to_ordinalf(stamp.time())

    return d
def time2num(d):
    """Convert a time or parseable time string to a float ordinal.

    Non-time, non-string inputs pass through unchanged.
    """
    if isinstance(d, str):
        stamp = tools.to_datetime(d)
        if not isinstance(stamp, datetime):
            raise ValueError("Could not parse time %s" % d)
        # keep only the time-of-day portion
        return _to_ordinalf(stamp.time())

    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps
    """
    if not com.is_list_like(comps):
        raise TypeError(
            "only list-like objects are allowed to be passed"
            " to isin(), you passed a "
            "[{0}]".format(type(comps).__name__)
        )
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError(
            "only list-like objects are allowed to be passed"
            " to isin(), you passed a "
            "[{0}]".format(type(values).__name__)
        )

    # GH11232
    # work-around for numpy < 1.8 and comparisions on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        f = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        f = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_int64_dtype(comps):
        pass
    else:
        # BUG FIX: use the lambda's own parameter instead of closing over
        # 'values', keeping all three dispatch lambdas consistent
        f = lambda x, y: lib.ismember(x, set(y))

    return f(comps, values)
def _cached_range(cls, start=None, end=None, periods=None, offset=None,
                  name=None):
    """Slice a date range out of a per-offset cached master range.

    The first request for a given offset generates one large range
    (_CACHE_START.._CACHE_END) and memoizes it; later calls only locate
    the requested window inside the cached index.
    """
    if start is not None:
        start = Timestamp(start)
    if end is not None:
        end = Timestamp(end)

    if offset is None:
        raise Exception('Must provide a DateOffset!')

    drc = _daterange_cache
    if offset not in _daterange_cache:
        # Cache miss: build and memoize the master range for this offset.
        xdr = generate_range(offset=offset, start=_CACHE_START,
                             end=_CACHE_END)

        arr = tools.to_datetime(list(xdr), box=False)

        cachedRange = arr.view(DatetimeIndex)
        cachedRange.offset = offset
        cachedRange.tz = None
        cachedRange.name = None
        drc[offset] = cachedRange
    else:
        cachedRange = drc[offset]

    if start is None:
        # NOTE(review): assert used for argument validation -- stripped
        # under python -O
        assert(isinstance(end, Timestamp))

        end = offset.rollback(end)

        endLoc = cachedRange.get_loc(end) + 1
        startLoc = endLoc - periods
    elif end is None:
        assert(isinstance(start, Timestamp))

        start = offset.rollforward(start)

        startLoc = cachedRange.get_loc(start)
        endLoc = startLoc + periods
    else:
        # Both endpoints given: snap each onto the offset grid first.
        if not offset.onOffset(start):
            start = offset.rollforward(start)

        if not offset.onOffset(end):
            end = offset.rollback(end)

        startLoc = cachedRange.get_loc(start)
        endLoc = cachedRange.get_loc(end) + 1

    indexSlice = cachedRange[startLoc:endLoc]
    indexSlice.name = name
    indexSlice.offset = offset

    return indexSlice
def truncate(self, before=None, after=None, copy=True):
    """Truncate a sorted DataFrame / Series before and/or after some
    particular dates.

    Parameters
    ----------
    before : date
        Truncate all rows before this date
    after : date
        Truncate all rows after this date
    copy : boolean, default True
        Return a copy of the truncated data

    Returns
    -------
    truncated : type of caller
    """
    # if we have a date index, convert to dates, otherwise
    # treat like a slice
    if self.index.is_all_dates:
        from pandas.tseries.tools import to_datetime
        before = to_datetime(before)
        after = to_datetime(after)

    if before is not None and after is not None:
        if before > after:
            # BUG FIX: the original interpolated (before, after), which
            # rendered the message with the operands reversed
            raise AssertionError('Truncate: %s must be after %s'
                                 % (after, before))

    result = self.ix[before:after]

    # MultiIndex slicing keeps the full index; trim it explicitly
    if isinstance(self.index, MultiIndex):
        result.index = self.index.truncate(before, after)

    if copy:
        result = result.copy()

    return result
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps
    """
    if not is_list_like(comps):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(comps).__name__))
    comps = np.asarray(comps)
    if not is_list_like(values):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(values).__name__))

    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # work-around for numpy < 1.8 and comparisions on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        f = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        f = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif is_int64_dtype(comps):
        pass
    else:
        # BUG FIX: use the lambda's own parameter instead of closing over
        # 'values', keeping all three dispatch lambdas consistent
        f = lambda x, y: lib.ismember(x, set(y))

    return f(comps, values)
def test_parse_tz_aware(self):
    """read_csv on an ISO-8601 'Z' timestamp should yield a UTC index."""
    # See gh-1693
    import pytz
    data = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")

    # it works
    result = self.read_csv(data, index_col=0, parse_dates=True)
    stamp = result.index[0]
    self.assertEqual(stamp.minute, 39)
    try:
        self.assertIs(result.index.tz, pytz.utc)
    except AssertionError:  # hello Yaroslav
        # fallback: some builds do not expose tz on the index; compare
        # field-by-field after explicit UTC conversion instead
        arr = result.index.to_pydatetime()
        result = tools.to_datetime(arr, utc=True)[0]

        self.assertEqual(stamp.minute, result.minute)
        self.assertEqual(stamp.hour, result.hour)
        self.assertEqual(stamp.day, result.day)
def test_non_datetimeindex(self):
    # infer_freq should accept the result of to_datetime directly, not
    # only a DatetimeIndex, and still detect the daily frequency
    parsed = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
    inferred = frequencies.infer_freq(parsed)
    self.assertEqual(inferred, "D")
def try_parse(values):
    # Best-effort conversion to a proleptic-Gregorian date ordinal;
    # unparseable input is handed back untouched for the caller to handle.
    try:
        return datetools.to_datetime(values).toordinal()
    except Exception:
        return values
def test_non_datetimeindex(self):
    """infer_freq should detect daily frequency from a parsed array."""
    dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
    # FIX: assertEqual replaces the deprecated assert_ and reports both
    # operands on failure instead of a bare boolean
    self.assertEqual(infer_freq(dates), 'D')
#residencies 0.000169
#pottery 0.000152
#radio_&_podcast 0.000128
#typography 0.000104
#chiptune 0.000088
#letterpress 0.000088
#taxidermy 0.000016
#NaN 0.000000

# Encode outcome: 1 = successful, 0 = failed; drop everything else.
# BUG FIX: the original used chained indexing (df.funded[mask] = x),
# which pandas may apply to a temporary copy (SettingWithCopy); .loc
# writes back reliably.
df['funded'] = 2
df.loc[df.state == 'successful', 'funded'] = 1
df.loc[df.state == 'failed', 'funded'] = 0
df = df[df.funded != 2]

df.deadline = to_datetime(df.deadline)
df.describe()
df.head()

# Convert currency to USD
df.currency.value_counts(normalize=True)
#USD 0.716564
#GBP 0.088293
#CAD 0.039900
#AUD 0.020653
#EUR 0.007431
#NZD 0.004088
#SEK 0.001760
def __new__(cls, data=None, freq=None, start=None, end=None, periods=None,
            copy=False, name=None, tz=None, verify_integrity=True,
            normalize=False, **kwds):
    """Construct a DatetimeIndex either from datetime-like data or from a
    start/end/periods specification, coercing strings, integers and
    timezone handling along the way."""
    dayfirst = kwds.pop('dayfirst', None)
    yearfirst = kwds.pop('yearfirst', None)

    warn = False
    if 'offset' in kwds and kwds['offset']:
        # legacy spelling of 'freq'
        freq = kwds['offset']
        warn = True

    freq_infer = False
    if not isinstance(freq, DateOffset):
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            # defer frequency detection until the values are built
            freq_infer = True
            freq = None

    if warn:
        import warnings
        warnings.warn(
            "parameter 'offset' is deprecated, "
            "please use 'freq' instead",
            FutureWarning)

    offset = freq

    if periods is not None:
        # accept whole floats; reject anything non-numeric
        if com.is_float(periods):
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError('Periods must be a number, got %s' %
                             str(periods))

    if data is None and offset is None:
        raise ValueError("Must provide freq argument if no data is "
                         "supplied")

    if data is None:
        # pure range request -- no data to coerce
        return cls._generate(start, end, periods, name, offset,
                             tz=tz, normalize=normalize)

    if not isinstance(data, np.ndarray):
        if np.isscalar(data):
            raise ValueError('DatetimeIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data, dtype='O')

        # try a few ways to make it datetime64
        if lib.is_string_array(data):
            data = _str_to_dt_array(data, offset, dayfirst=dayfirst,
                                    yearfirst=yearfirst)
        else:
            data = tools.to_datetime(data)
            data.offset = offset
            if isinstance(data, DatetimeIndex):
                # already an index -- just relabel and return
                if name is not None:
                    data.name = name
                return data

    if issubclass(data.dtype.type, basestring):
        subarr = _str_to_dt_array(data, offset, dayfirst=dayfirst,
                                  yearfirst=yearfirst)
    elif issubclass(data.dtype.type, np.datetime64):
        if isinstance(data, DatetimeIndex):
            if tz is None:
                tz = data.tz

            subarr = data.values

            if offset is None:
                # inherit the source index's frequency; no need to re-check
                offset = data.offset
                verify_integrity = False
        else:
            if data.dtype != _NS_DTYPE:
                subarr = lib.cast_to_nanoseconds(data)
            else:
                subarr = data
    elif data.dtype == _INT64_DTYPE:
        if isinstance(data, Int64Index):
            raise TypeError('cannot convert Int64Index->DatetimeIndex')
        if copy:
            subarr = np.asarray(data, dtype=_NS_DTYPE)
        else:
            # reinterpret the i8 values as nanosecond timestamps in place
            subarr = data.view(_NS_DTYPE)
    else:
        try:
            subarr = tools.to_datetime(data)
        except ValueError:
            # tz aware
            subarr = tools.to_datetime(data, utc=True)

        if not np.issubdtype(subarr.dtype, np.datetime64):
            raise TypeError('Unable to convert %s to datetime dtype'
                            % str(data))

    if isinstance(subarr, DatetimeIndex):
        if tz is None:
            tz = subarr.tz
    else:
        if tz is not None:
            tz = tools._maybe_get_tz(tz)

            if (not isinstance(data, DatetimeIndex) or
                    getattr(data, 'tz', None) is None):
                # Convert tz-naive to UTC
                ints = subarr.view('i8')
                subarr = lib.tz_localize_to_utc(ints, tz)
                subarr = subarr.view(_NS_DTYPE)

    subarr = subarr.view(cls)
    subarr.name = name
    subarr.offset = offset
    subarr.tz = tz

    if verify_integrity and len(subarr) > 0:
        if offset is not None and not freq_infer:
            inferred = subarr.inferred_freq
            if inferred != offset.freqstr:
                raise ValueError('Dates do not conform to passed '
                                 'frequency')

    if freq_infer:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.offset = to_offset(inferred)

    return subarr
def __new__(
    cls,
    data=None,
    freq=None,
    start=None,
    end=None,
    periods=None,
    copy=False,
    name=None,
    tz=None,
    verify_integrity=True,
    normalize=False,
    **kwds
):
    """Construct a DatetimeIndex either from datetime-like data or from a
    start/end/periods specification, coercing strings, integers and
    timezone handling along the way."""
    dayfirst = kwds.pop("dayfirst", None)
    yearfirst = kwds.pop("yearfirst", None)

    warn = False
    if "offset" in kwds and kwds["offset"]:
        # legacy spelling of 'freq'
        freq = kwds["offset"]
        warn = True

    freq_infer = False
    if not isinstance(freq, DateOffset):
        if freq != "infer":
            freq = to_offset(freq)
        else:
            # defer frequency detection until the values are built
            freq_infer = True
            freq = None

    if warn:
        import warnings

        warnings.warn("parameter 'offset' is deprecated, " "please use 'freq' instead", FutureWarning)
    offset = freq

    if periods is not None:
        # accept whole floats; reject anything non-numeric
        if com.is_float(periods):
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError("Periods must be a number, got %s" % str(periods))

    if data is None and offset is None:
        raise ValueError("Must provide freq argument if no data is " "supplied")

    if data is None:
        # pure range request -- no data to coerce
        return cls._generate(start, end, periods, name, offset, tz=tz, normalize=normalize)

    if not isinstance(data, np.ndarray):
        if np.isscalar(data):
            raise ValueError(
                "DatetimeIndex() must be called with a " "collection of some kind, %s was passed" % repr(data)
            )

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data, dtype="O")

        # try a few ways to make it datetime64
        if lib.is_string_array(data):
            data = _str_to_dt_array(data, offset, dayfirst=dayfirst, yearfirst=yearfirst)
        else:
            data = tools.to_datetime(data)
            data.offset = offset
            if isinstance(data, DatetimeIndex):
                # already an index -- just relabel and return
                if name is not None:
                    data.name = name
                return data

    if issubclass(data.dtype.type, basestring):
        subarr = _str_to_dt_array(data, offset, dayfirst=dayfirst, yearfirst=yearfirst)
    elif issubclass(data.dtype.type, np.datetime64):
        if isinstance(data, DatetimeIndex):
            if tz is None:
                tz = data.tz

            subarr = data.values

            if offset is None:
                # inherit the source index's frequency; no need to re-check
                offset = data.offset
                verify_integrity = False
        else:
            if data.dtype != _NS_DTYPE:
                subarr = lib.cast_to_nanoseconds(data)
            else:
                subarr = data
    elif data.dtype == _INT64_DTYPE:
        if isinstance(data, Int64Index):
            raise TypeError("cannot convert Int64Index->DatetimeIndex")
        if copy:
            subarr = np.asarray(data, dtype=_NS_DTYPE)
        else:
            # reinterpret the i8 values as nanosecond timestamps in place
            subarr = data.view(_NS_DTYPE)
    else:
        try:
            subarr = tools.to_datetime(data)
        except ValueError:
            # tz aware
            subarr = tools.to_datetime(data, utc=True)

        if not np.issubdtype(subarr.dtype, np.datetime64):
            raise TypeError("Unable to convert %s to datetime dtype" % str(data))

    if isinstance(subarr, DatetimeIndex):
        if tz is None:
            tz = subarr.tz
    else:
        if tz is not None:
            tz = tools._maybe_get_tz(tz)

            if not isinstance(data, DatetimeIndex) or getattr(data, "tz", None) is None:
                # Convert tz-naive to UTC
                ints = subarr.view("i8")
                subarr = lib.tz_localize_to_utc(ints, tz)
                subarr = subarr.view(_NS_DTYPE)

    subarr = subarr.view(cls)
    subarr.name = name
    subarr.offset = offset
    subarr.tz = tz

    if verify_integrity and len(subarr) > 0:
        if offset is not None and not freq_infer:
            inferred = subarr.inferred_freq
            if inferred != offset.freqstr:
                raise ValueError("Dates do not conform to passed " "frequency")

    if freq_infer:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.offset = to_offset(inferred)

    return subarr
def date_index(df):
    """Return *df* indexed by a 'date' column derived from its epoch
    'timestamp' column (seconds since the epoch).

    Note: the 'date' column is also added to the caller's frame.
    """
    df['date'] = to_datetime(df['timestamp'], unit='s')
    return df.set_index('date')
# Drop incomplete loyalty rows before joining.
df_loyalty = df_loyalty.dropna()
len(df_loyalty)

#convert to dask
df_loyalty = dd.from_pandas(df_loyalty, npartitions=3)
print df_loyalty.head()

#converting to same datatypes
df2['dlTableStoreNumber'] = df2['dlTableStoreNumber'].astype(int)
df2['CHECK'] = df2['CHECK'].astype(int)
df_loyalty['StoreNumber'] = df_loyalty['StoreNumber'].astype(int)
df_loyalty['ReceiptNumber'] = df_loyalty['ReceiptNumber'].astype(int)
# NOTE(review): Python 2 / old-dask idiom -- 'async' is a reserved word
# on Python 3 and Series.to_datetime() no longer exists in modern pandas;
# confirm the runtime this is meant for
df_loyalty['ReceiptDate'] = df_loyalty['ReceiptDate'].compute(
    get=dask. async .get_sync).to_datetime()

# Join keys: store number + receipt/check number on both sides.
left_columns = ["dlTableStoreNumber", 'CHECK']
right_columns = ["StoreNumber", "ReceiptNumber"]

#Merging both dataframes
df_merge = df2.merge(df_loyalty, how='inner', left_on=left_columns,
                     right_on=right_columns)
print df_merge.head()
print dd.compute(df_merge.count())

#Working on date format, work in progress
def __getInternalStruct(self):
    """Build a DataFrame with one column per series uuid, each indexed by
    its datapoint timestamps (epoch seconds, sorted as strings)."""
    columns = {}
    for entry in self._serie['data']:
        # keep values and index aligned by iterating the keys in the same
        # sorted order for both
        ordered_ts = sorted(entry['dps'].keys())
        columns[entry['uuid']] = Series(
            [entry['dps'][ts] for ts in ordered_ts],
            index=to_datetime([int(ts) for ts in ordered_ts], unit='s'))
    return DataFrame(columns)
def test_non_datetimeindex(self):
    # frequency inference should work on the parsed result even when it
    # is not handed a DatetimeIndex explicitly
    parsed = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
    self.assertEqual(frequencies.infer_freq(parsed), 'D')
def try_parse(values):
    # Best-effort conversion to a matplotlib float ordinal; unparseable
    # input is returned untouched for the caller to handle.
    try:
        return _dt_to_float_ordinal(tools.to_datetime(values))
    except Exception:
        return values
main_cats = ('film_and_video', 'music', 'publishing', 'games', 'design',
             'art', 'food', 'technology', 'fashion', 'comics', 'theater',
             'crafts', 'journalism', 'photography', 'animals', 'dance')

# Collapse unusable sub-categories.
# BUG FIX: chained indexing (df.sub_category[mask] = x) may write to a
# temporary copy (SettingWithCopy); .loc writes back reliably.
df.loc[df.main_category == 'unknown', 'sub_category'] = 'unknown'
df.loc[df.sub_category == 'film_&_video', 'sub_category'] = 'film_and_video'
for name in main_cats:
    df.loc[df.sub_category == name, 'sub_category'] = 'unknown'
df.sub_category.value_counts(normalize=True, dropna=False)

# Encode outcome: 1 = successful, 0 = failed; drop everything else.
df['funded'] = 2
df.loc[df.state == 'successful', 'funded'] = 1
df.loc[df.state == 'failed', 'funded'] = 0
df = df[df.funded != 2]

df.deadline = to_datetime(df.deadline)
df['year'] = DatetimeIndex(df['deadline']).year
df['month'] = DatetimeIndex(df['deadline']).month

# Convert pledged to USD.
# BUG FIX: the original pattern
#   df.pledged_USD = df.pledged_USD[mask] = df.pledged * rate
# rebinds the WHOLE column on every line, so every row ended up converted
# at the last (NOK) rate. Convert only the rows priced in each currency.
usd_rates = {'GBP': 1.48, 'CAD': .79, 'AUD': .76, 'EUR': 1.07,
             'NZD': .75, 'DKK': .14, 'NOK': .12}
df['pledged_USD'] = df.pledged
for ccy, rate in usd_rates.items():
    df.loc[df.currency == ccy, 'pledged_USD'] = df.pledged * rate
def __new__(cls, data=None, freq=None, start=None, end=None, periods=None,
            copy=False, name=None, tz=None, verify_integrity=True,
            normalize=False, **kwds):
    """Construct a DatetimeIndex either from datetime-like data or from a
    start/end/periods specification."""
    warn = False
    if 'offset' in kwds and kwds['offset']:
        # legacy spelling of 'freq'
        freq = kwds['offset']
        warn = True

    freq_infer = False
    if not isinstance(freq, DateOffset):
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            # defer frequency detection until the values are built
            freq_infer = True
            freq = None

    if warn:
        import warnings
        warnings.warn("parameter 'offset' is deprecated, "
                      "please use 'freq' instead", FutureWarning)

    offset = freq

    if periods is not None:
        # accept whole floats; reject anything non-numeric
        if com.is_float(periods):
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError('Periods must be a number, got %s' %
                             str(periods))

    if data is None and offset is None:
        raise ValueError("Must provide freq argument if no data is "
                         "supplied")

    if data is None:
        # pure range request -- no data to coerce
        return cls._generate(start, end, periods, name, offset,
                             tz=tz, normalize=normalize)

    if not isinstance(data, np.ndarray):
        if np.isscalar(data):
            raise ValueError('DatetimeIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data, dtype='O')

        # try a few ways to make it datetime64
        if lib.is_string_array(data):
            data = _str_to_dt_array(data, offset)
        else:
            data = tools.to_datetime(data)
            data.offset = offset

    if issubclass(data.dtype.type, basestring):
        subarr = _str_to_dt_array(data, offset)
    elif issubclass(data.dtype.type, np.datetime64):
        if isinstance(data, DatetimeIndex):
            subarr = data.values
            if offset is None:
                # inherit the source index's frequency; skip re-check
                offset = data.offset
                verify_integrity = False
        else:
            if data.dtype != _NS_DTYPE:
                subarr = lib.cast_to_nanoseconds(data)
            else:
                subarr = data
    elif data.dtype == _INT64_DTYPE:
        if copy:
            subarr = np.asarray(data, dtype=_NS_DTYPE)
        else:
            # reinterpret the i8 values as nanosecond timestamps in place
            subarr = data.view(_NS_DTYPE)
    else:
        subarr = tools.to_datetime(data)
        if not np.issubdtype(subarr.dtype, np.datetime64):
            raise TypeError('Unable to convert %s to datetime dtype'
                            % str(data))

    if tz is not None:
        tz = tools._maybe_get_tz(tz)
        # Convert local to UTC
        ints = subarr.view('i8')
        subarr = lib.tz_localize_to_utc(ints, tz)
        subarr = subarr.view(_NS_DTYPE)

    subarr = subarr.view(cls)
    subarr.name = name
    subarr.offset = offset
    subarr.tz = tz

    if verify_integrity and len(subarr) > 0:
        if offset is not None and not freq_infer:
            inferred = subarr.inferred_freq
            if inferred != offset.freqstr:
                raise ValueError('Dates do not conform to passed '
                                 'frequency')

    if freq_infer:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.offset = to_offset(inferred)

    return subarr
def test_non_datetimeindex(self):
    """infer_freq should detect daily frequency from a parsed array."""
    dates = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
    # FIX: assertEqual replaces the deprecated assert_ and reports both
    # operands on failure instead of a bare boolean
    self.assertEqual(infer_freq(dates), "D")
# Re-home misplaced art/publishing sub-categories, then tag the parent.
# BUG FIX: chained indexing (df.col[mask] = x) may write to a temporary
# copy (SettingWithCopy); .loc writes back reliably.
for name in art:
    df.loc[df.main_category == name, 'sub_category'] = name
    df.loc[df.sub_category == name, 'main_category'] = 'art'
for name in publishing:
    df.loc[df.main_category == name, 'sub_category'] = name
    df.loc[df.sub_category == name, 'main_category'] = 'publishing'
df.main_category.value_counts(normalize=True, dropna=False)

# Encode outcome: 1 = successful, 0 = failed; drop everything else.
df['funded'] = 2
df.loc[df.state == 'successful', 'funded'] = 1
df.loc[df.state == 'failed', 'funded'] = 0
df = df[df.funded != 2]

df.deadline = to_datetime(df.deadline)

# Convert pledged to USD.
# BUG FIX: the original pattern
#   df.pledged_USD = df.pledged_USD[mask] = df.pledged * rate
# rebinds the WHOLE column on every line, so every row ended up converted
# at the last (NOK) rate. Convert only the rows priced in each currency.
usd_rates = {'GBP': 1.48, 'CAD': .79, 'AUD': .76, 'EUR': 1.07,
             'NZD': .75, 'DKK': .14, 'NOK': .12}
df['pledged_USD'] = df.pledged
for ccy, rate in usd_rates.items():
    df.loc[df.currency == ccy, 'pledged_USD'] = df.pledged * rate
df.pledged_USD = df.pledged_USD.astype('int')
import time as tm
from pandas.tseries.tools import to_datetime

#|Set CSV load start time
start_csv = tm.time()

#|Load trade history CSV file
trd = pd.read_csv('btcnCNY.csv', names=['timestamp', 'price', 'amount'])
print '---CSV File Loaded---'

#|Set CSV load end time
end_csv = tm.time()

#|Create datetime index from timestamp (epoch seconds)
trd['date'] = to_datetime(trd['timestamp'], unit='s')
trd = trd.set_index('date')

#|Create Series objects of 'price' and 'amount'
price = trd['price'].astype(float)
amount = trd['amount'].astype(float)

#|Set frequency of hour intervals for OLHCV conversion
# NOTE(review): the comment says "hour intervals" but 'min' resamples by
# minute -- confirm which is intended
freq = 'min'

#|Set resample start time
start_resample = tm.time()

#|Create index column for OLHCV price history DataFrame
prc = pd.DataFrame(index=price.resample(freq, how='last').index)
def test_non_datetimeindex(self):
    # daily frequency should be inferred from the parsed dates even
    # without constructing a DatetimeIndex explicitly
    parsed = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
    self.assertEqual(infer_freq(parsed), 'D')
def __new__(
    cls,
    data=None,
    freq=None,
    start=None,
    end=None,
    periods=None,
    copy=False,
    name=None,
    tz=None,
    verify_integrity=True,
    normalize=False,
    **kwds
):
    """Construct a DatetimeIndex either from datetime-like data or from a
    start/end/periods specification."""
    warn = False
    if "offset" in kwds and kwds["offset"]:
        # legacy spelling of 'freq'
        freq = kwds["offset"]
        warn = True

    infer_freq = False
    if not isinstance(freq, DateOffset):
        if freq != "infer":
            freq = to_offset(freq)
        else:
            # defer frequency detection until the values are built
            infer_freq = True
            freq = None

    if warn:
        import warnings

        warnings.warn("parameter 'offset' is deprecated, " "please use 'freq' instead", FutureWarning)
        # NOTE(review): identical string-to-offset coercion in both
        # branches -- could be hoisted out of the if/else
        if isinstance(freq, basestring):
            freq = to_offset(freq)
    else:
        if isinstance(freq, basestring):
            freq = to_offset(freq)

    offset = freq

    if data is None and offset is None:
        raise ValueError("Must provide freq argument if no data is " "supplied")

    if data is None:
        # pure range request -- no data to coerce
        return cls._generate(start, end, periods, name, offset, tz=tz, normalize=normalize)

    if not isinstance(data, np.ndarray):
        if np.isscalar(data):
            raise ValueError(
                "DatetimeIndex() must be called with a " "collection of some kind, %s was passed" % repr(data)
            )

        # a lone datetime is wrapped so the list path below applies
        if isinstance(data, datetime):
            data = [data]

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data, dtype="O")

        # try a few ways to make it datetime64
        if lib.is_string_array(data):
            data = _str_to_dt_array(data, offset)
        else:
            data = tools.to_datetime(data)
            data = np.asarray(data, dtype="M8[ns]")

    if issubclass(data.dtype.type, basestring):
        subarr = _str_to_dt_array(data, offset)
    elif issubclass(data.dtype.type, np.datetime64):
        if isinstance(data, DatetimeIndex):
            # inherit values and frequency; skip the integrity re-check
            subarr = data.values
            offset = data.offset
            verify_integrity = False
        else:
            subarr = np.array(data, dtype="M8[ns]", copy=copy)
    elif issubclass(data.dtype.type, np.integer):
        # integers are reinterpreted as nanosecond timestamps
        subarr = np.array(data, dtype="M8[ns]", copy=copy)
    else:
        subarr = tools.to_datetime(data)
        if not np.issubdtype(subarr.dtype, np.datetime64):
            raise TypeError("Unable to convert %s to datetime dtype" % str(data))

    if tz is not None:
        tz = tools._maybe_get_tz(tz)
        # Convert local to UTC
        ints = subarr.view("i8")
        lib.tz_localize_check(ints, tz)
        subarr = lib.tz_convert(ints, tz, _utc())
        subarr = subarr.view("M8[ns]")

    subarr = subarr.view(cls)
    subarr.name = name
    subarr.offset = offset
    subarr.tz = tz

    if verify_integrity and len(subarr) > 0:
        if offset is not None and not infer_freq:
            inferred = subarr.inferred_freq
            if inferred != offset.freqstr:
                raise ValueError("Dates do not conform to passed " "frequency")

    if infer_freq:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.offset = to_offset(inferred)

    return subarr