def test_string_na_nat_conversion(self):
    """Missing entries among date strings must come back as NaT.

    Exercises ``tslib.array_to_datetime`` and ``to_datetime`` on an object
    array containing ``np.nan``, the ``errors='raise'`` / ``errors='ignore'``
    modes on a malformed date string, and ``to_datetime`` on Series holding
    either strings or already-converted datetimes.
    """
    # GH #999, #858
    from pandas.compat import parse_date

    date_strings = np.array(['1/1/2000', '1/2/2000', np.nan,
                             '1/4/2000, 12:34:56'], dtype=object)

    # Build the expected datetime64 array element by element.
    expected = np.empty(4, dtype='M8[ns]')
    for pos, raw in enumerate(date_strings):
        expected[pos] = tslib.iNaT if isnull(raw) else parse_date(raw)

    result = tslib.array_to_datetime(date_strings)
    tm.assert_almost_equal(result, expected)

    result2 = to_datetime(date_strings)
    tm.assertIsInstance(result2, DatetimeIndex)
    tm.assert_numpy_array_equal(result, result2.values)

    malformed = np.array(['1/100/2000', np.nan], dtype=object)

    # GH 10636, default is now 'raise'
    with self.assertRaises(ValueError):
        to_datetime(malformed, errors='raise')

    # With errors='ignore' the input is handed back unchanged.
    result = to_datetime(malformed, errors='ignore')
    tm.assert_numpy_array_equal(result, malformed)

    with self.assertRaises(ValueError):
        to_datetime(malformed, errors='raise')

    idx = ['a', 'b', 'c', 'd', 'e']
    series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, '1/5/2000'],
                    index=idx, name='foo')
    dseries = Series([to_datetime('1/1/2000'), np.nan,
                      to_datetime('1/3/2000'), np.nan,
                      to_datetime('1/5/2000')],
                     index=idx, name='foo')

    result = to_datetime(series)
    dresult = to_datetime(dseries)

    expected = Series(np.empty(5, dtype='M8[ns]'), index=idx)
    for pos in range(5):
        raw = series[pos]
        expected[pos] = tslib.iNaT if isnull(raw) else to_datetime(raw)

    # Names are compared separately because ``expected`` is built unnamed.
    assert_series_equal(result, expected, check_names=False)
    self.assertEqual(result.name, 'foo')

    assert_series_equal(dresult, expected, check_names=False)
    self.assertEqual(dresult.name, 'foo')
def test_parse_dates_custom_euroformat(self):
    """Check ``read_csv`` with a user-supplied day-first ``date_parser``.

    The first call parses the index column with ``dayfirst=True`` and
    compares the result with a hand-built frame.  The second call passes a
    parser using the keyword ``day_first`` and expects ``TypeError``.
    """
    # One CSV record per line; explicit "\n" keeps the header and the three
    # data rows separate regardless of how the source file is re-wrapped.
    text = (
        'foo,bar,baz\n'
        '31/01/2010,1,2\n'
        '01/02/2010,1,NA\n'
        '02/02/2010,1,2\n'
    )

    def parser(d):
        return parse_date(d, dayfirst=True)

    df = self.read_csv(StringIO(text),
                       names=['time', 'Q', 'NTU'], header=0,
                       index_col=0, parse_dates=True,
                       date_parser=parser, na_values=['NA'])

    exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1),
                       datetime(2010, 2, 2)], name='time')
    expected = DataFrame({'Q': [1, 1, 1], 'NTU': [2, np.nan, 2]},
                         index=exp_index, columns=['Q', 'NTU'])
    tm.assert_frame_equal(df, expected)

    # Keyword spelled ``day_first`` instead of ``dayfirst``: the test expects
    # this to surface as a TypeError from read_csv.
    def bad_parser(d):
        return parse_date(d, day_first=True)

    with pytest.raises(TypeError):
        self.read_csv(StringIO(text), skiprows=[0],
                      names=['time', 'Q', 'NTU'], index_col=0,
                      parse_dates=True, date_parser=bad_parser,
                      na_values=['NA'])
def test_parse_dates_custom_euro_format(all_parsers, kwargs):
    """Check ``read_csv`` with a custom ``date_parser`` built from ``kwargs``.

    When ``kwargs`` contains ``"dayfirst"`` the parsed frame is compared with
    a hand-built expected frame; otherwise the call is expected to raise
    ``TypeError`` complaining about the keyword ``day_first``.

    Parameters
    ----------
    all_parsers
        Object exposing ``read_csv``.
    kwargs : dict
        Keyword arguments forwarded to ``parse_date``.
    """
    parser = all_parsers
    # One CSV record per line; explicit "\n" keeps the header and the three
    # data rows separate regardless of how the source file is re-wrapped.
    data = (
        "foo,bar,baz\n"
        "31/01/2010,1,2\n"
        "01/02/2010,1,NA\n"
        "02/02/2010,1,2\n"
    )

    if "dayfirst" in kwargs:
        df = parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
                             date_parser=lambda d: parse_date(d, **kwargs),
                             header=0, index_col=0, parse_dates=True,
                             na_values=["NA"])
        exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1),
                           datetime(2010, 2, 2)], name="time")
        expected = DataFrame({"Q": [1, 1, 1], "NTU": [2, np.nan, 2]},
                             index=exp_index, columns=["Q", "NTU"])
        tm.assert_frame_equal(df, expected)
    else:
        msg = "got an unexpected keyword argument 'day_first'"
        with pytest.raises(TypeError, match=msg):
            parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
                            date_parser=lambda d: parse_date(d, **kwargs),
                            skiprows=[0], index_col=0, parse_dates=True,
                            na_values=["NA"])
def test_parse_dates_custom_euroformat(self):
    """Check ``read_csv`` with a user-supplied day-first ``date_parser``.

    The first call parses the index column with ``dayfirst=True`` and
    compares the result with a hand-built frame.  The second call passes a
    parser using the keyword ``day_first`` and expects ``TypeError``.
    """
    # One CSV record per line; explicit "\n" keeps the header and the three
    # data rows separate regardless of how the source file is re-wrapped.
    text = (
        'foo,bar,baz\n'
        '31/01/2010,1,2\n'
        '01/02/2010,1,NA\n'
        '02/02/2010,1,2\n'
    )

    def parser(d):
        return parse_date(d, dayfirst=True)

    df = self.read_csv(StringIO(text),
                       names=['time', 'Q', 'NTU'], header=0,
                       index_col=0, parse_dates=True,
                       date_parser=parser, na_values=['NA'])

    exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1),
                       datetime(2010, 2, 2)], name='time')
    expected = DataFrame({'Q': [1, 1, 1], 'NTU': [2, np.nan, 2]},
                         index=exp_index, columns=['Q', 'NTU'])
    tm.assert_frame_equal(df, expected)

    # Keyword spelled ``day_first`` instead of ``dayfirst``: the test expects
    # this to surface as a TypeError from read_csv.
    def bad_parser(d):
        return parse_date(d, day_first=True)

    with pytest.raises(TypeError):
        self.read_csv(StringIO(text), skiprows=[0],
                      names=['time', 'Q', 'NTU'], index_col=0,
                      parse_dates=True, date_parser=bad_parser,
                      na_values=['NA'])
def test_parse_dates_custom_euro_format(all_parsers, kwargs):
    """Check ``read_csv`` with a custom ``date_parser`` built from ``kwargs``.

    When ``kwargs`` contains ``"dayfirst"`` the parsed frame is compared with
    a hand-built expected frame; otherwise the call is expected to raise
    ``TypeError`` complaining about the keyword ``day_first``.

    Parameters
    ----------
    all_parsers
        Object exposing ``read_csv``.
    kwargs : dict
        Keyword arguments forwarded to ``parse_date``.
    """
    parser = all_parsers
    # One CSV record per line; explicit "\n" keeps the header and the three
    # data rows separate regardless of how the source file is re-wrapped.
    data = (
        "foo,bar,baz\n"
        "31/01/2010,1,2\n"
        "01/02/2010,1,NA\n"
        "02/02/2010,1,2\n"
    )

    if "dayfirst" in kwargs:
        df = parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
                             date_parser=lambda d: parse_date(d, **kwargs),
                             header=0, index_col=0, parse_dates=True,
                             na_values=["NA"])
        exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1),
                           datetime(2010, 2, 2)], name="time")
        expected = DataFrame({"Q": [1, 1, 1], "NTU": [2, np.nan, 2]},
                             index=exp_index, columns=["Q", "NTU"])
        tm.assert_frame_equal(df, expected)
    else:
        msg = "got an unexpected keyword argument 'day_first'"
        with pytest.raises(TypeError, match=msg):
            parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
                            date_parser=lambda d: parse_date(d, **kwargs),
                            skiprows=[0], index_col=0, parse_dates=True,
                            na_values=["NA"])
def _parse_commit_log(this, repo_path, base_commit=None):
    """Read the git history of ``repo_path`` into four pandas Series.

    Runs ``git log --graph`` with a ``::``-separated pretty format and
    parses every commit line into abbreviated hash, commit date, subject
    and author name.

    Parameters
    ----------
    this : object
        Not referenced by this function.
    repo_path : str
        Path of the working tree; ``<repo_path>/.git`` is used as git dir.
    base_commit : str, optional
        When given, the log is restricted to the range ``<base_commit>..``.
        When ``None`` the full history is listed.

    Returns
    -------
    tuple of Series
        ``(shas, messages, timestamps, authors)``, each reversed relative to
        the order in which ``git log`` printed the commits.  ``shas`` is
        indexed by the converted timestamps, the other three by ``shas``.
    """
    import subprocess

    from vbench.git import _convert_timezones
    from pandas import Series
    from pandas.compat import parse_date

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim and no temporary
    # ``githist.txt`` file is needed.
    cmd = [
        'git',
        '--git-dir=%s/.git' % repo_path,
        '--work-tree=%s' % repo_path,
        'log',
        '--graph',
        '--pretty=format:::%h::%cd::%s::%an',
    ]
    # The range must be its own argument; previously it was glued onto the
    # format string (and rendered as "None.." when no base commit was given).
    if base_commit is not None:
        cmd.append('%s..' % base_commit)

    # The exit status is deliberately not checked, matching the previous
    # os.system-based behaviour: a failing git yields an empty history.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    githist, _unused_err = proc.communicate()

    shas = []
    timestamps = []
    messages = []
    authors = []
    for line in githist.split('\n'):
        fields = line.split('::', 4)
        if '*' not in fields[0]:  # skip non-commit lines
            continue

        _, sha, stamp, message, author = fields

        # parse timestamp into datetime object
        stamp = parse_date(stamp)

        shas.append(sha)
        timestamps.append(stamp)
        messages.append(message)
        authors.append(author)

    # to UTC for now
    timestamps = _convert_timezones(timestamps)

    shas = Series(shas, timestamps)
    messages = Series(messages, shas)
    timestamps = Series(timestamps, shas)
    authors = Series(authors, shas)
    return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1]
def _parse_commit_log(this, repo_path, base_commit=None):
    """Read the git history of ``repo_path`` into four pandas Series.

    Runs ``git log --graph`` with a ``::``-separated pretty format and
    parses every commit line into abbreviated hash, commit date, subject
    and author name.

    Parameters
    ----------
    this : object
        Not referenced by this function.
    repo_path : str
        Path of the working tree; ``<repo_path>/.git`` is used as git dir.
    base_commit : str, optional
        When given, the log is restricted to the range ``<base_commit>..``.
        When ``None`` the full history is listed.

    Returns
    -------
    tuple of Series
        ``(shas, messages, timestamps, authors)``, each reversed relative to
        the order in which ``git log`` printed the commits.  ``shas`` is
        indexed by the converted timestamps, the other three by ``shas``.
    """
    import subprocess

    from vbench.git import _convert_timezones
    from pandas import Series
    from pandas.compat import parse_date

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim and no temporary
    # ``githist.txt`` file is needed.
    cmd = [
        'git',
        '--git-dir=%s/.git' % repo_path,
        '--work-tree=%s' % repo_path,
        'log',
        '--graph',
        '--pretty=format:::%h::%cd::%s::%an',
    ]
    # The range must be its own argument; previously it was glued onto the
    # format string (and rendered as "None.." when no base commit was given).
    if base_commit is not None:
        cmd.append('%s..' % base_commit)

    # The exit status is deliberately not checked, matching the previous
    # os.system-based behaviour: a failing git yields an empty history.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    githist, _unused_err = proc.communicate()

    shas = []
    timestamps = []
    messages = []
    authors = []
    for line in githist.split('\n'):
        fields = line.split('::', 4)
        if '*' not in fields[0]:  # skip non-commit lines
            continue

        _, sha, stamp, message, author = fields

        # parse timestamp into datetime object
        stamp = parse_date(stamp)

        shas.append(sha)
        timestamps.append(stamp)
        messages.append(message)
        authors.append(author)

    # to UTC for now
    timestamps = _convert_timezones(timestamps)

    shas = Series(shas, timestamps)
    messages = Series(messages, shas)
    timestamps = Series(timestamps, shas)
    authors = Series(authors, shas)
    return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1]
def get_commit_vitals(c, hlen=HASH_LEN):
    """Return ``(short_hash, subject, date)`` for commit ``c``.

    Parameters
    ----------
    c
        Commit identifier, passed straight to ``get_commit_info``.
    hlen : int, optional
        Number of leading hash characters to keep (default ``HASH_LEN``).

    Returns
    -------
    tuple
        The hash truncated to ``hlen`` characters, the second field as
        returned by ``get_commit_info``, and the third field run through
        ``parse_date``.
    """
    # Three tab-separated fields are requested; the placeholders are
    # presumably git pretty-format codes (hash, subject, committer date).
    full_hash, subject, date_text = get_commit_info(c, '%H\t%s\t%ci', "\t")
    short_hash = full_hash[:hlen]
    return short_hash, subject, parse_date(date_text)