Exemplo n.º 1
0
    def test_string_na_nat_conversion(self):
        # Checks that date strings mixed with NaN convert to datetimes with
        # the missing slots set to iNaT, for object arrays and for Series,
        # and exercises the ``errors`` modes of ``to_datetime``.
        # GH #999, #858

        from pandas.compat import parse_date

        strings = np.array(['1/1/2000', '1/2/2000', np.nan,
                            '1/4/2000, 12:34:56'], dtype=object)

        # Reference values are built element by element.
        expected = np.empty(len(strings), dtype='M8[ns]')
        for pos, raw in enumerate(strings):
            expected[pos] = tslib.iNaT if isnull(raw) else parse_date(raw)

        converted = tslib.array_to_datetime(strings)
        tm.assert_almost_equal(converted, expected)

        as_index = to_datetime(strings)
        tm.assertIsInstance(as_index, DatetimeIndex)
        tm.assert_numpy_array_equal(converted, as_index.values)

        malformed = np.array(['1/100/2000', np.nan], dtype=object)

        # GH 10636, default is now 'raise'
        self.assertRaises(ValueError,
                          lambda: to_datetime(malformed, errors='raise'))

        # With errors='ignore' the input is expected back unchanged.
        untouched = to_datetime(malformed, errors='ignore')
        tm.assert_numpy_array_equal(untouched, malformed)

        self.assertRaises(ValueError, to_datetime, malformed, errors='raise')

        idx = ['a', 'b', 'c', 'd', 'e']
        series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
                         '1/5/2000'], index=idx, name='foo')
        dseries = Series([to_datetime('1/1/2000'), np.nan,
                          to_datetime('1/3/2000'), np.nan,
                          to_datetime('1/5/2000')], index=idx, name='foo')

        result = to_datetime(series)
        dresult = to_datetime(dseries)

        expected = Series(np.empty(len(idx), dtype='M8[ns]'), index=idx)
        for pos in range(len(idx)):
            raw = series[pos]
            expected[pos] = tslib.iNaT if isnull(raw) else to_datetime(raw)

        # String input and datetime input must both match the reference
        # values and keep the original Series name.
        for outcome in (result, dresult):
            assert_series_equal(outcome, expected, check_names=False)
            self.assertEqual(outcome.name, 'foo')
Exemplo n.º 2
0
    def test_parse_dates_custom_euroformat(self):
        text = """foo,bar,baz
31/01/2010,1,2
01/02/2010,1,NA
02/02/2010,1,2
"""
        parser = lambda d: parse_date(d, dayfirst=True)
        df = self.read_csv(StringIO(text),
                           names=['time', 'Q', 'NTU'],
                           header=0,
                           index_col=0,
                           parse_dates=True,
                           date_parser=parser,
                           na_values=['NA'])

        exp_index = Index([
            datetime(2010, 1, 31),
            datetime(2010, 2, 1),
            datetime(2010, 2, 2)
        ],
                          name='time')
        expected = DataFrame({
            'Q': [1, 1, 1],
            'NTU': [2, np.nan, 2]
        },
                             index=exp_index,
                             columns=['Q', 'NTU'])
        tm.assert_frame_equal(df, expected)

        parser = lambda d: parse_date(d, day_first=True)
        pytest.raises(TypeError,
                      self.read_csv,
                      StringIO(text),
                      skiprows=[0],
                      names=['time', 'Q', 'NTU'],
                      index_col=0,
                      parse_dates=True,
                      date_parser=parser,
                      na_values=['NA'])
Exemplo n.º 3
0
def test_parse_dates_custom_euro_format(all_parsers, kwargs):
    """Parse day-first dates with ``parse_date(d, **kwargs)`` as date parser.

    When ``kwargs`` contains ``"dayfirst"`` the parsed frame is compared
    with the expected one; otherwise ``read_csv`` is expected to raise a
    ``TypeError`` complaining about the ``day_first`` keyword.
    """
    parser = all_parsers
    data = """foo,bar,baz
31/01/2010,1,2
01/02/2010,1,NA
02/02/2010,1,2
"""

    def date_parser(d):
        return parse_date(d, **kwargs)

    if "dayfirst" not in kwargs:
        # Failure path: the keyword is expected to be rejected.
        msg = "got an unexpected keyword argument 'day_first'"
        with pytest.raises(TypeError, match=msg):
            parser.read_csv(
                StringIO(data),
                names=["time", "Q", "NTU"],
                date_parser=date_parser,
                skiprows=[0],
                index_col=0,
                parse_dates=True,
                na_values=["NA"])
        return

    result = parser.read_csv(
        StringIO(data),
        names=["time", "Q", "NTU"],
        date_parser=date_parser,
        header=0,
        index_col=0,
        parse_dates=True,
        na_values=["NA"])

    expected_index = Index(
        [datetime(2010, 1, 31),
         datetime(2010, 2, 1),
         datetime(2010, 2, 2)],
        name="time")
    expected = DataFrame(
        {"Q": [1, 1, 1], "NTU": [2, np.nan, 2]},
        index=expected_index,
        columns=["Q", "NTU"])
    tm.assert_frame_equal(result, expected)
Exemplo n.º 4
0
    def test_parse_dates_custom_euroformat(self):
        text = """foo,bar,baz
31/01/2010,1,2
01/02/2010,1,NA
02/02/2010,1,2
"""
        parser = lambda d: parse_date(d, dayfirst=True)
        df = self.read_csv(StringIO(text),
                           names=['time', 'Q', 'NTU'], header=0,
                           index_col=0, parse_dates=True,
                           date_parser=parser, na_values=['NA'])

        exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1),
                           datetime(2010, 2, 2)], name='time')
        expected = DataFrame({'Q': [1, 1, 1], 'NTU': [2, np.nan, 2]},
                             index=exp_index, columns=['Q', 'NTU'])
        tm.assert_frame_equal(df, expected)

        parser = lambda d: parse_date(d, day_first=True)
        pytest.raises(TypeError, self.read_csv,
                      StringIO(text), skiprows=[0],
                      names=['time', 'Q', 'NTU'], index_col=0,
                      parse_dates=True, date_parser=parser,
                      na_values=['NA'])
Exemplo n.º 5
0
def test_parse_dates_custom_euro_format(all_parsers, kwargs):
    """Check ``read_csv`` with ``parse_date(d, **kwargs)`` as date parser.

    If ``"dayfirst"`` is among ``kwargs`` the result is compared with the
    expected frame. Otherwise a ``TypeError`` mentioning the ``day_first``
    keyword is expected.
    """
    parser = all_parsers
    data = """foo,bar,baz
31/01/2010,1,2
01/02/2010,1,NA
02/02/2010,1,2
"""
    convert = lambda d: parse_date(d, **kwargs)

    if "dayfirst" not in kwargs:
        # Failure path: the keyword is expected to be rejected.
        msg = "got an unexpected keyword argument 'day_first'"
        with pytest.raises(TypeError, match=msg):
            parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
                            date_parser=convert, skiprows=[0], index_col=0,
                            parse_dates=True, na_values=["NA"])
        return

    parsed = parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
                             date_parser=convert, header=0, index_col=0,
                             parse_dates=True, na_values=["NA"])

    dates = [datetime(2010, 1, 31), datetime(2010, 2, 1),
             datetime(2010, 2, 2)]
    wanted = DataFrame({"Q": [1, 1, 1], "NTU": [2, np.nan, 2]},
                       index=Index(dates, name="time"),
                       columns=["Q", "NTU"])
    tm.assert_frame_equal(parsed, wanted)
Exemplo n.º 6
0
def _parse_commit_log(this, repo_path, base_commit=None):
    """Run ``git log`` on *repo_path* and return its commits as Series.

    Parameters
    ----------
    this : object
        Not used by this function; kept so the signature is unchanged.
    repo_path : str
        Working tree of the repository; ``<repo_path>/.git`` is passed to
        git as the git directory.
    base_commit : str, optional
        When given, only the revision range ``<base_commit>..`` is listed.
        When None, no range is passed and git lists the whole history.

    Returns
    -------
    tuple of Series
        ``(shas, messages, timestamps, authors)``, each reversed relative
        to the order in which git printed the commits. ``shas`` is indexed
        by the converted timestamps; the other three are indexed by sha.

    Notes
    -----
    git is invoked with an argument list and its output is read from a
    pipe, so no shell quoting is involved and no temporary file is
    written. The exit status of git is not checked.
    """
    import subprocess

    from vbench.git import _convert_timezones
    from pandas import Series
    from pandas.compat import parse_date

    cmd = ['git',
           '--git-dir=%s/.git' % repo_path,
           '--work-tree=%s' % repo_path,
           'log', '--graph',
           '--pretty=format:::%h::%cd::%s::%an']
    if base_commit is not None:
        # The range must be its own argument; glued onto the format string
        # it would be printed after every author name instead of limiting
        # the listed commits.
        cmd.append('%s..' % base_commit)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    githist = proc.communicate()[0]

    shas = []
    timestamps = []
    messages = []
    authors = []
    for line in githist.split('\n'):
        # --graph marks commit lines with '*' in the text before the first
        # '::'; every other line (graph connectors, blanks) is skipped.
        if '*' not in line.split("::")[0]:
            continue

        _, sha, stamp, message, author = line.split('::', 4)

        # parse timestamp into datetime object
        stamp = parse_date(stamp)

        shas.append(sha)
        timestamps.append(stamp)
        messages.append(message)
        authors.append(author)

    # to UTC for now
    timestamps = _convert_timezones(timestamps)

    shas = Series(shas, timestamps)
    messages = Series(messages, shas)
    timestamps = Series(timestamps, shas)
    authors = Series(authors, shas)
    return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1]
Exemplo n.º 7
0
def _parse_commit_log(this, repo_path, base_commit=None):
    """Run ``git log`` on *repo_path* and return its commits as Series.

    Parameters
    ----------
    this : object
        Not used by this function; kept so the signature is unchanged.
    repo_path : str
        Working tree of the repository; ``<repo_path>/.git`` is passed to
        git as the git directory.
    base_commit : str, optional
        When given, only the revision range ``<base_commit>..`` is listed.
        When None, no range is passed and git lists the whole history.

    Returns
    -------
    tuple of Series
        ``(shas, messages, timestamps, authors)``, each reversed relative
        to the order in which git printed the commits. ``shas`` is indexed
        by the converted timestamps; the other three are indexed by sha.

    Notes
    -----
    git is invoked with an argument list and its output is read from a
    pipe, so no shell quoting is involved and no temporary file is
    written. The exit status of git is not checked.
    """
    import subprocess

    from vbench.git import _convert_timezones
    from pandas import Series
    from pandas.compat import parse_date

    cmd = ['git',
           '--git-dir=%s/.git' % repo_path,
           '--work-tree=%s' % repo_path,
           'log', '--graph',
           '--pretty=format:::%h::%cd::%s::%an']
    if base_commit is not None:
        # The range must be its own argument; glued onto the format string
        # it would be printed after every author name instead of limiting
        # the listed commits.
        cmd.append('%s..' % base_commit)

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    githist = proc.communicate()[0]

    shas = []
    timestamps = []
    messages = []
    authors = []
    for line in githist.split('\n'):
        # --graph marks commit lines with '*' in the text before the first
        # '::'; every other line (graph connectors, blanks) is skipped.
        if '*' not in line.split("::")[0]:
            continue

        _, sha, stamp, message, author = line.split('::', 4)

        # parse timestamp into datetime object
        stamp = parse_date(stamp)

        shas.append(sha)
        timestamps.append(stamp)
        messages.append(message)
        authors.append(author)

    # to UTC for now
    timestamps = _convert_timezones(timestamps)

    shas = Series(shas, timestamps)
    messages = Series(messages, shas)
    timestamps = Series(timestamps, shas)
    authors = Series(authors, shas)
    return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1]
Exemplo n.º 8
0
def get_commit_vitals(c, hlen=HASH_LEN):
    """Return ``(hash, subject, date)`` for commit *c*.

    The three fields come from ``get_commit_info`` called with the
    tab-separated format ``'%H\\t%s\\t%ci'`` (presumably git pretty-format
    placeholders; confirm against ``get_commit_info``). The hash is cut to
    its first *hlen* characters and the date text is run through
    ``parse_date``.
    """
    full_hash, subject, date_text = get_commit_info(c, '%H\t%s\t%ci', "\t")
    short_hash = full_hash[:hlen]
    return short_hash, subject, parse_date(date_text)
def get_commit_vitals(c, hlen=HASH_LEN):
    """Return the truncated hash, subject and parsed date of commit *c*.

    ``get_commit_info`` is asked for three tab-separated fields using the
    format ``'%H\\t%s\\t%ci'`` (presumably git pretty-format placeholders;
    confirm against ``get_commit_info``). Only the first *hlen* characters
    of the hash are kept, and the date text is converted with
    ``parse_date``.
    """
    fields = get_commit_info(c, '%H\t%s\t%ci', "\t")
    commit_hash, subject, raw_date = fields
    return commit_hash[:hlen], subject, parse_date(raw_date)