# Alter values in one column based on values in another column (changes occur in place).
# Use .loc for this; the older .ix indexer worked the same way but was removed in pandas 1.0.
#   df.loc[df["column_x"] == 5, "column_y"] = 1
#   df.loc[df.column_x == "string_value", "column_y"] = "new_string_value"
#
# Transpose the data frame (i.e. rows become columns, columns become rows).
#   df.T
#
# String methods are accessed via 'str'.
#   df.column_y.str.upper()                      # converts to uppercase
#   df.column_y.str.contains('value', na=False)  # checks for a substring, returns a boolean series
#
# Convert a string column to the datetime format.
#   df['time_column'] = pd.to_datetime(df.time_column)
#   df.time_column.dt.hour                              # datetime format exposes convenient attributes
#   (df.time_column.max() - df.time_column.min()).days  # also allows you to do datetime "math"
#   df[df.time_column > pd.Timestamp(2014, 1, 1)]       # boolean filtering with datetime format
#
# Setting and then removing an index; resetting the index can help remove hierarchical
# indexes while preserving the table in its basic structure.
#   df.set_index('time_column', inplace=True)
#   df.reset_index(inplace=True)
#
# Sort a column's value counts by their index.
#   df.column_y.value_counts().sort_index()
#
# Change the data type of a column.
#   df['column_x'] = df.column_x.astype('float')
#
# Change the data type of a column when reading in a file.
# NOTE(review): these snippets assume `import pandas as pd` and a DataFrame
# `df` with columns `column_x`, `column_y` and `time_column` are already
# defined earlier in the file -- confirm before running this section.

# set values in one column based on a condition on another column (in place);
# use .loc with a boolean mask -- the legacy .ix indexer was removed in pandas 1.0
df.loc[df["column_x"] == 5, "column_y"] = 1
df.loc[df.column_x == "string_value", "column_y"] = "string_value"

# transpose data frame (i.e. rows become columns, columns become rows)
df.T

# string methods are accessed via 'str'
df.column_y.str.upper()  # converts to uppercase
# checks for a substring, returns boolean series; na must be the boolean
# False (not the string 'False') so missing values count as "no match"
df.column_y.str.contains('value', na=False)

# convert a string column to the datetime format
df['time_column'] = pd.to_datetime(df.time_column)
df.time_column.dt.hour  # datetime format exposes convenient attributes
(df.time_column.max() - df.time_column.min()).days  # also allows datetime "math"
df[df.time_column > pd.Timestamp(2014, 1, 1)]  # boolean filtering with datetime format

# setting and then removing an index; resetting the index can help remove
# hierarchical indexes while preserving the table in its basic structure
df.set_index('time_column', inplace=True)
df.reset_index(inplace=True)

# sort a column's value counts by their index
df.column_y.value_counts().sort_index()

# change the data type of a column
df['column_x'] = df.column_x.astype('float')