def iter_column(idx, f):
    """
    Infer the type of one column of a CSV dataset.

    Rewinds ``f``, skips the header row, collects the value found at
    position ``idx`` in every non-empty row and passes the collected
    values to ``normalize_column_type``.

    :param idx: index of column
    :param f: gzip file object of CSV dataset
    :return: col_type, null_values where col_type is inferred type from
             typeinference.py and null_values is whether null values
             were found and normalized.
    """
    # Always read from the start of the file, whatever the current position.
    f.seek(0)
    reader = UnicodeCSVReader(f)

    # Discard the header. The builtin next() works on both Python 2 and
    # Python 3 iterators (reader.next() is Python 2 only), and the default
    # keeps a file without any rows from leaking a bare StopIteration.
    next(reader, None)

    col = []
    for row in reader:
        if not row:
            # Blank line in the CSV: nothing to collect.
            continue
        try:
            col.append(row[idx])
        except IndexError:
            # Bad data: the row is shorter than expected, so it is skipped.
            # Maybe we can fill with nulls?
            pass

    col_type, null_values = normalize_column_type(col)
    return col_type, null_values
def iter_column(idx, f):
    """
    Collect the values of one CSV column and infer their type.

    The file is read from the beginning, the first row is treated as a
    header and skipped, and every remaining non-empty row contributes its
    ``idx``-th value. Rows too short to have that position are ignored.

    :param idx: index of column
    :param f: file object of the CSV dataset; must support ``seek``
    :return: the ``(col_type, null_values)`` pair produced by
             ``normalize_column_type`` for the collected values.
    """
    f.seek(0)
    reader = UnicodeCSVReader(f)

    # Skip the header row; its contents are not needed here. Using the
    # builtin next() with a default is portable across Python 2 and 3 and
    # does not raise StopIteration when the file has no rows at all.
    next(reader, None)

    col = []
    for row in reader:
        if row:
            try:
                col.append(row[idx])
            except IndexError:
                # Bad data. Maybe we can fill with nulls?
                pass

    col_type, null_values = normalize_column_type(col)
    return col_type, null_values