Exemplo n.º 1
0
def validate_import_file(file_path):
    """
    Check the header row of an import file for the required fields.

    'name' and 'corporate_membership_type' are required fields. Header
    cells are slugified before the comparison, so the check is done on
    the slugified column names rather than on the raw header text.

    Returns a tuple ``(is_valid, missing_required_fields)``:
    ``is_valid`` is True only when every required field is present,
    and ``missing_required_fields`` lists the required names that were
    not found (empty when the file is valid).
    """
    required = ('name', 'corporate_membership_type')

    normalize_newline(file_path)
    csv_file = default_storage.open(file_path, mode='rU')
    try:
        # An empty file has no header row; treat it as having no
        # columns instead of letting StopIteration escape.
        fields = next(csv.reader(csv_file), [])
    finally:
        csv_file.close()

    corp_memb_keys = [slugify(smart_str(field)) for field in fields]

    # Compare against the slugified keys so that both return values
    # always agree with each other.
    missing_required_fields = [
        r for r in required if r not in corp_memb_keys]

    return not missing_required_fields, missing_required_fields
Exemplo n.º 2
0
def validate_import_file(file_path):
    """
    Run the header row of an import file against the required fields.

    'name' and 'corporate_membership_type' are required fields. Each
    header cell is slugified first, and the required names are looked
    up among those slugified keys.

    Returns ``(is_valid, missing_required_fields)``. ``is_valid`` is
    True when no required field is missing; ``missing_required_fields``
    is the list of required names absent from the header.
    """
    required = ('name', 'corporate_membership_type')

    normalize_newline(file_path)
    csv_file = default_storage.open(file_path, mode='rU')
    try:
        # No header row at all (empty file) means no columns; report
        # every required field as missing rather than raising.
        fields = next(csv.reader(csv_file), [])
    finally:
        csv_file.close()

    corp_memb_keys = [slugify(smart_str(field)) for field in fields]

    # The missing list must be derived from the same slugified keys as
    # the validity flag, otherwise the two results can contradict.
    missing_required_fields = [
        r for r in required if r not in corp_memb_keys]

    return not missing_required_fields, missing_required_fields
Exemplo n.º 3
0
def memb_import_parse_csv(mimport):
    """
    Parse the csv file uploaded for an import into a list of dicts.

    The first row is taken as the header and passed through
    normalize_field_names(); every following non-empty row becomes a
    dict keyed by those field names. Blank lines are skipped so they
    do not show up as empty records.

    Returns a tuple ``(fieldnames, data_list)``.
    """
    file_name = mimport.upload_file.name
    normalize_newline(file_name)

    csv_file = default_storage.open(file_name, 'rb')
    try:
        csv_reader = csv.reader(csv_file)
        fieldnames = normalize_field_names(next(csv_reader))

        # csv.reader yields [] for a blank line; drop those rows
        data_list = [
            dict(zip(fieldnames, row)) for row in csv_reader if row]
    finally:
        csv_file.close()

    return fieldnames, data_list
Exemplo n.º 4
0
def user_import_parse_csv(mimport):
    """
    Read the uploaded csv file and turn every non-empty row into a dict.

    The header row is run through normalize_field_names() and used as
    the keys of each record. Returns ``(fieldnames, data_list)``.
    """
    upload_name = mimport.upload_file.name
    normalize_newline(upload_name)

    reader = csv.reader(default_storage.open(upload_name, 'rb'))
    fieldnames = normalize_field_names(reader.next())

    # blank lines come back from the reader as empty lists; leave them out
    data_list = [
        dict(zip(fieldnames, record)) for record in reader if record]

    return fieldnames, data_list
Exemplo n.º 5
0
def csv_to_dict(file_path, **kwargs):
    """
    Read a csv file and return its rows as a list of dicts.

    The first row supplies the keys. When the keyword argument
    ``machine_name`` is truthy, each key is slugified and stripped of
    dashes. Repeated column names are made unique by appending
    "-<number>". A file containing a null byte yields an empty list.
    """
    use_machine_names = kwargs.get('machine_name', False)

    # null byte; assume xls; not csv
    if has_null_byte(file_path):
        return []

    normalize_newline(file_path)
    reader = csv.reader(default_storage.open(file_path, 'rU'))
    colnames = reader.next()  # header row

    if use_machine_names:
        colnames = [slugify(name).replace('-', '') for name in colnames]

    positions = xrange(len(colnames))

    # make sure colnames are unique: whenever another position holds the
    # same name as position i, that other position gets a numbered suffix
    suffix_counts = {}
    for i in positions:
        for j in positions:
            if i == j or colnames[i] != colnames[j]:
                continue
            count = suffix_counts.get(colnames[i], 0) + 1
            suffix_counts[colnames[i]] = count
            colnames[j] = colnames[j] + "-" + str(count)

    # zip stops at the shorter of the two, so short rows simply give
    # fewer keys and extra cells beyond the header are ignored
    records = [dict(zip(colnames, row)) for row in reader]

    return records  # list of dictionaries
Exemplo n.º 6
0
def extract_from_excel(file_path):
    """
    Read a .csv or .xls file and return its data rows as a list of dicts.

    The first row is used as the header; each following row becomes a
    dict keyed by the header names, plus a "ROW_NUM" key holding the
    row's 1-based position in the file (the header being row 1).

    For .csv files, values of columns that field_type_dict maps to
    "DateTimeField" are parsed with dateutil; blank or unparsable
    values are left as the raw string.

    For .xls files, the columns of all sheets are placed side by side;
    the number of rows is taken from the first sheet. Date cells are
    converted to datetime.date and whole numbers to int.

    Raises NameError when the file does not exist in default_storage
    or its extension is neither .csv nor .xls.
    """
    if not default_storage.exists(file_path):
        raise NameError("%s is not a valid file." % file_path)

    file_ext = (file_path[-4:]).lower()
    if file_ext not in (".csv", ".xls"):
        raise NameError("%s is not a valid file type (should be either .csv or .xls)." % file_path)

    fields = []
    data_list = []

    if file_ext == ".csv":
        import csv
        import dateutil.parser as dparser

        normalize_newline(file_path)
        csv_file = default_storage.open(file_path, "rU")
        try:
            data = csv.reader(csv_file)

            # read the column header
            fields = [smart_str(field) for field in next(data)]

            # the header is row 1, so the first data row is row 2
            for row_num, row in enumerate(data, 2):
                item = dict(zip(fields, row))
                for key in list(item):
                    if key in field_type_dict and field_type_dict[key] == "DateTimeField":
                        # dparser.parse() does the parsing; dparser.parser
                        # is the parser class and would only build an
                        # instance of it. Blank cells are skipped.
                        if item[key]:
                            try:
                                item[key] = dparser.parse(item[key])
                            except (ValueError, OverflowError):
                                # keep the raw text so one bad cell
                                # does not abort the whole extraction
                                pass
                item["ROW_NUM"] = row_num
                data_list.append(item)
        finally:
            csv_file.close()
    else:
        book = xlrd.open_workbook(file_path)
        nrows = book.sheet_by_index(0).nrows
        sheets = [book.sheet_by_index(i) for i in range(book.nsheets)]

        # get the fields from the first row
        for sh in sheets:
            for c in range(sh.ncols):
                col_item = sh.cell_value(rowx=0, colx=c)
                fields.append(smart_str(col_item))

        # get the data - skip the first row
        for r in range(1, nrows):
            row = []
            for sh in sheets:
                for c in range(sh.ncols):
                    cell = sh.cell(r, c)
                    cell_value = cell.value
                    if cell.ctype == xlrd.XL_CELL_DATE:
                        date_tuple = xlrd.xldate_as_tuple(cell_value, book.datemode)
                        cell_value = datetime.date(*date_tuple[:3])
                    elif cell.ctype == xlrd.XL_CELL_NUMBER and int(cell_value) == cell_value:
                        # so for zipcode 77079,
                        # we don't end up with 77079.0
                        cell_value = int(cell_value)
                    row.append(cell_value)

            item = dict(zip(fields, row))
            item["ROW_NUM"] = r + 1
            data_list.append(item)

    return data_list
Exemplo n.º 7
0
def extract_from_excel(file_path):
    """
    Read a .csv or .xls file and return its data rows as a list of dicts.

    The first row is used as the header; each following row becomes a
    dict keyed by the header names, plus a 'ROW_NUM' key holding the
    row's 1-based position in the file (the header being row 1).

    For .csv files, values of columns that field_type_dict maps to
    'DateTimeField' are parsed with dateutil; blank or unparsable
    values are left as the raw string.

    For .xls files, the columns of all sheets are placed side by side;
    the number of rows is taken from the first sheet. Date cells are
    converted to datetime.date and whole numbers to int.

    Raises NameError when the file does not exist in default_storage
    or its extension is neither .csv nor .xls.
    """
    if not default_storage.exists(file_path):
        raise NameError("%s is not a valid file." % file_path)

    file_ext = (file_path[-4:]).lower()
    if file_ext not in ('.csv', '.xls'):
        raise NameError(
            "%s is not a valid file type (should be either .csv or .xls)."
            % file_path
        )

    fields = []
    data_list = []

    if file_ext == '.csv':
        import csv
        import dateutil.parser as dparser

        normalize_newline(file_path)
        csv_file = default_storage.open(file_path, 'rU')
        try:
            data = csv.reader(csv_file)

            # read the column header
            fields = [smart_str(field) for field in next(data)]

            # the header is row 1, so the first data row is row 2
            for row_num, row in enumerate(data, 2):
                item = dict(zip(fields, row))
                for key in list(item):
                    is_datetime = (
                        key in field_type_dict
                        and field_type_dict[key] == 'DateTimeField')
                    # blank cells are left alone
                    if not is_datetime or not item[key]:
                        continue
                    try:
                        # parse() is the function that parses a string;
                        # dparser.parser is the parser class itself
                        item[key] = dparser.parse(item[key])
                    except (ValueError, OverflowError):
                        # keep the raw text so one bad cell does not
                        # abort the whole extraction
                        pass
                item['ROW_NUM'] = row_num
                data_list.append(item)
        finally:
            csv_file.close()
    else:
        book = xlrd.open_workbook(file_path)
        nrows = book.sheet_by_index(0).nrows
        sheets = [book.sheet_by_index(i) for i in range(book.nsheets)]

        # get the fields from the first row
        for sh in sheets:
            for c in range(sh.ncols):
                col_item = sh.cell_value(rowx=0, colx=c)
                fields.append(smart_str(col_item))

        # get the data - skip the first row
        for r in range(1, nrows):
            row = []
            for sh in sheets:
                for c in range(sh.ncols):
                    cell = sh.cell(r, c)
                    cell_value = cell.value
                    if cell.ctype == xlrd.XL_CELL_DATE:
                        date_tuple = xlrd.xldate_as_tuple(
                            cell_value, book.datemode)
                        cell_value = datetime.date(*date_tuple[:3])
                    elif (cell.ctype == xlrd.XL_CELL_NUMBER
                            and int(cell_value) == cell_value):
                        # so for zipcode 77079,
                        # we don't end up with 77079.0
                        cell_value = int(cell_value)
                    row.append(cell_value)

            item = dict(zip(fields, row))
            item['ROW_NUM'] = r + 1
            data_list.append(item)

    return data_list