Example #1
import datetime


def from_url(url):
    # allow the year to go +1 in case we're on New Year's Eve
    # and dealing with timezones
    max_year = datetime.date.today().year + 1

    def parse_segments(segments):
        for segment in segments:
            try:
                val = int(segment)
                # years
                if 1980 <= val <= max_year:
                    yield val
                    continue
                # months and days
                if 1 <= val <= 31:
                    yield val
                    continue
            except ValueError:
                # segment was not an integer
                continue
            except GeneratorExit:
                break

    segments = url.split('/')
    year, month, day = None, None, None
    parser = parse_segments(segments)

    try:
        # try to extract in year, month, day order
        year = next(parser)
        month = next(parser)
        day = next(parser)
    except StopIteration:
        # fewer than three numeric segments in the URL
        pass

    if year:
        if year < 1980 or year > max_year:
            year = None
    if month:
        if month < 1 or month > 12:
            month = None
    if day:
        if day < 1 or day > 31:
            day = None
    if not day:
        day = 1

    if year and month and day:
        try:
            return datetime.datetime(year, month, day)
        except ValueError:
            # e.g. February 30th: numeric segments that don't form a real date
            return None

    return None
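
A quick usage sketch for the function above (the URLs are invented for illustration):

# year, month and day appear as numeric path segments and parse cleanly
print(from_url('https://example.com/blog/2014/07/21/some-post'))
# -> datetime.datetime(2014, 7, 21, 0, 0)

# only a year is present, so no full date can be built
print(from_url('https://example.com/blog/2014/some-post'))
# -> None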
Example #2
File: tasks.py  Project: gunduru/seed
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""
    import_file = ImportFile.objects.get(pk=file_pk)

    if import_file.raw_save_done:
        return {'status': 'warning', 'message': 'raw data already saved'}

    if import_file.source_type == "Green Button Raw":
        return _save_raw_green_button_data(file_pk, *args, **kwargs)

    parser = reader.MCMParser(import_file.local_file)
    cache_first_rows(import_file, parser)
    rows = parser.next()  # MCMParser.next() returns an iterator over the rows
    import_file.num_rows = 0

    prog_key = get_prog_key('save_raw_data', file_pk)

    tasks = []
    for chunk in batch(rows, 100):
        import_file.num_rows += len(chunk)
        tasks.append(_save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

    tasks = add_cache_increment_parameter(tasks)
    import_file.num_columns = parser.num_columns()
    import_file.save()

    if tasks:
        chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
    else:
        finish_raw_save.task(file_pk)

    return {'status': 'success'}
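
The batch helper used above is defined elsewhere in the project; a minimal sketch of what the chunking loop assumes (an iterable sliced into lists of at most size items) could look like this:

from itertools import islice

def batch(iterable, size):
    """Yield successive lists of at most `size` items from any iterable."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            break
        yield chunk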
Example #3
def cache_first_rows(import_file, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_file: ImportFile inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need to do
    it differently.

    """
    parser.seek_to_beginning()
    rows = parser.next()

    validation_rows = []
    for i in range(5):
        try:
            row = rows.next()
            if row:
                validation_rows.append(row)
        except StopIteration:
            # fewer than 5 rows in the file
            break

    import_file.cached_second_to_fifth_row = "\n".join(
        ROW_DELIMITER.join(str(x) for x in r.values())
        for r in validation_rows
    )
    try:
        first_row = rows.next().keys()
    except StopIteration:
        # no sixth row; every dict row shares the same keys, so fall back
        first_row = validation_rows[0].keys() if validation_rows else None
    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_file.cached_first_row = first_row or ''

    import_file.save()
    # Reset our file pointer for mapping.
    parser.seek_to_beginning()
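
For context, the two cached fields are flat strings, so reading them back just reverses the joins. A sketch, assuming ROW_DELIMITER is a single-character separator such as '|':

ROW_DELIMITER = '|'  # assumed value, for illustration only

def parse_cached_rows(cached_first_row, cached_second_to_fifth_row):
    """Recover the header list and validation rows from the cached strings."""
    headers = cached_first_row.split(ROW_DELIMITER)
    rows = [line.split(ROW_DELIMITER)
            for line in cached_second_to_fifth_row.split('\n') if line]
    return headers, rows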
Example #4
def cache_first_rows(import_record, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_record: ImportRecord inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need to do
    it differently.

    """
    parser.csvfile.seek(0)
    rows = parser.next()
    # the dict keys are the column headers
    first_row = rows.next().keys()

    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_record.cached_first_row = first_row or ''
    validation_rows = []
    for i in range(5):
        try:
            row = rows.next()
            if row:
                validation_rows.append(row)
        except StopIteration:
            # fewer than 6 rows in the file
            break

    import_record.cached_second_to_fifth_row = "\n".join(
        [ROW_DELIMITER.join(str(x) for x in r.values()) for r in validation_rows]
    )
    import_record.save()
    # Reset our file pointer for mapping.
    parser.csvfile.seek(0)
Example #5
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV or XLSX file and save the raw data into the DB BuildingSnapshot table."""

    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)
        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            set_cache(prog_key, result['status'], result)
            return result

        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)

        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0
        import_file.num_columns = parser.num_columns()

        # Why are we setting the num_rows to the number of chunks?
        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(_save_raw_data_chunk.s(chunk, file_pk, prog_key))

        import_file.save()

        # need to rework how the progress keys are implemented here
        tasks = add_cache_increment_parameter(tasks)
        if tasks:
            chord(tasks, interval=15)(finish_raw_save.s(file_pk))
        else:
            # no chunks were produced; invoke the finish step directly
            finish_raw_save.s(file_pk).apply_async()

    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + str(e.message)
        result['stacktrace'] = traceback.format_exc()

    set_cache(prog_key, result['status'], result)
    return result
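
The chord(...) call above is the standard Celery idiom for running a group of tasks in parallel and firing a callback once all of them finish. A minimal self-contained sketch (the task names and broker URL are invented for illustration):

from celery import Celery, chord

app = Celery('sketch', broker='redis://localhost:6379/0')

@app.task
def save_chunk(chunk):
    # stand-in for _save_raw_data_chunk: pretend to save and report a count
    return len(chunk)

@app.task
def finish(counts, file_pk):
    # stand-in for finish_raw_save; Celery prepends the list of header results
    print('saved %d rows for file %s' % (sum(counts), file_pk))

# header: one subtask per chunk; body: runs only after every header task succeeds
chord(save_chunk.s(c) for c in ([1, 2], [3]))(finish.s(42))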
Example #6
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""

    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)

        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            cache.set(prog_key, result)
            return result

        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)

        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0

        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(
                _save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

        tasks = add_cache_increment_parameter(tasks)
        import_file.num_columns = parser.num_columns()
        import_file.save()

        if tasks:
            chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
        else:
            finish_raw_save.task(file_pk)

    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()

    cache.set(prog_key, result)
    return result
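
get_prog_key builds the cache key under which progress for a given import file is tracked; a plausible sketch of the convention (the exact key format is an assumption, not taken from the source):

def get_prog_key(func_name, import_file_pk):
    """Cache key for progress reporting, e.g. 'SEED:save_raw_data:PROG:42'."""
    return 'SEED:%s:PROG:%s' % (func_name, import_file_pk)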
Example #7
def cache_first_rows(import_file, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_file: ImportFile inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need to do
    it differently.

    """
    parser.seek_to_beginning()
    rows = parser.next()

    validation_rows = []
    for i in range(5):
        try:
            row = rows.next()
            if row:
                validation_rows.append(row)
        except StopIteration:
            # fewer than 5 rows in the file
            break

    # This is a fix for issue #24 to use original field order when importing
    # This is ultimately not the correct place for this fix.  The correct fix
    # is to update the mcm code to a newer version where the readers in mcm/reader.py
    # have a headers() function defined and then just do
    # first_row = parser.headers()
    # But until we can patch the mcm code this should fix the issue.
    local_reader = parser.reader
    if isinstance(local_reader, reader.ExcelParser):
        first_row = local_reader.sheet.row_values(local_reader.header_row)
    elif isinstance(local_reader, reader.CSVParser):
        first_row = local_reader.csvreader.fieldnames
        first_row = [local_reader._clean_super(x) for x in first_row]
    else:
        # for lack of anything better, fall back to the original behavior
        # when the parser is of an unknown type
        first_row = rows.next().keys()

    tmp = []
    for r in validation_rows:
        tmp.append(ROW_DELIMITER.join([str(r[x]) for x in first_row]))

    import_file.cached_second_to_fifth_row = "\n".join(tmp)

    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_file.cached_first_row = first_row or ''

    import_file.save()
    # Reset our file pointer for mapping.
    parser.seek_to_beginning()
Example #8
def cache_first_rows(import_file, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_file: ImportFile inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need to do
    it differently.

    """
    parser.seek_to_beginning()
    rows = parser.next()

    validation_rows = []
    for i in range(5):
        try:
            row = rows.next()
            if row:
                validation_rows.append(row)
        except StopIteration:
            # fewer than 5 rows in the file
            break

    # return the first row of the headers which are cleaned
    first_row = parser.headers()

    tmp = []
    for r in validation_rows:
        tmp.append(ROW_DELIMITER.join([str(r[x]) for x in first_row]))

    import_file.cached_second_to_fifth_row = "\n".join(tmp)

    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_file.cached_first_row = first_row or ''

    import_file.save()

    # Reset our file pointer for mapping.
    parser.seek_to_beginning()
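
Because this final variant touches the parser only through seek_to_beginning(), next(), and headers(), it is easy to exercise without a real file. A minimal sketch with invented stub classes and sample data:

ROW_DELIMITER = '|'  # assumed value, for illustration only

class StubParser(object):
    """Bare-minimum stand-in exposing the three methods cache_first_rows uses."""
    def __init__(self, header, data_rows):
        self.header, self.data = header, data_rows

    def seek_to_beginning(self):
        pass  # nothing to rewind in an in-memory stub

    def next(self):
        # like MCMParser.next(): an iterator of header -> value dicts
        return iter([dict(zip(self.header, r)) for r in self.data])

    def headers(self):
        return list(self.header)

class StubImportFile(object):
    """Records what cache_first_rows writes instead of hitting the database."""
    def save(self):
        print(self.cached_first_row)            # 'id|name'
        print(self.cached_second_to_fifth_row)  # '1|a\n2|b'

cache_first_rows(StubImportFile(),
                 StubParser(['id', 'name'], [['1', 'a'], ['2', 'b']]))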