Example #1
def parse_and_save_smdr_data():
    all_files = get_all_files(ATTACHMENTS_PATH)
    logger.debug(f"GOT {len(all_files)} files to parse")
    for file in all_files[:5]:
        if file.endswith("slk"):
            # only parse slk files - ignore gzipped files
            file_path = os.path.join(ATTACHMENTS_PATH, file)
            parser = SylkParser(file_path)
            fbuf = StringIO()
            parser.to_csv(fbuf)

            data = fbuf.getvalue().split("\n")

            for line in data:
                print(line)
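A minimal sketch (not from the original project; the file name is hypothetical) of the same to_csv round-trip, but reading the buffer back with the standard csv module so each row comes out as a list of fields instead of a raw text line:

import csv
from io import StringIO
from sylk_parser import SylkParser

parser = SylkParser("some_file.slk")  # hypothetical file name
fbuf = StringIO()
parser.to_csv(fbuf)
fbuf.seek(0)                          # rewind the buffer before reading it back
for fields in csv.reader(fbuf):
    print(fields)                     # each row as a list of column values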
Example #2
def test_stream_unicode():
    fpath = get_data_path("balance_analytique_cp1252.slk")
    parser = SylkParser(fpath, use_unicode=True)
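    # Note: `unicode` below is the Python 2 text type (in Python 2, `str` is the
    # byte-string type), so this test targets Python 2.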
    for line in parser:
        for i in line:
            assert isinstance(i, unicode)
    parser = SylkParser(fpath, use_unicode=False)
    for line in parser:
        for i in line:
            assert isinstance(i, str)
Example #3
def _test_one(test_filename, expected_results_filename, headers=None,
              encoding='cp1252'):

    filepath = get_data_path(test_filename)
    expected_results_filepath = get_data_path(expected_results_filename)

    if headers is None:
        headers = []

    parser = SylkParser(filepath, headers=headers, encoding=encoding)
    fbuf = StringIO()
    parser.to_csv(fbuf)
    test_results = fbuf.getvalue()
    with open(expected_results_filepath) as handle:
        expected_results = handle.read()
    assert test_results.strip() == expected_results.strip()
    print("Tested {}".format(test_filename))
Example #4
import sys

from sylk_parser import SylkParser

# argv1: source file
# argv2: output file

parser = SylkParser(sys.argv[1])
with open(sys.argv[2], "w") as fbuf:  # text mode: to_csv writes CSV text (cf. the StringIO buffers above)
    parser.to_csv(fbuf)
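Assuming the script above is saved as sylk2csv.py (a hypothetical name), one way to drive it from Python:

import subprocess
import sys

# hypothetical script and file names
subprocess.run([sys.executable, "sylk2csv.py", "input.slk", "output.csv"], check=True)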
Example #5
def parse_xls(xls):
    worksheet = SylkParser(xls)
    SUBTABLE_START = "AREA"
    SUBTABLE_END = "Total"
    SUBTABLE_EMPTY = "Tabla vac"  # prefix of "Tabla vacía" ("empty table"), matched via startswith()
    area_row = None
    dataset = {}
    cross_cols = None
    current_index = None
    previous_row = None
    for i, row in enumerate(worksheet):
        first_col = str(row[0]).strip()
        if area_row is None:
            if first_col.startswith(SUBTABLE_START):
                number = int(
                    re.match(fr"{SUBTABLE_START}\s*#\s*(\d+)\s*",
                             first_col).group(1))
                current_index = number
                name = row[1]
                area_row = i
                header_row = None
        else:
            if first_col == SUBTABLE_END or first_col.startswith(
                    SUBTABLE_EMPTY):
                if current_index not in dataset:
                    dataset[current_index] = {}
                area_row = None
            elif i > area_row + 2 and first_col:  # actual rows
                if current_index not in dataset:
                    dataset[current_index] = {}
                    header_row = previous_row
                    first_header = str(header_row[1]).strip()
                    if first_header != 'Casos':
                        cross_cols = []
                        while str(header_row[
                                1 + len(cross_cols)]).strip() != 'Total':
                            cross_cols.append(
                                '.' +
                                format_col(header_row[1 + len(cross_cols)]))
                    else:
                        cross_cols = ['']
                for j, col in enumerate(cross_cols):
                    value = row[1 + j]
                    if not value:
                        print(f"cannot parse row {row}")
                        exit(1)
                    dataset[current_index][format_col(first_col) + col] = value
        previous_row = row
    if len(dataset) < 100:
        return None
    dataset_fixed = {INDEX: []}
    for area, cols in dataset.items():
        for col in cols:
            dataset_fixed[col] = []
    for area, cols in dataset.items():
        dataset_fixed[INDEX].append(area)
        dataset[area] = {col: vals for col, vals in cols.items()}
        for col in dataset_fixed.keys():
            if col != INDEX:
                value = dataset[area][col] if col in dataset[area] else '0'
                value = int(format_col(value.replace('-', '0')))
                dataset_fixed[col].append(value)
    dataset = dataset_fixed
    return dataset
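parse_xls returns a columnar dict: the list of area numbers under the project's INDEX constant plus one list of integer values per column. A minimal sketch of reading that structure back row by row (the file name is hypothetical; parse_xls and INDEX come from the surrounding module):

dataset = parse_xls("areas_report.slk")  # hypothetical input file
if dataset is not None:
    columns = list(dataset)
    # every list holds one entry per AREA subtable, so the columns zip back into rows
    for values in zip(*(dataset[col] for col in columns)):
        print(dict(zip(columns, values)))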
Example #6
#!/usr/bin/env python
# Hmmm. doesn't seem to work

from io import StringIO
from sylk_parser import SylkParser

parser = SylkParser("loop.oleo")

fbuf = StringIO()
parser.to_csv(fbuf)

test_results = fbuf.getvalue()
print(test_results)