Example #1
    def execute(self, context):
        try:
            nm_arq = 'CONSULTAS.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            table1 = etl.convert(table, {
                'fk_servico': int,
                'fk_operadora': int
            })

            etl.tocsv(table1, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            print('Entrou na exceção :)')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'

        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{data} carregado')
            else:
                self.logger.info(f't{nm_arq} não foi encontrado no container')
        finally:
            self.logger.info('Tudo Carregado')
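For reference, a minimal sketch of what the etl.convert step above does, run on a toy in-memory table (values are hypothetical, not the real CONSULTAS.csv data):

import petl as etl

t = [['fk_servico', 'fk_operadora'], ['12', '3']]
print(list(etl.convert(t, {'fk_servico': int, 'fk_operadora': int})))
# [('fk_servico', 'fk_operadora'), (12, 3)]  -- the CSV strings become ints before tocsv writes them back out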
Example #2
File: test_io.py Project: deytao/petl
def test_tocsv_appendcsv_gz():
    """Test the tocsv and appendcsv function."""

    # exercise function
    table = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    f = NamedTemporaryFile(delete=False)
    fn = f.name + ".gz"
    f.close()
    tocsv(table, fn, delimiter="\t")

    # check what it did
    with gzip.open(fn, "rb") as o:
        actual = csv.reader(o, delimiter="\t")
        expect = [["foo", "bar"], ["a", "1"], ["b", "2"], ["c", "2"]]
        ieq(expect, actual)

    # check appending
    table2 = (("foo", "bar"), ("d", 7), ("e", 9), ("f", 1))
    appendcsv(table2, fn, delimiter="\t")

    # check what it did
    with gzip.open(fn, "rb") as o:
        actual = csv.reader(o, delimiter="\t")
        expect = [["foo", "bar"], ["a", "1"], ["b", "2"], ["c", "2"], ["d", "7"], ["e", "9"], ["f", "1"]]
        ieq(expect, actual)
Example #3
def tocsvwithheader(table, source, **kwargs):
    """
    Use `petl.tocsv` to write CSV data in `table` to file `source`, including
    key-value metadata header if passed in as the keyword argument `metadata`.
    The first row in `table` is assumed to contain the header columns.
    """
    metadata = kwargs.pop("metadata", {})
    kwargs.pop("write_header", None)  # make sure write_header not in kwargs

    # prepare header
    header = petl.header(table)

    # prepare metadata rows using #-prefix, and :-suffix for keys
    metadata_rows = []
    for key, value in metadata.items():
        metadata_row = [''] * len(header)
        metadata_row[0] = '#' + str(key) + ':'
        metadata_row[1] = str(value)
        metadata_rows.append(metadata_row)

    # prepare data (stripped of header)
    data = petl.data(table)

    # combine metadata + header + data, then write out
    combined = metadata_rows + [header] + list(data)
    petl.tocsv(combined, source, write_header=True, **kwargs)
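A minimal usage sketch for the helper above (the table, metadata values, and filename are hypothetical), showing the #-prefixed metadata rows it prepends:

tbl = [['id', 'name'], [1, 'a'], [2, 'b']]
tocsvwithheader(tbl, 'people.csv', metadata={'source': 'demo', 'rows': 2})
# people.csv would then contain:
# #source:,demo
# #rows:,2
# id,name
# 1,a
# 2,b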
Example #4
def test_stringsource():

    table1 = (('foo', 'bar'), ('a', '1'), ('b', '2'), ('c', '2'))

    # test writing to a string buffer
    ss = StringSource()
    etl.tocsv(table1, ss)
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    actual = ss.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    table2 = etl.fromcsv(StringSource(actual))
    ieq(table1, table2)
    ieq(table1, table2)

    # test appending
    etl.appendcsv(table1, ss)
    actual = ss.getvalue()
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    eq_(expect, actual)
Example #5
def test_stringsource():
    tbl1 = (('foo', 'bar'),
            ('a', '1'),
            ('b', '2'),
            ('c', '2'))

    # test writing to a string buffer
    ss = StringSource()
    etl.tocsv(tbl1, ss)
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    actual = ss.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    tbl2 = etl.fromcsv(StringSource(actual))
    ieq(tbl1, tbl2)
    ieq(tbl1, tbl2)

    # test appending
    etl.appendcsv(tbl1, ss)
    actual = ss.getvalue()
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    eq_(expect, actual)
Example #6
    def download_new_collection(cls) -> None:
        # store a small dictionary of planet URL -> name for the transformation below
        planets_arr = {}
        for planets in SWAPI.fetch_data(settings.SW_PLANETS_URL):
            planets_arr.update({i['url']: i['name'] for i in planets})

        create = True
        file_name = '{}.csv'.format(time())
        csv_path = Path(CSV_PATH, file_name)

        for people in SWAPI.fetch_data(settings.SW_PEOPLE_URL):
            table = etl.fromdicts(
                people,
                header=[
                    'name', 'height', 'mass', 'hair_color', 'skin_color',
                    'eye_color', 'birth_year', 'gender', 'homeworld', 'edited'
                ]).convert('edited', lambda v: v[0:10]).convert(
                    'homeworld',
                    lambda v: planets_arr.get(v, '')).rename('edited', 'date')

            if create:
                etl.tocsv(table, source=csv_path, write_header=True)
                create = False
            else:
                etl.appendcsv(table, source=csv_path)

        c = SWPeopleCollection()
        c.file.name = file_name
        c.save()
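The write-once-then-append batching pattern used above, in miniature (hypothetical batches; only the first batch writes the header):

import petl as etl

batches = [[{'name': 'Luke'}], [{'name': 'Leia'}]]
for i, batch in enumerate(batches):
    t = etl.fromdicts(batch, header=['name'])
    if i == 0:
        etl.tocsv(t, 'people.csv', write_header=True)   # creates the file with a header row
    else:
        etl.appendcsv(t, 'people.csv')                   # appends data rows only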
Example #7
    def to_csv(self, local_path=None, temp_file_compression=None, encoding=None, errors='strict',
               write_header=True, csv_name=None, **csvargs):
        """
        Outputs table to a CSV. Additional key word arguments are passed to ``csv.writer()``. So,
        e.g., to override the delimiter from the default CSV dialect, provide the delimiter
        keyword argument.

        .. warning::
                If a file already exists at the given location, it will be
                overwritten.

        `Args:`
            local_path: str
                The path to write the csv locally. If it ends in ".gz" or ".zip", the file will be
                compressed. If not specified, a temporary file will be created and returned,
                and that file will be removed automatically when the script is done running.
            temp_file_compression: str
                If a temp file is requested (ie. no ``local_path`` is specified), the compression
                type for that file. Currently "None", "gzip" or "zip" are supported.
                If a ``local_path`` is specified, this argument is ignored.
            encoding: str
                The CSV encoding type for `csv.writer()
                <https://docs.python.org/2/library/csv.html#csv.writer/>`_
            errors: str
                Raise an Error if encountered
            write_header: boolean
                Include header in output
            csv_name: str
                If ``zip`` compression (either specified or inferred), the name of csv file
                within the archive.
            \**csvargs: kwargs
                ``csv_writer`` optional arguments

        `Returns:`
            str
                The path of the new file
        """  # noqa: W605

        # If a zip archive.
        if files.zip_check(local_path, temp_file_compression):
            return self.to_zip_csv(archive_path=local_path,
                                   encoding=encoding,
                                   errors=errors,
                                   write_header=write_header,
                                   csv_name=csv_name,
                                   **csvargs)

        if not local_path:
            suffix = '.csv' + files.suffix_for_compression_type(temp_file_compression)
            local_path = files.create_temp_file(suffix=suffix)

        # Create normal csv/.gzip
        petl.tocsv(self.table,
                   source=local_path,
                   encoding=encoding,
                   errors=errors,
                   write_header=write_header,
                   **csvargs)

        return local_path
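A usage sketch under the assumption that this method belongs to the Parsons Table class (a petl-backed table wrapper); the paths and data here are hypothetical:

from parsons import Table   # assumption: this snippet comes from a Parsons-style Table

tbl = Table([{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}])
path = tbl.to_csv()                               # temp .csv, removed when the script exits
gz_path = tbl.to_csv('out.csv.gz')                # ".gz" suffix selects gzip compression
tsv_path = tbl.to_csv('out.csv', delimiter='\t')  # extra kwargs pass through to csv.writer()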
Example #8
def test_append_to_file_for_csv(user_export_file, tmpdir, media_root):
    # given
    export_data = [
        {"id": "123", "name": "test1", "collections": "coll1"},
        {"id": "345", "name": "test2"},
    ]
    headers = ["id", "name", "collections"]
    delimiter = ";"

    file_name = "test.csv"

    table = etl.fromdicts([{"id": "1", "name": "A"}], header=headers, missing=" ")

    with NamedTemporaryFile() as temp_file:
        etl.tocsv(table, temp_file.name, delimiter=delimiter)
        user_export_file.content_file.save(file_name, temp_file)

    # when
    append_to_file(export_data, headers, user_export_file, FileTypes.CSV, delimiter)

    # then
    user_export_file.refresh_from_db()

    csv_file = user_export_file.content_file
    file_content = csv_file.read().decode().split("\r\n")
    assert ";".join(headers) in file_content
    assert ";".join(export_data[0].values()) in file_content
    assert (";".join(export_data[1].values()) + "; ") in file_content

    shutil.rmtree(tmpdir)
Example #9
def test_tocsv_appendcsv_gz():
    """Test the tocsv and appendcsv function."""

    # exercise function
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    f = NamedTemporaryFile(delete=False)
    fn = f.name + '.gz'
    f.close()
    tocsv(table, fn, delimiter='\t')

    # check what it did
    o = gzip.open(fn, 'rb')
    try:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'], ['a', '1'], ['b', '2'], ['c', '2']]
        ieq(expect, actual)
    finally:
        o.close()

    # check appending
    table2 = (('foo', 'bar'), ('d', 7), ('e', 9), ('f', 1))
    appendcsv(table2, fn, delimiter='\t')

    # check what it did
    o = gzip.open(fn, 'rb')
    try:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'], ['a', '1'], ['b', '2'], ['c', '2'],
                  ['d', '7'], ['e', '9'], ['f', '1']]
        ieq(expect, actual)
    finally:
        o.close()
Example #10
def DataIntegration(clinics_LOC, Services_LOC, Location_LOC):
    # Reading the clinics.csv file
    fileData = pt.fromcsv(clinics_LOC)

    # Reading the clinic_services.csv file
    servicesData = pt.fromcsv(Services_LOC)

    # reading the xml file cliniclocations.xml
    locationXML = pt.fromxml(Location_LOC, 'clinic', {
        "ClinicID": "ClinicID",
        "Lat": "Lat",
        "Lon": "Lon"
    })

    # join the services and clinics tables using petl's join, with ClinicID as the key
    fileJoin = pt.join(servicesData, fileData, key="ClinicID")

    # join the result with the XML locations table, again on ClinicID
    MainJoin = pt.join(fileJoin, locationXML, key="ClinicID")

    # acquire the required columns
    result = pt.cut(MainJoin, 'ClinicServiceID', 'Service', 'ClinicID',
                    'Suburb', 'Postcode', 'Lat', 'Lon')

    # create the final CSV file, clinic_service_locations.csv
    pt.tocsv(result, "clinic_service_locations.csv")
    print('Csv file generated.!!!')
Example #11
File: commands.py Project: PGower/Unsync
def export(data, output_file, source, csv_arg, errors, write_header, append):
    """Export the specified table of data to a csv file."""
    existing_data = data.get(source)
    if append is True:
        petl.appendcsv(existing_data, output_file, errors=errors, **dict(csv_arg))
    else:
        petl.tocsv(existing_data, output_file, errors=errors, write_header=write_header, **dict(csv_arg))
Example #12
def test_export_gift_cards_in_batches_to_csv(
    gift_card,
    gift_card_expiry_date,
    gift_card_used,
    tmpdir,
):
    # given
    gift_cards = GiftCard.objects.exclude(id=gift_card_used.id).order_by("pk")

    table = etl.wrap([["code"]])
    temp_file = NamedTemporaryFile()
    etl.tocsv(table, temp_file.name, delimiter=",")

    # when
    export_gift_cards_in_batches(
        gift_cards,
        ["code"],
        ",",
        temp_file,
        "csv",
    )

    # then
    file_content = temp_file.read().decode().split("\r\n")

    # ensure headers are in the file
    assert "code" in file_content

    for card in gift_cards:
        assert card.code in file_content

    shutil.rmtree(tmpdir)
Example #13
def main(argv):
    global full_name2sk_indiv_id

    parser = argparse.ArgumentParser()
    parser.add_argument("--attendance-filename", required=True, nargs='+', action='append', \
        help="Attendance filename (input Servant Keeper attendance report file(s)...can be wildcard)")
    parser.add_argument("--mapping-filename", required=True, help="'Mapping' filename (CSV mapping file with " \
        "'Last Name', 'Preferred Name' and 'Individual ID' Servant Keeper data columns)")
    parser.add_argument("--output-filename", required=True, help="'Output' filename (output loading CSV file " \
                        "containing resulting <date>, <time>, <ccb_event_id>, <sk_indiv_id> data)")
    parser.add_argument('--emit-data-csvs', action='store_true', help="If specified, output a CSV file per input " \
        "attendance data text file")
    parser.add_argument('--add-extra-fields', action='store_true', help="If specified, emit attender's full name, " \
                        "event name, and Servant Keeper week number in addition to base fields into loading CSV file")
    args = parser.parse_args()

    # Load up mapping matrix to map from Servant Keeper full_name's to Servant Keeper individual_id's
    full_name2sk_indiv_id = {}
    with open(args.mapping_filename, 'rb') as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            full_name2sk_indiv_id[row[0] + ', ' + row[1]] = row[2]

    if args.emit_data_csvs:
        output_csv_filebase = os.path.dirname(args.output_filename)
    else:
        output_csv_filebase = None

    attendance_table = join_tables(args.attendance_filename[0],
                                   output_csv_filebase, args.add_extra_fields)

    petl.tocsv(attendance_table, args.output_filename)
Example #14
def test_export_products_in_batches_for_csv(
    product_list,
    user_export_file,
    tmpdir,
    media_root,
):
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [
            ProductFieldEnum.NAME.value,
            ProductFieldEnum.DESCRIPTION.value,
            ProductFieldEnum.VARIANT_SKU.value,
        ],
        "warehouses": [],
        "attributes": [],
        "channels": [],
    }
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    table = etl.wrap([expected_headers])

    temp_file = NamedTemporaryFile()
    etl.tocsv(table, temp_file.name, delimiter=";")

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        temp_file,
        FileTypes.CSV,
    )

    # then

    expected_data = []
    for product in qs.order_by("pk"):
        product_data = []
        id = graphene.Node.to_global_id("Product", product.pk)
        product_data.append(id)
        product_data.append(product.name)

        for variant in product.variants.all():
            product_data.append(str(variant.sku))
            expected_data.append(product_data)

    file_content = temp_file.read().decode().split("\r\n")

    # ensure headers are in file
    assert ";".join(expected_headers) in file_content

    for row in expected_data:
        assert ";".join(row) in file_content

    shutil.rmtree(tmpdir)
Example #15
def test_stdoutsource_unicode():

    tbl = [('foo', 'bar'),
           (u'Արամ Խաչատրյան', 1),
           (u'Johann Strauß', 2)]
    etl.tocsv(tbl, StdoutSource(), encoding='utf-8')
    etl.tohtml(tbl, StdoutSource(), encoding='utf-8')
    etl.topickle(tbl, StdoutSource())
Example #16
def createDimCampaign():
    try:
        tbl_campaign = [['campaign_name', 'campaign_started', 'campaign_ended'], ['none', '2014-04-28T00:00:00', '2018-09-30T00:00:00']]
        dim_campaign = etl.head(tbl_campaign, 1)
        # Export as csv to load folder
        etl.tocsv(dim_campaign, 'load/dim_campaign.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
Example #17
def get_load_result(nameFile):

    table1 = etl.fromjson('./static/data/tabalaElegidaCalculadora.json')
    tocsv(table1, './exelFiles/' + str(nameFile) + '.csv')
    etl.tohtml(table1,
               './exelFiles/' + str(nameFile) + '.html',
               caption=str(nameFile))
    return jsonify(True)
Example #18
def createDimSubscriptions(events):
    try:
        dim_subscriptions_cut = etl.cut(events, 'type')
        dim_subscriptions_rename = etl.rename(dim_subscriptions_cut, {'type': 'subscription_name'})
        dim_subscriptions = etl.distinct(dim_subscriptions_rename)
        # Export as csv to load folder
        etl.tocsv(dim_subscriptions, 'load/dim_subscriptions.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
Example #19
def createDimMedium(events):
    try:
        dim_medium_cut = etl.cut(events, 'utm_medium')
        dim_medium_rename = etl.rename(dim_medium_cut, {'utm_medium': 'medium'})
        dim_medium = etl.distinct(dim_medium_rename)
        # Export as csv to load folder
        etl.tocsv(dim_medium, 'load/dim_medium.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
Example #20
def save_data_to_csv(data, record_count):
    db_meta = dict()
    transformed_data = transform_data(data)
    csv_filename = f'{datetime.now().ctime()}.csv'
    csv_file = CSV_DIR + csv_filename
    db_meta['name'] = csv_filename
    db_meta['count'] = record_count
    petl.tocsv(transformed_data, csv_file)
    add_metadata(db_meta)
Example #21
def createDimCampaignType(events):
    try:
        dim_campaigntype_cut = etl.cut(events, 'utm_campaign')
        dim_campaigntype_rename = etl.rename(dim_campaigntype_cut, {'utm_campaign': 'campaign_type'})
        dim_campaigntype = etl.distinct(dim_campaigntype_rename)
        # export as csv to load folder
        etl.tocsv(dim_campaigntype, 'load/dim_campaigntype.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
Example #22
def test_export_products_in_batches_for_csv(
    product_list, user_export_file, tmpdir, media_root,
):
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [ProductFieldEnum.NAME.value, ProductFieldEnum.VARIANT_SKU.value],
        "warehouses": [],
        "attributes": [],
    }
    file_name = "test.csv"
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    table = etl.wrap([expected_headers])

    with NamedTemporaryFile() as temp_file:
        etl.tocsv(table, temp_file.name, delimiter=";")
        user_export_file.content_file.save(file_name, temp_file)

    assert user_export_file.content_file

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        user_export_file,
        FileTypes.CSV,
    )

    # then
    user_export_file.refresh_from_db()
    csv_file = user_export_file.content_file
    assert csv_file

    expected_data = []
    for product in qs.order_by("pk"):
        product_data = []
        product_data.append(str(product.pk))
        product_data.append(product.name)

        for variant in product.variants.all():
            product_data.append(str(variant.sku))
            expected_data.append(product_data)

    file_content = csv_file.read().decode().split("\r\n")

    # ensure headers are in file
    assert ";".join(expected_headers) in file_content

    for row in expected_data:
        assert ";".join(row) in file_content

    shutil.rmtree(tmpdir)
Example #23
    def execute(self, context):
        try:
            nm_arq = 'RECEITAS.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                strnull = {'NULL': ''}
                return [
                    row[0].strip(), row[1].strip(), row[2].strip(),
                    row[3].strip(), row[4].strip(),
                    strnull[row['nr_centro_custo'].strip()]
                    if row['nr_centro_custo'].strip() in strnull else
                    row['nr_centro_custo'].strip(),
                    strnull[row['descricao_centro_custo'].strip()]
                    if row['descricao_centro_custo'].strip() in strnull else
                    row['descricao_centro_custo'].strip(), row[7].strip(),
                    row[8].strip(), row[9].strip(),
                    strnull[row['tipo_cobranca_sub'].strip()]
                    if row['tipo_cobranca_sub'].strip() in strnull else
                    row['tipo_cobranca_sub'].strip()
                ]

            table1 = etl.rowmap(table,
                                rowmapper,
                                header=[
                                    'mes_competencia', 'fk_beneficiario',
                                    'fk_empresa', 'dt_geracao_titulo',
                                    'dt_pgto', 'nr_centro_custo',
                                    'descricao_centro_custo', 'tipo_cobranca',
                                    'vl_cobranca', 'vl_pago',
                                    'tipo_cobranca_sub'
                                ])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            print('Entrou na exceção :)')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{data} carregado')
            else:
                self.logger.info(f't{nm_arq} não foi encontrado no container')
        finally:
            self.logger.info('Tudo Carregado')
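The NULL-to-empty mapping inside rowmapper above could also be expressed with petl.convert; a sketch covering just that part (field names are the ones the rowmapper reads from the raw CSV):

table_nonull = etl.convert(
    table,
    ('nr_centro_custo', 'descricao_centro_custo', 'tipo_cobranca_sub'),
    lambda v: '' if v.strip() == 'NULL' else v.strip())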
Example #24
    def setup(self):
        # write headers to CSV file
        etl.tocsv([self.csv_file_columns], self.csv_file_name)

        # create collection in database
        self.person_collection = PersonCollection(
            file_name=self.file_name,
            date=datetime.datetime.now(datetime.timezone.utc),
        )
        self.person_collection.save()
Example #25
File: commands.py Project: PGower/Unsync
def full_export(data, output_dir, csv_arg, errors, write_header, exclude_empty):
    """Export all data tables as CSV files."""
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    for name in data.registry:
        current_data = data.get(name)
        if exclude_empty and current_data.nrows() <= 0:
            continue
        output_file = os.path.join(output_dir, data.filename(name, 'csv'))
        petl.tocsv(current_data, output_file, errors=errors, write_header=write_header, **dict(csv_arg))
Example #26
def create_file_with_headers(file_headers: List[str], delimiter: str, file_type: str):
    table = etl.wrap([file_headers])

    if file_type == FileTypes.CSV:
        temp_file = NamedTemporaryFile("ab+", suffix=".csv")
        etl.tocsv(table, temp_file.name, delimiter=delimiter)
    else:
        temp_file = NamedTemporaryFile("ab+", suffix=".xlsx")
        etl.io.xlsx.toxlsx(table, temp_file.name)

    return temp_file
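A brief usage sketch of the helper above (FileTypes.CSV and the ';' delimiter mirror the snippet; the header list is hypothetical):

temp_file = create_file_with_headers(['id', 'name'], ';', FileTypes.CSV)
print(open(temp_file.name, 'rb').read())   # -> b'id;name\r\n'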
Example #27
def run_backup(sqlite_db, backup_path):
    """backs-up each table in the inventory database to a csv,
    zips them all up, and saves the zip with a timestamp-derived name.
    """
    ts = timestamp()

    # SET UP THE FOLDERS -----------------------------------------------------

    #check for backup folder, make if it doesn't exist
    if not os.path.exists(backup_path):
        os.makedirs(backup_path)

    #make a folder for this backup
    this_backup_path = os.path.join(backup_path, "backup_{0}".format(ts))
    if not os.path.exists(this_backup_path):
        os.makedirs(this_backup_path)
    click.echo(this_backup_path)

    # GET THE DATA OUT -------------------------------------------------------

    # temporarily store extracted csv files. (use this to delete them later)
    csvs = []

    # connect to the DB, get each table, save out as a csv.
    conn = sqlite3.connect(sqlite_db)
    for table in [
            'product', 'product_tags', 'sale', 'staff', 'supplier', 'tag'
    ]:
        t = etl.fromdb(lambda: conn.cursor(),
                       """SELECT * FROM {0}""".format(table))
        out_csv = os.path.join(this_backup_path, '{0}.csv'.format(table))
        etl.tocsv(t, out_csv)
        csvs.append(out_csv)

    # ZIP THE DATA UP --------------------------------------------------------

    # make a zip file in the main backup location
    zipfile_directory = os.path.join(backup_path,
                                     "inventory_backup_{0}.zip".format(ts))
    # create a zip file object
    zf = zipfile.ZipFile(zipfile_directory, mode="w")

    for each in csvs:
        click.echo(each)
        zf.write(filename=each,
                 arcname=os.path.basename(each),
                 compress_type=compression)
    zf.close()

    # REMOVE TEMP FILES -------------------------------------------------------

    for each in csvs:
        os.remove(each)
    os.rmdir(this_backup_path)
Example #28
def run(argv=None):
    parser = argparse.ArgumentParser()

    parser.add_argument("--filename", type=str, help='Input filename', required=True)
    # parser.add_argument("--max_iterations", type=int, help='Max number of requests', default=1000)

    known_args, _ = parser.parse_known_args()

    file_content = [line for line in read_file(known_args.filename)]
    table = petl.fromdicts(file_content)
    tokenized_table = tokenize(table)
    petl.tocsv(tokenized_table, 'words.csv')
Example #29
    def execute(self, context):
        try:
            nm_arq = 'SERVICOS.csv'
            with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
                data_from.write(
                    self.client_from.get_blob_client(
                        nm_arq).download_blob().readall())
            table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

            def rowmapper(row):
                strnull = {'NULL': ''}
                return [
                    row[0].strip(), row[1].strip(), row[2].strip(),
                    row[3].strip(), row[4].strip(),
                    strnull[row['subgrupo'].strip()]
                    if row['subgrupo'].strip() in strnull else
                    row['subgrupo'].strip(), row[6].strip(),
                    strnull[row['dt_alteracao'].strip()]
                    if row['dt_alteracao'].strip() in strnull else
                    row['dt_alteracao'].strip(),
                    strnull[row['ind_cirurgico'].strip()]
                    if row['ind_cirurgico'].strip() in strnull else
                    row['ind_cirurgico'].strip()
                ]

            table1 = etl.rowmap(table,
                                rowmapper,
                                header=[
                                    'pk_servico', 'tipo', 'descricao',
                                    'capitulo', 'grupo', 'subgrupo',
                                    'dt_inclusao', 'dt_alteracao',
                                    'ind_cirurgico'
                                ])

            table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
            etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')

            self.logger.info(f"Destino do arquivo {self.client_to}")
            self.logger.info(f"Container destino {self.container_to}")

        except azure.core.exceptions.ResourceNotFoundError:
            print('Entrou na exceção :)')

        upload_file = f'{_PROC_FILES}/t{nm_arq}'
        try:
            if os.path.isfile(upload_file):
                with open(upload_file, "rb") as data:
                    self.client_to.upload_blob(nm_arq, data, overwrite=True)
                    self.logger.info(f'{data} carregado')
            else:
                self.logger.info(f't{nm_arq} não foi encontrado no container')
        finally:
            self.logger.info('Tudo Carregado')
Example #30
def save_as_csv(data: List[Dict], file_path: str) -> None:
    """
    Save the given list of row dicts as a CSV file at the given path.
    """
    csv_header = data[0].keys()
    csv_table = [csv_header]

    for row in data:
        csv_row = [row[row_name] for row_name in csv_header]
        csv_table.append(csv_row)

    petl.tocsv(csv_table, file_path)
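A usage sketch with hypothetical records and output path (assumes petl and the typing imports implied by the signature above):

rows = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]
save_as_csv(rows, '/tmp/example.csv')
# /tmp/example.csv gets an "id,name" header row followed by one row per dict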
Example #31
def test_issue_231():

    table = [['foo', 'bar'], ['a', '1'], ['b', '2']]
    t = cut(table, 'foo')
    totsv(t, 'tmp/issue_231.tsv')
    u = fromtsv('tmp/issue_231.tsv')
    ieq(t, u)
    tocsv(t, 'tmp/issue_231.csv')
    u = fromcsv('tmp/issue_231.csv')
    ieq(t, u)
    topickle(t, 'tmp/issue_231.pickle')
    u = frompickle('tmp/issue_231.pickle')
    ieq(t, u)
Example #32
File: test_io.py Project: brutimus/petl
def test_issue_231():

    table = [['foo', 'bar'], ['a', '1'], ['b', '2']]
    t = cut(table, 'foo')
    totsv(t, 'tmp/issue_231.tsv')
    u = fromtsv('tmp/issue_231.tsv')
    ieq(t, u)
    tocsv(t, 'tmp/issue_231.csv')
    u = fromcsv('tmp/issue_231.csv')
    ieq(t, u)
    topickle(t, 'tmp/issue_231.pickle')
    u = frompickle('tmp/issue_231.pickle')
    ieq(t, u)
Example #33
def transform_xls(hires_and_promotions_excel, separations_excel,
                  exempt_roster_excel, output_file):
    hires_and_promotions = petl.io.xls \
                            .fromxls(hires_and_promotions_excel, sheet='Data') \
                            .rename(column_map_shared)

    separations = petl.io.xls \
                    .fromxls(separations_excel, sheet='Data') \
                    .rename({**column_map_shared, **column_map_separations})

    def dedup_separations(payroll_number, rows):
        rows_sorted = sorted(rows, key=lambda x: x['termination_date'])
        return rows_sorted[-1]

    separations_deduped = petl.rowreduce(separations, 'payroll_number',
                                         dedup_separations)

    exempt_roster = petl.io.xls \
                        .fromxls(exempt_roster_excel, sheet='Data') \
                        .rename(column_map_roster)

    merged = petl.mergesort(hires_and_promotions,
                            separations_deduped,
                            exempt_roster,
                            key='payroll_number')

    def dedup_merged(payroll_number, rows):
        rows_sorted = sorted(rows, key=lambda x: x['latest_start_date'])

        if len(rows_sorted) == 1:
            return rows_sorted[-1]

        merged_row = []
        for i in range(0, len(rows_sorted[0]) - 1):
            if (rows_sorted[0][i] == '' or rows_sorted[0][i] == None
                ) and rows_sorted[1][i] != '' and rows_sorted[1][i] != None:
                merged_row.append(rows_sorted[1][i])
            elif (rows_sorted[1][i] == '' or rows_sorted[1][i] == None
                  ) and rows_sorted[0][i] != '' and rows_sorted[0][i] != None:
                merged_row.append(rows_sorted[0][i])
            elif rows_sorted[0][i] == rows_sorted[1][i]:
                merged_row.append(rows_sorted[0][i])
            else:
                merged_row.append(
                    rows_sorted[1][i])  ## take latest value by start date

        return merged_row

    merged_deduped = petl.rowreduce(merged, 'payroll_number', dedup_merged)

    petl.tocsv(merged_deduped, source=output_file)
Example #34
def main(argv):
    global full_name2sk_indiv_id

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--attendance-filename",
        required=True,
        nargs="+",
        action="append",
        help="Attendance filename (input Servant Keeper attendance report file(s)...can be wildcard)",
    )
    parser.add_argument(
        "--mapping-filename",
        required=True,
        help="'Mapping' filename (CSV mapping file with "
        "'Last Name', 'Preferred Name' and 'Individual ID' Servant Keeper data columns)",
    )
    parser.add_argument(
        "--output-filename",
        required=True,
        help="'Output' filename (output loading CSV file "
        "containing resulting <date>, <time>, <ccb_event_id>, <sk_indiv_id> data)",
    )
    parser.add_argument(
        "--emit-data-csvs",
        action="store_true",
        help="If specified, output a CSV file per input " "attendance data text file",
    )
    parser.add_argument(
        "--add-extra-fields",
        action="store_true",
        help="If specified, emit attender's full name, "
        "event name, and Servant Keeper week number in addition to base fields into loading CSV file",
    )
    args = parser.parse_args()

    # Load up mapping matrix to map from Servant Keeper full_name's to Servant Keeper individual_id's
    full_name2sk_indiv_id = {}
    with open(args.mapping_filename, "rb") as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            full_name2sk_indiv_id[row[0] + ", " + row[1]] = row[2]

    if args.emit_data_csvs:
        output_csv_filebase = os.path.dirname(args.output_filename)
    else:
        output_csv_filebase = None

    attendance_table = join_tables(args.attendance_filename[0], output_csv_filebase, args.add_extra_fields)

    petl.tocsv(attendance_table, args.output_filename)
Example #35
File: convert.py Project: bazenkov/pytb
def load(tables_by_id, output_folder, devices):
    for device_id in tables_by_id:
        name = valid_name(devices[device_id]['name'])
        tbl_device_file = path.join(output_folder, f"{name}.csv")
        if path.isfile(tbl_device_file):
            tbl_old = petl.fromcsv(tbl_device_file, delimiter=';')
            old_header = petl.header(tbl_old)
            new_header = petl.header(tables_by_id[device_id])
            if old_header == new_header:
                petl.appendcsv(tables_by_id[device_id], source=tbl_device_file, delimiter=';')
            else:  # TODO: write to the new file
                raise ValueError(f"Incompatible headers:\n old={old_header}\n new={new_header}")
        else:
            petl.tocsv(tables_by_id[device_id], tbl_device_file, delimiter=';')
Example #36
File: test_io.py Project: brutimus/petl
def test_tocsv_appendcsv_gz():
    """Test the tocsv and appendcsv function."""
    
    # exercise function
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    f = NamedTemporaryFile(delete=False)
    fn = f.name + '.gz'
    f.close()
    tocsv(table, fn, delimiter='\t')
    
    # check what it did
    o = gzip.open(fn, 'rb')
    try:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'],
                  ['a', '1'],
                  ['b', '2'],
                  ['c', '2']]
        ieq(expect, actual)
    finally:
        o.close()

    # check appending
    table2 = (('foo', 'bar'),
              ('d', 7),
              ('e', 9),
              ('f', 1))
    appendcsv(table2, fn, delimiter='\t') 

    # check what it did
    o = gzip.open(fn, 'rb')
    try:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'],
                  ['a', '1'],
                  ['b', '2'],
                  ['c', '2'],
                  ['d', '7'],
                  ['e', '9'],
                  ['f', '1']]
        ieq(expect, actual)
    finally:
        o.close()
Example #37
def test_gzipsource():

    # setup
    tbl = [('foo', 'bar'), ('a', '1'), ('b', '2')]
    fn = NamedTemporaryFile().name + '.gz'
    expect = b"foo,bar\na,1\nb,2\n"

    # write explicit
    etl.tocsv(tbl, GzipSource(fn), lineterminator='\n')
    actual = gzip.open(fn).read()
    eq_(expect, actual)
    # write implicit
    etl.tocsv(tbl, fn, lineterminator='\n')
    actual = gzip.open(fn).read()
    eq_(expect, actual)

    # read explicit
    tbl2 = etl.fromcsv(GzipSource(fn))
    ieq(tbl, tbl2)
    # read implicit
    tbl2 = etl.fromcsv(fn)
    ieq(tbl, tbl2)
Example #38
File: test_io.py Project: brutimus/petl
def test_tocsv_appendcsv():
    """Test the tocsv and appendcsv function."""
    
    # exercise function
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))
    f = NamedTemporaryFile(delete=False)
    tocsv(table, f.name, delimiter='\t')
    
    # check what it did
    with open(f.name, 'rb') as o:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'],
                  ['a', '1'],
                  ['b', '2'],
                  ['c', '2']]
        ieq(expect, actual)
    
    # check appending
    table2 = (('foo', 'bar'),
              ('d', 7),
              ('e', 9),
              ('f', 1))
    appendcsv(table2, f.name, delimiter='\t') 

    # check what it did
    with open(f.name, 'rb') as o:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'],
                  ['a', '1'],
                  ['b', '2'],
                  ['c', '2'],
                  ['d', '7'],
                  ['e', '9'],
                  ['f', '1']]
        ieq(expect, actual)
Example #39
File: test_io.py Project: brutimus/petl
def test_StringSource():
    
    table1 = (('foo', 'bar'),
             ('a', '1'),
             ('b', '2'),
             ('c', '2'))

    # test writing to a string buffer
    ss = StringSource()
    tocsv(table1, ss)
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\n"
    actual = ss.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    table2 = fromcsv(StringSource(actual))
    ieq(table1, table2)
    ieq(table1, table2)

    # test appending
    appendcsv(table1, ss)
    actual = ss.getvalue()
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n"
    eq_(expect, actual)
Example #40
        error = [e,'','']
        query_errors[url] = error
    except JSONDecodeError as e:
        error = [e, r.raw.data, r.raw.read(100)]
        query_errors[url] = error


read_conn = psycopg2.connect("dbname=ais_engine user=ais_engine")
address_count = etl.fromdb(read_conn, 'select count(*) as N from {}'.format(warmup_address_table_name))
n = list(address_count.values('n'))[0]
warmup_rows = etl.fromdb(read_conn, 'select {address_field} from {table} OFFSET floor(random()*{n}) limit {limit}'.format(address_field=warmup_address_field, table=warmup_address_table_name, n=n, limit=warmup_row_limit))
# print(etl.look(warmup_rows))
responses = warmup_rows.addfield('response_status', (lambda a: query_address(a['street_address']))).progress(100)
# print(etl.look(responses))
eval = responses.aggregate('response_status', len)
print(etl.look(eval))
f_200 = [(count/warmup_row_limit) for status, count in eval[1:] if status == 200][0]
print(f_200)
############################
# WRITE ERRORS OUT TO FILE #
############################
print("Writing errors to file...")
error_table = []
for url, error_vals in query_errors.items():
    error_table.append([url, error_vals[0], error_vals[1]])
etl.tocsv(error_table, error_file)
exit(0) if f_200 > warmup_fraction_success else exit(1)



Example #41
    attr = data['attibutes'][x]['attrName']
    rules = data['attibutes'][x]['rules']
    rulesListSize = len(rules)
    for y in range(rulesListSize):
        if rules[y] == "Remove Null Value Rows":
            cleansedTable = etl.select(cleansedTable, attr, lambda v: v != '')
        if rules[y] == "Remove Duplicates":
            cleansedTable = etl.aggregate(cleansedTable, attr)
        if rules[y] == "Sort":
            cleansedTable = etl.mergesort(cleansedTable, key=attr)
        if rules[y] == "Number Validation":
            cleansedTable = etl.select(cleansedTable, attr)
        if rules[y] == "Fill Missing Values":
            cleansedTable = etl.filldown(cleansedTable, attr)

etl.tocsv(cleansedTable,'src/etl/outputs/cleansed.csv')

#Create rawData Table
dataTable = cleansedTable
rawDataTable = cleansedTable

reasonUniqueValues = etl.aggregate(dataTable,dataTable[0][20])

mappings = OrderedDict()

#mapping attributes
# go through each column (c = table)
for i in range(length):
    #get unique values for each column
    uniqueValues = etl.aggregate(dataTable,dataTable[0][i])
    #create unique value for each column
Example #42
#!/usr/bin/env python


import methods_raw_input
import preprocessing
from petl import tocsv


# Main code for sentiment classifier

#Setting Parameters
data_filename = "Tweets.csv"
p_train_data = 0.7
split_mode = 'normal'



train_data, test_data = methods_raw_input.really_read_filelines(data_filename, p_train_data, split_mode)

train_data = preprocessing.tokenise_data(train_data)

train_data = preprocessing.word_frequency(train_data, 'tweet_tokenized')


tocsv(train_data,'New_table.csv')
Example #43
    attr = data['attibutes'][x]['attrName']
    rules = data['attibutes'][x]['rules']
    rulesListSize = len(rules)
    for y in range(rulesListSize):
        if rules[y] == "Remove Null Value Rows":
            cleansedTable = etl.select(cleansedTable, attr, lambda v: v != '')
        if rules[y] == "Remove Duplicates":
            cleansedTable = etl.aggregate(cleansedTable, attr)
        if rules[y] == "Sort":
            cleansedTable = etl.mergesort(cleansedTable, key=attr)
        if rules[y] == "Number Validation":
            cleansedTable = etl.select(cleansedTable, attr)
        if rules[y] == "Fill Missing Values":
            cleansedTable = etl.filldown(cleansedTable, attr)

etl.tocsv(cleansedTable,'src/etl/outputs/cleansed.csv')

#Create rawData Table
dataTable = cleansedTable
rawDataTable = cleansedTable

mappings = OrderedDict()
#mapping attributes
#go through each column (c = table)
for i in range(length):
    #get unique values for each column
    uniqueValues = etl.aggregate(dataTable,dataTable[0][i])
    #create unique value for each column
    uniqueValArr = []
    k = 0
    for iterating_var in uniqueValues:
Example #44
# a table and field name can also be provided as arguments
look(table1)
table2 = unflatten(table1, 'lines', 3)
look(table2)


# tocsv

table = [['foo', 'bar'],
         ['a', 1],
         ['b', 2],
         ['c', 2]]

from petl import tocsv, look
look(table)
tocsv(table, 'test.csv', delimiter='\t')
# look what it did
from petl import fromcsv
look(fromcsv('test.csv', delimiter='\t'))


# appendcsv

table = [['foo', 'bar'],
         ['d', 7],
         ['e', 42],
         ['f', 12]]

# look at an existing CSV file
from petl import look, fromcsv
testcsv = fromcsv('test.csv', delimiter='\t')
Example #45
def main(argv):

    urls = {
        'INDIVIDUALS': {
            'xmlroot': 'response/individuals/individual',
            'parse_dict': {
                'Family ID': ('family', 'id'),
                'Individual ID': ('.', 'id'),
                'Family Position': 'family_position',
                'Prefix': 'salutation',
                'First Name': 'first_name',
                'Middle Name': 'middle_name',
                'Last Name': 'last_name',
                'Legal Name': 'legal_first_name',
                'Active': 'active',
                'Campus': 'campus',
                'Email': 'email',

                'Mailing Street': ".//address[@type='mailing']/street_address",
                'Mailing City': ".//address[@type='mailing']/city",
                'Mailing State': ".//address[@type='mailing']/state",
                'Mailing Postal Code': ".//address[@type='mailing']/zip",
                'Mailing Country': ".//address[@type='mailing']/country",

                'Home Street': ".//address[@type='home']/street_address",
                'Home City': ".//address[@type='home']/city",
                'Home State': ".//address[@type='home']/state",
                'Home Postal Code': ".//address[@type='home']/zip",
                'Home Country': ".//address[@type='home']/country",

                'Other Street': ".//address[@type='other']/street_address",
                'Other City': ".//address[@type='other']/city",
                'Other State': ".//address[@type='other']/state",
                'Other Postal Code': ".//address[@type='other']/zip",
                'Other Country': ".//address[@type='other']/country",

                'Contact Phone': ".//phone[@type='contact']",
                'Home Phone': ".//phone[@type='home']",
                'Work Phone': ".//phone[@type='work']",
                'Mobile Phone': ".//phone[@type='mobile']",
                'Emergency Phone': ".//phone[@type='emergency']",

                'Birthday': 'birthday',
                'Anniversary': 'anniversary',
                'Gender': 'gender',
                'Giving Number': 'giving_number',
                'Marital Status': 'marital_status',
                'Membership Start Date': 'membership_date',
                'Membership End Date': 'membership_end',
                'Membership Type': 'membership_type',
                'Baptized': 'baptized',
                # 'School District': ??,
                # 'How They Heard': ??,
                # 'How They Joined': ??,
                # 'Reason Left Church': ??,
                # 'Job Title': ??,
                'Deceased': 'deceased',

                # !!!

                'Baptism Date': ".//user_defined_date_fields/user_defined_date_field[label='Baptism Date']/date",
                'Baptized By': ".//user_defined_text_fields/user_defined_text_field[label='Baptized By']/text",
                'Confirmed Date': ".//user_defined_date_fields/user_defined_date_field[label='Confirmed Date']/date",
                'Confirmed': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Confirmed']/selection",
                'Mailbox Number': ".//user_defined_text_fields/user_defined_text_field[label='Mailbox Number']/text",
                'Spirit Mailing': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Spirit Mailing']/selection",
                'Photo Release': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Photo Release']/selection",
                'Ethnicity': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Ethnicity']/selection",
                'Transferred Frm': ".//user_defined_text_fields/user_defined_text_field[label='Transferred Frm']/text",
                'Transferred To': ".//user_defined_text_fields/user_defined_text_field[label='Transferred To']/text",
                'Pastr When Join': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Join']/text",
                'Pastr When Leav': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Leav']/text",
                'SK Indiv ID': ".//user_defined_text_fields/user_defined_text_field[label='SK Indiv ID']/text"
            }
        },
        'GROUPS': 'https://ingomar.ccbchurch.com/api.php?srv=group_profiles',
        'ACCOUNTS': 'https://ingomar.ccbchurch.com/api.php?srv=transaction_detail_type_list',
        'TRANSACTIONS': {
            'xmlroot': 'response/batches/batch/transactions/transaction',
            'parse_dict': {
                'Date': 'date',
                'Payment Type': 'payment_type',
                'Check Number': 'check_number',
                'Individual ID': ('individual', 'id'),
                'Account': './/transaction_details/transaction_detail/coa',
                'Amount': './/transaction_details/transaction_detail/amount',
                'Tax Deductible': './/transaction_details/transaction_detail/tax_deductible',
                'Note': './/transaction_details/transaction_detail/note'
            }
        }
    }

    parser = argparse.ArgumentParser(description="Parses XML file into CSV output")
    parser.add_argument("--type", required=True, help='One of ' + ', '.join(urls.keys()))
    parser.add_argument("--xml-input-filename", required=True, help="XML file to parse")
    parser.add_argument("--csv-output-filename", required=True, help="CSV output file")
    args = parser.parse_args()

    table1 = petl.fromxml(args.xml_input_filename, urls[args.type]['xmlroot'], urls[args.type]['parse_dict'])
    petl.tocsv(table1, args.csv_output_filename)
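A minimal, self-contained sketch of how petl.fromxml uses an element path plus a field mapping like the parse_dict above (toy XML, not the CCB schema):

import petl as etl

xml = b"""<response><individuals>
  <individual id="1"><first_name>Ann</first_name></individual>
  <individual id="2"><first_name>Bob</first_name></individual>
</individuals></response>"""

tbl = etl.fromxml(etl.MemorySource(xml), 'individuals/individual',
                  {'Individual ID': ('.', 'id'), 'First Name': 'first_name'})
etl.tocsv(tbl, 'individuals.csv')   # one CSV row per <individual> element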
Example #46
File: csv.py Project: DeanWay/petl
# fromcsv()
###########

import petl as etl
import csv
# set up a CSV file to demonstrate with
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 2]]
with open('example.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(table1)

# now demonstrate the use of fromcsv()
table2 = etl.fromcsv('example.csv')
table2


# tocsv()
#########

import petl as etl
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 2]]
etl.tocsv(table1, 'example.csv')
# look what it did
print(open('example.csv').read())
Example #47
import petl as etl
import re
from collections import OrderedDict
import pymysql
import dbconfig

read_db_conn = pymysql.connect(host=dbconfig.db_host,
                              port=dbconfig.db_port,
                              charset="utf8",
                              user=dbconfig.db_user,
                              password=dbconfig.db_pass,
                              db=dbconfig.db_name)


products = etl.fromdb(read_db_conn, "SELECT id,name,description FROM {} limit 5000".format(dbconfig.db_table_items))

# remove non-alphanumeric character
def cleanString(val):
    nonewline = val.replace('\n'," ")
    return re.sub(r'\W+', ' ', nonewline).lower()

mappings = OrderedDict()
mappings['id'] = 'id'
mappings['item_description'] = lambda val : cleanString(val['name'] + " " +val['description'])

products = etl.fieldmap(products, mappings)
etl.tocsv(products, 'query_result.csv')
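For illustration, cleanString applied to a hypothetical product name (newlines removed, runs of non-alphanumeric characters collapsed to single spaces, lowercased):

print(cleanString("Blue Widget\n(2-pack)!"))   # -> 'blue widget 2 pack '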
Example #48
def writeDataToCsv(path, output):
    etl.tocsv(getTable(path), output)
Example #49
 def save(self):
     if (self.rows is not None):
         etl.tocsv(self.rows, self.datafile, encoding='utf8')
Example #50
File: timing.py Project: DeanWay/petl
from __future__ import division, print_function, absolute_import


# progress()
############

import petl as etl
table = etl.dummytable(100000)
table.progress(10000).tocsv('example.csv')


# clock()
#########

import petl as etl
t1 = etl.dummytable(100000)
c1 = etl.clock(t1)
t2 = etl.convert(c1, 'foo', lambda v: v**2)
c2 = etl.clock(t2)
p = etl.progress(c2, 10000)
etl.tocsv(p, 'example.csv')
# time consumed retrieving rows from t1
c1.time
# time consumed retrieving rows from t2
c2.time
# actual time consumed by the convert step
c2.time - c1.time


Example #51
def attendance_file2table(filename, output_csv_filebase, add_extra_fields):
    global full_name2sk_indiv_id

    print "*** Parsing file: " + filename
    print

    attendance_dicts = []

    # CCB's Worship Service event IDs...
    event_ids = {}
    event_ids["8"] = 6
    event_ids["9"] = 7
    event_ids["10"] = 8
    event_ids["11:15"] = 9
    event_ids["Christmas"] = 13

    # The following are used to create CSV output filenames and to emit a human-readable event name if the
    # add_extra_fields flag is on
    event_names = {}
    event_names[6] = "08am"
    event_names[7] = "09am"
    event_names[8] = "10am"
    event_names[9] = "11_15am"
    event_names[13] = "Christmas Eve"

    # Time of event in Excel-parseable format
    event_times = {}
    event_times[6] = "08:00 AM"
    event_times[7] = "09:00 AM"
    event_times[8] = "10:00 AM"
    event_times[9] = "11:15 AM"
    event_times[13] = "04:00 PM"

    # Starting state...
    prior_line = None
    matched_month_year = None
    matched_service_time = None
    month = None
    year = None
    service_time = None
    line_number = 1
    total_row_dict = None
    event_id = None
    accumulated_row_totals_dict = {"week1": 0, "week2": 0, "week3": 0, "week4": 0, "week5": 0, "week6": 0, "total": 0}
    full_name = None
    phone = None
    num_processed_lines = 0

    for line in open(filename):

        # First pick off line at front of file indicating month and year that this attendance file is for...
        if not matched_month_year:
            matched_month_year = re.search("For the month of ([A-Z][a-z]+), ([0-9]{4})", line)
            if matched_month_year:
                month = string2monthnum(matched_month_year.group(1))
                year = string2yearnum(matched_month_year.group(2))
                if not (month and year):
                    print >> sys.stderr, "*** Filename: " + filename + ", line number: " + str(line_number)
                    print >> sys.stderr, "*** ERROR! Invalid month or year found"
                    print >> sys.stderr, line
                    print >> sys.stderr
                    sys.exit(1)
                first_day_in_month, num_days_in_month = calendar.monthrange(year, month)

                # Create list of 6 date objects, month_sundays, representing week1, week2, ... week6 Sunday dates
                # If a week has no Sunday, it is None
                day_countup = 1
                day_countup += 6 - first_day_in_month
                month_sundays = []
                if first_day_in_month != 6:
                    month_sundays.append(None)
                while day_countup <= num_days_in_month:
                    month_sundays.append(datetime.date(year, month, day_countup))
                    day_countup += 7
                while len(month_sundays) < 6:
                    month_sundays.append(None)
                christmas_eve_date = datetime.date(year, month, 24)

        # Second pick off line at front of file indicating worship service time that this attendance file is for...
        elif not matched_service_time:
            matched_service_time = re.search("Worship Service - (Sunday |Summer )?([^ ]*)", line)
            if matched_service_time:
                service_time = matched_service_time.group(2)
                if service_time in event_ids:
                    event_id = event_ids[service_time]
                    event_name = event_names[event_id]
                else:
                    print >> sys.stderr, "*** Filename: " + filename + ", line number: " + str(line_number)
                    print >> sys.stderr, '*** ERROR! Unrecognized service_time: "' + service_time + '"'
                    print >> sys.stderr
                    sys.exit(1)

        # ...then match attendance (row per person with weeks they attended) and total (summary at bottom) rows
        else:

            # Once we found row with totals...we're done, that's last line in attendance file we need to parse
            matched_total_line = re.search("^ {18}Total: {13}(?P<attendance>( +[0-9]+)+)\r?$", line)
            if matched_total_line:
                totals_attendance_dict = attendance_str2dict(
                    matched_total_line.group("attendance"), [-3, -9, -15, -20, -24, -29, -35], 3
                )
                break

            matched_attendance_line = re.search(
                "^ {6}"
                + "(?P<full_name>(?P<last_name>[A-Za-z]+([ \-'][A-Za-z]+)*), "
                + "(?P<first_name>([A-Za-z]+\.?)+([\-' ][A-Za-z]+)*)( \((?P<nick_name>[A-Za-z]+)\))?\.?)?\r?"
                + "(?P<phone>( +)?([0-9]{3}-[0-9]{3}-[0-9]{4}|Unlisted))?"
                + "(?P<attendance> +(1 +)+[1-6])?\r?$",
                line,
            )
            if matched_attendance_line:
                if matched_attendance_line.group("full_name"):
                    full_name = matched_attendance_line.group("full_name").strip()
                if matched_attendance_line.group("phone"):
                    phone = matched_attendance_line.group("phone").strip()
                if matched_attendance_line.group("attendance"):
                    if full_name:
                        attendance = matched_attendance_line.group("attendance").strip()
                        row_dict = attendance_str2dict(attendance, [-1, -7, -13, -18, -22, -27, -33], 1)
                        row_dict["full_name"] = full_name
                        if phone:
                            row_dict["phone"] = phone
                        else:
                            row_dict["phone"] = ""
                        num_processed_lines += 1
                        full_name = None
                        phone = None
                        if row_dict["total"] != (
                            row_dict["week1"]
                            + row_dict["week2"]
                            + row_dict["week3"]
                            + row_dict["week4"]
                            + row_dict["week5"]
                            + row_dict["week6"]
                        ):
                            print >> sys.stderr, "*** Filename: " + filename + ", line number: " + str(line_number)
                            print >> sys.stderr, "*** ERROR! Bad row total, doesn't match sum of weeks 1-6"
                            print >> sys.stderr, row_dict
                            print >> sys.stderr
                            break

                        for key in accumulated_row_totals_dict:
                            accumulated_row_totals_dict[key] += row_dict[key]
                        attendance_dicts.append(row_dict)

            # Buffer the current line for line folding if needed (see 'line folding' above)
            prior_line = line
            line_number += 1

    print "*** Number of attendance lines processed: " + str(num_processed_lines)
    print "*** Number of attendees: " + str(accumulated_row_totals_dict["total"])
    print

    if output_csv_filebase and event_id:
        output_csv_filename = (
            output_csv_filebase + "/" + str(year) + format(month, "02d") + "_" + str(event_names[event_id]) + ".csv"
        )
        all_columns_table = petl.fromdicts(attendance_dicts)
        petl.tocsv(all_columns_table, output_csv_filename)

    # Build 2nd list of dicts, where each list item is dict of individual date/event attendance.  I.e. a row per
    # worship service date vs original attendance dicts format of a row per attendee across all weeks in month.
    # This is the actual one returned and eventually emitted into output file
    attendance_dicts2 = []
    for attendance_dict in attendance_dicts:
        for key in attendance_dict:
            if key[:4] == "week" and attendance_dict[key] != 0:
                week_index = int(key[4:5]) - 1
                if month_sundays[week_index] is not None:
                    attendance_dict2 = {}
                    full_name = attendance_dict["full_name"]
                    if full_name in full_name2sk_indiv_id:
                        attendance_dict2["Individual ID"] = full_name2sk_indiv_id[full_name]
                        if event_name == "Christmas Eve":
                            attendance_dict2["Date"] = christmas_eve_date
                        else:
                            attendance_dict2["Date"] = month_sundays[week_index]
                        attendance_dict2["Event ID"] = event_id
                        if add_extra_fields:
                            attendance_dict2["Time"] = event_times[event_id]
                            attendance_dict2["Full Name"] = full_name
                            attendance_dict2["Event Name"] = event_name
                            attendance_dict2["Week Num"] = week_index + 1
                        attendance_dicts2.append(attendance_dict2)
                    else:
                        print >> sys.stderr, '*** WARNING! Cannot find "' + full_name + '" in map'
                        print >> sys.stderr
                else:
                    print >> sys.stderr, '*** WARNING! Cannot find Sunday date for week index "' + str(week_index) + '"'
                    print >> sys.stderr

    # Check if numbers on Servant Keeper's reported Total: line match the totals we've been accumulating
    # per attendance row entry.  If they don't match, show WARNING (not ERROR, since via manual checks, it appears
    # that Servant Keeper totals are buggy)
    if totals_attendance_dict:
        for key in accumulated_row_totals_dict:
            if accumulated_row_totals_dict[key] != totals_attendance_dict[key]:
                pp = pprint.PrettyPrinter(stream=sys.stderr)
                print >> sys.stderr, "*** WARNING! Servant Keeper reported totals do not match data totals"
                print >> sys.stderr, "Servant Keeper Totals:"
                pp.pprint(totals_attendance_dict)
                print >> sys.stderr, "Data Totals:"
                pp.pprint(accumulated_row_totals_dict)
                print >> sys.stderr
                break

    return_table = petl.fromdicts(attendance_dicts2)
    header = petl.header(return_table)
    if "Event Name" in header:
        return_table = petl.cut(
            return_table, "Full Name", "Event Name", "Time", "Week Num", "Date", "Event ID", "Individual ID"
        )
    else:
        return_table = petl.cut(return_table, "Date", "Event ID", "Individual ID")

    return return_table
Example #52
def test_stdoutsource():

    tbl = [('foo', 'bar'), ('a', 1), ('b', 2)]
    etl.tocsv(tbl, StdoutSource(), encoding='ascii')
    etl.tohtml(tbl, StdoutSource(), encoding='ascii')
    etl.topickle(tbl, StdoutSource())
Example #53
import csv
import petl as etl
import os

def call():
    input_filename = os.path.abspath(os.path.join(os.path.dirname(__file__),
        'data.csv' ))

    return (
        etl
        .fromcsv(input_filename)
        .rename('intptlat', 'Latitude')
        .rename('intptlong', 'Longitude')
        .rename('zcta5', 'ZCTA')
        .convert('Latitude', float)
        .convert('Longitude', float)
    )

if __name__ == '__main__':
    etl.tocsv(call())