Example #1
    def test_pipeline(self):
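        # Mock the HTTP layer so HttpJSONExtractor yields `data`, then pump it
        # through two increment transformers into StdoutLoader.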
        data = [2, 4, 6]
        with patch('requests.request') as mock_request:
            mock_request.return_value.status_code = 200
            mock_request.return_value.json.return_value = data

            class IncrementTransformer:
                def __init__(self, by):
                    self.by = by

                def __call__(self, record):
                    return record + self.by

            extract_stream = pipe.instream(
                extractor=HttpJSONExtractor,
                extractor_config={'url': "http://blah.com"})

            transformer_stream1 = pipe.midstream(
                transformer=IncrementTransformer, transformer_config={'by': 3})
            transformer_stream2 = pipe.midstream(
                transformer=IncrementTransformer, transformer_config={'by': 1})

            loader_stream = pipe.outstream(StdoutLoader)
            pipe.flow(extract_stream, loader_stream, transformer_stream1,
                      transformer_stream2)
            """
Example #2
    def test_extractor_http_json(self):
        data = [{"foo": "bar"}]
        with patch('requests.request') as mock_request:
            mock_request.return_value.status_code = 200
            mock_request.return_value.json.return_value = data
            instream = pipe.instream(
                extractor=HttpJSONExtractor,
                extractor_config={'url': 'http://blah.com'})
            for i, o in zip(data, instream):
                assert i == o
Example #3
    def test_extractor(self):
        # EchoExtractor simply replays the records it is configured with, so
        # every record fed in should come back out unchanged.
        data = [{'foo': 'bar'}, {'lorem': 'ipsum'}, {45: 193}]
        instream = pipe.instream(extractor=EchoExtractor,
                                 extractor_config={'data': data})
        for i, o in zip(data, instream):
            assert i == o
Example #4
from etl import pipe
from etl.extractors import HttpJSONExtractor
from etl.loaders import CSVLoader
import os

# Initialise an instream that fetches data from an API.
#
# instream by itself doesn't fetch data; it needs an extractor to do that. Extractors are callable classes that hold
# the logic for reading data from a source and handing it over to instream in the form of an iterator.
#
# Our source data is a JSON list of users from https://jsonplaceholder.typicode.com/users
# etl-pipeline has a default extractor for reading JSON from a REST API - HttpJSONExtractor.
# To initialise instream, we pass HttpJSONExtractor and the parameters needed to initialise it.

data_source_api = 'https://jsonplaceholder.typicode.com/users'
instream = pipe.instream(extractor=HttpJSONExtractor,
                         extractor_config={'url': data_source_api})
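
# To make the extractor contract concrete, here is a minimal hypothetical
# extractor (not part of etl-pipeline; the zero-argument __call__ returning an
# iterator is an assumption based on the description above - the built-in
# EchoExtractor plays a similar replay role in the tests):
class ListExtractor:
    def __init__(self, data):
        self.data = data

    def __call__(self):
        return iter(self.data)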

# Initialise an outstream to load the data coming from instream into a CSV file.
#
# Just like instream, outstream doesn't load data by itself; it needs a loader to do that. Loaders are callable classes
# that hold the logic for loading data into storage. outstream feeds its loader one record at a time.
#
# We will load the incoming data into a CSV file.
# etl-pipeline has a default loader for writing CSV files - CSVLoader.
# To initialise outstream, we pass the CSVLoader class and the parameters needed to initialise it.

filepath = "%s/simple_transfer.csv" % os.path.dirname(__file__)
headers = [
    'id', 'name', 'username', 'email', 'address', 'phone', 'website', 'company'
]
outstream = pipe.outstream(loader=CSVLoader,
                           # the loader_config keys here are an assumption, by
                           # analogy with CSVExtractor's 'csv_file_path' key
                           loader_config={'csv_file_path': filepath,
                                          'headers': headers})
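
# Finally, wire the two streams together. flow() takes the instream first and
# the outstream second, followed by any midstreams (this matches the call
# order in Example #1).
pipe.flow(instream, outstream)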
Example #5
from etl import pipe
from etl.extractors import CSVExtractor
from etl.transformers import CsvToDictTransformer
from etl.loaders import MongodbLoader
import os

# Initialise an instream that fetches data from a local CSV file.
#
# instream by itself doesn't fetch data; it needs an extractor to do that. Extractors are callable classes that hold
# the logic for reading data from a source and handing it over to instream in the form of an iterator.
#
# Our source data is a list of users in a local CSV file.
# etl-pipeline has a default extractor for reading CSV files - CSVExtractor.
# To initialise instream, we pass CSVExtractor and the parameters needed to initialise it.
source_filepath = "%s/csv_to_mongo_source.csv" % os.path.dirname(__file__)
instream = pipe.instream(extractor=CSVExtractor,
                         extractor_config={'csv_file_path': source_filepath})

# Initialise a midstream that transforms each CSV record into a dict.
#
# midstream by itself doesn't transform data; it needs a transformer to do that. Transformers are callable classes
# that hold the logic for transforming incoming records and handing them back to midstream.
#
# etl-pipeline has a default transformer for turning a CSV record into a dict - CsvToDictTransformer.
# To initialise midstream, we pass CsvToDictTransformer and the parameters needed to initialise it.

headers = ['id', 'first_name', 'last_name', 'email', 'gender', 'ip_address']
midstream = pipe.midstream(transformer=CsvToDictTransformer,
                           transformer_config={'headers': headers})
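
# Conceptually, the transformer pairs each incoming CSV row with the
# configured headers. A hypothetical equivalent, shown only to illustrate the
# callable-class contract from Example #1 (this is not the library's code):
class RowToDict:
    def __init__(self, headers):
        self.headers = headers

    def __call__(self, record):
        return dict(zip(self.headers, record))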

# Initialise an outstream to load the data coming from instream into a MongoDB collection.
#
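# As before, outstream needs a loader, and MongodbLoader is imported above for
# this purpose. Its loader_config keys are not shown in this listing, so the
# keys below (host, port, db, collection) are assumptions for illustration;
# check MongodbLoader's signature in your version of etl-pipeline.

outstream = pipe.outstream(loader=MongodbLoader,
                           loader_config={'host': 'localhost',
                                          'port': 27017,
                                          'db': 'etl_demo',
                                          'collection': 'users'})

# Wire everything together; flow() takes the instream, then the outstream,
# then any midstreams (matching the call order in Example #1).
pipe.flow(instream, outstream, midstream)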