Exemplo n.º 1
0
def run_load_dataframes(config, expected_columns=10, expected_rows=42):
    """Load DataFrames from a SourceFile config and sanity-check the result shape.

    Asserts exactly one DataFrame was produced with the expected number of
    columns and rows, then returns it for further inspection by the caller.
    """
    dataframes = SourceFile.load_dataframes(config=config, logger=AirbyteLogger(), skip_data=False)
    # The source is expected to yield exactly one DataFrame.
    assert len(dataframes) == 1
    result = dataframes[0]
    assert len(result.columns) == expected_columns
    assert len(result.index) == expected_rows
    return result
Exemplo n.º 2
0
    def test_streams_outputs_records(self, config_credentials, configured_catalog):
        """
        Using standard tests is unreliable for Agent Activities and Agent Overview streams,
        because the data there changes in real-time, therefore additional pytests are used.
        """
        messages = SourceZendeskTalk().read(AirbyteLogger(), config_credentials, configured_catalog)
        # Keep only RECORD messages; at least one must have been emitted.
        records = [message for message in messages if message.type == Type.RECORD]

        assert len(records) > 0
Exemplo n.º 3
0
    def _run_sync_test(conf, catalog):
        """Run a full Instagram sync and check that both records and state were emitted."""
        buckets = {Type.RECORD: [], Type.STATE: []}
        for message in SourceInstagram().read(AirbyteLogger(), conf, catalog):
            # Ignore message types other than RECORD/STATE (e.g. LOG).
            if message.type in buckets:
                buckets[message.type].append(message)

        assert len(buckets[Type.RECORD]) > 0
        assert len(buckets[Type.STATE]) > 0
Exemplo n.º 4
0
def read_stream(
    source: SourceHubspot, config: Mapping, catalog: ConfiguredAirbyteCatalog, state: MutableMapping = None
) -> Tuple[Mapping, List]:
    """Consume all messages from a source read and split them by type.

    Args:
        source: the HubSpot source to read from.
        config: connector configuration passed through to ``source.read``.
        catalog: configured catalog describing which streams to read.
        state: optional incremental state to resume from.

    Returns:
        A tuple ``(records, states)`` where ``records`` maps each stream name
        to the list of its record payloads (in emission order) and ``states``
        is the ordered list of emitted state payloads.
    """
    records = {}
    states = []
    for message in source.read(AirbyteLogger(), config, catalog, state):
        if message.type == Type.RECORD:
            # setdefault returns the (possibly new) list, so grouping is one expression.
            records.setdefault(message.record.stream, []).append(message.record)
        elif message.type == Type.STATE:
            states.append(message.state)

    return records, states
Exemplo n.º 5
0
    def _read_records(
            conf,
            catalog,
            state=None) -> Tuple[List[AirbyteMessage], List[AirbyteMessage]]:
        """Read from Facebook Marketing and split output into record and state messages."""
        record_messages = []
        state_messages = []
        stream = SourceFacebookMarketing().read(AirbyteLogger(), conf, catalog, state=state)
        for msg in stream:
            if msg.type == Type.RECORD:
                record_messages.append(msg)
            elif msg.type == Type.STATE:
                state_messages.append(msg)

        return record_messages, state_messages
Exemplo n.º 6
0
    def get_grid_sheets(spreadsheet_metadata) -> List[str]:
        """Return titles of grid sheets only, filtering out sheets that contain
        just an image/diagram (those lack "gridProperties").

        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/sheets#sheetproperties
        """
        grid_sheets = []
        non_grid_sheets = []
        for sheet in spreadsheet_metadata.sheets:
            sheet_title = sheet.properties.title
            if hasattr(sheet.properties, "gridProperties"):
                grid_sheets.append(sheet_title)
            else:
                non_grid_sheets.append(sheet_title)

        if non_grid_sheets:
            # Join with a comma separator so individual sheet titles stay
            # readable in the log ("a, b" instead of the run-together "ab").
            AirbyteLogger().log("WARN", "Skip non-grid sheets: " + ", ".join(non_grid_sheets))

        return grid_sheets
Exemplo n.º 7
0
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""


import unittest
from unittest.mock import Mock, patch

from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, SyncMode
from airbyte_protocol.models.airbyte_protocol import DestinationSyncMode
from base_python import AirbyteLogger
from google_sheets_source.client import GoogleSheetsClient
from google_sheets_source.helpers import Helpers
from google_sheets_source.models import CellData, GridData, RowData, Sheet, SheetProperties, Spreadsheet

logger = AirbyteLogger()


class TestHelpers(unittest.TestCase):
    def test_headers_to_airbyte_stream(self):
        sheet_name = "sheet1"
        header_values = ["h1", "h2", "h3"]

        expected_stream = AirbyteStream(
            name=sheet_name,
            json_schema={
                "$schema": "http://json-schema.org/draft-07/schema#",
                "type": "object",
                # For simplicity, the type of every cell is a string
                "properties": {header: {"type": "string"} for header in header_values},
            },
Exemplo n.º 8
0
import csv
import io
import json
import pkgutil
import sys
import time
from typing import Dict, List, Optional, Tuple, Union

import backoff
import msal
import requests
from airbyte_protocol import AirbyteStream
from base_python import AirbyteLogger
from msal.exceptions import MsalServiceError

LOGGER = AirbyteLogger()


def log_backoff_attempt(details):
    """Log the exception that triggered a backoff retry, plus the attempt count."""
    current_exception = sys.exc_info()[1]
    attempt = details.get('tries')
    LOGGER.info(f"Encountered exception when querying the Microsoft API: {str(current_exception)}. Backing off: {attempt} try")


class Client:
    """
    Microsoft Teams API Reference: https://docs.microsoft.com/en-us/graph/api/resources/teams-api-overview?view=graph-rest-1.0
    """

    MICROSOFT_GRAPH_BASE_API_URL: str = "https://graph.microsoft.com/"
    MICROSOFT_GRAPH_API_VERSION: str = "v1.0"
    PAGINATION_COUNT: Optional[int] = 20
Exemplo n.º 9
0
def run_load_nested_json_schema(config, expected_columns=10, expected_rows=42):
    """Load nested JSON data from a SourceFile config and sanity-check its length.

    Asserts exactly one data object was produced with the expected number of
    items, then returns it.
    """
    loaded = SourceFile.load_nested_json(config, logger=AirbyteLogger())
    # The source is expected to yield exactly one data object.
    assert len(loaded) == 1
    result = loaded[0]
    assert len(result) == expected_rows
    return result
Exemplo n.º 10
0
 def _write_config(self, token):
     logger = AirbyteLogger()
     logger.info("Credentials Refreshed")