Example #1
 def __init__(self, config, plugin_config):
     """Configure the list connector from the DSS config dicts.

     :param config: dict with the connector configuration
     :param plugin_config: plugin-level settings
     """
     Connector.__init__(self, config, plugin_config)
     self.sharepoint_list_title = self.config.get("sharepoint_list_title")
     self.auth_type = config.get('auth_type')
     logger.info(
         'init:sharepoint_list_title={}, auth_type={}'.format(
             self.sharepoint_list_title, self.auth_type))
     # Mappings filled later by get_read_schema().
     self.column_ids, self.column_names = {}, {}
     self.column_to_expand = {}
     self.expand_lookup = config.get("expand_lookup", False)
     self.metadata_to_retrieve = config.get("metadata_to_retrieve", [])
     self.display_metadata = bool(self.metadata_to_retrieve)
     self.client = SharePointClient(config)
Example #2
    def __init__(self, root, config, plugin_config):
        """Set up the filesystem provider.

        :param root: root path for this provider (leading '/' is stripped)
        :param config: dict with the object's configuration
        :param plugin_config: plugin-level settings
        """
        self.root = root[1:] if root.startswith('/') else root
        self.provider_root = "/"
        logger.info('init:root={}'.format(self.root))

        self.client = SharePointClient(config)
 def __init__(self, config, plugin_config):
     """Configure the v1.0.8 list connector.

     :param config: dict with the connector configuration
     :param plugin_config: plugin-level settings
     """
     Connector.__init__(self, config, plugin_config)
     logger.info('SharePoint Online plugin connector v1.0.8')
     self.sharepoint_list_title = self.config.get("sharepoint_list_title")
     self.auth_type = config.get('auth_type')
     logger.info('init:sharepoint_list_title={}, auth_type={}'.format(
         self.sharepoint_list_title, self.auth_type))
     # Mappings filled later by get_read_schema().
     self.column_ids = {}
     self.column_names = {}
     self.column_entity_property_name = {}
     self.columns_to_format = []
     self.dss_column_name = {}
     self.column_sharepoint_type = {}
     self.expand_lookup = config.get("expand_lookup", False)
     # Fix: copy the configured list. The original appended "Title" to the
     # list object held in the caller's config dict, so each new connector
     # instance grew the shared list by one more "Title".
     self.metadata_to_retrieve = list(
         config.get("metadata_to_retrieve") or [])
     advanced_parameters = config.get("advanced_parameters", False)
     self.write_mode = "create"
     if not advanced_parameters:
         self.max_workers = 1  # no multithread per default
         self.batch_size = 100
         self.sharepoint_list_view_title = ""
     else:
         self.max_workers = config.get("max_workers", 1)
         self.batch_size = config.get("batch_size", 100)
         self.sharepoint_list_view_title = config.get(
             "sharepoint_list_view_title", "")
     logger.info(
         "init:advanced_parameters={}, max_workers={}, batch_size={}".
         format(advanced_parameters, self.max_workers, self.batch_size))
     # "Title" is always retrieved; don't duplicate it if already listed.
     if "Title" not in self.metadata_to_retrieve:
         self.metadata_to_retrieve.append("Title")
     self.display_metadata = len(self.metadata_to_retrieve) > 0
     self.client = SharePointClient(config)
     self.sharepoint_list_view_id = None
     if self.sharepoint_list_view_title:
         self.sharepoint_list_view_id = self.get_view_id(
             self.sharepoint_list_title, self.sharepoint_list_view_title)
Example #4
class SharePointFSProvider(FSProvider):
    """DSS filesystem provider backed by a SharePoint document library.

    Paths handed in by DSS are relative; ``get_full_path`` rebuilds the
    absolute SharePoint path (provider root + configured root + path)
    before every client call.
    """

    def __init__(self, root, config, plugin_config):
        """
        :param root: the root path for this provider
        :param config: the dict of the configuration of the object
        :param plugin_config: contains the plugin settings
        """
        # Store root without its leading slash; it is re-anchored under
        # provider_root by get_full_path().
        if len(root) > 0 and root[0] == '/':
            root = root[1:]
        self.root = root
        self.provider_root = "/"
        logger.info('init:root={}'.format(self.root))

        self.client = SharePointClient(config)

    # util methods
    def get_full_path(self, path):
        # Join provider root, configured root and the relative path,
        # skipping empty segments so os.path.join gets no blank parts.
        path_elts = [
            self.provider_root,
            get_rel_path(self.root),
            get_rel_path(path)
        ]
        path_elts = [e for e in path_elts if len(e) > 0]
        return os.path.join(*path_elts)

    def close(self):
        """No persistent resources to release; only logs the call."""
        logger.info('close')

    def stat(self, path):
        """Return DSS stat dict for *path*, or None if nothing exists there.

        Lookup order: (1) if the path itself contains files/folders it is a
        directory; (2) otherwise look the item up in its parent, preferring
        the folder interpretation over the file one.
        """
        full_path = get_lnt_path(self.get_full_path(path))
        logger.info('stat:path="{}", full_path="{}"'.format(path, full_path))
        files = self.client.get_files(full_path)
        folders = self.client.get_folders(full_path)

        # Anything with children is reported as a directory of size 0.
        if has_sharepoint_items(files) or has_sharepoint_items(folders):
            return {
                DSSConstants.PATH: get_lnt_path(path),
                DSSConstants.SIZE: 0,
                DSSConstants.IS_DIRECTORY: True
            }

        # Not a populated directory: inspect the parent's listings.
        path_to_item, item_name = os.path.split(full_path)
        files = self.client.get_files(path_to_item)
        folders = self.client.get_folders(path_to_item)

        file = extract_item_from(item_name, files)
        folder = extract_item_from(item_name, folders)

        if folder is not None:
            return {
                DSSConstants.PATH: get_lnt_path(path),
                DSSConstants.SIZE: 0,
                DSSConstants.LAST_MODIFIED: get_last_modified(folder),
                DSSConstants.IS_DIRECTORY: True
            }
        if file is not None:
            return {
                DSSConstants.PATH: get_lnt_path(path),
                DSSConstants.SIZE: get_size(file),
                DSSConstants.LAST_MODIFIED: get_last_modified(file),
                DSSConstants.IS_DIRECTORY: False
            }
        return None

    def set_last_modified(self, path, last_modified):
        """Not supported for SharePoint; always returns False (unchanged)."""
        full_path = self.get_full_path(path)
        logger.info('set_last_modified: path="{}", full_path="{}"'.format(
            path, full_path))
        return False

    def browse(self, path):
        """Return the DSS browse dict for *path* (children, file or folder).

        Falls through: directory with children -> single file matched in the
        parent -> empty folder matched in the parent -> not found.
        """
        path = get_rel_path(path)
        full_path = get_lnt_path(self.get_full_path(path))
        logger.info('browse:path="{}", full_path="{}"'.format(path, full_path))

        folders = self.client.get_folders(full_path)
        files = self.client.get_files(full_path)
        children = []

        for file in loop_sharepoint_items(files):
            children.append({
                DSSConstants.FULL_PATH:
                get_lnt_path(os.path.join(path, get_name(file))),
                DSSConstants.EXISTS:
                True,
                DSSConstants.DIRECTORY:
                False,
                DSSConstants.SIZE:
                get_size(file),
                DSSConstants.LAST_MODIFIED:
                get_last_modified(file)
            })
        for folder in loop_sharepoint_items(folders):
            children.append({
                DSSConstants.FULL_PATH:
                get_lnt_path(os.path.join(path, get_name(folder))),
                DSSConstants.EXISTS:
                True,
                DSSConstants.DIRECTORY:
                True,
                DSSConstants.SIZE:
                0,
                DSSConstants.LAST_MODIFIED:
                get_last_modified(folder)
            })

        if len(children) > 0:
            return {
                DSSConstants.FULL_PATH: get_lnt_path(path),
                DSSConstants.EXISTS: True,
                DSSConstants.DIRECTORY: True,
                DSSConstants.CHILDREN: children
            }
        # No children: maybe the path designates a single file in its parent.
        path_to_file, file_name = os.path.split(full_path)

        files = self.client.get_files(path_to_file)

        for file in loop_sharepoint_items(files):
            if get_name(file) == file_name:
                return {
                    DSSConstants.FULL_PATH: get_lnt_path(path),
                    DSSConstants.EXISTS: True,
                    DSSConstants.SIZE: get_size(file),
                    DSSConstants.LAST_MODIFIED: get_last_modified(file),
                    DSSConstants.DIRECTORY: False
                }

        # NOTE(review): same split as above — kept as-is for byte-identity;
        # finally try to match an (empty) folder of that name in the parent.
        parent_path, item_name = os.path.split(full_path)
        folders = self.client.get_folders(parent_path)
        folder = extract_item_from(item_name, folders)
        if folder is None:
            ret = {DSSConstants.FULL_PATH: None, DSSConstants.EXISTS: False}
        else:
            ret = {
                DSSConstants.FULL_PATH: get_lnt_path(path),
                DSSConstants.EXISTS: True,
                DSSConstants.DIRECTORY: True,
                DSSConstants.SIZE: 0
            }
        return ret

    def enumerate(self, path, first_non_empty):
        """Recursively list files below *path* as DSS path dicts."""
        path = get_rel_path(path)
        full_path = get_lnt_path(self.get_full_path(path))
        logger.info('enumerate:path={},fullpath={}'.format(path, full_path))
        # NOTE(review): split result is unused here; kept for byte-identity.
        path_to_item, item_name = os.path.split(full_path)
        ret = self.list_recursive(path, full_path, first_non_empty)
        return ret

    def list_recursive(self, path, full_path, first_non_empty):
        """Depth-first listing; with first_non_empty, stops at first file batch."""
        paths = []
        folders = self.client.get_folders(full_path)
        for folder in loop_sharepoint_items(folders):
            paths.extend(
                self.list_recursive(
                    get_lnt_path(os.path.join(path, get_name(folder))),
                    get_lnt_path(os.path.join(full_path, get_name(folder))),
                    first_non_empty))
        files = self.client.get_files(full_path)
        for file in loop_sharepoint_items(files):
            paths.append({
                DSSConstants.PATH:
                get_lnt_path(os.path.join(path, get_name(file))),
                DSSConstants.LAST_MODIFIED:
                get_last_modified(file),
                DSSConstants.SIZE:
                get_size(file)
            })
            if first_non_empty:
                return paths
        return paths

    def delete_recursive(self, path):
        """Delete the file or folder at *path*; returns number deleted (0/1).

        Raises when the name is ambiguous (both a file and a folder).
        """
        full_path = self.get_full_path(path)
        logger.info('delete_recursive:path={},fullpath={}'.format(
            path, full_path))
        # Refuse to wipe the provider root.
        assert_path_is_not_root(full_path)
        path_to_item, item_name = os.path.split(full_path.rstrip("/"))
        files = self.client.get_files(path_to_item)
        folders = self.client.get_folders(path_to_item)
        file = extract_item_from(item_name, files)
        folder = extract_item_from(item_name, folders)

        if file is not None and folder is not None:
            raise Exception(
                "Ambiguous naming with file / folder {}".format(item_name))

        if file is not None:
            self.client.delete_file(get_lnt_path(full_path))
            return 1

        if folder is not None:
            self.client.delete_folder(get_lnt_path(full_path))
            return 1

        return 0

    def move(self, from_path, to_path):
        """Move a file; True when the API response confirms the move."""
        full_from_path = self.get_full_path(from_path)
        full_to_path = self.get_full_path(to_path)
        logger.info('move:from={},to={}'.format(full_from_path, full_to_path))

        response = self.client.move_file(full_from_path, full_to_path)
        return SharePointConstants.RESULTS_CONTAINER_V2 in response and SharePointConstants.MOVE_TO in response[
            SharePointConstants.RESULTS_CONTAINER_V2]

    def read(self, path, stream, limit):
        """Copy the remote file's content into *stream* (limit is unused)."""
        full_path = self.get_full_path(path)
        logger.info('read:full_path={}'.format(full_path))

        response = self.client.get_file_content(full_path)
        bio = BytesIO(response.content)
        shutil.copyfileobj(bio, stream)

    def write(self, path, stream):
        """Upload *stream* to *path*, creating intermediate folders first."""
        full_path = self.get_full_path(path)
        logger.info('write:path="{}", full_path="{}"'.format(path, full_path))
        # Buffer the whole stream in memory before the upload call.
        bio = BytesIO()
        shutil.copyfileobj(stream, bio)
        bio.seek(0)
        data = bio.read()
        create_path(self.client, full_path)
        response = self.client.write_file_content(full_path, data)
        logger.info("write:response={}".format(response))
class SharePointListsConnector(Connector):
    """DSS connector reading and writing SharePoint Online lists (v1.0.8).

    ``get_read_schema`` resolves the list's columns and fills the
    static-name / display-name mapping dicts used when converting rows.
    """

    def __init__(self, config, plugin_config):
        """
        :param config: dict of the connector configuration
        :param plugin_config: plugin-level settings
        """
        Connector.__init__(self, config, plugin_config)
        logger.info('SharePoint Online plugin connector v1.0.8')
        self.sharepoint_list_title = self.config.get("sharepoint_list_title")
        self.auth_type = config.get('auth_type')
        logger.info('init:sharepoint_list_title={}, auth_type={}'.format(
            self.sharepoint_list_title, self.auth_type))
        # Mappings filled by get_read_schema().
        self.column_ids = {}
        self.column_names = {}
        self.column_entity_property_name = {}
        self.columns_to_format = []
        self.dss_column_name = {}
        self.column_sharepoint_type = {}
        self.expand_lookup = config.get("expand_lookup", False)
        # Fix: copy the configured list. The original appended "Title" to
        # the list object held in the caller's config dict, so every new
        # connector instance grew the shared list by one more "Title".
        self.metadata_to_retrieve = list(
            config.get("metadata_to_retrieve") or [])
        advanced_parameters = config.get("advanced_parameters", False)
        self.write_mode = "create"
        if not advanced_parameters:
            self.max_workers = 1  # no multithread per default
            self.batch_size = 100
            self.sharepoint_list_view_title = ""
        else:
            self.max_workers = config.get("max_workers", 1)
            self.batch_size = config.get("batch_size", 100)
            self.sharepoint_list_view_title = config.get(
                "sharepoint_list_view_title", "")
        logger.info(
            "init:advanced_parameters={}, max_workers={}, batch_size={}".
            format(advanced_parameters, self.max_workers, self.batch_size))
        # "Title" is always retrieved; don't duplicate it if already listed.
        if "Title" not in self.metadata_to_retrieve:
            self.metadata_to_retrieve.append("Title")
        self.display_metadata = len(self.metadata_to_retrieve) > 0
        self.client = SharePointClient(config)
        self.sharepoint_list_view_id = None
        if self.sharepoint_list_view_title:
            self.sharepoint_list_view_id = self.get_view_id(
                self.sharepoint_list_title, self.sharepoint_list_view_title)

    def get_view_id(self, list_title, view_title):
        """Return the Id of the view named *view_title* on *list_title*.

        :raises ValueError: if no view with that title exists.
        """
        if not list_title:
            return None
        views = self.client.get_list_views(list_title)
        for view in views:
            if view.get("Title") == view_title:
                return view.get("Id")
        raise ValueError("View '{}' does not exist in list '{}'.".format(
            view_title, list_title))

    def get_read_schema(self):
        """Build the DSS schema and (re)fill the column mapping dicts."""
        logger.info('get_read_schema')
        sharepoint_columns = self.client.get_list_fields(
            self.sharepoint_list_title)
        dss_columns = []
        self.column_ids = {}
        self.column_names = {}
        self.column_entity_property_name = {}
        self.columns_to_format = []
        for column in sharepoint_columns:
            logger.info("get_read_schema:{}/{}/{}/{}/{}/{}".format(
                column[SharePointConstants.TITLE_COLUMN],
                column[SharePointConstants.TYPE_AS_STRING],
                column[SharePointConstants.STATIC_NAME],
                column[SharePointConstants.INTERNAL_NAME],
                column[SharePointConstants.ENTITY_PROPERTY_NAME],
                self.is_column_displayable(column)))
            if self.is_column_displayable(column):
                sharepoint_type = get_dss_type(
                    column[SharePointConstants.TYPE_AS_STRING])
                self.column_sharepoint_type[column[
                    SharePointConstants.STATIC_NAME]] = column[
                        SharePointConstants.TYPE_AS_STRING]
                if sharepoint_type is not None:
                    dss_columns.append({
                        SharePointConstants.NAME_COLUMN:
                        column[SharePointConstants.TITLE_COLUMN],
                        SharePointConstants.TYPE_COLUMN:
                        sharepoint_type
                    })
                    # Index the column under its static name and entity
                    # property name so rows can be mapped back to DSS names.
                    self.column_ids[column[
                        SharePointConstants.STATIC_NAME]] = sharepoint_type
                    self.column_names[column[
                        SharePointConstants.STATIC_NAME]] = column[
                            SharePointConstants.TITLE_COLUMN]
                    self.column_entity_property_name[column[
                        SharePointConstants.STATIC_NAME]] = column[
                            SharePointConstants.ENTITY_PROPERTY_NAME]
                    self.dss_column_name[column[
                        SharePointConstants.STATIC_NAME]] = column[
                            SharePointConstants.TITLE_COLUMN]
                    self.dss_column_name[column[
                        SharePointConstants.ENTITY_PROPERTY_NAME]] = column[
                            SharePointConstants.TITLE_COLUMN]
                if sharepoint_type == "date":
                    # Dates need conversion to the DSS format in format_row.
                    self.columns_to_format.append(
                        (column[SharePointConstants.STATIC_NAME],
                         sharepoint_type))
        logger.info(
            "get_read_schema: Schema updated with {}".format(dss_columns))
        return {SharePointConstants.COLUMNS: dss_columns}

    @staticmethod
    def get_column_lookup_field(column_static_name):
        """Return the expandable lookup field for a static name, or None."""
        if column_static_name in SharePointConstants.EXPENDABLES_FIELDS:
            return SharePointConstants.EXPENDABLES_FIELDS.get(
                column_static_name)
        return None

    def is_column_displayable(self, column):
        """A column shows if it is requested metadata or not hidden."""
        if self.display_metadata and (column['StaticName']
                                      in self.metadata_to_retrieve):
            return True
        return (not column[SharePointConstants.HIDDEN_COLUMN])

    @staticmethod
    def must_column_display_be_forced(column):
        """Calculated columns are always displayed."""
        return column[SharePointConstants.TYPE_AS_STRING] in ["Calculated"]

    @staticmethod
    def is_column_expendable(column):
        """A column can be expanded if it is neither hidden nor read-only."""
        return (not column[SharePointConstants.HIDDEN_COLUMN]) \
            and (not column[SharePointConstants.READ_ONLY_FIELD])

    def generate_rows(self,
                      dataset_schema=None,
                      dataset_partitioning=None,
                      partition_id=None,
                      records_limit=-1):
        """Yield list items page by page as DSS rows.

        Fix: previously a whole extra page could be yielded past
        ``records_limit``; the generator now stops exactly at the limit
        (non-positive values mean "no limit").
        """
        if self.column_ids == {}:
            self.get_read_schema()

        logger.info(
            'generate_row:dataset_schema={}, dataset_partitioning={}, partition_id={}, records_limit={}'
            .format(dataset_schema, dataset_partitioning, partition_id,
                    records_limit))

        page = {}
        record_count = 0
        is_first_run = True
        is_record_limit = records_limit > 0
        while is_first_run or self.is_not_last_page(page):
            is_first_run = False
            page = self.client.get_list_items(
                self.sharepoint_list_title,
                params=self.get_requests_params(page))
            for row in self.get_page_rows(page):
                yield column_ids_to_names(self.dss_column_name,
                                          self.format_row(row))
                record_count += 1
                if is_record_limit and record_count >= records_limit:
                    return

    @staticmethod
    def is_not_last_page(page):
        """More pages remain while the page has rows and a next-page link."""
        return "Row" in page and "NextHref" in page

    def get_requests_params(self, page):
        """Build the query params for the next page (plus view id if set)."""
        next_page_query_string = page.get("NextHref", "")
        next_page_requests_params = parse_query_string_to_dict(
            next_page_query_string)
        if self.sharepoint_list_view_id:
            next_page_requests_params.update(
                {"View": self.sharepoint_list_view_id})
        return next_page_requests_params

    @staticmethod
    def get_page_rows(page):
        """Return the rows of a page (empty list when absent).

        Fix: the default used to be ``""``; an empty list is the correct
        empty value for a row collection (iteration behavior unchanged).
        """
        return page.get("Row", [])

    def format_row(self, row):
        """Convert date columns of *row* to the DSS date format, in place."""
        for column_to_format, _ in self.columns_to_format:
            value = row.get(column_to_format)
            if value:
                row[column_to_format] = sharepoint_to_dss_date(value)
        return row

    def get_writer(self,
                   dataset_schema=None,
                   dataset_partitioning=None,
                   partition_id=None):
        """Return a SharePointListWriter configured for this connector."""
        assert_list_title(self.sharepoint_list_title)
        return SharePointListWriter(self.config,
                                    self,
                                    dataset_schema,
                                    dataset_partitioning,
                                    partition_id,
                                    max_workers=self.max_workers,
                                    batch_size=self.batch_size,
                                    write_mode=self.write_mode)

    def get_partitioning(self):
        """Partitioning is not supported by this connector."""
        logger.info('get_partitioning')
        raise Exception("Unimplemented")

    def list_partitions(self, partitioning):
        """No partitions: always an empty list."""
        logger.info('list_partitions:partitioning={}'.format(partitioning))
        return []

    def partition_exists(self, partitioning, partition_id):
        """Partitioning is not supported by this connector."""
        logger.info('partition_exists:partitioning={}, partition_id={}'.format(
            partitioning, partition_id))
        raise Exception("unimplemented")

    def get_records_count(self, partitioning=None, partition_id=None):
        """Record counting is not supported by this connector."""
        logger.info(
            'get_records_count:partitioning={}, partition_id={}'.format(
                partitioning, partition_id))
        raise Exception("unimplemented")
Example #6
class SharePointListsConnector(Connector):
    """DSS connector for SharePoint lists, with optional lookup expansion."""

    def __init__(self, config, plugin_config):
        """
        :param config: dict of the connector configuration
        :param plugin_config: plugin-level settings
        """
        Connector.__init__(self, config, plugin_config)
        self.sharepoint_list_title = self.config.get("sharepoint_list_title")
        self.auth_type = config.get('auth_type')
        logger.info('init:sharepoint_list_title={}, auth_type={}'.format(self.sharepoint_list_title, self.auth_type))
        # Mappings filled by get_read_schema().
        self.column_ids = {}
        self.column_names = {}
        self.expand_lookup = config.get("expand_lookup", False)
        self.column_to_expand = {}
        self.metadata_to_retrieve = config.get("metadata_to_retrieve", [])
        self.display_metadata = len(self.metadata_to_retrieve) > 0
        self.client = SharePointClient(config)

    def get_read_schema(self):
        """Build the DSS schema, filling the column maps and, when lookup
        expansion is enabled, the dict of columns to expand."""
        logger.info('get_read_schema')
        response = self.client.get_list_fields(self.sharepoint_list_title)
        if is_response_empty(response) or len(response[SharePointConstants.RESULTS_CONTAINER_V2][SharePointConstants.RESULTS]) < 1:
            return None
        columns = []
        self.column_ids = {}
        self.column_names = {}
        has_expandable_columns = False
        for column in extract_results(response):
            if self.is_column_displayable(column):
                sharepoint_type = get_dss_type(column[SharePointConstants.TYPE_AS_STRING])
                if sharepoint_type is not None:
                    columns.append({
                        SharePointConstants.NAME_COLUMN: column[SharePointConstants.TITLE_COLUMN],
                        SharePointConstants.TYPE_COLUMN: sharepoint_type
                    })
                    self.column_ids[column[SharePointConstants.ENTITY_PROPERTY_NAME]] = sharepoint_type
                    self.column_names[column[SharePointConstants.ENTITY_PROPERTY_NAME]] = column[SharePointConstants.TITLE_COLUMN]
                if self.expand_lookup:
                    if column[SharePointConstants.TYPE_AS_STRING] == "Lookup":
                        self.column_to_expand.update({column[SharePointConstants.STATIC_NAME]: column[SharePointConstants.LOOKUP_FIELD]})
                        has_expandable_columns = True
                    else:
                        self.column_to_expand.update({
                            column[SharePointConstants.STATIC_NAME]: self.get_column_lookup_field(column[SharePointConstants.STATIC_NAME])
                        })
        # No Lookup column at all: expansion would be pointless, reset.
        if not has_expandable_columns:
            self.column_to_expand = {}
        return {
            SharePointConstants.COLUMNS: columns
        }

    @staticmethod
    def get_column_lookup_field(column_static_name):
        """Return the expandable lookup field for a static name, or None."""
        if column_static_name in SharePointConstants.EXPENDABLES_FIELDS:
            return SharePointConstants.EXPENDABLES_FIELDS.get(column_static_name)
        return None

    def is_column_displayable(self, column):
        """A column shows if requested as metadata, or neither hidden nor read-only."""
        if self.display_metadata and (column['StaticName'] in self.metadata_to_retrieve):
            return True
        return (not column[SharePointConstants.HIDDEN_COLUMN]) \
            and (not column[SharePointConstants.READ_ONLY_FIELD])

    def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
                      partition_id=None, records_limit=-1):
        """Yield the list's items as DSS rows.

        Fix: ``records_limit`` was previously accepted but ignored; it is
        now honored (negative values still mean "no limit").

        :raises Exception: when SharePoint returns an empty or error response.
        """
        if self.column_ids == {}:
            self.get_read_schema()

        logger.info('generate_row:dataset_schema={}, dataset_partitioning={}, partition_id={}, records_limit={}'.format(
            dataset_schema, dataset_partitioning, partition_id, records_limit
        ))

        response = self.client.get_list_all_items(self.sharepoint_list_title, self.column_to_expand)
        if is_response_empty(response):
            if is_error(response):
                raise Exception("Error: {}".format(response[SharePointConstants.ERROR_CONTAINER][SharePointConstants.MESSAGE][SharePointConstants.VALUE]))
            else:
                raise Exception("Error when interacting with SharePoint")

        should_expand = self.column_to_expand != {}
        emitted = 0
        for item in extract_results(response):
            if records_limit >= 0 and emitted >= records_limit:
                break
            if should_expand:
                yield expand_matched_item(self.column_ids, self.column_names, item, column_to_expand=self.column_to_expand)
            else:
                yield matched_item(self.column_ids, self.column_names, item)
            emitted += 1

    def get_writer(self, dataset_schema=None, dataset_partitioning=None,
                   partition_id=None):
        """Return a SharePointListWriter configured for this connector."""
        assert_list_title(self.sharepoint_list_title)
        return SharePointListWriter(self.config, self, dataset_schema, dataset_partitioning, partition_id)

    def get_partitioning(self):
        """Partitioning is not supported by this connector."""
        logger.info('get_partitioning')
        raise Exception("Unimplemented")

    def list_partitions(self, partitioning):
        """No partitions: always an empty list."""
        logger.info('list_partitions:partitioning={}'.format(partitioning))
        return []

    def partition_exists(self, partitioning, partition_id):
        """Partitioning is not supported by this connector."""
        logger.info('partition_exists:partitioning={}, partition_id={}'.format(partitioning, partition_id))
        raise Exception("unimplemented")

    def get_records_count(self, partitioning=None, partition_id=None):
        """Record counting is not supported by this connector."""
        logger.info('get_records_count:partitioning={}, partition_id={}'.format(partitioning, partition_id))
        raise Exception("unimplemented")