Example No. 1
0
    def save_datasource(self, name, source_type, source, file_path=None):
        """Create and persist a datasource under ``self.dir/<name>``.

        Instantiates the datasource, snapshots its dataframe into a sqlite
        db, and writes ``ds.pickle``, ``metadata.json`` and ``versions.json``
        so the datasource can be re-created later.

        Args:
            name: desired datasource name; on a clash a ``__N__`` suffix is
                appended (and any previous suffix stripped first).
            source_type: ``'file'``, the name of an integration from
                ``self.config['integrations']``, or anything else (treated
                as a URL and handed to ``FileDS``).
            source: file name, query payload (dict), or URL, depending on
                ``source_type``.
            file_path: path to the uploaded file; required for ``'file'``.

        Returns:
            Tuple of ``(datasource_obj, final_name)``.

        Raises:
            Exception: when ``source_type == 'file'`` but no ``file_path``.
            KeyError: when the integration's type has no datasource class.
        """
        if source_type == 'file' and file_path is None:
            raise Exception(
                '`file_path` argument required when source_type == "file"')

        # De-duplicate the name: strip any previous __N__ suffix, then
        # append the next free index.
        for i in range(1, 1000):
            if name in [x['name'] for x in self.get_datasources()]:
                name = name.replace(f'__{i - 1}__', '')
                name = f'{name}__{i}__'
            else:
                break

        ds_meta_dir = os.path.join(self.dir, name)
        os.mkdir(ds_meta_dir)

        def _instantiate(ds_class, ds_kwargs):
            # A failing datasource constructor must not leave a half-created
            # metadata directory behind (previously copy-pasted four times).
            try:
                return ds_class(**ds_kwargs)
            except Exception:
                shutil.rmtree(ds_meta_dir)
                raise

        if source_type == 'file':
            try:
                source = os.path.join(ds_meta_dir, source)
                shutil.move(file_path, source)
                ds = FileDS(source)
            except Exception:
                shutil.rmtree(ds_meta_dir)
                raise

            picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}

        elif source_type in self.config['integrations']:
            integration = self.config['integrations'][source_type]

            ds_class_map = {
                'clickhouse': ClickhouseDS,
                'mariadb': MariaDS,
                'mysql': MySqlDS,
                'postgres': PostgresDS,
                'mssql': MSSQLDS,
                'mongodb': MongoDS,
                'snowflake': SnowflakeDS
            }

            try:
                dsClass = ds_class_map[integration['type']]
            except KeyError:
                raise KeyError(
                    f"Unknown DS type: {source_type}, type is {integration['type']}"
                )

            if integration['type'] == 'clickhouse':
                kwargs = {
                    'query': source['query'],
                    'user': integration['user'],
                    'password': integration['password'],
                    'host': integration['host'],
                    'port': integration['port']
                }

            elif integration['type'] in ('mssql', 'postgres', 'mariadb',
                                         'mysql'):
                kwargs = {
                    'query': source['query'],
                    'user': integration['user'],
                    'password': integration['password'],
                    'host': integration['host'],
                    'port': integration['port']
                }
                # Integration-level database is the default; a database sent
                # along with the query takes precedence.
                if 'database' in integration:
                    kwargs['database'] = integration['database']
                if 'database' in source:
                    kwargs['database'] = source['database']

            elif integration['type'] == 'snowflake':
                kwargs = {
                    'query': source['query'],
                    'schema': source['schema'],
                    'warehouse': source['warehouse'],
                    'database': source['database'],
                    'host': integration['host'],
                    'password': integration['password'],
                    'user': integration['user'],
                    'account': integration['account']
                }

            else:
                # mongodb — the only remaining type in ds_class_map.
                kwargs = {
                    'database': source['database'],
                    'collection': source['collection'],
                    'query': source['find'],
                    'user': integration['user'],
                    'password': integration['password'],
                    'host': integration['host'],
                    'port': integration['port']
                }

            picklable = {
                'class': dsClass.__name__,
                'args': [],
                'kwargs': kwargs
            }
            ds = _instantiate(dsClass, kwargs)
        else:
            # This probably only happens for urls
            print('Create URL data source !')
            try:
                ds = FileDS(source)
            except Exception:
                shutil.rmtree(ds_meta_dir)
                raise
            picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}

        df = ds.df

        df_with_types = cast_df_columns_types(
            df,
            self.get_analysis(df)['data_analysis_v2'])
        create_sqlite_db(os.path.join(ds_meta_dir, 'sqlite.db'), df_with_types)

        with open(os.path.join(ds_meta_dir, 'ds.pickle'), 'wb') as fp:
            pickle.dump(picklable, fp)

        # Single timestamp so created_at and updated_at can never disagree
        # (previously datetime.now() was called twice).
        now = str(datetime.datetime.now()).split('.')[0]
        with open(os.path.join(ds_meta_dir, 'metadata.json'), 'w') as fp:
            meta = {
                'name': name,
                'source_type': source_type,
                'source': source,
                'created_at': now,
                'updated_at': now,
                'row_count': len(df),
                'columns': [dict(name=x) for x in list(df.keys())]
            }
            json.dump(meta, fp, indent=4, sort_keys=True)

        with open(os.path.join(ds_meta_dir, 'versions.json'), 'wt') as fp:
            json.dump(self.config.versions, fp, indent=4, sort_keys=True)

        return self.get_datasource_obj(name, raw=True), name
Example No. 2
0
    def save_datasource(self, name, source_type, source, file_path=None):
        """Create and persist a datasource under ``self.dir/<name>/datasource``.

        Instantiates the datasource, snapshots its dataframe into a sqlite
        db, and writes ``ds.pickle`` and ``metadata.json`` so the datasource
        can be re-created later.

        Args:
            name: desired datasource name; on a clash a ``__N__`` suffix is
                appended (and any previous suffix stripped first).
            source_type: ``'file'``, the name of an integration from
                ``self.config['integrations']``, or anything else (treated
                as a URL and handed to ``FileDS``).
            source: file name, query string, or URL, depending on type.
            file_path: path to the uploaded file; required for ``'file'``.

        Returns:
            Tuple of ``(datasource_obj, final_name)``.

        Raises:
            Exception: when ``source_type == 'file'`` but no ``file_path``.
            ValueError: when the integration's type is unsupported.
        """
        if source_type == 'file' and file_path is None:
            raise Exception(
                '`file_path` argument required when source_type == "file"')

        # De-duplicate the name: strip any previous __N__ suffix, then
        # append the next free index.
        for i in range(1, 1000):
            if name in [x['name'] for x in self.get_datasources()]:
                name = name.replace(f'__{i - 1}__', '')
                name = f'{name}__{i}__'
            else:
                break

        ds_meta_dir = os.path.join(self.dir, name)
        os.mkdir(ds_meta_dir)

        ds_dir = os.path.join(ds_meta_dir, 'datasource')
        os.mkdir(ds_dir)

        if source_type == 'file':
            try:
                source = os.path.join(ds_dir, source)
                shutil.move(file_path, source)
                ds = FileDS(source)
            except Exception:
                shutil.rmtree(ds_meta_dir)
                raise

            picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}
        elif source_type in self.config['integrations']:
            integration = self.config['integrations'][source_type]

            # Map replaces the old if/elif chain that set dsClass and a
            # hand-written class-name string in lockstep.
            ds_class_map = {
                'clickhouse': ClickhouseDS,
                'mariadb': MariaDS,
                'mysql': MySqlDS,
                'postgres': PostgresDS,
                'mssql': MSSQLDS
            }
            dsClass = ds_class_map.get(integration['type'])
            if dsClass is None:
                # BUG FIX: previously the directories created above were
                # leaked when the integration type was unsupported.
                shutil.rmtree(ds_meta_dir)
                raise ValueError(f'Unknown DS source_type: {source_type}')

            picklable = {
                'class': dsClass.__name__,
                'args': [],
                'kwargs': {
                    'query': source,
                    'user': integration['user'],
                    'password': integration['password'],
                    'host': integration['host'],
                    'port': integration['port']
                }
            }
            try:
                # Single source of truth: instantiate from the same kwargs
                # that get pickled, instead of spelling them out twice.
                ds = dsClass(**picklable['kwargs'])
            except Exception:
                shutil.rmtree(ds_meta_dir)
                raise
        else:
            # This probably only happens for urls
            print('Create URL data source !')
            try:
                ds = FileDS(source)
            except Exception:
                shutil.rmtree(ds_meta_dir)
                raise
            picklable = {'class': 'FileDS', 'args': [source], 'kwargs': {}}

        df = ds.df

        df_with_types = cast_df_columns_types(
            df,
            self.get_analysis(df)['data_analysis_v2'])
        create_sqlite_db(os.path.join(ds_dir, 'sqlite.db'), df_with_types)

        with open(os.path.join(ds_dir, 'ds.pickle'), 'wb') as fp:
            pickle.dump(picklable, fp)

        # Single timestamp so created_at and updated_at can never disagree
        # (previously datetime.now() was called twice).
        now = str(datetime.datetime.now()).split('.')[0]
        with open(os.path.join(ds_dir, 'metadata.json'), 'w') as fp:
            meta = {
                'name': name,
                'source_type': source_type,
                'source': source,
                'created_at': now,
                'updated_at': now,
                'row_count': len(df),
                'columns': [dict(name=x) for x in list(df.keys())]
            }
            json.dump(meta, fp)

        return self.get_datasource_obj(name, raw=True), name
Example No. 3
0
    def save_datasource(self, name, source_type, source, file_path=None):
        """Create and persist a datasource under ``self.dir/<name>/datasource``.

        Instantiates the datasource, snapshots its dataframe into a sqlite
        db, and writes ``ds.pickle`` and ``metadata.json`` so the datasource
        can be re-created later.

        Args:
            name: desired datasource name; on a clash a ``__N__`` suffix is
                appended (and any previous suffix stripped first).
            source_type: ``'file'``, ``'clickhouse'``, ``'mariadb'``, or
                anything else (treated as a URL and handed to ``FileDS``).
            source: file name, query string, or URL, depending on type.
            file_path: path to the uploaded file; required for ``'file'``.

        Returns:
            The datasource object from ``get_datasource_obj``.

        Raises:
            Exception: when ``source_type == 'file'`` but no ``file_path``.
        """
        # Local import: cleanup below needs rmtree; module-level imports are
        # outside this view — TODO hoist if shutil is already imported there.
        import shutil

        if source_type == 'file' and file_path is None:
            raise Exception(
                '`file_path` argument required when source_type == "file"')

        # De-duplicate the name: strip any previous __N__ suffix, then
        # append the next free index.
        for i in range(1, 1000):
            if name in [x['name'] for x in self.get_datasources()]:
                name = name.replace(f'__{i - 1}__', '')
                name = f'{name}__{i}__'
            else:
                break

        ds_meta_dir = os.path.join(self.dir, name)
        os.mkdir(ds_meta_dir)

        ds_dir = os.path.join(ds_meta_dir, 'datasource')
        os.mkdir(ds_dir)

        try:
            if source_type == 'file':
                source = os.path.join(ds_dir, source)
                os.replace(file_path, source)
                ds = FileDS(source)
                picklable = {
                    'class': 'FileDS',
                    'args': [source],
                    'kwargs': {}
                }
            elif source_type == 'clickhouse':
                # Hoisted: previously each credential repeated the full
                # self.config['integrations']['default_clickhouse'] lookup.
                creds = self.config['integrations']['default_clickhouse']
                user = creds['user']
                password = creds['password']
                # TODO add host port params
                ds = ClickhouseDS(source, user=user, password=password)
                picklable = {
                    'class': 'ClickhouseDS',
                    'args': [source],
                    'kwargs': {
                        'user': user,
                        'password': password
                    }
                }
            elif source_type == 'mariadb':
                creds = self.config['integrations']['default_mariadb']
                kwargs = {
                    'user': creds['user'],
                    'password': creds['password'],
                    'host': creds['host'],
                    'port': creds['port']
                }
                ds = MariaDS(source, **kwargs)
                picklable = {
                    'class': 'MariaDS',
                    'args': [source],
                    'kwargs': kwargs
                }
            else:
                # This probably only happens for urls
                print('Create URL data source !')
                ds = FileDS(source)
                picklable = {
                    'class': 'FileDS',
                    'args': [source],
                    'kwargs': {}
                }
        except Exception:
            # BUG FIX: previously a failing datasource constructor leaked
            # the freshly created directories.
            shutil.rmtree(ds_meta_dir)
            raise

        df = ds.df

        df_with_types = cast_df_columns_types(
            df,
            self.get_analysis(df)['data_analysis_v2'])
        create_sqlite_db(os.path.join(ds_dir, 'sqlite.db'), df_with_types)

        with open(os.path.join(ds_dir, 'ds.pickle'), 'wb') as fp:
            pickle.dump(picklable, fp)

        # Single timestamp so created_at and updated_at can never disagree
        # (previously datetime.now() was called twice).
        now = str(datetime.datetime.now()).split('.')[0]
        with open(os.path.join(ds_dir, 'metadata.json'), 'w') as fp:
            json.dump(
                {
                    'name': name,
                    'source_type': source_type,
                    'source': source,
                    'created_at': now,
                    'updated_at': now,
                    'row_count': len(df),
                    'columns': [dict(name=x) for x in list(df.keys())]
                }, fp)

        return self.get_datasource_obj(name, avoid_crash=True)