Example #1
def importer_submit(request):
    source = json.loads(request.POST.get('source', '{}'))
    destination = json.loads(request.POST.get('destination', '{}'))
    start_time = json.loads(request.POST.get('start_time', '-1'))

    if destination['outputFormat'] == 'index':
        source['columns'] = destination['columns']
        index_name = destination["name"]

        if destination['indexerRunJob']:
            _convert_format(source["format"], inverse=True)
            job_handle = _index(request,
                                source,
                                index_name,
                                start_time=start_time,
                                lib_path=destination['indexerJobLibPath'])
        else:
            client = SolrClient(request.user)
            unique_key_field = (destination['indexerDefaultField'][0]
                                if destination['indexerDefaultField'] else None)
            df = (destination['indexerPrimaryKey'][0]
                  if destination['indexerPrimaryKey'] else None)
            kwargs = {}

            stats = request.fs.stats(source["path"])
            if stats.size > MAX_UPLOAD_SIZE:
                raise PopupException(_('File size is too large to handle!'))

            indexer = MorphlineIndexer(request.user, request.fs)
            fields = indexer.get_kept_field_list(source['columns'])
            if not unique_key_field:
                unique_key_field = 'hue_id'
                fields += [{"name": unique_key_field, "type": "string"}]
                kwargs['rowid'] = unique_key_field

            if not client.exists(index_name):
                client.create_index(name=index_name,
                                    fields=fields,
                                    unique_key_field=unique_key_field,
                                    df=df)

            data = request.fs.read(source["path"], 0, MAX_UPLOAD_SIZE)
            client.index(name=index_name, data=data, **kwargs)

            job_handle = {
                'status': 0,
                'on_success_url': reverse('search:browse',
                                          kwargs={'name': index_name})
            }
    elif destination['outputFormat'] == 'database':
        job_handle = _create_database(request, source, destination, start_time)
    else:
        job_handle = _create_table(request, source, destination, start_time)

    return JsonResponse(job_handle)
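For context, here is a minimal sketch of the POST payload this view expects: source, destination, and start_time arrive as JSON-encoded form fields. The endpoint URL and every concrete value below are illustrative assumptions, not taken from the example above.

# Hypothetical payload for importer_submit; the path and values are assumed.
import json
from django.test import Client

payload = {
    'source': json.dumps({
        'path': '/user/demo/data.csv',          # file read via request.fs
        'format': {'type': 'csv'},
        'columns': [],
    }),
    'destination': json.dumps({
        'outputFormat': 'index',                # or 'database' / table creation
        'name': 'demo_index',
        'columns': [{'name': 'id', 'type': 'string'}],
        'indexerRunJob': False,
        'indexerDefaultField': ['id'],
        'indexerPrimaryKey': [],
        'indexerJobLibPath': '',
    }),
    'start_time': json.dumps(-1),
}
response = Client().post('/indexer/api/importer/submit', payload)  # assumed URL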
Example #2
def index(request):
    response = {'status': -1}

    name = request.POST.get('name')
    data = request.POST.get('data')
    client = SolrClient(request.user)
    client.index(name, data)
    response['status'] = 0
    response['message'] = _('Data added')

    return JsonResponse(response)
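This view is a thin wrapper around SolrClient.index. Called directly, it looks roughly like the sketch below; the import path follows Hue's indexer app layout, but treat it and the sample data as assumptions.

# Sketch only: the import path, user, and CSV payload are assumptions.
from django.contrib.auth.models import User
from indexer.solr_client import SolrClient

user = User.objects.get(username='demo')              # assumed existing user
client = SolrClient(user)
client.index('demo_index', 'id,text\n1,hello world')  # CSV, as the view passes request.POST data through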
Example #3
    def update_data_from_hive(self, collection_or_core_name, columns,
                              fetch_handle):
        MAX_ROWS = 10000
        ROW_COUNT = 0
        FETCH_BATCH = 1000
        has_more = True

        client = SolrClient(self.user)

        try:
            while ROW_COUNT < MAX_ROWS and has_more:
                result = fetch_handle(FETCH_BATCH, ROW_COUNT == 0)
                has_more = result['has_more']

                if result['data']:
                    kwargs = {}
                    dataset = tablib.Dataset()
                    dataset.append(columns)
                    for i, row in enumerate(result['data']):
                        dataset.append([ROW_COUNT + i] + [
                            cell if cell else
                            (0 if isinstance(cell, numbers.Number) else '')
                            for cell in row
                        ])

                    if not client.index(name=collection_or_core_name,
                                        data=dataset.csv,
                                        **kwargs):
                        raise PopupException(
                            _('Could not update index. Check error logs for more info.'))

                    ROW_COUNT += len(dataset)
        except Exception as e:
            raise PopupException(_('Could not update index: %s') % e)
Example #4
    def update_data_from_hive(self,
                              collection_or_core_name,
                              columns,
                              fetch_handle,
                              indexing_options=None):
        MAX_ROWS = 10000
        FETCH_BATCH = 1000

        row_count = 0
        has_more = True
        if indexing_options is None:
            indexing_options = {}

        client = SolrClient(self.user)

        try:
            while row_count < MAX_ROWS and has_more:
                result = fetch_handle(FETCH_BATCH, row_count == 0)
                has_more = result['has_more']

                if result['data']:
                    dataset = tablib.Dataset()
                    dataset.append(columns)
                    for i, row in enumerate(result['data']):
                        dataset.append([
                            cell if cell else
                            (0 if isinstance(cell, numbers.Number) else '')
                            for cell in row
                        ])

                    if not client.index(name=collection_or_core_name,
                                        data=dataset.csv,
                                        **indexing_options):
                        raise PopupException(
                            _('Could not index the data. Check error logs for more info.'))

                    row_count += len(dataset)
        except Exception as e:
            raise PopupException(_('Could not update index: %s') % e)

        return row_count
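Both versions of update_data_from_hive rely only on fetch_handle being callable as fetch_handle(batch_size, is_first_batch) and returning a dict with 'has_more' and 'data'. A minimal stand-in that satisfies that contract (the in-memory rows here replace a real Hive result set):

# Illustrative fetch_handle stub; a real one would page through a Hive query.
ROWS = [[i, 'value-%d' % i] for i in range(2500)]

def fetch_handle(batch_size, is_first_batch):
    offset = 0 if is_first_batch else fetch_handle.offset
    batch = ROWS[offset:offset + batch_size]
    fetch_handle.offset = offset + batch_size
    return {'has_more': fetch_handle.offset < len(ROWS), 'data': batch}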
Example #5
class Command(BaseCommand):
    """
  Install examples but do not overwrite them.
  """
    def handle(self, *args, **options):
        self.user = install_sample_user()
        self.client = SolrClient(self.user)

        collection = options['data']

        if collection == 'twitter_demo':
            LOG.info("Installing twitter collection")
            path = os.path.abspath(
                os.path.join(
                    os.path.dirname(__file__),
                    '../../../../../../../apps/search/examples/collections/solr_configs_twitter_demo/index_data.csv'
                ))
            self._setup_collection_from_csv(
                {
                    'name': 'twitter_demo',
                    'fields': self._parse_fields(path,
                                                 fieldtypes={
                                                     'source': 'string',
                                                     'username': '******',
                                                 }),
                    'uniqueKeyField': 'id',
                    'df': 'text'
                }, path)
            LOG.info("Twitter collection successfully installed")

        if collection == 'yelp_demo':
            LOG.info("Installing yelp collection")
            path = os.path.abspath(
                os.path.join(
                    os.path.dirname(__file__),
                    '../../../../../../../apps/search/examples/collections/solr_configs_yelp_demo/index_data.csv'
                ))
            self._setup_collection_from_csv(
                {
                    'name': 'yelp_demo',
                    'fields': self._parse_fields(path, fieldtypes={
                        'name': 'string',
                    }),
                    'uniqueKeyField': 'id',
                    'df': 'text'
                }, path)
            LOG.info("Yelp collection successfully installed")

        if collection == 'log_analytics_demo':
            LOG.info("Installing logs collection")
            path = os.path.abspath(
                os.path.join(
                    os.path.dirname(__file__),
                    '../../../../../../../apps/search/examples/collections/solr_configs_log_analytics_demo/index_data.csv'
                ))
            self._setup_collection_from_csv(
                {
                    'name': 'log_analytics_demo',
                    'fields': self._parse_fields(path,
                                                 fieldtypes={
                                                     'region_code': 'string',
                                                     'referer': 'string',
                                                     'user_agent': 'string'
                                                 }),
                    'uniqueKeyField': 'id',
                    'df': 'record'
                }, path)
            LOG.info("Logs collection successfully installed")

    def _setup_collection_from_csv(self, collection, path):
        if not self.client.exists(collection['name']):
            self.client.create_index(
                name=collection['name'],
                fields=collection['fields'],
                unique_key_field=collection['uniqueKeyField'],
                df=collection['df'])

            with open(path) as fh:
                self.client.index(collection['name'], fh.read())

    def _parse_fields(self,
                      path,
                      separator=',',
                      quote_character='"',
                      fieldtypes=None):
        if fieldtypes is None:  # avoid the mutable-default-argument pitfall
            fieldtypes = {}
        with open(path) as fh:
            field_generator = utils.field_values_from_separated_file(
                fh, separator, quote_character)
            row = next(field_generator)
            field_names = list(row.keys())
            field_types = utils.get_field_types(
                (list(row.values())
                 for row in itertools.chain([row], field_generator)),
                iterations=51)
            return [{
                'name': name,
                'type': fieldtypes.get(name, inferred)
            } for name, inferred in zip(field_names, field_types)]
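The closing comprehension in _parse_fields merges the inferred types with the caller's fieldtypes overrides. A self-contained illustration of that merge (all names and types below are made up):

# Stand-ins for field_values_from_separated_file / get_field_types output.
field_names = ['id', 'username', 'followers_count']
field_types = ['plong', 'text_general', 'plong']
fieldtypes = {'username': 'string'}                # explicit override, as in the demos

fields = [{'name': name, 'type': fieldtypes.get(name, inferred)}
          for name, inferred in zip(field_names, field_types)]
# fields -> username is forced to 'string'; the others keep their inferred types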