예제 #1
0
def solr_print(args):
    """
    Print the documents stored in a Solr index, without their `content` field.

    Parameters:
    args -- an object exposing the attributes `bearertoken_id` and `provider`.
            With `bearertoken_id`, only the documents of that bearer token are printed (the Solr
            core is the one of the bearer token's provider). With `provider`, every document in
            that provider's core is printed. When both are set, `provider` takes precedence.
            When neither is set, a message is printed and nothing is queried.
    """
    provider_name = None
    query = None

    if args.bearertoken_id:
        # Only this branch talks to the database, so its imports are kept local to it.
        from utils.db import session_autocommit
        from models import BearerToken
        from sqlalchemy.orm.exc import NoResultFound

        print(" * Printing the content of the Solr index for bearertoken_id: {}".format(
            args.bearertoken_id))

        with session_autocommit() as session:
            try:
                bearertoken = session.query(BearerToken).filter_by(
                    id=args.bearertoken_id).one()
            except NoResultFound:
                print('There is no bearertoken_id {} in the database.'.format(
                    args.bearertoken_id))
                return
            # Read the provider name while the session is still open.
            provider_name = bearertoken.provider.name
        query = 'bearertoken_id:{}'.format(args.bearertoken_id)

    if args.provider:
        print(" * Printing the Solr index for provider: {}".format(args.provider))
        provider_name = args.provider
        query = '*:*'

    if query is None:
        # Neither selector was given: there is no core to pick and no query to run.
        print("Nothing to print: specify a bearertoken_id or a provider.")
        return

    solr = Solr(CORE_NAMES[provider_name])
    r = solr.search(q=query)
    for doc in r.documents:
        # Drop the content field before printing; not every document necessarily has one.
        doc.pop('content', None)
        print(doc)

    print("\n * {} documents.".format(r.total_results))
    print("Done.")
예제 #2
0
def solr_reset(args):
    """
    Delete documents from a Solr index and commit the deletion.

    Parameters:
    args -- an object exposing the attributes `bearertoken_id` and `provider`.
            With `bearertoken_id`, only the documents of that bearer token are deleted (from the
            core of the bearer token's provider). With `provider`, every document in that
            provider's core is deleted. When both are set, `provider` takes precedence, i.e. the
            whole core is emptied. When neither is set, a message is printed and nothing is
            deleted.
    """
    provider_name = None
    query = None

    if args.bearertoken_id:
        # Only this branch talks to the database, so its imports are kept local to it.
        from utils.db import session_autocommit
        from models import BearerToken
        from sqlalchemy.orm.exc import NoResultFound

        print(" * Resetting the Solr index for bearertoken_id: {}".format(args.bearertoken_id))

        with session_autocommit() as session:
            try:
                bearertoken = session.query(BearerToken).filter_by(
                    id=args.bearertoken_id).one()
            except NoResultFound:
                print('There is no bearertoken_id {} in the database.'.format(
                    args.bearertoken_id))
                return
            # Read the provider name while the session is still open.
            provider_name = bearertoken.provider.name
        query = 'bearertoken_id:{}'.format(args.bearertoken_id)

    if args.provider:
        print(" * Resetting the Solr index for provider: {}".format(args.provider))
        provider_name = args.provider
        query = '*:*'

    if query is None:
        # Neither selector was given: refuse to guess what should be deleted.
        print("Nothing to reset: specify a bearertoken_id or a provider.")
        return

    solr = Solr(CORE_NAMES[provider_name])
    solr.delete_by_query(query)
    solr.commit()
    print("Done.")
예제 #3
0
    def search(self, q):
        """
        Run the query `q` against the Solr core of `self.provider_name`, filtered on the bearer
        token that belongs to `self.user` for that provider.

        Parameters:
        q -- the query, forwarded as the `q` argument of the Solr search.

        Any entry of `self.extra_query_args` is forwarded to the Solr search as well.
        Returns the documents of the Solr response (a list of dicts).
        """
        # Only the id of the bearer token is used below.
        token = BearerToken.objects.only('id').get(
            user=self.user,
            provider__name=self.provider_name,
        )
        token_filter = 'bearertoken_id:{}'.format(token.id)

        core = Solr(CORE_NAMES[self.provider_name])
        response = core.search(q=q, fq=token_filter, **self.extra_query_args)
        return response.documents
예제 #4
0
class AbstractSolrUpdater(metaclass=ABCMeta):
    """
    Base class for updaters that take an item (a Twitter tweet, a Facebook post, a Dropbox file,
    etc.), turn it into a document and send that document to Solr.

    The Solr client is built from `self.CORE_NAME`, which this class does not define itself.
    Subclasses must implement `_convert_redis_entry_to_solr_doc`.

    Parameters:
    bearertoken_id -- a `models.BearerToken.id`.
    """

    def __init__(self, bearertoken_id):
        self.bearertoken_id = bearertoken_id
        core_name = self.CORE_NAME
        self.solr = Solr(core_name)

    def add(self, redis_entry, commit=False):
        """
        Convert `redis_entry` to a Solr document and send it to Solr as a one-document update.

        Parameters:
        redis_entry -- the entry handed to `_convert_redis_entry_to_solr_doc`.
        commit -- forwarded as-is to the Solr update call.
        """
        documents = [self._convert_redis_entry_to_solr_doc(redis_entry)]
        self.solr.update(documents, 'json', commit)

    def commit(self):
        """Commit the Solr index."""
        self.solr.commit()

    @abstractmethod
    def _convert_redis_entry_to_solr_doc(self, redis_entry):
        """Build and return the Solr document for `redis_entry` (implemented by subclasses)."""
예제 #5
0
 def __init__(self, bearertoken_id):
     """
     Remember the bearer token id and build the Solr client for `self.CORE_NAME`.

     Parameters:
     bearertoken_id -- stored unchanged on the instance as `bearertoken_id`.
     """
     self.bearertoken_id = bearertoken_id
     solr_client = Solr(self.CORE_NAME)
     self.solr = solr_client
예제 #6
0
class DropboxSolrUpdater:
    """
    Add, delete and reset the Dropbox documents of one bearer token in the Dropbox Solr core.

    Parameters:
    bearertoken_id -- the id of the bearer token; it is written into every document added and
                      used to scope every deletion.
    """
    CORE_NAME = CORE_NAMES[Provider.NAME_DROPBOX]

    def __init__(self, bearertoken_id):
        self.bearertoken_id = bearertoken_id
        self.solr = Solr(self.CORE_NAME)

    def add(self, redis_entry, commit=False):
        """
        Add a new file to the Solr index for the current `bearertoken_id`.

        The file is read from
        `settings.DROPBOX_TEMP_STORAGE_PATH/<bearertoken_id>/<redis_entry.local_name>`. Once it
        has been posted to Solr, the file and its `.metadata` companion are removed from disk.

        Parameters:
        redis_entry -- a `RedisDropboxEntry` instance.
        commit -- when true, commit the Solr index after the file has been posted.
        """
        local_file_path = normpath(join(settings.DROPBOX_TEMP_STORAGE_PATH,
                                        str(self.bearertoken_id),
                                        redis_entry.local_name))

        # Build Solr doc.
        doc = self._convert_redis_entry_to_solr_doc(redis_entry, local_file_path)
        # Lazy %-style arguments: the message is only rendered if debug logging is enabled.
        log.debug('Posting file to Solr: %s\nDoc: %s\nFile: %s',
                  self.solr.url, doc, local_file_path)
        self.solr.add_file(doc, local_file_path)
        os.remove(local_file_path)  # Delete the downloaded file.
        os.remove(local_file_path + '.metadata')  # Delete the metadata file.

        if commit:
            self.commit()

    def _convert_redis_entry_to_solr_doc(self, redis_entry, local_file_path):
        """
        Build the Solr document (a dict of `literal.*` fields) for a downloaded file.

        Parameters:
        redis_entry -- a `RedisDropboxEntry` instance; its `id` is part of the document id.
        local_file_path -- path of the downloaded file; the JSON metadata is read from
                           `<local_file_path>.metadata`.
        """
        # Build the metadata-file path.
        metadata_file_path = '{}.metadata'.format(local_file_path)
        # Load the content (json) of the metadata-file in a Python dictionary.
        with open(metadata_file_path) as fin:
            metadata = json.load(fin)

        return {
            'literal.bearertoken_id': self.bearertoken_id,
            'literal.id': '{}:{}'.format(self.bearertoken_id, redis_entry.id),
            'literal.remote_path': metadata['path'],
            'literal.modified_at': dropbox_date_to_solr_date(metadata['modified']),
            'literal.mime_type': metadata['mime_type'],
            'literal.bytes': metadata['bytes'],
        }

    def delete(self, redis_entry, commit=False):
        """
        Delete a Dropbox file from the Solr index for the current `bearertoken_id`.

        Parameters:
        redis_entry -- a `RedisDropboxEntry` instance.
        commit -- when true, commit the Solr index after the deletion.
        """

        # We want to delete the specific entry and all its children, if any.
        # Note: '/' is a keyword in Solr, so we need to escape it this way: '\/'.
        # Note: keep in mind that there is no way to know if the entry is a folder or a file
        # (because this is an entry to delete, Dropbox doesn't send us metadata, and we cannot
        # check what is already in our index, because we don't index folders). But this is not a
        # problem.
        #
        # First we delete the item: remote_path_ci:\/folder1\/folder2\/folder\ 3
        # Then all children, if any: remote_path_ci:\/folder1\/folder2\/folder\ 3\/*
        # Or all together:
        #   remote_path_ci:(\/folder1\/folder2\/folder\ 3 OR \/folder1\/folder2\/folder\ 3\/*)
        #
        # Note: this is smart because we don't delete: /folder1/folder2/folder 30
        #
        # The values are lowercased because the query targets the `remote_path_ci` field
        # (presumably a case-insensitive copy of `remote_path` -- confirm against the schema).

        root = escape_solr_query(redis_entry.remote_path)
        # Raw string: '\/' is not a valid Python escape sequence in a regular string literal.
        children = r'{}\/*'.format(root)
        q = 'remote_path_ci:({} OR {}) AND bearertoken_id:{}'.format(
            root.lower(), children.lower(), self.bearertoken_id)
        self.solr.delete_by_query(q)

        if commit:
            self.commit()

    def reset(self, commit=False):
        """
        Delete all files from the Solr index for the current `bearertoken_id`.

        Parameters:
        commit -- when true, commit the Solr index after the deletion.
        """
        q = 'bearertoken_id:{}'.format(self.bearertoken_id)
        self.solr.delete_by_query(q)

        if commit:
            self.commit()

    def commit(self):
        """Commit the Solr index."""
        self.solr.commit()