示例#1
0
    def test_flattener(self):
        """DocumentFlattener handles flat, nested, and list-bearing documents.

        Checks that format_document produces dot-separated key paths
        ("doc.k", "list.0.k", "list.2.i") matching hand-built expectations.
        """
        formatter = DocumentFlattener()

        # Flat already: format_document should only transform each value.
        transformed = {
            k: formatter.transform_value(v) for k, v in self.doc.items()
        }
        self.assertEqual(transformed, formatter.format_document(self.doc))

        # Nested: keys gain the dot-separated path prefix ("doc.").
        transformed2 = formatter.format_document(self.doc_nested)
        constructed = {
            "doc.%s" % k: formatter.transform_value(v)
            for k, v in self.doc.items()
        }
        self.assertEqual(transformed2, constructed)

        # With a list: elements are addressed by index ("list.<i>...").
        constructed1 = {
            "list.0.%s" % k: formatter.transform_value(v)
            for k, v in self.doc.items()
        }
        constructed2 = {"list.1.%s" % k: v for k, v in transformed2.items()}
        constructed3 = {
            "list.2.%d" % i: formatter.transform_value(v)
            for i, v in enumerate(self.lst)
        }
        constructed1.update(constructed2)
        constructed1.update(constructed3)
        self.assertEqual(formatter.format_document(self.doc_list), constructed1)
    def __init__(self,
                 url,
                 unique_key='_id',
                 auto_commit_interval=None,
                 chunk_size=100,
                 **kwargs):
        """Connect to PostgreSQL and MongoDB and initialise the schema.

        :param url: PostgreSQL connection URL.
        :param unique_key: name of the document's unique-id field.
        :param auto_commit_interval: commit interval, or None to disable.
        :param chunk_size: number of documents per bulk operation.
        :param kwargs: must contain 'mongoUrl'; optional 'mappingFile'
            (defaults to DEFAULT_MAPPINGS_JSON_FILE_NAME) and 'quiet'.
        :raises InvalidConfiguration: if 'mongoUrl' is missing or the
            mapping file does not exist.
        """
        if 'mongoUrl' not in kwargs:
            raise InvalidConfiguration("The MongoUrl parameter is mandatory.")

        # Validate and load the mapping file *before* opening any
        # connections, so a bad configuration cannot leak a live
        # PostgreSQL/MongoDB connection.
        mappings_json_file_name = kwargs.get('mappingFile',
                                             DEFAULT_MAPPINGS_JSON_FILE_NAME)
        if not os.path.isfile(mappings_json_file_name):
            raise InvalidConfiguration("no mapping file found at " +
                                       mappings_json_file_name)

        with open(mappings_json_file_name) as mappings_file:
            self.mappings = json.load(mappings_file)

        validate_mapping(self.mappings)

        self.url = url
        self.unique_key = unique_key
        self.auto_commit_interval = auto_commit_interval
        self.chunk_size = chunk_size
        self._formatter = DocumentFlattener()
        self.insert_accumulator = {}
        self.quiet = kwargs.get('quiet', False)

        register_adapter(ObjectId, object_id_adapter)

        self.pgsql = psycopg2.connect(url)
        self.client = MongoClient(kwargs['mongoUrl'])

        # Deferrable sessions delay constraint checks until commit.
        self.pgsql.set_session(deferrable=True)
        self._init_schema()
示例#3
0
    def __init__(self,
                 url,
                 unique_key='_id',
                 auto_commit_interval=None,
                 chunk_size=100,
                 **kwargs):
        """Connect to PostgreSQL and MongoDB and initialise the schema.

        :param url: PostgreSQL connection URL.
        :param unique_key: name of the document's unique-id field.
        :param auto_commit_interval: commit interval, or None to disable.
        :param chunk_size: number of documents per bulk operation.
        :param kwargs: must contain 'mongoUrl' (MongoDB connection URL).
        :raises InvalidConfiguration: if 'mongoUrl' is missing or the
            mapping file does not exist.
        """
        if 'mongoUrl' not in kwargs:
            raise InvalidConfiguration("The MongoUrl parameter is mandatory.")

        # Check and load the mapping file *before* opening any connections,
        # so a bad configuration cannot leak a live PostgreSQL connection.
        if not os.path.isfile(MAPPINGS_JSON_FILE_NAME):
            raise InvalidConfiguration("no mapping file found")

        with open(MAPPINGS_JSON_FILE_NAME) as mappings_file:
            self.mappings = json.load(mappings_file)

        self.url = url
        self.unique_key = unique_key
        self.auto_commit_interval = auto_commit_interval
        self.chunk_size = chunk_size
        self._formatter = DocumentFlattener()
        self.insert_accumulator = {}

        register_adapter(ObjectId, object_id_adapter)

        self.pgsql = psycopg2.connect(url)
        self.client = MongoClient(kwargs['mongoUrl'])

        self._init_schema()
示例#4
0
    def __init__(self, url, unique_key='_id', auto_commit_interval=None, chunk_size=100, **kwargs):
        """Connect to PostgreSQL and load the table mappings.

        :param url: PostgreSQL connection URL.
        :param unique_key: name of the document's unique-id field.
        :param auto_commit_interval: commit interval, or None to disable.
        :param chunk_size: number of documents per bulk operation.
        :raises InvalidConfiguration: if the mapping file does not exist.
        """
        # print() is valid on both Python 2 and 3; the bare Python 2
        # "print kwargs" statement is a SyntaxError under Python 3.
        print(kwargs)

        # NOTE(review): the mongoUrl requirement and Mongo client are
        # intentionally disabled during the rework tracked by the TODOs
        # below — confirm before re-enabling.
        # if 'mongoUrl' not in kwargs:
        #     raise InvalidConfiguration("The MongoUrl parameter is mandatory.")

        self.url = url
        self.unique_key = unique_key
        self.auto_commit_interval = auto_commit_interval
        self.chunk_size = chunk_size
        self._formatter = DocumentFlattener()
        self.pgsql = psycopg2.connect(url)
        self.insert_accumulator = {}
        # self.client = MongoClient(kwargs['mongoUrl'])

        register_adapter(ObjectId, object_id_adapter)

        # TODO - remove mapping checks as we are just going to
        # translate to jsonb with an id column
        if not os.path.isfile(MAPPINGS_JSON_FILE_NAME):
            raise InvalidConfiguration("no mapping file found")

        with open(MAPPINGS_JSON_FILE_NAME) as mappings_file:
            self.mappings = json.load(mappings_file)

        validate_mapping(self.mappings)

        # TODO - this should create a table with an id pk column and
        # also a jsonb 'jdoc' column only
        self._init_schema()
 def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
              unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
     """Verify the Solr URL, connect, and prepare the field list."""
     self.solr = Solr(url)
     self.unique_key = unique_key
     # pysolr does things in milliseconds, so scale the interval up;
     # None means auto-commit is disabled.
     self.auto_commit_interval = (
         auto_commit_interval * 1000
         if auto_commit_interval is not None
         else None
     )
     self.chunk_size = chunk_size
     self.field_list = []
     self._build_fields()
     self._formatter = DocumentFlattener()
示例#6
0
    def __init__(self,
                 url,
                 auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id',
                 chunk_size=DEFAULT_MAX_BULK,
                 **kwargs):
        """Verify Solr URL and establish a connection.

        Optional kwargs:
          clientOptions: dict of extra options forwarded to pysolr's Solr.
          content_type: dict whose keys are the accepted document types;
            when omitted, every type is accepted.

        :raises errors.InvalidConfiguration: if content_type is given
            but is not a dict.
        """
        self.url = url
        self.solr = Solr(url, **kwargs.get('clientOptions', {}))
        self.unique_key = unique_key
        # pysolr does things in milliseconds
        if auto_commit_interval is not None:
            self.auto_commit_interval = auto_commit_interval * 1000
        else:
            self.auto_commit_interval = None
        self.chunk_size = chunk_size
        self.field_list = []
        self._build_fields()
        self._formatter = DocumentFlattener()

        self._content_type = kwargs.get("content_type", None)
        # Lazy %-style logging args: the string is only formatted when
        # the record is actually emitted.
        logging.info("begin to init content_type args ,value is %s",
                     self._content_type)

        if self._content_type is None:
            logging.info("content_type args is none, will receive all type")
            self._receive_all_type = True
        else:
            logging.debug("begin to check content_type args")
            self._receive_all_type = False
            if isinstance(self._content_type, dict):
                # Only the keys matter; no need to copy the whole dict.
                self._content_type_list = list(self._content_type)
                logging.debug("the support type list is %s",
                              self._content_type_list)
            else:
                raise errors.InvalidConfiguration(
                    "content_type argument is not a dict")
# coding: utf8
from future.utils import iteritems
from mongo_connector.doc_managers.formatters import DocumentFlattener

from mongo_connector.doc_managers.utils import db_and_collection, ARRAY_OF_SCALARS_TYPE

# Module-level flattener shared by the document-formatting helpers below.
_formatter = DocumentFlattener()


def _clean_and_flatten_doc(mappings, doc, namespace):
    """Reformats the given document before insertion into Solr.
    This method reformats the document in the following ways:
      - removes extraneous fields that aren't defined in schema.xml
      - unwinds arrays in order to find and later flatten sub-documents
      - flattens the document so that there are no sub-documents, and every
        value is associated with its dot-separated path of keys
      - inserts namespace and timestamp metadata into the document in order
        to handle rollbacks
    An example:
      {"a": 2,
       "b": {
         "c": {
           "d": 5
         }
       },
       "e": [6, 7, 8]
      }
    becomes:
      {"a": 2, "b.c.d": 5, "e.0": 6, "e.1": 7, "e.2": 8}
    """