def insert(self, new_values):
        """ Insert new documents into Elastic Search """

        if self.rowid_column not in new_values:
            log2pg(
                'INSERT requires "{rowid}" column. Missing in: {values}'.
                format(rowid=self.rowid_column,
                       values=new_values), logging.ERROR)
            return (0, 0)

        document_id = new_values[self.rowid_column]
        new_values.pop(self.rowid_column, None)

        try:
            response = self.client.index(index=self.index,
                                         doc_type=self.doc_type,
                                         id=document_id,
                                         body=new_values)
            return response
        except Exception as exception:
            log2pg(
                "INDEX for /{index}/{doc_type}/{document_id} and document {document} failed: {exception}"
                .format(index=self.index,
                        doc_type=self.doc_type,
                        document_id=document_id,
                        document=new_values,
                        exception=exception), logging.ERROR)
            return (0, 0)
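For context: these insert/update/delete/execute methods hang off a Multicorn ForeignDataWrapper subclass. A minimal sketch of the surrounding class, assuming the standard multicorn API; the option names and the 'id' rowid choice are illustrative, not the authoritative implementation:

import logging

from elasticsearch import Elasticsearch
from multicorn import ForeignDataWrapper
from multicorn.utils import log_to_postgres as log2pg


class ElasticsearchFDW(ForeignDataWrapper):
    """ Sketch of the wrapper class the examples in this listing belong to. """

    def __init__(self, options, columns):
        super(ElasticsearchFDW, self).__init__(options, columns)
        self.index = options.get('index', '')
        self.doc_type = options.get('type', '')
        self.client = Elasticsearch([{
            'host': options.get('host', 'localhost'),
            'port': int(options.get('port', '9200')),
        }])
        self.columns = columns

    @property
    def rowid_column(self):
        """ Column that carries the Elasticsearch document id. """
        return 'id'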
    def insert(self, new_values):
        """ Insert new documents into Elastic Search """

        if self.rowid_column not in new_values:
            log2pg(
                'INSERT requires "{rowid}" column. Missing in: {values}'.
                format(rowid=self.rowid_column, values=new_values),
                logging.ERROR,
            )
            return (0, 0)

        document_id = new_values[self.rowid_column]
        new_values.pop(self.rowid_column, None)

        for key in self.json_columns.intersection(new_values.keys()):
            new_values[key] = json.loads(new_values[key])

        try:
            response = self.client.index(id=document_id,
                                         body=new_values,
                                         **self.arguments)
            return response
        except Exception as exception:
            log2pg(
                "INDEX for {path}/{document_id} and document {document} failed: {exception}"
                .format(
                    path=self.path,
                    document_id=document_id,
                    document=new_values,
                    exception=exception,
                ),
                logging.ERROR,
            )
            return (0, 0)
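This variant relies on two attributes the snippet does not show: self.json_columns (columns whose string values must be parsed back into JSON) and self.arguments (keyword arguments such as the target index, splatted into every client call). A plausible __init__ fragment, assuming Multicorn column definitions; the option names are illustrative:

        # Hypothetical __init__ fragment for this variant, not the
        # authoritative implementation.
        self.json_columns = set(
            name for name, column in columns.items()
            if column.type_name in ('json', 'jsonb'))
        # Splatted into every client call so the target only needs
        # computing once; older clusters would also carry doc_type here.
        self.arguments = {'index': options.get('index', '')}
        self.path = '/{index}'.format(**self.arguments)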
    def _read_by_id(self, row_id):
        try:
            arguments = dict(self.arguments)
            results = self.client.search(
                body={"query": {
                    "ids": {
                        "values": [row_id]
                    }
                }}, **arguments)["hits"]["hits"]
            if results:
                return self._convert_response_row(results[0], self.columns,
                                                  None, None)
            log2pg(
                "SEARCH for {path} row_id {row_id} returned nothing".format(
                    path=self.path, row_id=row_id),
                logging.WARNING,
            )
            return {self.rowid_column: row_id}
        except Exception as exception:
            log2pg(
                "SEARCH for {path} row_id {row_id} failed: {exception}".format(
                    path=self.path, row_id=row_id, exception=exception),
                logging.ERROR,
            )
            return {}
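_read_by_id delegates to _convert_response_row, which is not shown in these examples. A minimal sketch consistent with the call sites; the signature and field mapping are inferred, not authoritative:

    def _convert_response_row(self, row_data, columns, query=None, sort=None):
        # Map the document id onto the rowid column and copy matching
        # _source fields onto the requested columns.
        row = dict(
            (column, row_data['_source'].get(column))
            for column in columns if column != self.rowid_column)
        row[self.rowid_column] = row_data['_id']
        return row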
    def update(self, document_id, new_values):
        """ Update existing documents in Elastic Search """

        new_values.pop(self.rowid_column, None)

        for key in self.json_columns.intersection(new_values.keys()):
            new_values[key] = json.loads(new_values[key])

        try:
            response = self.client.index(id=document_id,
                                         body=new_values,
                                         refresh=self.refresh,
                                         **self.arguments)
            if self.complete_returning:
                return self._read_by_id(response["_id"])
            return {self.rowid_column: response["_id"]}
        except Exception as exception:
            log2pg(
                "INDEX for {path}/{document_id} and document {document} failed: {exception}"
                .format(
                    path=self.path,
                    document_id=document_id,
                    document=new_values,
                    exception=exception,
                ),
                logging.ERROR,
            )
            return (0, 0)
Example #6
    def build_spec(self, quals):
        Q = {}

        comp_mapper = {
            '>': '$gt',
            '>=': '$gte',
            '<=': '$lte',
            '<': '$lt',
            '<>': '$ne'
        }

        for qual in quals:
            val_formatter = self.fields[qual.field_name]['formatter']
            vform = lambda val: val_formatter(val) if val_formatter is not None else val
            if qual.operator == '=':
                Q[qual.field_name] = vform(qual.value)

            elif qual.operator in comp_mapper:
                comp = Q.setdefault(qual.field_name, {})
                comp[comp_mapper[qual.operator]] = vform(qual.value)
                Q[qual.field_name] = comp

            else:
                log2pg('Qual operator {} not implemented yet: {}'.format(
                    qual.operator, qual))
        return Q
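To make the translation concrete, this is roughly what build_spec produces for a few quals. Qual here is a stand-in for Multicorn's qual objects, and the example assumes no formatter is registered for these fields:

from collections import namedtuple

Qual = namedtuple('Qual', ['field_name', 'operator', 'value'])

quals = [Qual('age', '>=', 21), Qual('age', '<', 65), Qual('city', '=', 'Oslo')]
# build_spec(quals) would return:
#   {'age': {'$gte': 21, '$lt': 65}, 'city': 'Oslo'}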
    def execute(self, quals, columns):
        """ Execute the query """

        try:
            query = self._get_query(quals)

            if query:
                response = self.client.search(index=self.index,
                                              doc_type=self.doc_type,
                                              size=self.scroll_size,
                                              scroll=self.scroll_duration,
                                              q=query)
            else:
                response = self.client.search(index=self.index,
                                              doc_type=self.doc_type,
                                              size=self.scroll_size,
                                              scroll=self.scroll_duration)

            while True:
                scroll_id = response['_scroll_id']

                for result in response['hits']['hits']:
                    yield self._convert_response_row(result, columns, query)

                if len(response['hits']['hits']) < self.scroll_size:
                    return
                response = self.client.scroll(scroll_id=scroll_id,
                                              scroll=self.scroll_duration)
        except Exception as exception:
            log2pg(
                "SEARCH for /{index}/{doc_type} failed: {exception}".format(
                    index=self.index,
                    doc_type=self.doc_type,
                    exception=exception), logging.ERROR)
            return
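The _get_query helper is not part of this example. A minimal sketch that would fit the q= parameter used above, turning equality quals into a Lucene query string; this is an assumption, not the original code:

    def _get_query(self, quals):
        # Sketch: join equality quals into a Lucene query string.
        if not quals:
            return None
        return ' AND '.join(
            '{0}:{1}'.format(qual.field_name, qual.value)
            for qual in quals if qual.operator == '=')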
Example #8
    def execute(self, quals, columns):
        """ Execute the query """

        try:
            query = self._get_query(quals)

            if query:
                response = self.client.search(size=self.scroll_size,
                                              scroll=self.scroll_duration,
                                              q=query,
                                              **self.arguments)
            else:
                response = self.client.search(size=self.scroll_size,
                                              scroll=self.scroll_duration,
                                              **self.arguments)

            while True:
                scroll_id = response["_scroll_id"]

                for result in response["hits"]["hits"]:
                    yield self._convert_response_row(result, columns, query)

                if len(response["hits"]["hits"]) < self.scroll_size:
                    return
                response = self.client.scroll(scroll_id=scroll_id,
                                              scroll=self.scroll_duration)
        except Exception as exception:
            log2pg(
                "SEARCH for {path} failed: {exception}".format(
                    path=self.path, exception=exception),
                logging.ERROR,
            )
            return
Example #9
    def insert(self, new_values):
        log2pg('MARK Insert Request - new values:  %s' % new_values, logging.DEBUG)

        if 'id' not in new_values:
            log2pg('INSERT requires "id" column.  Missing in: %s' % new_values, logging.ERROR)
            return (0, 0)

        document_id = new_values.pop('id')
        return self.es_index(document_id, new_values)
Example #10
    def delete(self, id):
        conn = httplib.HTTPConnection(self.host, self.port)
        conn.request("DELETE", "/%s/%s/%s" % (self.node, self.index, id))
        resp = conn.getresponse()
        if resp.status != 200:
            log2pg('Failed to delete: %s' % resp.read(), logging.ERROR)
            return

        raw = resp.read()
        return json.loads(raw)
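httplib exists only on Python 2; on Python 3 the same request goes through http.client. An equivalent sketch:

    # Python 3 equivalent, using http.client instead of httplib.
    def delete(self, id):
        import http.client
        import json

        conn = http.client.HTTPConnection(self.host, self.port)
        conn.request("DELETE", "/%s/%s/%s" % (self.node, self.index, id))
        resp = conn.getresponse()
        if resp.status != 200:
            log2pg('Failed to delete: %s' % resp.read(), logging.ERROR)
            return

        return json.loads(resp.read())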
Example #11
    def insert(self, new_values):
        log2pg('MARK Insert Request - new values:  %s' % new_values,
               logging.DEBUG)

        if self.rowid_column not in new_values:
            log2pg(
                'INSERT requires "%s" column.  Missing in: %s' %
                (self.rowid_column, new_values), logging.ERROR)
            return (0, 0)

        document_id = new_values.pop(self.rowid_column)
        return self.es_index(document_id, new_values)
Example #12
    def delete(self, document_id):
        """ Delete documents from Elastic Search """

        try:
            response = self.client.delete(id=document_id, **self.arguments)
            return response
        except Exception as exception:
            log2pg(
                "DELETE for {path}/{document_id} failed: {exception}".format(
                    path=self.path, document_id=document_id, exception=exception
                ),
                logging.ERROR,
            )
            return (0, 0)
    def delete(self, document_id):
        """ Delete documents from Elastic Search """

        try:
            response = self.client.delete(index=self.index, doc_type=self.doc_type, id=document_id)
            return response
        except Exception as exception:
            log2pg(
                "DELETE for /{index}/{doc_type}/{document_id} failed: {exception}".format(
                    index=self.index, doc_type=self.doc_type, document_id=document_id, exception=exception
                ),
                logging.ERROR,
            )
            return (0, 0)
    def execute(self, quals, columns, aggs=None, group_clauses=None):
        """ Execute the query """

        try:
            query, query_string = self._get_query(quals,
                                                  aggs=aggs,
                                                  group_clauses=group_clauses)

            is_aggregation = aggs or group_clauses

            if query:
                response = self.client.search(
                    size=self.scroll_size if not is_aggregation else 0,
                    scroll=self.scroll_duration
                    if not is_aggregation else None,
                    body=query,
                    **self.arguments)
            else:
                response = self.client.search(size=self.scroll_size,
                                              scroll=self.scroll_duration,
                                              **self.arguments)

            if not response["hits"]["hits"] and not is_aggregation:
                return

            if is_aggregation:
                yield from self._handle_aggregation_response(
                    query, response, aggs, group_clauses)
                return

            while True:
                self.scroll_id = response["_scroll_id"]

                for result in response["hits"]["hits"]:
                    yield self._convert_response_row(result, columns,
                                                     query_string)

                if len(response["hits"]["hits"]) < self.scroll_size:
                    return
                response = self.client.scroll(scroll_id=self.scroll_id,
                                              scroll=self.scroll_duration)
        except Exception as exception:
            log2pg(
                "SEARCH for {path} failed: {exception}".format(
                    path=self.path, exception=exception),
                logging.ERROR,
            )
            return
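_handle_aggregation_response is not shown in this listing. A sketch of what flattening the response might look like, assuming plain metric aggregations and a composite bucket named "group_buckets" (both names are assumptions):

    def _handle_aggregation_response(self, query, response, aggs, group_clauses):
        # Sketch: flatten an Elasticsearch aggregation response into rows.
        if group_clauses is None:
            # Ungrouped aggregates: a single row of metric values.
            yield {name: response["aggregations"][name]["value"]
                   for name in aggs}
        else:
            # Grouped aggregates: one row per composite bucket.
            for bucket in response["aggregations"]["group_buckets"]["buckets"]:
                row = dict(bucket["key"])
                for name in (aggs or []):
                    row[name] = bucket[name]["value"]
                yield row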
    def get_rel_size(self, quals, columns):
        """ Helps the planner by returning costs.
            Returns a tuple of the form (number of rows, average row width) """

        try:
            query = self._get_query(quals)
            q_dict = json.loads(query)
            response = self.client.count(body=q_dict, index=self.index)
            return (response["count"], len(columns) * 100)
        except Exception as exception:
            log2pg(
                "COUNT for {path} failed: {exception}".format(
                    path=self.path, exception=exception),
                logging.ERROR,
            )
            return (0, 0)
Example #17
    def get_rel_size(self, quals, columns):
        """ Helps the planner by returning costs.
            Returns a tuple of the form (number of rows, average row width) """

        try:
            query = self._get_query(quals)
            if query:
                response = self.client.count(q=query, **self.arguments)
            else:
                response = self.client.count(**self.arguments)
            return (response["count"], len(columns) * 100)
        except Exception as exception:
            log2pg(
                "COUNT for {path} failed: {exception}".format(
                    path=self.path, exception=exception),
                logging.ERROR,
            )
            return (0, 0)
    def execute(self, quals, columns):
        """ Execute the query """

        try:
            query = self._get_query(quals)

            if query:
                response = self.client.search(index=self.index, doc_type=self.doc_type, q=query)
            else:
                response = self.client.search(index=self.index, doc_type=self.doc_type)
            return self._convert_response(response, columns, query)
        except Exception as exception:
            log2pg(
                "SEARCH for /{index}/{doc_type} failed: {exception}".format(
                    index=self.index, doc_type=self.doc_type, exception=exception
                ),
                logging.ERROR,
            )
            return (0, 0)
    def delete(self, document_id):
        """ Delete documents from Elastic Search """

        if self.complete_returning:
            document = self._read_by_id(document_id)
        else:
            document = {self.rowid_column: document_id}

        try:
            self.client.delete(id=document_id, refresh=self.refresh, **self.arguments)
            return document
        except Exception as exception:
            log2pg(
                "DELETE for {path}/{document_id} failed: {exception}".format(
                    path=self.path, document_id=document_id, exception=exception
                ),
                logging.ERROR,
            )
            return (0, 0)
    def update(self, document_id, new_values):
        """ Update existing documents in Elastic Search """

        new_values.pop(self.rowid_column, None)

        try:
            response = self.client.index(index=self.index,
                                         doc_type=self.doc_type,
                                         id=document_id,
                                         body=new_values)
            return response
        except Exception as exception:
            log2pg(
                "INDEX for /{index}/{doc_type}/{document_id} and document {document} failed: {exception}"
                .format(index=self.index,
                        doc_type=self.doc_type,
                        document_id=document_id,
                        document=new_values,
                        exception=exception), logging.ERROR)
            return (0, 0)
    def get_rel_size(self, quals, columns):
        """ Helps the planner by returning costs.
            Returns a tuple of the form (number of rows, average row width) """

        try:
            query = self._get_query(quals)

            if query:
                response = self.client.count(index=self.index, doc_type=self.doc_type, q=query)
            else:
                response = self.client.count(index=self.index, doc_type=self.doc_type)
            return (response["count"], len(columns) * 100)
        except Exception as exception:
            log2pg(
                "COUNT for /{index}/{doc_type} failed: {exception}".format(
                    index=self.index, doc_type=self.doc_type, exception=exception
                ),
                logging.ERROR,
            )
            return (0, 0)
Example #23
    def build_spec(self, quals, trans=True):
        Q = {}

        comp_mapper = {
            '=': '$eq',
            '>': '$gt',
            '>=': '$gte',
            '<=': '$lte',
            '<>': '$ne',
            '<': '$lt',
            (u'=', True): '$in',
            (u'<>', False): '$nin',
            '~~': '$regex'
        }

        # TODO '!~~', '~~*', '!~~*', other binary ones that are composable

        for qual in quals:
            val_formatter = self.fields[qual.field_name]['formatter']
            vform = lambda val: val_formatter(val) if val is not None and val_formatter is not None else val
            if self.debug:
                log2pg('vform {} val_formatter: {} '.format(
                    vform, val_formatter))

            if trans and 'options' in self.fields[qual.field_name] and 'mname' in self.fields[qual.field_name]['options']:
                mongo_field_name = self.fields[qual.field_name]['options']['mname']
            else:
                mongo_field_name = qual.field_name
            if self.debug:
                log2pg('Qual field_name: {} operator: {} value: {}'.format(
                    mongo_field_name, qual.operator, qual.value))

            if qual.operator in comp_mapper:
                comp = Q.setdefault(mongo_field_name, {})
                if qual.operator == '~~':
                    comp[comp_mapper[qual.operator]] = vform(
                        qual.value.replace('%', '.*'))
                else:
                    comp[comp_mapper[qual.operator]] = vform(qual.value)
                Q[mongo_field_name] = comp
                if self.debug:
                    log2pg('Qual {} comp {}'.format(qual.operator, qual.value))
            else:
                log2pg('Qual operator {} not implemented for value {}'.format(
                    qual.operator, qual.value))

        return Q
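For example, the ~~ (LIKE) branch above rewrites only %, so the pattern 'Os%o' becomes the regex 'Os.*o'; the variant further below additionally strips anchoring % and maps _ to '.':

pattern = 'Os%o'.replace('%', '.*')
assert pattern == 'Os.*o'  # stored as {'field': {'$regex': 'Os.*o'}}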
Example #24
    def plan(self, quals, columns):

        # Base pipeline
        pipe = []
        if self.pipe: pipe.extend(self.pipe)

        # Project (rename fields)
        fields = dict([(k, True) for k in columns])
        projectFields = {}
        for f in fields:
            if 'options' in self.fields[f] and 'mname' in self.fields[f]['options']:
                projectFields[f] = '$' + self.fields[f]['options']['mname']
            else:
                projectFields[f] = fields[f]
        if len(projectFields) > 0: pipe.append({"$project": projectFields})
        if self.debug: log2pg('projectFields: {}'.format(projectFields))

        # Match
        Q = self.build_spec(quals)
        if Q: pipe.append({"$match": Q})
        if self.debug: log2pg('matchFields: {}'.format(Q))

        # optimization 1: if columns include field(s) with equality predicate in query,
        # then we don't have to fetch it, as we add them back later
        eqfields = dict([(q.field_name, q.value) for q in quals
                         if q.operator == '='])
        for f in eqfields:
            fields.pop(f)

        if len(fields) == 0:
            # optimization 2: no fields need to be returned, just get counts
            pipe.append({"$count": "rows"})
        elif len(eqfields) > 0:
            # remove constant fields, that get added back later
            pipe.append({"$project": fields})

        # push-down filters through user supplied pipeline
        pipe = self.optimize(pipe)

        return (fields, eqfields, pipe)
Example #25
    def update(self, document_id, new_values):
        """ Update existing documents in Elastic Search """

        new_values.pop(self.rowid_column, None)

        try:
            response = self.client.index(
                id=document_id, body=new_values, **self.arguments
            )
            return response
        except Exception as exception:
            log2pg(
                "INDEX for {path}/{document_id} and document {document} failed: {exception}".format(
                    path=self.path,
                    document_id=document_id,
                    document=new_values,
                    exception=exception,
                ),
                logging.ERROR,
            )
            return (0, 0)
Example #26
    def build_spec(self, quals):
        Q = {}

        comp_mapper = {'>': '$gt',
                       '>=': '$gte',
                       '<=': '$lte',
                       '<': '$lt'}

        for qual in quals:
            val_formatter = self.fields[qual.field_name]['formatter']
            vform = lambda val: val_formatter(val) if val_formatter is not None else val
            if qual.operator == '=':
                Q[qual.field_name] = vform(qual.value)

            elif qual.operator in comp_mapper:
                comp = Q.setdefault(qual.field_name, {})
                comp[comp_mapper[qual.operator]] = vform(qual.value)
                Q[qual.field_name] = comp

            else:
                log2pg('Qual operator {} not implemented yet: {}'.format(qual.operator, qual))
        return Q
    def execute(self, quals, columns):
        """ Execute the query """

        try:
            query = self._get_query(quals)
            q_dict = json.loads(query)
            pg_id = self._get_pg_id(quals)
            response = self.client.search(body=q_dict,
                                          index=self.index,
                                          size=self.size,
                                          explain=self.explain)
            for result in response["hits"]["hits"]:
                yield self._format_out(result, pg_id=pg_id, query=query)
        except Exception as exception:
            log2pg(
                "SEARCH for {path} failed: {exception}".format(
                    path=self.path, exception=exception),
                logging.ERROR,
            )
            return
Example #29
    def insert(self, new_values):
        """ Publish a new / updated / deleted document into RabbitMQ """

        log2pg('MARK Request - new values:  %s' % new_values, logging.DEBUG)

        if 'table' not in new_values:
            log2pg('It requires "table" column. Missing in: %s' % new_values,
                   logging.ERROR)

        if 'id' not in new_values:
            log2pg('It requires "id" column. Missing in: %s' % new_values,
                   logging.ERROR)

        if 'action' not in new_values:
            log2pg('It requires "action" column. Missing in: %s' % new_values,
                   logging.ERROR)

        return self.rabbitmq_publish(new_values)
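The rabbitmq_publish helper is not shown. A minimal sketch using pika; the exchange, routing key, and connection settings here are assumptions, not the original code:

    def rabbitmq_publish(self, new_values):
        # Hypothetical publisher: exchange and routing key are assumptions.
        import json

        import pika

        connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=self.host))
        channel = connection.channel()
        channel.basic_publish(
            exchange=self.exchange,
            routing_key='%s.%s' % (new_values['table'], new_values['action']),
            body=json.dumps(new_values, default=str),
        )
        connection.close()
        return new_values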
Example #30
    def build_spec(self, quals, trans=True):
        Q = {}

        comp_mapper = {'=' : '$eq',
                       '>' : '$gt',
                       '>=': '$gte',
                       '<=': '$lte',
                       '<>': '$ne',
                       '<' : '$lt',
                       (u'=', True) : '$in',
                       (u'<>', False) : '$nin',
                       '~~': '$regex'
                      }

        # TODO '!~~', '~~*', '!~~*', other binary ones that are composable

        for qual in quals:
            val_formatter = self.fields[qual.field_name]['formatter']
            vform = lambda val: val_formatter(val) if val is not None and val_formatter is not None else val
            if self.debug: log2pg('vform {} val_formatter: {} '.format(vform, val_formatter))

            if trans and 'options' in self.fields[qual.field_name] and 'mname' in self.fields[qual.field_name]['options']:
                mongo_field_name = self.fields[qual.field_name]['options']['mname']
            else:
                mongo_field_name = qual.field_name
            if self.debug: log2pg('Qual field_name: {} operator: {} value: {}'.format(mongo_field_name, qual.operator, qual.value))

            if qual.operator in comp_mapper:
                comp = Q.setdefault(mongo_field_name, {})
                if qual.operator == '~~':
                    comp[comp_mapper[qual.operator]] = vform(qual.value.strip('%').replace('%', '.*').replace('_', '.'))
                else:
                    comp[comp_mapper[qual.operator]] = vform(qual.value)
                Q[mongo_field_name] = comp
                if self.debug: log2pg('Qual {} comp {}'.format(qual.operator, comp[comp_mapper[qual.operator]]))
            else:
                log2pg('Qual operator {} not implemented for value {}'.format(qual.operator, qual.value))

        return Q
Example #31
    def execute(self, quals, columns, sortkeys=None):

        fields, eqfields, pipe = self.plan(quals, columns)

        if self.debug: t0 = time.time()
        if self.debug:
            log2pg('Calling aggregate with {} stage pipe {} '.format(
                len(pipe), pipe))

        cur = self.coll.aggregate(pipe, cursor={})

        if self.debug: t1 = time.time()
        docCount = 0
        if self.debug:
            log2pg('cur is returned {} with total {} so far'.format(
                cur, t1 - t0))

        if len(fields) == 0:
            for res in cur:
                docCount = res['rows']
                break

            for x in range(docCount):
                if eqfields: yield eqfields
                else: yield {}
        else:
            for doc in cur:
                doc = dict([(col, dict_traverser(self.fields[col]['path'],
                                                 doc)) for col in columns])
                doc.update(eqfields)
                yield doc
                if self.debug: docCount = docCount + 1

        if self.debug: t2 = time.time()
        if self.debug:
            log2pg('Python rows {} Python_duration {} {} {}ms'.format(
                docCount, (t1 - t0) * 1000, (t2 - t1) * 1000,
                (t2 - t0) * 1000))
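The dict_traverser helper, used above to pull dotted column paths out of nested documents, is not shown. A minimal sketch consistent with path being col.split('.'):

def dict_traverser(path, doc):
    # Walk a list of keys into nested dicts, returning None on any miss.
    for key in path:
        if not isinstance(doc, dict):
            return None
        doc = doc.get(key)
        if doc is None:
            return None
    return doc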
Example #32
    def execute(self, quals, columns):
        """ Should execute the query, but we don't handle it (for now?) """

        log2pg("SELECT isn't implemented for RabbitMQ", logging.ERROR)
        yield {}
Example #33
    def execute(self, quals, columns, d={}):

        if self.debug: t0 = time.time()
        # Only request fields of interest:
        fields = dict([(k, True) for k in columns])

        Q = self.build_spec(quals)

        # optimization: if columns include field(s) with equality predicate in query,
        # then we don't have to fetch them; instead we will inject the exact
        # equality expression into the result set
        eqfields = dict([(q.field_name, q.value) for q in quals if q.operator == '='])
        for f in eqfields:
            fields.pop(f)

        if len(fields) == 0:    # no fields need to be returned, just get counts
            if not self.pipe:
                docCount = self.coll.find(Q).count()
            else:   # there's a pipe with unwind
                arr = self.pipe[0]['$unwind']    # may not be a safe assumption in the future
                countpipe = []
                if Q: countpipe.append({'$match': Q})
                # hack: everyone just gets array size,
                # TODO: this only works for one $unwind for now
                countpipe.append({'$project': {'_id': 0, 'arrsize': {'$size': arr}}})
                countpipe.append({'$group': {'_id': None, 'sum': {'$sum': '$arrsize'}}})
                cur = self.coll.aggregate(countpipe, cursor={})
                docCount = 0
                for res in cur:
                    docCount = res['sum']
                    break

            for x in range(docCount):
                if eqfields: yield eqfields
                else: yield d

            # we are done
            if self.debug: t1 = time.time()

        else:  # we have one or more fields requested, with or without pipe
            if '_id' not in fields:
                fields['_id'] = False

            if self.debug: log2pg('columns: {}'.format(columns))
            if self.debug: log2pg('fields: {}'.format(fields))

            pipe = []
            projectFields = {}
            transkeys = [k for k in self.fields.keys() if 'mname' in self.fields[k].get('options', {})]
            transfields = set(fields.keys()) & set(transkeys)
            if self.debug: log2pg('transfields {} fieldskeys {} transkeys {}'.format(transfields, fields.keys(), transkeys))
            for f in fields:         # some requested fields must be transformed on return
                if self.debug: log2pg('f {} hasoptions {} self.fields[f] {}'.format(f, 'options' in self.fields[f], self.fields[f]))
                if 'options' in self.fields[f] and 'mname' in self.fields[f]['options']:
                    if self.debug: log2pg('self field {} options {}'.format(f, self.fields[f]['options']['mname']))
                    projectFields[f] = '$' + self.fields[f]['options']['mname']
                else:
                    projectFields[f] = fields[f]

            if self.debug: log2pg('projectFields: {}'.format(projectFields))

            # if there was field transformation we have to use the pipeline
            if self.pipe or transfields:
                if self.pipe: pipe.extend(self.pipe)
                if Q: pipe.insert(0, {"$match": Q})
                pipe.append({"$project": projectFields})
                if transfields and Q:
                    # only needed if quals fields are array members, can check that TODO
                    postQ = self.build_spec(quals, False)
                    if Q != postQ: pipe.append({"$match": postQ})

                if self.debug: log2pg('Calling aggregate with {} stage pipe {} '.format(len(pipe), pipe))
                cur = self.coll.aggregate(pipe, cursor={})
            else:
                if self.debug: log2pg('Calling find')
                cur = self.coll.find(Q, fields)

            if self.debug: t1 = time.time()
            docCount = 0
            if self.debug: log2pg('cur is returned {} with total {} so far'.format(cur, t1 - t0))
            for doc in cur:
                doc.update(eqfields)
                yield dict([(col, dict_traverser(self.fields[col]['path'], doc)) for col in columns])
                docCount = docCount + 1

        if self.debug: t2 = time.time()
        if self.debug: log2pg('Python rows {} Python_duration {} {} {}ms'.format(docCount, (t1 - t0) * 1000, (t2 - t1) * 1000, (t2 - t0) * 1000))
Example #34
    def __init__(self, options, columns):
        super(Yamfdw, self).__init__(options, columns)

        self.host_name = options.get('host', 'localhost')
        self.port = int(options.get('port', '27017'))

        self.user = options.get('user')
        self.password = options.get('password')

        self.db_name = options.get('db', 'test')
        self.collection_name = options.get('collection', 'test')

        self.conn = MongoClient(host=self.host_name,
                                port=self.port)

        self.auth_db = options.get('auth_db', self.db_name)

        if self.user:
            self.conn.userprofile.authenticate(self.user,
                                               self.password,
                                               source=self.auth_db)

        self.db = getattr(self.conn, self.db_name)
        self.coll = getattr(self.db, self.collection_name)

        self.debug = options.get('debug', False)

        # if we need to validate or transform any fields this is a place to do it
        # we need column definitions for types to validate we're passing back correct types
        # self.db.add_son_manipulator(Transform(columns))

        if self.debug: log2pg('collection cols: {}'.format(columns))

        self.stats = self.db.command("collstats", self.collection_name)
        self.count = self.stats["count"]
        if self.debug: log2pg('self.stats: {} '.format(self.stats))

        self.indexes = {}
        if self.stats["nindexes"] > 1:
            indexdict = self.coll.index_information()
            if sys.version_info[0] < 3:
                self.indexes = dict([(idesc['key'][0][0], idesc.get('unique', False)) for iname, idesc in indexdict.iteritems()])
            else:
                self.indexes = dict([(idesc['key'][0][0], idesc.get('unique', False)) for iname, idesc in indexdict.items()])
            if self.debug: log2pg('self.indexes: {} '.format(self.indexes))

        self.fields = dict([(col, {'formatter': coltype_formatter(coldef.type_name, coldef.options.get('type', None)),
                                   'options': coldef.options,
                                   'path': col.split('.')}) for (col, coldef) in columns.items()])

        if self.debug: log2pg('self.fields: {} \n columns.items {}'.format(self.fields, columns.items()))

        self.pipe = options.get('pipe')
        if self.pipe:
            self.pipe = json.loads(self.pipe)
            if self.debug: log2pg('pipe is {}'.format(self.pipe))
        else:
            self.pkeys = [(('_id',), 1)]
            for f in self.fields:  # calculate selectivity of each field (once per session)
                if f == '_id': continue
                # check for unique indexes and set those to 1
                if f in self.indexes and self.indexes.get(f):
                    self.pkeys.append(((f,), 1))
                elif f in self.indexes:
                    self.pkeys.append(((f,), min((self.count / 10), 1000)))
                else:
                    self.pkeys.append(((f,), self.count))
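The selectivity estimates collected in self.pkeys presumably reach the planner through Multicorn's get_path_keys hook; a sketch, since the hook itself is not in this listing:

    def get_path_keys(self):
        # Multicorn expects a list of ((column, ...), expected_rows) tuples.
        return getattr(self, 'pkeys', [])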
Example #35
    def execute(self, quals, columns, d={}):

        if self.debug: t0 = time.time()
        # Only request fields of interest:
        fields = dict([(k, True) for k in columns])

        Q = self.build_spec(quals)

        # optimization: if columns include field(s) with equality predicate in query,
        # then we don't have to fetch them; instead we will inject the exact
        # equality expression into the result set
        eqfields = dict([(q.field_name, q.value) for q in quals if q.operator == '='])
        for f in eqfields:
            fields.pop(f)

        if len(fields) == 0:    # no fields need to be returned, just get counts
            if not self.pipe:
                docCount = self.coll.find(Q).count()
            else:   # there's a pipe with unwind
                arr = self.pipe[0]['$unwind']    # may not be a safe assumption in the future
                countpipe = []
                if Q: countpipe.append({'$match': Q})
                # hack: everyone just gets array size,
                # TODO: this only works for one $unwind for now
                countpipe.append({'$project': {'_id': 0, 'arrsize': {'$size': arr}}})
                countpipe.append({'$group': {'_id': None, 'sum': {'$sum': '$arrsize'}}})
                cur = self.coll.aggregate(countpipe, cursor={})
                docCount = 0
                for res in cur:
                    docCount = res['sum']
                    break

            if sys.version_info[0] < 3:
                for x in xrange(docCount):
                    if eqfields: yield eqfields
                    else: yield d
            else:
                for x in range(docCount):
                    if eqfields: yield eqfields
                    else: yield d

            # we are done
            if self.debug: t1 = time.time()

        else:  # we have one or more fields requested, with or without pipe
            if '_id' not in fields:
                fields['_id'] = False

            if self.debug: log2pg('columns: {}'.format(columns))
            if self.debug: log2pg('fields: {}'.format(fields))

            pipe = []
            projectFields = {}
            transkeys = [k for k in self.fields.keys() if 'mname' in self.fields[k].get('options', {})]
            transfields = set(fields.keys()) & set(transkeys)
            if self.debug: log2pg('transfields {} fieldskeys {} transkeys {}'.format(transfields, fields.keys(), transkeys))
            for f in fields:         # some requested fields must be transformed on return
                if self.debug: log2pg('f {} hasoptions {} self.fields[f] {}'.format(f, 'options' in self.fields[f], self.fields[f]))
                if 'options' in self.fields[f] and 'mname' in self.fields[f]['options']:
                    if self.debug: log2pg('self field {} options {}'.format(f, self.fields[f]['options']['mname']))
                    projectFields[f] = '$' + self.fields[f]['options']['mname']
                else:
                    projectFields[f] = fields[f]

            if self.debug: log2pg('projectFields: {}'.format(projectFields))

            # if there was field transformation we have to use the pipeline
            if self.pipe or transfields:
                if self.pipe: pipe.extend(self.pipe)
                if Q: pipe.insert(0, {"$match": Q})
                pipe.append({"$project": projectFields})
                if transfields and Q:
                    # only needed if quals fields are array members, can check that TODO
                    postQ = self.build_spec(quals, False)
                    if Q != postQ: pipe.append({"$match": postQ})

                if self.debug: log2pg('Calling aggregate with {} stage pipe {} '.format(len(pipe), pipe))
                cur = self.coll.aggregate(pipe, cursor={})
            else:
                if self.debug: log2pg('Calling find')
                cur = self.coll.find(Q, fields)

            if self.debug: t1 = time.time()
            docCount = 0
            if self.debug: log2pg('cur is returned {} with total {} so far'.format(cur, t1 - t0))
            for doc in cur:
                doc.update(eqfields)
                yield dict([(col, dict_traverser(self.fields[col]['path'], doc)) for col in columns])
                docCount = docCount + 1

        if self.debug: t2 = time.time()
        if self.debug: log2pg('Python rows {} Python_duration {} {} {}ms'.format(docCount, (t1 - t0) * 1000, (t2 - t1) * 1000, (t2 - t0) * 1000))
Example #36
    def __init__(self, options, columns):
        super(Yamfdw, self).__init__(options, columns)

        self.host_name = options.get('host', 'localhost')
        self.port = int(options.get('port', '27017'))

        self.user = options.get('user')
        self.password = options.get('password')

        self.db_name = options.get('db', 'test')
        self.collection_name = options.get('collection', 'test')

        self.conn = MongoClient(host=self.host_name,
                                port=self.port)

        self.auth_db = options.get('auth_db', self.db_name)

        if self.user:
            self.conn.userprofile.authenticate(self.user,
                                               self.password,
                                               source=self.auth_db)

        self.db = getattr(self.conn, self.db_name)
        self.coll = getattr(self.db, self.collection_name)

        self.debug = options.get('debug', False)

        # if we need to validate or transform any fields this is a place to do it
        # we need column definitions for types to validate we're passing back correct types
        # self.db.add_son_manipulator(Transform(columns))

        if self.debug: log2pg('collection cols: {}'.format(columns))

        self.stats = self.db.command("collstats", self.collection_name)
        self.count = self.stats["count"]
        if self.debug: log2pg('self.stats: {} '.format(self.stats))

        self.indexes = {}
        if self.stats["nindexes"] > 1:
            indexdict = self.coll.index_information()
            self.indexes = dict([(idesc['key'][0][0], idesc.get('unique', False)) for iname, idesc in indexdict.iteritems()])
            if self.debug: log2pg('self.indexes: {} '.format(self.indexes))

        self.fields = dict([(col, {'formatter': coltype_formatter(coldef.type_name, coldef.options.get('type', None)),
                                   'options': coldef.options,
                                   'path': col.split('.')}) for (col, coldef) in columns.items()])

        if self.debug: log2pg('self.fields: {} \n columns.items {}'.format(self.fields, columns.items()))

        self.pipe = options.get('pipe')
        if self.pipe:
            self.pipe = json.loads(self.pipe)
            if self.debug: log2pg('pipe is {}'.format(self.pipe))
        else:
            self.pkeys = [(('_id',), 1)]
            for f in self.fields:  # calculate selectivity of each field (once per session)
                if f == '_id': continue
                # check for unique indexes and set those to 1
                if f in self.indexes and self.indexes.get(f):
                    self.pkeys.append(((f,), 1))
                elif f in self.indexes:
                    self.pkeys.append(((f,), min((self.count / 10), 1000)))
                else:
                    self.pkeys.append(((f,), self.count))