Example #1
def _element_to_bson(key, value, check_keys):
    if not isinstance(key, basestring):
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % key)

    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % key)
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % key)

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return "\x01" + name + struct.pack("<d", value)

    # Use Binary w/ subtype 3 for UUID instances
    try:
        import uuid

        if isinstance(value, uuid.UUID):
            value = Binary(value.bytes, subtype=3)
    except ImportError:
        pass

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            value = struct.pack("<i", len(value)) + value
        return "\x05%s%s%s%s" % (name, struct.pack("<i", len(value)),
                                 chr(subtype), value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        scope = _dict_to_bson(value.scope, False, False)
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return "\x0F" + name + full_length + length + cstring + scope
    if isinstance(value, str):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, unicode):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, dict):
        return "\x03" + name + _dict_to_bson(value, check_keys, False)
    if isinstance(value, (list, tuple)):
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return "\x04" + name + _dict_to_bson(as_dict, check_keys, False)
    if isinstance(value, ObjectId):
        return "\x07" + name + value.binary
    if value is True:
        return "\x08" + name + "\x01"
    if value is False:
        return "\x08" + name + "\x00"
    if isinstance(value, int):
        # TODO this is an ugly way to check for this...
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return "\x12" + name + struct.pack("<q", value)
        return "\x10" + name + struct.pack("<i", value)
    if isinstance(value, long):
        # XXX No long type in Python 3
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        return "\x12" + name + struct.pack("<q", value)
    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        millis = int(calendar.timegm(value.timetuple()) * 1000 +
                     value.microsecond / 1000)
        return "\x09" + name + struct.pack("<q", millis)
    if isinstance(value, Timestamp):
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        return "\x11" + name + inc + time
    if value is None:
        return "\x0A" + name
    if isinstance(value, RE_TYPE):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return "\x0B" + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        return _element_to_bson(key, value.as_doc(), False)
    if isinstance(value, MinKey):
        return "\xFF" + name
    if isinstance(value, MaxKey):
        return "\x7F" + name

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
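A minimal sketch (not from the snippet above) of what the float branch emits, written with Python 3 bytes for clarity: the type tag 0x01, the key as a NUL-terminated C string, then an 8-byte little-endian double.

import struct

def float_element(key, value):
    # 0x01 = BSON double; the key is a C string; the value is a little-endian float64
    return b"\x01" + key.encode("utf-8") + b"\x00" + struct.pack("<d", value)

assert float_element("price", 1.0)[-8:] == struct.pack("<d", 1.0)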
Example #2
 def save(self):
     data = SON()
     data.update(self._data)
     self.db().insert(data)
Example #3
def charges():
    client = pymongo.MongoClient(os.environ['MONGO_URI'])
    db = client.va_circuit_court
    charges = db.criminal_cases.aggregate([{
        '$group': {
            '_id': {
                'CodeSection': '$CodeSection',
                'Race': '$Race'
            },
            'charge': {
                '$first': '$Charge'
            },
            'court': {
                '$first': '$Court'
            },
            'caseNumber': {
                '$first': '$CaseNumber'
            },
            'avgSentence': {
                '$avg': '$SentenceTimeDays'
            },
            'avgSentenceSuspended': {
                '$avg': '$SentenceSuspendedDays'
            },
            'count': {
                '$sum': 1
            }
        }
    }, {
        '$group': {
            '_id': {
                'CodeSection': '$_id.CodeSection'
            },
            'races': {
                '$push': {
                    'race': '$_id.Race',
                    'avgSentence': '$avgSentence',
                    'avgSentenceSuspended': '$avgSentenceSuspended',
                    'count': '$count'
                }
            },
            'count': {
                '$sum': '$count'
            },
            'avgSentence': {
                '$avg': '$avgSentence'
            },
            'avgSentenceSuspended': {
                '$avg': '$avgSentenceSuspended'
            },
            'charge': {
                '$first': '$charge'
            },
            'court': {
                '$first': '$court'
            },
            'caseNumber': {
                '$first': '$caseNumber'
            }
        }
    }, {
        '$match': {
            'count': {
                '$gt': 50
            }
        }
    }, {
        '$sort':
        SON([('_id.CodeSection', 1)])
    }])['result']

    charges_amended = db.criminal_cases.aggregate([{
        '$match': {
            'AmendedCharge': {
                '$ne': None
            }
        }
    }, {
        '$group': {
            '_id': {
                'CodeSection': '$CodeSection',
                'Race': '$Race'
            },
            'charge': {
                '$first': '$Charge'
            },
            'court': {
                '$first': '$Court'
            },
            'caseNumber': {
                '$first': '$CaseNumber'
            },
            'avgSentence': {
                '$avg': '$SentenceTimeDays'
            },
            'avgSentenceSuspended': {
                '$avg': '$SentenceSuspendedDays'
            },
            'count': {
                '$sum': 1
            }
        }
    }, {
        '$group': {
            '_id': {
                'CodeSection': '$_id.CodeSection'
            },
            'races': {
                '$push': {
                    'race': '$_id.Race',
                    'avgSentence': '$avgSentence',
                    'avgSentenceSuspended': '$avgSentenceSuspended',
                    'count': '$count'
                }
            },
            'count': {
                '$sum': '$count'
            },
            'avgSentence': {
                '$avg': '$avgSentence'
            },
            'avgSentenceSuspended': {
                '$avg': '$avgSentenceSuspended'
            },
            'charge': {
                '$first': '$charge'
            },
            'court': {
                '$first': '$court'
            },
            'caseNumber': {
                '$first': '$caseNumber'
            }
        }
    }, {
        '$sort':
        SON([('_id.CodeSection', 1)])
    }])['result']

    for charge in charges:
        charge['amended'] = {
            'count': 0,
            'avgSentence': 0,
            'avgSentenceSuspended': 0,
            'races': []
        }
        for charge_amended in charges_amended:
            if charge_amended['_id']['CodeSection'] == charge['_id'][
                    'CodeSection']:
                charge['amended'] = charge_amended
                break
        charge['races_dict'] = {
            'White Caucasian (Non-Hispanic)': {
                'count': 0,
                'avgSentence': 0,
                'avgSentenceSuspended': 0
            },
            'Black (Non-Hispanic)': {
                'count': 0,
                'avgSentence': 0,
                'avgSentenceSuspended': 0
            }
        }
        charge['amended']['races_dict'] = {
            'White Caucasian (Non-Hispanic)': {
                'count': 0,
                'avgSentence': 0,
                'avgSentenceSuspended': 0
            },
            'Black (Non-Hispanic)': {
                'count': 0,
                'avgSentence': 0,
                'avgSentenceSuspended': 0
            }
        }
        for race in charge['races']:
            if 'race' in race:
                charge['races_dict'][race['race']] = race
        for race in charge['amended']['races']:
            if 'race' in race:
                charge['amended']['races_dict'][race['race']] = race

    return render_template('charges.html',
                           charges=charges,
                           charges_amended=charges_amended)
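Note that subscripting the aggregate() return value with ['result'] is pymongo 2.x behavior; from pymongo 3 onward, aggregate() returns a CommandCursor. A hedged sketch of the modern equivalent:

def run_pipeline(db, pipeline):
    # pymongo 3+ returns a CommandCursor; materialize it with list()
    # instead of indexing ['result'].
    return list(db.criminal_cases.aggregate(pipeline))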
Example #4
def main(uri):
    client = MongoClient(uri)
    #connect to database
    db = client.get_default_database()
    #by Email:
    pipeline = [{
        "$match": {
            "E-mail Address": {
                "$nin": ["null", "?"]
            },
        }
    }, {
        "$group": {
            "_id": {
                "E-mail Address": "$E-mail Address"
            },
            "uniqueIds": {
                "$addToSet": "$_id"
            },
            "count": {
                "$sum": 1
            }
        }
    }, {
        "$match": {
            "count": {
                "$gt": 1
            }
        }
    }, {
        "$sort": SON([("count", -1), ("_id", -1)])
    }]
    '''
  #by First AND Last Name
  pipeline = [
    {"$match":{"First Name":{"$nin":["null","?"]},"Last Name":{"$nin":["null","?"]}}},
    {"$group":{"_id":{"First Name":"$First Name", "Last Name":"$Last Name"},"uniqueIds":{"$addToSet":"$_id"},"count": {"$sum": 1}}},
    {"$match":{"count": {"$gt": 1}}},
    {"$sort": SON([("count", -1), ("_id", -1)])}
  ]
  '''
    #send data to list
    data = list(db.cleancontacts.aggregate(pipeline))
    #db.command('aggregate', 'contacts', pipeline=pipeline, explain=True)
    #pprint.pprint(data)
    #put it in a json object
    json_string = dumps(data, json_options=RELAXED_JSON_OPTIONS)
    json_data = json.loads(json_string)
    #print(json_data)
    new_ids = []
    num_iter = 0
    error_count = 0
    #iterate over json objects
    for contact in json_data:
        num_iter += 1
        ids = contact["uniqueIds"]
        #print(ids)
        ids_to_merge = []
        #iterate over id values in json object contact
        for id in ids:
            id_value = id["$oid"]
            #print(id_value)
            #send ids to an array
            ids_to_merge.append(id_value)
        #get the first object from the id array, send to json
        aggregated_contact_string = dumps(db.cleancontacts.find_one(
            {"_id": ObjectId(ids_to_merge[0])}),
                                          json_options=RELAXED_JSON_OPTIONS)
        aggregated_contact = json.loads(aggregated_contact_string)
        #pprint.pprint(aggregated_contact)
        for id in ids_to_merge[1:]:
            #get data from contacts collection, iterating by id over the array
            contact_data = db.cleancontacts.find_one({"_id": ObjectId(id)})
            #pprint.pprint(contact_data)
            count = 0
            for field in contact_data:
                #iterate over each field and append to json object aggregated_contact, in the correct field, if necessary
                try:
                    #print(field, aggregated_contact[field], contact_data[field])
                    if aggregated_contact[field] == None or aggregated_contact[
                            field] == "null" or aggregated_contact[
                                field] == "" or bool(
                                    re.search(str(contact_data[field]),
                                              str(aggregated_contact[field]))):
                        aggregated_contact[field] = contact_data[field]
                    else:
                        aggregated_contact[field] = str(
                            aggregated_contact[field]) + ', ' + str(
                                contact_data[field])
                except:
                    error_count += 1
        #get rid of id fields in json object
        aggregated_contact.pop('_id', None)
        #pprint.pprint(aggregated_contact)
        #delete old contacts (now aggregated)
        for id in ids_to_merge:
            db.cleancontacts.delete_one({"_id": ObjectId(id)})
        #send new json object to database
        post_id = db.cleancontacts.insert_one(aggregated_contact).inserted_id
        #print(post_id)
        new_ids.append(str(post_id))
        #db.cleancontacts.delete_one({"_id":ObjectId(post_id)})
        #print out information about post
        if num_iter % 10 == 0:
            print("Created " + str(num_iter) +
                  " aggregates from the duplicates.")
    print("Deleted " + str(num_iter) + " duplicate contacts, replaced with " +
          str(len(new_ids)) + " new aggregates. IDs: " +
          ' '.join(str(value)
                   for value in new_ids) + "key errors: " + str(error_count))
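Why SON appears in the $sort stage here: a multi-key sort is order-sensitive, and plain dicts did not guarantee insertion order before Python 3.7, so the key order is pinned explicitly. A small illustration:

from bson.son import SON

# "count" is compared before "_id" because SON preserves insertion order.
sort_stage = {"$sort": SON([("count", -1), ("_id", -1)])}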
Example #5
    def run_query(self, query, user):
        db = self._get_db()

        logger.debug(
            "mongodb connection string: %s", self.configuration["connectionString"]
        )
        logger.debug("mongodb got query: %s", query)

        try:
            query_data = parse_query_json(query)
        except ValueError:
            return None, "Invalid query format. The query is not a valid JSON."

        if "collection" not in query_data:
            return None, "'collection' must have a value to run a query"
        else:
            collection = query_data["collection"]

        q = query_data.get("query", None)
        f = None

        aggregate = query_data.get("aggregate", None)
        if aggregate:
            for step in aggregate:
                if "$sort" in step:
                    sort_list = []
                    for sort_item in step["$sort"]:
                        sort_list.append((sort_item["name"], sort_item["direction"]))

                    step["$sort"] = SON(sort_list)

        if "fields" in query_data:
            f = query_data["fields"]

        s = None
        if "sort" in query_data and query_data["sort"]:
            s = []
            for field_data in query_data["sort"]:
                s.append((field_data["name"], field_data["direction"]))

        columns = []
        rows = []

        cursor = None
        if q or (not q and not aggregate):
            if s:
                cursor = db[collection].find(q, f).sort(s)
            else:
                cursor = db[collection].find(q, f)

            if "skip" in query_data:
                cursor = cursor.skip(query_data["skip"])

            if "limit" in query_data:
                cursor = cursor.limit(query_data["limit"])

            if "count" in query_data:
                cursor = cursor.count()

        elif aggregate:
            allow_disk_use = query_data.get("allowDiskUse", False)
            r = db[collection].aggregate(aggregate, allowDiskUse=allow_disk_use)

            # Backwards compatibility with older pymongo versions.
            #
            # Older pymongo version would return a dictionary from an aggregate command.
            # The dict would contain a "result" key which would hold the cursor.
            # Newer ones return pymongo.command_cursor.CommandCursor.
            if isinstance(r, dict):
                cursor = r["result"]
            else:
                cursor = r

        if "count" in query_data:
            columns.append(
                {"name": "count", "friendly_name": "count", "type": TYPE_INTEGER}
            )

            rows.append({"count": cursor})
        else:
            rows, columns = parse_results(cursor)

        if f:
            ordered_columns = []
            for k in sorted(f, key=f.get):
                column = _get_column_by_name(columns, k)
                if column:
                    ordered_columns.append(column)

            columns = ordered_columns

        if query_data.get("sortColumns"):
            reverse = query_data["sortColumns"] == "desc"
            columns = sorted(columns, key=lambda col: col["name"], reverse=reverse)

        data = {"columns": columns, "rows": rows}
        error = None
        json_data = json_dumps(data, cls=MongoDBJSONEncoder)

        return json_data, error
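The backwards-compatibility branch above can be factored into a small helper; a sketch assuming only the two return shapes the comments describe (a dict holding a "result" key, or a CommandCursor):

def normalize_aggregate_result(r):
    # Older pymongo returned {"ok": 1.0, "result": [...]} from aggregate;
    # newer versions return a directly iterable CommandCursor.
    return r["result"] if isinstance(r, dict) else r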
Example #6
def _authenticate_gssapi(credentials, sock_info):
    """Authenticate using GSSAPI.
    """
    if not HAVE_KERBEROS:
        raise ConfigurationError('The "kerberos" module must be '
                                 'installed to use GSSAPI authentication.')

    try:
        username = credentials.username
        password = credentials.password
        props = credentials.mechanism_properties
        # Starting here and continuing through the while loop below - establish
        # the security context. See RFC 4752, Section 3.1, first paragraph.
        host = sock_info.address[0]
        if props.canonicalize_host_name:
            host = _canonicalize_hostname(host)
        service = props.service_name + '@' + host
        if props.service_realm is not None:
            service = service + '@' + props.service_realm

        if password is not None:
            if _USE_PRINCIPAL:
                # Note that, though we use unquote_plus for unquoting URI
                # options, we use quote here. Microsoft's UrlUnescape (used
                # by WinKerberos) doesn't support +.
                principal = ":".join((quote(username), quote(password)))
                result, ctx = kerberos.authGSSClientInit(
                    service, principal, gssflags=kerberos.GSS_C_MUTUAL_FLAG)
            else:
                if '@' in username:
                    user, domain = username.split('@', 1)
                else:
                    user, domain = username, None
                result, ctx = kerberos.authGSSClientInit(
                    service,
                    gssflags=kerberos.GSS_C_MUTUAL_FLAG,
                    user=user,
                    domain=domain,
                    password=password)
        else:
            result, ctx = kerberos.authGSSClientInit(
                service, gssflags=kerberos.GSS_C_MUTUAL_FLAG)

        if result != kerberos.AUTH_GSS_COMPLETE:
            raise OperationFailure('Kerberos context failed to initialize.')

        try:
            # pykerberos uses a weird mix of exceptions and return values
            # to indicate errors.
            # 0 == continue, 1 == complete, -1 == error
            # Only authGSSClientStep can return 0.
            if kerberos.authGSSClientStep(ctx, '') != 0:
                raise OperationFailure('Unknown kerberos '
                                       'failure in step function.')

            # Start a SASL conversation with mongod/s
            # Note: pykerberos deals with base64 encoded byte strings.
            # Since mongo accepts base64 strings as the payload we don't
            # have to use bson.binary.Binary.
            payload = kerberos.authGSSClientResponse(ctx)
            cmd = SON([('saslStart', 1), ('mechanism', 'GSSAPI'),
                       ('payload', payload), ('autoAuthorize', 1)])
            response = sock_info.command('$external', cmd)

            # Limit how many times we loop to catch protocol / library issues
            for _ in range(10):
                result = kerberos.authGSSClientStep(ctx,
                                                    str(response['payload']))
                if result == -1:
                    raise OperationFailure('Unknown kerberos '
                                           'failure in step function.')

                payload = kerberos.authGSSClientResponse(ctx) or ''

                cmd = SON([('saslContinue', 1),
                           ('conversationId', response['conversationId']),
                           ('payload', payload)])
                response = sock_info.command('$external', cmd)

                if result == kerberos.AUTH_GSS_COMPLETE:
                    break
            else:
                raise OperationFailure('Kerberos '
                                       'authentication failed to complete.')

            # Once the security context is established actually authenticate.
            # See RFC 4752, Section 3.1, last two paragraphs.
            if kerberos.authGSSClientUnwrap(ctx, str(
                    response['payload'])) != 1:
                raise OperationFailure('Unknown kerberos '
                                       'failure during GSS_Unwrap step.')

            if kerberos.authGSSClientWrap(
                    ctx, kerberos.authGSSClientResponse(ctx), username) != 1:
                raise OperationFailure('Unknown kerberos '
                                       'failure during GSS_Wrap step.')

            payload = kerberos.authGSSClientResponse(ctx)
            cmd = SON([('saslContinue', 1),
                       ('conversationId', response['conversationId']),
                       ('payload', payload)])
            sock_info.command('$external', cmd)

        finally:
            kerberos.authGSSClientClean(ctx)

    except kerberos.KrbError as exc:
        raise OperationFailure(str(exc))
Example #7
def searchMongoAlerts(mozdefdb):
    attackers = mozdefdb['attackers']
    alerts = mozdefdb['alerts']
    # search the last X alerts for IP addresses
    # aggregated by CIDR mask/24

    # aggregate IPv4 addresses in the most recent alerts
    # to find common attackers.
    ipv4TopHits = alerts.aggregate([
        {
            "$sort": {
                "utcepoch": -1
            }
        },  # reverse sort the current alerts
        {
            "$limit": 100
        },  #most recent 100
        {
            "$match": {
                "events.documentsource.details.sourceipaddress": {
                    "$exists": True
                }
            }
        },  # must have an ip address
        {
            "$match": {
                "attackerid": {
                    "$exists": False
                }
            }
        },  # must not be already related to an attacker
        {
            "$group": {
                "_id": {
                    "ipaddress":
                    "$events.documentsource.details.sourceipaddress"
                }
            }
        },  # grab ip address from the events
        {
            "$unwind": "$_id.ipaddress"
        },  # separate all ips from their alerts
        {
            "$group": {
                "_id": "$_id.ipaddress",
                "hitcount": {
                    "$sum": 1
                }
            }
        },  # count by ip
        {
            "$match": {
                "hitcount": {
                    "$gt": 10
                }
            }
        },  # limit to those with 10 observances
        {
            "$sort": SON([("hitcount", -1), ("_id", -1)])
        },  # sort 
        {
            "$limit": 10
        }  # top 10
    ])
    for ip in ipv4TopHits['result']:
        if netaddr.valid_ipv4(ip['_id']):
            ipcidr = netaddr.IPNetwork(ip['_id'])
            # expand it to a /24 CIDR
            # todo: lookup ipwhois for asn_cidr value
            # potentially with a max mask value (i.e. asn is /8, limit attackers to /24)
            ipcidr.prefixlen = 24

            # append to or create attacker.
            # does this match an existing attacker's indicators
            if not ipcidr.ip.is_loopback() and not ipcidr.ip.is_private(
            ) and not ipcidr.ip.is_reserved():
                logger.debug('searching for alert ip ' + str(ipcidr))
                attacker = attackers.find_one(
                    {'indicators.ipv4address': str(ipcidr)})

                if attacker is None:
                    # new attacker
                    # generate a meteor-compatible ID
                    # save the ES document type, index, id
                    # and add a sub list for future events
                    logger.debug('new attacker from alerts')
                    newAttacker = genNewAttacker()

                    # str to get the ip/cidr rather than netblock cidr.
                    # i.e. '1.2.3.4/24' not '1.2.3.0/24'
                    newAttacker['indicators'].append(
                        dict(ipv4address=str(ipcidr)))
                    matchingalerts = alerts.find({
                        "events.documentsource.details.sourceipaddress":
                        str(ipcidr.ip),
                    })
                    if matchingalerts is not None:
                        # update list of alerts this attacker matched.
                        for alert in matchingalerts:
                            newAttacker['alerts'].append(
                                dict(alertid=alert['_id']))
                            # update alert with attackerID
                            alert['attackerid'] = newAttacker['_id']
                            alerts.save(alert)

                            #add the events from this alert:
                            for e in alert['events']:
                                newAttacker['events'].append(e)
                    newAttacker['alertscount'] = len(newAttacker['alerts'])
                    newAttacker['eventscount'] = len(newAttacker['events'])
                    if newAttacker['eventscount'] > 0:
                        newAttacker['lastseentimestamp'] = toUTC(
                            newAttacker['events'][-1]['documentsource']
                            ['utctimestamp'], 'UTC')
                    attackers.insert(newAttacker)
                    # update geoIP info
                    latestGeoIP = [
                        a['events'] for a in alerts.find({
                            "events.documentsource.details.sourceipaddress":
                            str(ipcidr.ip),
                        })
                    ][-1][0]['documentsource']
                    updateAttackerGeoIP(mozdefdb, newAttacker['_id'],
                                        latestGeoIP)

                else:
                    logger.debug('found existing attacker in alerts')
                    # if alert not present in this attackers list
                    # append this to the list
                    # todo: trim the list at X (i.e. last 100)
                    # search alerts without attackerid
                    matchingalerts = alerts.find({
                        "events.documentsource.details.sourceipaddress":
                        str(ipcidr.ip),
                        "attackerid": {
                            "$exists": False
                        }
                    })
                    if matchingalerts is not None:
                        #attacker['eventscount'] = len(attacker['events'])
                        logger.debug('matched alert with attacker')

                        # update list of alerts this attacker matched.
                        for alert in matchingalerts:
                            attacker['alerts'].append(
                                dict(alertid=alert['_id']))
                            # update alert with attackerID
                            alert['attackerid'] = attacker['_id']
                            alerts.save(alert)
                            #add the events from this alert:
                            for e in alert['events']:
                                attacker['events'].append(e)

                            # geo ip could have changed, update it
                            # to the latest
                            updateAttackerGeoIP(
                                mozdefdb, attacker['_id'],
                                alert['events'][-1]['documentsource'])

                        # update last seen time
                        attacker['lastseentimestamp'] = toUTC(
                            attacker['events'][-1]['documentsource']
                            ['utctimestamp'], 'UTC')
                        # update counts
                        attacker['alertscount'] = len(attacker['alerts'])
                        attacker['eventscount'] = len(attacker['events'])
                        attackers.save(attacker)
Example #8
    def __query_spec(self):
        """Get the spec to use for a query.
        """
        operators = {}
        if self.__ordering:
            operators["$orderby"] = self.__ordering
        if self.__explain:
            operators["$explain"] = True
        if self.__hint:
            operators["$hint"] = self.__hint
        if self.__snapshot:
            operators["$snapshot"] = True
        if self.__max_scan:
            operators["$maxScan"] = self.__max_scan
        if self.__collection.database.connection.is_mongos:
            read_pref = {
                'mode': read_preferences.mongos_mode(self.__read_preference)}

            if self.__tag_sets and self.__tag_sets != [{}]:
                read_pref['tags'] = self.__tag_sets

            operators['$readPreference'] = read_pref

        if operators:
            # Make a shallow copy so we can cleanly rewind or clone.
            spec = self.__spec.copy()

            # Only commands that can be run on secondaries should have any
            # operators added to the spec.  Command queries can be issued
            # by db.command or calling find_one on $cmd directly
            is_cmd = self.collection.name == "$cmd"
            if is_cmd:
                # Don't change commands that can't be sent to secondaries
                command_name = spec.keys()[0].lower()
                if command_name not in secondary_ok_commands:
                    return spec
                elif command_name == 'mapreduce':
                    # mapreduce shouldn't be changed if its not inline
                    out = spec.get('out')
                    if not isinstance(out, dict) or not out.get('inline'):
                        return spec
            elif "$query" not in spec:
                # $query has to come first
                spec = SON({"$query": spec})

            if not isinstance(spec, SON):
                # Ensure the spec is SON. As order is important this will
                # ensure its set before merging in any extra operators.
                spec = SON(spec)

            spec.update(operators)
            return spec
        # Have to wrap with $query if "query" is the first key.
        # We can't just use $query anytime "query" is a key as
        # that breaks commands like count and find_and_modify.
        # Checking spec.keys()[0] covers the case that the spec
        # was passed as an instance of SON or OrderedDict.
        elif ("query" in self.__spec and
              (len(self.__spec) == 1 or self.__spec.keys()[0] == "query")):
            return SON({"$query": self.__spec})

        return self.__spec
Example #9
def test_create():
    cmd = parse_spec(SON([("create", "foo")]))
    assert cmd.name == "create"
    assert cmd.coll == "foo"
    assert cmd.tags == {}
    assert cmd.metrics == {}
Example #10
        6: "$hashNum",
        7: "$date"
    }
    pipe = [{
        "$match": {
            "user": user_num
        }
    }, {
        "$group": {
            "_id": metric_dict[metric],
            "count": {
                "$sum": 1
            }
        }
    }, {
        "$sort": SON([("_id", -1)])
    }]
    result = db.command('aggregate', 'calls', pipeline=pipe)

    bins = {}
    for each_bin in result['result']:
        bins[each_bin['_id']] = each_bin['count']
    print bins

    if metric == 7:
        times = bins.keys()
        if not times:
            pass
    elif group_time == 0:  # group by hour
            plt.hist([t.hour for t in times], bins=24)  # to bin by hour
            plt.title('Histogram of call times by hour for user #{num}'.format(
Example #11
 def __init__(self, args, ver):
         '''
         Collect the attributes needed by the collection-splitting-by-chromosome
         function, which is tuned for multiprocess execution. The attributes must
         never be mutated later in the parallel processes. They are obtained mostly
         from the arguments specified by the researcher. A few non-obvious but
         important details about the attributes. The quasi-extension of the
         collections: it is needed, at a minimum, to determine the sorting rules
         and the formatting of the output files. Sorting of src-db-VCF and
         src-db-BED: it is done by coordinates to support tabix indexing of the
         output tables. Projection (field selection): for src-db-VCF it is
         extremely hard to implement because the corresponding collections contain
         a variety of nested structures and MongoDB forbids the dot form of
         addressing the array elements being selected. As for src-db-BED, once only
         a subset of fields is kept, compliance with the BED specification cannot
         be guaranteed, so the output will be produced as nothing more than plain
         tab-separated text (trg-(db-)TSV).
         '''
         client = MongoClient()
         self.src_db_name = args.src_db_name
         self.src_coll_names = client[self.src_db_name].list_collection_names()
         src_coll_ext = self.src_coll_names[0].rsplit('.', maxsplit=1)[1]
         if '/' in args.trg_place:
                 self.trg_dir_path = os.path.normpath(args.trg_place)
         elif args.trg_place != self.src_db_name:
                 self.trg_db_name = args.trg_place
                 resolve_db_existence(self.trg_db_name)
         else:
                 raise DbAlreadyExistsError()
         if src_coll_ext == 'vcf':
                 self.chrom_field_name = '#CHROM'
         elif src_coll_ext == 'bed':
                 self.chrom_field_name = 'chrom'
         elif args.chrom_field_name is None:
                 self.chrom_field_name = list(client[self.src_db_name][self.src_coll_names[0]].find_one())[1]
         else:
                 self.chrom_field_name = args.chrom_field_name
         self.mongo_aggr_draft = [{'$match': {self.chrom_field_name: None}}]
         if src_coll_ext == 'vcf':
                 self.mongo_aggr_draft.append({'$sort': SON([('#CHROM', ASCENDING),
                                                             ('POS', ASCENDING)])})
         elif src_coll_ext == 'bed':
                 self.mongo_aggr_draft.append({'$sort': SON([('chrom', ASCENDING),
                                                             ('start', ASCENDING),
                                                             ('end', ASCENDING)])})
         if args.proj_fields is None or src_coll_ext == 'vcf':
                 self.mongo_findone_args = [None, None]
                 self.trg_file_fmt = src_coll_ext
         else:
                 mongo_project = {field_name: 1 for field_name in args.proj_fields.split(',')}
                 self.mongo_aggr_draft.append({'$project': mongo_project})
                 self.mongo_findone_args = [None, mongo_project]
                 self.trg_file_fmt = 'tsv'
         if args.sec_delimiter == 'colon':
                 self.sec_delimiter = ':'
         elif args.sec_delimiter == 'comma':
                 self.sec_delimiter = ','
         elif args.sec_delimiter == 'low_line':
                 self.sec_delimiter = '_'
         elif args.sec_delimiter == 'pipe':
                 self.sec_delimiter = '|'
         elif args.sec_delimiter == 'semicolon':
                 self.sec_delimiter = ';'
         if args.ind_field_names is None:
                 self.ind_field_names = args.ind_field_names
         else:
                 self.ind_field_names = args.ind_field_names.split(',')
         self.ver = ver
         client.close()
Example #12
        del df['_id']
    return df

def upload3(id, j):
    step0 = time.time()
    frames = []
    for i in range(1,50):
        frames.append(read_mongo(sensors,{"id":id},page_num=i))
    step1 = time.time()
    df = pd.concat(frames)
    print j, ": Data loaded... (%ss)" % (round((step1 - step0), 1))
    return df

pipeline = [
    { "$match": { "id": int(id), "ts": {"$gt": install_date } } },
    { "$sort" : SON([("ts", 1)]) }
]
sensor_data = sensors.aggregate(pipeline, allowDiskUse=True)


def read_mongo(collection, chunksize = 1000, page_num=1, no_id=True):
    # Calculate number of documents to skip
    skips = chunksize * (page_num - 1)
    print skips
    # Sorry, this is in spanish
    # https://www.toptal.com/python/c%C3%B3digo-buggy-python-los-10-errores-m%C3%A1s-comunes-que-cometen-los-desarrolladores-python/es
    # Make a query to the specific DB and Collection
    pipeline = [
                { "$match": { "id": 209, "ts": {"$gt": install_date } } },
                { "$skip" : skips},
                { "$limit": chunksize },
Example #13
    def _execute_command(
        self,
        generator,
        write_concern,
        session,
        sock_info,
        op_id,
        retryable,
        full_result,
        final_write_concern=None,
    ):
        db_name = self.collection.database.name
        client = self.collection.database.client
        listeners = client._event_listeners

        if not self.current_run:
            self.current_run = next(generator)
            self.next_run = None
        run = self.current_run

        # sock_info.command validates the session, but we use
        # sock_info.write_command.
        sock_info.validate_session(client, session)
        last_run = False

        while run:
            if not self.retrying:
                self.next_run = next(generator, None)
                if self.next_run is None:
                    last_run = True

            cmd_name = _COMMANDS[run.op_type]
            bwc = self.bulk_ctx_class(
                db_name,
                cmd_name,
                sock_info,
                op_id,
                listeners,
                session,
                run.op_type,
                self.collection.codec_options,
            )

            while run.idx_offset < len(run.ops):
                # If this is the last possible operation, use the
                # final write concern.
                if last_run and (len(run.ops) - run.idx_offset) == 1:
                    write_concern = final_write_concern or write_concern

                cmd = SON([(cmd_name, self.collection.name),
                           ("ordered", self.ordered)])
                if self.comment:
                    cmd["comment"] = self.comment
                if not write_concern.is_server_default:
                    cmd["writeConcern"] = write_concern.document
                if self.bypass_doc_val:
                    cmd["bypassDocumentValidation"] = True
                if self.let is not None and run.op_type in (_DELETE, _UPDATE):
                    cmd["let"] = self.let
                if session:
                    # Start a new retryable write unless one was already
                    # started for this command.
                    if retryable and not self.started_retryable_write:
                        session._start_retryable_write()
                        self.started_retryable_write = True
                    session._apply_to(cmd, retryable, ReadPreference.PRIMARY,
                                      sock_info)
                sock_info.send_cluster_time(cmd, session, client)
                sock_info.add_server_api(cmd)
                ops = islice(run.ops, run.idx_offset, None)

                # Run as many ops as possible in one command.
                if write_concern.acknowledged:
                    result, to_send = bwc.execute(cmd, ops, client)

                    # Retryable writeConcernErrors halt the execution of this run.
                    wce = result.get("writeConcernError", {})
                    if wce.get("code", 0) in _RETRYABLE_ERROR_CODES:
                        # Synthesize the full bulk result without modifying the
                        # current one because this write operation may be retried.
                        full = copy.deepcopy(full_result)
                        _merge_command(run, full, run.idx_offset, result)
                        _raise_bulk_write_error(full)

                    _merge_command(run, full_result, run.idx_offset, result)

                    # We're no longer in a retry once a command succeeds.
                    self.retrying = False
                    self.started_retryable_write = False

                    if self.ordered and "writeErrors" in result:
                        break
                else:
                    to_send = bwc.execute_unack(cmd, ops, client)

                run.idx_offset += len(to_send)

            # We're supposed to continue if errors are
            # at the write concern level (e.g. wtimeout)
            if self.ordered and full_result["writeErrors"]:
                break
            # Reset our state
            self.current_run = run = self.next_run
Example #14
 def add_delete(self, selector, limit):
     """Create a delete document and add it to the list of ops.
     """
     cmd = SON([('q', selector), ('limit', limit)])
     self.ops.append((_DELETE, cmd))
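For reference, each queued op mirrors the documents the server's delete command expects: {"q": <filter>, "limit": 0 or 1}. A usage sketch with a hypothetical selector:

from bson.son import SON

# limit=1 deletes at most one matching document; limit=0 deletes all matches.
op = SON([('q', {'status': 'stale'}), ('limit', 1)])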
Example #15
    async def test_query_array_of_documents(self):
        db = self.db

        # Start Example 29
        # Subdocument key order matters in a few of these examples so we have
        # to use bson.son.SON instead of a Python dict.
        from bson.son import SON
        await db.inventory.insert_many([{
            "item":
            "journal",
            "instock": [
                SON([("warehouse", "A"), ("qty", 5)]),
                SON([("warehouse", "C"), ("qty", 15)])
            ]
        }, {
            "item":
            "notebook",
            "instock": [SON([("warehouse", "C"), ("qty", 5)])]
        }, {
            "item":
            "paper",
            "instock": [
                SON([("warehouse", "A"), ("qty", 60)]),
                SON([("warehouse", "B"), ("qty", 15)])
            ]
        }, {
            "item":
            "planner",
            "instock": [
                SON([("warehouse", "A"), ("qty", 40)]),
                SON([("warehouse", "B"), ("qty", 5)])
            ]
        }, {
            "item":
            "postcard",
            "instock": [
                SON([("warehouse", "B"), ("qty", 15)]),
                SON([("warehouse", "C"), ("qty", 35)])
            ]
        }])
        # End Example 29

        # Start Example 30
        cursor = db.inventory.find(
            {"instock": SON([("warehouse", "A"), ("qty", 5)])})
        # End Example 30

        self.assertEqual(await count(cursor), 1)

        # Start Example 31
        cursor = db.inventory.find(
            {"instock": SON([("qty", 5), ("warehouse", "A")])})
        # End Example 31

        self.assertEqual(await count(cursor), 0)

        # Start Example 32
        cursor = db.inventory.find({'instock.0.qty': {"$lte": 20}})
        # End Example 32

        self.assertEqual(await count(cursor), 3)

        # Start Example 33
        cursor = db.inventory.find({'instock.qty': {"$lte": 20}})
        # End Example 33

        self.assertEqual(await count(cursor), 5)

        # Start Example 34
        cursor = db.inventory.find(
            {"instock": {
                "$elemMatch": {
                    "qty": 5,
                    "warehouse": "A"
                }
            }})
        # End Example 34

        self.assertEqual(await count(cursor), 1)

        # Start Example 35
        cursor = db.inventory.find(
            {"instock": {
                "$elemMatch": {
                    "qty": {
                        "$gt": 10,
                        "$lte": 20
                    }
                }
            }})
        # End Example 35

        self.assertEqual(await count(cursor), 3)

        # Start Example 36
        cursor = db.inventory.find({"instock.qty": {"$gt": 10, "$lte": 20}})
        # End Example 36

        self.assertEqual(await count(cursor), 4)

        # Start Example 37
        cursor = db.inventory.find({
            "instock.qty": 5,
            "instock.warehouse": "A"
        })
        # End Example 37

        self.assertEqual(await count(cursor), 2)
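The key-order sensitivity that Examples 30 and 31 demonstrate also shows up in SON itself: comparing two SONs is order-sensitive, while comparing a SON to a plain dict is not.

from bson.son import SON

a = SON([("warehouse", "A"), ("qty", 5)])
b = SON([("qty", 5), ("warehouse", "A")])
assert a != b         # SON-to-SON comparison is order-sensitive
assert a == dict(b)   # comparison to a plain dict is order-insensitive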
Example #16
def test_empty():
    cmd = parse_spec(SON([]))
    assert cmd is None
Example #17
def _authenticate_scram(credentials, sock_info, mechanism):
    """Authenticate using SCRAM."""
    username = credentials.username
    if mechanism == 'SCRAM-SHA-256':
        digest = "sha256"
        digestmod = hashlib.sha256
        data = saslprep(credentials.password).encode("utf-8")
    else:
        digest = "sha1"
        digestmod = hashlib.sha1
        data = _password_digest(username, credentials.password).encode("utf-8")
    source = credentials.source
    cache = credentials.cache

    # Make local
    _hmac = hmac.HMAC

    ctx = sock_info.auth_ctx.get(credentials)
    if ctx and ctx.speculate_succeeded():
        nonce, first_bare = ctx.scram_data
        res = ctx.speculative_authenticate
    else:
        nonce, first_bare, cmd = _authenticate_scram_start(
            credentials, mechanism)
        res = sock_info.command(source, cmd)

    server_first = res['payload']
    parsed = _parse_scram_response(server_first)
    iterations = int(parsed[b'i'])
    if iterations < 4096:
        raise OperationFailure("Server returned an invalid iteration count.")
    salt = parsed[b's']
    rnonce = parsed[b'r']
    if not rnonce.startswith(nonce):
        raise OperationFailure("Server returned an invalid nonce.")

    without_proof = b"c=biws,r=" + rnonce
    if cache.data:
        client_key, server_key, csalt, citerations = cache.data
    else:
        client_key, server_key, csalt, citerations = None, None, None, None

    # Salt and / or iterations could change for a number of different
    # reasons. Either changing invalidates the cache.
    if not client_key or salt != csalt or iterations != citerations:
        salted_pass = hashlib.pbkdf2_hmac(digest, data,
                                          standard_b64decode(salt), iterations)
        client_key = _hmac(salted_pass, b"Client Key", digestmod).digest()
        server_key = _hmac(salted_pass, b"Server Key", digestmod).digest()
        cache.data = (client_key, server_key, salt, iterations)
    stored_key = digestmod(client_key).digest()
    auth_msg = b",".join((first_bare, server_first, without_proof))
    client_sig = _hmac(stored_key, auth_msg, digestmod).digest()
    client_proof = b"p=" + standard_b64encode(_xor(client_key, client_sig))
    client_final = b",".join((without_proof, client_proof))

    server_sig = standard_b64encode(
        _hmac(server_key, auth_msg, digestmod).digest())

    cmd = SON([('saslContinue', 1), ('conversationId', res['conversationId']),
               ('payload', Binary(client_final))])
    res = sock_info.command(source, cmd)

    parsed = _parse_scram_response(res['payload'])
    if not hmac.compare_digest(parsed[b'v'], server_sig):
        raise OperationFailure("Server returned an invalid signature.")

    # A third empty challenge may be required if the server does not support
    # skipEmptyExchange: SERVER-44857.
    if not res['done']:
        cmd = SON([('saslContinue', 1),
                   ('conversationId', res['conversationId']),
                   ('payload', Binary(b''))])
        res = sock_info.command(source, cmd)
        if not res['done']:
            raise OperationFailure('SASL conversation failed to complete.')
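_parse_scram_response is not shown in this snippet; a plausible sketch, assuming the standard SCRAM wire format of comma-separated key=value pairs (e.g. b"r=<nonce>,s=<salt>,i=4096"):

def parse_scram_response(payload):
    # Split b"r=...,s=...,i=..." into {b'r': ..., b's': ..., b'i': ...}.
    return dict(item.split(b"=", 1) for item in payload.split(b","))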
Example #18
def get_logged_task_info(client, task_id):
    # task name
    task_name = client.find_one({
        "_meta.task_id": task_id,
        "_meta.task_name": {
            "$exists": True
        }
    })
    if task_name is not None:
        task_name = task_name["_meta"]["task_name"]

    # date
    task_start = client.find({
        "_meta.task_id": task_id
    }).sort([("$natural", pymongo.ASCENDING)]).limit(1)
    if task_start.alive:
        task_start = task_start.next()["_meta"]["inserted_at"].strftime(
            "%Y/%m/%d %H:%M:%S")
    else:
        task_start = "N/A"
    task_end = client.find({
        "_meta.task_id": task_id
    }).sort([("$natural", pymongo.DESCENDING)]).limit(1)
    if task_end.alive:
        task_end = task_end.next()["_meta"]["inserted_at"].strftime(
            "%Y/%m/%d %H:%M:%S")
    else:
        task_end = "N/A"

    # count
    data_size = client.find({"_meta.task_id": task_id}).count()

    # stat
    stat = client.aggregate([
        {
            "$match": {
                "_meta.task_id": task_id
            }
        },
        {
            "$group": {
                "_id": "$_meta.stored_type",
                "size": {
                    "$sum": 1
                }
            }
        },
        {
            "$sort": SON([("_id", pymongo.ASCENDING)])
        },
    ])
    if stat["ok"] != 1.0:
        stat = None
    else:
        stat = {d["_id"]: d["size"] for d in stat["result"]}

    return {
        "Task ID": task_id,
        "Task Name": task_name,
        "Date": task_start + " - " + task_end,
        "Data Size": data_size,
        "Data": stat,
    }
Example #19
 def speculate_command(self):
     cmd = SON([('authenticate', 1), ('mechanism', 'MONGODB-X509')])
     if self.credentials.username is not None:
         cmd['user'] = self.credentials.username
     return cmd
Example #20
    def printDbStats(self):
        #data01 = ( self.db.command( { "serverStatus" : 1, "repl": 0, "metrics": 0, "locks": 1, "wiredTiger": 0 } ) )
        data01 = self.db.command("serverStatus")
        Host01 = data01["host"][0:14]
        Version01 = data01["version"]
        Connections01 = data01["connections"]["current"]
        Warning = data01["asserts"]["warning"]
        UMess = data01["asserts"]["user"]
        MaxMem = data01["wiredTiger"]["cache"]["maximum bytes configured"]
        CurrMem = data01["wiredTiger"]["cache"]["bytes currently in the cache"]

        Inser = data01["opcounters"]["insert"]
        query = data01["opcounters"]["query"]
        Updat = data01["opcounters"]["update"]
        delet = data01["opcounters"]["delete"]
        getmo = data01["opcounters"]["getmore"]
        comma = data01["opcounters"]["command"]

        Scan = data01["metrics"]["operation"]["scanAndOrder"]
        WConfl = data01["metrics"]["operation"]["writeConflicts"]
        CurTimeout = data01["metrics"]["cursor"]["timedOut"]

        """
        print("\n\n"+"="*20,"\n"+"="*20)
        print("="*2 +" "+ Host01 +" "+ self.thetime() +" "+"="*2)
        print("="*63)
        template01="%15s%8s%10s%15s%15s"
        header01=('Host','Version','Cur_Conn','#ofWarning','#ofUserMessage')
        print( template01 % header01)
        print("="*63)
        print( template01 % (Host01,Version01,Connections01,Warning,UMess))

        template02="%12s%12s%12s%12s%12s%12s%12s%12s"
        header02=('MaxMem MB','CurrMem MB','insert','query','update','delete','getmore','command')
        print( template02 % header02)
        print("="*96)
        print( template02 % (MaxMem,CurrMem,Inser,query,Updat,delet,getmo,comma))

        template03="%15s%15s%15s"
        header03=('scanAndOrder','writeConflicts','CursorTimedOut')
        print( template03 % header03)
        print("="*45)
        print( template03 % (Scan,WConfl,CurTimeout))
        """

        """
        self.matr01={'TS': int(self.thetime()) ,'Host': Host01, 'Version': Version01, 'CurrConn': Connections01, 
          'NofWarning': Warning, 'NofUserMessage': UMess,  
          'MaxMem': MaxMem, 
          'CurrMem': CurrMem, 
          'Insert': Inser, 'Query': query, 'Update': Updat, 
          'Delete': delet, 'Getmore': getmo, 'Command': comma, 
          'ScanAndOrder': Scan, 'WriteConflicts': WConfl, 'CursorTimedOut': CurTimeout }
        """

        self.matr01=SON([('TS', int(self.thetime()) ) ,('Host', Host01), ('Version', Version01), ('CurrConn', Connections01), 
          ('NofWarning', Warning), ('NofUserMessage', UMess),  
          ('MaxMem', MaxMem), 
          ('CurrMem', CurrMem), 
          ('Insert', Inser), ('Query', query), ('Update', Updat), 
          ('Delete', delet), ('Getmore', getmo), ('Command', comma), 
          ('ScanAndOrder', Scan), ('WriteConflicts', WConfl), ('CursorTimedOut', CurTimeout) ])
Example #21
def get_imdh_data(lat, long, n, variable):
    #get list of lat longs
    start_lat = lat - 0.25 * n
    end_lat = lat + 0.25 * n
    start_long = long - 0.25 * n
    end_long = long + 0.25 * n
    a1_lat = np.arange(start_lat, lat, 0.25)
    a2_lat = np.arange(lat, (end_lat + 0.25), 0.25)
    a1_long = np.arange(start_long, long, 0.25)
    a2_long = np.arange(long, (end_long + 0.25), 0.25)
    lats = list(a1_lat) + list(a2_lat)
    longs = list(a1_long) + list(a2_long)
    ''' start_lat = lat - 1 * n
    end_lat = lat + 1 * n
    start_long = long - 1 * n
    end_long = long + 1 * n
    a1_lat = np.arange(start_lat, lat, 1)
    a2_lat = np.arange(lat, (end_lat + 1), 1)
    a1_long = np.arange(start_long, long, 1)
    a2_long = np.arange(long, (end_long + 1), 1)
    lats = list(a1_lat) + list(a2_lat)
    longs = list(a1_long) + list(a2_long) '''

    # extract data from database online
    db = config.get_db()
    imdhist = db.imdhist
    imdhist.create_index("lt")
    # 25.0,25.25,25.5,25.75,26.00  92.0,92.25,92.5,92.75,93.0
    pipeline = [{
        "$match": {
            "id": variable,
            "lt": {
                "$in": lats
            },
            "ln": {
                "$in": longs
            }
        }
    }, {
        "$group": {
            "_id": "$ts",
            "val": {
                "$push": "$val"
            },
            "lat": {
                "$push": "$lt"
            },
            "long": {
                "$push": "$ln"
            }
        }
    }, {
        "$sort": SON([("_id", 1)])
    }]

    imdh = list(imdhist.aggregate(pipeline, allowDiskUse=True))
    '''
    pipeline_temp = [
        {"$match": {"id": "t", "lt": {"$in": lats}, "ln": {"$in": longs}}},
        {"$group": {"_id": "$ts", "val": {"$push": "$val"}, "lat": {"$push": "$lt"}, "long": {"$push": "$ln"}}},
        {"$sort": SON([("_id", 1)])}
    ]

    imdh_temp = list(imdhist.aggregate(pipeline_temp, allowDiskUse=True))'''

    return imdh
Example #22
def prepare_spec_arguments(spec, arguments, opname, entity_map,
                           with_txn_callback):
    for arg_name in list(arguments):
        c2s = camel_to_snake(arg_name)
        # PyMongo accepts sort as list of tuples.
        if arg_name == "sort":
            sort_dict = arguments[arg_name]
            arguments[arg_name] = list(iteritems(sort_dict))
        # Named "key" instead of "fieldName".
        if arg_name == "fieldName":
            arguments["key"] = arguments.pop(arg_name)
        # Aggregate uses "batchSize", while find uses batch_size.
        elif ((arg_name == "batchSize" or arg_name == "allowDiskUse")
              and opname == "aggregate"):
            continue
        # Requires boolean returnDocument.
        elif arg_name == "returnDocument":
            arguments[c2s] = getattr(ReturnDocument,
                                     arguments.pop(arg_name).upper())
        elif c2s == "requests":
            # Parse each request into a bulk write model.
            requests = []
            for request in arguments["requests"]:
                if 'name' in request:
                    # CRUD v2 format
                    bulk_model = camel_to_upper_camel(request["name"])
                    bulk_class = getattr(operations, bulk_model)
                    bulk_arguments = camel_to_snake_args(request["arguments"])
                else:
                    # Unified test format
                    bulk_model, spec = next(iteritems(request))
                    bulk_class = getattr(operations,
                                         camel_to_upper_camel(bulk_model))
                    bulk_arguments = camel_to_snake_args(spec)
                requests.append(bulk_class(**dict(bulk_arguments)))
            arguments["requests"] = requests
        elif arg_name == "session":
            arguments['session'] = entity_map[arguments['session']]
        elif (opname in ('command', 'run_admin_command')
              and arg_name == 'command'):
            # Ensure the first key is the command name.
            ordered_command = SON([(spec['command_name'], 1)])
            ordered_command.update(arguments['command'])
            arguments['command'] = ordered_command
        elif opname == 'open_download_stream' and arg_name == 'id':
            arguments['file_id'] = arguments.pop(arg_name)
        elif opname != 'find' and c2s == 'max_time_ms':
            # find is the only method that accepts snake_case max_time_ms.
            # All other methods take kwargs which must use the server's
            # camelCase maxTimeMS. See PYTHON-1855.
            arguments['maxTimeMS'] = arguments.pop('max_time_ms')
        elif opname == 'with_transaction' and arg_name == 'callback':
            if 'operations' in arguments[arg_name]:
                # CRUD v2 format
                callback_ops = arguments[arg_name]['operations']
            else:
                # Unified test format
                callback_ops = arguments[arg_name]
            arguments['callback'] = lambda _: with_txn_callback(
                copy.deepcopy(callback_ops))
        elif opname == 'drop_collection' and arg_name == 'collection':
            arguments['name_or_collection'] = arguments.pop(arg_name)
        elif opname == 'create_collection' and arg_name == 'collection':
            arguments['name'] = arguments.pop(arg_name)
        elif opname == 'create_index' and arg_name == 'keys':
            arguments['keys'] = list(arguments.pop(arg_name).items())
        elif opname == 'drop_index' and arg_name == 'name':
            arguments['index_or_name'] = arguments.pop(arg_name)
        else:
            arguments[c2s] = arguments.pop(arg_name)
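The helpers camel_to_snake and camel_to_upper_camel used above are not shown in this excerpt. A minimal sketch of compatible implementations, as an assumption rather than PyMongo's exact source:

import re

def camel_to_snake(camel):
    # e.g. "batchSize" -> "batch_size"
    return re.sub(r"([A-Z])", r"_\1", camel).lower()

def camel_to_upper_camel(camel):
    # e.g. "insertOne" -> "InsertOne"
    return camel[:1].upper() + camel[1:]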
Exemplo n.º 23
0
         },
     }
 },
 'topics': {
     'pagination': False,
     'datasource': {
         'source': 'documents',
         'aggregation': {
             'pipeline': [{
                 "$unwind": "$topics"
             }, {
                 "$group": {
                     "_id": "$topics"
                 }
             }, {
                 "$sort": SON([("_id", 1)])
             }]
         }
     }
 },
 'places': {
     'pagination': False,
     'datasource': {
         'source': 'documents',
         'aggregation': {
             'pipeline': [{
                 "$unwind": "$places"
             }, {
                 "$group": {
                     "_id": "$places"
                 }
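For reference, the distinct-and-sort pipeline embedded in this datasource config can be run directly with PyMongo. A sketch, where the connection details and database name are assumptions:

from pymongo import MongoClient
from bson.son import SON

db = MongoClient().get_database("app")  # connection details are assumptions

pipeline = [
    {"$unwind": "$topics"},
    {"$group": {"_id": "$topics"}},
    {"$sort": SON([("_id", 1)])},
]
topics = [doc["_id"] for doc in db.documents.aggregate(pipeline)]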
Exemplo n.º 24
0
                # TCP_KEEPIDLE and friends. Don't attempt to set the
                # values there.
                default = sock.getsockopt(socket.IPPROTO_TCP, sockopt)
                if default > max_value:
                    sock.setsockopt(socket.IPPROTO_TCP, sockopt, max_value)
            except socket.error:
                pass

    def _set_keepalive_times(sock):
        _set_tcp_option(sock, 'TCP_KEEPIDLE', _MAX_TCP_KEEPIDLE)
        _set_tcp_option(sock, 'TCP_KEEPINTVL', _MAX_TCP_KEEPINTVL)
        _set_tcp_option(sock, 'TCP_KEEPCNT', _MAX_TCP_KEEPCNT)


_METADATA = SON([
    ('driver', SON([('name', 'PyMongo'), ('version', __version__)])),
])

if sys.platform.startswith('linux'):
    # platform.linux_distribution was deprecated in Python 3.5.
    if sys.version_info[:2] < (3, 5):
        # Distro name and version (e.g. Ubuntu 16.04 xenial)
        _name = ' '.join(
            [part for part in platform.linux_distribution() if part])
    else:
        _name = platform.system()
    _METADATA['os'] = SON([
        ('type', platform.system()),
        ('name', _name),
        ('architecture', platform.machine()),
        # Kernel version (e.g. 4.4.0-17-generic).
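The _METADATA document built here is sent to the server during the connection handshake. An application can tag that metadata with its own name through the client's appname option; a brief sketch, with the URI and name purely illustrative:

from pymongo import MongoClient

# appname is included in the handshake metadata and appears in server logs.
client = MongoClient("mongodb://localhost:27017", appname="my-reporting-job")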
Exemplo n.º 25
0
        "item": "帆布",
        "quantity": 100,
        "tags": ["棉布"],
        "size": {"height": 28, "weight": 35.5, "uom": "cm"}
    }
)
result_set1 = db.inventory.find({})
for item in result_set1:
    pprint(item)

# Queries with conditions
from bson.son import SON
db.inventory.insert_many([
    {"item": "journal",
     "qty": 25,
     "size": SON([("h", 14), ("w", 21), ("uom", "cm")]),
     "status": "A"},
    {"item": "notebook",
     "qty": 50,
     "size": SON([("h", 8.5), ("w", 11), ("uom", "in")]),
     "status": "A"},
    {"item": "paper",
     "qty": 100,
     "size": SON([("h", 8.5), ("w", 11), ("uom", "in")]),
     "status": "D"},
    {"item": "planner",
     "qty": 75,
     "size": SON([("h", 22.85), ("w", 30), ("uom", "cm")]),
     "status": "D"},
    {"item": "postcard",
     "qty": 45,
Exemplo n.º 26
0
    def command(self,
                dbname,
                spec,
                slave_ok=False,
                read_preference=ReadPreference.PRIMARY,
                codec_options=DEFAULT_CODEC_OPTIONS,
                check=True,
                allowable_errors=None,
                check_keys=False,
                read_concern=None,
                write_concern=None,
                parse_write_concern_error=False,
                collation=None,
                session=None,
                client=None,
                retryable_write=False,
                publish_events=True):
        """Execute a command or raise an error.

        :Parameters:
          - `dbname`: name of the database on which to run the command
          - `spec`: a command document as a dict, SON, or mapping object
          - `slave_ok`: whether to set the SlaveOkay wire protocol bit
          - `read_preference`: a read preference
          - `codec_options`: a CodecOptions instance
          - `check`: raise OperationFailure if there are errors
          - `allowable_errors`: errors to ignore if `check` is True
          - `check_keys`: if True, check `spec` for invalid keys
          - `read_concern`: The read concern for this command.
          - `write_concern`: The write concern for this command.
          - `parse_write_concern_error`: Whether to parse the
            ``writeConcernError`` field in the command response.
          - `collation`: The collation for this command.
          - `session`: optional ClientSession instance.
          - `client`: optional MongoClient for gossipping $clusterTime.
          - `retryable_write`: True if this command is a retryable write.
          - `publish_events`: Should we publish events for this command?
        """
        self.validate_session(client, session)
        if (read_concern and self.max_wire_version < 4
                and not read_concern.ok_for_legacy):
            raise ConfigurationError(
                'read concern level of %s is not valid '
                'with a max wire version of %d.' %
                (read_concern.level, self.max_wire_version))
        if not (write_concern is None or write_concern.acknowledged
                or collation is None):
            raise ConfigurationError(
                'Collation is unsupported for unacknowledged writes.')
        if self.max_wire_version >= 5 and write_concern:
            spec['writeConcern'] = write_concern.document
        elif self.max_wire_version < 5 and collation is not None:
            raise ConfigurationError(
                'Must be connected to MongoDB 3.4+ to use a collation.')

        if (client or session) and not isinstance(spec, ORDERED_TYPES):
            # Ensure command name remains in first place.
            spec = SON(spec)
        if session:
            spec['lsid'] = session._use_lsid()
            if retryable_write:
                spec['txnNumber'] = session._transaction_id()
        self.send_cluster_time(spec, session, client)
        listeners = self.listeners if publish_events else None
        try:
            return command(self.sock,
                           dbname,
                           spec,
                           slave_ok,
                           self.is_mongos,
                           read_preference,
                           codec_options,
                           session,
                           client,
                           check,
                           allowable_errors,
                           self.address,
                           check_keys,
                           listeners,
                           self.max_bson_size,
                           read_concern,
                           parse_write_concern_error=parse_write_concern_error,
                           collation=collation)
        except OperationFailure:
            raise
        # Catch socket.error, KeyboardInterrupt, etc. and close ourselves.
        except BaseException as error:
            self._raise_connection_failure(error)
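At the user-facing level this machinery is reached through Database.command. Because the server treats the first key of the spec as the command name, multi-key commands are passed as SON rather than a pre-3.7 dict. A sketch with illustrative database and collection names:

from pymongo import MongoClient
from bson.son import SON

client = MongoClient()  # connection details are assumptions
# "count" must be the first key, so the spec is built as SON, not a dict.
result = client.test.command(SON([("count", "inventory"),
                                  ("query", {"status": "A"})]))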
Exemplo n.º 27
0
def _school(urn):
    if request.method == 'POST':
        mongo.db['school-address'].find_one_and_update({'school': urn}, {
            '$set': {
                'school': urn,
                'address': request.form['address'],
                'address-match': 'byhand'
            }
        },
                                                       upsert=True)
        return redirect("/school/" + urn, code=303)

    edubase = latest(mongo.db.edubase.find({'URN': urn}))
    if not edubase:
        return abort(404)
    key = uprn = postcode = ''
    address, street = {}, {}
    # Separate list objects; chained assignment would alias a single list.
    addresses, parents, children, streets = [], [], [], []
    doc = mongo.db['school-address'].find_one({'school': urn})
    if doc:
        key = doc['address']

    if key != '':
        key = key.split(";")[0]
        uprn = decode(key)
        addresses = llist(mongo.db.address.find({'address': key}))
        address = latest(addresses)
        if address:
            street = latest(mongo.db.street.find({'street':
                                                  address['street']}))
            children = sorted_naturally(
                llist(mongo.db.address.find({'parent-address': key})))
            parents = address_parents(address)
            addresses = addresses + children + parents
            postcode = mongo.db['address-postcode'].find_one({'address':
                                                              key})['postcode']

    point = []
    if edubase['Easting']:
        lat, lon = pyproj.transform(osgb36, wgs84, edubase['Easting'],
                                    edubase['Northing'])
        point = [lon, lat]
        addresses = addresses + llist(
            mongo.db.address.find({
                'point':
                SON([('$nearSphere', [lat, lon]), ('$maxDistance', 0.00004)])
            }))
        streets = streets + llist(
            mongo.db.street.find({
                'point':
                SON([('$nearSphere', [lat, lon]), ('$maxDistance', 0.00004)])
            }))

    guesses = {}
    ignore = ['the']
    words = n7e(edubase['EstablishmentName'], ignore).split()
    words = words + [
        'school', 'academy', 'infant', 'junior', 'middle', 'college', 'jmi',
        'campus'
    ]
    for a in addresses:
        if set(words).intersection(set(n7e(a['name'], ignore).split())):
            guesses[a['address'] + ":" + a['name']] = a

    guesses = [guesses[k] for k in sorted(guesses)]

    return render_template("school.html",
                           edubase=edubase,
                           guesses=guesses,
                           point=point,
                           address=address,
                           addresses=addresses,
                           streets=streets,
                           street=street,
                           postcode=postcode,
                           uprn=uprn,
                           parents=parents,
                           children=children)
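The $nearSphere queries above only work against a geospatial index on point. A minimal sketch of creating the legacy "2d" index they rely on; the mongo handle and collection names follow the snippet, the rest is an assumption:

import pymongo

# $nearSphere on legacy coordinate pairs requires a "2d" geospatial index.
mongo.db.address.create_index([("point", pymongo.GEO2D)])
mongo.db.street.create_index([("point", pymongo.GEO2D)])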
Exemplo n.º 28
0
    async def test_query_embedded_documents(self):
        db = self.db

        # Start Example 14
        # Subdocument key order matters in a few of these examples so we have
        # to use bson.son.SON instead of a Python dict.
        from bson.son import SON
        await db.inventory.insert_many([
            {"item": "journal",
             "qty": 25,
             "size": SON([("h", 14), ("w", 21), ("uom", "cm")]),
             "status": "A"},
            {"item": "notebook",
             "qty": 50,
             "size": SON([("h", 8.5), ("w", 11), ("uom", "in")]),
             "status": "A"},
            {"item": "paper",
             "qty": 100,
             "size": SON([("h", 8.5), ("w", 11), ("uom", "in")]),
             "status": "D"},
            {"item": "planner",
             "qty": 75,
             "size": SON([("h", 22.85), ("w", 30), ("uom", "cm")]),
             "status": "D"},
            {"item": "postcard",
             "qty": 45,
             "size": SON([("h", 10), ("w", 15.25), ("uom", "cm")]),
             "status": "A"}])
        # End Example 14

        # Start Example 15
        cursor = db.inventory.find(
            {"size": SON([("h", 14), ("w", 21), ("uom", "cm")])})
        # End Example 15

        self.assertEqual(await count(cursor), 1)

        # Start Example 16
        cursor = db.inventory.find(
            {"size": SON([("w", 21), ("h", 14), ("uom", "cm")])})
        # End Example 16

        self.assertEqual(await count(cursor), 0)

        # Start Example 17
        cursor = db.inventory.find({"size.uom": "in"})
        # End Example 17

        self.assertEqual(await count(cursor), 2)

        # Start Example 18
        cursor = db.inventory.find({"size.h": {"$lt": 15}})
        # End Example 18

        self.assertEqual(await count(cursor), 4)

        # Start Example 19
        cursor = db.inventory.find({
            "size.h": {
                "$lt": 15
            },
            "size.uom": "in",
            "status": "D"
        })
        # End Example 19

        self.assertEqual(await count(cursor), 1)
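Since CPython 3.7, plain dicts preserve insertion order, so on modern Pythons the SON-based filter in Example 15 can be written with a dict literal; the order sensitivity demonstrated in Example 16 applies either way:

# On Python 3.7+ this filter is equivalent to the SON-based one in Example 15:
cursor = db.inventory.find({"size": {"h": 14, "w": 21, "uom": "cm"}})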
Exemplo n.º 29
0
def graph():
    categories = request.get_json(force=True)['categories']
    print categories

    category = categories[0]['category']
    sub_category = categories[1]['category']
    sort_by = categories[0]['sort']
    if sort_by == 'alpha':
        sort_by = '_id.' + category
    sort_direction = int(categories[0]['sortDirection'])
    sort = (sort_by, sort_direction)
    filters = categories[0]['filter']

    first_group_stage = {
        '$group': {
            '_id': {
                category: '$' + category
            },
            'count': {
                '$sum': 1
            }
        }
    }
    second_group_stage = None
    if sub_category != '':
        first_group_stage['$group']['_id'][sub_category] = '$' + sub_category
        second_group_stage = {
            '$group': {
                '_id': {
                    category: '$_id.' + category,
                },
                'data': {
                    '$push': {
                        sub_category: '$_id.' + sub_category,
                        'count': '$count'
                    }
                },
                'count': {
                    '$sum': '$count'
                }
            }
        }
    sort_stage = {'$sort': SON([sort])}

    client = pymongo.MongoClient(os.environ['MONGO_URI'])
    db = client.va_circuit_court
    data = None
    if second_group_stage is None:
        data = db.criminal_cases.aggregate([first_group_stage,
                                            sort_stage])['result']
    else:
        data = db.criminal_cases.aggregate(
            [first_group_stage, second_group_stage, sort_stage])['result']

    sub_category_names = []
    if sub_category != '':
        for group in data:
            for sub_category_group in group['data']:
                sub_category_name = 'None'
                if sub_category in sub_category_group:
                    sub_category_name = sub_category_group[sub_category]
                if sub_category_name not in sub_category_names:
                    sub_category_names.append(sub_category_name)
                group[sub_category_name] = sub_category_group['count']
    pprint(data)

    pprint(sub_category_names)
    values = [str(x['_id'][category]) for x in data]
    labels = [v for v in values if v not in filters][:20]

    bar_chart = pygal.Bar(height=450, style=LightStyle, x_label_rotation=70)
    bar_chart.title = 'VA Circuit Court Cases in 2014'
    bar_chart.x_labels = labels
    if sub_category == '':
        bar_chart.add(category, [
            x['count'] for x in data if str(x['_id'][category]) not in filters
        ][:20])
    else:
        for item in sub_category_names[:10]:
            item_counts = []
            for x in data:
                if str(x['_id'][category]) in filters: continue
                if item in x:
                    item_counts.append(x[item])
                else:
                    item_counts.append(0)
            bar_chart.add(item, item_counts[:20])

    return str(render_template('stats_filters.html',
        category=category,
        filter_values=sorted(values),
        filters_unchecked=filters)) + \
        bar_chart.render()
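The aggregate(...)['result'] access above is the PyMongo 2.x calling convention; since PyMongo 3.0, aggregate returns a CommandCursor that is iterated directly. A sketch of the equivalent modern call, reusing the stage variables from the snippet:

# PyMongo 3.x+: aggregate returns a cursor, not a {'result': [...]} document.
data = list(db.criminal_cases.aggregate(
    [first_group_stage, second_group_stage, sort_stage]))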
Exemplo n.º 30
0
    def __send_message(self, operation):
        """Send a query or getmore operation and handles the response.

        If operation is ``None`` this is an exhaust cursor, which reads
        the next result batch off the exhaust socket instead of
        sending getMore messages to the server.

        Can raise ConnectionFailure.
        """
        client = self.__collection.database.client
        listeners = client._event_listeners
        publish = listeners.enabled_for_commands
        from_command = False
        start = datetime.datetime.now()

        def duration(): return datetime.datetime.now() - start

        if operation:
            kwargs = {
                "read_preference": self.__read_preference,
                "exhaust": self.__exhaust,
            }
            if self.__address is not None:
                kwargs["address"] = self.__address

            try:
                response = client._send_message_with_response(operation,
                                                              **kwargs)
                self.__address = response.address
                if self.__exhaust:
                    # 'response' is an ExhaustResponse.
                    self.__exhaust_mgr = _SocketManager(response.socket_info,
                                                        response.pool)

                cmd_name = operation.name
                reply = response.data
                rqst_id = response.request_id
                from_command = response.from_command
            except AutoReconnect:
                # Don't try to send kill cursors on another socket
                # or to another server. It can cause a _pinValue
                # assertion on some server releases if we get here
                # due to a socket timeout.
                self.__killed = True
                raise
        else:
            # Exhaust cursor - no getMore message.
            rqst_id = 0
            cmd_name = 'getMore'
            if publish:
                # Fake a getMore command.
                cmd = SON([('getMore', self.__id),
                           ('collection', self.__collection.name)])
                if self.__batch_size:
                    cmd['batchSize'] = self.__batch_size
                if self.__max_time_ms:
                    cmd['maxTimeMS'] = self.__max_time_ms
                listeners.publish_command_start(
                    cmd, self.__collection.database.name, 0, self.__address)
            try:
                reply = self.__exhaust_mgr.sock.receive_message(None)
            except Exception as exc:
                if publish:
                    listeners.publish_command_failure(
                        duration(), _convert_exception(exc), cmd_name, rqst_id,
                        self.__address)
                if isinstance(exc, ConnectionFailure):
                    self.__die()
                raise

        try:
            docs = self._unpack_response(response=reply,
                                         cursor_id=self.__id,
                                         codec_options=self.__codec_options)
            if from_command:
                first = docs[0]
                client._receive_cluster_time(first, self.__session)
                helpers._check_command_response(first)
        except OperationFailure as exc:
            self.__killed = True

            # Make sure exhaust socket is returned immediately, if necessary.
            self.__die()

            if publish:
                listeners.publish_command_failure(
                    duration(), exc.details, cmd_name, rqst_id, self.__address)

            # If this is a tailable cursor the error is likely
            # due to capped collection roll over. Setting
            # self.__killed to True ensures Cursor.alive will be
            # False. No need to re-raise.
            if self.__query_flags & _QUERY_OPTIONS["tailable_cursor"]:
                return
            raise
        except NotMasterError as exc:
            # Don't send kill cursors to another server after a "not master"
            # error. It's completely pointless.
            self.__killed = True

            # Make sure exhaust socket is returned immediately, if necessary.
            self.__die()

            if publish:
                listeners.publish_command_failure(
                    duration(), exc.details, cmd_name, rqst_id, self.__address)

            client._reset_server_and_request_check(self.__address)
            raise
        except Exception as exc:
            if publish:
                listeners.publish_command_failure(
                    duration(), _convert_exception(exc), cmd_name, rqst_id,
                    self.__address)
            raise

        if publish:
            # Must publish in find / getMore / explain command response format.
            if from_command:
                res = docs[0]
            elif cmd_name == "explain":
                res = docs[0] if reply.number_returned else {}
            else:
                res = {"cursor": {"id": reply.cursor_id,
                                  "ns": self.__collection.full_name},
                       "ok": 1}
                if cmd_name == "find":
                    res["cursor"]["firstBatch"] = docs
                else:
                    res["cursor"]["nextBatch"] = docs
            listeners.publish_command_success(
                duration(), res, cmd_name, rqst_id, self.__address)

        if from_command and cmd_name != "explain":
            cursor = docs[0]['cursor']
            self.__id = cursor['id']
            if cmd_name == 'find':
                documents = cursor['firstBatch']
            else:
                documents = cursor['nextBatch']
            self.__data = deque(documents)
            self.__retrieved += len(documents)
        else:
            self.__id = reply.cursor_id
            self.__data = deque(docs)
            self.__retrieved += reply.number_returned

        if self.__id == 0:
            self.__killed = True

        if self.__limit and self.__id and self.__limit <= self.__retrieved:
            self.__die()

        # Don't wait for garbage collection to call __del__, return the
        # socket to the pool now.
        if self.__exhaust and self.__id == 0:
            self.__exhaust_mgr.close()
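The publish_command_start/success/failure calls above feed PyMongo's command monitoring API. A minimal sketch of a listener that receives these events; the class and registration follow standard pymongo.monitoring usage, while the logging itself is illustrative:

from pymongo import MongoClient, monitoring

class CommandLogger(monitoring.CommandListener):
    def started(self, event):
        print("started:", event.command_name, event.request_id)

    def succeeded(self, event):
        print("succeeded:", event.command_name, event.duration_micros)

    def failed(self, event):
        print("failed:", event.command_name, event.failure)

# Listeners are registered when the client is constructed.
client = MongoClient(event_listeners=[CommandLogger()])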