def CountSalaryPoundSterling(request, json):
    """Return the 20 largest pound-sterling salaries as an HTTP response.

    Runs a map-reduce over the ``salary`` collection of the ``Grupo09``
    database. Documents whose ``salary.datatype`` is ``"poundSterling"``
    are keyed by their label (first apostrophe removed) and the maximum
    salary per label is kept. The output is written to the
    ``result_salary_poundsterling`` collection.

    Args:
        request: Incoming request object; not used by this view.
        json: Not used by this view.

    Returns:
        An ``HttpResponse`` built from the 20 result documents with the
        highest ``value``.
    """
    client = pymongo.MongoClient(
        URI_CONNECTION, serverSelectionTimeoutMS=MONGODB_TIMEOUT)
    try:
        db = client["Grupo09"]
        salary = db["salary"]
        mapper = Code(
            ' function () {'
            ' if (this.salary.datatype == "poundSterling") {'
            " emit(this.label.value.replace(/'/i,"
            ' ""), this.salary.value);'
            ' }'
            ' } '
        )
        reducer = Code(
            " function (key, values) {"
            " var max = values[0];"
            " values.forEach(function(val){"
            " if (val > max) max = val;"
            " });"
            " return max;"
            " } "
        )
        salary.map_reduce(mapper, reducer, "result_salary_poundsterling")
        cursor = db.result_salary_poundsterling.find().sort(
            "value", pymongo.DESCENDING).limit(20)
        # Cursors are lazy: pull the documents while the connection is
        # still open instead of letting the response iterate a cursor
        # whose client has already been closed.
        documents = list(cursor)
    finally:
        # Release the connection even when the map-reduce fails.
        client.close()
    return HttpResponse(documents)
def get(self):
    """Count job documents per upper-cased, trimmed city name.

    The result is written to the ``myresults`` collection.

    Returns:
        A cursor over the ``myresults`` output collection.
    """
    map_func = Code("function() { "
                    "if(this.city.trim() != '')"
                    "emit(this.city.toUpperCase().trim(), 1) }")
    # The reduce function may be invoked several times for one key, with
    # earlier reduce outputs mixed into ``values``. Counting the array
    # length would then undercount, so the partial counts are summed.
    reduce_func = Code("function(key, values) { return Array.sum(values) }")
    result = mongo.db.jobs.map_reduce(map_func, reduce_func, "myresults")
    return result.find()
def target_user_stats():
    """
    Generate targets from email To/CC fields, then generate divisions from
    targets list. No cleanup or logic is being done on the To/CC fields. If
    they are not valid email addresses (user@domain), they do not get added
    as a target.
    """
    mapcode = (
        " function () {"
        " try {"
        " this.to.forEach(function(z) {"
        " emit(z.toLowerCase(), {count: 1});"
        " });"
        " } catch(err) {}"
        " } "
    )
    reducecode = (
        ' function(k,v) {'
        ' var count = 0;'
        ' v.forEach(function(v) {'
        ' count += v["count"];'
        ' });'
        ' return {count: count};'
        ' } '
    )
    m = Code(mapcode)
    r = Code(reducecode)
    results = Email.objects(to__exists=True).map_reduce(m, r, 'inline')
    for result in results:
        try:
            targs = Target.objects(email_address__iexact=result.key)
            if not targs:
                # No existing target for this address: create one.
                targs = [Target()]
                targs[0].email_address = result.key.strip().lower()
            for targ in targs:
                targ.email_count = result.value['count']
                targ.save()
        except Exception:
            # Deliberate best effort: an address that cannot be stored is
            # skipped. Only ``Exception`` is caught so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
    mapcode = (
        ' function() {'
        ' if ("division" in this) {'
        ' emit(this.division, {count: this.email_count})'
        ' }'
        ' } '
    )
    m = Code(mapcode)
    # Errors in the division pass propagate to the caller, as before.
    results = Target.objects().map_reduce(m, r, 'inline')
    for result in results:
        div = Division.objects(division__iexact=result.key).first()
        if not div:
            div = Division()
            div.division = result.key
        div.email_count = result.value['count']
        div.save()
def field_mapper_reducer():
    """Build the map and reduce functions used to profile document fields.

    The map function emits, for every key of a document, a
    ``[type, value]`` pair. ``type`` is the JavaScript ``typeof`` of the
    value, refined to ``'integer'`` or ``'float'`` for numbers. The
    reduce function folds the pairs for one key into an object mapping
    each type name to one value of that type.

    Returns:
        A ``(mapper, reducer)`` tuple of ``Code`` objects.
    """
    # Every statement ends with an explicit semicolon and the JavaScript
    # holds no line comments, so it stays valid however its whitespace is
    # reflowed.
    mapper = Code("""
        function () {
            function isInt(n) {
                return n % 1 === 0;
            }
            for (var key in this) {
                let type = typeof this[key];
                if (type === 'number') {
                    if (isInt(this[key])) {
                        type = 'integer';
                    } else {
                        type = 'float';
                    }
                }
                emit(key, [type, this[key]]);
            }
        }
    """)
    # NOTE(review): the reducer returns an object while the mapper emits
    # arrays; confirm this is safe if the server re-reduces partial results.
    reducer = Code("""
        function (key, stuff) {
            let obj = {};
            stuff.filter(e => e[0] !== undefined).forEach(e => obj[e[0]] = e[1]);
            return obj;
        }
    """)
    return mapper, reducer
def generate_yara_hits():
    """
    Generate yara hits mapreduce.

    Counts samples per (engine, version, result) over the ``yara`` entries
    of each sample's ``analysis`` list, drops the yara-hits collection and
    repopulates it with one document per combination. Returns ``None``;
    if the map-reduce itself fails the function returns without touching
    the yara-hits collection.
    """
    samples = mongo_connector(settings.COL_SAMPLES)
    map_code = (
        ' function() {'
        ' this.analysis.forEach(function(z) {'
        ' if ("results" in z && z.service_name == "yara") {'
        ' z.results.forEach(function(x) {'
        ' emit({engine: z.service_name, version: x.version,'
        ' result: x.result} ,{count: 1});'
        ' })'
        ' }'
        ' })'
        ' } '
    )
    m = Code(map_code, {})
    r = Code('function(k,v) { var count=0; v.forEach(function(v) '
             '{ count += v["count"]; }); return {count: count}; }', {})
    try:
        yarahits = samples.inline_map_reduce(
            m, r, query={'analysis.service_name': 'yara'})
    except Exception:
        # Best effort: keep the previous statistics when the job fails.
        # Only ``Exception`` is caught so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) still propagate.
        return
    yarahits_col = mongo_connector(settings.COL_YARAHITS)
    yarahits_col.drop()
    sv = YaraHit._meta['latest_schema_version']
    for hit in yarahits:
        hit_key = hit["_id"]
        yarahits_col.update(
            {'engine': hit_key["engine"],
             "version": hit_key["version"],
             "result": hit_key["result"]},
            {"$set": {"sample_count": hit["value"]["count"],
                      "schema_version": sv}},
            True,
            False)
def map_reduce_ui(self):
    """
    map_reduce_ui

    Both map and reduce are JavaScript functions; map_reduce stores the
    aggregated result in a temporary collection.

    Counts the documents of the ``point`` collection per ``x`` value and
    renders the counts with the ``modal_tpl.html`` template.

    :return:
    """
    count_per_x = Code(" function () { emit(this.x, 1); } ")
    sum_counts = Code(
        " function (key, values) {"
        " var total = 0;"
        " for (var i = 0; i < values.length; i++) {"
        " total += values[i];"
        " }"
        " return total;"
        " } "
    )
    output = self.db.point.map_reduce(count_per_x, sum_counts, "num")
    # Materialise the cursor so the template receives a plain list.
    rows = list(output.find())
    self.render("modal_tpl.html", info=rows, title="X轴Map/Reduce练习")
def get_keys(db, collection):
    """Return the distinct top-level field names used in a collection.

    A map-reduce emits every key of every document into the ``myresults``
    collection of the same database; the distinct ``_id`` values of that
    output are the field names.

    Args:
        db: Name of the database on the default local MongoDB server.
        collection: Name of the collection to inspect.

    Returns:
        The list of distinct field names.
    """
    client = MongoClient()
    try:
        database = client[db]
        mapper = Code("function() { for (var key in this) { emit(key, null); } }")
        reducer = Code("function(key, stuff) { return null; }")
        result = database[collection].map_reduce(mapper, reducer, "myresults")
        # distinct() runs the query immediately, so the client can be
        # closed as soon as it returns.
        return result.distinct('_id')
    finally:
        # The client was opened here, so it is closed here as well.
        client.close()
def generate_exploits():
    """
    Generate exploits mapreduce.

    Counts samples per exploit CVE and stores each count as
    ``sample_count`` on the exploit-details document whose ``name``
    equals the CVE. Returns ``None``; if the map-reduce fails nothing is
    updated.
    """
    samples = mongo_connector(settings.COL_SAMPLES)
    m = Code(
        'function() { this.exploit.forEach(function(z) '
        '{emit({cve: z.cve} ,{count: 1});}) }',
        {})
    r = Code(
        'function(k,v) { var count = 0; v.forEach(function(v) '
        '{ count += v["count"]; }); return {count: count}; }',
        {})
    try:
        exploits = samples.inline_map_reduce(
            m, r, query={"exploit.cve": {"$exists": 1}})
    except Exception:
        # Best effort: leave the existing counts untouched on failure.
        # Only ``Exception`` is caught so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) still propagate.
        return
    exploit_details = mongo_connector(settings.COL_EXPLOIT_DETAILS)
    for exploit in exploits:
        exploit_details.update(
            {"name": exploit["_id"]["cve"]},
            {"$set": {"sample_count": exploit["value"]["count"]}})
def generate_backdoors():
    """
    Generate backdoors mapreduce.

    Counts samples per backdoor name and stores each count as
    ``sample_count`` on the backdoor-details document with that name.
    Returns ``None``; if the map-reduce fails nothing is updated.
    """
    samples = mongo_connector(settings.COL_SAMPLES)
    # The previous map function had no opening brace and a stray "})",
    # so it was not valid JavaScript: the job always failed and the
    # handler below hid the failure.
    m = Code('function() { emit({name: this.backdoor.name}, {count: 1}); }', {})
    r = Code(
        'function(k,v) { var count = 0; v.forEach(function(v) '
        '{ count += v["count"]; }); return {count: count}; }',
        {})
    try:
        backdoors = samples.inline_map_reduce(
            m, r, query={"backdoor.name": {"$ne": "None"}})
    except Exception:
        # Best effort: leave the existing counts untouched on failure.
        # Only ``Exception`` is caught so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) still propagate.
        return
    backdoor_details = mongo_connector(settings.COL_BACKDOOR_DETAILS)
    for backdoor in backdoors:
        backdoor_details.update(
            {"name": backdoor["_id"]["name"]},
            {"$set": {"sample_count": backdoor["value"]["count"]}})
def TablaHashtags(request, json):
    """Render the hashtag usage table, optionally for a single author.

    Counts documents of the ``hashtags`` collection per ``hashtag`` and
    writes the counts to ``result_hashtags``. When the ``"Author"`` entry
    of the decoded payload differs from ``"0"``, only documents whose
    ``author_id`` equals it are counted.

    Args:
        request: Request object passed through to ``render``.
        json: JSON text containing an ``"Author"`` entry.

    Returns:
        The rendered ``views/tabla_hashtags.html`` response; the context
        key ``lstResult`` holds the result documents sorted by
        descending ``value``.
    """
    lstData = simplejson.loads(json)
    objAuthor = lstData["Author"]
    client = pymongo.MongoClient(
        URI_CONNECTION, serverSelectionTimeoutMS=MONGODB_TIMEOUT)
    try:
        db = client["Grupo09"]
        hashtags = db["hashtags"]
        reducer = Code(" function (key, values) { return Array.sum(values); } ")
        options = {}
        if objAuthor != "0":
            # The author id comes from the request payload. It is handed
            # to the server as a scope variable, not pasted into the
            # JavaScript source, so it cannot inject code.
            mapper = Code("""
                function () {
                    if (this.author_id == authorId) {
                        emit(this.hashtag, 1);
                    }
                }
            """)
            options["scope"] = {"authorId": objAuthor}
        else:
            mapper = Code(" function () { emit(this.hashtag, 1); } ")
        hashtags.map_reduce(mapper, reducer, "result_hashtags", **options)
        # Read the lazy cursor before the connection is closed; the
        # template is rendered after that point.
        lstResult = list(db.result_hashtags.find().sort("value", -1))
    finally:
        client.close()
    return render(request, "views/tabla_hashtags.html",
                  context={"lstResult": lstResult})
def TablaScaleBySubjectivity(request, json):
    """Render the tweet count per ``scale-subjectivity`` combination.

    Counts documents of the ``tweets`` collection per
    ``<scale>-<subjectivity>`` label and writes the counts to
    ``result_scale_subjectivity``. When the ``"Author"`` entry of the
    decoded payload differs from ``"0"``, only tweets whose ``author_id``
    equals it are counted.

    Args:
        request: Request object passed through to ``render``.
        json: JSON text containing an ``"Author"`` entry.

    Returns:
        The rendered ``views/tabla_scale_subjectivity.html`` response;
        the context key ``lstResult`` holds the result documents sorted
        by descending ``value``.
    """
    lstData = simplejson.loads(json)
    objAuthor = lstData["Author"]
    client = pymongo.MongoClient(
        URI_CONNECTION, serverSelectionTimeoutMS=MONGODB_TIMEOUT)
    try:
        db = client["Grupo09"]
        tweets = db["tweets"]
        reducer = Code(" function (key, values) { return Array.sum(values); } ")
        options = {}
        if objAuthor != "0":
            # The author id comes from the request payload. It is handed
            # to the server as a scope variable, not pasted into the
            # JavaScript source, so it cannot inject code.
            mapper = Code("""
                function () {
                    if (this.author_id == authorId) {
                        var label = this.scale + '-' + this.subjectivity;
                        emit(label, 1);
                    }
                }
            """)
            options["scope"] = {"authorId": objAuthor}
        else:
            mapper = Code(
                " function () {"
                " var label = this.scale + '-' + this.subjectivity;"
                " emit(label, 1);"
                " } "
            )
        tweets.map_reduce(mapper, reducer, "result_scale_subjectivity",
                          **options)
        # Read the lazy cursor before the connection is closed; the
        # template is rendered after that point.
        lstResult = list(
            db.result_scale_subjectivity.find().sort("value", -1))
    finally:
        client.close()
    return render(request, "views/tabla_scale_subjectivity.html",
                  context={"lstResult": lstResult})
def CountEmployees(request, json):
    """Return the 20 labels with the largest employee counts.

    Runs a map-reduce over the ``employees`` collection of the
    ``Grupo09`` database. Documents are keyed by their label (first
    apostrophe removed) and the maximum ``numberOfEmployees.value`` per
    label is kept. The output is written to ``result_employees``.

    Args:
        request: Incoming request object; not used by this view.
        json: Not used by this view.

    Returns:
        An ``HttpResponse`` built from the 20 result documents with the
        highest ``value``.
    """
    client = pymongo.MongoClient(
        URI_CONNECTION, serverSelectionTimeoutMS=MONGODB_TIMEOUT)
    try:
        db = client["Grupo09"]
        employees = db["employees"]
        mapper = Code(
            " function () { emit(this.label.value.replace(/'/i,"
            ' ""), this.numberOfEmployees.value); } '
        )
        reducer = Code(
            " function (key, values) {"
            " var max = values[0];"
            " values.forEach(function(val){"
            " if (val > max) max = val;"
            " });"
            " return max;"
            " } "
        )
        employees.map_reduce(mapper, reducer, "result_employees")
        cursor = db.result_employees.find().sort(
            "value", pymongo.DESCENDING).limit(20)
        # Cursors are lazy: fetch the documents before the connection is
        # closed instead of letting the response iterate afterwards.
        documents = list(cursor)
    finally:
        # Release the connection even when the map-reduce fails.
        client.close()
    return HttpResponse(documents)
def updatekeyfield(collection):
    """Register every field name found in ``collection`` in ``mydb.keyfield``.

    A map-reduce writes one document per field name to the ``tempkey``
    collection. Each name other than ``"Samples"`` and ``"_id"`` that is
    not yet present in ``mydb.keyfield`` is inserted there with category
    ``"field"``. The temporary data is removed afterwards.

    Args:
        collection: The collection whose documents are scanned.
            NOTE(review): the result is read back from
            ``mydb.tempkey`` on ``MongodbAddrRemote``; this assumes
            ``collection`` lives in that same database - confirm.
    """
    mapper = Code(" function(){ for (var key in this) { emit(key, null); } } ")
    reducer = Code(" function (key, values) { return key; } ")
    collection.map_reduce(mapper, reducer, out="tempkey")
    connection = MongoClient(MongodbAddrRemote)
    try:
        keycollection = connection.mydb.keyfield
        # The field name is always stored in ``_id``. ``value`` is only
        # set to the name when reduce runs, which the server skips for
        # keys emitted once, so reading ``value`` missed those fields.
        field_names = connection.mydb.tempkey.distinct("_id")
        for field_name in field_names:
            if field_name in ("Samples", "_id"):
                continue
            if not keycollection.find_one({"value": field_name}):
                keycollection.insert(
                    {"value": field_name, "category": "field"})
        # Clear the data of the temporary collection.
        connection.mydb.tempkey.remove()
    finally:
        # The connection was opened here, so it is closed here as well.
        connection.close()
    return
def get_keys():
    """Return the distinct keys used inside ``deposit_metadata`` as JSON.

    A map-reduce over ``COLLECTION_NAME`` in the ``3deposit`` database
    emits every key of each document's ``deposit_metadata`` into the
    ``metadata_keys`` collection; the distinct ``_id`` values of that
    output are the key names.

    Returns:
        The ``jsonify`` response for ``{"keys": [...]}``.
    """
    deposits = create_client(request)['3deposit'][COLLECTION_NAME]
    emit_metadata_keys = Code(
        "function() { for (var key in this.deposit_metadata) { emit(key, null); } }")
    discard_values = Code("function(key, stuff) { return null; }")
    key_collection = deposits.map_reduce(
        emit_metadata_keys, discard_values, "metadata_keys")
    key_names = key_collection.distinct('_id')
    return jsonify({"keys": key_names})
def generateClassWiseUnigrams(self):
    """Build word-frequency collections for every review class.

    For each class name, a map-reduce over ``Review_no_punctuations``
    counts the space-separated, lower-cased words of the reviews whose
    class flag equals 1 and writes them to ``<class>_Unigrams``. The
    output is then passed to ``cleanUnigramsStopwords`` together with the
    ``<class>_Unigrams_no_stopwords`` and
    ``<class>_Unigrams_with_stopwords`` collection names.
    """
    print("generating class wise unigrams")
    classes = ["Food", "Service", "Ambiance", "Deals", "Price"]
    # Local variables are declared with ``var`` (the old code created
    # implicit globals) and statements end with explicit semicolons, so
    # the script does not depend on line breaks.
    map_template = """
        function () {
            if (this.%s == 1) {
                var review_text = this.review.toLowerCase();
                var tokens = review_text.split(" ");
                tokens.forEach(function (word) {
                    emit(word, 1);
                });
            }
        }
    """
    # The reducer is the same for every class; build it once.
    reduce = Code("""
        function (word, count) {
            return Array.sum(count);
        }
    """)
    for each_class in classes:
        mapFunction = Code(map_template % each_class)
        print("generating unigrams for %s" % each_class)
        self.db.Review_no_punctuations.map_reduce(
            mapFunction, reduce, "%s_Unigrams" % each_class)
        self.cleanUnigramsStopwords(
            "%s_Unigrams" % each_class,
            "%s_Unigrams_no_stopwords" % each_class,
            "%s_Unigrams_with_stopwords" % each_class)
    print("generating unigrams done")
def CreatIndex(collection):
    """Create indexes on ``collection`` for the configured fields.

    An inline map-reduce lists the field names in use. Every name whose
    upper-cased form is in ``MongoIndexField`` gets an ascending
    background index; failures for individual fields are skipped.
    Finally a compound text index over the ontology fields
    ``INFO.HPO``, ``INFO.DO``, ``INFO.SO``, ``INFO.MC`` and ``INFO.GO``
    is created.

    Args:
        collection: The collection to index.
    """
    mapper = Code(" function(){ for (var key in this) { emit(key, null); } } ")
    reducer = Code(" function (key, values) { return key; } ")
    keys = collection.map_reduce(
        mapper, reducer, out={'inline': 1}, full_response=True)
    for key in keys['results']:
        # The field name is always in ``_id``. ``value`` stays null for a
        # field emitted only once, because reduce is not called for it,
        # and ``None.upper()`` would abort the whole function.
        field_name = key['_id']
        if field_name.upper() in MongoIndexField:
            try:
                collection.create_index([(field_name, 1)], background=True)
            except Exception:
                # Best effort: one failing index must not stop the rest.
                continue
    # A full-text index on the whole INFO field is not created for now
    # because it consumes too much memory.
    # Create the text index for the ontology fields.
    collection.create_index(
        [('INFO.HPO', "text"),
         ('INFO.DO', "text"),
         ('INFO.SO', "text"),
         ('INFO.MC', "text"),
         ('INFO.GO', "text")],
        background=True)
def analysis_title(collection):
    """Count word occurrences in the ``title`` field of a collection.

    Titles are lower-cased and split on single spaces; punctuation
    characters are stripped from each word, and empty words and ``-``
    are ignored. The counts are written to the ``title_result``
    collection.

    Args:
        collection: The collection whose documents carry ``title``.

    Returns:
        Whatever ``collection.map_reduce`` returns for the
        ``title_result`` output.
    """
    # Raw string: the regular expression has to reach the server with
    # its backslashes intact. In a normal string ``\\`` collapsed to a
    # single backslash (so backslashes were never stripped from words)
    # and ``\/`` is an invalid Python escape sequence.
    mapper = Code(r"""
        function () {
            var text = this.title;
            if (text) {
                var words = text.toLowerCase().split(" ");
                for (var i = words.length - 1; i >= 0; i--) {
                    var word = words[i].replace(/[&\/\\#,+()$~%.'":*?<>{}]/g, '');
                    if (word != '-' && word != '') {
                        emit(word, 1);
                    }
                }
            }
        };
    """)
    reducer = Code(
        " function (key, values) {"
        " var result = 0;"
        " for (var i = 0; i < values.length; i++) {"
        " result += values[i];"
        " }"
        " return result;"
        " } "
    )
    return collection.map_reduce(mapper, reducer, "title_result")
def test_bson_classes(self):
    """Check that the mockup BSON values match themselves and PyMongo's.

    Each pair holds a PyMongo value and the ``mockup_bson`` value built
    from the same arguments. Both are compared as the ``y`` field of a
    command, once without and once with a leading command name.
    """
    _id = '5a918f9fa08bff9c7688d3e1'
    equivalents = (
        (Binary(b'foo'), mockup_bson.Binary(b'foo')),
        (Code('foo'), mockup_bson.Code('foo')),
        (Code('foo', {'x': 1}), mockup_bson.Code('foo', {'x': 1})),
        (DBRef('coll', 1), mockup_bson.DBRef('coll', 1)),
        (DBRef('coll', 1, 'db'), mockup_bson.DBRef('coll', 1, 'db')),
        (Decimal128('1'), mockup_bson.Decimal128('1')),
        (MaxKey(), mockup_bson.MaxKey()),
        (MinKey(), mockup_bson.MinKey()),
        (ObjectId(_id), mockup_bson.ObjectId(_id)),
        (Regex('foo', 'i'), mockup_bson.Regex('foo', 'i')),
        (Timestamp(1, 2), mockup_bson.Timestamp(1, 2)),
    )
    # The first Command argument is special, so every comparison is done
    # without it and then with it.
    leading_args = ((), ('x',))
    for pymongo_value, mockup_value in equivalents:
        # A MockupDB value must match itself.
        for leading in leading_args:
            matcher = Matcher(Command(*leading, y=mockup_value))
            self.assertTrue(
                matcher.matches(Command(*leading, y=mockup_value)),
                "MockupDB %r doesn't equal itself" % (mockup_value, ))
        # In practice, users pass PyMongo classes in message specs.
        for leading in leading_args:
            matcher = Matcher(Command(*leading, y=mockup_value))
            self.assertTrue(
                matcher.matches(Command(*leading, y=pymongo_value)),
                "PyMongo %r != MockupDB %r" % (pymongo_value, mockup_value))
def CountLocation(request, json):
    """Return the 10 most frequent user locations as an HTTP response.

    Counts the documents of the ``users`` collection of the ``Grupo09``
    database per ``location`` and writes the counts to
    ``result_location``. Results whose key is null are left out.

    Args:
        request: Incoming request object; not used by this view.
        json: Not used by this view.

    Returns:
        An ``HttpResponse`` built from the 10 result documents with the
        highest ``value``.
    """
    client = pymongo.MongoClient(
        URI_CONNECTION, serverSelectionTimeoutMS=MONGODB_TIMEOUT)
    try:
        db = client["Grupo09"]
        users = db["users"]
        mapper = Code(" function () { emit(this.location, 1); } ")
        reducer = Code(" function (key, values) { return Array.sum(values); } ")
        users.map_reduce(mapper, reducer, "result_location")
        cursor = db.result_location.find(
            {"_id": {"$ne": None}}
        ).sort("value", pymongo.DESCENDING).limit(10)
        # Cursors are lazy: fetch the documents before the connection is
        # closed instead of letting the response iterate afterwards.
        documents = list(cursor)
    finally:
        # Release the connection even when the map-reduce fails.
        client.close()
    return HttpResponse(documents)
def on_command(self, message):
    """Post the five most played games of the last week to the channel.

    Runs ``self.map_reduce`` once per day for eight consecutive days,
    starting seven days ago, passing the day formatted as ``YYYYMMDD``.
    The per-game values of all days are added up, divided by 120 and
    rounded to give the displayed hours.

    Args:
        message: The triggering message; the reply is sent to its
            ``channel``.
    """
    emit_played = Code(
        'function map(){for(var k in this.played){emit(k,this.played[k])}}')
    sum_played = Code(
        'function reduce(names,totals){return Array.sum(totals);}')
    one_day = timedelta(days=1)
    day = datetime.now() - timedelta(days=7)
    totals = {}
    for _ in range(8):
        daily = yield from self.map_reduce(
            emit_played, sum_played, day.strftime("%Y%m%d"))
        day += one_day
        for entry in daily:
            game = entry['_id']
            if game in totals:
                totals[game] += entry['value']
            else:
                totals[game] = entry['value']
    ranking = [{'g': game, 'v': total} for game, total in totals.items()]
    # Ascending sort followed by a reversal keeps the same order among
    # games with equal totals as reversing a sorted copy.
    ranking.sort(key=lambda row: row['v'])
    ranking.reverse()
    parts = ["**Most played games in the last 7 days**\n\n"]
    for position, row in enumerate(ranking[:5], start=1):
        hours = round(row['v'] / 120)
        parts.append(
            str(position) + ". " + row['g'] + " (" + str(hours) + " hrs)\n")
    yield from self.client.send_message(message.channel, "".join(parts))
def CountScale(request, json):
    """Return the tweet count per ``scale`` value as an HTTP response.

    Counts documents of the ``tweets`` collection of the ``Grupo09``
    database per ``scale`` and writes the counts to ``result_scale``.
    When the ``"Author"`` entry of the decoded payload differs from
    ``"0"``, only tweets whose ``author_id`` equals it are counted.

    Args:
        request: Incoming request object; not used by this view.
        json: JSON text containing an ``"Author"`` entry.

    Returns:
        An ``HttpResponse`` built from the documents of
        ``result_scale``.
    """
    lstData = simplejson.loads(json)
    objAuthor = lstData["Author"]
    client = pymongo.MongoClient(
        URI_CONNECTION, serverSelectionTimeoutMS=MONGODB_TIMEOUT)
    try:
        db = client["Grupo09"]
        tweets = db["tweets"]
        reducer = Code(" function (key, values) { return Array.sum(values); } ")
        options = {}
        if objAuthor != "0":
            # The author id comes from the request payload. It is handed
            # to the server as a scope variable, not pasted into the
            # JavaScript source, so it cannot inject code.
            mapper = Code("""
                function () {
                    if (this.author_id == authorId) {
                        emit(this.scale, 1);
                    }
                }
            """)
            options["scope"] = {"authorId": objAuthor}
        else:
            mapper = Code(" function () { emit(this.scale, 1); } ")
        tweets.map_reduce(mapper, reducer, "result_scale", **options)
        # Cursors are lazy: fetch the documents before the connection is
        # closed instead of letting the response iterate afterwards.
        documents = list(db.result_scale.find())
    finally:
        client.close()
    return HttpResponse(documents)
def generate_sources():
    """
    Generate sources mapreduce.

    Counts samples per source name and stores each count as
    ``sample_count`` on the source-access document with that name.
    Returns ``None``; if the map-reduce fails nothing is updated.
    """
    samples = mongo_connector(settings.COL_SAMPLES)
    m = Code(
        'function() { this.source.forEach(function(z) '
        '{emit({name: z.name}, {count: 1});}) }',
        {})
    r = Code(
        'function(k,v) { var count=0; v.forEach(function(v) '
        '{ count += v["count"]; }); return {count: count}; }',
        {})
    try:
        sources = samples.inline_map_reduce(
            m, r, query={"source.name": {"$exists": 1}})
    except Exception:
        # Best effort: leave the existing counts untouched on failure.
        # Only ``Exception`` is caught so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) still propagate.
        return
    source_access = mongo_connector(settings.COL_SOURCE_ACCESS)
    for source in sources:
        source_access.update(
            {"name": source["_id"]["name"]},
            {"$set": {"sample_count": source["value"]["count"]}})
def generate_campaign_stats(source_name=None):
    """
    Generate campaign stats.

    Every campaign starts from the value of ``zero_campaign()``. The
    per-campaign counts of domains, emails, events, indicators, IPs,
    PCAPs and samples are then folded in through ``update_results`` and
    the final stats are upserted into the campaigns collection by name.

    :param source_name: Limit to a specific source.
    :type source_name: None, str
    """
    # build the query used in the mapreduces
    stat_query = {"campaign.name": {"$exists": "true"}}
    if source_name:
        stat_query["source.name"] = source_name
    campaigns = mongo_connector(settings.COL_CAMPAIGNS)
    # (collection, name of the stat it feeds), in processing order
    counted_collections = [
        (mongo_connector(settings.COL_DOMAINS), "domain_count"),
        (mongo_connector(settings.COL_EMAIL), "email_count"),
        (mongo_connector(settings.COL_EVENTS), "event_count"),
        (mongo_connector(settings.COL_INDICATORS), "indicator_count"),
        (mongo_connector(settings.COL_IPS), "ip_count"),
        (mongo_connector(settings.COL_PCAPS), "pcap_count"),
        (mongo_connector(settings.COL_SAMPLES), "sample_count"),
    ]
    # generate an initial campaign listing so we can make sure all
    # campaigns get updated, each initialized to zeroed out stats
    campaign_stats = {
        campaign["name"]: zero_campaign()
        for campaign in campaigns.find({}, {'name': 1})
    }
    # ``campaign_list`` is declared per invocation. It used to be an
    # implicit global, so a document without a ``campaign`` field could
    # reuse the list left over from the previous document.
    mapcode = """
        function() {
            var campaign_list = [];
            if ("campaign" in this) {
                campaign_list = this.campaign;
            }
            if (campaign_list.length > 0) {
                campaign_list.forEach(function(c) {
                    emit(c.name, {count: 1});
                });
            }
        }
    """
    m = Code(mapcode, {})
    r = Code(
        'function(k,v) { var count = 0; v.forEach(function(v) '
        '{ count += v["count"]; }); return {count: count}; }',
        {})
    for collection, stat_name in counted_collections:
        campaign_stats = update_results(
            collection, m, r, stat_query, stat_name, campaign_stats)
    # update all of the campaigns here
    for name, stats in campaign_stats.items():
        campaigns.update({"name": name}, {"$set": stats}, upsert=True)
def get_columns(self, db_param):
    """Return the distinct top-level field names of a collection.

    A map-reduce emits every key of every document into the
    ``myresults`` collection of the same database; the distinct ``_id``
    values of that output are the field names.

    Args:
        db_param: Mapping with the entries ``"db"`` (database name) and
            ``"collection"`` (collection name) on the default local
            MongoDB server.

    Returns:
        The list of distinct field names.
    """
    client = MongoClient()
    try:
        db = client[db_param["db"]]
        mapper = Code("function() { for (var key in this) { emit(key, null); } }")
        reducer = Code("function(key, stuff) { return null; }")
        result = db[db_param["collection"]].map_reduce(
            mapper, reducer, "myresults")
        # distinct() runs the query immediately, so the client can be
        # closed as soon as it returns.
        return result.distinct('_id')
    finally:
        # The client was opened here, so it is closed here as well.
        client.close()
def GetKeys(p):
    """Return the distinct field names used by the documents of ``p``.

    Args:
        p: Collection to scan; the map-reduce output is written to the
            ``keyresults`` collection.

    Returns:
        The distinct ``_id`` values of the map-reduce output.
    """
    emit_every_key = Code(
        "function() {for (var key in this) { emit(key, null);}}")
    discard_values = Code("function(key, stuff) { return null;}")
    key_collection = p.map_reduce(emit_every_key, discard_values, "keyresults")
    return key_collection.distinct("_id")
def findAllKeysInCollection(self, collectionName):
    """Return all top-level field names used in a collection.

    The map-reduce output goes to the temporary collection
    ``<collectionName>_keys_temp``, which is dropped before returning.

    Args:
        collectionName: Name of the collection in ``self.db`` to scan.

    Returns:
        A list with the distinct field names.
    """
    mapper = Code("function() { for (var key in this) { emit(key, null); } }")
    reducer = Code("function(key, stuff) { return null; }")
    tempResultCollection = f"{collectionName}_keys_temp"
    result = self.db[collectionName].map_reduce(
        mapper, reducer, tempResultCollection)
    try:
        return list(result.distinct('_id'))
    finally:
        # Drop the temporary collection even when reading it fails, so
        # no leftover collection accumulates in the database.
        self.db.drop_collection(tempResultCollection)
def map_reduce(self, mapper, reducer, out, full_response=False, **kwargs):
    """Run a map-reduce on ``self.collection``.

    Args:
        mapper: Map function; a plain string is wrapped in ``Code``.
        reducer: Reduce function; a plain string is wrapped in ``Code``.
        out: Output target, passed through unchanged.
        full_response: Passed through unchanged.
        **kwargs: Extra options, passed through unchanged.

    Returns:
        Whatever ``self.collection.map_reduce`` returns.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so
    # the wrapper no longer raises NameError on Python 3.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str
    if isinstance(mapper, string_types):
        mapper = Code(mapper)
    if isinstance(reducer, string_types):
        reducer = Code(reducer)
    return self.collection.map_reduce(
        mapper, reducer, out, full_response=full_response, **kwargs)
def get_list_of_field_names(mongo_db_connection, collection_name):
    """Return the distinct top-level field names of a collection.

    Args:
        mongo_db_connection: Database object indexed by collection name.
        collection_name: Name of the collection to scan.

    Returns:
        The distinct ``_id`` values of the map-reduce output, which is
        written to the ``_keys`` collection.
    """
    # NOTE: an earlier variant issued the raw "mapreduce" command with
    # the output collection named collection_name + "_keys"; the output
    # name used here is the fixed string "_keys".
    emit_every_key = Code(
        "function() { for (var key in this) { emit(key, null); }}")
    discard_values = Code("function(key, stuff) { return null; }")
    source = mongo_db_connection[collection_name]
    key_collection = source.map_reduce(
        emit_every_key, discard_values, out="_keys")
    key_cursor = key_collection.find()
    return key_cursor.distinct("_id")
def get_stats_by_matricule():
    """Return a question count per matricule.

    A map-reduce over ``midterm_questions_collection_pointer`` counts the
    documents per (matricule, question type) pair and stores the counts
    in the ``stats`` collection.

    Returns:
        A dict mapping each matricule to a count from the ``stats``
        output.
    """
    count_by_pair = Code(
        "function(){emit({'matricule':this.matricule, "
        "'type':this.typeQuestion}, 1)}")
    sum_counts = Code("function(key, values){return Array.sum(values)}")
    stats_collection = midterm_questions_collection_pointer.map_reduce(
        count_by_pair, sum_counts, "stats")
    # NOTE(review): the result is keyed by matricule only, so when one
    # matricule has several question types the entry read last replaces
    # the earlier ones - confirm this is intended.
    stats = {}
    for entry in stats_collection.find():
        stats[entry['_id']['matricule']] = entry['value']
    return stats
def get_fields(self):
    """
    Get provider field information (names)

    Collects the keys found under ``properties`` in the documents of
    ``self.collection``; the map-reduce output is written to the
    ``myresults`` collection.

    :returns: the distinct property names
    """
    emit_property_keys = Code(
        "function() { for (var key in this.properties) { emit(key, null); } }")
    discard_values = Code("function(key, stuff) { return null; }")
    features = self.featuredb[self.collection]
    key_collection = features.map_reduce(
        emit_property_keys, discard_values, "myresults")
    return key_collection.distinct('_id')