示例#1
0
class CountryDB(object):
    """Thin wrapper around the 'crunch.country' MongoDB collection."""

    def __init__(self, mongo_host):
        # Keep a handle to the country collection of the crunch database.
        self._db = Connection(mongo_host)['crunch']['country']

    def save(self, country):
        """Store a document whose _id is the country value itself."""
        self._db.save({'_id': country})

    def increment(self, country):
        """Bump the per-country counter 'c', creating the doc if absent."""
        self._db.update({'_id': country}, {'$inc': {'c': 1}}, upsert=True)
示例#2
0
class LocalTouristClassifier():
    """Resolves free-text Twitter profile locations to [lat, lon] pairs via
    Yahoo's YQL geo.placefinder service and stores them on tweet-user docs.

    NOTE(review): classifyTwitter() reads self.place, which is never set in
    __init__ — presumably assigned elsewhere in the class; confirm before use.
    """

    def __init__(self):
        # Collections for tweets and their authors; authors get a 2d geo
        # index on "loc" so spatial queries work once users are tagged.
        self.tweets = Connection().tweetsDB.tweetsCollection
        self.tweetUsers = Connection().tweetsDB.tweetUsersCollection
        self.tweetUsers.ensure_index( [("loc", GEO2D )] )

        self.photos = Connection().flickrDB.flickrCollection
        self.linked = Connection().linkedDB.linkedCollection

        # NOTE(review): credentials hardcoded in source — move to config/env.
        API_KEY = 'dj0yJmk9UUY5TWxNMXBRb0M3JmQ9WVdrOVV6RlVOWFEzTjJzbWNHbzlNVGMzTVRBNE5EazJNZy0tJnM9Y29uc3VtZXJzZWNyZXQmeD0zYQ--'
        SHARED_SECRET = '92a96753c369996f18b6a2ef4a6b1b9c85de04f5'
        self.y = yql.TwoLegged(API_KEY, SHARED_SECRET)
        # Maps raw location strings -> resolved [lat, lon] list, or 0 when a
        # lookup failed, so each distinct string costs at most one YQL call.
        self.yqlCache = {}

    def unescape_html_chars(self, item):
        """Undo the four basic HTML entity escapes (&amp; &gt; &lt; &quot;)."""
        return item.replace("&amp;", "&").replace("&gt;", ">").replace("&lt;", "<").replace("&quot;", "\"")

    def classifyTwitter(self):
        """Geo-tag authors of tweets from self.place that have a free-text
        location but no resolved 'loc' yet, and save the updated users."""
        for tweet in self.tweets.find({"place":self.place}):
            if tweet['fromUserID'] is not None:
                tweetUser = self.tweetUsers.find_one({'id':tweet['fromUserID']})
                if tweetUser is not None:
                    tweetUserLocation = tweetUser['location']
                    # Only resolve users that have a location string but are
                    # not tagged yet (assumes both keys exist — TODO confirm).
                    if tweetUserLocation is not None and tweetUser['loc'] is None:
                        tweetUserLocation = tweetUserLocation.encode('utf-8')
                        #print "%s || %s" % (tweetUserLocation, self.place)
                        # we use the yqlCache local dictionary to use as few calls as possible
                        if self.yqlCache.get(tweetUserLocation) is not None and self.yqlCache[tweetUserLocation] != 0:
                            tweetUser['loc'] = self.yqlCache[tweetUserLocation]
                            print 'cacheSuccess: %20s %15s %s' % (tweetUserLocation, tweetUser['id'], tweetUser['loc'])
                        else:
                            # send request out to YQL
                            yqlQuery = 'select * from geo.placefinder where text="%s";' % tweetUserLocation
                            try:
                                yqlResult = self.y.execute(yqlQuery)
                                if yqlResult.rows == []:
                                    # yql couldn't figure out where this is, so don't save a loc
                                    # (0 in the cache marks the string as unresolvable)
                                    self.yqlCache[tweetUserLocation] = 0
                                    print 'fail: %20s %s' % (tweetUserLocation, tweetUser['id'])
                                else:
                                    # yql found a lat and lon, so let's tag it
                                    loc = [float(yqlResult.rows[0].get('latitude')), float(yqlResult.rows[0].get('longitude'))]
                                    tweetUser['loc'] = loc
                                    self.yqlCache[tweetUserLocation] = loc
                                    print 'success: %20s %15s %s' % (tweetUserLocation, tweetUser['id'], loc)
                            except:
                                # NOTE(review): bare except swallows everything,
                                # including KeyboardInterrupt — narrow this.
                                print "Exception Detected:", sys.exc_info()[0]

                        # ready to save user (saved even when the lookup failed,
                        # leaving 'loc' untouched)
                        self.tweetUsers.save(tweetUser)
示例#3
0
def fetch_extended(mongo_host, start_idx=None):
    """Fetch extended company records in batches of 10 and store each one
    in the 'crunch.extended' collection, keyed by its permalink.

    :param mongo_host: host passed to the Mongo Connection.
    :param start_idx: forwarded to get_companies_list to resume a crawl.
    """
    ext_col = Connection(mongo_host)['crunch']['extended']
    companies = [c['_id'] for c in get_companies_list(mongo_host, start_idx)]

    BATCH = 10

    for s in range(0, len(companies), BATCH):
        print(companies[s])
        # Bug fix: the original built range(s, s + BATCH), which indexes past
        # the end of `companies` (IndexError) whenever the final batch is
        # shorter than BATCH. Slicing clamps to the list length.
        urls = [COMPANY_ENDPOINT % c for c in companies[s:s + BATCH]]
        results = _get_batch(urls)
        for result in results:
            data = result
            # Use the stable permalink as the document id so re-fetching the
            # same company overwrites instead of duplicating.
            data['_id'] = result['permalink']
            ext_col.save(data)
    print(companies)
示例#4
0
def fetch_companies(mongo_host, start_idx=1):
    """Crawl the paginated company search endpoint, ten pages at a time,
    and persist every result into 'crunch.company' keyed by permalink."""
    col = Connection(mongo_host)['crunch']['company']

    TOTAL = 8200
    BATCH = 10

    page = start_idx
    while page < TOTAL:
        print(page)
        urls = [SEARCH_ENDPOINT % p for p in range(page, page + BATCH)]
        for results in _get_batch(urls, 'results'):
            for result in results:
                # Permalink doubles as the Mongo document id.
                result['_id'] = result['permalink']
                col.save(result)
        page += BATCH
示例#5
0
文件: handler.py 项目: xbx/rng
class RngDbHandler(object):
    """ Database access handler """

    def __init__(self, collection, db='rng', host=os.getenv('MONGO_HOST', 'localhost'),
                 port=None):
        """ Init log handler and store the collection handle

        Host comes from $MONGO_HOST, falling back to localhost.
        (Bug fix: the original fallback was misspelled 'localohst',
        a hostname that can never resolve.)
        """

        self.collection = Connection(host, port)[db][collection]

    def insert(self, record):
        """ Store the record to the collection.

        Errors are logged rather than raised so a malformed log record
        cannot take down the caller.
        """
        try:
            self.collection.save(record)
        except InvalidDocument as e:
            # 'except X as e' replaces the Python-2-only 'except X, e'
            # syntax; str(e) replaces the deprecated e.message attribute.
            logging.error("Unable to save log record: %s", str(e))
示例#6
0
class StatsDB(object):
    """Wrapper around 'crunch.company_stats' with a pre-filtered find()."""

    def __init__(self, mongo_host):
        self._db = Connection(mongo_host)['crunch']['company_stats']

    def save(self, company, stats):
        """Store `stats` as the 'data' field of a doc keyed by company."""
        self._db.save({'_id': company, 'data': stats})

    def find(self, query=None, *args, **kwargs):
        """Yield company docs founded on/after 1995-01-01 that have a
        category, at least one employee, and money raised.

        Extra args/kwargs are forwarded to the underlying collection find.
        """
        end_ts = datetime(1995, 1, 1)
        query = query or {}
        query['data.founded_at'] = {'$gte': end_ts}
        query['data.category_code'] = {'$ne': None}
        query['data.number_of_employees'] = {'$gt': 0}
        query['data.total_money_raised'] = {'$gt': 0}
        for i in self._db.find(query, *args, **kwargs):
            # Re-check founded_at in Python — presumably guards docs whose
            # stored value does not compare as expected server-side; confirm.
            if get_dotted(i, 'data.founded_at') >= end_ts:
                yield i
        # Bug fix: the original ended with 'raise StopIteration', which is
        # redundant in a generator and, under PEP 479 (Python 3.7+), is
        # converted to RuntimeError instead of cleanly ending iteration.
示例#7
0
class MongoEmitter(Emitter):
    """Emitter that writes FFIEC low/high/avg figures onto the matching
    MSA location documents in the 'k2.locations' collection."""

    def __init__(self):
        super(MongoEmitter, self).__init__()
        self._mongo = Connection()['k2']['locations']

    def emit_record(self, record):
        """Attach the record's figures to the location doc whose code
        matches the record's msa_code; report records with no match."""
        doc = self._mongo.find_one({'code': record['msa_code']})
        if not doc:
            print("[%s] %s not found" % (record['msa_code'], record['name']))
            return
        doc['ffiec'] = {
            'low': record['low'],
            'high': record['high'],
            'avg': record['avg'],
        }
        self._mongo.save(doc)

    def done(self):
        """Nothing to flush."""
        pass
	# Split fetched entries: ids below 5000 are items, the rest are runes.
	for i in tmp_items:
		if i['_id']<5000:
			items.append(i)
		else:
			runes.append(i)

	conn = Connection()['lol']['items']

	import codecs
	ct = codecs.open('loltw_constant.py', 'w', 'utf-8')
	# NOTE(review): writing the raw BOM bytes through a utf-8 codecs writer
	# can fail on Python 2 (implicit ascii decode of BOM_UTF8) — confirm.
	ct.write(codecs.BOM_UTF8)

	# items.urls doubles as a curl config file: url=/out= pairs per item icon.
	f = open('items.urls', 'w')
	ct.write('ITEM_NAME_LOOKUP={')
	for i in items:
		conn.save(i)
		f.write('url="http://na.leagueoflegends.com/sites/default/files/game_data/%s/content/item/%d.gif"\n' % (NA_VER, i['_id']))
		f.write('out="../../static/img/items/%d.gif"\n' % (i['_id']))
		f.write('create-dirs\n\n')

		# Map item id -> Traditional Chinese display name in the lookup dict.
		ct.write("'%s': '%s'," % (i['_id'], i['displayname'].get('zh_TW')))
	f.close()
	ct.write('}\n')

	# Same procedure for runes, into their own collection and url file.
	conn = Connection()['lol']['runes']
	f = open('runes.urls', 'w')
	ct.write('RUNE_NAME_LOOKUP={')
	for r in runes:
		conn.save(r)
		f.write('url="http://na.leagueoflegends.com/sites/default/files/game_data/%s/content/rune/%d.gif"\n' % (NA_VER, r['_id']))
		f.write('out="../../static/img/runes/%d.gif"\n' % (r['_id']))
示例#9
0
class Collective(object):
    """XCS-style accuracy-based learning classifier system.

    One Collective is shared per backing table name (see __new__), so every
    caller asking for the same table operates on the same rule population.
    """

    class Adaptive(object):
        # Tunable learning parameters, named as in the paper.
        # See the XCS paper for details: Butz and Wilson, 2001
        # http://citeseer.ist.psu.edu/old/700101.html
        N = population_size = 1000
        B = learning_rate = .1
        a = accuracy_slope = .1
        e0 = error_minimum = 1
        v = power_parameter = -5
        g = discount_factor = .8
        OGA = ga_threshold = 40
        X = crossover_prob = .75
        u = mutation_prob = .01
        Odel = experience_threshold = 20
        d = fitness_threshold = .01
        Osub = subsumption_threshold = 50
        P = mask_probability = .3
        p1 = initial_prediction = 1
        e1 = initial_error = .1
        F1 = initial_fitness = .01
        pexplr = exploration_probability = .25
        Omna = coverage_threshold = 4
        
        # Reduced learning rate for the environmental error
        Be = variance_learning_rate = .05
        
        # Subsumption is probably not useful here.
        doGASubsumption = False
        doActionSetSubsumption = False
    
    # Todo: Consider weak refs for this
    singletons = {}
    
    def __new__(cls, table):
        """Return the shared instance for `table`, creating it on first use."""
        try:
            result = cls.singletons[table]
        except KeyError:
            result = object.__new__(cls)
            cls.singletons[table] = result
            result.init(table)
        return result
    
    def init(self, table):
        # Not __init__, because that gets run too often.
        # (__init__ would re-run on every Collective(table) call, even for
        # the memoized instance that __new__ returns.)
        self.values = self.Adaptive()
        self.rules = self.retrieve(table)
        self.timestamp = 0
    
    def retrieve(self, table):
        """Load the stored classifier population from Mongo, or start with
        an empty population when pymongo is unavailable."""
        try:
            from pymongo import Connection
        except ImportError:
            rules = []
        else:
            self.table = Connection().parang[table]
            rules = [Classifier(row) for row in self.table.find()]
        return rules
    
    def save(self, rule):
        # Called whenever a classifier is created or changed.
        # NOTE(review): self.table is only assigned on the successful-import
        # path of retrieve(); if pymongo was missing, this raises
        # AttributeError instead of skipping persistence — confirm intent.
        if self.table:
            if rule.n > 0:
                uid = self.table.save(rule.values())
                rule._id = uid
            elif rule._id:
                # Numerosity dropped to zero: remove the stored document.
                self.table.remove(rule._id)
    
    def generate(self, msg):
        """Match `msg` against the population and choose an output action.

        Returns (action, action_set, brigade): the chosen action, the rules
        that proposed it, and the discounted best predicted payoff used for
        bucket-brigade credit assignment.
        """
        self.timestamp += 1
        results = defaultdict(list)
        
        for rule in self.rules:
            output = rule.matches(msg)
            if output is not None:
                results[output].append(rule)
        
        # Covering: invent new rules until the numerosity-weighted match
        # count reaches the coverage threshold Omna.
        while sum(r.n for s in results for r in results[s]) < self.values.Omna:
            rule = self.coverage(msg, results)
            output = rule.matches(msg)
            results[output].append(rule)
        self.delete()
        
        # Fitness-weighted average prediction for each candidate action.
        actions = dict((key, sum(r.p * r.F for r in results[key]) /
                sum(r.F for r in results[key]))
            for key in results
            if results[key])
        action = weighted_choice(actions)
        action_set = results[action]
        
        brigade = self.values.g * max(actions.values())
        return action, action_set, brigade
    
    def coverage(self, msg, actions):
        r"""Create, store, and return a new classifier fitting the
        under-represented message `msg`."""
        
        # Ignore only P% of the pattern bits.
        mask = 0
        for n in range(Classifier.bits):
            if random() > self.values.P:
                mask |= 1 << n
        
        while True:
            # Generate an action not in the match set.
            # This also guarantees that the new classifier is unique.
            action = randrange(1 << Classifier.bits)
            if action not in actions:
                break
        
        # Initial bookkeeping values come from the Adaptive parameters.
        values = {
            "pattern": msg,
            "pattern_mask": mask,
            "output": action,
            "output_mask": 0,
            "prediction": self.values.p1,
            "error": self.values.e1,
            "fitness": self.values.F1,
            "experience": 0,
            "timestamp": self.timestamp,
            "setsize": len(actions),
            "numerosity": 1,
        }
        
        rule = Classifier(values)
        self.rules.append(rule)
        self.save(rule)
        return rule
    
    def update(self, action_set, bonus, msg):
        """Reinforce every rule in `action_set` with payoff `bonus`,
        then possibly trigger the genetic algorithm."""
        # Update the action set
        set_size = sum(rule.n for rule in action_set)
        if not set_size:
            # All classifiers have been deleted.
            # Continuing would cause division by zero errors.
            return
        
        # Factor out the error due to environmental changes
        ubar = min(bonus - rule.p for rule in action_set)
        
        accuracy = 0
        for rule in action_set:
            rule.exp += 1
            # While exp < 1/B this is a straight running average; afterwards
            # it becomes a fixed-rate exponential moving average.
            factor = max(1. / rule.exp, self.values.B)
            
            # Reordering these updates may help for more complex problems.
            rule.u += (ubar - rule.u) * self.values.Be
            rule.p += (bonus - rule.p) * factor
            err = abs(bonus - rule.p) - rule.u
            if err < 0: err = self.values.e0
            rule.e += (err - rule.e) * factor
            
            rule.s += (set_size - rule.s) * factor
            
            # Accuracy k: 1 below the error minimum, power-law decay above.
            if rule.e < self.values.e0:
                rule.k = 1
            else:
                rule.k = self.values.a * (rule.e / self.values.e0) ** self.values.v
            accuracy += rule.k * rule.n
        
        # Update the fitness separately, using the total accuracy.
        for rule in action_set:
            rule.F += (rule.k * rule.n / accuracy - rule.F) * self.values.B
        
        for rule in action_set:
            self.save(rule)
        
        # Run the genetic algorithm every so often
        avetime = sum(r.ts * r.n for r in action_set) / set_size
        if self.timestamp - avetime > self.values.OGA:
            self.genetic(action_set, msg)
    
    def genetic(self, action_set, msg):
        """Breed two offspring from the action set via fitness-proportional
        selection, crossover, and mutation, then reinsert them."""
        # Set timestamps for future use
        for rule in action_set:
            rule.ts = self.timestamp
        
        # Choose two, based on their fitness values
        fitness = dict((rule, rule.F) for rule in action_set)
        first = weighted_choice(fitness).copy()
        second = weighted_choice(fitness).copy()
        
        if random() < self.values.X:
            self.crossover(first, second)
        
        self.mutate(first, msg)
        self.mutate(second, msg)
        self.insert(first)
        self.insert(second)
        self.delete()
    
    def crossover(self, first, second):
        """Two-point crossover of pattern and pattern-mask bits between the
        two classifiers, averaging their performance statistics."""
        x = randrange(Classifier.bits)
        y = randrange(Classifier.bits)
        if x > y:
            x, y = y, x
        
        # Contiguous bit mask covering positions x..y inclusive.
        mask = 0
        for n in range(x, y + 1):
            mask |= 1 << n
        
        fp, fpm, fo, fom = first.unpack()
        sp, spm, so, som = second.unpack()
        
        # Swap the pattern, using the bitwise trick
        fp ^= sp & mask
        sp ^= fp & mask
        fp ^= sp & mask
        
        # Swap the pattern mask
        fpm ^= spm & mask
        spm ^= fpm & mask
        fpm ^= spm & mask
        
        first.pack(fp, fpm, fo, fom)
        second.pack(sp, spm, so, som)
        
        # Average out the performance measurements
        first.p = second.p = (first.p + second.p) / 2
        first.e = second.e = (first.e + second.e) / 2
        first.F = second.F = (first.F + second.F) / 2
    
    def mutate(self, rule, msg):
        """Flip pattern/output bits with probability u per bit, keeping the
        pattern inside the niche defined by `msg`."""
        prob = self.values.u
        pattern, pattern_mask, output, output_mask = rule.unpack()
        
        for n in range(Classifier.bits):
            bit = 1 << n
            if random() < prob:
                # Mutate only within the matching niche
                pattern_mask ^= bit
                if msg & bit:
                    pattern |= bit
                else:
                    pattern &= ~bit
            if random() < prob:
                output ^= bit
            if random() < prob:
                output_mask ^= bit
        
        # Save the new values
        rule.pack(pattern, pattern_mask, output, output_mask)
        
        # Temporarily decrease fitness
        rule.F *= 0.1
    
    def insert(self, rule):
        """Merge `rule` into the population: bump the numerosity of an
        identical classifier if one exists, otherwise add it."""
        for r in self.rules:
            if r.chromosome == rule.chromosome:
                r.n += rule.n
                self.save(r)
                break
        else:
            self.rules.append(rule)
            self.save(rule)
    
    def delete(self):
        """Shrink the population back to at most N total numerosity by
        removing copies chosen in proportion to their unfitness.

        Returns the list of classifiers fully removed from the population.
        """
        total = sum(rule.n for rule in self.rules)
        excess = total - self.values.N
        if excess < 1:
            return []
        
        fitness = sum(rule.F for rule in self.rules) / total
        scores = dict((rule, self.unfitness(rule, fitness))
            for rule in self.rules)
        
        deleted = []
        while excess > 0:
            rule = weighted_choice(scores)
            rule.n -= 1
            if rule.n <= 0:
                self.rules.remove(rule)
                del scores[rule]
                deleted.append(rule)
            self.save(rule)
            excess -= 1
        return deleted
    
    def unfitness(self, rule, average):
        """Deletion weight: set size times numerosity, amplified for
        experienced rules whose fitness falls well below average."""
        result = rule.n * rule.s
        if rule.exp > self.values.Odel and rule.F < average * self.values.d * rule.n:
            result *= average * rule.n / rule.F
        return result
示例#10
0
            # Grab file information
            # try:
            #    meta = extractMetadata(createParser(media['path']))
            # except KeyboardInterrupt:
            #    raise
            #
            # except:
            #    meta = None
            #    logging.debug("Failed grabbing meta.", exc_info=3)

            # Download the poster next to the media file as "<fn>-poster.jpg"
            # and record its path on the movie document.
            poster_name = None
            if pt_url:  # Download poster
                poster_name = "%s%s%s-%s.%s" % (dirname, os.sep, fn, "poster", "jpg")
                urlretrieve(pt_url, poster_name)
                movie["poster"] = poster_name

            # Same for the backdrop image, as "<fn>-backdrop.jpg".
            backdrop_name = None
            if bd_url:  # Download backdrop
                backdrop_name = "%s%s%s-%s.%s" % (dirname, os.sep, fn, "backdrop", "jpg")
                urlretrieve(bd_url, backdrop_name)
                movie["backdrop"] = backdrop_name

            # Copy metadata onto the document — presumably ctx holds search
            # results and info holds the detail lookup; confirm upstream.
            movie["rating"] = ctx["rating"]  # Update document
            movie["certification"] = ctx["certification"]
            movie["overview"] = ctx["overview"]
            movie["tagline"] = info["tagline"]
            movie["runtime"] = info["runtime"]
            movie["genres"] = [item["name"] for item in info["genres"]]

            coll.save(movie)  # Store updates
示例#11
0
文件: scaler.py 项目: safl/tecalibre
    if isinstance(size, int):
        # Scale the longest edge down to `size`, preserving aspect ratio.
        w,h=img.size
        if w>h:
            ratio       = w/float(size)
            new_size    = ( size, int(h/ratio) )
        elif w<h:
            ratio       = h/float(size)
            new_size    = ( int(w/ratio), size )
        else:
            new_size = (size, size)

    # NOTE(review): if `size` is not an int, new_size may be unbound here —
    # presumably a tuple case is handled above this fragment; confirm.
    return img.resize(new_size, Image.ANTIALIAS)

# For every media document: render any missing scaled versions next to the
# original file, record their paths on the document, and persist it.
for media in coll.find():
    source_path = media['path']
    if not os.path.exists(source_path):
        continue

    original_image = Image.open(source_path)

    for scale, size in image_scales:
        scaled_path = "%s.%s" % (source_path, scale)
        if scale in media:
            continue
        # Only render the scaled file if it is not already on disk.
        if not os.path.exists(scaled_path):
            resize_lsma(original_image, size).save(scaled_path, 'jpeg')
        media[scale] = scaled_path

    coll.save(media)
示例#12
0
#!/usr/bin/python

from pymongo import Connection
from time import time
from django.core.mail import send_mail
from settings import *

import os; os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'

# Once an event is more than five minutes old, mail every subscribed user
# and then retag the event so it is not processed again on the next run.
compromises = Connection(host="127.0.0.1", port=27017)['compDB']['compromiseCollection']

FIVE_MINUTES_MS = 300000

for compromise in compromises.find({'type': 'event'}):
    event_id = compromise["_id"]
    event_ts = compromise["timestamp"]
    # Current time in ms, minus (event time + 5 min): positive => expired.
    if time() * 1000 - (event_ts + FIVE_MINUTES_MS) > 0:
        for user in compromises.find({'idEvent': str(event_id)}):
            send_mail(EMAIL_SUBJECT_RESULT, (EMAIL_TEXT_RESULT % event_id), EMAIL_HOST_USER, [user["mail"]])
        compromise["type"] = "protuxlo"
        compromises.save(compromise)
示例#13
0
#!/usr/bin/env python
#! coding: utf-8

"""
Initialize the database by inserting fifty million documents.
Each document carries a list payload.
"""
from pymongo import Connection

# Collection handle: database "test", collection "test" on the local server.
coll = Connection("127.0.0.1", 27017)["test"]["test"]
# Shared payload: the very same 200-element list object is embedded in
# every document saved below.
data = [2934234]*200
for i in xrange(50000000):
	dic = {
		"data": data,
		"uid": i,
	}
	coll.save(dic)