def entry_generator(self):
    col_zakon = utils.get_collection(
        const.CONF_MONGO_ZAKON, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    col_tlac = utils.get_collection(
        const.CONF_MONGO_HLASOVANIETLAC, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in col_zakon.iterate_all():
        zakon = col_tlac.get({const.MONGO_ID: entry[const.MONGO_ID]})
        hlasovania = zakon.get(const.HLASOVANIETLAC_LIST, {})
        zmeny = entry.get(const.ZAKON_ZMENY, {})
        ids = sorted(zmeny.keys())
        names = [zmeny[i][const.ZAKON_ZMENY_PREDKLADATEL].split(",")[0] for i in ids]
        # Keep only second-reading votes, indexed by vote id; the value is the
        # part of the vote title following "Hlasovanie".
        hlas_text = pd.Series({
            key: value[const.HLASOVANIE_NAZOV].split("Hlasovanie")[-1]
            for key, value in hlasovania.items()
            if "druhé čítanie" in value[const.HLASOVANIE_NAZOV]
        })
        if len(hlas_text) == 0:
            continue
        # When the same proposer submitted several amendments, number their
        # occurrences so the matching "N. návrh" vote can be identified.
        counts = [0] * len(ids)
        for j, name in enumerate(names):
            if names.count(name) > 1:
                counts[j] = names[:j + 1].count(name)
        for j, i in enumerate(ids):
            # Drop the final character to tolerate declined surname forms.
            hlas_name = hlas_text[hlas_text.str.contains(names[j][:-1])]
            if counts[j] > 0:
                hlas_name = hlas_name[hlas_name.str.contains("{}. návrh".format(counts[j]))]
            for id_hlas, text in hlas_name.items():
                if "dopracovanie" not in text and "preložiť" not in text:
                    yield {
                        const.NEO4J_BEGINNING_ID: int(id_hlas),
                        const.NEO4J_ENDING_ID: int(i)
                    }
def create_optimizer(self, optimizer=None):
    model = get_collection('model')
    inputs = get_collection('inputs')
    alpha, l1_ratio = self.alpha, self.l1_ratio
    (x, y, class_weights, learning_rate,
     theta, logits, probabilities, predictions) = (
        inputs['x'], inputs['y'], inputs['class_weights'], inputs['learning_rate'],
        model['theta'], model['logits'], model['probabilities'], model['predictions'])
    with tf.name_scope('metrics'):
        xe = cross_entropy(self.n_classes, logits=logits, labels=y)
        loss = tf.reduce_mean(xe, name='loss')
        weights = tf.reduce_sum(class_weights * y, axis=1)
        weighted_loss = tf.reduce_mean(xe * weights, name='weighted_loss')
        penalty = elastic_net(theta, l1_ratio=l1_ratio)
        penalized_loss = tf.add(weighted_loss, alpha * penalty, name='penalized_loss')
        targets = tf.argmax(y, axis=1, name='targets')
        match = tf.cast(tf.equal(predictions, targets), tf.float32)
        accuracy = tf.reduce_mean(match, name='accuracy')
        add_to_collection('metrics', loss, penalized_loss, accuracy)
    with tf.name_scope('training'):
        opt = tf.train.GradientDescentOptimizer(learning_rate)
        training_op = opt.minimize(penalized_loss)
        add_to_collection('training', training_op)
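# A minimal sketch of what the `elastic_net` helper used above might compute.
# Its real definition is not shown in this file, so treat the name and the
# exact weighting below as assumptions, not this project's implementation.
def elastic_net_sketch(theta, l1_ratio=0.5):
    """Convex mix of L1 and L2 penalties on the weight tensor `theta`."""
    l1 = tf.reduce_sum(tf.abs(theta))
    l2 = tf.reduce_sum(tf.square(theta))
    return l1_ratio * l1 + (1.0 - l1_ratio) * l2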
def create_optimizer(self, optimizer=None):
    model = get_collection('model')
    inputs = get_collection('inputs')
    x, y, logits, probabilities, predictions = (
        inputs['x'], inputs['y'],
        model['logits'], model['probabilities'], model['predictions'],
    )
    with tf.name_scope('metrics'):
        xe = cross_entropy(self.n_classes, logits=logits, labels=y)
        loss = tf.reduce_mean(xe, name='loss')
        targets = tf.argmax(y, axis=1, name='targets')
        match = tf.cast(tf.equal(predictions, targets), tf.float32)
        accuracy = tf.reduce_mean(match, name='accuracy')
        add_to_collection('metrics', loss, accuracy)
    with tf.name_scope('training'):
        if optimizer is None:
            optimizer = tf.train.GradientDescentOptimizer
        opt = optimizer(inputs['learning_rate'])
        # Run pending update ops (e.g. batch-norm statistics) with every step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            training_op = opt.minimize(loss)
        add_to_collection('training', training_op)
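# Hedged usage sketch: because `create_optimizer` takes an optimizer *class*
# and instantiates it with the learning-rate placeholder, any TF 1.x optimizer
# with the standard `(learning_rate)` constructor can be swapped in. The class
# name `Classifier` below is hypothetical; only the method names are from the
# code above.
#
#     clf = Classifier(...)
#     clf.create_model()
#     clf.create_optimizer(optimizer=tf.train.AdamOptimizer)
#
# Omitting `optimizer` falls back to tf.train.GradientDescentOptimizer.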
def __init__(self, db, conf, source_collection=None, target_collection=None):
    self.db = db
    self.conf = conf
    if source_collection is None:
        source_collection = utils.get_collection(
            self, self.conf, const.CONF_MONGO_RAW, self.db)
    if target_collection is None:
        target_collection = utils.get_collection(
            self, self.conf, const.CONF_MONGO_PARSED, self.db)
    self.source_collection = source_collection
    self.target_collection = target_collection
    # str(self.__class__) looks like "<class 'pkg.Name'>"; take the dotted path.
    self.log = logging.getLogger(str(self.__class__).split("'")[1])
    self.unique_ids = [const.MONGO_ID]
def score(self, X, y):
    """Scores model quality on a testing dataset, returning a
    dictionary with the model's metrics.
    """
    self._check_if_session_exists()
    graph = self.graph
    with graph.as_default():
        inputs = get_collection('inputs')
        metrics = get_collection('metrics')
        feed = self.generate_feed(tensors=inputs, x=X, y=y, training=False)
        scores = self._session.run(metrics, feed)
    return scores
def predict_proba(self, X):
    """Predicts class probabilities for a dataset."""
    self._check_if_session_exists()
    graph = self.graph
    with graph.as_default():
        inputs = get_collection('inputs')
        model = get_collection('model')
        probabilities = model['probabilities']
        feed = self.generate_feed(tensors=inputs, x=X, training=False)
        probs = self._session.run(probabilities, feed)
    return probs
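# Hedged end-to-end sketch. The names `clf`, `fit`, `X_train`, `X_test` are
# hypothetical; only `score` and `predict_proba` appear in the code above.
#
#     clf.fit(X_train, y_train)
#     print(clf.score(X_test, y_test))    # dict of metric values
#     probs = clf.predict_proba(X_test)   # shape (n_samples, n_classes)
#     preds = probs.argmax(axis=1)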
def worker():
    source_urls = get_source_urls()
    papers = utils.get_newspapers(source_urls)
    processed_articles = []
    for paper in papers:
        processed_articles += utils.process_articles_for_paper(paper)
    curator = ArticleCurator(processed_articles)
    curator.curate()
    # Make sure we actually have new articles first.
    if curator.curated_articles:
        # Get and empty the collection before filling it with new articles.
        articles_collection = utils.get_collection('newsarticles')
        articles_collection.remove()
        # Put the new curated articles in the collection.
        for article in curator.curated_articles:
            articles_collection.insert({
                'title': article.title,
                'summary': article.summary,
                'image_url': article.top_image,
                'url': article.url,
                'keywords': article.keywords,
            })
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_HLASOVANIE, self.conf, const.CONF_MONGO_PARSED, self.db)
    for entry in source_collection.iterate_all():
        del entry[const.MONGO_TIMESTAMP]
        del entry[const.HLASOVANIE_INDIVIDUALNE]
        yield entry
def get_raw_gt_data():
    gt_raw = get_collection(
        'lwevents',
        query_filter={"name": "gatherTownUsersCheck"},
        db=get_mongo_db_object(),
    )
    # `properties` holds dicts; the pandas `.str` accessor also indexes those.
    gt_raw['time'] = gt_raw['properties'].str['time']
    gt_raw['gatherTownUsers'] = gt_raw['properties'].str['gatherTownUsers']
    return gt_raw
def test_std_doc():
    doc = StdDoc(field1="lolz", field2="", field3=12)
    assert not hasattr(doc, "_id")
    doc.save()
    assert hasattr(doc, "_id")
    assert find_valid_parent_docs(type(doc)) == []

    # Not a mutable field.
    mutated = False
    try:
        doc["field3"] = 1
        mutated = True
    except Exception:
        pass
    assert mutated is False

    # Mutable field, but wrong type.
    mutated = False
    try:
        doc["field1"] = 1
        mutated = True
    except Exception:
        pass
    assert mutated is False

    # Mutable and should raise no errors.
    doc["field1"] = "looo"
    doc.save()
    assert doc["field1"] == "looo"
    collec = get_collection(doc)
    elts = list(collec.find({"_id": doc._id}))
    assert len(elts) == 1
    assert elts[0]["field1"] == "looo"
def update_doubt():
    id = request.args.get('id')
    doubt = request.json.get('doubt')
    answer = request.json.get('answer')
    topic = request.json.get('topic')
    if isinstance(doubt, str) and isinstance(answer, str):
        documents = get_collection('doubts')
        query = {'_id': ObjectId(id)}
        if documents.find_one(query):
            new_values = {
                '$set': {
                    'doubt': doubt,
                    'answer': answer,
                    'topic': topic,
                }
            }
            documents.update_one(query, new_values, upsert=True)
            return 'Accepted', 202
        else:
            return 'Doubt not found', 404
    else:
        return 'Bad request', 400
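# Hedged client-side sketch of calling this handler, assuming it is routed as
# PUT /doubts?id=<object id> (the route decorator is not shown above, so the
# URL and method are assumptions; the id value is made up):
#
#     import requests
#     requests.put(
#         'http://localhost:5000/doubts',
#         params={'id': '64ae53c2a8f1b2d4e89c1234'},
#         json={'doubt': 'What is CAP?', 'answer': 'Pick two.', 'topic': 'db'},
#     )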
def entry_generator(self):
    vybory = [
        entry[const.MONGO_ID]
        for entry in storage.MongoCollection(self.db, "nodes_vybor").iterate_all()
    ]
    source_collection = utils.get_collection(
        const.CONF_MONGO_ZAKON, self.conf, const.CONF_MONGO_PARSED, self.db
    )

    def result_form(entry, vybor, lehota):
        return {
            const.NEO4J_BEGINNING_ID: vybor,
            const.NEO4J_ENDING_ID: entry[const.MONGO_ID],
            const.NAVRHNUTY_LEHOTA: lehota,
        }

    for entry in source_collection.iterate_all():
        if const.ZAKON_ROZHODNUTIE_VYBORY in entry:
            sprava = entry[const.ZAKON_ROZHODNUTIE_VYBORY]
            if sprava == "":
                continue  # no committee decision recorded; skip this entry
            lehota = self.get_lehota(sprava)
            for vybor in vybory:
                flag = False
                if vybor in sprava:
                    result = result_form(entry, vybor, lehota)
                    result[const.NAVRHNUTY_TYP] = const.NAVRHNUTY_DOPLNUJUCI
                    flag = True
                if vybor in entry[const.ZAKON_ROZHODNUTIE_GESTORSKY]:
                    result = result_form(entry, vybor, lehota)
                    result[const.NAVRHNUTY_TYP] = const.NAVRHNUTY_GESTORSKY
                    flag = True
                if flag:
                    yield result
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_ZMENA, self.conf, const.CONF_MONGO_PARSED, self.db)
    for entry in source_collection.iterate_all():
        entry.pop(const.ZMENA_PODPISANI, None)
        entry.pop(const.ZMENA_DALSI, None)
        entry.pop(const.ZMENA_PREDKLADATEL, None)
        yield entry
def restore_ensemble_model(self):
    self.ensemble_graph = tf.Graph()
    self.ensemble_session = U.get_session(self.ensemble_graph)
    with self.ensemble_graph.as_default():
        saver = tf.train.import_meta_graph("checkpoint/ensemble.meta")
        saver.restore(self.ensemble_session, "checkpoint/ensemble")
        names = ["probs", "logits", "temperature_ph", "inputs"]
        self.ensemble_model = dict(
            zip(names, U.get_collection(names, self.ensemble_graph)))
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_ZAKON, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for zmena_id in entry.get(const.ZAKON_ZMENY, {}):
            yield {
                const.NEO4J_BEGINNING_ID: int(zmena_id),
                const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
            }
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_HLASOVANIETLAC, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for hlasovanie_id in entry.get(const.HLASOVANIETLAC_LIST, {}):
            yield {
                const.NEO4J_BEGINNING_ID: int(hlasovanie_id),
                const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
            }
def ingest_derivative():
    for bag in list_missing_ingest():
        mmsid = get_mmsid(bag)
        collection = get_collection(mmsid) if mmsid else None
        if collection is not None:
            # TODO: call the islandora remote worker to ingest the bag,
            # then update the digital catalog.
            pass
        else:
            print("Could not determine collection for: {0}".format(bag))
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_POSLANEC, self.conf, const.CONF_MONGO_PARSED, self.db)
    orgs = set()
    for entry in source_collection.iterate_all():
        for org in entry[const.POSLANEC_CLENSTVO]:
            if const.POSLANEC_DELEGACIA.lower() in org.lower():
                orgs.add(org)
    for org in orgs:
        yield {const.MONGO_ID: org}
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_ZMENA, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for poslanec in entry.get(const.ZMENA_PODPISANI, []):
            yield {
                const.NEO4J_BEGINNING_ID: utils.get_poslanec_id(self.db, poslanec),
                const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
            }
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_ZAKON, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        if const.ZAKON_GESTORSKY in entry:
            yield {
                const.NEO4J_BEGINNING_ID: entry[const.ZAKON_GESTORSKY],
                const.NEO4J_ENDING_ID: entry[const.MONGO_ID]
            }
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_LEGISLATIVNAINICIATIVA, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for zakon_id in entry.get(const.PREDLOZILZAKON_LIST, {}):
            yield {
                const.NEO4J_BEGINNING_ID: entry[const.MONGO_ID],
                const.NEO4J_ENDING_ID: int(zakon_id)
            }
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_ROZPRAVA, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for vystupenie in entry[const.ROZPRAVA_VYSTUPENIA]:
            if const.ROZPRAVA_TLAC in vystupenie:
                yield {
                    const.NEO4J_BEGINNING_ID: vystupenie[const.MONGO_ID],
                    const.NEO4J_ENDING_ID: vystupenie[const.ROZPRAVA_TLAC]
                }
def worker():
    temperature_data = get_system_temp()
    temperature_data_collection = utils.get_collection('systemtemperaturedatas')
    try:
        temperature_data_collection.insert(temperature_data)
    except Exception:
        # TODO: add some logging should this fail
        return
    else:
        tweeter = CPUTemperatureTweeter()
        tweeter.tweet_it()
def create_model(self):
    inputs = get_collection('inputs')
    with tf.name_scope('model'):
        init = tf.truncated_normal((self.n_features, self.n_classes))
        theta = tf.Variable(init, name='theta')
        bias = tf.Variable(0.0, name='bias')
        logits = tf.add(tf.matmul(inputs['x'], theta), bias, name='logits')
        activate = tf.nn.sigmoid if self.n_classes == 2 else tf.nn.softmax
        probabilities = activate(logits, name='probabilities')
        predictions = tf.argmax(probabilities, axis=1, name='predictions')
        add_to_collection('model', theta, logits, probabilities, predictions)
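# A minimal sketch of the `cross_entropy` helper the optimizer methods call.
# Its real definition is not shown here, so this is an assumption that simply
# mirrors the sigmoid-vs-softmax switch used for `probabilities` above.
def cross_entropy_sketch(n_classes, logits, labels):
    if n_classes == 2:
        return tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)
    return tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=labels, logits=logits)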
def delete_doubt():
    id = request.args.get('id')
    documents = get_collection('doubts')
    query = {'_id': ObjectId(id)}
    if documents.find_one(query):
        documents.remove(query)
        return 'Accepted', 202
    else:
        return 'Doubt not found', 404
def create_model(self):
    inputs = get_collection('inputs')
    with tf.name_scope('model'):
        x = inputs['x']
        for layer_config in self.config:
            layer = Dense(**layer_config)
            x = layer.build(x, training=inputs['training'])
        logits = tf.layers.dense(x, units=self.n_classes, name='logits')
        activate = tf.nn.sigmoid if self.n_classes == 2 else tf.nn.softmax
        probabilities = activate(logits, name='probabilities')
        predictions = tf.argmax(probabilities, axis=1, name='predictions')
        add_to_collection('model', logits, probabilities, predictions)
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_HLASOVANIE, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    last_entry = source_collection.get(
        {},
        projection=[const.HLASOVANIE_INDIVIDUALNE],
        sort=[(const.MONGO_ID, -1)],
    )
    hlasy = last_entry[const.HLASOVANIE_INDIVIDUALNE].values()
    kluby = [value[const.HLASOVANIE_KLUB] for value in hlasy]
    values, counts = np.unique(kluby, return_counts=True)
    for val, count in zip(values, counts):
        val = utils.parse_klub(val)
        entry = {const.MONGO_ID: val, const.KLUB_POCET: int(count)}
        yield entry
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_POSLANEC, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for org, typ in entry[const.POSLANEC_CLENSTVO].items():
            if const.POSLANEC_DELEGACIA.lower() in org.lower():
                result = {
                    const.NEO4J_BEGINNING_ID: entry[const.MONGO_ID],
                    const.NEO4J_ENDING_ID: org,
                    const.CLEN_TYP: const.CLEN_TYP_DICT[typ]
                }
                yield result
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_HLASOVANIE, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for poslanec_id, poslanec in entry[const.HLASOVANIE_INDIVIDUALNE].items():
            hlas = {
                const.NEO4J_BEGINNING_ID: int(poslanec_id),
                const.NEO4J_ENDING_ID: entry[const.MONGO_ID],
                const.HLASOVAL_HLAS: const.HLASOVAL_HLAS_DICT[poslanec[const.HLASOVANIE_HLAS]],
                const.HLASOVAL_KLUB: utils.parse_klub(poslanec[const.HLASOVANIE_KLUB])
            }
            yield hlas
def setUpClass(cls):
    coll = get_collection(GeometryModel)
    GeometryModel.objects.create(geom=cls.point)
    GeometryModel.objects.create(geom=cls.line)
    GeometryModel.objects.create(geom=cls.polygon)
    GeometryModel.objects.create(geom=cls.multi_point)
    GeometryModel.objects.create(geom=cls.multi_line)
    GeometryModel.objects.create(geom=cls.multi_polygon)
    GeometryModel.objects.create(geom=cls.geom_collection)
    # Not sure why the tests don't create the index...
    coll.ensure_index([('geom', pymongo.GEOSPHERE)])
def get_doubts():
    documents = get_collection('doubts')
    output = []
    for document in documents.find({}):
        output.append({
            '_id': str(document['_id']),
            'doubt': document['doubt'],
            'answer': document['answer'],
            'topic': document['topic'],
            'user': document['user'],
        })
    return jsonify(output), 200
def get_topics_from_user():
    user = request.args.get('user')
    if user_exists(user):
        documents = get_collection('doubts')
        topics = []
        query = {'user': user}
        for document in documents.find(query):
            if document['topic'] not in topics:
                topics.append(document['topic'])
        return jsonify(topics), 200
    else:
        return 'User not found', 404
def entry_generator(self):
    source_collection = utils.get_collection(
        const.CONF_MONGO_ROZPRAVA, self.conf, const.CONF_MONGO_PARSED, self.db
    )
    for entry in source_collection.iterate_all():
        for vystupenie in entry[const.ROZPRAVA_VYSTUPENIA]:
            klub = vystupenie[const.ROZPRAVA_POSLANEC_KLUB]
            klub = const.KLUB_DICT.get("Klub " + klub, const.NEO4J_NULLVALUE)
            yield {
                const.NEO4J_BEGINNING_ID: entry[const.MONGO_ID],
                const.NEO4J_ENDING_ID: vystupenie[const.MONGO_ID],
                const.ROZPRAVA_POSLANEC_KLUB: klub,
                const.ROZPRAVA_POSLANEC_TYP: vystupenie[const.ROZPRAVA_POSLANEC_TYP]
            }
def run_import(csv_input_file, db_host, db_port, db_name, db_collection):
    """Imports the CSV data into a MongoDB database."""
    print('Started CSV import - {0}'.format(datetime.now()))
    collection = utils.get_collection(db_host, db_port, db_name, db_collection)
    with open(csv_input_file, encoding='utf-8', errors='ignore') as csv_file:
        reader = csv.DictReader(csv_file)
        for json_obj in create_json(reader):
            collection.insert_one(json_obj)
    print('Finished: CSV import - {0}'.format(datetime.now()))
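# Hedged usage sketch with hypothetical values; adjust the host, port, and
# database/collection names to your environment:
#
#     run_import('data/articles.csv', 'localhost', 27017, 'newsdb', 'articles')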
def worker():
    storage_data = get_system_storage()
    storage_data_collection = utils.get_collection('systemstoragedatas')
    storage_data_collection.insert(storage_data)
def worker():
    config_data = get_system_config()
    config_data_collection = utils.get_collection('systemconfigdatas')
    config_data_collection.insert(config_data)
def get_collection_view(request):
    ret = utils.get_collection(request)
    return public.success_result_http(ret)
def worker():
    memory_data = get_system_memory()
    memory_data_collection = utils.get_collection('systemmemorydatas')
    memory_data_collection.insert(memory_data)
def test_map_reduce(self, inline=False):
    mapfunc = """
        function map() {
            for(i=0; i<this.n; ++i) {
                emit(this._id, this.m)
            }
        }
    """
    reducefunc = """
        function reduce(key, values) {
            var res = 0
            values.forEach(function(x) { res += x })
            return res
        }
    """

    if inline:
        map_reduce = MapReduceModel.objects.inline_map_reduce
    else:
        map_reduce = partial(MapReduceModel.objects.map_reduce, out='m/r-out')
    map_reduce = partial(map_reduce, mapfunc, reducefunc)

    random_numbers = [
        (3, 4),
        (6, 19),
        (5, 8),
        (0, 20),  # This instance won't be emitted by `map`.
        (2, 77),
        (300, 10),
    ]

    for n, m in random_numbers:
        MapReduceModel(n=n, m=m).save()

    # Test mapfunc + reducefunc.
    documents = map_reduce()
    documents = list(documents)
    self.assertEqual(len(documents), len(random_numbers) - 1)
    self.assertEqual(sum(doc.value for doc in documents),
                     sum(n * m for n, m in random_numbers))

    # Test MapReduceResult.
    obj = documents[0].model.objects.get(id=documents[0].key)
    self.assertTrue(isinstance(obj, MapReduceModel))
    self.assertEqual((obj.n, obj.m), random_numbers[0])
    self.assertTrue(obj.id)

    # The result collection should not have perished.
    if not inline:
        result_collection = get_collection('m/r-out')
        self.assertEqual(result_collection.count(), len(random_numbers) - 1)

        # Test drop_collection.
        next(map_reduce(drop_collection=True))
        self.assertEqual(get_collection('m/r-out').count(), 0)

    # Test arbitrary kwargs.
    documents = list(map_reduce(limit=3))
    self.assertEqual(len(documents), 3)
    self.assertEqual(sum(doc.value for doc in documents),
                     sum(n * m for n, m in random_numbers[:3]))

    # Test with .filter(...).
    qs = MapReduceModel.objects.filter(n__lt=300).filter(~Q(m__in=[4]))
    if inline:
        documents = qs.inline_map_reduce(mapfunc, reducefunc)
    else:
        documents = list(qs.map_reduce(mapfunc, reducefunc, out='m/r-out'))
    self.assertEqual(len(documents), len(random_numbers) - 2 - 1)
    self.assertEqual(sum(doc.value for doc in documents),
                     sum(n * m for n, m in random_numbers[1:-1]))
import csv

import utils


def create_feature_dict(names, values):
    output = {}
    for x in range(2, len(values)):
        output[names[x]] = float(values[x])
    return output


collection = utils.get_collection()

f = open('data/danfeatures.mar102011.csv', 'rb')
reader = csv.reader(f, delimiter=',')

is_header = True
headers = []
for row in reader:
    if is_header:
        headers = row
        is_header = False
        continue
    doc = utils.find_doc(int(row[0]), int(row[1]))
    if doc is not None:
        features = create_feature_dict(headers, row)
        gender = 'female'
        if int(row[0]) == doc['male_id']:
            gender = 'male'
        key = 'lexical_features.' + gender
        # Legacy pymongo update: positional args are upsert=False, multi=True.
        collection.update({'_id': doc['_id']},
                          {'$set': {key: features}},
                          False, True)