Example #1
def has_mode(db, conn, model, field, mode_table="Fcq"):
    model_id = "{0}_id".format(model).lower()
    mode_query = (
        r.db(db)
        .table(mode_table)
        .group(model_id)
        .ungroup()
        .for_each(
            lambda doc: r.db(db)
            .table(model)
            .get(doc["group"])
            .update(
                {
                    field: doc["reduction"]
                    .group(field)
                    .count()
                    .ungroup()
                    .order_by("reduction")
                    .nth(-1)
                    .default({"group": None})["group"]
                }
            )
        )
        .run(conn, array_limit=200000)
    )
    logging.info(mode_query)
Example #2
def create_registration(event_id, custom_fields):
    # Validate custom fields by comparing them to the event fields
    cursor = rethink.db(config['database']['name']).table(
        'events'
    ).get(event_id).get_field('fields').run(database.connection)

    fields = list(cursor)  # TODO: len(fields) == 0 -> invalid event ID error?

    custom_fields = __sanitize_registration(fields, custom_fields)
    invalid_fields = __validate_registration(fields, custom_fields)
    if invalid_fields:
        return None, RegistrationValidateException(
            'Invalid fields', invalid_fields
        )

    response = rethink.db(config['database']['name']).table(
        'registrations'
    ).insert({
        'event_id': event_id,
        'custom_fields': custom_fields
    }).run(database.connection)

    if response['inserted'] != 1:
        return None, RegistrationInsertException()

    # returns the inserted ID
    return response['generated_keys'][0], None
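
A minimal usage sketch (not from the original project): the helpers above report failure through a (value, error) tuple rather than by raising, so callers branch on the second element. The handler name, payload shape, and status codes below are assumptions for illustration.

def register_handler(event_id, payload):
    # create_registration returns (generated_id, None) on success,
    # or (None, exception_instance) when validation or the insert fails.
    registration_id, err = create_registration(event_id, payload.get('custom_fields', {}))
    if err is not None:
        return {'error': str(err)}, 400
    return {'id': registration_id}, 201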
Example #3
def update_registration(registration_id, new_registration):
    registration, err = get_registration(registration_id)
    if not registration:
        return None, RegistrationNotFoundException()
    event_id = registration.get('event_id')

    # Validate custom fields by comparing them to the event fields
    cursor = rethink.db(config['database']['name']).table(
        'events'
    ).get(event_id).get_field('fields').run(database.connection)

    fields = list(cursor)

    custom_fields = __sanitize_registration(
        fields, registration.get('custom_fields')
    )
    invalid_fields = __validate_registration(fields, custom_fields)
    if invalid_fields:
        return None, RegistrationValidateException(
            'Invalid fields', invalid_fields
        )

    response = rethink.db(config['database']['name']).table(
        'registrations'
    ).get(
        registration_id
    ).update(
        new_registration
    ).run(database.connection)

    if response['errors'] != 0:
        return None, RegistrationUpdateException()

    return registration_id, None
Example #4
def setup():
    tables = [
        {
            'name' : 'testbeds',
            'pkey' : 'id'
        },
        {
            'name' : 'resources',
            'pkey' : 'hostname'
        }
    ]

    c = connect()

    try:
        r.db_create(Config.rethinkdb["db"]).run(c)
        logger.info('MyOps2 database created successfully')
    except RqlRuntimeError:
        logger.info('MyOps2 database already exists')

    for t in tables:
        try:
            r.db(Config.rethinkdb["db"]).table_create(t['name'], primary_key=t['pkey']).run(c)
            logger.info('MyOps2 table %s setup completed', t['name'])
        except RqlRuntimeError:
            logger.info('MyOps2 table %s already exists', t['name'])

    c.close()
Example #5
def init(conn, event):
    # try to drop table (may or may not exist)
    rv = ''
    try:
        r.db_drop(TIX).run(conn)
        rv = 'dropped, then created'
    except:
        rv = 'created'
    r.db_create(TIX).run(conn)
    r.db(TIX).table_create(VENU).run(conn)
    r.db(TIX).table(VENU).index_create(TS).run(conn)

    smap = {}
    umap = {}
    for x in range(1, CNT + 1):
        smap[str(x)] = 'free' 
        umap[str(x)] = ''

    rv += str(r.db(TIX).table(VENU).insert({
        ID: 0,
        SMAP: smap,
        UMAP: umap,
        MAX: CNT,
        TS: time.time()
    }).run(conn))

    return rv
Example #6
def upload_project(project_id):
    """
    Upload the bup backup of this project to the gcloud bucket.
    """
    path = path_to_project(project_id)

    run("sudo chmod a+r -R %s"%path)

    log('path: ', project_id)
    bup = os.path.join(path, 'bup')
    if not os.path.exists(bup):
        raise RuntimeError("no bup directory to upload -- done")
    target = os.path.join('gs://{bucket}/projects/{project_id}.zfs/bup'.format(
            bucket=GCLOUD_BUCKET, project_id=project_id))

    log('upload: rsync new pack files')
    run(['gsutil', '-m', 'rsync', '-x', '.*\.bloom|.*\.midx', '-r',
         '{bup}/objects/'.format(bup=bup),
         '{target}/objects/'.format(target=target)])
    log('gsutil upload refs/logs')
    for path in ['refs', 'logs']:
        run(['gsutil', '-m', 'rsync', '-c', '-r',
             '{bup}/{path}/'.format(bup=bup, path=path),
             '{target}/{path}/'.format(target=target, path=path)])

    #auth_key = open(RETHINKDB_SECRET).read().strip()
    conn = rethinkdb.connect(host=DB_HOST, timeout=10)#, auth_key=auth_key)
    timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(TIMESTAMP_FORMAT)
    rethinkdb.db('smc').table('projects').get(project_id).update(
        {'last_backup_to_gcloud':timestamp_to_rethinkdb(timestamp)}).run(conn)
Example #7
 def test_multi_join(self, conn):
     query = r.db('x').table('employees').eq_join(
         'person', r.db('x').table('people')
     ).map(
         lambda d: d['left'].merge({'person': d['right']['name']})
     ).eq_join(
         'job', r.db('x').table('jobs')
     ).map(
         lambda d: d['left'].merge({'job': d['right']['name']})
     )
     expected = [
         {
             'id': 'joe-employee-id',
             'person': 'joe',
             'job': 'Lawyer'
         },
         {
             'id': 'tim-employee-id',
             'person': 'tim',
             'job': 'Nurse'
         },
         {
             'id': 'bob-employee-id',
             'person': 'bob',
             'job': 'Assistant'
         },
         {
             'id': 'todd-employee-id',
             'person': 'todd',
             'job': 'Lawyer'
         }
     ]
     assertEqUnordered(expected, list(query.run(conn)))
Example #8
def step1():

    response = {}
    conn = r.connect(host=current_app.config['RETHINKDB_HOST'])

    users = json.loads(request.data)
    users = {
        'name': users['name'],
        'user': users['user'],
        'email': users['email'],
        'password': users['password'],
        'ubication': [],
        'sale': []
    }
    
    check_user = r.db('food').table('user_register').filter({'email': users['email']}).run(conn)
    check_user = list(check_user)
    if len(check_user) > 0:
        
        response['success'] = 200
        response['message'] = u'El usuario ya existe'
        response['code'] = 1

    else:    
     
        insert = r.db(current_app.config['DATABASE']).table('user_register').insert(users).run(conn)
        response['success'] = 200
        response['message'] = u'Usuario registrado'
        response['code'] = 0

    pprint.pprint(response)
    return jsonify(response)
Example #9
def main():
    # connect rethinkdb
    rethinkdb.connect("localhost", 28015, "mysql")
    try:
        rethinkdb.db_drop("mysql").run()
    except:
        pass
    rethinkdb.db_create("mysql").run()

    tables = ["dept_emp", "dept_manager", "titles",
              "salaries", "employees", "departments"]
    for table in tables:
        rethinkdb.db("mysql").table_create(table).run()

    stream = BinLogStreamReader(
        connection_settings=MYSQL_SETTINGS,
        blocking=True,
        only_events=[DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent],
    )

    # process Feed
    for binlogevent in stream:
        if not isinstance(binlogevent, WriteRowsEvent):
            continue

        for row in binlogevent.rows:
            if not binlogevent.schema == "employees":
                continue

            vals = {}
            vals = {str(k): str(v) for k, v in row["values"].iteritems()}
            rethinkdb.table(binlogevent.table).insert(vals).run()

    stream.close()
Example #10
def go():
    with except_printer():
        r.connect(host="localhost", port="123abc")
    with except_printer():
        r.expr({'err': r.error('bob')}).run(c)
    with except_printer():
        r.expr([1,2,3, r.error('bob')]).run(c)
    with except_printer():
        (((r.expr(1) + 1) - 8) * r.error('bob')).run(c)
    with except_printer():
        r.expr([1,2,3]).append(r.error('bob')).run(c)
    with except_printer():
        r.expr([1,2,3, r.error('bob')])[1:].run(c)
    with except_printer():
        r.expr({'a':r.error('bob')})['a'].run(c)
    with except_printer():
        r.db('test').table('test').filter(lambda a: a.contains(r.error('bob'))).run(c)
    with except_printer():
        r.expr(1).do(lambda x: r.error('bob')).run(c)
    with except_printer():
        r.expr(1).do(lambda x: x + r.error('bob')).run(c)
    with except_printer():
        r.branch(r.db('test').table('test').get(0)['a'].contains(r.error('bob')), r.expr(1), r.expr(2)).run(c)
    with except_printer():
        r.expr([1,2]).reduce(lambda a,b: a + r.error("bob")).run(c)
Example #11
def setDictionary():
	dict = {}
	#print "getting top stories from hacker-news"
	result = firebase.get('/v0/topstories', None)
	# result = result[:200]
	for itemid in result:
		try:
			data = firebase.get('/v0/item/' + str(itemid), None)
			if (data['type'] == 'story'):
				# get tags
				url = data['url']
				(to_insert, tags) = selectTags(itemid)
				# store to temp db
				r.db("tagger_db").table("id2html").insert({"id": itemid, "tag_string": to_insert}).run(connection)
				if len(tags) > 1:
					title = data['title']
					score = str(data['score'])
					usr = data['by']
					comments = str(data['descendants'])
					myString = "<tr class='athing'><td align=\"right\" valign=\"top\" class=\"title\"><span class=\"rank\"> </span></td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td><td class=\"title\"><span class=\"deadmark\"></span><a href=\"" + url + "\">" + title + "</a>" + to_insert + "</td><td><center><a id=\"up_10287983\"><div class=\"votearrow\" title=\"upvote\"></div></a></center></td></tr><tr><td colspan=\"2\"></td><td class=\"subtext\"><span class=\"score\">" + score + " points</span> by <a>" + usr + "</a> | <a>" + comments +" comments</a></td></tr><tr class=\"spacer\" style=\"height:5px\"></tr>"
					print "tags: ", tags[0], tags[1]
					add(tags[0], myString, dict)
					add(tags[1], myString, dict)
		except KeyError:
			pass
	# r.db("test").table("tag_dict").delete().run(connection)
	r.db("tagger_db").table("tag2html").insert(dict).run(connection)
Example #12
 def __init__(self, count):
     self.con = r.connect("localhost", 28015).repl()
     tables = r.db("test").table_list().run(self.con)
     if "items" in tables:
         r.db("test").table_drop("items").run(self.con)
     r.db("test").table_create("items").run(self.con)
     self.count = count
Example #13
def sync_facebook(name):
    #import ipdb; ipdb.set_trace();
    try:
        form_data = json.loads(request.data)
    except:
        return response_msg('error', 'data not correct')

    try:
        graph = GraphAPI(form_data['access_token'])
        try:
            # #import ipdb; ipdb.set_trace();
            email = graph.get_object('me', fields='email')['email']
            pic = graph.get_object('me/picture', width='400', height='400')['url']
            print pic
            if email != form_data['fb_email']:
                return response_msg('error', 'incorrect facebook email')
        except:
            return response_msg('error', 'data not complete')
    except:
        return response_msg('error', 'invalid access token')

    try:
        connection = get_rdb_conn()
        cursor = rdb.db(TODO_DB).table('user').filter(
            rdb.row['username'] == name
            ).update({'fb_email': email, 'pic': pic}
            ).run(connection)
        cursor = rdb.db(TODO_DB).table('user').filter(
            rdb.row['username'] == name
            ).run(connection)
    except:
        return response_msg('error', 'Could not connect to db')

    return response_msg('success', 'OK', data=cursor.items[0])
Example #14
def sync_ratings():
    try:
        connection = get_rdb_conn()
        cursor = rdb.db(TODO_DB).table('user').run(connection)
    except:
        return response_msg('error', 'could not connect to db')
    for user in cursor.items:
        ratings = rating(user['cfhandle'], user['cchandle'], user['colg_rating'])
        ratings = json.loads(ratings[0])
        colg_rating = 0
        try:
            colg_rating = colg_rating + 20 * ((ratings['cf_rating']/100)**2)
            colg_rating = colg_rating + 2000 + 7 * (((ratings['lrating']/1000)**2) + (ratings['lrating']/20))
            colg_rating = colg_rating + 2000 + 5 * (((ratings['srating']/100)**2) + (ratings['srating']/20))
        except:
            pass
        print colg_rating
        try:
            cursor = rdb.db(TODO_DB).table('user').filter(
                rdb.row['username'] == user['username']
                ).update({
                'lrating': ratings['lrating'],
                'srating': ratings['srating'],
                'cfrating': ratings['cf_rating'],
                'colg_rating': colg_rating/3,
                }).run(connection)
            print user['username']
        except:
            print 'error' + user['username']

    return response_msg('success', 'OK')
Example #15
 def remove_pending_user(self, user_id, row_id, user_pending_name=None):
     """
     removes a user id from a model's pending list.
     """
     if user_id is None:
         logging.error("user_id cannot be None")
         return False
     if row_id is None:
         logging.error("row_id cannot be None")
         return False
     row_table = self.__class__.__name__
     user_table = 'User'
     user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
     row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)
     if user_data is None:
         logging.error("User {0} does not exist".format(user_data))
         return False
     if row_data is None:
         logging.error("{0} {1} does not exist".format(row_table, row_data))
         return False
     if user_pending_name is not None:
         user_pending = user_data.get(user_pending_name, [])
         try:
             user_pending.remove(row_id)
         except ValueError:
             logging.warn("row_id {0} not in user {1}".format(row_id, user_pending_name))
             pass
         r.db(self.DB).table(user_table).get(user_id).update({user_pending_name: user_pending}).run(self.conn)
     penders = row_data['penders']
     try:
         penders.remove(user_id)
     except ValueError:
         pass
     return r.db(self.DB).table(row_table).get(row_id).update({'penders': penders}).run(self.conn)
Example #16
    def save(self):
        try:
            r.db_create(self.db).run(self.bigchain.conn)
        except r.ReqlOpFailedError:
            pass

        try:
            r.db(self.db).table_create('accounts').run(self.bigchain.conn)
        except r.ReqlOpFailedError:
            pass

        user_exists = list(r.db(self.db)
                           .table('accounts')
                           .filter(lambda user: (user['name'] == self.name)
                                                & (user['ledger']['id'] == self.ledger['id']))
                           .run(self.bigchain.conn))
        if not len(user_exists):
            r.db(self.db)\
                .table('accounts')\
                .insert(self.as_dict(), durability='hard')\
                .run(self.bigchain.conn)
        else:
            user_persistent = user_exists[0]
            self.vk = user_persistent['vk']
            self.sk = user_persistent['sk']
Example #17
def insert_r(conn,table,sent,rel,val):
	bulk = {}
	if isinstance(rel["e1"],unicode):
		bulk["e1"] = rel["e1"]
	else:
		bulk["e1"] = unicode(rel["e1"],errors="ignore")

	if isinstance(rel["rel"],unicode):
		bulk["rel"] = rel["rel"]
	else:
		bulk["rel"] = unicode(rel["rel"],errors="ignore")

	if isinstance(rel["e2"],unicode):
		bulk["e2"] = rel["e2"]
	else:
		bulk["e2"] = unicode(rel["e2"],errors="ignore")

	if isinstance(sent,unicode):
		bulk["sent"] = sent
	else:
		bulk["sent"] = unicode(sent,errors="ignore")

	bulk["cfval"] = val
		

	r.db("wikikb").table(table).insert(bulk).run(conn)
Example #18
	def get_table():
		try:
			r.db(dbname).table_create('boards').run(_get_conn())
		except r.RqlRuntimeError:
			# already created
			pass
		return r.db(dbname).table('boards')
Example #19
def import_from_queue(progress, conn, task_queue, error_queue, replace_conflicts, durability, write_count):
    if progress[0] is not None and not replace_conflicts:
        # We were interrupted and it's not ok to overwrite rows, check that the batch either:
        # a) does not exist on the server
        # b) is exactly the same on the server
        task = progress[0]
        pkey = r.db(task[0]).table(task[1]).info().run(conn)["primary_key"]
        for i in reversed(range(len(task[2]))):
            obj = pickle.loads(task[2][i])
            if pkey not in obj:
                raise RuntimeError("Connection error while importing.  Current row has no specified primary key, so cannot guarantee absence of duplicates")
            row = r.db(task[0]).table(task[1]).get(obj[pkey]).run(conn)
            if row == obj:
                write_count[0] += 1
                del task[2][i]
            else:
                raise RuntimeError("Duplicate primary key `%s`:\n%s\n%s" % (pkey, str(obj), str(row)))

    task = task_queue.get() if progress[0] is None else progress[0]
    while not isinstance(task, StopIteration):
        try:
            # Unpickle objects (TODO: super inefficient, would be nice if we could pass down json)
            objs = [pickle.loads(obj) for obj in task[2]]
            conflict_action = 'replace' if replace_conflicts else 'error'
            res = r.db(task[0]).table(task[1]).insert(objs, durability=durability, conflict=conflict_action).run(conn)
        except:
            progress[0] = task
            raise

        if res["errors"] > 0:
            raise RuntimeError("Error when importing into table '%s.%s': %s" %
                               (task[0], task[1], res["first_error"]))

        write_count[0] += len(objs)
        task = task_queue.get()
Example #20
 def subscribe_user(self, user_id, row_id, user_subscription_name=None):
     """
     adds a user id to a model's subscription list.
     """
     row_table = self.__class__.__name__
     user_table = 'User'
     user_data = r.db(self.DB).table(user_table).get(user_id).run(self.conn)
     row_data = r.db(self.DB).table(row_table).get(row_id).run(self.conn)
     if user_data is None:
         logging.error("User {0} does not exist".format(user_data))
         return False
     if row_data is None:
         logging.error("{0} {1} does not exist".format(row_table, row_data))
         return False
     try:
         if user_subscription_name is not None:
             user_subscription = user_data[user_subscription_name]
             user_subscription.append(row_id)
             r.db(self.DB).table(user_table).get(user_id).update({user_subscription_name: user_subscription}).run(self.conn)
     except KeyError:
         logging.error("user subscription {0} not known in user data".format(user_subscription_name))
         return False
     subscribers = row_data['subscribers']
     subscribers.append(user_id)
     return r.db(self.DB).table(row_table).get(row_id).update({'subscribers': subscribers}).run(self.conn)
Example #21
    def save(db_host, db_port, db_name, db_table, data):

        if not isinstance(db_host, str):
            raise TypeError("Invalid database host name argument type. Can't create Cache Walker instance.")

        if not isinstance(db_port, int):
            raise TypeError("Invalid database port argument type. Can't create Cache Walker instance.")

        if not isinstance(db_name, str):
            raise TypeError("Invalid database name argument type. Can't create Cache Walker instance.")

        if not isinstance(db_table, str):
            raise TypeError("Invalid database table name argument type. Can't create Cache Walker instance.")

        try:
            connection = r.connect(db_host, db_port)

        except Exception as e:
            logger.debug("Can't connect to the database.")
            raise e

        try:
            r.db(db_name).table(db_table).insert(data).run(connection)

        except Exception as e:
            logger.debug("Can't insert data into the database.")
            raise e
Example #22
def LoadTestData(file, db, conn, v = False):
  '''Loading test data into the database.'''

  ## Loading data.
  data_dir = os.path.split(dir)[0]
  path = os.path.join(data_dir, 'tests', 'data', file)
  print path
  try:
    with open(path) as csv_file:
      data = csv.DictReader(csv_file)
      test_data = []
      for row in data:
        test_data.append(row)

  except Exception as e:
    print "Couldn't load test data."
    return False


  ## Storing in db.
  try:
    # Checking for existing records.
    n = r.db(db['name']).table('values').count().run(conn)
    if n > 0:
      if v:
        print "Data already in db. Deleting ..."
      r.db(db['name']).table('values').delete().run(conn)

    r.db(db['name']).table('values').insert(test_data).run(conn)
    return True

  except Exception as e:
    print "Could not insert data into database."
    return False
Example #23
 def create_table(self):
   try:
     r.db('Raiden').table_create(self.corpus_table).run(self.connection)
     print 'Created table [Raiden.'+self.corpus_table+']'
   except Exception, e:
     print 'Error occurred during '+self.corpus_table+' table creation! Maybe it already exists!'
     print str(e)
Example #24
def table_reader(options, file_info, task_queue, error_queue, exit_event):
    try:
        db = file_info["db"]
        table = file_info["table"]
        primary_key = file_info["info"]["primary_key"]
        conn = r.connect(options["host"], options["port"], auth_key=options["auth_key"])

        if table not in r.db(db).table_list().run(conn):
            r.db(db).table_create(table, primary_key=primary_key).run(conn)

        if file_info["format"] == "json":
            json_reader(task_queue,
                        file_info["file"],
                        db, table,
                        primary_key,
                        options["fields"],
                        exit_event)
        elif file_info["format"] == "csv":
            csv_reader(task_queue,
                       file_info["file"],
                       db, table,
                       primary_key,
                       options,
                       exit_event)
        else:
            raise RuntimeError("unknown file format specified")
    except (r.RqlClientError, r.RqlDriverError, r.RqlRuntimeError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message), traceback.extract_tb(sys.exc_info()[2])))
    except InterruptedError:
        pass # Don't save interrupted errors, they are side-effects
    except:
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb), file_info["file"]))
Example #25
def init_database_with_default_tables(args):
    """
    Create a new RethinkDB database and initialise (default) tables

    :param args: an argparse argument (force)
    """
    # Add additional (default) tables here...
    def_tables = ['determined_variants', 'strains_under_investigation',
                  'references', 'reference_features', 'strain_features']
    with database.make_connection() as connection:
        try:
            r.db_create(connection.db).run(connection)
            for atable in def_tables:
                r.db(connection.db).table_create(atable).run(connection)
        except RqlRuntimeError:
            print ("Database %s already exists. Use '--force' option to "
                   "reinitialise the database." % (connection.db))
            if args.force:
                print "Reinitialising %s" % (connection.db)
                r.db_drop(connection.db).run(connection)
                r.db_create(connection.db).run(connection)
                for atable in def_tables:
                    r.db(connection.db).table_create(atable).run(connection)
            else:
                sys.exit(1)
        print ("Initalised database %s. %s contains the following tables: "
               "%s" % (connection.db, connection.db, ', '.join(def_tables)))
Example #26
def get_tables(host, port, auth_key, tables):
    try:
        conn = r.connect(host, port, auth_key=auth_key)
    except r.RqlDriverError as ex:
        raise RuntimeError(ex.message)

    dbs = r.db_list().run(conn)
    res = []

    if len(tables) == 0:
        tables = [[db] for db in dbs]

    for db_table in tables:
        if db_table[0] not in dbs:
            raise RuntimeError("Error: Database '%s' not found" % db_table[0])

        if len(db_table) == 1: # This is just a db name
            res.extend([(db_table[0], table) for table in r.db(db_table[0]).table_list().run(conn)])
        else: # This is db and table name
            if db_table[1] not in r.db(db_table[0]).table_list().run(conn):
                raise RuntimeError("Error: Table not found: '%s.%s'" % tuple(db_table))
            res.append(tuple(db_table))

    # Remove duplicates by making results a set
    return set(res)
Example #27
    def create(self):
        conn = self.connect()

        db_list = r.db_list().run(conn)

        db_created = False
        table_created = False

        if not self.db_name in db_list:
            r.db_create(self.db_name).run(conn)
            db_created = True

        table_list = r.db(self.db_name).table_list().run(conn)

        if not self.config_table_name in table_list:
            r.db(self.db_name).table_create(
                self.config_table_name, primary_key=self.primary_key
            ).run(conn)

            r.db(self.db_name).table(self.config_table_name)\
                .index_create(self.secondary_index).run(conn)

            table_created = True

        return {"db": db_created, "table": table_created}
Example #28
def read_table_into_queue(progress, conn, db, table, pkey, task_queue, progress_info, exit_event):
    read_rows = 0
    if progress[0] is None:
        cursor = r.db(db).table(table).order_by(index=pkey).run(conn, time_format="raw", binary_format='raw')
    else:
        cursor = r.db(db).table(table).between(progress[0], None, left_bound="open").order_by(index=pkey).run(conn, time_format="raw", binary_format='raw')

    try:
        for row in cursor:
            if exit_event.is_set():
                break
            task_queue.put([row])

            # Set progress so we can continue from this point if a connection error occurs
            progress[0] = row[pkey]

            # Update the progress every 20 rows - to reduce locking overhead
            read_rows += 1
            if read_rows % 20 == 0:
                progress_info[0].value += 20
    finally:
        progress_info[0].value += read_rows % 20

    # Export is done - since we used estimates earlier, update the actual table size
    progress_info[1].value = progress_info[0].value
Example #29
    async def put(self):
        """
        .. http:put:: /?queue={string:queue}

            Creates a queue if it does not exist.

        **Example request**:

        .. sourcecode:: http

            PUT /?queue=foo
            Host: example.com
            Accept: application/json, text/javascript

        **Example response**:

        .. sourcecode:: http

            HTTP/1.1 200 OK
            Vary: Accept
            Content-Type: text/javascript

            ok

        :query queue: queue (table) to create
        :statuscode 200: This method should always return 200

        """
        opts = self.request.app['rethinkdb']
        conn = await r.connect(**opts)
        qname = self.request.GET['queue']
        with suppress(r.errors.ReqlOpFailedError):
            r.db(opts['db']).table_create(qname).run(conn)

        return web.Response(body=b'ok')
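
A minimal client-side sketch for the endpoint above, assuming the service is served at the root path on localhost:8080; the aiohttp client usage is illustrative only.

import asyncio
import aiohttp

async def create_queue(name):
    # PUT /?queue=<name> creates the backing RethinkDB table if it does not exist.
    async with aiohttp.ClientSession() as session:
        async with session.put('http://localhost:8080/', params={'queue': name}) as resp:
            return await resp.text()

print(asyncio.run(create_queue('foo')))  # expected body: ok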
Example #30
def bulk_insert(ifile):
	bulk_size = 1000
	i = 0
	bulk_ins = []
	bulk = {}
	for line in ifile:
		bulk = {}
		if line[0] == '#' or len(line) < 10 or line[0] == '@':
			continue
		line = line[:len(line)-2].replace("<","").replace(">","").strip()
		line_arr = line.split("\t")
		print line_arr,i
		bulk["id"] = unicode(line_arr[0],errors="ignore")
		bulk["rel"] = unicode(line_arr[1],errors="ignore")
		bulk["id2"] = unicode(line_arr[2],errors="ignore")

		if i < bulk_size - 1:
			bulk_ins.append(bulk)
			i += 1
		elif i == bulk_size - 1:
			bulk_ins.append(bulk)
			r.db("yago").table("test").insert(bulk_ins).run(conn)
			i = 0


	if i > 0:
		# flush the remaining partial batch; each doc was already appended above
		r.db("yago").table("test").insert(bulk_ins).run(conn)
Example #31
 def test_reduce_1(self, conn):
     expected = 191
     result = r.db('d').table('nums').map(lambda doc: doc['points']).reduce(
         lambda elem, acc: elem + acc).run(conn)
     assertEqual(expected, result)
Example #32
def insert(tablename, thing, conn):
    res = r.db(DB).table(tablename).insert(thing).run(conn)
    return res
Example #33
def jumbo_write_json(data, db_name, table_name, chunk_size=5000, silent=True):
    '''Write big JSON lists to RethinkDB.

    Essential for datasets that are larger than 100,000 docs (ReQL max write).
    Often necessary even for smaller ones.

    data [list]: a list of dicts in JSON format.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    chunk_size [int or float of form BASEeEXP]: input list will be broken into
        chunks of this size. If you encounter memory use issues, reduce this
        value.
    silent [bool]: if True, does not print reports.

    Must be connected to a RethinkDB instance before using this.'''

    if chunk_size > 1e5:
        raise (Exception('Maximum JSON chunk_size is 100,000.'))

    #determine list length, number of chunks, and remainder
    list_length = len(data)
    chunk_size = int(
        chunk_size
    )  #max array length for a ReQL write is 100k; but that uses too much mem
    nchunks = math.ceil(list_length / chunk_size)
    rem = list_length % chunk_size

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        print('Creating database "' + db_name + '".')
        r.db_create(db_name).run()

    #create table if it doesn't already exist
    if table_name not in r.db(db_name).table_list().run():
        print('Creating table "' + table_name + '" in database "' \
            + db_name + '".')
        r.db(db_name).table_create(table_name).run()

    if silent == False:
        print('Writing list of ' + str(list_length) + ' trips to table "' \
            + table_name + '".')

    #digest data and write to RethinkDB
    for i in range(nchunks):
        s = i * chunk_size  #chunk_start

        if i == nchunks - 1 and rem != 0:
            e = s + rem + 1
        else:
            e = (i + 1) * chunk_size

        if silent == False:
            print('Writing trips ' + str(s) + '-' + str(e - 1) + '.')

        #write chunk to rethink (some data may be lost in case of power failure)
        r.db(db_name).table(table_name).insert(data[s:e]).run(
            durability='soft', noreply=False)

    if silent == False:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Table "' + table_name + '" now contains ' + str(ndocs) \
            + ' trips.')
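
A minimal usage sketch for jumbo_write_json, assuming a local RethinkDB instance and a repl() connection so the .run() calls inside the function need no explicit connection; the database and table names are placeholders.

import rethinkdb as r

r.connect('localhost', 28015).repl()

docs = [{'id': i, 'value': i * i} for i in range(250000)]
# Written in 5,000-doc chunks, comfortably under the 100,000-doc ReQL write limit.
jumbo_write_json(docs, db_name='demo_db', table_name='demo_table',
                 chunk_size=5000, silent=False)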
Example #34
        if value == 1:
            result = value
        elif value == 0:
            result = is_following(user, owner)

        if result == 1:
            collaboration_cache[user][owner] = True

        return result


con = rdb.connect()
db_name, table_name = 'member_events', 'year_2016'

db_ref = rdb.db(db_name).table(table_name)

if db_name not in rdb.db_list().run(con):
    rdb.db_create(db_name).run(con)

if table_name not in rdb.db(db_name).table_list().run(con):
    rdb.db(db_name).table_create(table_name).run(con)

for i in range(2, 7):
    print '2016, {0}'.format(i)

    with open('{0}.json'.format(i)) as f:
        events = json.load(f)
        events = events[0]

    entries = []
Example #35
def run_vod_kpis(ucis, view_type):
    started_views = view_count(ucis)
    week_ucis = ucis.filter((dt_end - timedelta(days=6) < ucis.firstEvent)
                            & (ucis.firstEvent < dt_end + timedelta(days=1)))
    week_ago_ucis = ucis.filter((dt_end - timedelta(days=13) < ucis.firstEvent)
                                &
                                (ucis.firstEvent < dt_end - timedelta(days=6)))
    weekly_active_user = user_number(week_ucis)
    total_active_user = user_number(ucis)
    total_viewtime = total_viewing_time(ucis)
    user_viewtime = avg_user_viewtime(week_ucis)
    weekly_hibernation = user_hibernation(week_ucis, week_ago_ucis)
    top_program = top_programs_in_vod(ucis, 20)
    top_channel = normalize(top_tag_by_view_count(ucis, 'channelName'),
                            started_views)
    hour_of_day = normalize(view_count_by_hour_of_day(ucis), started_views)
    day_of_week = normalize(view_count_by_day_of_week(ucis), started_views)
    tag_user_package, user_package = users_package_overview(ucis)
    package_overview = {
        "{} user".format(view_type): tag_user_package,
        "linear TV user": user_package
    }
    res = [{
        "title": 'started-views',
        "id": 'started-views',
        "started-views": started_views
    }, {
        "title": 'weekly-active-user',
        "id": 'weekly-active-user',
        "weekly-active-user": weekly_active_user
    }, {
        "title": 'total-active-user',
        "id": 'total-active-user',
        "total-active-user": total_active_user
    }, {
        "title": 'total-viewing-time',
        "id": 'total-viewing-time',
        "total-viewing-time": total_viewtime
    }, {
        "title": 'viewing-time',
        "id": 'viewing-time',
        "viewing-time": user_viewtime
    }, {
        "title": 'user-hibernation',
        "id": 'user-hibernation',
        "user-hibernation": weekly_hibernation
    }, {
        "title": 'top-programs',
        "id": 'top-programs',
        "data": top_program
    }, {
        "title": 'top-provider',
        "id": 'top-provider',
        "data": top_channel
    }, {
        "title": 'hour-of-day',
        "id": 'hour-of-day',
        "data": hour_of_day
    }, {
        "title": 'day-of-week',
        "id": 'day-of-week',
        "data": day_of_week
    }, {
        "title": 'package-overview',
        "id": 'package-overview',
        "data": package_overview
    }]
    r.db('telenortv_insight_api').table(view_type).insert(
        res, conflict='replace').run()
Example #36
 def test_order_by_bracket(self, conn):
     res = r.db('x').table('farms').order_by(lambda doc: doc['id']).map(
         lambda doc: doc['id']).run(conn)
     expected = [1, 2]
     assertEqual(expected, list(res))
Example #37
 def user_leave(self, user, room):
     r.db(self.db).table(self.table).filter({
         'room': room,
         'room_user': user
     }).delete().run(self.conn)
Example #38
 def add_user(self, user, room, color):
     r.db(self.db).table(self.table).insert({
         'room': room,
         'room_user': user,
         'color': color
     }).run(self.conn)
Example #39
 def create_table(self, table):
     try:
         r.db(self.db).table_create(table).run(self.conn)
         print('table created')
     except:
         print('table exists')
Example #40
def read(DB, tablename, accountaddress, conn):
    cursor = r.db(DB).table(tablename).filter({
        'address': accountaddress
    }).pluck('address', 'balance').run(conn)
    for document in cursor:
        return document
Example #41
import rethinkdb as r
import algos

c = r.connect()
cursor = r.db("themis").table("pages").limit(1).run(c)
data = []
for document in cursor:
    databaseId = document['id']
    print(databaseId)
    kmeansResult = algos.kmeans(str(document['content']).decode('unicode-escape'))
    r.db("themis").table("pages").get(databaseId).update({"cluster": kmeansResult}).run(c)
Example #42
    # Returned Docopt arguments.
    docArgs = doc(__doc__, version="0.0.1")

    # Values from Docopt.
    noDB    = True if (int(docArgs["--nodb"]) == 1) else (False if (int(docArgs["--nodb"]) == 0) else None)
    online  = True if (int(docArgs["-o"    ]) == 1) else (False if (int(docArgs["-o"    ]) == 0) else None)

    #print(docArgs)

    dbA     = str(docArgs["--dba"][0])
    dbN     = str(docArgs["--dbn"][0])
    tOut    = int(docArgs["--tout"][0])

    app     = Flask(__name__)
    sIO     = sio(app)
    db      = r.db(dbN)

    if not noDB:
        c = database.conn(dbA);

    #print(db)
    #print(type(db))

    # Routings.
    @app.route("/")
    def index(): return render_template("index.html")

    @app.route("/api/client")
    def api_client():
        return DatabaseAPI(c, db, dbA, noDB, "client_name")
Example #43
def tables():
    import rethinkdb as r
    r.connect(host=DB_HOST, auth_key=open(AUTH).read().strip(), timeout=20).repl()
    return r.db('smc').table_list().run()
Example #44
    """
    Create the tables we are going to use
    """
    global connection, tables

    print "Creating databases/tables...",
    sys.stdout.flush()
    try:
        r.db_drop("test").run(connection)
    except r.errors.RqlRuntimeError, e:
        pass

    r.db_create("test").run(connection)

    for table in tables:
        r.db("test").table_create(table["name"]).run(connection)

    for table in tables:
        r.db("test").table(
            table["name"]).index_create("field0").run(connection)
        r.db("test").table(
            table["name"]).index_create("field1").run(connection)

    print " Done."
    sys.stdout.flush()


def execute_read_write_queries(suffix):
    """
    Execute all the queries (inserts/update, reads, delete)
    """
Example #45
def tests():
    print r.expr(1).run(c)
    print r.expr("bob").run(c)
    print r.expr(True).run(c)
    print r.expr(False).run(c)
    print r.expr(3.12).run(c)
    print r.expr([1, 2, 3, 4, 5]).run(c)
    print r.expr({'a': 1, 'b': 2}).run(c)
    #print r.js('1 + 1').run(c)

    print(r.expr(1) == 2).run(c)  # false
    print(r.expr(1) != 2).run(c)  # true
    print(r.expr(1) < 2).run(c)  # true
    print(r.expr(1) <= 2).run(c)  # true
    print(r.expr(1) > 2).run(c)  # false
    print(r.expr(1) >= 2).run(c)  # false
    print(~r.expr(True)).run(c)  # false
    print(~r.expr(False)).run(c)  # true

    print(r.expr(1) + 2).run(c)  # 3
    print(r.expr(1) - 2).run(c)  # -1
    print(r.expr(1) * 2).run(c)  # 2
    print(r.expr(1) / 2).run(c)  # .5
    print(r.expr(12) % 10).run(c)  # 2

    print(((r.expr(12) / 6) * 4) - 3).run(c)  # 5

    arr = r.expr([1, 2, 3, 4])

    print arr.append(5).run(c)
    print arr[1].run(c)
    print arr[2].run(c)
    print arr[1:2].run(c)
    print arr[:2].run(c)
    print arr[2:].run(c)
    print arr.count().run(c)
    print arr.union(arr).run(c)
    print arr.union(arr).distinct().run(c)
    print arr.inner_join(arr, lambda a, b: a == b).run(c)
    print arr.outer_join(arr, lambda a, b: a == (b - 2)).run(c)

    #print r.expr([{'id':0, 'a':0}, {'id':1, 'a':0}]).eq_join([{'id':0, 'b':1}, {'id':1, 'b':1}], 'id').run(c)

    obj = r.expr({'a': 1, 'b': 2})

    print obj['a'].run(c)
    print obj.contains('a').run(c)
    print obj.pluck('a').run(c)
    print obj.without('a').run(c)
    print obj.merge({'c': 3}).run(c)

    print r.db_list().run(c)
    print r.db_create('bob').run(c)
    print r.db_create('test').run(c)
    print r.db_list().run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_create('test').run(c)
    print r.db('test').table_create('bob').run(c)
    print r.db('test').table_list().run(c)
    print r.db('test').table_drop('bob').run(c)
    print r.db('test').table_list().run(c)

    test = r.db('test').table('test')

    print test.run(c)
    print test.insert({'id': 1, 'a': 2}).run(c)
    print test.insert({'id': 2, 'a': 3}).run(c)
    print test.insert({'id': 3, 'a': 4}).run(c)
    print test.run(c)
    print test.between(right_bound=2).run(c)

    print test.update(lambda row: {'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.replace(lambda row: {'id': row['id'], 'a': row['a'] + 1}).run(c)
    print test.run(c)
    print test.delete().run(c)
    print test.run(c)

    print r.expr(1).do(lambda a: a + 1).run(c)
    print r.expr(2).do(lambda a: {'b': a / a}).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a + 1).run(c)
    print r.expr([1, 2, 3]).map(lambda a: a.do(lambda b: b + a)).run(c)
    print r.expr([1, 2, 3]).reduce(lambda a, b: a + b).run(c)
    print r.expr([1, 2, 3, 4]).filter(lambda a: a < 3).run(c)

    print r.expr([1, 2]).concat_map(lambda a: [a, a]).run(c)

    print r.branch(r.expr(1) < 2, "a", "b").run(c)
    print r.branch(r.expr(1) < 0, "a", "b").run(c)

    print(r.expr(True) & r.expr(False)).run(c)
    print(r.expr(True) | r.expr(False)).run(c)
    print(r.expr(True) & r.expr(True)).run(c)
    print(r.expr(False) | r.expr(False)).run(c)

    #print r.expr([1,2]).map(3).run(c)
    #print r.expr([1,2]).map(r.row + 3).run(c)
    print r.expr([{'id': 2}, {'id': 3}, {'id': 1}]).order_by('id').run(c)
    print r.expr([{
        'g': 0,
        'v': 1
    }, {
        'g': 0,
        'v': 2
    }, {
        'g': 1,
        'v': 1
    }, {
        'g': 1,
        'v': 2
    }]).grouped_map_reduce(lambda row: row['g'], lambda row: row['v'] + 1,
                           lambda a, b: a + b).run(c)

    #print r.expr([1,2]).for_each(lambda i: [test.insert({'id':i, 'a': i+1})]).run(c)
    print test.run(c)
Example #46
def execute_read_write_queries(suffix):
    """
    Execute all the queries (inserts/update, reads, delete)
    """
    global results, connection, time_per_query, executions_per_query, constant_queries

    print "Running inserts...",
    sys.stdout.flush()
    for table in tables:
        docs = []
        num_writes = gen_num_docs(table["size_doc"])
        for i in xrange(num_writes):
            docs.append(gen_doc(table["size_doc"], i))

        i = 0

        durations = []
        start = time.time()
        while (time.time() - start < time_per_query) & (i < num_writes):
            start_query = time.time()
            result = r.db('test').table(table['name']).insert(
                docs[i]).run(connection)
            durations.append(time.time() - start_query)

            if "generated_keys" in result:
                table["ids"].append(result["generated_keys"][0])
            i += 1

        durations.sort()
        results["single-inserts-" + table["name"] + "-" + suffix] = {
            "average": (time.time() - start) / i,
            "min": durations[0],
            "max": durations[len(durations) - 1],
            "first_centile":
            durations[int(math.floor(len(durations) / 100. * 1))],
            "last_centile":
            durations[int(math.floor(len(durations) / 100. * 99))]
        }

        # Save it to know how many batch inserts we did
        single_inserts = i

        # Finish inserting the remaining data
        size_batch = 500
        durations = []
        start = time.time()
        count_batch_insert = 0
        if i < num_writes:
            while i + size_batch < num_writes:
                start_query = time.time()
                result = r.db('test').table(table['name']).insert(
                    docs[i:i + size_batch]).run(connection)
                durations.append(time.time() - start_query)
                end = time.time()
                count_batch_insert += 1

                table["ids"] += result["generated_keys"]
                i += size_batch

            if i < num_writes:
                result = r.db('test').table(table['name']).insert(
                    docs[i:len(docs)]).run(connection)
                table["ids"] += result["generated_keys"]

        if num_writes - single_inserts != 0:
            results["batch-inserts-" + table["name"] + "-" + suffix] = {
                "average": (end - start) / (count_batch_insert * size_batch),
                "min":
                durations[0],
                "max":
                durations[len(durations) - 1],
                "first_centile":
                durations[int(math.floor(len(durations) / 100. * 1))],
                "last_centile":
                durations[int(math.floor(len(durations) / 100. * 99))]
            }

        table["ids"].sort()

    print " Done."
    sys.stdout.flush()

    # Execute the insert queries
    print "Running update/replace...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(write_queries)):
            docs = []
            num_writes = gen_num_docs(table["size_doc"])
            for i in xrange(num_writes):
                docs.append(gen_doc(table["size_doc"], i))

            i = 0

            durations = []
            start = time.time()
            while (time.time() - start < time_per_query) & (i < len(
                    table["ids"])):
                start_query = time.time()
                eval(write_queries[p]["query"]).run(connection)
                durations.append(time.time() - start_query)
                i += 1

            durations.sort()
            results[write_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / i,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

            i -= 1  # We need i in write_queries[p]["clean"] (to revert only the document we updated)
            # Clean the update
            eval(write_queries[p]["clean"]).run(connection)

    print " Done."
    sys.stdout.flush()

    # Execute the read queries on every tables
    print "Running reads...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(table_queries)):
            count = 0
            i = 0
            if "imax" in table_queries[p]:
                max_i = table_queries[p]["imax"] + 1
            else:
                max_i = 1

            durations = []
            start = time.time()
            while (time.time() - start <
                   time_per_query) & (count < executions_per_query):
                start_query = time.time()
                try:
                    cursor = eval(table_queries[p]["query"]).run(connection)
                    if isinstance(cursor, r.net.Cursor):
                        list(cursor)
                        cursor.close()

                    if i >= len(table["ids"]) - max_i:
                        i = 0
                    else:
                        i += 1
                except:
                    print "Query failed"
                    print constant_queries[p]
                    sys.stdout.flush()
                    break
                durations.append(time.time() - start_query)
                count += 1

            durations.sort()
            results[table_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / count,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

    print " Done."
    sys.stdout.flush()

    # Execute the delete queries
    print "Running delete...",
    sys.stdout.flush()
    for table in tables:
        for p in xrange(len(delete_queries)):
            start = time.time()

            i = 0

            durations = []
            start = time.time()
            while (time.time() - start < time_per_query) & (i < len(
                    table["ids"])):
                start_query = time.time()
                eval(delete_queries[p]["query"]).run(connection)
                durations.append(time.time() - start_query)

                i += 1

            durations.sort()
            results[delete_queries[p]["tag"] + "-" + table["name"] + "-" +
                    suffix] = {
                        "average": (time.time() - start) / i,
                        "min":
                        durations[0],
                        "max":
                        durations[len(durations) - 1],
                        "first_centile":
                        durations[int(math.floor(len(durations) / 100. * 1))],
                        "last_centile":
                        durations[int(math.floor(len(durations) / 100. * 99))]
                    }

    print " Done."
    sys.stdout.flush()
Example #47
def drop():
    """Delete all chats (truncate)"""
    r.db('chat').table('chats').delete().run(conn)
Example #48
def jumbo_write_df(df,
                   db_name,
                   table_name,
                   df_chunk_size=5e5,
                   json_chunk_size=5e3,
                   verbosity=1):
    '''Write big pandas dataframes to RethinkDB.

    Essential for datasets that are larger than 100,000 rows (ReQL max write).
    Often necessary even for smaller ones.

    df [pandas DataFrame]: 'nuff said.
    db_name [str]: a RethinkDB database, existing or not.
    table_name [str]: a RethinkDB table, existing or not.
    df_chunk_size [int or float of form BASEeEXP]: input df will be broken into
        chunks of this many rows. If you encounter memory use issues, reduce
        this value first. Maximum accepted value is 1,000,000.
    json_chunk_size [int or float of form BASEeEXP]: input list passed to
        jumbo_write_json will be broken into chunks of this size. If you
        encounter memory use issues, reduce this value second. Maximum
        accepted value is 100,000 (ReQL write limit).
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = reports from this function and subroutine jumbo_write_json.

    Calls jumbo_write_json.
    Must be connected to a RethinkDB instance before using this.'''

    if df_chunk_size > 1e6:
        raise (Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        raise (Exception('Maximum json_chunk_size is 100,000. This size is \
            rarely a good idea.'))

    #set verbosity for jumbo_write_json
    sil = False if verbosity == 2 else True

    if verbosity > 0:
        print('Preparing ' + str(len(df)) + '-row DataFrame for database.')

    # json_list = []
    while len(df):  #runs as long as rows remain in the dataframe

        #take a chunk of the dataframe and convert to json list
        l = min(len(df), int(df_chunk_size)
                )  #get the first chunk_size lines, or all the rest if fewer
        chunk = df.iloc[0:l]  #subset them from the df
        df = df.drop(df.index[0:l])  #drop those lines
        json_list = chunk.to_dict('records')

        if verbosity > 0:
            print('Converting chunk of ' + str(l) + ' rows to JSON format.')

        # s_buf = io.StringIO() #create string buffer
        # chunk.to_csv(s_buf, index=False) #send chunk as csv to buffer
        # s_buf.seek(0) #reset buffer to first position
        # json_list = list(csv.DictReader(s_buf)) #read csv into json list
        # s_buf.close() #close string buffer

        #free up some memory
        del (chunk)
        gc.collect()  #remove all vars no longer referenced to free a bit more

        #open connection to null device for banishing unneeded outputs
        black_hole = open(os.devnull, 'w')
        # black_hole = [json_list[i].pop('', None) for i in range(len(json_list))]
        # black_hole = [json_list[i].pop('Unnamed: 0', None) for i in range(len(json_list))]

        #sort by hash.
        json_list = sorted(json_list, key=operator.itemgetter('hash'))

        #group json list by hash and remove hash from each reduction
        jl2 = []
        for hsh, red in itt.groupby(json_list,
                                    key=operator.itemgetter('hash')):
            red = list(red)
            black_hole = [red[i].pop('hash', None) for i in range(len(red))]
            jl2.append({'group': hsh, 'reduction': red})
        del (json_list)

        if verbosity > 0:
            print('Finished grouping chunk by hash. Passing list of length ' \
                + str(len(jl2)) + ' to jumbo_write_json.')

        #write list to rethink
        jumbo_write_json(data=jl2,
                         db_name=db_name,
                         table_name=table_name,
                         chunk_size=json_chunk_size,
                         silent=sil)
        del (jl2)

    if verbosity > 0:
        ndocs = r.db(db_name).table(table_name).count().run()
        print('Finished writing day of records. Wrote ' + str(ndocs) \
            + ' docs to table "' + table_name + '".')
Пример #49
0
 def count_documents(self, table):
     '''
     return integer count of number of documents in table
     '''
     return r.db(self.database).table(table).count().run()
Example #50
 def __init__(self):
     self.conn = r.connect(host="172.16.1.2",port=28015)
    # r.db_list().contains('Atlas').do(lambda databaseExists: r.branch(databaseExists, 0 ,r.db_create('Atlas'))).run(self.conn)
    # r.db('Atlas').table_create('DomainTable').run(self.conn)
     #r.db('Atlas').contains('DomainTable').do(lambda exists : r.branch( exists, 0,  r.db('Atlas').table_create('DomainTable'))).run(self.conn)
     self.table = r.db('Atlas').table("WordSearchCount")
Example #51
def retrieve_records(
        api_key,
        sensor_path,
        db_name,
        end_date=(
            datetime.datetime.strptime(time.strftime('%Y-%m-%d'), '%Y-%m-%d') -
            datetime.timedelta(days=1)).strftime('%Y-%m-%d'),
        start_date=None,
        json_chunk_size=5e3,
        verbosity=1):
    '''Pull records from Acyclica's API and write to RethinkDB.

    api_key [str]: the 41-character alphanumeric key you were given by Acyclica.
        Should be read in from an environment variable, encrypted if possible.
    sensor_path [str]: the path to Acyclica_sensors_CBD.csv
        (should be fetched automatically once we package this thing).
    db_name [str]: the name of the RethinkDB database that will be populated.
    end_date [str]: a date string of the form 'YYYY-MM-DD' specifying the last
        day of data to pull from Acyclica. Defaults to yesterday.
    start_date [str]: a date string of the form 'YYYY-MM-DD' specifying the first
        day of data to fetch from Acyclica. Defaults to None, which means only
        end_date will be fetched. Set this to 'prev_week' to fetch the full week
        starting 8 days ago and ending yesterday.
    json_chunk_size [int or float of form BASEeEXP]: lists passed to
        jumbo_write_json will be broken into chunks of this size. No need to
        modify unless you encounter memory use issues, in which case you should
        first try reducing the default value of 5,000.
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = more reports from this function and from subroutine
            jumbo_write_json.

    Calls jumbo_write_df, which calls jumbo_write_json.
    Must be connected to a RethinkDB instance before using this.

    Pull at minimum 1 day and at maximum 1 week of data in increments of 1
    day.'''

    #start timing
    start_time = time.time()

    #check for size limit errors
    # if df_chunk_size > 1e6:
    #     raise(Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        raise Exception('Maximum json_chunk_size is 100,000. This size is '
                        'rarely a good idea.')

    #default end_date to yesterday, computed at call time rather than at
    #function-definition time
    if end_date is None:
        end_date = (datetime.datetime.now() -
                    datetime.timedelta(days=1)).strftime('%Y-%m-%d')

    #check for end_date format error
    try:
        datetime.datetime.strptime(end_date, '%Y-%m-%d')
    except ValueError:
        raise Exception('end_date must be of the form "YYYY-MM-DD".')

    #set appropriate start dates based on input
    if start_date == 'prev_week':
        start_date = (datetime.datetime.strptime(end_date, '%Y-%m-%d') -
                      datetime.timedelta(days=6)).strftime('%Y-%m-%d')
    elif start_date is None:
        start_date = end_date
    else:
        pass

    #check for start_date format error
    try:
        datetime.datetime.strptime(start_date, '%Y-%m-%d')
    except ValueError:
        raise Exception('start_date must be of the form "YYYY-MM-DD".')

    #add 23 h, 59 m, and 59 s to the end date (to grab the whole day)
    end_date = datetime.datetime.strptime(end_date,
                                          '%Y-%m-%d') + datetime.timedelta(
                                              hours=23, minutes=59, seconds=59)

    #convert datetime objects to unix time
    start_unix = int(
        time.mktime(
            datetime.datetime.strptime(start_date, '%Y-%m-%d').timetuple()))
    end_unix = int(time.mktime(end_date.timetuple()))

    #make sure the user isn't trying to grab more than a week of data, and that
    #end is after start
    if end_unix - start_unix > 604800:
        raise Exception(
            'Please specify a range of dates no greater than one week.')
    if end_unix - start_unix < 0:
        raise Exception('end_date must be later than start_date.')

    #determine how many days have been selected
    dif = end_unix - start_unix
    ndays = math.ceil(dif / (24 * 3600))

    #get sensor data
    sensors = pd.read_csv(sensor_path)
    # sensors = sensors.drop(['name', 'short_name','latitude','longitude'], axis=1)
    sensors.columns = ['IntersectionID', 'sensor']
    sensor_list = list(sensors['sensor'])

    if verbosity > 0:
        print('Preparing to acquire data for ' + str(ndays) + ' day(s) and ' \
            + str(len(sensor_list)) + ' sensors.')

    #create database if it doesn't already exist
    if db_name not in r.db_list().run():
        r.db_create(db_name).run()

    #request and process one day at a time (roughly 5-10m records acquired per day)
    day_start_unix = start_unix
    for day in range(ndays):

        print('Acquiring records for day ' + str(day + 1) + ' of ' \
            + str(ndays) + '. May take several minutes.')

        #date string will be the table name on RethinkDB
        tname = datetime.datetime.fromtimestamp(
            int(day_start_unix)).strftime('%Y_%m_%d')
        if tname in r.db(db_name).table_list().run():
            print('Table "' + tname + '" already exists in database "' \
                + db_name + '". Skipping this day.')
            day_start_unix = day_start_unix + (24 * 3600)  #increment day
            continue
        else:
            r.db(db_name).table_create(tname).run()

        #get endpoints for each iteration and (re)instantiate dataframe
        day_end_unix = day_start_unix + (23 * 3600) + 3599
        df = pd.DataFrame(
            columns=['Timestamp', 'MAC Hash', 'Strength', 'Serial'])

        #request and preprocess each sensor separately
        for i in range(len(sensor_list)):

            # sensorID = sensor_list[1]
            URL = "https://cr.acyclica.com/datastream/device/csv/time/" \
                + api_key + "/" + str(sensor_list[i]) + "/" \
                + str(day_start_unix) + "/" + str(day_end_unix)

            #get raw web content and read into a dataframe
            items = requests.get(URL).content
            newdf = pd.read_csv(
                io.StringIO(items.decode('utf-8')),
                usecols=['Timestamp', 'MAC Hash', 'Strength', 'Serial'])

            #round timestamp to nearest second
            newdf['Timestamp'] = newdf['Timestamp'].round().astype('int')

            #drop repeated reads within 1s: keep one read per
            #(Timestamp, MAC Hash) pair, the one with the highest 'Serial'
            strmaxes = newdf.groupby(['Timestamp',
                                      'MAC Hash'])['Serial'].transform(max)
            newdf = newdf[newdf['Serial'] == strmaxes]

            #append to main dataframe (pd.concat replaces DataFrame.append,
            #which was removed in newer pandas versions)
            df = pd.concat([df, newdf], ignore_index=True)

            if verbosity == 2:
                if i + 1 in [15, 30, 45]:
                    print('Got data for ' + str(i + 1) + ' of ' \
                        + str(len(sensor_list)) \
                        + ' sensors. So far there are ' + str(len(df)) \
                        + ' reads for day ' + str(day + 1) + '.')

        del newdf

        #drop repeated reads again across sensors: keep one read per
        #(Timestamp, MAC Hash) pair, the one with the highest 'Serial'
        strmaxes = df.groupby(['Timestamp',
                               'MAC Hash'])['Serial'].transform(max)
        df = df[df['Serial'] == strmaxes]

        pre_filt_len = str(len(df))
        if verbosity > 0:
            print('Found ' + pre_filt_len + ' sensor reads for day ' \
                + str(day + 1) + '. Cleaning those now.')

        json_list = df_to_json_etc(df, verbosity, pre_filt_len, sensors)

        if verbosity > 0:
            print('Converted DataFrame to JSON list and grouped by hash. ' \
                + 'Passing list of length ' + str(len(json_list)) \
                + ' to jumbo_write_json.')

        #set verbosity for jumbo_write_json
        sil = False if verbosity == 2 else True

        jumbo_write_json(data=json_list,
                         db_name=db_name,
                         table_name=tname,
                         chunk_size=json_chunk_size,
                         silent=sil)

        #increment day
        day_start_unix = day_start_unix + (24 * 3600)

    if verbosity > 0:
        run_time = round((time.time() - start_time) / 60, 2)
        print('Finished writing all records for ' + str(ndays) + ' day(s) ' \
            + 'in ' + str(run_time) + ' minutes.\nRecords are in database "' \
            + db_name + '".')
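A hypothetical invocation of retrieve_records; the host, environment variable name, and CSV path below are assumptions, not part of the original code:

import os
import rethinkdb as r

r.connect('localhost', 28015).repl()  #the function expects a repl() connection
retrieve_records(api_key=os.environ['ACYCLICA_API_KEY'],
                 sensor_path='Acyclica_sensors_CBD.csv',
                 db_name='acyclica',
                 start_date='prev_week',
                 verbosity=1)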
Пример #52
0
def create_table(name, conn):
    res = r.db(DB).table_create(name).run(conn)
Пример #53
0
def clear_current(sample_id, conn):
    r.db("samplesdb").table("sample2attribute_set")\
                     .get_all(sample_id, index="sample_id")\
                     .update({"current": False})\
                     .run(conn)
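The query above depends on a 'sample_id' secondary index; a one-time setup sketch, assuming the same database and table names:

r.db("samplesdb").table("sample2attribute_set")\
                 .index_create("sample_id").run(conn)
r.db("samplesdb").table("sample2attribute_set")\
                 .index_wait("sample_id").run(conn)  #block until the index is ready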
Пример #54
0
parameters = yaml.safe_load(parameter_file)

print("Connecting database ...")
rethink = r.connect(parameters['rethinkdb_server']['host'],
                    parameters['rethinkdb_server']['port']).repl()
rethink_db = parameters['rethinkdb_server']['database']
url_queue_table = parameters['rethinkdb_server']['tables']['url_queue']
raw_result_table = parameters['rethinkdb_server']['tables']['raw_result']
indexed_result_table = parameters['rethinkdb_server']['tables'][
    'indexed_result']
# Init database
db_list = r.db_list().run(rethink)
if rethink_db not in db_list:
    print "Init database ..."
    r.db_create(rethink_db).run(rethink)
    r.db(rethink_db).table_create(url_queue_table).run(rethink)
    r.db(rethink_db).table(url_queue_table).index_create('ts').run(rethink)
    r.db(rethink_db).table_create(raw_result_table).run(rethink)
    r.db(rethink_db).table_create(indexed_result_table).run(rethink)

rethink.use(rethink_db)
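If the 'ts' index was just created by the block above, it may still be building; a small hedged addition that waits for it before any worker queries it:

#wait for the 'ts' secondary index to finish building, if it exists
if 'ts' in r.db(rethink_db).table(url_queue_table).index_list().run(rethink):
    r.db(rethink_db).table(url_queue_table).index_wait('ts').run(rethink)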


def main(argv):
    # Main code here
    print "I'm manager :)"

    if len(argv) > 1:
        seed_url = argv[1]

        r.table(url_queue_table).insert({
Пример #55
0
 def __init__(self):
     r.connect(settings['RETHINKDB_SERVER'],
               settings['RETHINKDB_PORT']).repl()
     self.db = r.db(settings['RETHINKDB_DB']).table(
         settings['RETHINKDB_TABLE'])
Пример #56
0
#!/usr/bin/env python
# coding: utf-8
import rethinkdb as r
r.connect('localhost', 28015).repl()
watchcount = r.db('polltime').table('votes').get_all('b0aae840-f52e-4bdd-abcd-74789f52c6bd', index='choice').count().run()
dontwatchcount = r.db('polltime').table('votes').get_all('a966c7b3-9277-4c09-9254-8806762bbea0', index='choice').count().run()
watchint = int(watchcount)
dontwatchint = int(dontwatchcount)
#write the verdict ("TRY" or "GIVE") to the output file
with open("tmp/finalcountq4.txt", "w") as outfile:
    if watchint > dontwatchint:
        outfile.write("TRY")
    else:
        outfile.write("GIVE")
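The same comparison can also be pushed into a single server-side ReQL query; a minimal sketch reusing the table and choice ids above:

votes = r.db('polltime').table('votes')
verdict = r.do(
    votes.get_all('b0aae840-f52e-4bdd-abcd-74789f52c6bd', index='choice').count(),
    votes.get_all('a966c7b3-9277-4c09-9254-8806762bbea0', index='choice').count(),
    lambda watch, dontwatch: r.branch(watch > dontwatch, "TRY", "GIVE")).run()
#verdict is the string "TRY" or "GIVE"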
Пример #57
0
#Prints the contents of all the tables in a RethinkDB database
#You can also pass the names of specific tables as command-line arguments to print only those tables
#Users' messages can be excluded by adding .without("messages") to the table query below

import rethinkdb as r
import dotenv
import os
import json
import sys

dotenv.load_dotenv("./.env")

r.connect(os.environ.get("DB_HOST"), int(os.environ.get("DB_PORT"))).repl()
tableList = []
if (len(sys.argv) > 1):
    tableList = sys.argv[1:len(sys.argv)]
else:
    tableList = r.db("deepstream").table_list().run()

print("Table list:")
print(tableList)
print()

for table in tableList:
    print("Table name: " + table)
    docs = list(r.db("deepstream").table(table).run())
    #add .without("messages") before .run() above to exclude users' messages
    print(json.dumps(docs, indent=1, sort_keys=True))
    print()
    print()
Пример #58
0
 def test_simple(self, conn):
     res = r.db('x').table('farms').map(lambda doc: doc['animals'][0]).run(
         conn)
     assertEqual(set(['frog', 'horse']), set(list(res)))
Пример #59
0
 def test_filter_by_bracket(self, conn):
     res = r.db('x').table('farms').filter(lambda doc: doc['id'] < 2).run(
         conn)
     expected = [1]
     results = [doc['id'] for doc in res]
     assertEqual(expected, results)
Пример #60
0
 def test_set_intersection(self, conn):
     expected = [set(['x', 'y']), set(['x'])]
     result = r.db('z').table('t').map(
         lambda doc: doc['simple'].set_intersection(['x', 'y'])).run(conn)
     result = [set(d) for d in result]
     assertEqUnordered(expected, result)