Example #1
    def deleteUntrackedFromCache(self, configs):
        """
        Delete data from cache for entities that are no longer cached
        """
        if not self.config['delete_cache_for_untracked_entities']:
            return

        # Get the list of cached entity types
        tableTemplate = self.config['rethink_entity_table_template']
        existingTables = rethinkdb.table_list().run(self.rethink)

        existingCacheTables = []
        tablePattern = tableTemplate.format(type="*")
        for table in existingTables:
            if fnmatch.fnmatch(table, tablePattern):
                existingCacheTables.append(table)

        usedCacheTables = [c['table'] for c in configs]
        unusedCacheTables = [t for t in existingCacheTables if t not in usedCacheTables]
        LOG.debug("Unusesd cache tables: {0}".format(unusedCacheTables))

        LOG.info("Deleting {0} cache tables".format(len(unusedCacheTables)))
        for table in unusedCacheTables:
            LOG.info("Deleting table: {0}".format(table))
            rethinkdb.table_drop(table).run(self.rethink)
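The pattern in this example — list the existing tables, match them against the configured naming template, and drop any that are no longer tracked — can also be exercised as a standalone script. A minimal sketch, assuming the legacy module-level rethinkdb driver used throughout these examples; the connection parameters, template, and tracked table names below are placeholders:

import fnmatch
import rethinkdb

conn = rethinkdb.connect('localhost', 28015, db='cache')        # hypothetical connection
table_template = 'entity_{type}_cache'                          # hypothetical naming template
tracked_tables = {'entity_shot_cache', 'entity_asset_cache'}    # tables that should be kept

pattern = table_template.format(type='*')
for table in rethinkdb.table_list().run(conn):
    # Drop any table that matches the cache naming pattern but is no longer tracked.
    if fnmatch.fnmatch(table, pattern) and table not in tracked_tables:
        rethinkdb.table_drop(table).run(conn)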
Example #2
    def deleteUntrackedFromCache(self, configs):
        """
        Delete data from cache for entities that are no longer cached
        """
        if not self.config['delete_cache_for_untracked_entities']:
            return

        # Get the list of cached entity types
        tableTemplate = self.config['rethink_entity_table_template']
        existingTables = rethinkdb.table_list().run(self.rethink)

        existingCacheTables = []
        tablePattern = tableTemplate.format(type="*")
        for table in existingTables:
            if fnmatch.fnmatch(table, tablePattern):
                existingCacheTables.append(table)

        usedCacheTables = [c['table'] for c in configs]
        unusedCacheTables = [
            t for t in existingCacheTables if t not in usedCacheTables
        ]
        LOG.debug("Unusesd cache tables: {0}".format(unusedCacheTables))

        LOG.info("Deleting {0} cache tables".format(len(unusedCacheTables)))
        for table in unusedCacheTables:
            LOG.info("Deleting table: {0}".format(table))
            rethinkdb.table_drop(table).run(self.rethink)
Example #3
def initialSetup():
    print "Setting up database..."
    dbs = rethinkdb.db_list().run()

    if not con.general.databases["rethink"]["db"] in dbs:
        print "Creating database in rethink"
        rethinkdb.db_create(con.general.databases["rethink"]["db"]).run()

    dbt = list(rethinkdb.table_list().run())
    for db in c.general.flush["rethink"]:
        if c.general.flush["rethink"][db]:
            print "Flushing rethink "+db+" table..."
            if db in dbt:
                rethinkdb.table_drop(db).run()
                dbt.pop(dbt.index(db))

    print "Creating new rethink tables..."
    for table in c.general.tables:
        if not table in dbt:
            print "Creating table {}".format(table)
            rethinkdb.table_create(table).run()

    for key in c.general.flush["redis"]:
        if c.general.flush["redis"][key]:
            print "Flushing redis "+key+" keys..."
            keys = con.redis.keys(key+":*")
            for key in keys: con.redis.delete(key)
Example #4
	def setUp(self):
		
		self.db_name = 'radiowcs_test'
		assert self.db_name != 'radiowcs'
		self.table_name = 'test'

		self.db = database.Database()
		self.db.database_name = self.db_name
		self.db.table_name = self.table_name

		self.db.connect()

		self.connection = r.connect(
			host='localhost',
			port=28015,
			db=self.db_name,
			auth_key='',
			timeout=30
		)
		try:
			r.db_create(self.db_name).run(self.connection)
			r.table_create(self.table_name).run(self.connection)
		except r.RqlRuntimeError:
			print 'unittest setup: Drop table'
			r.table_drop(self.table_name).run(self.connection)
			r.table_create(self.table_name).run(self.connection)
		r.db(self.db_name).table(self.table_name).index_create( 'title').run(self.connection)
		r.db(self.db_name).table(self.table_name).index_create('artist').run(self.connection)
		r.db(self.db_name).table(self.table_name).index_create(  'date').run(self.connection)
		# 'out of order' insertions
		r.db(self.db_name).table(self.table_name).insert({'title':'foobar',      'artist': 'Selena',   'date': '1430183323'}).run(self.connection)
		r.db(self.db_name).table(self.table_name).insert({'title':'hello world', 'artist': 'John',     'date': '1430082566'}).run(self.connection)
		r.db(self.db_name).table(self.table_name).insert({'title':'zombie apoc', 'artist': 'xxJANExx', 'date': '1430385845'}).run(self.connection)
		r.db(self.db_name).table(self.table_name).insert({'title':'Black',       'artist': 'Kettle',   'date': '1430284300'}).run(self.connection)
Example #5
def do_fix(db, collection=None):

	if collection is None:
		bad_meta, bad_tables = find_spurious_meta_and_tables(r.table('__METADATA__').run(db), r.table_list().run(db))
		
		if len(bad_meta) == 0 and len(bad_tables) == 0:
			return 0, 0

		r.table('__METADATA__').get_all(*bad_meta).delete().run(db)

		for table in bad_tables:
			r.table_drop(table).run(db)

		return len(bad_meta), len(bad_tables)

	#else
	check_collection_name(collection)

	meta = r.table('__METADATA__').get(collection).run(db)

	if meta is None:
		raise BadCollection('collection {} does not exist.'.format(collection))

	doing_init = meta.get('doing_init')
	appending_filenames = meta.get('appending_filenames')
	


	if not collection in r.table_list().run(db):
		raise BadCollection("this is a spurious collection.")

	if doing_init:
		do_delete(db, collection)
		return 'doing_init'

	if appending_filenames:
		bad_samples = [k for k in meta['samples'] if meta['samples'][k] in appending_filenames]
		result = r.table(collection) \
					.filter(r.row['IDs'].keys().set_intersection(appending_filenames) != [])\
					.replace(lambda x: r.branch(x['IDs'].keys().set_difference(appending_filenames) == [],
						None, # delete record
						x.merge({
							'IDs': r.literal(x['IDs'].without(appending_filenames)),
							'QUALs': r.literal(x['QUALs'].without(appending_filenames)),
							'FILTERs': r.literal(x['FILTERs'].without(appending_filenames)),
							'INFOs': r.literal(x['INFOs'].without(appending_filenames)),
							'samples': r.literal(x['samples'].without(bad_samples)),
							}))).run(db)
		
		r.table('__METADATA__').get(collection)\
			.replace(lambda x: x.merge({
				'vcfs': r.literal(x['vcfs'].without(appending_filenames)),
				'samples': r.literal(x['samples'].without(bad_samples))
				}).without('appending_filenames')).run(db)

		return appending_filenames, bad_samples, result['deleted'], result['replaced']

	return None
Example #6
def do_delete(db, collection):
	check_collection_name(collection)

	if not collection in r.table_list().run(db):
		return None

	r.table_drop(collection).run(db)
	r.table('__METADATA__').get(collection).delete().run(db)
	return True
Example #7
def init_table(name, **kwargs):
    """
    Initialize a table in the database.
    """
    with connect() as con:
        try:
            r.table_drop(name).run(con)
        except r.ReqlOpFailedError:
            pass
        r.table_create(name, **kwargs).run(con)
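A hypothetical call site for init_table; shards and replicas are standard table_create options that the **kwargs pass-through forwards:

# Recreate the 'events' table (hypothetical name) with two shards and two replicas.
init_table('events', shards=2, replicas=2)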
Example #8
    def test_connection_with_database(self):
        app = Flask(__name__)
        db = RethinkDB(app, db='test')

        with app.test_request_context():
            try:
                # Make sure RethinkDB is turned on!
                r.table_create('table').run(db.conn)
            except (RqlDriverError, RqlRuntimeError) as e:
                self.fail(e)
            else:
                # Do some cleanup
                r.table_drop('table').run(db.conn)
Example #9
    def setUp(self):

        self.db_name = 'radiowcs_test'
        assert self.db_name != 'radiowcs'
        self.table_name = 'test'

        self.db = database.Database()
        self.db.database_name = self.db_name
        self.db.table_name = self.table_name

        self.db.connect()

        self.connection = r.connect(host='localhost',
                                    port=28015,
                                    db=self.db_name,
                                    auth_key='',
                                    timeout=30)
        try:
            r.db_create(self.db_name).run(self.connection)
            r.table_create(self.table_name).run(self.connection)
        except r.RqlRuntimeError:
            print 'unittest setup: Drop table'
            r.table_drop(self.table_name).run(self.connection)
            r.table_create(self.table_name).run(self.connection)
        r.db(self.db_name).table(self.table_name).index_create('title').run(
            self.connection)
        r.db(self.db_name).table(self.table_name).index_create('artist').run(
            self.connection)
        r.db(self.db_name).table(self.table_name).index_create('date').run(
            self.connection)
        # 'out of order' insertions
        r.db(self.db_name).table(self.table_name).insert({
            'title': 'foobar',
            'artist': 'Selena',
            'date': '1430183323'
        }).run(self.connection)
        r.db(self.db_name).table(self.table_name).insert({
            'title': 'hello world',
            'artist': 'John',
            'date': '1430082566'
        }).run(self.connection)
        r.db(self.db_name).table(self.table_name).insert({
            'title': 'zombie apoc',
            'artist': 'xxJANExx',
            'date': '1430385845'
        }).run(self.connection)
        r.db(self.db_name).table(self.table_name).insert({
            'title': 'Black',
            'artist': 'Kettle',
            'date': '1430284300'
        }).run(self.connection)
Example #10
def reseed():
    db_list = r.db_list().run(conn)

    if db_name not in db_list:
        r.db_create(db_name).run(conn)

    table_list = r.table_list().run(conn)

    if products_table in table_list:
        r.table_drop(products_table).run(conn)

    r.table_create(products_table).run(conn)

    r.table(products_table).insert(test_products).run(conn)
Example #11
def flush_rethink_tables():
    tables_to_flush = [ table for table, flush in rethink_bags.flush.iteritems() if flush ]
    logger.debug("Tables that should be flushed: {}".format(tables_to_flush))

    current_tables = rethinkdb.table_list().coerce_to("array").run()
    logger.debug("Current tables in rethink: {}".format(current_tables))

    flushing_tables = list(set(current_tables).intersection(tables_to_flush))
    logger.info("Flushing tables in rethink: {}".format(flushing_tables))

    for table in flushing_tables:
        rethinkdb.table_drop(table).run()
        rethinkdb.table_create(table).run()
        logger.debug("Table {} flushed in rethink".format(table))
Example #12
def main():
    options = {
        'server': config['JIRA']
    }
    jira = JIRA(options, basic_auth=(config['USERNAME'], config['PASSWORD']))

    months = [
        ('2015-03', '2015-04'),
        ('2015-04', '2015-05'),
        ('2015-05', '2015-06'),
        ('2015-06', '2015-07'),
        ('2015-07', '2015-08'),
        ('2015-08', '2015-09'),
        ('2015-09', '2015-10'),
        ('2015-10', '2015-11'),
        ('2015-11', '2015-12'),
        ('2015-12', '2016-01'),
        ('2016-01', '2016-02'),
        ('2016-02', '2016-03'),
        ('2016-03', '2016-04')
    ]

    total_issues = 0
    bulk_add = []
    for month in months:
        print("Downloading issues for interval %s/%s" % month)
        jql = "created >= '%s-01' AND created < '%s-01'" % month
        issues_in_month = jira.search_issues(jql, maxResults=1000, json_result=True)
        issues = issues_in_month['issues']
        
        filtered_issues = filter_issues(issues)
        issues_count = len(issues)
        filtered_count = len(filtered_issues)
        
        assert filtered_count == issues_count

        total_issues = total_issues + issues_count

        bulk_add.extend(filtered_issues)

    print("Successfully downloaded %d issues" % total_issues)
    print("Loading %d issues into RethinkDB" % len(bulk_add))

    r.connect(config['RETHINKDB'], 28015, db='jira').repl()
    r.table_drop('issues').run()
    r.table_create('issues').run()
    r.table('issues').insert(bulk_add).run()

    print("OK! Bye")
Example #13
    def test_connection_with_inexisting_database(self):
        app = Flask(__name__)
        db = RethinkDB(app, db='doesnotexist')

        with app.test_request_context():
            try:
                # Make sure RethinkDB is turned on!
                # Specifying an inexisting database should raise an exception
                r.table_create('table').run(db.conn)
            except (RqlDriverError, RqlRuntimeError):
                pass
            else:
                # Do some cleanup
                r.table_drop('table').run(db.conn)
                self.fail("Should have raised a RqlDriverError")
Example #14
    def _reset_data(self, table):
        if table in rdb.table_list().run(self.session):
            result = rdb.table_drop(table).run(self.session)
            assert result['dropped'] == 1

        result = rdb.table_create(table).run(self.session)
        result = rdb.table(table).index_create('date').run(self.session)
        return result.get('created', 0) == 1
Example #15
    def _reset_data(self, table):
        if table in rdb.table_list().run(self.session):
            result = rdb.table_drop(table).run(self.session)
            assert result['dropped'] == 1

        result = rdb.table_create(table).run(self.session)
        result = rdb.table(table).index_create('date').run(self.session)
        return result.get('created', 0) == 1
Example #16
def CleanupOldState(client, blocklist):
    """
    Remove the tables for state that are no longer necessary

   :param SawtoothClient client: sawtooth.client.SawtoothClient for
       accessing the ledger
   :param list blocklist: list of block identifiers
    """

    # Build a list (rather than a lazy map) so repeated membership checks also work on Python 3.
    statenames = ['blk' + b for b in blocklist]
    tablelist = rethinkdb.table_list().run()
    for table in tablelist:
        if table.startswith('blk') and table not in statenames:
            try:
                logger.info('drop old state table %s', table)
                rethinkdb.table_drop(table).run()
            except:
                logger.exception('failed to drop state table %s', table)
Example #17
def drop_tables():
    from .registry import model_registry

    created_tables = r.table_list().run()
    for model_cls in model_registry.all().values():
        if model_cls._table in created_tables:
            result = r.table_drop(model_cls._table).run()
            if result['tables_dropped'] != 1:
                raise RuntimeError('Could not drop table %s for model %s' % (
                                   model_cls._table, model_cls.__name__))
Example #18
def drop_tables():
    from .registry import model_registry

    created_tables = r.table_list().run()
    for model_cls in model_registry.all().values():
        if model_cls._table in created_tables:
            result = r.table_drop(model_cls._table).run()
            if result['tables_dropped'] != 1:
                raise RuntimeError('Could not drop table %s for model %s' %
                                   (model_cls._table, model_cls.__name__))
Example #19
def LocalMain(config):
    """
    Main processing loop for the synchronization process
    """

    # pull database and collection names from the configuration and set up the
    # connections that we need
    dbhost = config.get('DatabaseHost', 'localhost')
    dbport = int(config.get('DatabasePort', 28015))
    dbname = config['DatabaseName']

    rconn = rethinkdb.connect(dbhost, dbport)
    rconn.repl()
    rconn.use(dbname)

    tablelist = rethinkdb.table_list().run()
    for table in tablelist:
        try:
            logger.info('drop table %s', table)
            rethinkdb.table_drop(table).run()
        except:
            logger.exception('failed to drop table %s', table)

    rconn.close()
Example #20
def delete_unused_tables(conn):
    run_rql(r.table_drop("userprofiles"), conn)
    run_rql(r.table_drop("usergroups"), conn)
    run_rql(r.table_drop("runs"), conn)
    run_rql(r.table_drop("reviews"), conn)
    run_rql(r.table_drop("review2item"), conn)
    run_rql(r.table_drop("machines"), conn)
    run_rql(r.table_drop("ui"), conn)
    run_rql(r.table_drop("elements"), conn)
    run_rql(r.table_drop("sample2sample"), conn)
    run_rql(r.db('mcpub').table_drop("sample2sample"), conn)
    run_rql(r.table_drop("shares"), conn)
    run_rql(r.table_drop("user2share"), conn)
    run_rql(r.table_drop("experimenttasks"), conn)
    run_rql(r.table_drop("experiment2experimenttask"), conn)
    run_rql(r.table_drop("experimenttask2process"), conn)
    run_rql(r.table_drop("experimentnotes"), conn)
    run_rql(r.table_drop("experiment2experimentnote"), conn)
    run_rql(r.table_drop("dataset2experimentnote"), conn)
Example #21
 def table_drop(cls):
     return r.table_drop(cls.__table_name__).run(get_conn())
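A hypothetical caller, assuming get_conn() returns an open connection as above and that the method lives on a model base class:

class Song(Model):                 # Model is a hypothetical base class providing table_drop
    __table_name__ = 'songs'

Song.table_drop()                  # drops the 'songs' table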
Example #22
 def drop_table(self):
     r.table_drop(self._table_name).run(self._conn)
Example #23
import rethinkdb as r
r.connect(port = 42865).repl()
r.table_drop("foo").run()
r.table_create("foo").run()
print r.table("foo").index_create("sid", lambda x: r.js("1")).run()
Example #24
 def delete_table(self, table):
     with self._get_conn() as conn:
         rdb.table_drop(table).run(conn)
Example #25
 def deleteTable(self, tname):
     conn = rdb.connect(db=DB_NAME)
     res = rdb.table_drop(tname).run(conn)
     print res
Example #26
import rethinkdb as r
r.connect(port=42865).repl()
r.table_drop("foo").run()
r.table_create("foo").run()
print r.table("foo").index_create("sid", lambda x: r.js("1")).run()
Example #27
 def table_drop(cls):
     return r.table_drop(cls.Meta.table_name).run(get_conn())
Example #28
def drop_table(table, conn):
    run(r.table_drop(table), conn)
Example #29
def delete_unused_tables(conn):
    run_rql(r.table_drop("userprofiles"), conn)
    run_rql(r.table_drop("usergroups"), conn)
    run_rql(r.table_drop("runs"), conn)
    run_rql(r.table_drop("reviews"), conn)
    run_rql(r.table_drop("review2item"), conn)
    run_rql(r.table_drop("machines"), conn)
    run_rql(r.table_drop("ui"), conn)
    run_rql(r.table_drop("elements"), conn)
    run_rql(r.table_drop("sample2sample"), conn)
    run_rql(r.db('mcpub').table_drop("sample2sample"), conn)
    run_rql(r.table_drop("shares"), conn)
    run_rql(r.table_drop("user2share"), conn)
    run_rql(r.table_drop("experimenttasks"), conn)
    run_rql(r.table_drop("experiment2experimenttask"), conn)
    run_rql(r.table_drop("experimenttask2process"), conn)
    run_rql(r.table_drop("experimentnotes"), conn)
    run_rql(r.table_drop("experiment2experimentnote"), conn)
    run_rql(r.table_drop("dataset2experimentnote"), conn)
Example #30
def retrieve_records(api_key, sensor_path, table_name,
    end_date=(datetime.datetime.strptime(time.strftime('%Y-%m-%d'),
        '%Y-%m-%d') - datetime.timedelta(days=1)).strftime('%Y-%m-%d'),
    start_date=None, json_chunk_size=5e3, verbosity=1):

    '''Pull records from Acyclica's API and write to RethinkDB.

    api_key [str]: the 41-character alphanumeric key you were given by Acyclica.
        Should be read in from an environment variable, encrypted if possible.
    sensor_path [str]: the path to Acyclica_sensors_CBD.csv
        (should be fetched automatically once we package this thing).
    table_name [str]: the name of the RethinkDB table that will be written. If
        a table of the same name already exists, it will be overwritten.
    end_date [str]: a date string of the form 'YYYY-MM-DD' specifying the last
        day of data to pull from Acyclica. Defaults to yesterday.
    start_date [str]: a date string of the form 'YYYY-MM-DD' specifying the first
        day of data to fetch from Acyclica. Defaults to None, which means only
        end_date will be fetched. Set this to 'prev_week' to fetch the full week
        starting 8 days ago and ending yesterday.
    json_chunk_size [int or float of form BASEeEXP]: lists passed to
        jumbo_write_json will be broken into chunks of this size. No need to
        modify unless you encounter memory use issues, in which case you should
        first try reducing the default value of 5,000.
    df_chunk_size [int or float of form BASEeEXP]: DataFrames passed to
        jumbo_write_df will be broken into chunks of this many rows.
        No need to modify unless you encounter memory use issues, in which
        case you should next try reducing the default value of 500,000.
    verbosity [int]: determines the number of reports that will be printed.
        0 = no reports
        1 = reports from this function only
        2 = more reports from this function and from subroutine
            jumbo_write_json.

    Calls jumbo_write_df, which calls jumbo_write_json.
    Must be connected to a RethinkDB instance before using this.

    Pull at minimum 1 day and at maximum 1 week of data in increments of 1
    day.'''

    #start timing
    start_time = time.time()

    #check for size limit errors
    # if df_chunk_size > 1e6:
    #     raise(Exception('Maximum df_chunk_size is 1,000,000.'))
    if json_chunk_size > 1e5:
        raise(Exception('Maximum json_chunk_size is 100,000. This size is \
            rarely a good idea.'))

    #check for end_date format error
    try:
        nul = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    except:
        raise(Exception('end_date must be of the form "YYYY-MM-DD".'))

    #set appropriate start dates based on input
    if start_date == 'prev_week':
        start_date = (datetime.datetime.strptime(end_date,
            '%Y-%m-%d') - datetime.timedelta(days=6)).strftime('%Y-%m-%d')
    elif start_date is None:
        start_date = end_date
    else: pass

    #check for start_date format error
    try:
        nul = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    except:
        raise(Exception('start_date must be of the form "YYYY-MM-DD".'))

    #add 23 h, 59 m, and 59 s to the end date (to grab the whole day)
    end_date = datetime.datetime.strptime(end_date,
        '%Y-%m-%d') + datetime.timedelta(hours=23, minutes=59, seconds=59)

    #convert datetime objects to unix time
    start_unix = int(time.mktime(datetime.datetime.strptime(start_date,
        '%Y-%m-%d').timetuple()))
    end_unix = int(time.mktime(end_date.timetuple()))

    #make sure the user isn't trying to grab more than a week of data, and that
    #end is after start
    if end_unix - start_unix > 604800:
        raise(Exception('Please specify a range of dates no greater than one week.'))
    if end_unix - start_unix < 0:
        raise(Exception('end_date must be later than start date.'))

    #determine how many days have been selected
    dif = end_unix - start_unix
    ndays = math.ceil(dif / (24 * 3600))

    #get sensor data
    sensors = pd.read_csv(sensor_path)
    # sensors = sensors.drop(['name', 'short_name','latitude','longitude'], axis=1)
    sensors.columns = ['IntersectionID','sensor']
    sensor_list = list(sensors['sensor'])

    if verbosity > 0:
        print('Preparing to acquire data for ' + str(ndays) + ' day(s) and ' \
            + str(len(sensor_list)) + ' sensors.')

    #remove tables and create them anew
    try:
        r.table_drop(table_name).run()

        if verbosity > 0:
            print('Table "' + table_name + '" already existed and has ' \
                + 'been deleted.')
    except: pass
    finally:
        r.table_create(table_name).run()

        if verbosity > 0:
            print('Created table "' + table_name + '".')

    #request and process one day at a time (roughly 5-10m records acquired per day)
    day_start_unix = start_unix
    for day in range(ndays):

        print('Acquiring records for day ' + str(day + 1) + ' of ' \
            + str(ndays) + '. May take several minutes.')

        #get endpoints for each iteration and (re)instantiate dataframe
        day_end_unix = day_start_unix + (23 * 3600) + 3599
        df = pd.DataFrame(columns=['Timestamp','MAC Hash','Strength','Serial'])

        #request and preprocess each sensor separately
        for i in range(len(sensor_list)):

            # sensorID = sensor_list[1]
            URL = "https://cr.acyclica.com/datastream/device/csv/time/" \
                + api_key + "/" + str(sensor_list[i]) + "/" \
                + str(day_start_unix) + "/" + str(day_end_unix)

            #get raw web content and read into a dataframe
            items = requests.get(URL).content
            newdf = pd.read_csv(io.StringIO(items.decode('utf-8')),
                usecols=['Timestamp','MAC Hash','Strength','Serial'])

            #round timestamp to nearest second
            newdf['Timestamp'] = newdf['Timestamp'].round().astype('int')

            #drop repeated reads within 1s, keeping read with highest strength
            strmaxes = newdf.groupby(['Timestamp',
                'MAC Hash'])['Serial'].transform(max)
            newdf = newdf[newdf['Serial'] == strmaxes]

            #append to main dataframe
            df = df.append(newdf, ignore_index=True)

            if verbosity == 2:
                if i + 1 in [15,30,45]:
                    print('Got data for ' + str(i + 1) + ' of ' \
                        + str(len(sensor_list)) \
                        + ' sensors. So far there are ' + str(len(df)) \
                        + ' reads for day ' + str(day + 1) + '.')

        del(newdf)

        # df = pd.read_csv('/home/mike/Desktop/untracked/aug5_df_justAfterAppending.csv')
        # print('WARNING: STILL READING LOCAL DATAFRAME FROM CSV')

        #drop repeated reads again, keeping read with highest strength
        strmaxes = df.groupby(['Timestamp',
            'MAC Hash'])['Serial'].transform(max)
        df = df[df['Serial'] == strmaxes]

        pre_filt_len = str(len(df))
        if verbosity > 0:
            print('Found ' + pre_filt_len + ' sensor reads for day ' \
                + str(day + 1) + '. Cleaning those now.')

        # df.to_csv('/home/mike/Desktop/untracked/aug5_df_justAfterAppending.csv', index=False)

        json_list = df_to_json_etc(df, verbosity, pre_filt_len, sensors)

        if verbosity > 0:
            print('Converted DataFrame to JSON list and grouped by hash. ' \
                + 'Passing list of length ' + str(len(json_list)) \
                + ' to jumbo_write_json.')

        # json_list = json_list[0:11000]

        #set verbosity for jumbo_write_json
        sil = False if verbosity == 2 else True

        #write list to rethink
        # while len(json_list): #runs as long as rows remain in the dataframe
        #
        #     #take a chunk of the dataframe and convert to json list
        #     l = min(len(df), int(df_chunk_size)) #get the first chunk_size lines, or all the rest if fewer
        #     chunk = df.iloc[0:l] #subset them from the df
        #     df = df.drop(df.index[0:l]) #drop those lines
        #     json_list = chunk.to_dict('records')

            # if verbosity > 0:
            #     print('Converting chunk of ' + str(l) + ' rows to JSON format.')

        jumbo_write_json(data=json_list, table_name=table_name,
            chunk_size=json_chunk_size, silent=sil)

    # if verbosity > 0:
    #     ndocs = r.table(table_name).count().run()
    #     print('Finished writing day of records. Wrote ' + str(ndocs) \
    #         + ' docs to table "' + table_name + '".')

        # insert df into table as JSON (calls subroutine jumbo_write_json)
        # jumbo_write_df(df=df, table_name=table_name,
        #     df_chunk_size=df_chunk_size, json_chunk_size=json_chunk_size,
        #     verbosity=verbosity)

        #increment day
        day_start_unix = day_start_unix + (24 * 3600)

    if verbosity > 0:
        run_time = round((time.time() - start_time) / 60, 2)
        print('Finished writing all records for ' + str(ndays) + ' day(s) ' \
            + 'in ' + str(run_time) + ' minutes.')
Example #31
def drop_db():
    tables = r.table_list().run(db.conn)
    for table in tables:
        r.table_drop(table).run(db.conn)
    print 'Tables have been dropped.'
Example #32
 def table_drop(cls):
     return r.table_drop(cls.Meta.table_name).run(get_conn())