def write_edges(self, fh, edge_func):
    """Stream every edge row from the database into *fh* in Pajek arc format.

    Parameters:
        fh        -- writable file-like object; receives the "*Arcs" header
                     then one "src dst weight" CRLF-terminated line per row,
                     with node ids translated through self.map.
        edge_func -- unused hook (kept for interface compatibility; see the
                     "could call external func here" note below).

    Side effects: prints a progress dot to stdout every 1000 edges and a
    summary line when done.
    """
    c = SSCursor(self.dbh)
    _start = time.time()
    try:
        c.execute(self.edges_query)
        # write the header:
        fh.write("*Arcs\r\n")
        # I need some duplicate protection
        count = 0
        res = c.fetchone()
        while res:
            (_to, _from, _weight) = res
            count += 1
            # could call external func here (edge_func is currently unused)
            # NULL weights default to 1.
            fh.write("%s %s %f\r\n" % (self.map[_to], self.map[_from],
                                       (_weight or 1)))
            if count % 1000 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            # get next record
            res = c.fetchone()
    finally:
        # BUGFIX: close the server-side cursor even if the query or a write
        # fails; previously an exception leaked the cursor.
        c.close()
    # Same bytes the old py2 `print` statement emitted (incl. its trailing
    # newline), but portable across Python versions.
    sys.stdout.write("\nWrote %i Edges in %f\n\n" % (count, time.time() - _start))
def write_edges(self, fh, edge_func): c = SSCursor(self.dbh) _start = time.time() c.execute(self.edges_query) # write the header: fh.write("*Arcs\r\n") # I need some duplicate protection count = 0 res = c.fetchone() while( res): (_to, _from, _weight) = res count +=1 # could call external func here fh.write("%s %s %f\r\n" %(self.map[_to], self.map[_from], (_weight or 1))) if(count % 1000 == 0): sys.stdout.write('.') sys.stdout.flush() # get next record res = c.fetchone() # end this function c.close() print "\nWrote %i Edges in %f\n" %(count, time.time() - _start)
def run_generator_get_query(connection, query):
    """Yield rows of *query* one at a time via a server-side cursor.

    Uses SSCursor so huge result sets stream from the server instead of
    being buffered in memory. On a database Error the traceback is printed
    and the generator simply stops yielding (best-effort behavior kept
    from the original).
    """
    cursor = SSCursor(connection)
    try:
        cursor.execute(query)
        for row in cursor:
            yield row
    except Error:
        print("An Error occurred")
        traceback.print_exc()
    finally:
        # BUGFIX: always release the server-side cursor; it previously
        # leaked both on Error and when the caller abandoned the generator.
        cursor.close()
def execute(self, query, args=None):
    """Run *query* through SSCursor.execute with nextset() disabled.

    The super implementation likes to call nextset() in a loop to
    scroll through anything left dangling. But we promise not to do that
    so we can save some effort.
    """
    # Shadow the class-level nextset() with a no-op on this instance only,
    # so the base execute's result-set scrolling does nothing.
    self.nextset = _noop # pylint:disable=attribute-defined-outside-init
    try:
        return SSCursor.execute(self, query, args)
    finally:
        # Drop the instance attribute so normal nextset() behavior is
        # restored for subsequent calls, even if execute raised.
        del self.nextset
def write_edges(self, fh, edge_func):
    """Stream edges to *fh* in Pajek "*Edges" format, writing each
    undirected pair at most once.

    Each pair is canonicalized to (lo, hi) and marked in
    self._biglist.matrix so duplicates are skipped.

    Parameters:
        fh        -- writable file-like object.
        edge_func -- unused hook (kept for interface compatibility).

    NOTE(review): `count` is incremented for every fetched row, including
    skipped duplicates, so the dots and the final "Wrote %i Edges" total
    count rows seen, not edges written — preserved as-is; confirm intent.
    """
    c = SSCursor(self.dbh)
    _start = time.time()
    try:
        c.execute(self.edges_query)
        # write the header:
        fh.write("*Edges\r\n")
        count = 0
        res = c.fetchone()
        while res:
            (_to, _from, _weight) = res
            count += 1
            # Canonical ordering for the undirected duplicate check.
            # BUGFIX: renamed from `min`/`max`, which shadowed the builtins.
            lo, hi = (_to, _from) if _to <= _from else (_from, _to)
            if self._biglist.matrix[lo][hi] == 1:
                # already written: skip this duplicate
                res = c.fetchone()
                continue
            self._biglist.matrix[lo][hi] = 1
            # could call external func here (edge_func is currently unused)
            # NULL weights default to 1.
            fh.write("%s %s %f\r\n" % (self.map[_to], self.map[_from],
                                       (_weight or 1)))
            if count % 50 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            # get next record
            res = c.fetchone()
    finally:
        # BUGFIX: close the server-side cursor even on error (was leaked).
        c.close()
    # Same bytes the old py2 `print` statement emitted (incl. its trailing
    # newline), but portable across Python versions.
    sys.stdout.write("\nWrote %i Edges in %f\n\n" % (count, time.time() - _start))
def import_data(self, limit=None):
    '''Import etsa database to given document and segment storage.

    Keyword arguments:
    limit - if not None, then import only `limit` number of first documents.
    '''
    # Lazy %-style logger args: formatting is skipped when INFO is disabled.
    self.logger.info('Importing ETSA data. Limit is %s', limit)
    self._pre_import_data_hook()
    cur = SSCursor(self._conn)
    try:
        cur.execute(self._get_query(limit))
        numprocessed = 0
        result = cur.fetchone()
        while result is not None:
            self._process_single(result)
            numprocessed += 1
            # Stop early once the requested number of documents is imported.
            if limit is not None and numprocessed >= limit:
                break
            result = cur.fetchone()
        # TODO: add multithreading
    finally:
        # BUGFIX: release the server-side cursor (previously never closed).
        cur.close()
    self.logger.info('Processed {0} documents!'.format(numprocessed))
dtype = 'f8' elif fun2 in [types.IntType, types.LongType]: dtype = 'i4' elif fun2 in [types.StringType]: dtype = '|S%d'%(internal_size,) descr.append((name, dtype)) return descr database_and_table, filename = sys.argv[1:] database_name, table_name = database_and_table.split('.') connection = MySQLdb.connect(host='imgdb02', user='******', passwd=getpass.getpass(), db=database_name) cursor = SSCursor(connection) cursor.execute('select count(*) from %s' % table_name) nrows, = cursor.fetchall()[0] print nrows, 'rows' cursor.execute('SELECT * from %s' % table_name) colnames = result_names(cursor) m = Measurements(filename) object_names = set(name.split('_', 1)[0] for name in colnames if '_' in name) print 'Object names:', ', '.join(object_names) columns = [] for i, (name, dtype) in enumerate(result_dtype(cursor)):
dtype = 'i4' elif fun2 in [types.StringType]: dtype = '|S%d' % (internal_size, ) descr.append((name, dtype)) return descr database_and_table, filename = sys.argv[1:] database_name, table_name = database_and_table.split('.') connection = MySQLdb.connect(host='imgdb02', user='******', passwd=getpass.getpass(), db=database_name) cursor = SSCursor(connection) cursor.execute('select count(*) from %s' % table_name) nrows, = cursor.fetchall()[0] print nrows, 'rows' cursor.execute('SELECT * from %s' % table_name) colnames = result_names(cursor) m = Measurements(filename) object_names = set(name.split('_', 1)[0] for name in colnames if '_' in name) print 'Object names:', ', '.join(object_names) columns = [] for i, (name, dtype) in enumerate(result_dtype(cursor)): if name == 'ImageNumber': image_number_index = i elif name == 'ObjectNumber':
if fun2 in [decimal.Decimal, types.FloatType]: dtype = "f8" elif fun2 in [types.IntType, types.LongType]: dtype = "i4" elif fun2 in [types.StringType]: dtype = "|S%d" % (internal_size,) descr.append((name, dtype)) return descr database_and_table, filename = sys.argv[1:] database_name, table_name = database_and_table.split(".") connection = MySQLdb.connect(host="imgdb02", user="******", passwd=getpass.getpass(), db=database_name) cursor = SSCursor(connection) cursor.execute("select count(*) from %s" % table_name) nrows, = cursor.fetchall()[0] print nrows, "rows" cursor.execute("SELECT * from %s" % table_name) colnames = result_names(cursor) m = Measurements(filename) object_names = set(name.split("_", 1)[0] for name in colnames if "_" in name) print "Object names:", ", ".join(object_names) columns = [] for i, (name, dtype) in enumerate(result_dtype(cursor)): if name == "ImageNumber": image_number_index = i