def go_main(self):
   # NOTE: You can override the default database (set in CONFIG) if you
   #       set self.cli_opts.database, which calls db_glue.DB.set_db_name.
   log.info('Schema-upgrade started on %s...' % (db_glue.DB.db_name,))
   # This is the user's connection.
   self.db = db_glue.new()
   # This is the admin's connection.
   self.pg_db = db_glue.new(conf.db_owner, use_transaction=False)
   # This is the list of schemas.
   self.schemas = instance_list(self.db)
   # When we imported pyserver above, we switched to the pyserver directory.
   # Switch to the directory where we'll find the '[0-9][0-9][0-9]-*.sql'
   # scripts.
   log.debug('go_main: changing to dir: %s' % (self.cli_opts.scripts_dir,))
   os.chdir(self.cli_opts.scripts_dir)
   # Is the database sufficiently new for us? If not, complain.
   if (len(self.db.table_columns('upgrade_event')) == 0):
      log.error('ERROR: table upgrade_event not found.')
      log.error('Upgrade through 083-upgrade_event.sql manually.')
      sys.exit(1)
   self.recently_vacuumed = False
   script_ct = 0
   time_0 = time.time()
   try:
      self.next_script = None
      self.next_schema = None
      self.setup_next_script()
      while self.next_script:
         script_ct += 1
         self.run_script()
         # Find the next script to run and the schema to run against.
         self.setup_next_script()
      self.vacuum_finally_maybe()
   except Exception, e:
      log.info('Exception: "%s" / %s' % (e, traceback.format_exc(),))
      sys.exit(1)
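# A minimal sketch of the connection pattern go_main uses, assuming the
# standard pyserver bootstrap (see the microsession script below for the
# sys.path setup). Everything here comes from calls visible in these
# snippets: db_glue.new() for the user's connection, and
# db_glue.new(conf.db_owner, use_transaction=False) for admin work such as
# VACUUM, which cannot run inside a transaction.
import pyserver_glue
import conf
from util_ import db_glue

user_db = db_glue.new()
admin_db = db_glue.new(conf.db_owner, use_transaction=False)
try:
   # The user handle is transactional; the admin handle autocommits.
   print user_db.sql("SELECT 1 AS one")[0]['one']
finally:
   user_db.close()
   admin_db.close()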
def ccp_cache_populate(self):
   # We cleared the db handle earlier, so get a new one, and lock it.
   g.assurt_soft(self.qb.db is None)
   self.qb.db = db_glue.new()
   # FIXME: What's gtfsdb_cache_edges? Or don't we care?
   #self.qb.db.transaction_begin_rw('gtfsdb_cache_edges',
   #                                'gtfsdb_cache_links')
   # EXPLAIN: Who are we competing with? Just other instances of this
   #          script?
   locked = self.qb.db.transaction_lock_try('gtfsdb_cache_links',
                                            caller='gtfsdb_build_cache')
   g.assurt(locked)
   self.qb.db.insert(
      'gtfsdb_cache_register',
      {'username': self.qb.username,
       'branch_id': self.qb.branch_hier[0][0],
       'revision_id': self.revision_id,
       'gtfs_caldate': self.tfeed_zipdate,
       }, {})
   self.ccp_clear_cache()
   self.ccp_save_cache()
   log.info('Committing transaction [ccp_cache_populate]')
   self.qb.db.transaction_commit()
   self.qb.db.close()
   self.qb.db = None
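# A hedged variant of the lock-and-commit flow above, wrapped in try/finally
# so the handle is released even if the cache rebuild raises midway. The
# lock and caller names are the ones ccp_cache_populate really uses; the
# rebuild() callable is hypothetical.
def cache_populate_guarded(qb, rebuild):
   qb.db = db_glue.new()
   try:
      locked = qb.db.transaction_lock_try('gtfsdb_cache_links',
                                          caller='gtfsdb_build_cache')
      g.assurt(locked)
      rebuild() # e.g., ccp_clear_cache() then ccp_save_cache()
      qb.db.transaction_commit()
   finally:
      qb.db.close()
      qb.db = None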
def main():
   global verbose
   op = optparse.OptionParser(usage=usage)
   op.add_option('-m', '--changenote', dest="changenote",
                 default=changenote_default)
   op.add_option('-v', '--verbose', action="store_true", dest="verbose")
   op.add_option('-e', '--wremail', action="store_true", dest="wremail")
   (options, args) = op.parse_args()
   if (len(args) == 0):
      # No grade argument was provided.
      hill_grade_min = hill_grade_min_default
   elif (len(args) == 1):
      hill_grade_min = args[0]
   else:
      op.error('More than one argument was provided.')
   try:
      hill_grade_min = float(hill_grade_min)
   except ValueError:
      hill_grade_min = -1
   if (hill_grade_min <= 0):
      op.error('Grade must be a positive number.')
   verbose = options.verbose
   wremail = options.wremail
   changenote = options.changenote
   db = db_glue.new()
   byway_hills_tag(db, hill_grade_min, changenote, wremail)
   db.commit()
   db.close()
def finalize_alerts(msgng_ids, username):
   g.assurt(msgng_ids)
   Watcher_Composer.emailed_usernames.add(username)
   msgng_ids_str = ','.join([str(x) for x in msgng_ids])
   rows_updated = 0
   if not DEV_SKIP_UPDATE_ITEM_EVENT_ALERT:
      # Getting a new database connection is expensive and not something
      # you'd want to do a lot if it can be avoided, but we want to commit
      # to the database and remember that we just emailed a user, in case
      # the code fails while processing the next email (so that when the
      # script is restarted, we don't send duplicate emails).
      db = db_glue.new()
      db.transaction_begin_rw()
      update_sql = (
         """
         UPDATE item_event_alert SET date_alerted = NOW()
         WHERE (date_alerted IS NULL) AND (messaging_id IN (%s))
         """ % (msgng_ids_str,))
      db.sql(update_sql)
      log.debug(
         'finalize_alerts: date_alerted: given: %s / updated: %d / user: %s'
         % (len(msgng_ids), db.curs.rowcount, username,))
      db.transaction_commit()
      rows_updated = db.curs.rowcount
      # Close the handle we opened above (the original leaked it).
      db.close()
   return rows_updated
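# Sketch of the caller-side batching this function is designed for: commit
# after each user so a crash cannot resend earlier emails. The
# alerts_by_user mapping is hypothetical example data.
alerts_by_user = {'alice': [101, 102], 'bob': [103]} # hypothetical IDs
for username, msgng_ids in alerts_by_user.items():
   n_marked = finalize_alerts(msgng_ids, username)
   log.debug('finalize_alerts: marked %d alerts for %s'
             % (n_marked, username,))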
def go(self):
   db = db_glue.new()
   # See that the UUID doesn't already exist.
   existings = db.sql(
      """
      SELECT value FROM key_value_pair WHERE key = 'cp_instance_uuid';
      """)
   if len(existings):
      raise Exception('UUID already exists!: %s' % (existings[0]['value'],))
   # Create a new UUID.
   # Python docs on uuid1: "Generate a UUID from a host ID, sequence number,
   # and the current time."
   instance_uuid = uuid.uuid1()
   # Insert the new UUID.
   db.transaction_begin_rw()
   db.insert(
      'key_value_pair',
      {'key': 'cp_instance_uuid',
       'value': str(instance_uuid),
       }, {})
   db.transaction_commit()
   db.close()
   # All done!
   log.debug('Inserted new UUID: %s' % (instance_uuid,))
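# Read-back sketch for the routine above, assuming the same key_value_pair
# table: fetch whatever UUID (if any) this instance has stored.
db = db_glue.new()
rows = db.sql(
   "SELECT value FROM key_value_pair WHERE key = 'cp_instance_uuid';")
if rows:
   print 'instance UUID: %s' % (rows[0]['value'],)
else:
   print 'no UUID stored yet'
db.close()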
def test_load_graph():
   db_cyclopath = db_glue.new()
   db_cyclopath.sql('set transaction isolation level serializable')
   tgraph = Trans_Graph()
   tgraph.load(db_cyclopath)
   log.debug('tgraph.size: %d' % (tgraph.size,))
   log.debug('tgraph.get_vertex: 1301221: %s',
             (tgraph.get_vertex("1301221"),))
   log.debug('tgraph.get_vertex: 1240905: %s',
             (tgraph.get_vertex("1240905"),))
   tgraph.destroy()
   db_cyclopath.close()
def db_vacuum(self, full_vacuum=False):
   time_0 = time.time()
   log.info('Vacuuming...')
   if not full_vacuum:
      db = db_glue.new(use_transaction=False)
      db.sql("VACUUM ANALYZE minnesota.geofeature (geometry);")
      db.close()
   else:
      # Vacuum and analyze.
      # EXPLAIN: ANALYZE vs. VACUUM ANALYZE vs. VACUUM FULL vs. CLUSTER.
      #          See also: vacuum analyze verbose.
      # NOTE: Should be database owner, lest some tables go unvacced.
      pg_db = db_glue.new(conf.db_owner, use_transaction=False)
      pg_db.sql("VACUUM ANALYZE;")
      pg_db.close()
   log.info('... Vacuum took %s' % (misc.time_format_elapsed(time_0),))
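# Usage sketch: the cheap path analyzes just the geofeature geometry; the
# full path connects as the database owner and vacuums every table, so it
# is the one to run after a bulk import. `job` is a hypothetical stand-in
# for whatever object owns db_vacuum.
job.db_vacuum()                  # quick: VACUUM ANALYZE one geometry column
job.db_vacuum(full_vacuum=True)  # slow: owner connection, whole database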
def compile_user_ids(self):
   db = db_glue.new()
   input_count = (len(self.cli_opts.email_addrs)
                  + len(self.cli_opts.usernames)
                  + len(self.cli_opts.userids))
   if self.cli_opts.email_addrs:
      self.get_user_ids_by_email_addr(db, self.cli_opts.email_addrs)
   if self.cli_opts.usernames:
      self.get_user_ids_by_username(db, self.cli_opts.usernames)
   if self.cli_opts.userids:
      self.get_user_ids_by_userid(db, self.cli_opts.userids)
   if input_count:
      log.debug('User supplied %d users / %d users okay to spam.'
                % (input_count, len(self.user_ids),))
   else:
      # Get all IDs from the database.
      log.debug('Getting all emails from database!')
      self.get_all_user_ids(db)
   if self.invalid_ids:
      log.debug('%d users have invalid email addresses: %s'
                % (len(self.invalid_ids), self.invalid_ids,))
   if self.not_okay:
      log.debug('%d users are not to be emailed: %s'
                % (len(self.not_okay), self.not_okay,))
   if not self.user_ids:
      log.error('No users are okay to spam.')
      sys.exit(0)
   seen_id = set()
   culled = []
   for user_id in self.user_ids:
      if user_id not in seen_id:
         seen_id.add(user_id)
         culled.append(user_id)
      else:
         log.debug('Ignoring duplicate user_id: %d' % (user_id,))
   self.user_ids = culled
   db.close()
def get_qb_cur(self):
   qb_cur = None
   username = self.wtem.created_by
   db = db_glue.new()
   rev = revision.Current(allow_deleted=False)
   (branch_id, branch_hier) = branch.Many.branch_id_resolve(
      db, self.wtem.branch_id, branch_hier_rev=rev)
   if branch_id is not None:
      g.assurt(branch_hier)
      qb_cur = Item_Query_Builder(db, username, branch_hier, rev)
      Query_Overlord.finalize_query(qb_cur)
   return qb_cur
def baseline_id(db):
   if Many.baseline_id_ is None:
      if db is None:
         db = db_glue.new()
      else:
         # Clone the db, since we might be in a dont_fetchall fetch.
         db = db.clone()
         log.verbose1('baseline_id: disabling dont_fetchall')
         db.dont_fetchall = False
      # Get the branch ID of the public base map.
      Many.baseline_id_ = int(db.sql(
         "SELECT cp_branch_baseline_id() AS bid")[0]['bid'])
      #log.debug('Many.baseline_id = %d' % (Many.baseline_id_,))
      g.assurt(Many.baseline_id_ > 0)
      db.close()
   return Many.baseline_id_
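# The class-level memoization above means only the first call pays for a
# query. A sketch of the expected behavior, assuming baseline_id is exposed
# on Many (passing None makes the helper open and close its own handle):
bid_1 = Many.baseline_id(None) # opens a handle, queries, caches the ID
bid_2 = Many.baseline_id(None) # served straight from Many.baseline_id_
g.assurt(bid_1 == bid_2)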
def main():
   global verbose
   op = optparse.OptionParser(usage=usage)
   op.add_option('-c', '--clean', action="store_true", dest="clean")
   op.add_option('-v', '--verbose', action="store_true", dest="verbose")
   (options, args) = op.parse_args()
   if (len(args) == 1):
      # Import elevation from a custom file.
      elevation.elevation_source_set(filename=args[0])
   clean = options.clean
   verbose = options.verbose
   db = db_glue.new()
   db.transaction_begin_rw('revision')
   insert_elevations(db, clean)
   db.transaction_commit()
   db.close()
def write_cfgfile(self):
   # Write the preamble.
   self.cfg_f.write(Gen_TC_Cfg.cfg_preamble)
   # We want layers for every instance for every branch for every skin.
   if self.cli_opts.ccp_instance:
      server_instances = [self.cli_opts.ccp_instance,]
   else:
      server_instances = conf.server_instances
   for cur_instance in server_instances:
      log.debug('write_cfgfile: processing instance: %s' % (cur_instance,))
      # This is a little... cheaty.
      self.qb.db.close()
      conf.instance_name = cur_instance
      # Reset the db so we reset SEARCH_PATH.
      self.qb.db = db_glue.new()
      # NOTE: If you want to exclude a branch from the tilecache.cfg, you
      #       should set its tile_skins to NULL or ''. You won't accomplish
      #       it just by disabling permissions for a branch: branch_iterate
      #       finds them all.
      # Ignore self.cli_args.branch_id and just go through all branches.
      self.branch_iterate(qb=self.qb,
                          branch_id=None,
                          branch_callback=self.output_cfg_for_branch,
                          debug_limit=debug_limit)
   # Write the postamble.
   self.write_postamble()
def load(self, keep_running=None):
   # Get a handle to the db. We'll get a fresh handle if updating again.
   # Bug 2688: ERROR: could not serialize access due to concurrent update
   #           This used to happen because we used a serializable trans-
   #           action, which we no longer do. Serializable transactions
   #           are an annoyance; it's better to do assertive locking.
   # NO: self.update_db = db_glue.new(trans_serializable=True)
   # Just get a new db handle, which sets up the transaction as read
   # committed, so we only see data from whence the db handle is created. So
   # once we call new(), our view of the data won't change. And since we're
   # updating a specific revision to Current(), we'll be a-okay, since the
   # data view doesn't change. But once it's done, we'll want to re-check
   # the latest revision ID and see if we have to update again. Which we do,
   # in load_wrap, by recycling the cursor...
   self.update_db = db_glue.new()
   try:
      # Forward the caller's keep_running (the original hard-coded None
      # here, silently discarding the argument).
      self.load_wrap(keep_running=keep_running)
   finally:
      self.update_db.close()
      self.update_db = None
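# Sketch of the re-check loop the comment above describes: each pass gets a
# fresh read-committed handle (a fixed snapshot of the data), and after the
# load we ask whether the latest revision moved while we worked. The graph
# object and the two revision helpers are hypothetical.
while True:
   graph.load()
   if latest_revision_id() == loaded_revision_id(graph):
      break # Our snapshot caught up; otherwise, loop and load again.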
def setup_qbs(self):
   username = self.wtem.created_by
   db = db_glue.new()
   if self.wtem.revision_id is None:
      rev = revision.Current()
   else:
      rev = revision.Historic(self.wtem.revision_id, allow_deleted=False)
   (branch_id, branch_hier) = branch.Many.branch_id_resolve(
      db, self.wtem.branch_id, branch_hier_rev=rev)
   if not branch_hier:
      raise Exception(
         'Branch with stack_id %d not found in database at %s.'
         % (self.wtem.branch_id, rev.short_name(),))
   self.qb = Item_Query_Builder(db, username, branch_hier, rev)
   # FIXME: This is a no-op self-assignment; sibling setup code assigns a
   #        fresh Item_Manager() here instead.
   self.qb.item_mgr = self.qb.item_mgr
   Query_Overlord.finalize_query(self.qb)
def main():
   # optparse doesn't support boolean types; this is a work-around.
   def check_bool(option, opt_str, value, parser):
      value = value.lower()
      if (value not in ['true', 'false', '1', '0']):
         parser.error(opt_str + " must be set to a boolean value.")
      setattr(parser.values, option.dest, value in ['true', '1'])
   # Parse args.
   # SYNC_ME: The option names are mapped to database column names
   #          in user_email.flag_db_map.
   op = optparse.OptionParser(usage=usage)
   op.add_option('-e', '--enable-email', type='string',
                 action='callback', callback=check_bool,
                 dest='enable-email')
   op.add_option('-r', '--enable-research-email', type='string',
                 action='callback', callback=check_bool,
                 dest='enable-research-email')
   op.add_option('-d', '--enable-wr-digest', type='string',
                 action='callback', callback=check_bool,
                 dest='enable-wr-digest')
   op.add_option('-a', '--dont-study', type='string',
                 action='callback', callback=check_bool, dest='dont-study')
   op.add_option('-b', '--bouncing', type='string',
                 action='callback', callback=check_bool, dest='bouncing')
   op.add_option('-l', '--login-permitted', type='string',
                 action='callback', callback=check_bool,
                 dest='login-permitted')
   (options, args) = op.parse_args()
   if (len(args) == 0):
      op.error('USER must be set')
   # FIXME: Who else locks this table? What about wiki? You just want to
   #        row lock, anyway...
   db = db_glue.new()
   db.transaction_begin_rw('user_')
   usernames = [args[0]]
   # If arg contains an @ symbol, assume it's an e-mail address and look up
   # the corresponding username(s).
   if (usernames[0].find('@') != -1):
      usernames = user_email.usernames_get(db, usernames[0])
   # Set specified flags.
   for username in usernames:
      for option in valid_flags:
         value = getattr(options, option)
         if (value is not None):
            user_email.flag_set(db, username, option, value)
            print 'setting %s to %s for %s' % (option, value, username,)
   db.transaction_commit()
   # Print flags and their values.
   for username in usernames:
      values = user_email.flags_get(db, username)
      print ('flags for %s, %s (db: %s):'
             % (username,
                user_email.addr_get(db, username, False),
                db_glue.DB.db_name,))
      for (key, value) in values.items():
         print '   %s = %s' % (key, value,)
   db.close()
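# Hypothetical example invocations (the flag names come from the option
# table above; the script name, username, and address are placeholders):
#
#   ./user_flags.py --enable-email true alice
#   ./user_flags.py --bouncing false [email protected]
#
# Passing an e-mail address instead of a username updates every account
# registered under that address, per the usernames_get branch above.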
def setup_qb_cur(self, all_errs, min_acl=Access_Level.viewer):
   # For both import and export, qb_src is used to retrieve items from the
   # database, and qb_cur is used to check the user's group accesses and
   # maybe to search for regions if a restrictive bbox is being imposed.
   # But qb_cur is also used during import to save changes to the database;
   # qb_cur is not used during export to save anything to the database.
   #
   # NOTE: On import, we row-lock on the grac tables, group_membership
   #       and new_item_policy. We also row-lock the destination branch.
   #       So other operations might block while this code runs.
   # CONFIRM: We don't lock anything on export, right?
   qb_cur = None
   username = self.mjob.wtem.created_by
   db = db_glue.new()
   rev = revision.Current(allow_deleted=False)
   (branch_id, branch_hier) = branch.Many.branch_id_resolve(
      db, self.mjob.wtem.branch_id, branch_hier_rev=rev)
   if branch_id is None:
      # EXPLAIN: How come we don't raise here, like we do in the else?
      #          Or, why doesn't the else block use all_errs?
      #          See: raise_error_on_branch.
      #          And if you look at export_cyclop.substage_initialize,
      #          you'll see that it assurts not all_errs, so I guess
      #          it expects us to raise.
      all_errs.append('setup_qb_cur: not a branch: %s at %s'
                      % (self.mjob.wtem.branch_id, str(rev),))
   else:
      g.assurt(branch_hier)
      g.assurt(branch_id == branch_hier[0][0])
      raise_error_on_branch = False
      if not self.spf_conf.branch_name:
         # This happens on export, since export_cyclop.substage_initialize
         # only sets branch_id when setting up the qbs. This is because it
         # uses the merge_job's branch_id, and since merge_job is just an
         # item_versioned item, all it has is its branch_id, as items do
         # not also store the branch name.
         self.spf_conf.branch_name = branch_hier[0][2]
      elif self.spf_conf.branch_name != branch_hier[0][2]:
         # The branch name in the shapefile should match.
         log.error('setup_qb_cur: branch_name mismatch: %s / %s'
                   % (self.spf_conf.branch_name, branch_hier[0][2],))
         raise_error_on_branch = True
      # else, the branch_name in the conf matches the one we loaded by ID.
      #
      if self.spf_conf.branch_id != branch_id:
         # But the branch ID we can tolerate being wrong.
         log.warning('setup_qb_cur: unexpected spf_conf.branch_id: %s'
                     % (self.spf_conf.branch_id,))
         # For the Metc Bikeways shapefile, this just means [lb] hasn't
         # updated the branch ID attribute in the shapefile...
         g.assurt(self.spf_conf.branch_name)
         (try_branch_id, try_branch_hier) = branch.Many.branch_id_resolve(
            db, self.spf_conf.branch_name, branch_hier_rev=rev)
         if try_branch_id == branch_id:
            log.warning('setup_qb_cur: ok: overriding branch_id: %s'
                        % (branch_id,))
            self.spf_conf.branch_id = branch_id
         else:
            log.error('setup_qb_cur: try_branch_id != branch_id: %s != %s'
                      % (try_branch_id, branch_id,))
            raise_error_on_branch = True
      if raise_error_on_branch:
         if conf.break_on_assurt:
            import pdb
            pdb.set_trace()
         raise GWIS_Error(
            'Shapefile branch ID and name do not match job details: '
            'work_item: %s/%s | shapefile: %s/%s'
            % (branch_hier[0][2], branch_hier[0][0],
               self.spf_conf.branch_name, self.spf_conf.branch_id,))
      qb_cur = Item_Query_Builder(db, username, branch_hier, rev)
      # Load both the raw geometry and the WKT geometry; we need to be
      # flexible.
      qb_cur.filters.skip_geometry_raw = False
      qb_cur.filters.skip_geometry_svg = True
      qb_cur.filters.skip_geometry_wkt = False
      # To save things, we need to set the group ID explicitly.
      self.user_group_id = User.private_group_id(qb_cur.db, username)
      qb_cur.user_group_id = self.user_group_id
      qb_cur.item_mgr = Item_Manager()
      # Load the attachment cache now. On import, if we create new
      # attributes (see metc_bikeways_defs.py), we'll keep it updated.
      qb_cur.item_mgr.load_cache_attachments(qb_cur)
      Query_Overlord.finalize_query(qb_cur)
      # FIXME: This comment. I like it. But it's not true... yet.
      #        Getting row lock in branches_prepare. So don't table lock.
      # Start the transaction, since the grac_mgr does some row locking.
      # We'll keep the rows locked until we've verified permissions.
      # FIXME: Verify you rollback and start a new 'revision' lock...
      #        or maybe just start a new 'revision' lock? or can you
      #        write to a Shapfile first and zip through the Shapefile
      #        to save quickly and not hold the lock so long?
      # BUG nnnn: Investigate using a row-level branch lock; for now,
      #           just lock rev.
      qb_cur.db.transaction_begin_rw()
      qb_cur.grac_mgr = Grac_Manager()
      load_grp_mmbrshps = True
      qb_cur.grac_mgr.prepare_mgr('user', qb_cur, load_grp_mmbrshps)
      # FIXME: Does qb_src need grac_mgr?
      #self.qb_src.grac_mgr = qb_cur.grac_mgr
      # Check user's minimum access level.
      target_branch = self.verify_branch_access(qb_cur, min_acl, all_errs)
      g.assurt(target_branch.stack_id == self.spf_conf.branch_id)
      if (self.spf_conf.branch_name
          and (self.spf_conf.branch_name != qb_cur.branch_hier[0][2])):
         log.warning('Unexpected spf_conf.branch_name: %s'
                     % (self.spf_conf.branch_name,))
         self.spf_conf.branch_name = qb_cur.branch_hier[0][2]
   self.qb_cur = qb_cur
   log.debug('setup_qb_cur: spf_conf: %s' % (str(self.spf_conf),))
def setup_qb_src(self, all_errs):
   qb_src = None
   username = self.mjob.wtem.created_by
   # The source qb is just for reading...
   db = db_glue.new()
   # ... but we'll be making temporary tables of stack IDs, so start a
   # transaction.
   db.transaction_begin_rw()
   # The byways in the conflated file were not marked deleted when they
   # were exported for conflation, so we don't need to look for deleted.
   # NOTE: The original MetC import script based rev off
   #       self.target_branch.last_merge_rid rather than what's in the
   #       config file.
   g.assurt(self.spf_conf.revision_id)
   revision_id = self.spf_conf.revision_id
   rev = revision.Historic(revision_id, allow_deleted=False)
   # Make the branch_hier.
   (branch_id, branch_hier) = branch.Many.branch_id_resolve(
      db, self.mjob.wtem.branch_id, branch_hier_rev=rev)
   # Put it all together.
   if branch_id is None:
      all_errs.append('setup_qb_src: not a branch: %s at %s'
                      % (self.mjob.wtem.branch_id, str(rev),))
      # Don't forget to close. Not too big a deal, but oddly, if we don't,
      # the next attempt by this thread to get the db will result in the
      # same DB() object being created and the same self.conn returned,
      # and then db_glue complains that it's got that self and conn in
      # conn_lookup.
      db.close()
      db = None
   else:
      g.assurt(branch_hier)
      qb_src = Item_Query_Builder(db, username, branch_hier, rev)
      # It's nice to have both the raw, opaque hexadecimal geometry as well
      # as the WKT geometry, since not all APIs are that flexible, and also
      # because it's easier to work with WKT in Python and OSGeo (and also
      # because [lb] hasn't seen an OGR fcn. to convert raw PostGIS geom,
      # but he's probably not looking hard enough).
      qb_src.filters.skip_geometry_raw = False
      qb_src.filters.skip_geometry_svg = True
      qb_src.filters.skip_geometry_wkt = False
      qb_src.item_mgr = Item_Manager()
      # FIXME: Is this right? What about tgraph?
      qb_src.item_mgr.load_cache_attachments(qb_src)
      Query_Overlord.finalize_query(qb_src)
      # Check that user has viewer access on the source branch.
      source_branch = self.verify_branch_access(
         qb_src, Access_Level.viewer, all_errs)
      # NOTE: The job itself is already access-controlled, so generally the
      #       user has arbiter access to the branch at the Current revision.
   self.qb_src = qb_src
def branch_iterate(self, qb, branch_id, branch_callback, debug_limit=None):
   log.debug('branch_iterate: getting tmp db')
   # Get a new qb, and rather than clone the db, get a new connection, lest
   # we cannot commit ("Cannot commit when multiple cursors open").
   db = db_glue.new()
   username = '' # Using gia_userless, so not really needed.
   branch_hier = copy.copy(qb.branch_hier)
   qb_branches = Item_Query_Builder(db, username, branch_hier, qb.revision)
   if branch_id:
      # Find just the one.
      qb_branches.branch_hier_limit = 1
   # Indicate our non-pyserverness so that gia_userless works.
   qb_branches.request_is_local = True
   qb_branches.request_is_script = True
   # Get all active branches, regardless of user rights.
   qb_branches.filters.gia_userless = True
   # If debugging, just grab a handful of results.
   if debug_limit:
      qb_branches.use_limit_and_offset = True
      qb_branches.filters.pagin_count = int(debug_limit)
   g.assurt(qb_branches.sql_clauses is None)
   # For whatever reason, use a generator. So, in the future, when there
   # are millions of branches, this script runs peacefully.
   g.assurt(not qb_branches.db.dont_fetchall)
   qb_branches.db.dont_fetchall = True
   # Leaving as client: qb_branches.filters.min_access_level
   qb_branches.sql_clauses = branch.Many.sql_clauses_cols_all.clone()
   Query_Overlord.finalize_query(qb_branches)
   branches = branch.Many()
   branches.search_get_items(qb_branches)
   log.info('branch_iterate: found %d branches.'
            % (qb_branches.db.curs.rowcount,))
   # Skipping:
   #  prog_log = Debug_Progress_Logger(copy_this=debug_prog_log)
   #  prog_log.log_freq = 1
   #  prog_log.loop_max = qb_branches.db.curs.rowcount
   generator = branches.results_get_iter(qb_branches)
   for branch_ in generator:
      # NOTE: We don't correct self.qb, so callers should be sure not to
      #       use its branch_hier thinking it represents this branch_.
      branch_callback(branch_)
      # Skipping:
      #  if prog_log.loops_inc():
      #     break
   # Skipping prog_log.loops_fin()
   generator.close()
   log.debug('branch_iterate: closing tmp db')
   qb_branches.db.close()
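# The dont_fetchall + results_get_iter pairing above is the detail worth
# copying: it streams rows through the cursor instead of fetching the whole
# result set into memory. A hedged sketch of the same shape, where
# handle_branch is a hypothetical per-item callback:
qb.db.dont_fetchall = True
branches = branch.Many()
branches.search_get_items(qb)
generator = branches.results_get_iter(qb)
try:
   for branch_ in generator:
      handle_branch(branch_)
finally:
   generator.close()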
def main():
   db = db_glue.new()
   db.transaction_begin_rw() # Handle db locking?
   # Parse args.
   op = optparse.OptionParser(usage=usage)
   op.add_option('-u', '--user', dest='user')
   op.add_option('-i', '--ip', dest='ip')
   op.add_option('-y', '--year', type='int', dest='year')
   op.add_option('-m', '--minute', type='int', dest='minute')
   op.add_option('-d', '--day', type='int', dest='day')
   op.add_option('-b', '--ban', dest='ban')
   op.add_option('-a', '--active', action='store_true', dest='active')
   op.add_option('-g', '--get', action='store_true', dest='get')
   op.add_option('-r', '--remove', action='store_true', dest='rem')
   op.set_defaults(ban=None, active=False, get=False, rem=False,
                   user=None, ip=None, year=None, minute=None, day=None)
   (clopts, args) = op.parse_args()
   if (clopts.ban is None
       and (clopts.year is not None
            or clopts.day is not None
            or clopts.minute is not None)):
      op.error(
         '--year, --day and --minute are only allowed if --ban is used')
   # Delegate command options.
   if (clopts.ban is not None):
      if (clopts.year is None
          and clopts.day is None
          and clopts.minute is None):
         # Permanent ban.
         end_date = datetime.timedelta(days=999999999)
      else:
         days = 0
         minutes = 0
         if (clopts.minute is not None):
            minutes = clopts.minute
         if (clopts.day is not None):
            days = clopts.day
         if (clopts.year is not None):
            days += 365 * clopts.year
         end_date = datetime.timedelta(days=days, minutes=minutes)
      if (clopts.ban != 'full' and clopts.ban != 'public'):
         op.error("--ban argument SCOPE must be 'public', or 'full'")
      if (clopts.user is None and clopts.ip is None):
         op.error('Must specify at least one of --user or --ip')
      do_ban(db, clopts.user, clopts.ip, clopts.ban, end_date)
   elif (clopts.rem):
      if (clopts.user is None and clopts.ip is None):
         op.error('Must specify at least one of --user or --ip')
      do_clear_ban(db, clopts.user, clopts.ip, clopts.rem)
   else:
      do_get(db, clopts.user, clopts.ip, not clopts.get)
   db.transaction_commit()
   db.close()
def clone(self, skip_clauses=False, skip_filtport=False,
          db_get_new=False, db_clone=False):
   sql_clauses = None
   if (self.sql_clauses is not None) and (not skip_clauses):
      # FIXME: I [lb] think this operation is a wash. I think all the times
      #        that clone() is called, the caller always sets sql_clauses
      #        itself. So we're just wasting string math here. I think.
      sql_clauses = self.sql_clauses.clone()
   if not skip_filtport:
      viewport = self.viewport
      # FIXME: Shouldn't we be cloning filters?
      #filters = self.filters
      # FIXME: copy.copy is not perfect. Should really be our own clone, or
      #        maybe a deepcopy. But is this even that important?
      filters = copy.copy(self.filters)
      use_filters_and_viewport = self.use_filters_and_viewport
      use_limit_and_offset = self.use_limit_and_offset
   else:
      viewport = None
      filters = None
      use_filters_and_viewport = False
      use_limit_and_offset = False
   g.assurt(not (db_get_new and db_clone))
   if db_get_new:
      db = db_glue.new()
   elif db_clone:
      db = self.db.clone()
   else:
      db = self.db
   # HMMM: [lb] is not sure if we need to copy or not. But if we do copy,
   #       we need to maintain the relationship between the revision and
   #       branch_hier.
   branch_hier = copy.copy(self.branch_hier)
   if self.revision == self.branch_hier[0][1]:
      # revision.Current, revision.Historic
      g.assurt(self.revision.gids == self.branch_hier[0][1].gids)
      g.assurt(id(self.revision) == id(self.branch_hier[0][1]))
      rev = branch_hier[0][1]
   elif self.revision is not None:
      # revision.Updated
      rev = self.revision.clone()
   else:
      rev = self.revision # I.e., None
   qb = Item_Query_Builder(db,
                           self.username,
                           # This makes a new list but the tuples are shared
                           # (so callers should not, e.g., hier[0][1] = ...)
                           branch_hier,
                           rev,
                           viewport,
                           filters,
                           self.user_id)
   # Already got: qb.username
   # Already got: qb.user_id
   qb.user_group_id = self.user_group_id
   # Already got: qb.branch_hier
   # Already got: qb.revision
   # Already got: qb.viewport
   # Already got: qb.filters
   # Except for gia_userless, which we always copy.
   if ((skip_filtport)
       and (not self.username)
       and (self.filters.gia_userless)):
      qb.filters.gia_userless = True
   qb.sql_clauses = sql_clauses
   # Skipping: qb.confirm_leafiness = self.confirm_leafiness
   qb.use_filters_and_viewport = use_filters_and_viewport
   qb.use_limit_and_offset = use_limit_and_offset
   # FIXME: Really clone diff_group? And diff_hier? Probably if copying
   #        revision.Diff object...
   qb.diff_group = self.diff_group
   # Skipping: qb.diff_items = self.diff_items
   # [mm] has enabled diff_counterparts cloning so that the functions that
   # create temporary tables can name their tables appropriately to avoid
   # the same table being created twice (funny why that happens...)
   # e.g. item_manager::load_feats_and_attcs_load_stack_ids(..)
   # (2013.05.14)
   qb.diff_counterparts = self.diff_counterparts
   qb.diff_hier = self.diff_hier
   qb.request_is_local = self.request_is_local
   qb.request_is_script = self.request_is_script
   qb.request_is_a_test = self.request_is_a_test
   qb.request_is_secret = self.request_is_secret
   qb.cp_maint_lock_owner = self.cp_maint_lock_owner
   # Copy the session ID since we may have copied filters.gia_use_sessid.
   try:
      qb.session_id = self.session_id
   except AttributeError, e:
      pass
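# Picking a db strategy when cloning, per the flags above: share the handle
# (the default), clone it, or open a brand-new connection when the copy
# must commit independently of the original. `qb` is an existing
# Item_Query_Builder.
qb_shared = qb.clone()               # same db handle as qb
qb_cloned = qb.clone(db_clone=True)  # self.db.clone(): separate cursor
qb_fresh = qb.clone(db_get_new=True) # db_glue.new(): its own transaction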
def go_main(self):
   log.debug('Starting')
   time_0 = time.time()
   if (self.cli_opts.cache_table_drop or self.cli_opts.cache_table_create):
      if self.cli_opts.cache_table_drop:
         self.gtfsdb_cache_delete()
      if self.cli_opts.cache_table_create:
         self.gtfsdb_cache_create()
      log.info('Committing transaction [go_main]')
      self.qb.db.transaction_commit()
   else:
      os.chdir(self.dname_gtfsdb)
      self.tools_check()
      # Download the transit archive.
      self.gtfs_download()
      # Get the date. There are three ways we can get it (well, there are
      # up to three different dates we can get). We use the date to compare
      # against saved archives, to know if we really need to update our
      # cache and restart the route planner (i.e., if the archive hasn't
      # changed, we can do nothing).
      self.gtfs_get_feed_dates()
      # If we really downloaded the archive, keep a copy of it.
      if not self.tfeed_not_retrieved:
         self.gtfs_archive()
      self.cache_prepare()
      # If a new transit feed was downloaded, or if the gtfs database or
      # the graphserver database are missing, rebuild the gtfs and gserver
      # dbs.
      if ((not self.tfeed_not_retrieved)
          or (not os.path.exists(conf.transitdb_filename))
          or (not os.path.exists(self.fname_transit_gdb))):
         self.gtfsdb_compile()
         self.graphserver_import()
         self.graphserver_inspect()
      else:
         log.debug('Transit feed up-to-date; skipping compile.')
      self.files_fixperms()
      if not self.cache_up_to_date:
         self.ccp_cache_populate()
         #log.info('Vacuuming the database')
         db = db_glue.new(use_transaction=False)
         db.sql("VACUUM ANALYZE;")
         db.close()
      else:
         log.debug('Transit cache up-to-date; skipping cache.')
   log.debug('gtfsdb_build_cache: complete: %s'
             % (misc.time_format_elapsed(time_0),))
def main():
   # Initialization.
   global db
   db = db_glue.new()
   db.transaction_begin_rw('revision')
   print 'Connected to database'
   # Clean up invalid loops.
   db.sql("""
      UPDATE byway_segment
      SET start_node_id = 0, end_node_id = 0
      WHERE start_node_id = end_node_id
        AND start_node_id != 0
        AND Distance(StartPoint(geometry), EndPoint(geometry)) > %g"""
      % (epsilon))
   print 'Reset %d invalid loops' % (db.rowcount())
   # Create nodes for byway segments.
   row_count = 0
   while True:
      b = byway_row()
      row_count += 1
      if (b is None):
         break
      if (row_count % 13 == 1):
         #db.sql("ANALYZE")
         print ('%d rows remain at %s'
                % (db.sql("""
                      SELECT count(*) FROM byway_segment
                      WHERE start_node_id = 0 OR end_node_id = 0
                      """)[0]['count'],
                   util.nowstr()))
      # New start point ID and geometry.
      if (b['start_node_id'] == 0):
         (b['start_node_id_new'],
          b['start_point_new']) = node_get(b, 'start')
      else:
         b['start_node_id_new'] = b['start_node_id']
         b['start_point_new'] = b['start_point']
      # New end point ID and geometry.
      if (b['end_node_id'] == 0):
         if (b['is_loop']):
            b['end_node_id_new'] = b['start_node_id_new']
            b['end_point_new'] = b['start_point_new']
         else:
            (b['end_node_id_new'], b['end_point_new']) = node_get(b, 'end')
      else:
         b['end_node_id_new'] = b['end_node_id']
         b['end_point_new'] = b['end_point']
      # Update byway.
      db.sql("""UPDATE byway_segment
                SET start_node_id = %(start_node_id_new)d,
                    end_node_id = %(end_node_id_new)d,
                    geometry = SetPoint(SetPoint(geometry,
                                                 %(point_count)d - 1,
                                                 '%(end_point_new)s'),
                                        0, '%(start_point_new)s')
                WHERE id = %(id)d AND version = %(version)d""" % b)
   # Save to database.
   print 'done'
   db.transaction_commit()
   db.close()
   print 'Done. You may want to VACUUM ANALYZE;'
def commit_qbs(self, do_commit, commit_msg,
               skip_vacuum=False, skip_geometry=False):
   g.assurt(self.qb_cur is not None)
   if (self.qb_cur.item_mgr.rid_new is not None) and (self.target_groups):
      # Both of these operations take a little bit of time.
      #
      # To make the new revision, we call cp_revision_geosummary_update,
      # which takes a while. And to commit everything, well, e.g.,
      # committing five hundred thousand rows takes a while.
      time_0 = time.time()
      log.info('Saving new revision: %s...'
               % (self.qb_cur.item_mgr.rid_new,))
      # NOTE: We could use the host or IP address of the machine when the
      #       user submitted the work job, but we also have a non-anonymous
      #       username, so setting the host to 'localhost' doesn't really
      #       matter.
      host = 'localhost'
      # Make group_revisions for the public group and the shared group.
      groups_ids = []
      for grp_mship, acl_id in self.target_groups.iteritems():
         groups_ids.append(grp_mship.group_id)
      g.assurt(len(groups_ids) > 0)
      # FIXME: Make sure the new revision is marked not revertable:
      #        that would be crazy-silly if someone was able to issue a
      #        revert request on it!
      Item_Manager.revision_save(
         self.qb_cur,
         self.qb_cur.item_mgr.rid_new,
         self.qb_cur.branch_hier,
         host,
         self.mjob.wtem.created_by,
         commit_msg,
         groups_ids,
         activate_alerts=False,
         processed_items=None,
         reverted_revs=None,
         skip_geometry_calc=False,
         skip_item_alerts=False)
      # Claim the new revision ID.
      revision.Revision.revision_claim(self.qb_cur.db,
                                       self.qb_cur.item_mgr.rid_new)
      # Claim the new stack IDs.
      self.qb_cur.item_mgr.finalize_seq_vals(self.qb_cur.db)
      log.info('... new revision took %s'
               % (misc.time_format_elapsed(time_0),))
   g.assurt(self.qb_cur is not None)
   time_0 = time.time()
   if do_commit:
      log.debug('Committing the database transaction.')
      # BUG 2688: Use transaction_retryable?
      self.qb_cur.db.transaction_commit()
   elif self.qb_cur is not None:
      log.debug('Rolling back the database!!')
      self.qb_cur.db.transaction_rollback()
   log.info('... %s took %s'
            % ('Commit' if do_commit else 'Rollback',
               misc.time_format_elapsed(time_0),))
   # FIXME: Put in debug? And then delete this...
   skip_vacuum = True
   if do_commit and not skip_vacuum:
      self.db_vacuum()
   if do_commit:
      time_0 = time.time()
      # Update the revision's geometry approximation.
      # FIXME: Hopefully, this is faster after a vacuum.
      db = db_glue.new()
      db.transaction_begin_rw()
      branch_hier = self.qb_cur.branch_hier
      revision.Revision.geosummary_update(db,
                                          self.qb_cur.item_mgr.rid_new,
                                          branch_hier,
                                          groups_ids,
                                          skip_geometry)
      db.transaction_commit()
      db.close()
      log.info('... Rev Geom took %s'
               % (misc.time_format_elapsed(time_0),))
   # FIXME: Test this: 2013.10.30: Do like commit.py, and call
   #        do_post_commit (which just tickles Mr. Do!) and call routed_hup
   #        (which sends an interrupt to the route daemon).
   if do_commit:
      log.debug('commit_qbs: signalling Mr. Do!')
      self.qb_cur.item_mgr.do_post_commit(self.qb_cur)
      log.debug('commit_qbs: signalling route daemon')
      command_base.Op_Handler.routed_hup(self.qb_cur.db)
   self.qb_cur.item_mgr.rid_new = None
def go_main(self):
   # Get the content templates.
   content_plain_f = open(self.cli_opts.content_plain)
   content_plain = content_plain_f.read()
   content_plain_f.close()
   content_html_f = open(self.cli_opts.content_html)
   content_html = content_html_f.read()
   content_html_f.close()
   # Assemble the recipients.
   # The file should be of the form
   #
   #   username\temail_address
   #
   # PERFORMANCE: Cyclopath circa 2012 doesn't have that many users
   # (~5,000) so we can load all the emails into memory. If we end up with
   # lots more users, this operation might take a sizeable bite of memory.
   recipients = []
   user_ids = []
   recipients_f = open(self.cli_opts.recipient_file)
   try:
      deprecation_warned = False
      for line in recipients_f:
         line = line.strip()
         # NOTE: Skip comment lines.
         if line and (not line.startswith('#')):
            try:
               fake_uid = 0
               username, email = line.split('\t')
               # NOTE: unsubscribe_proof is unknown since we don't
               #       select from db, which is why this path is
               #       deprecated.
               unsubscribe_proof = ''
               recipients.append(
                  (fake_uid, username, email, unsubscribe_proof,))
               if not deprecation_warned:
                  log.warning('Using username/email file is deprecated.')
                  deprecation_warned = True
            except ValueError:
               user_id = int(line)
               user_ids.append(user_id)
   except ValueError:
      log.error('The format of the recipient file is unexpected / line: %s'
                % (line,))
      raise
   finally:
      recipients_f.close()
   if recipients and user_ids:
      log.error(
         'Please specify only "username, email" or "user IDs" but not both')
      sys.exit(0)
   db = db_glue.new()
   if user_ids:
      extra_where = ("id IN (%s)"
                     % (",".join([str(x) for x in user_ids]),))
      (valid_ids, invalid_ids, not_okay, user_infos, info_lookup) = (
         User.spam_get_user_info(
            db, extra_where, sort_mode='id ASC', make_lookup=True,
            ignore_flags=self.cli_opts.ignore_flags))
      if invalid_ids or not_okay:
         log.error('%s%s'
                   % ('Please recheck the user ID list: ',
                      '%d okay / %d invalid / %d not_okay'
                      % (len(valid_ids), len(invalid_ids),
                         len(not_okay),)))
         log.error('not_okay: %s' % (not_okay,))
         sys.exit(0)
      g.assurt(len(set(valid_ids)) == len(set(user_infos)))
      g.assurt(len(set(valid_ids)) == len(set(user_ids)))
      # Resort according to the input.
      for uid in user_ids:
         # NOTE: info_tuple is formatted: (user_id, username, email,)
         recipients.append(info_lookup[uid])
   all_okay = True
   for info_tuple in recipients:
      if not User.email_valid(info_tuple[2]):
         log.error('Invalid email for user %s: %s'
                   % (info_tuple[1], info_tuple[2],))
         all_okay = False
   if not all_okay:
      sys.exit(0)
   log.debug('Found %d recipients.' % (len(recipients),))
   if not recipients:
      log.info('No one to email. Bye!')
      sys.exit(0)
   # Always send a copy to us, too.
   g.assurt(conf.internal_email_addr)
   unsubscribe_proof = ''
   recipients.append(
      (0, 'Cyclopath Team', conf.internal_email_addr, unsubscribe_proof,))
   # Combine recipients if bcc'ing.
   if self.cli_opts.bcc_size:
      addr_lists = []
      addrs_processed = 0
      while addrs_processed < len(recipients):
         last_index = addrs_processed + self.cli_opts.bcc_size
         bcc_list = recipients[addrs_processed:last_index]
         g.assurt(bcc_list)
         addrs_processed += self.cli_opts.bcc_size
         addr_lists.append(bcc_list)
      recipients = addr_lists
      # 2012.11.12: Using bcc is not cool. Don't do it.
      log.error('BCC is too impersonal. Please consider not using it.')
      g.assurt(False)
   # Process the recipients one or many at a time.
   prompted_once = False
   prog_log = Debug_Progress_Logger(loop_max=len(recipients))
   # MAYBE: Don't log for every email?
   #prog_log.log_freq = prog_log.loop_max / 100.0
   for recipient_or_list in recipients:
      email_unames = []
      # Make the To and Bcc headers.
      if self.cli_opts.bcc_size:
         g.assurt(False) # DEVs: Reconsider using BCC.
                         # Otherwise you cannot personalize messages, i.e.,
                         # with usernames of private UUID links.
         # Use a generic user name, since there are multiple recipients.
         msg_username = '******'
         # Send the email to ourselves...
         recipient_email = self.cli_opts.mail_from
         recipient_addr = ('"Cyclopath.org" <%s>'
                           % (self.cli_opts.mail_from,))
         # ...and Bcc everyone else.
         email_addrs = []
         for recipient in recipient_or_list:
            # C.f. emailer.check_email, but using Bcc is deprecated, so
            # don't worry about it.
            msg_username = recipient[1]
            recipient_email = recipient[2]
            really_send = False
            if ((len(conf.mail_ok_addrs) == 1)
                and ('ALL_OKAY' in conf.mail_ok_addrs)):
               log.debug('go_main: conf says ALL_OKAY: %s'
                         % (recipient_addr,))
               really_send = True
            elif recipient_email in conf.mail_ok_addrs:
               log.debug('go_main: email in mail_ok_addrs: %s'
                         % (recipient_addr,))
               really_send = True
            elif not conf.mail_ok_addrs:
               log.error('go_main: mail_ok_addrs is not set: %s'
                         % (recipient_addr,))
            else:
               # This is a dev. machine and we don't want to email users.
               log.debug('go_main: skipping non-dev email: %s'
                         % (recipient_addr,))
            if really_send:
               log.debug('Emailing user at: %s' % (recipient_addr,))
               email_addr = ('"%s" <%s>'
                             % (msg_username, recipient_email,))
               email_addrs.append(email_addr)
               email_unames.append(msg_username)
         addrs_str = ','.join(email_addrs)
         addr_bcc = 'Bcc: %s\n' % (addrs_str,)
         unsubscribe_proof = ''
         unsubscribe_link = ''
      else:
         # This is just a normal, send-directly-to-one-user email.
         msg_username = recipient_or_list[1]
         recipient_email = recipient_or_list[2]
         recipient_addr = ('"%s" <%s>' % (msg_username, recipient_email,))
         email_unames.append(recipient_email)
         addr_bcc = ''
         unsubscribe_proof = recipient_or_list[3]
         unsubscribe_link = Emailer.make_unsubscribe_link(
            'user_unsubscribe', recipient_email, unsubscribe_proof)
         # To test the unsubscribe feature, try a link like this:
         #  http://ccpv3/gwis?request=user_unsubscribe&[email protected]&proof=asdasdasd
      db.close()
      the_msg = Emailer.compose_email(
         self.cli_opts.mail_from,
         msg_username,
         recipient_addr,
         unsubscribe_proof,
         unsubscribe_link,
         self.cli_opts.email_subject,
         content_plain,
         content_html,
         addr_bcc)
      if not prompted_once:
         do_send = self.ask_permission(the_msg)
         if not do_send:
            log.warning('Canceled by user. Bye!')
            sys.exit(0)
         prompted_once = True
      # NOTE: Emailer.send_email will check conf.mail_ok_addrs.
      # ALSO: This is the only place/caller/script that uses do_not_email.
      #       It's really just for testing, and this is the last stop.
      if not self.cli_opts.do_not_email:
         Emailer.send_email(
            email_unames,
            the_msg,
            prog_log,
            self.cli_opts.delay_time,
            self.cli_opts.dont_shake)
   # end: for recipient_or_list in recipients.
   prog_log.loops_fin()
   % (os.path.abspath(os.curdir),)))
import pyserver_glue

import conf
import g
from util_ import db_glue
from item import waypoint
import route
import util

def clean(s):
   return re.sub(r'\s+', ' ', s.strip())

# Init.
db = db_glue.new()
db.transaction_begin_rw('revision')
rid = db.revision_create()

body = TidyTools.getbody('../misc/tcbc-bikeshops.html')
rows = body.findall('.//center/table/tbody/tr')
pb = util.Progress_Bar(len(rows))

for row in rows:
   cols = row.findall('./td')
   name = cols[0].find('.//strong').text
   if (name is None):
      name = cols[0].find('./strong/a').text
   assert (name is not None)
   name = clean(name)
   link = cols[0].find('.//a')
MICROSESSION_DURATION = 30

# SYNC_ME: Search: Scripts: Load pyserver.
import os
import sys
sys.path.insert(
   0, os.path.abspath('%s/../../util' % (os.path.abspath(os.curdir),)))
import pyserver_glue

import conf
import g
from util_ import db_glue
import util

db = db_glue.new()
db.transaction_begin_rw()

print 'Microsession duration is %d seconds' % (MICROSESSION_DURATION)

print 'Truncating apache_event_session'
db.sql("TRUNCATE apache_event_session")

print 'Loading apache_event'
# NOTE: We use an explicit cursor to avoid loading the whole result set,
#       which is quite huge, into memory.
curs = db.conn.cursor()
curs.execute("""SELECT
                   COALESCE(username, client_host) as user_,
                   timestamp_tz as time_start,
                   timestamp_tz + '%d seconds'::interval as time_end
def setup_qb_src(self, all_errs):

    qb_src = None

    username = self.mjob.wtem.created_by

    # The source qb is just for reading...
    db = db_glue.new()
    # ... but we'll be making temporary tables of stack IDs, so start a
    # transaction.
    db.transaction_begin_rw()

    # The byways in the conflated output were not marked deleted when they
    # were exported for conflation, so we don't need to look for deleted.
    # NOTE: The original MetC import script based rev off
    #       self.target_branch.last_merge_rid rather than what's in the
    #       config file.
    g.assurt(self.spf_conf.revision_id)
    revision_id = self.spf_conf.revision_id
    rev = revision.Historic(revision_id, allow_deleted=False)

    # Make the branch_hier.
    (branch_id, branch_hier) = branch.Many.branch_id_resolve(
        db, self.mjob.wtem.branch_id, branch_hier_rev=rev)

    # Put it all together.
    if branch_id is None:
        all_errs.append(
            'setup_qb_src: not a branch: %s at %s'
            % (self.mjob.wtem.branch_id, str(rev),))
        # Don't forget to close. Not too big a deal, but oddly, if we don't,
        # the next attempt by this thread to get the db will result in the
        # same DB() object being created and the same self.conn returned,
        # and then db_glue complains that it's got that self and conn in
        # conn_lookup.
        db.close()
        db = None
    else:
        g.assurt(branch_hier)
        qb_src = Item_Query_Builder(db, username, branch_hier, rev)
        # It's nice to have both the raw, opaque hexadecimal geometry as
        # well as the WKT geometry, since not all APIs are that flexible,
        # and also because it's easier to work with WKT in Python and OSGeo
        # (and also because [lb] hasn't seen an OGR fcn. to convert raw
        # PostGIS geom, but he's probably not looking hard enough).
        qb_src.filters.skip_geometry_raw = False
        qb_src.filters.skip_geometry_svg = True
        qb_src.filters.skip_geometry_wkt = False
        qb_src.item_mgr = Item_Manager()
        # FIXME: Is this right? What about tgraph?
        qb_src.item_mgr.load_cache_attachments(qb_src)
        Query_Overlord.finalize_query(qb_src)
        # Check that user has viewer access on the source branch.
        source_branch = self.verify_branch_access(
            qb_src, Access_Level.viewer, all_errs)
        # NOTE: The job itself is already access-controlled, so generally
        #       the user has arbiter access to the branch at the Current
        #       revision.

    self.qb_src = qb_src
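# Callers signal failure through the all_errs accumulator rather than by
# raising. A hedged sketch of the expected calling convention; merge_job_
# is an illustrative stand-in for whatever object owns setup_qb_src.

all_errs = []
merge_job_.setup_qb_src(all_errs)
if all_errs:
    # No usable source qb: merge_job_.qb_src is None, and the db handle
    # was already closed inside setup_qb_src.
    for err in all_errs:
        print err
else:
    qb_src = merge_job_.qb_src # Read-only qb pinned at a Historic rev.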
def clone(self, skip_clauses=False, skip_filtport=False,
          db_get_new=False, db_clone=False):

    sql_clauses = None
    if (self.sql_clauses is not None) and (not skip_clauses):
        # FIXME: I [lb] think this operation is a wash. I think all the
        #        times that clone() is called, the caller always sets
        #        sql_clauses itself. So we're just wasting string math
        #        here. I think.
        sql_clauses = self.sql_clauses.clone()

    if not skip_filtport:
        viewport = self.viewport
        # FIXME: Shouldn't we be cloning filters?
        # filters = self.filters
        # FIXME: copy.copy is not perfect. Should really be our own clone,
        #        or maybe a deepcopy. But is this even that important?
        filters = copy.copy(self.filters)
        use_filters_and_viewport = self.use_filters_and_viewport
        use_limit_and_offset = self.use_limit_and_offset
    else:
        viewport = None
        filters = None
        use_filters_and_viewport = False
        use_limit_and_offset = False

    g.assurt(not (db_get_new and db_clone))
    if db_get_new:
        db = db_glue.new()
    elif db_clone:
        db = self.db.clone()
    else:
        db = self.db

    # HMMM: [lb] is not sure if we need to copy or not. But if we do copy,
    #       we need to maintain the relationship between the revision and
    #       branch_hier.
    branch_hier = copy.copy(self.branch_hier)
    if self.revision == self.branch_hier[0][1]:
        # revision.Current, revision.Historic
        g.assurt(self.revision.gids == self.branch_hier[0][1].gids)
        g.assurt(id(self.revision) == id(self.branch_hier[0][1]))
        rev = branch_hier[0][1]
    elif self.revision is not None:
        # revision.Updated
        rev = self.revision.clone()
    else:
        rev = self.revision # I.e., None

    qb = Item_Query_Builder(
        db,
        self.username,
        # This makes a new list but the tuples are shared
        # (so callers should not, e.g., hier[0][1] = ...)
        branch_hier,
        rev,
        viewport,
        filters,
        self.user_id)

    # Already got: qb.username
    # Already got: qb.user_id
    qb.user_group_id = self.user_group_id
    # Already got: qb.branch_hier
    # Already got: qb.revision
    # Already got: qb.viewport
    # Already got: qb.filters
    # Except for gia_userless, which we always copy.
    if ((skip_filtport)
            and (not self.username)
            and (self.filters.gia_userless)):
        qb.filters.gia_userless = True
    qb.sql_clauses = sql_clauses
    # Skipping: qb.confirm_leafiness = self.confirm_leafiness
    qb.use_filters_and_viewport = use_filters_and_viewport
    qb.use_limit_and_offset = use_limit_and_offset
    # FIXME: Really clone diff_group? And diff_hier? Probably if copying
    #        revision.Diff object...
    qb.diff_group = self.diff_group
    # Skipping: qb.diff_items = self.diff_items
    # [mm] has enabled diff_counterparts cloning so that the functions that
    # create temporary tables can name their tables appropriately to avoid
    # the same table being created twice (funny why that happens...)
    # e.g. item_manager::load_feats_and_attcs_load_stack_ids(..)
    # (2013.05.14)
    qb.diff_counterparts = self.diff_counterparts
    qb.diff_hier = self.diff_hier
    qb.request_is_local = self.request_is_local
    qb.request_is_script = self.request_is_script
    qb.request_is_a_test = self.request_is_a_test
    qb.request_is_secret = self.request_is_secret
    qb.cp_maint_lock_owner = self.cp_maint_lock_owner
    # Copy the session ID since we may have copied filters.gia_use_sessid.
    try:
        qb.session_id = self.session_id
    except AttributeError:
        pass

    return qb
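# The three db modes above trade isolation for cost. A hedged usage sketch;
# the variable names are illustrative only, and qb stands for an existing
# Item_Query_Builder.

shared_qb = qb.clone()                 # Reuse self.db: cheapest; shares the
                                       # same cursor and transaction.
cursor_qb = qb.clone(db_clone=True)    # New cursor on the same connection.
worker_qb = qb.clone(db_get_new=True)  # Fresh connection: needed when the
                                       # clone must commit independently
                                       # (cf. branch_iterate's "Cannot commit
                                       # when multiple cursors open").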
def setup_qb_cur(self, all_errs, min_acl=Access_Level.viewer):

    # For both import and export, qb_src is used to retrieve items from the
    # database, and qb_cur is used to check the user's group accesses and
    # maybe to search for regions if a restrictive bbox is being imposed.
    # But qb_cur is also used during import to save changes to the database;
    # qb_cur is not used during export to save anything to the database.
    #
    # NOTE: On import, we row-lock on the grac tables, group_membership
    #       and new_item_policy. We also row-lock the destination branch.
    #       So other operations might block while this code runs.
    # CONFIRM: We don't lock anything on export, right?

    qb_cur = None

    username = self.mjob.wtem.created_by

    db = db_glue.new()

    rev = revision.Current(allow_deleted=False)

    (branch_id, branch_hier) = branch.Many.branch_id_resolve(
        db, self.mjob.wtem.branch_id, branch_hier_rev=rev)

    if branch_id is None:
        # EXPLAIN: How come we don't raise here, like we do in the else?
        #          Or, why doesn't the else block use all_errs?
        #          See: raise_error_on_branch.
        #          And if you look at export_cyclop.substage_initialize,
        #          you'll see that it assurts not all_errs, so I guess
        #          it expects us to raise.
        all_errs.append(
            'setup_qb_cur: not a branch: %s at %s'
            % (self.mjob.wtem.branch_id, str(rev),))
    else:
        g.assurt(branch_hier)
        g.assurt(branch_id == branch_hier[0][0])
        raise_error_on_branch = False
        if not self.spf_conf.branch_name:
            # This happens on export, since export_cyclop.substage_initialize
            # only sets branch_id when setting up the qbs. This is because it
            # uses the merge_job's branch_id, and since merge_job is just an
            # item_versioned item, all it has is its branch_id, as items do
            # not also store the branch name.
            self.spf_conf.branch_name = branch_hier[0][2]
        elif self.spf_conf.branch_name != branch_hier[0][2]:
            # The branch name in the shapefile should match.
            log.error('setup_qb_cur: branch_name mismatch: %s / %s'
                      % (self.spf_conf.branch_name, branch_hier[0][2],))
            raise_error_on_branch = True
        # else, the branch_name in the conf matches the one we loaded by ID.

        if self.spf_conf.branch_id != branch_id:
            # But the branch ID we can tolerate being wrong.
            log.warning('setup_qb_cur: unexpected spf_conf.branch_id: %s'
                        % (self.spf_conf.branch_id,))
            # For the MetC Bikeways shapefile, this just means [lb] hasn't
            # updated the branch ID attribute in the shapefile...
            g.assurt(self.spf_conf.branch_name)
            (try_branch_id, try_branch_hier) = branch.Many.branch_id_resolve(
                db, self.spf_conf.branch_name, branch_hier_rev=rev)
            if try_branch_id == branch_id:
                log.warning('setup_qb_cur: ok: overriding branch_id: %s'
                            % (branch_id,))
                self.spf_conf.branch_id = branch_id
            else:
                log.error(
                    'setup_qb_cur: try_branch_id != branch_id: %s != %s'
                    % (try_branch_id, branch_id,))
                raise_error_on_branch = True

        if raise_error_on_branch:
            if conf.break_on_assurt:
                import pdb;pdb.set_trace()
            raise GWIS_Error(
                'Shapefile branch ID and name do not match job details: '
                'work_item: %s/%s | shapefile: %s/%s'
                % (branch_hier[0][2], branch_hier[0][0],
                   self.spf_conf.branch_name, self.spf_conf.branch_id,))

        qb_cur = Item_Query_Builder(db, username, branch_hier, rev)

        # Load both the raw geometry and the WKT geometry; we need to be
        # flexible.
        qb_cur.filters.skip_geometry_raw = False
        qb_cur.filters.skip_geometry_svg = True
        qb_cur.filters.skip_geometry_wkt = False

        # To save things, we need to set the group ID explicitly.
        self.user_group_id = User.private_group_id(qb_cur.db, username)
        qb_cur.user_group_id = self.user_group_id

        qb_cur.item_mgr = Item_Manager()
        # Load the attachment cache now. On import, if we create new
        # attributes (see metc_bikeways_defs.py), we'll keep it updated.
        qb_cur.item_mgr.load_cache_attachments(qb_cur)

        Query_Overlord.finalize_query(qb_cur)

        # FIXME: This comment. I like it. But it's not true... yet.
        #        Getting row lock in branches_prepare. So don't table lock.
        #
        # Start the transaction, since the grac_mgr does some row locking.
        # We'll keep the rows locked until we've verified permissions.
        # FIXME: Verify you rollback and start a new 'revision' lock...
        #        or maybe just start a new 'revision' lock? Or can you
        #        write to a Shapefile first and zip through the Shapefile
        #        to save quickly and not hold the lock so long?
        # BUG nnnn: Investigate using a row-level branch lock; for now,
        #           just lock rev.
        qb_cur.db.transaction_begin_rw()

        qb_cur.grac_mgr = Grac_Manager()
        load_grp_mmbrshps = True
        qb_cur.grac_mgr.prepare_mgr('user', qb_cur, load_grp_mmbrshps)

        # FIXME: Does qb_src need grac_mgr?
        # self.qb_src.grac_mgr = qb_cur.grac_mgr

        # Check user's minimum access level.
        target_branch = self.verify_branch_access(qb_cur, min_acl, all_errs)
        g.assurt(target_branch.stack_id == self.spf_conf.branch_id)
        if (self.spf_conf.branch_name
                and (self.spf_conf.branch_name != qb_cur.branch_hier[0][2])):
            log.warning('Unexpected spf_conf.branch_name: %s'
                        % (self.spf_conf.branch_name,))
            self.spf_conf.branch_name = qb_cur.branch_hier[0][2]

    self.qb_cur = qb_cur

    log.debug('setup_qb_cur: spf_conf: %s' % (str(self.spf_conf),))
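# The ordering above matters: open the rw transaction first, let the
# grac_mgr take its row locks, and only then verify access, so a failed
# permission check never mutates anything. A hedged restatement of that
# skeleton; the transaction_rollback call on the failure path is an assumed
# name (the listing shows only transaction_begin_rw and transaction_commit),
# mirroring db_glue's transaction_* naming.

qb_cur.db.transaction_begin_rw()     # 1. open rw transaction ('revision' lock)
qb_cur.grac_mgr = Grac_Manager()
qb_cur.grac_mgr.prepare_mgr('user', qb_cur, True) # 2. row-lock grac tables
target_branch = self.verify_branch_access(qb_cur, min_acl, all_errs) # 3. check
if all_errs:
    qb_cur.db.transaction_rollback() # assumed API: drop locks on failure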
def branch_iterate(self, qb, branch_id, branch_callback, debug_limit=None):

    log.debug('branch_iterate: getting tmp db')

    # Get a new qb, and rather than clone the db, get a new connection, lest
    # we cannot commit ("Cannot commit when multiple cursors open").
    db = db_glue.new()
    username = '' # Using gia_userless, so not really needed.
    branch_hier = copy.copy(qb.branch_hier)
    qb_branches = Item_Query_Builder(db, username, branch_hier, qb.revision)

    if branch_id:
        # Find just the one.
        qb_branches.branch_hier_limit = 1

    # Indicate our non-pyserverness so that gia_userless works.
    qb_branches.request_is_local = True
    qb_branches.request_is_script = True

    # Get all active branches, regardless of user rights.
    qb_branches.filters.gia_userless = True

    # If debugging, just grab a handful of results.
    if debug_limit:
        qb_branches.use_limit_and_offset = True
        qb_branches.filters.pagin_count = int(debug_limit)

    g.assurt(qb_branches.sql_clauses is None)

    # Use a generator, so that in the future, when there are millions of
    # branches, this script runs peacefully.
    g.assurt(not qb_branches.db.dont_fetchall)
    qb_branches.db.dont_fetchall = True

    # Leaving as client: qb_branches.filters.min_access_level

    qb_branches.sql_clauses = branch.Many.sql_clauses_cols_all.clone()

    Query_Overlord.finalize_query(qb_branches)

    branches = branch.Many()
    branches.search_get_items(qb_branches)

    log.info('branch_iterate: found %d branches.'
             % (qb_branches.db.curs.rowcount,))

    # Skipping:
    #   prog_log = Debug_Progress_Logger(copy_this=debug_prog_log)
    #   prog_log.log_freq = 1
    #   prog_log.loop_max = qb_branches.db.curs.rowcount

    generator = branches.results_get_iter(qb_branches)

    for branch_ in generator:
        # NOTE: We don't correct self.qb, so callers should be sure not to
        #       use its branch_hier thinking it represents this branch_.
        branch_callback(branch_)
        # Skipping:
        #   if prog_log.loops_inc():
        #      break

    # Skipping prog_log.loops_fin()

    generator.close()

    log.debug('branch_iterate: closing tmp db')
    qb_branches.db.close()
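# A hedged usage sketch of branch_iterate: collect every active branch's
# name. some_script, qb, branch_names, and collect_name are illustrative
# names only.

branch_names = []

def collect_name(branch_):
    branch_names.append(branch_.name)

some_script.branch_iterate(qb, branch_id=None, branch_callback=collect_name)
# With branch_id set, branch_hier_limit=1 restricts the walk to that one
# branch; a debug_limit of, say, 5 pages off after five rows via pagin_count.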
def main():
    op = optparse.OptionParser(usage=usage)
    op.add_option('-n', '--name', dest='name', default='name')
    op.add_option('-l', '--latitude', dest='latitude', default='latitude')
    op.add_option('-g', '--longitude', dest='longitude', default='longitude')
    op.add_option('-c', '--comments', dest='comments', default='comments')
    op.add_option('-t', '--tag', dest='tag', default='tag')
    op.add_option('-d', '--delimiter', dest='delimiter', default=',')
    op.add_option('-m', '--changenote', dest='changenote',
                  default='Import points from CSV file')
    (opts, args) = op.parse_args()
    if (len(args) == 0):
        op.error('CSVFILE must be specified.')
    csvfile = args[0]

    # Make sure required columns exist.
    data = csv.DictReader(open(csvfile), delimiter=opts.delimiter)
    r = data.next()
    if (r.get(opts.name) is None
            or r.get(opts.latitude) is None
            or r.get(opts.longitude) is None):
        op.error('One or more required fields not found in column names.')

    db = db_glue.new()
    db.transaction_begin_rw()

    # Re-open file and import points into temporary table.
    db.sql('''CREATE TEMPORARY TABLE point_tmp
                 (name text, comments text, tag text, geometry geometry)''')
    data = csv.DictReader(open(csvfile), delimiter=opts.delimiter)
    for r in data:
        name = r.get(opts.name)
        lat = r.get(opts.latitude)
        lon = r.get(opts.longitude)
        tag = r.get(opts.tag)
        comments = r.get(opts.comments)
        if lat == '' or lon == '':
            print 'Latitude or longitude not specified for this point; skipping'
        else:
            db.sql('''INSERT INTO point_tmp (name, comments, tag, geometry)
                      VALUES (%(name)s, %(comments)s, LOWER(%(tag)s),
                              ST_Transform(
                                 ST_SetSRID(MakePoint(%(lon)s, %(lat)s),
                                            %(srid_latlon)s),
                                 %(srid_default)s))''',
                   {'name': name,
                    'comments': comments,
                    'tag': tag,
                    'lon': lon,
                    'lat': lat,
                    'srid_latlon': conf.srid_latlon,
                    'srid_default': conf.default_srid})

    # Save points falling within coverage_area to the real point table.
    rid = db.revision_create()
    for r in db.sql('''SELECT name, comments, tag, geometry FROM point_tmp
                       WHERE ST_Contains(
                          (SELECT geometry FROM coverage_area), geometry)'''):
        # Create point.
        r['valid_starting_rid'] = rid
        r['id'] = db.sequence_get_next('feature_id_seq')
        db.sql('''INSERT INTO point (id, version, deleted, type_code, name,
                                     comments, valid_starting_rid,
                                     valid_before_rid, z, geometry)
                  VALUES (%(id)s, 1, false, 2, %(name)s, %(comments)s,
                          %(valid_starting_rid)s, cp_rid_inf(), 140,
                          %(geometry)s)''', (r))
        # Apply tag, if applicable.
        if (r['tag'] is not None and r['tag'] != ''):
            db.sql('''INSERT INTO tag (version, deleted, label,
                                       valid_starting_rid, valid_before_rid)
                      SELECT 1, false, %s, %s, cp_rid_inf()
                      WHERE NOT EXISTS
                         (SELECT id FROM tag WHERE label=%s)''',
                   (r['tag'], rid, r['tag']))
            db.sql('''INSERT INTO tag_point (version, deleted, tag_id,
                                             point_id, valid_starting_rid,
                                             valid_before_rid)
                      VALUES (1, false,
                              (SELECT id FROM tag WHERE label=%s),
                              %s, %s, cp_rid_inf())''',
                   (r['tag'], r['id'], rid))

    db.revision_save(rid, permission.public, socket.gethostname(), '_script',
                     opts.changenote)
    db.transaction_commit()
    db.close()
    print 'Committed revision %s.' % (rid,)
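# A hedged example of driving this importer. The script name, file name, and
# column headers below are made up; the flags map the CSV's headers to the
# fields the script expects.
#
#   $ python import_points.py -n title -l lat -g lon -t category \
#         -m 'Import park points' parks.csv
#
# where parks.csv might look like:
#
#   title,lat,lon,category,comments
#   Minnehaha Falls,44.9153,-93.2111,park,Waterfall overlook
#
# Rows outside coverage_area are silently dropped by the ST_Contains filter,
# and tags are lower-cased on the way into point_tmp.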