def getPasswd(self, name, ip):
  """Reset a user's password to a fresh random one and mail it out.

  The 'google' account is a reserved back-door used to control the box
  (bug#36271); its password is never changed here, since losing it could
  make the machine inaccessible.

  Args:
    name: username whose password is being reset.
    ip: requesting IP address, included in the notification mail.
  Returns:
    true on success, false otherwise (module-level boolean constants).
  """
  # Special back-door account -- refuse to touch it.
  if name == 'google':
    logging.info("Refusing to set password for user %s" % name)
    return false
  fresh_passwd = password.createRandomPasswd(PASSWORD_LENGTH)
  if not self.check_update_user(name, None, fresh_passwd):
    logging.error("couldn't set password to user %s" % name)
    return false
  # Password stored -- mail it to the user and note it in the admin UI.
  SendMail.send(self.cfg, self.getEmail(name), false,
                M.MSG_FORGOTPASSWORDSUBJECT,
                M.MSG_FORGOTPASSWORD % (fresh_passwd, ip), false)
  self.cfg.writeAdminRunnerOpMsg(
      "A new password has been sent to your email address")
  return true
def getPasswd(self, name, ip): """ This sets a new password for a user and mails it to the user. (We touch or keep it) returns success status (boolean) """ # Refuse changing password for username google. this is a special username # that we use as a back-door for controling the box. Changing this password # may make it inaccessible (bug#36271) if name == 'google': logging.info("Refusing to set password for user %s" % name) return false newPassword = password.createRandomPasswd(PASSWORD_LENGTH) if self.check_update_user(name, None, newPassword): SendMail.send(self.cfg, self.getEmail(name), false, M.MSG_FORGOTPASSWORDSUBJECT, M.MSG_FORGOTPASSWORD % (newPassword, ip), false) self.cfg.writeAdminRunnerOpMsg( "A new password has been sent to your email address") return true logging.error("couldn't set password to user %s" % name) return false
def genstatusreport(self):
  """Refresh the crawl-summary snapshot and mail a system status report.

  When SEND_ENTERPRISE_STATUS_REPORT is set, composes a report from the
  current system status map plus the crawl summary and mails it to the
  administrator.  The ENT_CRAWL_SUMMARY snapshot is refreshed regardless.

  Returns:
    0 (always).
  """
  # Generate report:
  if self.cfg.getGlobalParam('SEND_ENTERPRISE_STATUS_REPORT'):
    STATUS_STRING = [
        "OK",
        "CAUTION",
        "WARNING",
    ]
    SUMMARY_STRING = {
        "global-overall-urls-crawled": M.MSG_URL_CRAWLED_SINCE_YESTERDAY,
        "global-overall-urls-crawl-error": M.MSG_URL_ERROR_SINCE_YESTERDAY,
    }
    # get system status
    system_status = self.gsa_status_logic.GetSystemStatusMap()
    cur_time = time.strftime("%Y-%m-%d %H:%M:%S",
                             time.localtime(time.time()))
    subject = M.MSG_SYSTEM_STATUS_REPORT % cur_time
    # Overall health is the worst (numerically highest) individual status.
    # Guard against an empty status map: max() on an empty sequence raises
    # ValueError, so default to 0 ("OK") when there are no entries.
    health = max([s[0] for s in system_status.values()] or [0])
    report = []
    report.append("System Status: %s" % STATUS_STRING[health])
    for param in self.gsa_status_logic.status_params_:
      if system_status.has_key(param):
        status = system_status[param]
        report.append("%s Status: %s. %s" % (param,
                                             STATUS_STRING[status[0]],
                                             status[1]))
    # get crawl summary
    report.append("\nCrawl Summary:")
    summary = self._get_crawl_summary()
    for param in summary.keys():
      report.append("%s: %s" % (SUMMARY_STRING[param], summary[param]))
    # notify administrator
    SendMail.send(self.cfg, None, 0, subject, string.join(report, "\n"), 0)
  # refresh the value of crawl summary: copy over only the keys that
  # already exist in the snapshot.
  snapshot = self.cfg.getGlobalParam('ENT_CRAWL_SUMMARY')
  current = self._get_current_crawlsummary()
  for param in current.keys():
    if snapshot.has_key(param):
      snapshot[param] = current[param]
  self.cfg.setGlobalParam('ENT_CRAWL_SUMMARY', snapshot)
  self.cfg.saveParams()
  return 0
def removedisk(self, machine, disk):
  """Remove one data disk from a machine's configuration.

  On success, notifies the administrator by mail (at most once per
  message, via mail_already_sent).

  Returns:
    the status from updatedisk -- falsy on success.
  """
  status = self.updatedisk(machine, [disk], 0)
  if status:
    return status
  removed_msg = M.MSG_DISKREMOVED % (disk, machine)
  # Avoid duplicate notifications for the same removal.
  if not mail_already_sent(removed_msg):
    SendMail.send(self.cfg, None, false, removed_msg, "", true)
  return status
def remove(self, machine):
  """Remove a machine from the cluster configuration.

  Stops core services on the node, halts it (if APC is available), strips
  it from SERVERS/MACHINES, marks it dead in the lockserver, drops its
  GFS chunkserver and data disks, re-runs machine allocation and restarts
  the babysitter/crawl processes, then mails the administrator.

  Args:
    machine: hostname of the node to remove.
  Returns:
    1 on failure; otherwise the status of the halt attempt (0 on success).
  """
  if machine not in self.cfg.getGlobalParam('MACHINES'):
    logging.error("%s doesn't exist" % machine)
    return 1
  ver = self.cfg.getGlobalParam('VERSION')
  home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
  testver = install_utilities.is_test(ver)
  # if possible stop the core services, ignore return code
  install_utilities.stop_core(ver, home, [machine])
  if machine == E.getCrtHostName():
    logging.error("Cannot remove self")
    return 1
  # Halt the machine if APC is used.
  error = self.halt(machine)
  self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
  self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
  ret = core_utils.AddDeadNode(ver, testver, machine)
  # remove the chunkserver running on the node
  gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
  if ret:
    logging.error('Cannot add dead node to the lockserver.')
    # we ignore this error for now
  # now we need to remove the data disks that were on this machine
  data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
  if data_disks.has_key(machine):
    del data_disks[machine]
    if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
      return 1
  # This also saves the config file
  if not self.cfg.DoMachineAllocation():
    return 1
  # Now we need to restart babysitter because the old one
  # is out of sync after this
  serve_service_cmd = (
      ". %s && "
      "cd %s/local/google3/enterprise/legacy/scripts && "
      "./serve_service.py %s" % (
          self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME')))
  E.exe("%s %s" % (serve_service_cmd, "babysit"))
  self.restart_crawl_processes(serve_service_cmd)
  # NOTE(review): the return value is the halt() status; failures after
  # the halt return 1 explicitly above.
  if not mail_already_sent(M.MSG_MACHINEREMOVED % machine):
    SendMail.send(self.cfg, None, false, M.MSG_MACHINEREMOVED % machine,
                  "", true)
  return error
def remove(self, machine):
  """Remove a machine from the cluster configuration.

  Mirrors the teardown sequence: stop core services, halt via APC if
  possible, strip the node from SERVERS/MACHINES, register it as dead in
  the lockserver, drop its GFS chunkserver and data disks, redo machine
  allocation, restart the babysitter and crawl processes, and mail the
  administrator.

  Args:
    machine: hostname of the node to remove.
  Returns:
    1 on failure; otherwise the status of the halt attempt (0 on success).
  """
  if machine not in self.cfg.getGlobalParam('MACHINES'):
    logging.error("%s doesn't exist" % machine)
    return 1
  ver = self.cfg.getGlobalParam('VERSION')
  home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
  testver = install_utilities.is_test(ver)
  # if possible stop the core services, ignore return code
  install_utilities.stop_core(ver, home, [machine])
  if machine == E.getCrtHostName():
    logging.error("Cannot remove self")
    return 1
  # Halt the machine if APC is used.
  error = self.halt(machine)
  self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
  self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
  ret = core_utils.AddDeadNode(ver, testver, machine)
  # remove the chunkserver running on the node
  gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
  if ret:
    logging.error('Cannot add dead node to the lockserver.')
    # we ignore this error for now
  # now we need to remove the data disks that were on this machine
  data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
  if data_disks.has_key(machine):
    del data_disks[machine]
    if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
      return 1
  # This also saves the config file
  if not self.cfg.DoMachineAllocation():
    return 1
  # Now we need to restart babysitter because the old one
  # is out of sync after this
  serve_service_cmd = (". %s && "
                       "cd %s/local/google3/enterprise/legacy/scripts && "
                       "./serve_service.py %s" % (
                           self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                           self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                           self.cfg.getGlobalParam('ENTERPRISE_HOME')))
  E.exe("%s %s" % (serve_service_cmd, "babysit"))
  self.restart_crawl_processes(serve_service_cmd)
  # NOTE(review): the return value is the halt() status; failures after
  # the halt return 1 explicitly above.
  if not mail_already_sent(M.MSG_MACHINEREMOVED % machine):
    SendMail.send(self.cfg, None, false, M.MSG_MACHINEREMOVED % machine,
                  "", true)
  return error
def genstatusreport(self): """ refresh the value of crawl summary, and generate system status report + send email if needed Returns: 0 """ # Genrate report: if self.cfg.getGlobalParam('SEND_ENTERPRISE_STATUS_REPORT'): STATUS_STRING = [ "OK", "CAUTION", "WARNING", ] SUMMARY_STRING = { "global-overall-urls-crawled" : M.MSG_URL_CRAWLED_SINCE_YESTERDAY, "global-overall-urls-crawl-error" : M.MSG_URL_ERROR_SINCE_YESTERDAY, } # get system status system_status = self.gsa_status_logic.GetSystemStatusMap() cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())) subject = M.MSG_SYSTEM_STATUS_REPORT % cur_time health = max(map(lambda (x,y): y[0], system_status.items())) report = [] report.append("System Status: %s" % STATUS_STRING[health]) for param in self.gsa_status_logic.status_params_: if system_status.has_key(param): status = system_status[param] report.append("%s Status: %s. %s" % (param, STATUS_STRING[status[0]], status[1])) # get crawl summary report.append("\nCrawl Summary:") summary = self._get_crawl_summary() for param in summary.keys(): report.append("%s: %s" % (SUMMARY_STRING[param], summary[param])) # notify administrator SendMail.send(self.cfg, None, 0, subject, string.join(report, "\n"), 0) # refresh the value of crawl summary snapshot = self.cfg.getGlobalParam('ENT_CRAWL_SUMMARY') current = self._get_current_crawlsummary() for param in current.keys(): if snapshot.has_key(param): snapshot[param] = current[param] self.cfg.setGlobalParam('ENT_CRAWL_SUMMARY', snapshot) self.cfg.saveParams() return 0
def halt(self, machine):
  """Attempt to power off `machine`, mailing the admin about the outcome.

  On failure a "machine needs halt" mail is sent (once); on success a
  "machine halted" mail is sent (once) and the operation is logged in
  the admin runner.

  Returns:
    0 when the halt succeeded, 1 when it failed.
  """
  ent_home = self.cfg.globalParams.GetEntHome()
  halt_failed = rebooter.HaltMachines(ent_home, [machine])
  if halt_failed:
    # we failed just send email
    logging.info("Halt machine of %s failed." % machine)
    needs_halt = M.MSG_MACHINENEEDSHALT % machine
    if not mail_already_sent(needs_halt):
      logging.info("Sending mail to halt %s" % machine)
      SendMail.send(self.cfg, None, false, needs_halt, "", true)
    return 1
  halted = M.MSG_MACHINEHALTED % machine
  if not mail_already_sent(halted):
    SendMail.send(self.cfg, None, false, halted, "", true)
  self.writeAdminRunnerOpMsg(M.MSG_LOG_HALT_MACHINE % (machine))
  return 0
def halt(self, machine):
  """Attempt to power off `machine`, mailing the admin about the outcome.

  Args:
    machine: hostname of the node to halt.
  Returns:
    0 when the halt succeeded, 1 when it failed.
  """
  if rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(), [machine]):
    # we failed just send email (at most once, via mail_already_sent)
    logging.info("Halt machine of %s failed." % machine)
    if not mail_already_sent(M.MSG_MACHINENEEDSHALT % machine):
      logging.info("Sending mail to halt %s" %machine)
      SendMail.send(self.cfg, None, false, M.MSG_MACHINENEEDSHALT % machine,
                    "", true)
    return 1
  # Halt succeeded: notify once and record it in the admin runner log.
  if not mail_already_sent(M.MSG_MACHINEHALTED % machine):
    SendMail.send(self.cfg, None, false, M.MSG_MACHINEHALTED % machine,
                  "", true)
  msg = M.MSG_LOG_HALT_MACHINE % (machine)
  self.writeAdminRunnerOpMsg(msg)
  return 0
def haltcluster(self):
  """Shut down every node in the cluster, the local node last.

  Remote nodes are halted first; if that fails the admin is mailed
  (once).  After a two-minute grace period the local machine halts
  itself.

  Returns:
    0 (always).
  """
  myself = E.getCrtHostName()
  others = self.cfg.getGlobalParam("MACHINES")
  others.remove(myself)
  self.writeAdminRunnerOpMsg(M.MSG_LOGSHUTDOWN)
  if others:
    # Halt all other machines now; on failure just notify the admin.
    if rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(), others):
      needs_halt = M.MSG_MACHINENEEDSHALT % string.join(others, " ")
      if not mail_already_sent(needs_halt):
        SendMail.send(self.cfg, None, false, needs_halt, "", true)
  # Halt this machine after a delay
  time.sleep(120)
  rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(), [myself])
  return 0
def haltcluster(self):
  """Shut down every node in the cluster, the local node last.

  Returns:
    0 (always).
  """
  machines = self.cfg.getGlobalParam("MACHINES")
  # Take the local host out of the remote-halt list; it is halted last.
  machines.remove(E.getCrtHostName())
  msg = M.MSG_LOGSHUTDOWN
  self.writeAdminRunnerOpMsg(msg)
  if len(machines) > 0:
    # Halt all other machines now
    if rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(), machines):
      # just send an email (at most once for this machine list)
      if not mail_already_sent(
          M.MSG_MACHINENEEDSHALT % string.join(machines, " ")):
        SendMail.send(
            self.cfg, None, false,
            M.MSG_MACHINENEEDSHALT % string.join(machines, " "), "", true)
  # Halt this machine after a delay
  time.sleep(120)
  rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(),
                        [E.getCrtHostName()])
  return 0
def sendWarningMsg(self, license):
  """Send license-expiration warning mails to the administrator.

  A mail goes out when:
    * 90, 45, 30, 7, and 1 days before expiration (driven by
      ENT_LICENSE_STUFF_TODO_ON_DAYS);
    * daily during the grace period;
    * once after the grace period ends.
  Best-effort only; days already handled are recorded in
  ENT_LICENSE_STUFF_DONE_ON_DAYS so each mail is sent at most once.
  """
  # We don't send warnings when we are in install mode
  if self.cfg.getInstallState() == "INSTALL":
    return
  todoDays = self.cfg.getGlobalParam('ENT_LICENSE_STUFF_TODO_ON_DAYS')
  doneDays = self.cfg.getGlobalParam('ENT_LICENSE_STUFF_DONE_ON_DAYS')
  timeLeft = license.getTimeLeft()
  # Days remaining, rounded up; for already-expired licenses the +1
  # rounding is undone below.
  daysLeft = timeLeft/C.DAY_MILLISECONDS + 1;
  if timeLeft < 0:
    daysLeft = daysLeft - 1;
  graceTime = license.getGracePeriod()
  wasDone = None
  # We send a message under several conditions. In each case we check
  # doneDays to see if we've already sent an email for this day. Note
  # that for the isExpired() check, we use C.LONG_MIN_VALUE (instead of the
  # usual daysLeft) since we only want to send a single expiration message
  if license.isExpired() and C.LONG_MIN_VALUE not in doneDays:
    # license is expired; send an email only once!
    SendMail.send(
        self.cfg, None, false,
        M.WAR_LICENSE_EMAIL_SUBJECT_EXPIRED,
        M.WAR_LICENSE_EMAIL_EXPIRED, true);
    wasDone = C.LONG_MIN_VALUE
  elif license.isInGracePeriod() and daysLeft not in doneDays:
    # license is in grace period; send an email every day!
    graceDaysLeft = (timeLeft + graceTime) / C.DAY_MILLISECONDS + 1
    SendMail.send(
        self.cfg, None, false,
        M.WAR_LICENSE_EMAIL_SUBJECT_IN_GRACE_PERIOD % (graceDaysLeft),
        M.WAR_LICENSE_EMAIL_IN_GRACE_PERIOD % (graceDaysLeft), true)
    wasDone = daysLeft
  elif daysLeft in todoDays and daysLeft not in doneDays:
    # time to send a warning (as dictated by todoDays);
    # graceDays is the grace period rounded up to whole days.
    graceDays = (graceTime + C.DAY_MILLISECONDS - 1) / C.DAY_MILLISECONDS
    SendMail.send(
        self.cfg, None, false,
        M.WAR_LICENSE_EMAIL_SUBJECT_EXPIRING % daysLeft,
        M.WAR_LICENSE_EMAIL_EXPIRING % (
            daysLeft, graceDays), true)
    wasDone = daysLeft
  # if a message was sent, record it into ENT_LICENSE_STUFF_DONE_ON_DAYS
  if wasDone != None:
    doneDays.append(wasDone)
    self.cfg.setGlobalParam('ENT_LICENSE_STUFF_DONE_ON_DAYS', doneDays)
def reboot(self, machine):
  """Reboot `machine` when AUTO_REBOOT is enabled; mail the admin.

  When rebooting is disabled, or the reboot attempt fails, a "needs
  reboot" mail is sent (at most once) and 1 is returned.  On success a
  "rebooted" mail is sent (at most once) and the operation is logged.

  Returns:
    0 on a successful reboot, 1 otherwise.
  """
  needs_reboot = M.MSG_MACHINENEEDSREBOOT % machine
  auto_reboot = self.cfg.getGlobalParam("AUTO_REBOOT")
  # RebootMachine is only attempted when auto-reboot is on; a truthy
  # result from it means the reboot failed.
  reboot_failed = auto_reboot and rebooter.RebootMachine(
      self.cfg.globalParams.GetEntHome(), machine)
  if not auto_reboot or reboot_failed:
    if not mail_already_sent(needs_reboot):
      SendMail.send(self.cfg, None, false, needs_reboot, "", true)
    return 1
  rebooted = M.MSG_MACHINEREBOOTED % machine
  if not mail_already_sent(rebooted):
    SendMail.send(self.cfg, None, false, rebooted, "", true)
  self.writeAdminRunnerOpMsg(M.MSG_LOG_REBOOT_MACHINE % (machine))
  return 0
def reboot(self, machine):
  """Reboot `machine` when AUTO_REBOOT is enabled; mail the admin.

  Returns:
    0 on a successful reboot, 1 when rebooting is disabled or failed.
  """
  # Auto-reboot disabled: only notify (once) that a reboot is needed.
  if not self.cfg.getGlobalParam("AUTO_REBOOT"):
    if not mail_already_sent(M.MSG_MACHINENEEDSREBOOT % machine):
      SendMail.send(self.cfg, None, false,
                    M.MSG_MACHINENEEDSREBOOT % machine, "", true)
    return 1
  # Truthy return from RebootMachine means the reboot attempt failed.
  if (
      rebooter.RebootMachine(
          self.cfg.globalParams.GetEntHome(), machine)
      ):
    if not mail_already_sent(M.MSG_MACHINENEEDSREBOOT % machine):
      SendMail.send(self.cfg, None, false,
                    M.MSG_MACHINENEEDSREBOOT % machine, "", true)
    return 1
  # Success: notify once and record the operation.
  if not mail_already_sent(M.MSG_MACHINEREBOOTED % machine):
    SendMail.send(self.cfg, None, false,
                  M.MSG_MACHINEREBOOTED % machine, "", true)
  msg = M.MSG_LOG_REBOOT_MACHINE % (machine)
  self.writeAdminRunnerOpMsg(msg)
  return 0
def createUser(self, creatorName, ip, newUserName, newUserPassword,
               newUserEmail, newUserAccountType, newUserPermissions):
  """Create a new admin user and mail the credentials out.

  Args:
    creatorName: username performing the creation (may be empty/None).
    ip: IP address the creation request came from.
    newUserName / newUserPassword / newUserEmail / newUserAccountType /
        newUserPermissions: data for the new account.  An empty password
        means "generate a random one".

  Upon creation we send a confirmation email to the creator and a
  welcome message (with the cleartext password included) to the new
  user.

  Returns:
    an error code -- CREATE_OK on success (codes declared at the top of
    the file).
  """
  self.updatelock.acquire()
  try:
    # Pass the creator name when getting the user file
    (err, users) = self.get_checked_users(name=creatorName)
    if err != USER_OK:
      logging.error("Error %s while reading the users file. user create "\
                    " failed" % err)
      return CREATE_UNKNOWN
    if newUserName in users.keys():
      logging.error("User %s already exists. Cannot re-create it" %
                    (newUserName))
      return CREATE_USEREXISTS
    if len(newUserPassword) == 0:
      newUserPassword = password.createRandomPasswd(PASSWORD_LENGTH)
    # validate the user name
    if not entconfig.IsNameValid(newUserName):
      logging.error("Invalid user name %s -- cannot create" % (newUserName))
      return CREATE_INVALIDUSERNAME
    # $TODO$ -- add email validation
    if " " in newUserEmail:
      logging.error("Invalid email %s while creating user %s" %
                    (newUserEmail, newUserName))
      return CREATE_INVALIDEMAIL
    # Keep the cleartext around for the welcome mail; store only the
    # salted SHA-1 hash.
    decryptedPasswd = newUserPassword
    urandom = open('/dev/urandom')
    salt = urandom.read(2)
    urandom.close()
    newUserPassword = password.sha1_base64_hash(newUserPassword, salt)
    newSalt = base64.encodestring(salt)[:-1]
    users[newUserName] = UserData(newUserName, newUserPassword, newSalt,
                                  newUserEmail, newUserAccountType,
                                  newUserPermissions)
    self.save_passwd_file(users)
    if not self.update_vmanage_password(newUserName, newUserPassword,
                                        newSalt):
      logging.error("Error updating vmanager password for user %s" %
                    newUserName)
  finally:
    self.updatelock.release()
  self.sync_password_file()
  if creatorName:
    creatorEmail = users[creatorName].email
  else:
    creatorEmail = None
  accountType = users[newUserName].AccountTypePrintName()
  # and send email, first, to the creator
  if creatorEmail:
    SendMail.send(
        self.cfg, creatorEmail, false,
        M.MSG_NEWUSERPASSWORDSUBJECT % newUserName,
        M.MSG_NEWUSERPASSWORD % (newUserName, accountType, newUserEmail,
                                 ip, creatorName, creatorEmail),
        false)
  # next, to the created
  rootURI = "http://%s:8000" % self.cfg.getGlobalParam("EXTERNAL_WEB_IP")
  SendMail.send(
      self.cfg, newUserEmail, false,
      M.MSG_WELCOMENEWUSERSUBJECT,
      M.MSG_WELCOMENEWUSER % (accountType, creatorEmail, newUserName,
                              decryptedPasswd, rootURI, creatorEmail),
      false)
  logging.info("User %s [email %s] created OK by %s" %
               (newUserName, newUserEmail, creatorName))
  return CREATE_OK
def add(self, machine, apc_outlet):
  """Add a machine to the cluster configuration.

  Provisioning sequence: verify the install is ACTIVE and the node is
  reachable over ssh; start and verify svs on it; register it in
  MACHINES and the lockserver; restart the babysitter; discover its good
  disks and add them; record its APC outlet; create data dirs, replicate
  config, reconfigure net; start core services and its GFS chunkserver;
  reallocate servers onto it; kill the replaced servers elsewhere; mark
  the node ACTIVE and start/activate its crawl, logcontrol and serve
  services; finally mail the administrator.

  Args:
    machine: hostname of the node to add.
    apc_outlet: APC power outlet spec for the node (stored in APC_MAP).
  Returns:
    0 on success, 1 on any failure (each failure is logged).
  """
  # We can add a machine only when we are in active state
  if install_utilities.install_state(
      self.cfg.getGlobalParam('VERSION')) != "ACTIVE":
    logging.error("Can add a machine only when we are in active state")
    return 1
  # First test for accessibility of the machine.
  if E.execute([machine], 'echo 1', None, 1) != E.ERR_OK:
    logging.error("Could not ssh into the machine %s" % machine)
    return 1
  # start the svs on the remote machine
  restart_svs_cmd = "%s/local/google3/enterprise/legacy/util/svs_utilities.py %s %s" % (
      self.cfg.getGlobalParam('ENTERPRISE_HOME'),
      self.cfg.getGlobalParam('ENTERPRISE_HOME'),
      machine)
  if E.execute([E.getCrtHostName()], SECURE_WRAPPER_COMMAND % ( \
      self.cfg.getGlobalParam('ENTERPRISE_HOME'),
      "-p2",
      restart_svs_cmd), None, 0) != E.ERR_OK:
    logging.error("Could not start svs on machine %s" % machine)
    return 1
  # wait for some time for svs to come up
  time.sleep(5)
  # check to see if the svs is up and is the right version
  if not svs_utilities.PingAndCheckSvsVersion(
      self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
      self.cfg.getGlobalParam('ENTERPRISE_HOME'),
      machine):
    logging.error("Svs not running correctly on machine %s" % machine)
    return 1
  ver = self.cfg.getGlobalParam('VERSION')
  home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
  testver = install_utilities.is_test(ver)
  # update MACHINES
  machines = self.cfg.getGlobalParam('MACHINES')
  if machine not in machines:
    machines.append(machine)
  self.cfg.setGlobalParam('MACHINES', machines)
  ret = core_utils.RemDeadNode(ver, testver, machine)
  if ret:
    logging.error('Cannot remove dead node from lockserver.')
    # we ignore this error for now
  # We just added a new machine into the config
  # this will lead to a change in concentrator config
  # so we need to re-run serve service which will
  # write the new config and restart the concentrator
  serve_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
              "./serve_service.py %s" % (
                  self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                  self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                  self.cfg.getGlobalParam('ENTERPRISE_HOME'))
  E.exe("%s %s" % (serve_cmd, "babysit"))
  # Poll (up to 5 times, one minute apart) for the node's disk facts.
  num_tries = 5
  cur_try = 0
  while cur_try < num_tries:
    cur_try = cur_try + 1
    all_disks = self.cfg.mach_param_cache.GetFact("mounted-drives", machine)
    bad_disks = self.cfg.mach_param_cache.GetFact("var_log_badhds", machine)
    if bad_disks and all_disks:
      break
    time.sleep(60)
  if all_disks == None or bad_disks == None:
    logging.error("Could not get machine information about %s" % machine)
    return 1
  bad_disks = string.split(bad_disks, ' ')
  all_disks = string.split(all_disks, ' ')
  # Keep only disks not reported bad; use their 3rd partition.
  good_disks = filter(lambda x, y=bad_disks: x not in y, all_disks)
  good_disks = map(lambda x: "%s3" % x, good_disks)
  # change sda3 to hda3 etc.
  good_disks = map(lambda x: re.sub(r'^s', 'h', x), good_disks)
  # Preprocess disks before adding to remove duplicates.
  unique_good_disks = []
  [unique_good_disks.append(disk) for disk in good_disks
   if disk not in unique_good_disks]
  # Add disks
  self.updatedisk(machine, unique_good_disks, true)
  # apc map update
  apc_map = self.cfg.globalParams.var_copy('APC_MAP')
  apc_map[machine] = apc_util.PortMap(apc_outlet)
  if not self.cfg.setGlobalParam('APC_MAP', apc_map):
    logging.error("ERROR setting apc map to %s" % repr(apc_map))
    return 1
  # create appropriate datadirs on that machine
  if not self.cfg.createDataDirs([machine], node_replacement=1):
    logging.error("ERROR could not create datadirs on machine %s" % machine)
    return 1
  # Replicate the config
  self.cfg.replicateConfigOnMachine(machine)
  # Reconfigure net on the target machine
  if not reconfigurenet_util.doReconfigureNet(
      self.cfg.globalParams, [machine], i_am_master=0):
    logging.error('reconfigurenet failed for %s' % machine)
    return 1
  # Start core services on the new node
  if not install_utilities.start_core(ver, home, [machine], ignore=0):
    logging.error("ERROR could not start core services on %s" % machine)
    return 1
  # Add the chunkserver back
  gfs_utils.AddGFSChunkservers(ver, testver, [machine])
  # first we need to do Machine allocation.
  # this will assign things that will satisfy the constraints
  if not self.cfg.DoMachineAllocation(serversets=['workqueue-slave']):
    logging.error("ERROR doing machine allocation")
    return 1
  # now try to reallocate some servers from existing machines
  # to the new machine
  replaced = self.cfg.AllocateServersToNewMachine(machine)
  if not replaced:
    logging.error("ERROR allocating services to the new machine")
    return 1
  # first we need to restart the babysitter
  E.exe("%s %s" % (serve_cmd, "babysit"))
  time.sleep(60)
  # Now we need to stop all the replaced services
  for server_string in replaced:
    server = serverlib.Server()
    server.InitFromName(server_string)
    replaced_type = server.servertype()
    kill_cmd = servertype.GetKillCmd(replaced_type, server.port())
    if E.execute([server.host()], kill_cmd, None, 1) != E.ERR_OK:
      logging.error("ERROR killing %s running on port %d on %s" % \
                    (replaced_type, server.port(), server.host()))
  # we should make it active
  if not install_utilities.set_install_state(
      machine, self.cfg.getGlobalParam('ENTERPRISE_HOME'), "ACTIVE"):
    logging.error("ERROR changing state on machine %s. "
                  "Please make it active and activate and "
                  "start crawl service on it" % machine)
    return 1
  crawl_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
              "./crawl_service.py %s" % (
                  self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                  self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                  self.cfg.getGlobalParam('ENTERPRISE_HOME'))
  if E.execute([machine], "%s %s" % (crawl_cmd, "start"),
               None, 1) != E.ERR_OK:
    logging.error("Could not start crawl service on %s" % machine)
    return 1
  # save all the params
  self.cfg.saveParams()
  # for faster crawl recovery, lets restart all crawl processes
  self.restart_crawl_processes(serve_cmd)
  # activate the crawl and logcontrol service on the remote machine
  crawl_activate_cmd = "/etc/rc.d/init.d/crawl_%s activate >&/dev/null" \
                       "</dev/null" % self.cfg.getGlobalParam('VERSION')
  if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
      self.cfg.getGlobalParam('ENTERPRISE_HOME'),
      "-e",
      crawl_activate_cmd), None, 0) != E.ERR_OK:
    logging.error("Could not activate crawl service on machine %s" % machine)
    logging.error("Please activate by hand")
    return 1
  log_activate_cmd = "/etc/rc.d/init.d/logcontrol_%s activate >&/dev/null" \
                     "</dev/null" % self.cfg.getGlobalParam('VERSION')
  if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
      self.cfg.getGlobalParam('ENTERPRISE_HOME'),
      "-e",
      log_activate_cmd), None, 0) != E.ERR_OK:
    logging.error(
        "Could not activate logcontrol service on machine %s" % machine)
    logging.error("Please activate by hand")
    return 1
  serve_activate_cmd = "/etc/rc.d/init.d/serve_%s activate >&/dev/null" \
                       "</dev/null" % self.cfg.getGlobalParam('VERSION')
  if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
      self.cfg.getGlobalParam('ENTERPRISE_HOME'),
      "-e",
      serve_activate_cmd), None, 0) != E.ERR_OK:
    logging.error("Could not activate serve service on machine %s" % machine)
    logging.error("Please activate by hand")
    return 1
  logging.info("Machine %s successfully added into the system" % machine)
  if not mail_already_sent(M.MSG_MACHINEADDED % machine):
    SendMail.send(self.cfg, None, false, M.MSG_MACHINEADDED % machine,
                  "", true)
  return 0
def createUser(self, creatorName, ip, newUserName, newUserPassword,
               newUserEmail, newUserAccountType, newUserPermissions):
  """Create a new admin user and mail the credentials out.

  Args:
    creatorName: username performing the creation (may be empty/None).
    ip: IP address the creation request came from.
    newUserXXXX: corresponding data for the new user.  An empty password
        means "generate a random one".

  Upon creation we send a confirmation email to the creator and a
  welcome message (with the cleartext password included) to the new
  user.

  Returns:
    an error code -- CREATE_OK on success (codes declared at the top of
    the file).
  """
  self.updatelock.acquire()
  try:
    # Pass the creator name when getting the user file
    (err, users) = self.get_checked_users(name = creatorName)
    if err != USER_OK:
      logging.error("Error %s while reading the users file. user create "\
                    " failed" % err)
      return CREATE_UNKNOWN
    if newUserName in users.keys():
      logging.error("User %s already exists. Cannot re-create it" % (
          newUserName))
      return CREATE_USEREXISTS
    if len(newUserPassword) == 0:
      newUserPassword = password.createRandomPasswd(PASSWORD_LENGTH);
    # validate the user name
    if not entconfig.IsNameValid(newUserName):
      logging.error("Invalid user name %s -- cannot create" % (newUserName))
      return CREATE_INVALIDUSERNAME
    # $TODO$ -- add email validation
    if " " in newUserEmail:
      logging.error("Invalid email %s while creating user %s" % (
          newUserEmail, newUserName))
      return CREATE_INVALIDEMAIL
    # Keep the cleartext around for the welcome mail; store only the
    # salted SHA-1 hash.
    decryptedPasswd = newUserPassword
    urandom = open('/dev/urandom')
    salt = urandom.read(2)
    urandom.close()
    newUserPassword = password.sha1_base64_hash(newUserPassword, salt)
    newSalt = base64.encodestring(salt)[:-1]
    users[newUserName] = UserData(newUserName, newUserPassword, newSalt,
                                  newUserEmail, newUserAccountType,
                                  newUserPermissions)
    self.save_passwd_file(users)
    if not self.update_vmanage_password(newUserName, newUserPassword,
                                        newSalt):
      logging.error("Error updating vmanager password for user %s" %
                    newUserName)
  finally:
    self.updatelock.release()
  self.sync_password_file()
  if creatorName:
    creatorEmail = users[creatorName].email
  else:
    creatorEmail = None
  accountType = users[newUserName].AccountTypePrintName()
  # and send email, first, to the creator
  if creatorEmail:
    SendMail.send(self.cfg, creatorEmail, false,
                  M.MSG_NEWUSERPASSWORDSUBJECT % newUserName,
                  M.MSG_NEWUSERPASSWORD % (
                      newUserName, accountType, newUserEmail,
                      ip, creatorName, creatorEmail
                      ), false)
  # next, to the created
  rootURI = "http://%s:8000" % self.cfg.getGlobalParam("EXTERNAL_WEB_IP")
  SendMail.send(self.cfg, newUserEmail, false,
                M.MSG_WELCOMENEWUSERSUBJECT,
                M.MSG_WELCOMENEWUSER % (
                    accountType, creatorEmail, newUserName,
                    decryptedPasswd, rootURI, creatorEmail
                    ), false)
  logging.info("User %s [email %s] created OK by %s" % (
      newUserName, newUserEmail, creatorName))
  return CREATE_OK
def prereq_check(self, send_email, collections):
  """Run prerequisite (testwords) checks and update serving epochs.

  Checks the prerequisites for the given collections (or all of them),
  updates each collection's EPOCHS_SERVING file to pick the epoch to
  serve from, and optionally mails the administrator about failures or
  a non-current serving epoch.

  Args:
    send_email: string integer flag; non-zero enables warning mails.
    collections: comma-separated collection names, or None/empty for all.
  Returns:
    dict mapping collection name -> list of failed testwords (empty dict
    when there are no collections).
  """
  if collections != None:
    collections = string.strip(collections)
  if not collections:
    collections = ent_collection.ListCollections(self.cfg.globalParams)
  else:
    # NOTE(review): this branch builds EntCollection objects while the
    # other yields names; the per-collection loop below constructs
    # EntCollection from each element either way -- verify against
    # ent_collection's constructor behavior.
    collections = map(lambda c, p = self.cfg.globalParams:
                      ent_collection.EntCollection(c, p),
                      map(string.strip, string.split(collections, ",")) )
  # No collections -- exit quickly
  if not collections:
    return {}
  send_email = string.atoi(send_email)
  epochs = self.cfg.getGlobalParam('ENTERPRISE_EPOCHS')
  gwssers = self.cfg.globalParams.GetServerHostPorts("web")
  jobs = []
  for c in collections:
    collection = ent_collection.EntCollection(c, self.cfg.globalParams)
    # Write the testwords in a copy file
    filename = collection.get_var('TESTWORDS')
    filename_copy = "%s_" % filename
    open(filename_copy, "w").write(open(filename, "r").read())
    num = collection.get_var("TESTWORDS_IN_FIRST")
    jobs.append((self.cfg, gwssers, c, filename_copy, epochs, num))
  # Lock a file so we test once at a time
  lock_file = "%s/prerequisites_lock" % self.cfg.getGlobalParam("TMPDIR")
  flock = E.acquire_lock(lock_file, 12)
  try:
    # Run the tests -- one per thread ...
    # see how many threads to spawn
    if len(jobs) >= NUM_THREADS:
      num_threads = NUM_THREADS
    else:
      num_threads = len(jobs)
    # create the threads - workers
    threads = []
    for n in range(0, num_threads):
      threads.append(Runner(n, jobs))
    # start the threads
    for thread in threads[:-1]:
      thread.start()
    # I run the last one
    threads[-1].run()
    # wait to collect the errors at the end
    errors = threads[-1].errors
    max_epochs = threads[-1].max_epochs
    for thread in threads[:-1]:
      thread.join()
      for k, v in thread.max_epochs.items():
        max_epochs[k] = v
      for k, v in thread.errors.items():
        errors[k] = v
    # prepare and send a nice :) message, rate-limited by
    # ENTERPRISE_INTER_EMAIL_TIME.
    if errors and send_email:
      last_msg_time = self.cfg.getGlobalParam(
          'LAST_PREREQUISITES_EMAIL_TIME')
      email_interval = self.cfg.getGlobalParam(
          'ENTERPRISE_INTER_EMAIL_TIME')
      now = int(time.time())
      if now - last_msg_time > email_interval:
        msg = [M.MSG_PREREQ_FAIL]
        msg.extend(map(
            lambda (c, e):
            "Collection %s generated a wrong answer for %s" % (
                c, string.join(e, ",")),
            errors.items()))
        SendMail.send(self.cfg, None, 1, M.MSG_PREREQ_FAIL_SUBJECT,
                      string.join(msg, "\n"), 1)
        self.cfg.globalParams.set_var('LAST_PREREQUISITES_EMAIL_TIME', now)
    self.cfg.globalParams.set_var('LAST_PREREQUISITES_CHECK',
                                  time.strftime("%Y/%m/%d %H:%M:%S"))
    epochs.sort()
    cur_epoch = epochs[-1]
    for c in collections:
      collection = ent_collection.EntCollection(c, self.cfg.globalParams)
      collection.set_var('LAST_PREREQUISITES_ERRORS', errors.get(c, []))
      # EPOCH_SERVING has two values in the form of "es[0] es[1]"
      # es[0]: the epoch the prereq_check ask us to serve, or
      #        -1 means no epoch answers OK,
      #        -2 means current index answers OK
      # es[1]: the epoch the user set from UI, if -2 means use
      #        most recent valid epoch
      # the serving logic is as following:
      # -- if user set a sepcific epoch (es[1] >= 0), serve es[1]
      # -- if user set most recent valid epoch (es[1] == -2), then
      #    serve the current index if no/all epochs answers ok
      #    (es[0] == -1 or es[0] == -2)
      #    otherwise (es[0] >= 0) serve from the es[0]
      es = string.split(string.strip(
          open(collection.get_var('EPOCHS_SERVING'), "r").read()), " ")
      # The epoch prereq_check asks us to serve
      # this from -- -2 means current index is OK,
      # -1 means no epoch answers OK (is returned by the checker)
      epoch = max_epochs.get(c, -2)
      if not errors.has_key(c):
        epoch = -2
      # initialize EPOCHS_SERVING
      if not es or len(es) == 1:
        es = [epoch, -2]
      else:
        es = map(string.atoi, es)
      # if this change cause automatic rollback, which means
      # - user choose the most recent valid epoch and
      # - the new epoch differs from previous epoch and
      # - the change is not from -1 -> -2 or -2 -> -1.
      # we log it in AdminRunner Operations log
      if es[1] == -2 and epoch != es[0] and ( es[0] + epoch != -3 ) :
        epochs_to_time = self.cfg.getGlobalParam(
            'ENTERPRISE_EPOCHS_ENDTIME')
        epoch_time = epochs_to_time.get(epoch, M.MSG_EPOCH_CURRENT_TIME)
        self.writeAdminRunnerOpMsg(M.MSG_UI_LOG_INDEX_ROLLBACK % epoch_time)
      es[0] = epoch
      collection.set_file_var_content('EPOCHS_SERVING',
                                      string.join(map(str, es)), 0)
      # also check if the current serving epoch for the collection
      # is the most recent one, if not, send a warning email
      # (also rate-limited by ENTERPRISE_INTER_EMAIL_TIME)
      if send_email and ( ( es[1] == -2 and es[0] >= 0 ) or \
                          (es[1] >= 0 and es[1] != cur_epoch ) ) :
        last_msg_time = self.cfg.getGlobalParam(
            'LAST_SERVING_EPOCH_WARNING_EMAIL_TIME')
        email_interval = self.cfg.getGlobalParam(
            'ENTERPRISE_INTER_EMAIL_TIME')
        now = int(time.time())
        if now - last_msg_time > email_interval:
          SendMail.send(self.cfg, None, 0,
                        M.MSG_SERVING_EPOCH_NOT_CURRENT % c, "", 0)
          self.cfg.globalParams.set_var(
              'LAST_SERVING_EPOCH_WARNING_EMAIL_TIME', now)
    self.cfg.saveParams()
  finally:
    flock.close()
  return errors
def add(self, machine, apc_outlet):
    """Adds a machine to the enterprise cluster configuration.

    Orchestrates the full node-addition sequence: verifies the box is in the
    ACTIVE install state, checks ssh reachability of the new machine, starts
    and version-checks svs on it, registers it in MACHINES, discovers its
    good disks, updates the APC map, creates data dirs, replicates config,
    reconfigures the network, starts core services and the GFS chunkserver,
    reallocates servers onto the new node, and finally activates the crawl,
    logcontrol and serve services on it.

    NOTE(review): the steps below are strictly order-dependent (e.g. svs must
    be up before facts can be read; config must be replicated before core
    services start) — do not reorder.

    Args:
      machine: name of the machine to add (used as an ssh target by E.execute).
      apc_outlet: passed to apc_util.PortMap to build the machine's APC map
        entry — presumably the APC power outlet identifier; verify against
        apc_util.

    Returns:
      0 on success, 1 on any failure (errors are logged; some later steps ask
      the operator to finish activation by hand).
    """
    # We can add a machine only when we are in active state
    if install_utilities.install_state(self.cfg.getGlobalParam('VERSION')) != "ACTIVE":
        logging.error("Can add a machine only when we are in active state")
        return 1

    # First test for accessibility of the machine (trivial remote echo).
    if E.execute([machine], 'echo 1', None, 1) != E.ERR_OK:
        logging.error("Could not ssh into the machine %s" % machine)
        return 1

    # start the svs on the remote machine
    restart_svs_cmd = "%s/local/google3/enterprise/legacy/util/svs_utilities.py %s %s" % (
        self.cfg.getGlobalParam('ENTERPRISE_HOME'),
        self.cfg.getGlobalParam('ENTERPRISE_HOME'),
        machine)
    # NOTE(review): run through SECURE_WRAPPER_COMMAND on the current host;
    # the "-p2" flag's meaning is defined by the wrapper — confirm there.
    if E.execute([E.getCrtHostName()], SECURE_WRAPPER_COMMAND % ( \
                 self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                 "-p2",
                 restart_svs_cmd),
                 None, 0) != E.ERR_OK:
        logging.error("Could not start svs on machine %s" % machine)
        return 1

    # wait for some time for svs to come up
    time.sleep(5)

    # check to see if the svs is up and is the right version
    if not svs_utilities.PingAndCheckSvsVersion(
                         self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                         self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                         machine):
        logging.error("Svs not running correctly on machine %s" % machine)
        return 1

    ver = self.cfg.getGlobalParam('VERSION')
    home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
    testver = install_utilities.is_test(ver)

    # update MACHINES (append only if not already present)
    machines = self.cfg.getGlobalParam('MACHINES')
    if machine not in machines:
        machines.append(machine)
    self.cfg.setGlobalParam('MACHINES', machines)

    ret = core_utils.RemDeadNode(ver, testver, machine)
    if ret:
        logging.error('Cannot remove dead node from lockserver.')
        # we ignore this error for now

    # We just added a new machine into the config
    # this will lead to a change in concentrator config
    # so we need to re-run serve service which will
    # write the new config and restart the concentrator
    serve_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
                "./serve_service.py %s" % (
        self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
        self.cfg.getGlobalParam('ENTERPRISE_HOME'),
        self.cfg.getGlobalParam('ENTERPRISE_HOME'))
    E.exe("%s %s" % (serve_cmd, "babysit"))

    # Poll the machine-parameter cache for disk facts; the facts are gathered
    # asynchronously by svs, so retry up to 5 times, sleeping 60s between
    # attempts.
    num_tries = 5
    cur_try = 0
    while cur_try < num_tries:
        cur_try = cur_try + 1
        all_disks = self.cfg.mach_param_cache.GetFact("mounted-drives", machine)
        bad_disks = self.cfg.mach_param_cache.GetFact("var_log_badhds", machine)
        if bad_disks and all_disks:
            break
        time.sleep(60)

    if all_disks == None or bad_disks == None:
        logging.error("Could not get machine information about %s" % machine)
        return 1

    # Facts are space-separated device lists; keep only the drives that are
    # not reported bad.
    bad_disks = string.split(bad_disks, ' ')
    all_disks = string.split(all_disks, ' ')
    good_disks = filter(lambda x, y=bad_disks: x not in y, all_disks)
    # Use partition 3 of each good drive (e.g. "sda" -> "sda3").
    good_disks = map(lambda x: "%s3" % x, good_disks)
    # change sda3 to hda3 etc.
    good_disks = map(lambda x: re.sub(r'^s', 'h', x), good_disks)
    # Preprocess disks before adding to remove duplicates.
    # NOTE(review): side-effect list comprehension used purely for its
    # appends; preserves first-seen order.
    unique_good_disks = []
    [unique_good_disks.append(disk) for disk in good_disks if disk not in unique_good_disks]

    # Add disks
    self.updatedisk(machine, unique_good_disks, true)

    # apc map update
    apc_map = self.cfg.globalParams.var_copy('APC_MAP')
    apc_map[machine] = apc_util.PortMap(apc_outlet)
    if not self.cfg.setGlobalParam('APC_MAP', apc_map):
        logging.error("ERROR setting apc map to %s" % repr(apc_map))
        return 1

    # create appropriate datadirs on that machine
    if not self.cfg.createDataDirs([machine], node_replacement = 1):
        logging.error("ERROR could not create datadirs on machine %s" % machine)
        return 1

    # Replicate the config
    self.cfg.replicateConfigOnMachine(machine)

    # Reconfigure net on the target machine
    if not reconfigurenet_util.doReconfigureNet(self.cfg.globalParams,
                                                [machine], i_am_master=0):
        logging.error('reconfigurenet failed for %s' % machine)
        return 1

    # Start core services on the new node
    if not install_utilities.start_core(ver, home, [machine], ignore=0):
        logging.error("ERROR could not start core services on %s" % machine)
        return 1

    # Add the chunkserver back
    gfs_utils.AddGFSChunkservers(ver, testver, [machine])

    # first we need to do Machine allocation.
    # this will assign things that will satisfy the constraints
    if not self.cfg.DoMachineAllocation(serversets=['workqueue-slave']):
        logging.error("ERROR doing machine allocation")
        return 1

    # now try to relllocate some servers from existing machines to the new machine
    replaced = self.cfg.AllocateServersToNewMachine(machine)
    if not replaced:
        logging.error("ERROR allocating services to the new machine")
        return 1

    # first we need to restart the babysitter
    E.exe("%s %s" % (serve_cmd, "babysit"))
    time.sleep(60)

    # Now we need to stop all the replaced services on their old hosts.
    # Kill failures are logged but do not abort the add.
    for server_string in replaced:
        server = serverlib.Server()
        server.InitFromName(server_string)
        replaced_type = server.servertype()
        kill_cmd = servertype.GetKillCmd(replaced_type, server.port())
        if E.execute([server.host()], kill_cmd, None, 1) != E.ERR_OK:
            logging.error("ERROR killing %s running on port %d on %s" % \
                          (replaced_type, server.port(), server.host()))

    # we should make it active
    if not install_utilities.set_install_state(
        machine, self.cfg.getGlobalParam('ENTERPRISE_HOME'), "ACTIVE"):
        logging.error("ERROR changing state on machine %s. "
                      "Please make it active and activate and "
                      "start crawl service on it" % machine)
        return 1

    crawl_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
                "./crawl_service.py %s" % (
        self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
        self.cfg.getGlobalParam('ENTERPRISE_HOME'),
        self.cfg.getGlobalParam('ENTERPRISE_HOME'))
    if E.execute([machine], "%s %s" % (crawl_cmd, "start"), None, 1) != E.ERR_OK:
        logging.error("Could not start crawl service on %s" % machine)
        return 1

    # save all the params
    self.cfg.saveParams()

    # for faster crawl recovery, lets restart all crawl processes
    self.restart_crawl_processes(serve_cmd)

    # activate the crawl and logcontrol service on the remote machine.
    # NOTE(review): activation failures from here on only log and return 1
    # after asking the operator to activate by hand — earlier steps have
    # already mutated global config.
    crawl_activate_cmd = "/etc/rc.d/init.d/crawl_%s activate >&/dev/null" \
                         "</dev/null" % self.cfg.getGlobalParam('VERSION')
    if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
                 self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                 "-e",
                 crawl_activate_cmd),
                 None, 0) != E.ERR_OK:
        logging.error("Could not activate crawl service on machine %s" % machine)
        logging.error("Please activate by hand")
        return 1
    log_activate_cmd = "/etc/rc.d/init.d/logcontrol_%s activate >&/dev/null" \
                       "</dev/null" % self.cfg.getGlobalParam('VERSION')
    if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
                 self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                 "-e",
                 log_activate_cmd),
                 None, 0) != E.ERR_OK:
        logging.error("Could not activate logcontrol service on machine %s" % machine)
        logging.error("Please activate by hand")
        return 1
    serve_activate_cmd = "/etc/rc.d/init.d/serve_%s activate >&/dev/null" \
                         "</dev/null" % self.cfg.getGlobalParam('VERSION')
    if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
                 self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                 "-e",
                 serve_activate_cmd),
                 None, 0) != E.ERR_OK:
        logging.error("Could not activate serve service on machine %s" % machine)
        logging.error("Please activate by hand")
        return 1

    logging.info("Machine %s successfully added into the system" % machine)

    # Notify the administrator once (mail_already_sent suppresses duplicates).
    if not mail_already_sent(M.MSG_MACHINEADDED % machine):
        SendMail.send(self.cfg, None, false,
                      M.MSG_MACHINEADDED % machine, "", true)

    return 0