def SyncOneboxLog(config):
    """Push the local Onebox log file to its GFS copy (clusters only).

    Nothing is done unless ENT_CONFIG_TYPE is 'CLUSTER' and the current host
    equals the first entry of config.SERVERS for the 'oneboxenterprise' port.
    Otherwise a fileutil 'equalize' is tried first and, when that reports an
    error, the whole log is copied over with 'cp -f'.

    Per the original notes: as of 4.6.4 this is called from
    scripts/periodic_script.py and from onebox_handler.py (when the user does
    View Log and the machine is a cluster).

    Args:
      config: configuration object providing var() and SERVERS; it is also
        handed to E.run_fileutil_command.
    """
    port = servertype.GetPortBase('oneboxenterprise')
    onebox_hosts = config.SERVERS[port]
    this_host = E.getCrtHostName()
    config_type = config.var('ENT_CONFIG_TYPE')

    # If the onebox server is not running here there is no need to sync.
    onebox_runs_here = (config_type == 'CLUSTER' and
                        this_host == onebox_hosts[0])
    if not onebox_runs_here:
        return

    local_dir = config.var('TMPDIR')
    cell = config.var('GFS_CELL')
    local_path = os.path.join(local_dir, config.var('ENTERPRISE_ONEBOX_LOG'))
    gfs_path = os.path.join(os.sep, 'gfs', cell,
                            config.var('ENTERPRISE_ONEBOX_LOG'))

    # fileutil equalize copies only the difference of the log files.
    status, _ = E.run_fileutil_command(
        config, 'equalize %s %s' % (local_path, gfs_path))
    if status:
        # The files did not match from the beginning (possibly a new log file
        # was created), so copy the whole log file instead.
        status, _ = E.run_fileutil_command(
            config, 'cp -f %s %s' % (local_path, gfs_path))
        if status:
            logging.error('Error while syncing onebox logs.')
def deleteCollection(self, collection):
    """Delete all reports and report bookkeeping files of one collection.

    While holding self.logreplock, for each report type (raw, summary):
    jobs of reports that are not COMPLETE are stopped, the report data files
    are removed, self.reportCount is reduced and the report list file is
    removed.  Finally the collection's report directory is removed with
    'rmdir'.  fileutil failures are logged and do not abort the cleanup.

    Args:
      collection: name of the collection whose reports are deleted.
    """
    self.logreplock.acquire()
    try:
        for kind in (liblog.RAW_REPORT, liblog.SUMMARY_REPORT):
            doomed = self.getLogReports(collection, kind)
            for entry in doomed:
                # Stop the running job if the report is being (re)generated.
                if entry.completeState != COMPLETE:
                    self.stopRunningJob(self.jobName(entry))
                # Delete the data files, if any.
                html_path, valid_path = liblog.get_report_filenames(
                    self.entConfig, kind, entry.reportName, collection)
                self.RemoveReportFiles(html_path, valid_path)

            removed = len(doomed)
            self.reportCount[kind] -= removed
            logging.info(
                'Delete total %d reports of type %s for collection %s.' % (
                    removed, kind, collection))

            list_path = liblog.get_report_list_filename(
                self.entConfig, kind, collection)
            status, _ = E.run_fileutil_command(
                self.entConfig, 'rm -f %s' % list_path)
            if status:
                logging.error('Cannot remove list file %s.' % list_path)

        # Once both report types are gone the directory itself is removed.
        collection_dir = liblog.get_report_collection_dir(
            self.entConfig, collection)
        status, _ = E.run_fileutil_command(
            self.entConfig, 'rmdir %s' % collection_dir)
        if status:
            logging.error(
                'Cannot delete unused directory %s' % collection_dir)
    finally:
        self.logreplock.release()
def TouchFile(global_params, filename):
    """Create filename as an empty file unless it already exists.

    Existence is probed with fileutil 'ls'; when that fails the file is
    created with fileutil 'truncate <filename> 0'.  A failure to create the
    file is reported through logging.fatal.

    Args:
      global_params: configuration object handed to E.run_fileutil_command.
      filename: path of the file to check and, if needed, create.
    """
    # This function takes no `self`; the configuration has to come from the
    # global_params argument (referencing self.globalParams here raised a
    # NameError on every call).
    err, _ = E.run_fileutil_command(global_params, "ls %s" % filename)
    if err == E.ERR_OK:
        # The file is already there.
        return

    # Create it if it does not exist.
    err, _ = E.run_fileutil_command(global_params,
                                    "truncate %s 0" % filename)
    if err != E.ERR_OK:
        logging.fatal("Could not create file: %s" % filename)
def TouchFile(global_params, filename):
    """Create filename as an empty file unless it already exists.

    Existence is probed with fileutil 'ls'; when that fails the file is
    created with fileutil 'truncate <filename> 0'.  A failure to create the
    file is reported through logging.fatal.

    Args:
      global_params: configuration object handed to E.run_fileutil_command.
      filename: path of the file to check and, if needed, create.
    """
    # This function takes no `self`; the configuration has to come from the
    # global_params argument (referencing self.globalParams here raised a
    # NameError on every call).
    err, _ = E.run_fileutil_command(global_params, "ls %s" % filename)
    if err == E.ERR_OK:
        # The file is already there.
        return

    # Create it if it does not exist.
    err, _ = E.run_fileutil_command(global_params,
                                    "truncate %s 0" % filename)
    if err != E.ERR_OK:
        logging.fatal("Could not create file: %s" % filename)
def drain_urlmanagers(self):
    """Make every urlmanager dump its status table and check the dump exists.

    Per the original note this has to be done before advancing the epoch and
    may be done multiple times.

    For each urlmanager (host, port) a port_talker.py command sending
    'd DumpingStatusTable' is executed on the current host, then the expected
    status table file for that shard and the current RT_EPOCH is looked up
    with fileutil 'ls'.

    Returns:
      0 when all urlmanagers were handled successfully, 1 as soon as one
      drain command or one file check fails (the error is logged).
    """
    hostports = self.cfg.globalParams.GetServerHostPorts("urlmanager")
    shard_count = self.cfg.globalParams.GetNumShards('urlmanager')
    rt_epoch = self.cfg.getGlobalParam('RT_EPOCH')

    for host, port in hostports:
        # We don't do it here directly because of the timeout.
        talker_cmd = (
            ". %s; cd %s/local/google3/enterprise/legacy/util && "
            "./port_talker.py %s %d 'd DumpingStatusTable' %d"
        ) % (
            self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
            self.cfg.entHome,
            host,
            port,
            300,  # 5 min timeout
        )
        status = E.execute([E.getCrtHostName()], talker_cmd, None, 0)
        if status != E.ERR_OK:
            logging.error("Error draining urlmanagers [%s]" % status)
            return 1

        # Make sure that the file is out.
        shard = servertype.GetPortShard(port)
        table_file = "%surlmanager_out_table_%02d_of_%02d_epoch%010d" % (
            self.cfg.getGlobalParam('NAMESPACE_PREFIX'),
            shard, shard_count, rt_epoch)
        status, _ = E.run_fileutil_command(self.cfg.globalParams,
                                           "ls %s" % table_file)
        if status != E.ERR_OK:
            logging.error(
                "The status table file [%s] is not there" % table_file)
            return 1

    return 0
def RemoveReportFiles(self, html_file, valid_file):
    """Delete a report's HTML file together with its valid file.

    Both paths are removed with a single fileutil 'rm -f'.  An error status
    from fileutil is only logged.

    Args:
      html_file: path of the report file.
      valid_file: path of the report's valid file.
    """
    targets = (html_file, valid_file)
    status, _ = E.run_fileutil_command(self.entConfig,
                                       'rm -f %s %s' % targets)
    if status:
        logging.error('Failed to remove report files: %s and %s' % targets)
def RemoveOldQueue(self, encQueueName):
    """Delete the data file and the index file of a crawl queue snapshot.

    Both paths are removed with a single fileutil 'rm -f'.  An error status
    from fileutil is only logged.

    Args:
      encQueueName: queue name as accepted by getCrawlQueueFileName() and
        getCrawlQueueIndexFileName().
    """
    data_path = self.getCrawlQueueFileName(encQueueName)
    index_path = self.getCrawlQueueIndexFileName(encQueueName)
    rm_cmd = 'rm -f %s %s' % (data_path, index_path)
    status, _ = E.run_fileutil_command(self.entConfig, rm_cmd)
    if status:
        logging.error(
            'Failed to remove crawlqueue snapshot file for %s' % encQueueName)
def execMultiTimes(self, to_exec, max_tries=10):
    """Run a fileutil (gfs) command, retrying until it succeeds.

    Args:
      to_exec: the fileutil command string to run.
      max_tries: maximum number of attempts.

    Returns:
      true as soon as one attempt returns E.ERR_OK; false, after logging an
      error, when every attempt failed (or max_tries is not positive).
    """
    attempt = 0
    while attempt < max_tries:
        if attempt > 0:
            # Wait between attempts only; sleeping after the final failure
            # would just delay the error report by 10 seconds.
            time.sleep(10)
        err, _ = E.run_fileutil_command(self.globalParams, to_exec)
        if err == E.ERR_OK:
            return true
        attempt += 1

    logging.error("Error executing %s" % to_exec)
    return false
def execMultiTimes(self, to_exec, max_tries=10):
    """Run a fileutil (gfs) command, retrying until it succeeds.

    Args:
      to_exec: the fileutil command string to run.
      max_tries: maximum number of attempts.

    Returns:
      true as soon as one attempt returns E.ERR_OK; false, after logging an
      error, when every attempt failed (or max_tries is not positive).
    """
    attempt = 0
    while attempt < max_tries:
        if attempt > 0:
            # Wait between attempts only; sleeping after the final failure
            # would just delay the error report by 10 seconds.
            time.sleep(10)
        err, _ = E.run_fileutil_command(self.globalParams, to_exec)
        if err == E.ERR_OK:
            return true
        attempt += 1

    logging.error("Error executing %s" % to_exec)
    return false
def MakeGoogleDir(entconfig, dir):
    """Create a directory in the Google filesystem unless it already exists.

    One trailing '/' is stripped first (gfs does not tolerate it).  The path
    is then stat'ed through gfile; if it is not a directory, or the check
    raises anything at all, 'mkdir -p' is run through fileutil.  A failing
    mkdir is logged together with fileutil's output.

    Args:
      entconfig: configuration object handed to E.run_fileutil_command.
      dir: path of the directory to create.
    """
    try:
        if dir[-1] == '/':
            # gfs doesn't tolerate this trailing slash
            dir = dir[:-1]
        already_a_dir = stat.S_ISDIR(gfile.File_Stat(dir)[0])
    except:
        # Any failure while checking (an empty `dir` included) is treated as
        # "directory missing", so the mkdir below is still attempted.
        already_a_dir = 0

    if already_a_dir:
        return

    status, output = E.run_fileutil_command(entconfig, 'mkdir -p %s' % dir)
    if status:
        logging.error('Failed on mkdir for %s. Error: %s' % (dir, output))
def MakeGoogleDir(entconfig, dir):
    """Create a directory in the Google filesystem unless it already exists.

    One trailing '/' is stripped first (gfs does not tolerate it).  The path
    is then stat'ed through gfile; if it is not a directory, or the check
    raises anything at all, 'mkdir -p' is run through fileutil.  A failing
    mkdir is logged together with fileutil's output.

    Args:
      entconfig: configuration object handed to E.run_fileutil_command.
      dir: path of the directory to create.
    """
    try:
        if dir[-1] == '/':
            # gfs doesn't tolerate this trailing slash
            dir = dir[:-1]
        already_a_dir = stat.S_ISDIR(gfile.File_Stat(dir)[0])
    except:
        # Any failure while checking (an empty `dir` included) is treated as
        # "directory missing", so the mkdir below is still attempted.
        already_a_dir = 0

    if already_a_dir:
        return

    status, output = E.run_fileutil_command(entconfig, 'mkdir -p %s' % dir)
    if status:
        logging.error('Failed on mkdir for %s. Error: %s' % (dir, output))
def CopyRawReportFromGfsToLocal(self, reportName, collection):
    """Copy a raw report from GFS to its local path so file_handler can use it.

    The local target directory is created first (liblog.MakeDir), then the
    report is copied with fileutil 'copy -f' under COMMAND_TIMEOUT_PERIOD.

    Args:
      reportName: name of the raw report.
      collection: collection the report belongs to.

    Returns:
      true when the copy succeeded; false, after logging the fileutil output,
      when it failed.
    """
    remote_path, _ = liblog.get_report_filenames(
        self.entConfig, liblog.RAW_REPORT, reportName, collection)
    local_path = liblog.get_local_raw_report_filename(
        self.entConfig, reportName, collection)
    liblog.MakeDir(os.path.dirname(local_path))

    copy_cmd = 'copy -f %s %s' % (remote_path, local_path)
    status, output = E.run_fileutil_command(self.entConfig, copy_cmd,
                                            COMMAND_TIMEOUT_PERIOD)
    if not status:
        return true

    logging.error('Failed to make copy from gfs for %s. Error: %s' % (
        local_path, output))
    return false
def removefileifexists(self, filename): '''Remove a file if it exists, return 0 if removal succeeded''' self.updatelock.acquire() try: try: # The caller may want to remove GFS file or localfile, # call fileutil to take care of all of them. rm_cmd = "rm -f %s" % filename err, out = E.run_fileutil_command(self.cfg.globalParams, rm_cmd) if err != E.ERR_OK: logging.error("Failed to remove file %s" % filename) logging.error("fileutil output: %s" % out) return "1" else: return "0" except IOError, e: logging.error("Failed to remove file %s" % filename) logging.error(str(e)) return "1" finally: self.updatelock.release()
def makePhysicalFile(self, virtualFile, clientName, grepString, fileArg):
    """Materialize an exportable "virtual" file as a real file on this host.

    The virtual file is looked up in the global FILE_TABLE.  Its input
    file(s) are then piped through optional tail / tac / grep / cut stages
    into the table entry's output path by a shell command run on the current
    machine.

    Args:
      virtualFile: key into FILE_TABLE (for example 'WEB_LOG' or 'FEED_LOG').
      clientName: handed to the entry's path builders; for 'WEB_LOG' it is
        also passed as the collection to CopyRawReportFromGfsToLocal.
      grepString: optional string; when set, lines are filtered with
        'grep -i -F' on it.
      fileArg: file argument; every character other than ASCII letters,
        digits, '_', '-' and '%' is dropped before use.

    Returns:
      The path of the resulting file: the output path when a command was
      run, the input path when no processing was needed, or None on error
      (unknown virtual file, or a failed copy from GFS).
    """
    # Sanitize fileArg.
    allowed_chars = string.ascii_letters + string.digits + '_-%'
    fileArg = ''.join([ch for ch in fileArg if ch in allowed_chars])

    # Every exportable file must have an entry in the global FILE_TABLE.
    if not (virtualFile and FILE_TABLE.has_key(virtualFile)):
        return None
    entry = FILE_TABLE[virtualFile]

    source_path = entry.getPathIn(self.cfg.globalParams, clientName,
                                  fileArg, grepString)
    dest_path = entry.getPathOut(self.cfg.globalParams, clientName,
                                 fileArg, grepString)
    grep_pattern = entry.aux_grep
    cut_args = entry.aux_cut
    host = E.getCrtHostName()
    gfs_tmp = None

    # Web and feed logs may have to be fetched from GFS first.
    if (virtualFile in ('WEB_LOG', 'FEED_LOG') and
            self.cfg.getGlobalParam('GFS_ALIASES') and
            not os.path.exists(source_path)):
        if virtualFile == 'WEB_LOG':
            # Copy web log from GFS to the log directory.
            copied = self.cfg.logmanager.CopyRawReportFromGfsToLocal(
                fileArg, clientName)
            if not (copied and os.path.exists(source_path)):
                logging.error('Failed on CopyRawReportFromGfsToLocal()')
                return None
        else:
            # Copy the feed log from GFS into a temporary file next to the
            # output file and read from that instead.
            gfs_tmp = dest_path + "_fromGFS"
            status, _ = E.run_fileutil_command(
                self.cfg.globalParams,
                "cat %s > %s" % (source_path, gfs_tmp), 5)
            if status != E.ERR_OK:
                logging.error(
                    "Failed to copy %s to %s" % (source_path, gfs_tmp))
                return None
            source_path = gfs_tmp

    # Count the files that we can get.
    found = E.ls([host], source_path)
    file_count = 0
    if found:
        file_count = len(found)

    # If only one file is wanted and several matched, use the last one.
    if file_count > 0 and not entry.multi_files:
        source_path = found[-1]

    # Build the shell command that produces the actual file.
    command = None
    if file_count == 0:
        # No files available.
        command = "echo -e '' > %s" % dest_path
    else:
        if virtualFile == 'FEED_LOG':
            command = "tail -n +2 %s " % source_path
            if entry.do_tac:
                command = command + " | tac "
        elif entry.do_tac:
            # The files are reversed before being displayed.
            command = "tac `ls -r %s` " % source_path

        # Optional filters, in pipeline order: the entry's own grep, the
        # caller's grep, then the entry's cut.
        filters = []
        if grep_pattern:
            filters.append(" | grep -- %s" % commands.mkarg(grep_pattern))
        if grepString:
            filters.append(" | grep -i -F -- %s" % commands.mkarg(grepString))
        if cut_args:
            filters.append(" | cut %s" % cut_args)

        if filters:
            if not command:
                # Nothing feeds the pipeline yet: start from the raw files.
                command = "cat `ls -r %s`" % source_path
            command = command + ''.join(filters)

        if command:
            command = command + " > " + dest_path

    # Without a command there is nothing to process: use the path we have.
    if not command:
        return source_path

    # Execute "file+operation > temp_filename".
    E.execute([host], command, None, false)
    if gfs_tmp:
        E.rm([host], gfs_tmp)
    return dest_path
def handleResult(self, jobName, jobToken, returnCode, reportType, collection, reportName, update, html_file, valid_file, new_html_file, new_valid_file):
    """This is like a callback method to take care of the result of
    doLogDump() or doLogReport().  This method is executed in a worker
    thread.

    Args:
      jobName: key of the job in self.runningJobs.
      jobToken: token the job was started with; the job is treated as
        abandoned when self.runningJobs no longer maps jobName to it.
      returnCode: raw exit status of the job, decoded here with
        os.WIFEXITED / os.WEXITSTATUS.
      reportType: type of the report (compared against liblog.RAW_REPORT).
      collection: collection the report belongs to.
      reportName: name of the report.
      update: true when an existing report was being updated; on failure
        the old record is then kept.
      html_file: final location of the report file.
      valid_file: final location of the report's valid file.
      new_html_file: freshly generated report file.
      new_valid_file: freshly generated valid file.
    """
    # Claim the job under joblock; an abandoned job only gets its freshly
    # generated files cleaned up.
    self.joblock.acquire()
    try:
        # if abandoned, don't report.
        if (not self.runningJobs.has_key(jobName) or self.runningJobs[jobName] != jobToken):
            logging.info('Running job for report %s complete, ' 'but it was abandoned.' % reportName)
            self.RemoveReportFiles(new_html_file, new_valid_file)
            return
        else:
            del self.runningJobs[jobName]
    finally:
        self.joblock.release()

    # Decode the exit status; a job that did not exit normally falls through
    # to the failure path below.
    exited = os.WIFEXITED(returnCode)
    if exited:
        returnCode = os.WEXITSTATUS(returnCode)

    # good path.
    if (exited and (returnCode == liblog.STILL_VALID or returnCode == liblog.SUCCESS)):
        logging.info('Log report %s for collection %s generated correctly' % \
                     (reportName, collection))
        if returnCode == liblog.SUCCESS:
            # New files were generated: copy them to their final locations.
            (err, _) = E.run_fileutil_command(self.entConfig, 'copy %s %s' % \
                                              (new_valid_file, valid_file), COMMAND_TIMEOUT_PERIOD)
            if err:
                # This may make the report invalid next time we try to update,
                # but it's ok to use.
                logging.error('Failed to copy report valid file %s to %s' % \
                              (new_valid_file, valid_file))
            (err, _) = E.run_fileutil_command(self.entConfig, 'copy %s %s' % \
                                              (new_html_file, html_file), COMMAND_TIMEOUT_PERIOD)
            if err:
                self.RemoveReportFiles(new_html_file, new_valid_file)
                logging.error('Failed to copy complete report %s to %s' % \
                              (new_html_file, html_file))
                # change returnCode to execute the failure path.
                returnCode = liblog.FAILURE
            else:
                self.setReportCompleteState(reportType, collection, reportName, COMPLETE)
                # With GFS in use, raw reports are also copied to local disk.
                if self.entConfig.var('GFS_ALIASES') and \
                   reportType == liblog.RAW_REPORT:
                    self.CopyRawReportFromGfsToLocal(reportName, collection)
        else:
            # STILL_VALID: only the state is marked COMPLETE, no files are
            # copied.
            self.setReportCompleteState(reportType, collection, reportName, COMPLETE)

    # failure path.
    if not exited or returnCode == liblog.FAILURE:
        logging.error('Error running log report command for report %s' % \
                      reportName)
        self.RemoveReportFiles(new_html_file, new_valid_file)
        if update:
            # if we fail to update, leave the old one untouched.
            self.setReportCompleteState(reportType, collection, reportName, COMPLETE, takeOldRecord=true)
        else:
            self.setReportCompleteState(reportType, collection, reportName, FAILURE)
            # NOTE(review): the nesting of this decrement under the
            # non-update branch was reconstructed from whitespace-collapsed
            # source -- confirm against the original file.  It always
            # adjusts the RAW_REPORT counter, whatever reportType is;
            # confirm that is intended.
            self.logreplock.acquire()
            try:
                self.reportCount[liblog.RAW_REPORT] -= 1
            finally:
                self.logreplock.release()
def makePhysicalFile(self, virtualFile, clientName, grepString, fileArg):
    """Materialize an exportable "virtual" file as a real file on this host.

    The virtual file is looked up in the global FILE_TABLE.  Its input
    file(s) are then piped through optional tail / tac / grep / cut stages
    into the table entry's output path by a shell command run on the current
    machine.

    Args:
      virtualFile: key into FILE_TABLE (for example 'WEB_LOG' or 'FEED_LOG').
      clientName: handed to the entry's path builders; for 'WEB_LOG' it is
        also passed as the collection to CopyRawReportFromGfsToLocal.
      grepString: optional string; when set, lines are filtered with
        'grep -i -F' on it.
      fileArg: file argument; every character other than ASCII letters,
        digits, '_', '-' and '%' is dropped before use.

    Returns:
      The path of the resulting file: the output path when a command was
      run, the input path when no processing was needed, or None on error
      (unknown virtual file, or a failed copy from GFS).
    """
    # Sanitize fileArg.
    allowed_chars = string.ascii_letters + string.digits + '_-%'
    fileArg = ''.join([ch for ch in fileArg if ch in allowed_chars])

    # Every exportable file must have an entry in the global FILE_TABLE.
    if not (virtualFile and FILE_TABLE.has_key(virtualFile)):
        return None
    entry = FILE_TABLE[virtualFile]

    source_path = entry.getPathIn(self.cfg.globalParams, clientName,
                                  fileArg, grepString)
    dest_path = entry.getPathOut(self.cfg.globalParams, clientName,
                                 fileArg, grepString)
    grep_pattern = entry.aux_grep
    cut_args = entry.aux_cut
    host = E.getCrtHostName()
    gfs_tmp = None

    # Web and feed logs may have to be fetched from GFS first.
    if (virtualFile in ('WEB_LOG', 'FEED_LOG') and
            self.cfg.getGlobalParam('GFS_ALIASES') and
            not os.path.exists(source_path)):
        if virtualFile == 'WEB_LOG':
            # Copy web log from GFS to the log directory.
            copied = self.cfg.logmanager.CopyRawReportFromGfsToLocal(
                fileArg, clientName)
            if not (copied and os.path.exists(source_path)):
                logging.error('Failed on CopyRawReportFromGfsToLocal()')
                return None
        else:
            # Copy the feed log from GFS into a temporary file next to the
            # output file and read from that instead.
            gfs_tmp = dest_path + "_fromGFS"
            status, _ = E.run_fileutil_command(
                self.cfg.globalParams,
                "cat %s > %s" % (source_path, gfs_tmp), 5)
            if status != E.ERR_OK:
                logging.error(
                    "Failed to copy %s to %s" % (source_path, gfs_tmp))
                return None
            source_path = gfs_tmp

    # Count the files that we can get.
    found = E.ls([host], source_path)
    file_count = 0
    if found:
        file_count = len(found)

    # If only one file is wanted and several matched, use the last one.
    if file_count > 0 and not entry.multi_files:
        source_path = found[-1]

    # Build the shell command that produces the actual file.
    command = None
    if file_count == 0:
        # No files available.
        command = "echo -e '' > %s" % dest_path
    else:
        if virtualFile == 'FEED_LOG':
            command = "tail -n +2 %s " % source_path
            if entry.do_tac:
                command = command + " | tac "
        elif entry.do_tac:
            # The files are reversed before being displayed.
            command = "tac `ls -r %s` " % source_path

        # Optional filters, in pipeline order: the entry's own grep, the
        # caller's grep, then the entry's cut.
        filters = []
        if grep_pattern:
            filters.append(" | grep -- %s" % commands.mkarg(grep_pattern))
        if grepString:
            filters.append(" | grep -i -F -- %s" % commands.mkarg(grepString))
        if cut_args:
            filters.append(" | cut %s" % cut_args)

        if filters:
            if not command:
                # Nothing feeds the pipeline yet: start from the raw files.
                command = "cat `ls -r %s`" % source_path
            command = command + ''.join(filters)

        if command:
            command = command + " > " + dest_path

    # Without a command there is nothing to process: use the path we have.
    if not command:
        return source_path

    # Execute "file+operation > temp_filename".
    E.execute([host], command, None, false)
    if gfs_tmp:
        E.rm([host], gfs_tmp)
    return dest_path