def start(self) -> None:
    """Start the scheduler.

    Registers every configured module as a scheduled task (ordered by
    task priority), adds the built-in housekeeping jobs ``reload``
    (re-read configuration hourly) and ``restart`` (daily graceful
    shutdown once idle), then hands control to the superclass loop.
    """
    self.title()
    self.readConfiguration()
    # Register each module; 'immediately' and 'interval' can be
    # overridden per task via configuration, falling back to the
    # task's own defaults (Stream.ifelse keeps the configured value
    # when truthy, otherwise uses the task's default interval).
    (Stream.of(self.modules.values()).sorted(
        key=lambda task: task.priority).each(lambda task: self.every(
            task.name,
            self.configuration('immediately', name=task.name, default=task.immediately),
            self.configuration('interval', name=task.name, default=task.interval,
                               convert=lambda v: Stream.ifelse(v, alternator=task.interval)),
            task.priority,
            task(self),
            ())))

    def configurator() -> bool:
        # Housekeeping job: refresh configuration, always reschedule.
        self.readConfiguration()
        return True

    def stopper() -> bool:
        # Housekeeping job: drain finished children; once nothing is
        # running and no deferred work remains, stop the scheduler and
        # do not reschedule this job.
        while self.control.running:
            if self.wait() is None:
                break
        if not self.control.running and not self.deferred:
            self.stop()
            return False
        return True
    self.every('reload', False, '1h', 0, configurator, ())
    self.every('restart', False, '24h', 0, stopper, ())
    super().start()
def show_status(self) -> None:
    """Write a snapshot of the scheduler state to the log: blocked
    companies, queued/running processes, deferred jobs and the
    currently scheduled events."""
    blocked = ', '.join('%s=%r' % lock for lock in self.locks.items()) if self.locks else 'none'
    logger.info('Currently blocked companies: %s' % (blocked, ))
    # one group per queue priority, plus the currently running set
    groups = [('queued prio %d' % prio, pending) for (prio, pending) in self.control.queued.items()]
    groups.append(('running', self.control.running))
    for (name, queue) in groups:
        logger.info('Currently %d %s processes' % (len(queue), name))
        for entry in queue:
            pid_info = ' (%d)' % entry.pid if entry.pid is not None else ''
            logger.info(' %s%s' % (entry.description, pid_info))
    for job in self.deferred:
        logger.info('%s: deferred' % job.description)
    if self._schedule.queue:
        logger.info('Currently scheduled events:')
        for event in self._schedule.queue:
            # event.time is an absolute timestamp; reduce to time of day
            ts = event.time % (24 * 60 * 60)
            logger.info('\t%2d:%02d:%02d [Prio %d]: %s' % (
                ts // 3600, (ts // 60) % 60, ts % 60,
                event.priority, cast(Schedule.Job, event.action).name))
def get_ready_to_run(self) -> List[METAFile]:
    """Return the meta files that are ready to be sent.

    Attaches the matching stamp file to each candidate, then removes
    candidates whose mailing no longer exists or is marked as deleted
    (their files are moved to the ``deleted`` directory).
    """
    (ready, stamps, finals) = self.scan_ready_to_run()
    if ready:
        for info in ready:
            info.stamp = stamps[info.basename]
        with DB() as db:
            invalids: Set[int] = set()
            # validate every distinct mailing id against the database
            for mailing in sorted({info.mailing for info in ready}):
                rq = db.querys(
                    'SELECT deleted '
                    'FROM mailing_tbl '
                    'WHERE mailing_id = :mailingID',
                    {'mailingID': mailing})
                if rq is None:
                    logger.info('Mailing %d no more existing' % mailing)
                    invalids.add(mailing)
                elif rq.deleted:
                    logger.info('Mailing %d is marked as deleted' % mailing)
                    invalids.add(mailing)
            if invalids:
                # park data and stamp files of invalid mailings
                for info in [candidate for candidate in ready if candidate.mailing in invalids]:
                    self.move(info.path, self.deleted)
                    if info.stamp is not None:
                        self.move(info.stamp.path, self.deleted)
                ready = [candidate for candidate in ready if candidate.mailing not in invalids]
    if ready:
        logger.info('{count:,d} files are ready to send'.format(count=len(ready)))
    return ready
def readConfiguration(self) -> None:
    """Read the company configuration and the optional company
    restriction list from the database."""
    company_config = CompanyConfig()
    company_config.read()
    # configuration values of class 'generate', keyed by their name
    self.config = {
        cv.name: cv.value
        for cv in company_config.scan_config(class_names=['generate'], single_value=True)
    }
    # optional restriction of processing to a set of company ids
    self.companies = self.configuration(
        'companies', convert=lambda v: Stream.ifelse(Range(v)))
    if self.companies:
        logger.info('Limit operations on these companies: %s' % self.companies)
def executor(self) -> bool:
    """Run the requested janitor services.

    Builds the registry of available ``Janitor`` subclasses from the
    module namespace, validates the requested service names, then runs
    each service (honouring dry-run mode).

    Returns:
        True on success, False if an unknown service was requested.
    """
    available = {
        candidate.service: candidate
        for candidate in globals().values()
        if type(candidate) is type and issubclass(candidate, Janitor) and candidate is not Janitor
    }
    for service in self.services:
        if service not in available:
            # Bug fix: the message previously formatted self.services
            # (the requested names) although it claims to list the
            # available services.
            logger.error(
                '{service}: not known, available are {services}'.format(
                    service=service,
                    services=', '.join(sorted(available))))
            return False
    for service in self.services:
        available[service]().run(not self.dryrun)
    return True
def pendings (self) -> bool:
    """Reap finished child processes and launch queued ones.

    First drains all finished children (non-blocking), then starts
    queued entries in ascending priority order until the configured
    process limit is reached.  Returns True while there is still work
    running or queued.
    """
    # drain all already finished children without blocking
    while self.control.running:
        pending = self.wait ()
        if pending is None:
            break
        logger.debug ('%s: process finished' % pending.description)
    # launch queued work while below the parallel process limit
    while self.control.queued and len (self.control.running) < self.processes:
        # lowest priority value first
        prio = Stream.of (self.control.queued.keys ()).sorted ().first ()
        w = self.control.queued[prio].pop (0)
        if not self.control.queued[prio]:
            del self.control.queued[prio]
        #
        if w.prepare is not None:
            try:
                w.prepare (*w.args, **w.kwargs)
            except Exception as e:
                logger.exception ('%s: prepare fails: %s' % (w.description, e))

        def starter () -> bool:
            # executed in the spawned child; closure over w is safe as
            # the process is spawned immediately below in this iteration
            self.processtitle (w.description)
            rc = w.method (*w.args, **w.kwargs)
            self.processtitle ()
            return rc
        w.pid = self.control.control.spawn (starter)
        self.control.running.append (w)
        logger.debug ('%s: launched' % w.description)
    return bool (self.control.running or self.control.queued)
def __str__(self) -> str:
    """Render the instance as ``ClassName <key=value, ...>`` listing
    only the attributes that are set (not None)."""
    fields = (
        ('mailing', self.mailing_id),
        ('customer', self.customer_id),
        ('source', self.source),
        ('selector', self.selector))
    parameter = ', '.join(
        '{key}={value!r}'.format(key=key, value=value)
        for (key, value) in fields
        if value is not None)
    return '{name} <{parameter}>'.format(
        name=self.__class__.__name__,
        parameter=parameter)
def use_arguments(self, args: argparse.Namespace) -> None:
    """Copy parsed command line arguments into instance attributes."""
    self.dryrun = args.dryrun
    self.quiet = args.quiet
    self.language = args.language
    self.company_id = args.company_id
    self.description = args.description
    # each element may itself be a comma separated list; flatten it
    self.tags = [tag for element in args.tags for tag in listsplit(element)]
    self.only_tags = args.only_tags
    self.filename = args.filename[0]
def scan_ready_to_run(
    self
) -> Tuple[List[METAFile], Dict[str, METAFile], DefaultDict[str, List[METAFile]]]:
    """Scan the meta directory for mail generation artifacts.

    Classifies AgnMail files by extension: ``.final`` (generation
    complete marker), ``.stamp`` (per block marker) and ``.xml.gz``
    (data).  Dangling finals/stamps without data are archived.

    Returns a tuple (ready, stamps, finals) where ``ready`` is the
    sorted list of data files that have both a stamp and a final
    (single-shot mailings first, then by timestamp and block number).
    """
    finals: DefaultDict[str, List[METAFile]] = defaultdict(list)
    stamps: Dict[str, METAFile] = {}
    availables: Set[str] = set()
    basenames: Set[str] = set()
    data: List[METAFile] = []
    for filename in (Stream(os.listdir(self.meta)).filter(lambda f: bool(
            f.startswith('AgnMail') and
            (f.endswith('.xml.gz') or
             f.endswith('.stamp') or
             f.endswith('.final'))))):
        info = METAFile(os.path.join(self.meta, filename))
        if not info.valid:
            continue
        #
        if info.extension == 'final':
            finals[info.mailid].append(info)
        elif info.extension == 'stamp':
            stamps[info.basename] = info
        elif info.extension.startswith('xml'):
            availables.add(info.mailid)
            basenames.add(info.basename)
            if info.is_ready():
                data.append(info)
    # finals whose data files are gone are no longer needed
    for info in Stream(finals.values()).chain().filter(
            lambda i: i.mailid not in availables):
        logger.info('No more data file for final %s found, archive it' % info.filename)
        self.move(info.path, self.archive)
    # stamps without a matching data file are archived as well
    for info in Stream(
            stamps.values()).filter(lambda i: i.basename not in basenames):
        logger.info('Move dangeling stamp file %s to archive' % info.filename)
        self.move(info.path, self.archive)
    # when the queue is full, only single (non-block) files pass through
    isfull = self.queue_is_full()
    ready = (Stream(data).filter(
        lambda i: i.basename in stamps and i.mailid in finals and
        (not isfull or i.single)).sorted(
            key=lambda i: (not i.single, i.timestamp, i.blocknr)).list())
    return (ready, stamps, finals)
def configuration (self, key: str, name: Optional[str] = None, default: Any = None, convert: Optional[Callable[[Any], Any]] = None) -> Any:
    """Look up a configuration value for ``key``.

    Candidate keys are probed from most to least specific:
    "<name>:<key>[<host>]", "<key>[<host>]", "<name>:<key>" and the
    plain "<key>" (host-qualified variants use the module global
    ``host``).  The first candidate present in self.config wins,
    otherwise ``default`` is used.  A non-None result is passed through
    ``convert`` when given.

    NOTE(review): whether the finisher (and thus ``convert``) is also
    applied to the ``no=default`` fallback depends on Stream.first's
    implementation — confirm there.
    """
    return (
        Stream.of (
            ('%s:%s[%s]' % (name, key, host)) if name is not None else None,
            '%s[%s]' % (key, host),
            ('%s:%s' % (name, key)) if name is not None else None,
            key
        )
        .filter (lambda k: k is not None and k in self.config)
        .map (lambda k: self.config[k])
        .first (no = default, finisher = lambda v: v if convert is None or v is None else convert (v))
    )
def term (self) -> None:
    """Terminate the scheduler: signal all running children, wait for
    them to exit and schedule a final run for deferred tasks."""
    super ().term ()
    if self.control.running:
        self.title ('in termination')
        for child in self.control.running:
            logger.info ('Sending terminal signal to %s' % child.description)
            self.control.control.term (child.pid)
        while self.control.running:
            logger.info ('Waiting for %d remaining child processes' % len (self.control.running))
            self.wait (True)
    if self.deferred:
        logger.info ('Schedule final run for %d deferred tasks' % len (self.deferred))
        self.defers ()
def __init__ (self) -> None:
    """Set up spool directories, discover queue directories and create
    shared state for queue selection and mail transfer."""
    self.incoming = syscfg.get_str ('direct-path-incoming', os.path.join (base, 'DIRECT'))
    self.archive = syscfg.get_str ('direct-path-archive', os.path.join (base, 'ARCHIVE'))
    self.recover = syscfg.get_str ('direct-path-recover', os.path.join (base, 'RECOVER'))
    # default queue set: every QUEUE* directory under base which is not
    # explicitly disabled by a ".ignore" marker file
    detected_queues = [
        path
        for path in (os.path.join (base, entry) for entry in os.listdir (base) if entry.startswith ('QUEUE'))
        if os.path.isdir (path) and not os.path.isfile (os.path.join (path, '.ignore'))
    ]
    self.queues = syscfg.get_list ('direct-path-queues', ',', detected_queues)
    if len (self.queues) == 0:
        raise error ('No queues for spooling found')
    self.queues.sort ()
    # shared counter for round-robin queue selection across processes
    self.cur = multiprocessing.Value ('i', 0)
    self.mta = MTA ()
def joining (self, job: Watchdog.Job, ec: Daemonic.Status) -> None:
    """Watchdog join hook for a finished child process.

    If the child wrote its output to a temporary file and did not exit
    regularly, log that output (truncated to the last ``limit`` KB and
    ``limit`` lines when a limit is configured), then remove the
    temporary file.
    """
    if self.output_path is not None and self.is_temp_output and os.path.isfile (self.output_path):
        if ec.exitcode is None or ec.exitcode != Watchdog.EC_EXIT:
            with open (self.output_path, errors = 'backslashreplace') as fd:
                if self.limit > 0:
                    st = os.fstat (fd.fileno ())
                    if st.st_size > self.limit * 1024:
                        # only read the tail of an oversized file
                        fd.seek (-self.limit * 1024, 2)
                        truncated = True
                    else:
                        truncated = False
                    # NOTE(review): remain() presumably keeps the last
                    # limit+1 lines — confirm against Stream.remain
                    lines = Stream.of (fd).remain (self.limit + 1).list ()
                    if len (lines) > self.limit:
                        truncated = True
                    if truncated:
                        # first line may have been cut mid-way
                        lines[0] = '[..]'
                    output = '\n'.join (lines) + '\n'
                else:
                    output = fd.read ()
            if output:
                logger.info ('Output of unexpected terminated process:\n%s' % output)
        os.unlink (self.output_path)
def wait(self, block: bool = False) -> Optional[ScheduleGenerate.Pending]:
    """Reap one finished child process.

    Joins the subprocess controller (blocking when ``block`` is True),
    matches the reaped pid to a running entry, invokes its optional
    finalizer and removes it from the running list.  Returns the reaped
    entry or None when nothing finished.
    """
    w: Optional[ScheduleGenerate.Pending] = None
    while self.control.running and w is None:
        rc = self.control.subprocess.join(timeout=None if block else 0)
        if not rc.pid:
            # nothing terminated (non-blocking) — give up
            break
        #
        w = (Stream.of(self.control.running).filter(
            lambda r: bool(r.pid == rc.pid)).first(no=None))
        if w is not None:
            logger.debug('{desc}: returned with {ec}'.format(
                desc=w.description,
                ec=f'exit with {rc.exitcode}' if rc.exitcode is not None else f'died due to signal {rc.signal}'))
            if w.finalize is not None:
                try:
                    w.finalize(rc, *w.args, **w.kwargs)
                except Exception as e:
                    logger.exception('%s: finalize fails: %s' % (w.description, e))
            self.control.running.remove(w)
            self.lockTitle()
    return w
def collect_new_bounces(self) -> None: #{{{
    """Collect new hard/soft bounces from bounce_tbl into
    bounce_collect_tbl.

    For every (company, customer) pair within the current timestamp
    window only the bounce with the highest detail code in the range
    [400, 520) is kept and inserted; finally the list of affected
    company ids is gathered.
    """
    logger.info('Start collecting new bounces')
    with self.db.request() as cursor:
        data: Dict[str, Any] = {}
        query = ('SELECT customer_id, company_id, mailing_id, detail '
                 'FROM bounce_tbl '
                 'WHERE %s ORDER BY company_id, customer_id' %
                 self.timestamp.make_between_clause('timestamp', data))
        #
        Update = namedtuple(
            'Update', ['customer_id', 'company_id', 'mailing_id', 'detail'])

        class Collect(Stream.Collector):
            # stream collector keeping, per (company, customer), the
            # bounce with the highest detail code in [400, 520)
            def supplier(self) -> Any:
                self.data: Dict[Tuple[int, int], Update] = {}
                self.uniques = 0
                return self

            def accumulator(self, supplier: Any, element: Any) -> None:
                update = Update(*element)
                if update.detail >= 400 and update.detail < 520:
                    key = (update.company_id, update.customer_id)
                    try:
                        if update.detail > self.data[key].detail:
                            self.data[key] = update
                    except KeyError:
                        # first bounce for this (company, customer)
                        self.data[key] = update
                        self.uniques += 1

            def finisher(self, supplier: Any, count: int) -> Any:
                return (count, self.uniques, self.data)
        records: int
        uniques: int
        updates: Dict[Tuple[int, int], Update]
        (records, uniques, updates) = cursor.stream(query, data).collect(Collect())
        logger.info(
            'Read {records:,d} records ({uniques:,d} uniques) and have {updates:,d} for insert'
            .format(records=records, uniques=uniques, updates=len(updates)))
        #
        inserts = 0
        query = (
            'INSERT INTO bounce_collect_tbl (customer_id, company_id, mailing_id, timestamp) VALUES (:customer_id, :company_id, :mailing_id, current_timestamp)'
        )
        for update in Stream(updates.values()).sorted():
            cursor.update(
                query, {
                    'customer_id': update.customer_id,
                    'company_id': update.company_id,
                    'mailing_id': update.mailing_id
                })
            inserts += 1
            # commit in batches to keep transactions small
            if inserts % 10000 == 0:
                cursor.sync()
                logger.info(
                    f'Inserted {inserts:,d} records into bounce_collect_tbl')
        cursor.sync()
        logger.info(
            f'Read {records:,d} records ({uniques:,d} uniques) and inserted {inserts:,d}'
        )
        #
        # collect affected companies for further processing
        company_ids: List[int] = []
        query = 'SELECT distinct company_id FROM bounce_collect_tbl'
        for record in self.db.query(query):
            if record.company_id is not None and record.company_id > 0:
                company_ids.append(record.company_id)
def read_database(self, auto: List[Autoresponder]) -> List[str]:
    """Build the bounce address validator (BAV) rule file content from
    the database.

    Emits accept/alias lines for feedback loop (fbl) addresses, for each
    active mailloop entry and its filter address aliases, collects
    autoresponder definitions into ``auto``, updates the per-rid rule
    set and registers newly seen domains with the MTA.

    Bug fixes applied in this revision:
    - two log messages used '{row}' without the f-prefix, logging the
      literal text instead of the row
    - ``forward.ird`` was a typo for ``forward.rid`` (would raise
      AttributeError, not caught by Ignore(ValueError))

    Returns the list of rule file lines.
    """
    rc: List[str] = []
    with DB() as db:
        company_list: List[int] = []
        new_domains: Dict[str, BavUpdate.RID] = {}
        forwards: List[BavUpdate.Forward] = []
        seen_domains: Set[str] = set()
        accepted_forwards: Set[str] = set()
        ctab: Dict[int, str] = {}
        #
        # feedback loop addresses for the filter domain and all known domains
        rc.append(f'fbl@{self.filter_domain}\taccept:rid=unsubscribe')
        for domain in self.domains:
            if domain not in seen_domains:
                rc.append(f'fbl@{domain}\talias:fbl@{self.filter_domain}')
                seen_domains.add(domain)
        if self.filter_domain not in seen_domains:
            new_domains[self.filter_domain] = BavUpdate.RID(
                rid=0, domain=self.filter_domain)
            seen_domains.add(self.filter_domain)
        #
        missing = []
        for row in db.query(
                'SELECT company_id, mailloop_domain FROM company_tbl WHERE status = :status',
                {'status': 'active'}):
            if row.mailloop_domain:
                ctab[row.company_id] = row.mailloop_domain
                if row.mailloop_domain not in seen_domains:
                    rc.append(
                        f'fbl@{row.mailloop_domain}\talias:fbl@{self.filter_domain}'
                    )
                    if row.mailloop_domain not in self.mtdom and row.mailloop_domain.lower(
                    ) != self.fqdn:
                        new_domains[row.mailloop_domain] = BavUpdate.RID(
                            rid=0, domain=row.mailloop_domain)
                    seen_domains.add(row.mailloop_domain)
            else:
                missing.append(row.company_id)
            company_list.append(row.company_id)
        if missing:
            logger.debug(
                'Missing mailloop_domain for companies {companies}'.format(
                    companies=Stream(missing).sorted().join(', ')))
        #
        seen_rids: Set[int] = set()
        seen_filter_addresses: Dict[str, str] = {}
        for row in db.query(
                'SELECT rid, shortname, company_id, filter_address, '
                ' forward_enable, forward, ar_enable, '
                ' subscribe_enable, mailinglist_id, form_id, timestamp, '
                ' spam_email, spam_required, spam_forward, '
                ' autoresponder_mailing_id, security_token '
                'FROM mailloop_tbl '
                'ORDER BY rid'):
            if row.company_id not in company_list or row.rid is None:
                if row.company_id not in company_list:
                    # fixed: f-prefix was missing, logging literal '{row}'
                    logger.debug(f'{row}: ignore due to inactive company')
                elif row.rid is None:
                    # fixed: f-prefix was missing, logging literal '{row}'
                    logger.error(
                        f'{row}: ignore due to empty rid, should never happen!'
                    )
                continue
            #
            row_id = f'{row.rid} {row.shortname} [{row.company_id}]'
            seen_rids.add(row.rid)
            domains: List[str] = [self.filter_domain]
            aliases: List[str] = []
            if row.filter_address is not None:
                for alias in listsplit(row.filter_address):
                    if not alias.startswith(self.prefix):
                        with Ignore(ValueError):
                            (local_part, domain_part) = alias.split('@', 1)
                            normalized_alias = '{local_part}@{domain_part}'.format(
                                local_part=local_part,
                                domain_part=domain_part.lower())
                            if normalized_alias in seen_filter_addresses:
                                logger.warning(
                                    f'{row_id}: already seen "{alias}" as "{normalized_alias}" before ({seen_filter_addresses[normalized_alias]})'
                                )
                            else:
                                seen_filter_addresses[normalized_alias] = row_id
                                if domain_part not in domains:
                                    domains.append(domain_part)
                                    if domain_part not in self.mtdom and domain_part not in new_domains:
                                        new_domains[domain_part] = BavUpdate.RID(
                                            rid=row.rid, domain=domain_part)
                                aliases.append(alias)
            #
            ar_enable = False
            if row.ar_enable and row.autoresponder_mailing_id:
                if not row.security_token:
                    logger.error(
                        f'{row_id}: Autoresponder has mailing id, but no security token, not used'
                    )
                else:
                    auto.append(
                        Autoresponder(
                            row.rid,
                            row.timestamp if row.timestamp is not None else datetime.now(),
                            row.autoresponder_mailing_id,
                            row.security_token))
                    ar_enable = True
            #
            try:
                cdomain = ctab[row.company_id]
                if cdomain not in domains:
                    if cdomain in self.domains:
                        domains.append(cdomain)
                    else:
                        logger.debug(
                            f'{row_id}: company\'s domain "{cdomain}" not found in mailertable'
                        )
            except KeyError:
                logger.debug(
                    f'{row_id}: no domain for company found, further processing'
                )
            extra = [f'rid={row.rid}']
            if row.company_id:
                extra.append(f'cid={row.company_id}')
            if row.forward_enable and row.forward:
                forward = row.forward.strip()
                if forward:
                    extra.append(f'fwd={forward}')
                    forwards.append(
                        BavUpdate.Forward(rid=row.rid, address=forward))
            if row.spam_email:
                extra.append(f'spam_email={row.spam_email}')
            if row.spam_forward:
                forward = row.spam_forward.strip()
                if forward:
                    extra.append(f'spam_fwd={forward}')
            if row.spam_required:
                extra.append(f'spam_req={row.spam_required}')
            if ar_enable:
                extra.append(f'ar={row.rid}')
                if row.autoresponder_mailing_id:
                    extra.append(f'armid={row.autoresponder_mailing_id}')
            if row.subscribe_enable and row.mailinglist_id and row.form_id:
                extra.append(f'sub={row.mailinglist_id}:{row.form_id}')
            line = '{prefix}{rid}@{domain}\taccept:{extra}'.format(
                prefix=self.prefix,
                rid=row.rid,
                domain=self.filter_domain,
                extra=','.join([escape(_e) for _e in extra]))
            logger.debug(f'{row_id}: add line: {line}')
            rc.append(line)
            if aliases:
                for alias in aliases:
                    rc.append(
                        f'{alias}\talias:{self.prefix}{row.rid}@{self.filter_domain}'
                    )
                    accepted_forwards.add(alias)
        #
        if seen_rids:
            rules: Dict[int, Dict[str, List[str]]] = {}
            for row in db.query(
                    'SELECT rid, section, pattern FROM mailloop_rule_tbl'):
                if row.rid in seen_rids:
                    try:
                        rule = rules[row.rid]
                    except KeyError:
                        rule = rules[row.rid] = {}
                    try:
                        sect = rule[row.section]
                    except KeyError:
                        sect = rule[row.section] = []
                    sect.append(row.pattern)
            self.update_rules(rules)
        #
        # sanity check forwards against locally handled domains and
        # refuse adding domains that equal a forward target's domain
        for forward in forwards:
            with Ignore(ValueError):
                fdomain = (forward.address.split('@', 1)[-1]).lower()
                for domain in self.mtdom:
                    if domain == fdomain and forward.address not in accepted_forwards:
                        # fixed: forward.ird -> forward.rid
                        logger.warning(
                            f'{forward.rid}: using address "{forward.address}" with local handled domain "{domain}"'
                        )
                refuse = []
                for (domain, new_domain) in ((_d, _n)
                                             for (_d, _n) in new_domains.items()
                                             if _d == fdomain):
                    logger.warning(
                        f'{new_domain.rid}: try to add new domain for already existing forward address "{forward.address}" in {forward.rid}, refused'
                    )
                    refuse.append(domain)
                for domain in refuse:
                    del new_domains[domain]
        #
        if new_domains:
            if self.mta.mta == 'sendmail':
                if os.access(BavUpdate.control_sendmail, os.X_OK):
                    cmd = [BavUpdate.control_sendmail, 'add']
                    for domain in new_domains:
                        cmd.append(domain)
                    logger.info(f'Found new domains, add them using {cmd}')
                    silent_call(*cmd)
                    if os.access(BavUpdate.restart_sendmail, os.X_OK):
                        logger.info(
                            'Restarting sendmail due to domain update')
                        silent_call(BavUpdate.restart_sendmail)
                    else:
                        logger.warning(
                            f'Missing {BavUpdate.restart_sendmail}, no restart of mta perfomed'
                        )
                else:
                    logger.warning(
                        f'Missing {BavUpdate.control_sendmail}, no new domains are added'
                    )
        self.read_mailertable(new_domains)
    return rc
def executor (self) -> bool:
    """Main entry point of the generate service.

    Discovers all active Task subclasses, then either runs the modules
    given on the command line in the foreground (including draining any
    background/deferred work they spawn) or starts the regular job
    queue daemon.
    """
    activator = Activator ()
    # all Task subclasses in this module that define an interval and
    # are enabled by the activator for this program
    modules = (Stream.of (globals ().values ())
        .filter (lambda module: type (module) is type and issubclass (module, Task) and hasattr (module, 'interval'))
        .filter (lambda module: activator.check (['%s-%s' % (program, module.name)]))
        .map (lambda module: (module.name, module))
        .dict ()
    )
    logger.info ('Active modules: %s' % ', '.join (sorted (modules.keys ())))
    schedule = ScheduleGenerate (modules, self.oldest, self.processes)
    if self.modules:
        # explicit module list given: run them once in the foreground
        schedule.readConfiguration ()
        for name in self.modules:
            if name not in modules:
                print ('** %s not known' % name)
            else:
                logger.info ('Module found')
                module = modules[name] (schedule)
                rc = module ()
                schedule.show_status ()
                logger.info ('Module returns %r' % (rc, ))
        if schedule.control.queued or schedule.control.running:
            logger.info ('Execute backgound processes')
            try:
                while schedule.control.queued:
                    schedule.show_status ()
                    schedule.pendings ()
                    if len (schedule.control.running) == self.processes:
                        logger.info ('Currently %d processes running, wait for at least one to terminate' % len (schedule.control.running))
                        schedule.show_status ()
                        schedule.wait (True)
                logger.info ('Wait for %d background process to teminate' % len (schedule.control.running))
                while schedule.control.running:
                    schedule.show_status ()
                    if not schedule.wait (True):
                        break
            except KeyboardInterrupt:
                # first pass: polite termination
                logger.info ('^C, terminate all running processes')
                for p in schedule.control.running[:]:
                    if p.pid is not None:
                        schedule.control.control.term (p.pid)
                        schedule.wait ()
                    else:
                        schedule.control.running.remove (p)
                logger.info ('Waiting for 2 seconds to kill all remaining processes')
                time.sleep (2)
                # second pass: force kill whatever is left
                for p in schedule.control.running[:]:
                    if p.pid is not None:
                        schedule.control.control.term (p.pid, signal.SIGKILL)
                    else:
                        schedule.control.running.remove (p)
                logger.info ('Waiting for killed processes to terminate')
                while schedule.wait (True) is not None:
                    pass
            logger.info ('Background processes done')
        if schedule.deferred:
            logger.info ('Deferred jobs active, process them')
            try:
                last = -1
                while schedule.defers ():
                    cur = len (schedule.deferred)
                    if cur != last:
                        logger.info ('%d jobs remaining' % cur)
                        last = cur
                    time.sleep (1)
            except KeyboardInterrupt:
                logger.info ('^C, terminating')
    else:
        # no explicit modules: run as a regular daemon job queue
        jq = JobqueueGenerate (schedule)
        jq.start (restart_delay = '1m', termination_delay = '5m')
    return True
def lockTitle (self) -> None:
    """Update the process title to reflect currently held company locks."""
    joined = ', '.join ('%s=%r' % lock for lock in self.locks.items ())
    self.title (('active locks %s' % joined) if joined else joined)
def start_entry(self, entry: Entry) -> bool:
    """Return True if no entry for the same company is currently in
    progress, i.e. the entry may be started now."""
    for active in self.in_progress.values():
        if active.companyID == entry.companyID:
            return False
    return True
def starter (self) -> bool:
    """Periodic job: observe running mail generations and start queued
    ones.

    First checks all in-progress generations against
    maildrop_status_tbl (finishing, respooling stalled startups,
    tolerating long-running generations), then fires queued entries up
    to the configured parallel limit, honouring company locks and the
    merger's availability.  Returns True while the service is active.
    """
    if self.db.isopen ():
        def duration_format (d: int) -> str:
            # human readable duration, H:MM:SS only when >= 1 hour
            return ('%d:%02d:%02d' % (d // 3600, (d // 60) % 60, d % 60)) if d >= 3600 else ('%d:%02d' % (d // 60, d % 60))
        mailing = Mailing (merger = self.configuration ('merger', 'localhost'))
        expire = self.unit.parse (self.configuration ('expire', '30m'))
        parallel = self.unit.parse (self.configuration ('parallel', '4'))
        startup = self.unit.parse (self.configuration ('startup', '5m'))
        now = datetime.now ()
        if self.in_progress:
            self.title ('checking %d mailings for temination' % len (self.in_progress))
            seen = set ()
            for row in self.db.queryc (
                'SELECT status_id, genstatus, genchange '
                'FROM maildrop_status_tbl '
                'WHERE status_id IN (%s)' % (Stream (self.in_progress.values ())
                    .map (lambda e: e.statusID)
                    .join (', '), )
            ):
                seen.add (row.status_id)
                entry = self.in_progress.pop (row.status_id)
                if row.genstatus in (3, 4) or (row.genstatus == 1 and row.genchange > entry.genChange):
                    # generation finished (or restarted after our start)
                    duration = int ((row.genchange - entry.startDate).total_seconds ())
                    logger.info ('%s: generation finished after %s' % (entry.name, duration_format (duration)))
                else:
                    duration = int ((now - row.genchange).total_seconds ())
                    if row.genstatus == 1:
                        if duration >= startup:
                            # generation never took off: remove the
                            # rulebased marker and requeue the entry
                            logger.warning ('%s: startup time exceeded, respool entry' % entry.name)
                            if self.db.update (
                                'DELETE FROM rulebased_sent_tbl WHERE mailing_id = :mailingID',
                                {
                                    'mailingID': entry.mailingID
                                },
                                commit = True
                            ) > 0:
                                logger.info ('%s: entry from rulebased_sent_tbl had been removed' % entry.name)
                            else:
                                logger.info ('%s: no entry in rulebased_sent_tbl found to remove' % entry.name)
                            self.add_to_queue (entry)
                        else:
                            logger.debug ('%s: during startup since %s up to %s' % (entry.name, duration_format (duration), duration_format (startup)))
                            self.in_progress[entry.statusID] = entry
                    elif row.genstatus == 2:
                        if duration > expire:
                            logger.info ('%s: creation exceeded expiration time, leave it running' % entry.name)
                        else:
                            logger.debug ('%s: still in generation for %s up to %s' % (entry.name, duration_format (duration), duration_format (expire)))
                        self.in_progress[entry.statusID] = entry
                    else:
                        logger.error ('%s: unexpected genstatus %s, leave it alone' % (entry.name, row.genstatus))
            self.db.sync ()
            # forget entries whose status row vanished from the database
            for entry in Stream (self.in_progress.values ()).filter (lambda e: e.statusID not in seen).list ():
                logger.warning ('%s: maildrop status entry vanished, remove from observing' % entry.name)
                self.in_progress.pop (entry.statusID)
            self.title ()
        #
        if len (self.in_progress) < parallel and self.in_queue:
            if not self.ref._running:
                # shutdown in progress: persist queued work for later
                logger.info ('Postpone generation of %d mailings due to shutdown in progress' % len (self.in_queue))
                self.save_pending (list (self.in_queue.keys ()))
                self.in_queue.clear ()
                self.in_progress.clear ()
            else:
                self.title ('try to start %d out of %d mailings' % (parallel - len (self.in_progress), len (self.in_queue)))
                for entry in list (self.in_queue.values ()):
                    if not mailing.active ():
                        logger.error ('%s: merger not active, abort' % entry.name)
                        break
                    # test mailings (statusField 'T') ignore company locks
                    if self.ref.islocked (entry.companyID) and entry.statusField != 'T':
                        logger.debug ('%s: company %d is locked' % (entry.name, entry.companyID))
                        continue
                    self.remove_from_queue (entry)
                    if self.ready_to_send (now, entry):
                        if mailing.fire (status_id = entry.statusID, cursor = self.db.cursor):
                            entry.startDate = now
                            self.in_progress[entry.statusID] = entry
                            logger.info ('%s: started' % entry.name)
                            if len (self.in_progress) >= parallel:
                                break
                        else:
                            logger.error ('%s: failed to start' % entry.name)
                            if self.resume_entry (entry):
                                self.add_to_queue (entry)
                self.db.sync ()
                self.title ()
    return self.is_active ()