def Run(self, force=False):
    """Run one iteration of this cron job.

    First records the outcome of the previously started flow (if it has
    finished), then starts a new flow when the job is due or when forced.
    The CronJob object must be locked (i.e. opened via OpenWithLock) before
    Run() is called.

    Args:
      force: If True, a new flow is started even if DueToRun() returns
        False.

    Raises:
      LockError: if the object is not locked.
    """
    if not self.locked:
        raise aff4.LockError("CronJob must be locked for Run() to be called.")

    # KillOldFlows() returning a true value means there is nothing more to
    # do in this iteration.
    if self.KillOldFlows():
        return

    # Book-keeping for the flow started by a previous Run() call: once it is
    # no longer running, record its final status and forget its URN.
    previous_urn = self.Get(self.Schema.CURRENT_FLOW_URN)
    if previous_urn:
        previous_flow = aff4.FACTORY.Open(previous_urn, token=self.token)
        runner = previous_flow.GetRunner()
        if not runner.IsRunning():
            failed = runner.context.state == rdfvalue.Flow.State.ERROR
            if failed:
                outcome = rdfvalue.CronJobRunStatus.Status.ERROR
            else:
                outcome = rdfvalue.CronJobRunStatus.Status.OK
            self.Set(self.Schema.LAST_RUN_STATUS,
                     rdfvalue.CronJobRunStatus(status=outcome))

            if failed:
                stats.STATS.IncrementCounter("cron_job_failure",
                                             fields=[self.urn.Basename()])
            else:
                # Latency is only recorded for successful runs and is
                # measured from the time the flow was started.
                started_at = self.Get(self.Schema.LAST_RUN_TIME)
                latency = time.time() - started_at.AsSecondsFromEpoch()
                stats.STATS.RecordEvent("cron_job_latency", latency,
                                        fields=[self.urn.Basename()])

            self.DeleteAttribute(self.Schema.CURRENT_FLOW_URN)
            self.Flush()

    # DueToRun() is only consulted when the run is not forced.
    if not (force or self.DueToRun()):
        return

    cron_args = self.Get(self.Schema.CRON_ARGS)
    new_flow_urn = flow.GRRFlow.StartFlow(
        runner_args=cron_args.flow_runner_args,
        args=cron_args.flow_args,
        token=self.token,
        sync=False)

    self.Set(self.Schema.CURRENT_FLOW_URN, new_flow_urn)
    self.Set(self.Schema.LAST_RUN_TIME, rdfvalue.RDFDatetime().Now())
    self.Flush()

    # Expose the started flow as a symlink underneath the cron job's URN.
    symlink_urn = self.urn.Add(new_flow_urn.Basename())
    symlink = aff4.FACTORY.Create(symlink_urn, "AFF4Symlink",
                                  token=self.token)
    symlink.Set(symlink.Schema.SYMLINK_TARGET(new_flow_urn))
    symlink.Close()
def DeleteRecords(self, ids):
    """Remove the given records from the queue.

    Both the lock attribute and the value attribute are deleted for every
    id in a single datastore call.

    Args:
      ids: A list of ids provided by ClaimRecords.

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
        raise aff4.LockError("Queue must be locked to delete records.")

    record_attributes = [self.LOCK_ATTRIBUTE, self.VALUE_ATTRIBUTE]
    data_store.DB.MultiDeleteAttributes(ids, record_attributes,
                                        token=self.token)
def RefreshClaims(self, ids, timeout="30m"):
    """Extend the claims on the records identified by ids.

    Every record's lock attribute is overwritten with a new expiration of
    now + timeout. The writes are issued asynchronously and flushed once
    at the end.

    Args:
      ids: A list of ids provided by ClaimRecords.
      timeout: The new timeout for these claims, in a form accepted by
        rdfvalue.Duration (e.g. "30m").

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
        raise aff4.LockError("Queue must be locked to refresh claims.")

    new_expiry = rdfvalue.RDFDatetime().Now() + rdfvalue.Duration(timeout)

    db = data_store.DB
    for record_id in ids:
        db.Set(record_id, self.LOCK_ATTRIBUTE, new_expiry,
               token=self.token, sync=False)
    db.Flush()
def Compact(self, callback=None, timestamp=None):
    """Compacts versioned attributes into the collection stream.

    Versioned attributes come from the datastore sorted by timestamp in
    decreasing order, which is the opposite of the chronological order
    wanted in the collection stream. To cope with collections too large to
    reverse in memory, the values are read in batches of
    COMPACTION_BATCH_SIZE; every full batch is reversed and spilled to a
    temporary AFF4Image, and the batches are afterwards appended to the
    collection stream in reverse order.

    Args:
      callback: An optional function without arguments that gets called
        periodically while processing is done. Useful in flows that have
        to heartbeat.
      timestamp: Only items added before this timestamp will be compacted.
        Defaults to the current time.

    Raises:
      LockError: if the collection is not locked.
      RuntimeError: if problems are encountered when reading back
        temporary saved data.

    Returns:
      Number of compacted results.
    """
    if not self.locked:
        raise aff4.LockError("Collection must be locked before compaction.")

    compacted_count = 0
    spilled_urns = []
    pending = []

    # Upper bound used both for reading and for deleting the versioned
    # attributes, so that nothing added after the compaction started is
    # deleted.
    cutoff = timestamp or rdfvalue.RDFDatetime().Now()

    def PackReversed(items):
        """Returns items, last first, as "<i" length-prefixed records."""
        buf = cStringIO.StringIO()
        for item in reversed(items):
            buf.write(struct.pack("<i", len(item)))
            buf.write(item)
        return buf.getvalue()

    def UpdateIndex():
        """Adds a seek checkpoint if INDEX_INTERVAL items were added."""
        seek_index = self.Get(self.Schema.SEEK_INDEX, SeekIndex())
        checkpoints = seek_index.checkpoints
        last_pair = checkpoints and checkpoints[-1]
        if (not last_pair or
                self.size - last_pair.index_offset >= self.INDEX_INTERVAL):
            checkpoints.Append(SeekIndexPair(index_offset=self.size,
                                             byte_offset=self.fd.Tell()))
            self.Set(self.Schema.SEEK_INDEX, seek_index)

    def DeleteVersionedDataAndFlush():
        """Removes versioned attributes and flushes the stream."""
        data_store.DB.DeleteAttributes(
            self.urn, [self.Schema.DATA.predicate],
            end=cutoff, token=self.token, sync=True)

        if self.IsJournalingEnabled():
            entry = self.Schema.COMPACTION_JOURNAL(compacted_count,
                                                   age=cutoff)
            aff4.FACTORY.SetAttributes(
                self.urn, {self.Schema.COMPACTION_JOURNAL: [entry]}, set(),
                add_child_index=False, sync=True, token=self.token)

        if self.Schema.DATA in self.synced_attributes:
            del self.synced_attributes[self.Schema.DATA]

        self.Flush(sync=True)

    def HeartBeat():
        """Update the lock lease if needed and call the callback."""
        lease_time = config_lib.CONFIG["Worker.compaction_lease_time"]
        if self.CheckLease() < lease_time / 2:
            logging.info("%s: Extending compaction lease.", self.urn)
            self.UpdateLease(lease_time)
            stats.STATS.IncrementCounter("packed_collection_lease_extended")

        if callback:
            callback()

    HeartBeat()

    # Walk over all versioned attributes up to the cutoff. Whenever a full
    # batch has been collected it is written, reversed, to a temporary
    # stream.
    for _, value, _ in data_store.DB.ResolvePrefix(
            self.urn, self.Schema.DATA.predicate, token=self.token,
            timestamp=(0, cutoff)):
        HeartBeat()

        pending.append(value)
        compacted_count += 1
        if len(pending) < self.COMPACTION_BATCH_SIZE:
            continue

        spill_urn = rdfvalue.RDFURN("aff4:/tmp").Add(
            "%X" % utils.PRNG.GetULong())
        spilled_urns.append(spill_urn)
        payload = PackReversed(pending)

        # AFF4Image is used to avoid serializing/deserializing the data
        # stored in versioned attributes.
        with aff4.FACTORY.Create(spill_urn, aff4.AFF4Image, mode="w",
                                 token=self.token) as spill_stream:
            spill_stream.Write(payload)

        pending = []

    # No versioned attributes at all: nothing to do.
    if not (pending or spilled_urns):
        return 0

    # The last (partial) batch holds the oldest items read, so it can be
    # appended to the collection stream right away.
    if pending:
        payload = PackReversed(pending)
        self.fd.Seek(0, 2)
        self.fd.Write(payload)
        self.stream_dirty = True
        self.size += len(pending)
        UpdateIndex()

        # If that was the only batch we are done. Note that this early
        # return skips the system-wide stats update at the end.
        if not spilled_urns:
            DeleteVersionedDataAndFlush()
            return compacted_count

    opened = aff4.FACTORY.MultiOpen(spilled_urns, aff4_type=aff4.AFF4Image,
                                    token=self.token)
    spills_by_urn = {spill.urn: spill for spill in opened}

    if len(spilled_urns) != len(spills_by_urn):
        raise RuntimeError("Internal inconsistency can't read back all the "
                           "temporary batches.")

    # Append the temporary batches in reverse order of creation (their
    # contents were already reversed when they were written).
    self.fd.Seek(0, 2)
    for spill_urn in reversed(spilled_urns):
        spill = spills_by_urn[spill_urn]
        HeartBeat()

        contents = spill.Read(len(spill))
        self.fd.Write(contents)
        self.stream_dirty = True
        # Only full batches are ever spilled to temporary streams.
        self.size += self.COMPACTION_BATCH_SIZE
        UpdateIndex()

        aff4.FACTORY.Delete(spill_urn, token=self.token)

    DeleteVersionedDataAndFlush()

    # Update system-wide stats.
    stats.STATS.IncrementCounter("packed_collection_compacted",
                                 delta=compacted_count)

    return compacted_count
def ClaimRecords(self, limit=None, timeout="30m",
                 record_filter=lambda x: False):
    """Returns and claims up to limit unclaimed records.

    The returned records are "claimed" for the timeout duration: a claimed
    record will generally be unavailable to be claimed again until the
    claim times out. Note however that in case of an unexpected timeout or
    other error a record might be claimed twice at the same time. For this
    reason a claim should be considered weaker than a true lock.

    Args:
      limit: The number of records to claim. None means no limit.
      timeout: The duration of the claim, in a form accepted by
        rdfvalue.Duration (e.g. "30m").
      record_filter: A filter method to determine if the record should be
        returned. It will be called serially on each record and the record
        will be filtered (not returned or locked) if it returns True.

    Returns:
      A list of (id, record) tuples where record is a self.rdf_type and id
      is a record identifier which can be used to delete or release the
      record.

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
        raise aff4.LockError("Queue must be locked to claim records.")

    now = rdfvalue.RDFDatetime().Now()
    wanted_attributes = [self.VALUE_ATTRIBUTE, self.LOCK_ATTRIBUTE]

    claimed = []
    for record_id, attrs in data_store.DB.ScanAttributes(
            self.urn.Add("Records"), wanted_attributes, token=self.token):
        if self.VALUE_ATTRIBUTE not in attrs:
            # Unlikely case, but could happen if, say, a thread called
            # RefreshClaims so late that another thread already deleted the
            # record.
            continue

        if self.LOCK_ATTRIBUTE in attrs:
            claim_expiry = rdfvalue.RDFDatetime(attrs[self.LOCK_ATTRIBUTE][1])
            if claim_expiry > now:
                # Somebody else still holds an unexpired claim.
                continue

        serialized = attrs[self.VALUE_ATTRIBUTE][1]
        record = self.rdf_type(serialized)  # pylint: disable=not-callable
        if record_filter(record):
            continue

        claimed.append((record_id, record))
        if limit is not None and len(claimed) == limit:
            break

    expiration = rdfvalue.RDFDatetime().Now() + rdfvalue.Duration(timeout)

    # TODO(user): Add bulk set method to datastore.
    for record_id, _ in claimed:
        data_store.DB.Set(record_id, self.LOCK_ATTRIBUTE, expiration,
                          token=self.token, sync=False)
    data_store.DB.Flush()

    return claimed
def ClaimRecords(self, limit=10000, timeout="30m", start_time=None,
                 record_filter=lambda x: False, max_filtered=1000):
    """Returns and claims up to limit unclaimed records.

    The returned records are "claimed" for the timeout duration: a claimed
    record will generally be unavailable to be claimed again until the
    claim times out. Note however that in case of an unexpected timeout or
    other error a record might be claimed twice at the same time. For this
    reason a claim should be considered weaker than a true lock.

    Args:
      limit: The number of records to claim. At most 4 * limit records are
        requested from the datastore scan.
      timeout: The duration of the claim, in a form accepted by
        rdfvalue.Duration (e.g. "30m").
      start_time: The time to start claiming records at. Only records with
        a timestamp after this point will be claimed.
      record_filter: A filter method to determine if the record should be
        returned. It will be called serially on each record and the record
        will be filtered (not returned or locked) if it returns True.
      max_filtered: If non-zero, limits the number of results read when
        filtered. Specifically, if max_filtered filtered results are read
        sequentially without any unfiltered results, we stop looking for
        results.

    Returns:
      A list of (id, record) tuples where record is a self.rdf_type and id
      is a record identifier which can be used to delete or release the
      record.

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
        raise aff4.LockError("Queue must be locked to claim records.")

    now = rdfvalue.RDFDatetime.Now()

    # When a start time is given the scan begins after the URN derived from
    # it; otherwise after_urn stays None.
    after_urn = (
        self._MakeURN(self.urn, start_time.AsMicroSecondsFromEpoch(), 0)
        if start_time else None)

    claimed = []
    consecutive_filtered = 0
    wanted_attributes = [self.VALUE_ATTRIBUTE, self.LOCK_ATTRIBUTE]

    for record_id, attrs in data_store.DB.ScanAttributes(
            self.urn.Add("Records"), wanted_attributes,
            max_records=4 * limit, after_urn=after_urn, token=self.token):
        if self.VALUE_ATTRIBUTE not in attrs:
            # Unlikely case, but could happen if, say, a thread called
            # RefreshClaims so late that another thread already deleted the
            # record. Go ahead and clean this up.
            data_store.DB.DeleteAttributes(record_id, [self.LOCK_ATTRIBUTE],
                                           token=self.token)
            continue

        if self.LOCK_ATTRIBUTE in attrs:
            claim_expiry = rdfvalue.RDFDatetime.FromSerializedString(
                attrs[self.LOCK_ATTRIBUTE][1])
            if claim_expiry > now:
                # Somebody else still holds an unexpired claim.
                continue

        record = self.rdf_type.FromSerializedString(
            attrs[self.VALUE_ATTRIBUTE][1])

        if not record_filter(record):
            claimed.append((record_id, record))
            # An accepted record ends the current run of filtered records.
            consecutive_filtered = 0
            if len(claimed) >= limit:
                break
            continue

        consecutive_filtered += 1
        if max_filtered and consecutive_filtered >= max_filtered:
            break

    expiration = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration(timeout)

    with data_store.DB.GetMutationPool(token=self.token) as mutation_pool:
        for record_id, _ in claimed:
            mutation_pool.Set(record_id, self.LOCK_ATTRIBUTE, expiration)

    return claimed