import logging
from collections import Counter

import boto3

# get_dates, IPResolver, eni_flow_stream, resolve_ip_address, human_size and
# the module-level `log` are helpers defined elsewhere in this module.


def analyze_enis(
        account_id, bucket, prefix, enis, ips, start, end, store_dir,
        ipdb=None, cmdb=None, ipranges=None, region=None, reject=None,
        targets=None, ports=None, tz=None, sample_count=20):
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('botocore').setLevel(logging.WARNING)
    # Materialize the port filter; a bare map() iterator would be exhausted
    # after its first membership test, and ports may be None.
    ports = list(map(int, ports or ()))
    start, end = get_dates(start, end, tz)
    client = boto3.client('s3')
    log_prefix = "%s/%s/flow-log/%s/%s" % (
        prefix.rstrip('/'), account_id,
        start.strftime('%Y/%m/%d'),
        "00000000-0000-0000-0000-000000000000")
    resolver = IPResolver(ipdb, cmdb, ipranges)

    agg_in_traffic = Counter()
    agg_out_traffic = Counter()
    agg_inport_traffic = Counter()
    agg_outport_traffic = Counter()

    for eni, ip in zip(enis, ips):
        files = eni_download_flows(
            client, bucket, log_prefix, start, end, eni, store_dir)
        in_traffic, out_traffic, inport_traffic, outport_traffic = eni_log_analyze(
            set(ips), eni_flow_stream(files, start, end),
            start=start, end=end, reject=reject,
            target_ips=targets, ports=ports)
        agg_in_traffic.update(in_traffic)
        agg_out_traffic.update(out_traffic)
        agg_inport_traffic.update(inport_traffic)
        agg_outport_traffic.update(outport_traffic)

    # resolve_ip_address keys appear to be (address, resolved-name) tuples,
    # hence the tuple string formatting below.
    print("Inbound %d Most Common" % sample_count)
    for ip, bcount in resolve_ip_address(
            agg_in_traffic, resolver, start, end).most_common(sample_count):
        print("%s %s" % ip, human_size(bcount))
    print("Outbound %d Most Common" % sample_count)
    for ip, bcount in resolve_ip_address(
            agg_out_traffic, resolver, start, end).most_common(sample_count):
        print("%s %s" % ip, human_size(bcount))
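
# human_size is used throughout these snippets but not defined in them. The
# helper below is a minimal sketch of the assumed behavior (a byte count in,
# a readable string out); the real implementations may differ in units or
# rounding.
def human_size(size, precision=2):
    # Walk up the binary (1024-based) units until the value fits.
    for unit in ('bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
        if size < 1024.0:
            return "%.*f %s" % (precision, size, unit)
        size /= 1024.0
    return "%.*f EiB" % (precision, size)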
def eni_log_analyze(
        ips, flow_stream, start=None, end=None, reject=None,
        target_ips=None, ports=()):
    # in_packets = Counter()
    in_bytes = Counter()
    in_ports = Counter()
    # out_packets = Counter()
    out_bytes = Counter()
    out_ports = Counter()
    # intra_bytes = Counter()
    stats = Counter()

    for flow_records in flow_stream:
        for record in flow_records:
            stats['Flows'] += 1
            stats['Bytes'] += record.bytes
            if record.action == 'REJECT':
                stats['Rejects'] += 1
            # Disabled filters kept from the original:
            # if ports and (record.srcport not in ports
            #               and record.dstport not in ports):
            #     continue
            # if reject is not None:
            #     if reject and record.action != 'REJECT':
            #         continue
            #     if reject is False and record.action != 'ACCEPT':
            #         continue
            # if target_ips:
            #     if not (record.dstaddr in target_ips or
            #             record.srcaddr in target_ips):
            #         continue
            # if record.dstaddr in ips and record.srcaddr in ips:
            #     intra_bytes[record.srcaddr] += record.bytes
            if record.dstaddr in ips:
                # in_packets[record.srcaddr] += record.packets
                in_bytes[record.srcaddr] += record.bytes
                # The port counters are returned and aggregated by the
                # caller, so they are tallied here.
                in_ports[record.srcport] += record.bytes
            elif record.srcaddr in ips:
                # out_packets[record.dstaddr] += record.packets
                out_bytes[record.dstaddr] += record.bytes
                out_ports[record.dstport] += record.bytes
            else:
                raise ValueError(
                    "flow record matches neither src nor dst eni ips: %r"
                    % (record,))
    log.info(
        "records:%d rejects:%d inbytes:%s outbytes:%s bytes:%s",
        stats['Flows'], stats['Rejects'],
        human_size(sum(in_bytes.values())),
        human_size(sum(out_bytes.values())),
        human_size(stats['Bytes']))
    return in_bytes, out_bytes, in_ports, out_ports
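
# A minimal usage sketch for eni_log_analyze. FlowRecord here is a fabricated
# stand-in for whatever record type eni_flow_stream actually yields, and the
# addresses are illustrative only; it assumes the module-level `log` and the
# Counter import above.
from collections import namedtuple

FlowRecord = namedtuple(
    'FlowRecord',
    ['srcaddr', 'dstaddr', 'srcport', 'dstport', 'packets', 'bytes', 'action'])

records = [
    FlowRecord('10.0.0.5', '10.0.0.9', 443, 52100, 10, 8400, 'ACCEPT'),
    FlowRecord('10.0.0.9', '93.184.216.34', 52200, 443, 4, 600, 'ACCEPT'),
]
in_bytes, out_bytes, in_ports, out_ports = eni_log_analyze(
    {'10.0.0.9'}, iter([records]))
assert in_bytes == Counter({'10.0.0.5': 8400})
assert out_bytes == Counter({'93.184.216.34': 600})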
def flow_stream_stats(ips, flow_stream, period):
    period_counters = {}
    stats = Counter()
    for flow_records in flow_stream:
        for record in flow_records:
            stats['Flows'] += 1
            stats['Bytes'] += record.bytes
            # Floor the record start time to a period boundary.
            pk = record.start - record.start % period
            pc = period_counters.get(pk)
            if pc is None:
                period_counters[pk] = pc = {
                    'inbytes': Counter(), 'outbytes': Counter()}
            if record.action == REJECT:
                stats['Rejects'] += 1
            if record.dstaddr in ips:
                pc['inbytes'][record.srcaddr] += record.bytes
            elif record.srcaddr in ips:
                pc['outbytes'][record.dstaddr] += record.bytes
            else:
                raise ValueError(
                    "flow record matches neither src nor dst eni ips: %r"
                    % (record,))
    log.info(
        "flows:%d bytes:%s rejects:%s",
        stats['Flows'], human_size(stats['Bytes']), stats['Rejects'])
    return period_counters
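
# flow_stream_stats buckets each record by flooring its start timestamp to a
# period boundary (pk = start - start % period). A quick check of that
# arithmetic, assuming epoch-second timestamps and a 5-minute period:
period = 300
ts = 1612345678
pk = ts - ts % period
assert pk == 1612345500 and pk % period == 0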
def upload_file(self, s3_key, file_path):
    """Upload a file to S3."""
    if not os.path.exists(file_path):
        raise RuntimeError("ERROR: File not found: %s" % file_path)
    log.print_log("Uploading file: %s" % file_path)
    conn = self.get_connection()
    s3_bucket = conn.get_bucket(self.bucket)
    file_size = os.stat(file_path).st_size
    file_human_size = utils.human_size(file_size)
    log.print_log("Uploading to S3 key: %s (%s)" % (s3_key, file_human_size))
    key = s3_bucket.new_key(s3_key)
    if self.content_type:
        key.set_metadata('Content-Type', self.content_type)
    if self.encrypt_files is True:
        key.set_metadata('x-amz-meta-s3tools-gpgenc', 'gpg')  # FYI: for s3cmd
    bytes_written = key.set_contents_from_filename(file_path, encrypt_key=True)
    if bytes_written != file_size:
        msg = ("ERROR: Mismatch in bytes synced to S3 bucket and local file: "
               "{0} != {1}".format(bytes_written, file_size))
        raise RuntimeError(msg)
def save(self):
    self.log("beginning of save process...")
    self.current_save_directory = self.create_directory(
        self.get_current_directory_name())
    for section in self.sections:
        if section != "global":
            self.log("saving [{0}]...".format(section))
            self.get_connector(section).save()
            self.log("[{0}] successfully saved".format(section))
    if self.post_save():
        self.log("saved in {0}".format(self.current_save_directory))
        self.log("save process successfully ended (total size: {0})".format(
            human_size(self.size)))
        return True
    else:
        return False
def eni_download_flows(client, bucket, prefix, start, end, eni, store_dir):
    # t = time.time()
    # 30m aggregation delay
    # if end:
    #     end_barrier = end + timedelta(seconds=30 * 60)
    # else:
    #     end_barrier = None
    log_size = count = skip = 0
    eni_path = os.path.join(store_dir, '%s-all' % eni)
    if not os.path.exists(eni_path):
        os.makedirs(eni_path)
    results = client.list_objects_v2(
        Bucket=bucket,
        Prefix="%s/%s" % (prefix.rstrip('/'), "%s-all" % eni))
    truncated = results['IsTruncated']
    for k in results.get('Contents', ()):
        # if end_barrier and k['LastModified'] > end_barrier:
        #     skip += 1
        #     continue
        # if k['LastModified'] < start:
        #     skip += 1
        #     continue
        dl_key = os.path.join(
            store_dir, '%s-all' % eni, k['Key'].rsplit('/', 1)[-1])
        log_size += k['Size']
        # Reuse a previously downloaded file if its size still matches.
        if os.path.exists(dl_key) and os.path.getsize(dl_key) == k['Size']:
            count += 1
            yield dl_key
            continue
        client.download_file(bucket, k['Key'], dl_key)
        yield dl_key
        count += 1
    log.info("eni:%s logs-skip:%d logs-consumed:%d truncated:%s size:%s" % (
        eni, skip, count, truncated, human_size(log_size)))
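
# eni_download_flows is a generator: each local file path is yielded as soon
# as its object is downloaded (or found already cached with a matching size),
# so parsing can overlap with downloading, and the summary log line only runs
# once the generator is exhausted. A consumption sketch; the bucket, prefix,
# eni and directory values below are placeholders:
client = boto3.client('s3')
for path in eni_download_flows(
        client, 'example-flowlog-bucket', 'flows/123456789012/flow-log',
        None, None, 'eni-0123456789abcdef0', '/tmp/flow-cache'):
    print(path)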
def mail(self):
    """Send a mail to all admins."""
    self.log("Sending mail to admins...")
    formated_stamp = self.stamp.strftime('%Y-%m-%d %Hh%M')
    mail_content = ""
    mail_subject = ""
    if (len(self.saved_datasets) + len(self.dont_need_save_datasets)
            == len(self.datasets) and len(self.not_saved_datasets) == 0):
        mail_subject = "Savior : {0} successfully ended ({1} saved)".format(
            formated_stamp, human_size(self.size))
        mail_content += "Savior script has run correctly on {0}.\n".format(
            formated_stamp)
        mail_content += "{0} datasets were saved :\n".format(
            len(self.saved_datasets))
        for ds in self.saved_datasets:
            mail_content += " - {0} ({1} saved)\n".format(
                ds.name, human_size(ds.size))
        mail_content += "{0} datasets did not need saving :\n".format(
            len(self.dont_need_save_datasets))
        for ds in self.dont_need_save_datasets:
            mail_content += " - {0}\n".format(ds.name)
    elif len(self.saved_datasets) > 0 and len(self.not_saved_datasets) > 0:
        mail_subject = "Savior : {0} ended with possible errors ({1} saved)".format(
            formated_stamp, human_size(self.size))
        mail_content += "Savior script has run on {0}.\n".format(formated_stamp)
        mail_content += "{0} datasets out of {1} have been saved :\n".format(
            len(self.saved_datasets), len(self.datasets))
        for ds in self.saved_datasets:
            mail_content += " - {0} ({1} saved)\n".format(
                ds.name, human_size(ds.size))
        mail_content += "\nFor some reason, the following datasets HAVE NOT been saved :\n"
        for ds in self.not_saved_datasets:
            mail_content += " - {0}\n".format(ds.name)
    else:
        mail_subject = "Savior : {0} script has failed".format(formated_stamp)
        mail_content += "Savior script has run on {0}.\n".format(formated_stamp)
        mail_content += "Savior : {0} script has failed, no datasets have been saved.\n".format(
            formated_stamp)
    # log part
    mail_content += "For more information, please read the following log :\n\n"
    os.chdir(self.root_path)
    log = open(self.log_file, 'r').read()
    mail_content += log
    sender = self.settings.get("mail", "from")
    to = self.settings.get("mail", "admins").split(",")
    to = filter(None, to)
    smtp = {}
    try:
        smtp['hostname'] = self.settings.get("mail", "smtp_hostname")
    except Exception:
        raise ParseConfigError(
            "'smtp_hostname' setting is missing in settings.ini")
    try:
        smtp['port'] = self.settings.get("mail", "smtp_port")
    except Exception:
        smtp['port'] = None
    try:
        smtp['username'] = self.settings.get("mail", "smtp_username")
        smtp['password'] = self.settings.get("mail", "smtp_password")
    except Exception:
        smtp['username'] = None
        smtp['password'] = None
    msg = MIMEText(mail_content)
    msg["Subject"] = mail_subject
    msg["From"] = sender
    msg["To"] = ", ".join(to)
    try:
        if smtp['port']:
            s = smtplib.SMTP(smtp['hostname'], smtp['port'])
        else:
            s = smtplib.SMTP(smtp['hostname'])
    except Exception as e:
        self.log("can't connect to SMTP server [{0}] : {1}".format(
            smtp['hostname'], e), "error")
        return False
            except Exception as e:
                self.log(traceback.format_exc())
                exception = True
                self.not_saved_datasets.append(ds)
                self.log("Save process has met a critical error: {0}".format(e),
                         "critical")
                self.log("Skipping save for all remaining datasets")
                ds.remove()
            else:
                self.dont_need_save_datasets.append(ds)
        else:
            self.not_saved_datasets.append(ds)

    self.size = sum(ds.size for ds in self.datasets)
    self.log("Save process ended : {0} datasets have been saved. (size: {1})".format(
        len(self.saved_datasets), human_size(self.size)))
    if self.send_mail:
        self.mail()

def clean(self):
    """Remove old saves."""
    self.check_config()
    self.log("cleaning {0} datasets...".format(len(self.datasets)))
    self.cleaned_datasets = []
    for ds in self.datasets:
        ds.remove_old_saves()
        self.cleaned_datasets.append(ds)
    labels[key] = value

materials = populate_materials(materials_file)
channel = 0
Data.map_discrete_voxels(
    discrete_volume, volume, labels, materials, channel, stencil_channels)
print("Volume created", volume.shape)

ts = time()
volume_sat = build_volume_sat(volume)
sat_time = time() - ts
print("SAT", sat_time, "seconds")
sat_size = (volume_sat.shape[0] * volume_sat.shape[1] *
            volume_sat.shape[2] * volume_sat.shape[3] *
            volume_sat.dtype.itemsize)
print("SAT size", human_size(sat_size))

ts = time()
tree = SATTileTree2D(volume, 32, alignment_z_centered=False)
tree_time = time() - ts
print("SAT Tile Tree", tree_time, "seconds")
print("SAT Tile Tree size", human_size(tree.size()))

sat_from_tile_tree = tree.get_sat()
# print(volume_sat[3:5, 0, 3], volume_sat[0, 3:5, 3], volume_sat[3, 3:5, 0])
print(volume.shape, volume_sat.shape, sat_from_tile_tree.shape)
print("SAT are close",
      numpy.allclose(volume_sat[1:, 1:, 1:], sat_from_tile_tree))

pad_x, pad_y = 0, 0
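
# build_volume_sat and SATTileTree2D are project-specific, but a summed-area
# table itself is just an inclusive prefix sum taken along every axis. A
# minimal numpy sketch of the idea; the project's actual layout (e.g. the
# zero-padded leading planes implied by the volume_sat[1:, 1:, 1:] slice
# above) may differ.
def naive_sat(vol):
    # Inclusive cumulative sum along each spatial axis in turn.
    sat = vol.astype(numpy.int64)
    for axis in range(3):
        sat = sat.cumsum(axis=axis)
    return sat

demo = numpy.ones((4, 4, 4), dtype=numpy.int64)
# The last SAT entry is the sum over the whole volume.
assert naive_sat(demo)[-1, -1, -1] == demo.sum()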
def _parse_item(self, item):
    result = {
        "name": None,
        "provider": "Unknown",
        "size": "Unknown",
        "uri": None,
        "seeds": "0",
        "peers": "0",
        "info_hash": "",
        "language": None,
        # todo: would be nice to assign correct icons, but that can be very
        # time consuming due to the number of indexers in Jackett
        "icon": get_icon_path(),
        "_size_bytes": -1
    }

    for ref in item:
        tag = ref.tag
        attrib = ref.attrib
        if tag == "{" + self._torznab_ns + "}attr":
            val = attrib["value"]
            if isinstance(val, str):
                val = val.decode("utf-8")
            if "name" in attrib and "value" in attrib and attrib["name"] and val and \
                    attrib["name"] in self._torznab_elementum_mappings["torznab_attrs"]:
                json = self._torznab_elementum_mappings["torznab_attrs"][attrib["name"]]
                result[json] = val
                continue
        if ref.tag in self._torznab_elementum_mappings["tags"] and ref.text is not None:
            json = self._torznab_elementum_mappings["tags"][ref.tag]
            val = ref.text.strip()
            if isinstance(val, str):
                val = val.decode("utf-8")
            result[json] = val

    # If we didn't get a magnet uri, attempt to resolve the magnet uri.
    # todo: for some reason Elementum cannot resolve the link that gets
    # proxied through Jackett, so we resolve it manually for Elementum for
    # now. In actuality, this should be fixed within Elementum.
    if result["uri"] is None:
        link = item.find('link')
        jackett_uri = ""
        if link is not None:
            jackett_uri = link.text
        else:
            enclosure = item.find('enclosure')
            if enclosure is not None:
                jackett_uri = enclosure.attrib['url']
        if jackett_uri != "":
            result["uri"] = get_magnet_from_jackett(jackett_uri)

    if result["name"] is None or result["uri"] is None:
        log.warning("Could not parse item; name = %s; uri = %s",
                    result["name"], result["uri"])
        log.debug("Failed item is: %s", ElementTree.tostring(item, encoding='utf8'))
        return None

    # result["name"] = result["name"].decode("utf-8")  # might be needed for non-english items
    result["seeds"] = int(result["seeds"])
    result["peers"] = int(result["peers"])
    resolution = get_resolution(result["name"])
    result["resolution"] = utils.resolutions.keys()[::-1].index(resolution)
    result["_resolution"] = resolution
    result["release_type"] = get_release_type(result["name"])
    if result["size"] != "Unknown":
        result["_size_bytes"] = int(result["size"])
        result["size"] = human_size(result["_size_bytes"])

    return result