def setupDB(url):
    """
    Prepare the database behind C{url} for use.

    Builds the engine, binds a session and the model metadata to it, and
    calls C{create_all()} on the model metadata.

    @param url: database url handed to C{create_engine}
    @return: C{(session, engine)} tuple
    """
    Logger("db").log("Setting up database")

    engine = create_engine(url, encoding='utf-8')
    # if url.startswith("sqlite"):
    #     event.listen(engine, 'connect', onconnect)

    session = Session()
    session.configure(bind=engine)

    metadata = model.Base.metadata
    metadata.bind = engine
    metadata.create_all()

    return session, engine
class BaseRPCCommands(xmlrpc.XMLRPC):
    """
    RPC functions that are offered to other peers.

    @ivar session: database session used for peer lookups
    @ivar config: configuration object handed to the constructor
    @ivar logger: "BaseRPCFunctions" logger
    """
    allowNone = True
    useDateTime = True

    def __init__(self, config, session):
        """
        @param config: configuration object, stored as C{self.config}
        @param session: database session, stored as C{self.session}
        """
        # NOTE(review): the base class initializer is not called here;
        # confirm that relying on the class-level allowNone/useDateTime
        # attributes alone is intentional.
        self.session = session
        self.config = config
        self.logger = Logger("BaseRPCFunctions")

    def _getpeer(self, peername, action):
        """
        Attempt to retrieve a peer by name.

        @param peername: name of the peer to look up
        @param action: short description of what the peer did; only used
            in the log message written when the lookup fails
        @return: whatever C{Peer.getByName} returns, or C{None} when no
            peer or more than one peer matches C{peername}
        """
        try:
            return Peer.getByName(self.session, peername)
        except MultipleResultsFound as e:
            self.logger.log("A Peer %r %s but we have multiple peers by that name: %r"
                            % (peername, action, e))
        except NoResultFound as e:
            self.logger.log("A Peer %r %s but we have no peers by that name: %r"
                            % (peername, action, e))
        # Lookup failed; callers test the result for truthiness.
        return None
class ScannableRequest(object):
    """
    State object representing the state of a request. Includes most code
    required to do scanning of a url.

    @type url: C{str}
    @ivar url: url that was requested

    @type filesize: C{int}
    @ivar filesize: size of the file at the requested url

    @type hash: C{str} (empty string for not available)
    @ivar hash: the hash of the file, if the file has been retrieved and
        hashed. use L{getHash} to ensure this happens. L{dolocalscan}
        performs a download and hash, if one has not already been done.

    @type fileid: C{str}
    @ivar fileid: the UUID of the file downloaded.
        C{config.scanning.download_location.format(id=fileid)} will give
        the path to the downloaded file, and
        C{config.scanning.local_server_url.format(id=fileid)} will give an
        http url to the same file.

    @type timeout: C{float}
    @ivar timeout: timeout to schedule when sleep() is called
    """

    def __init__(self, config, session, url=None, parentrequest=None,
                 digestmanager=None, scanlogmanager=None):
        """
        @param config: configuration object; C{config.scanning} is read
        @param session: database session, stored as C{self.session}
        @param url: url to scan; falls back to C{parentrequest.url}
        @param parentrequest: request this one derives from, if any
        @param digestmanager: stored as C{self.digestmanager}
        @param scanlogmanager: stored as C{self.scanlogmanager}
        @raise ValueError: when neither C{url} nor C{parentrequest} is given
        """
        self.logger = Logger("Scanner")
        self.config = config
        self.session = session
        self.handler = scanhandlers.get(config.scanning.handler)

        if not url and not parentrequest:
            # ValueError is a subclass of Exception, so existing callers
            # that catch Exception keep working.
            raise ValueError("url or parentrequest must be provided!")
        elif not url:
            self.url = parentrequest.url
        else:
            self.url = url

        self.fileid = None
        self.parentrequest = parentrequest
        self.digestmanager = digestmanager
        self.scanlogmanager = scanlogmanager

        try:
            self.timeout = float(self.config.scanning.timeout)
        except Exception:
            # Missing or malformed timeout setting: fall back to a default
            # instead of failing construction.
            self.timeout = 0.1

        self.headers = {}
        self.downloaded_filepath = ''
        # an invalid size and can safely be used for "unknown"
        self.downloaded_filesize = -1
        self.objectage = None
        self.contenthash = ''
        self.scan = None
        self.closepeers = []

    def sleep(self):
        """
        @return: a twisted Deferred that will be called L{timeout} seconds
            after this method is called
        @rtype: C{twisted.internet.defer.Deferred}
        """
        d = defer.Deferred()
        reactor.callLater(self.timeout, d.callback, None)
        return d

    #@cached
    def retrieve(self):
        """
        Retrieve the url via http and store it.

        The download is done by running C{urlretrieve.py} in a subprocess;
        on success C{downloaded_filepath}, C{headers}, C{fileid} and
        C{retrievems} are updated.

        urllib warning: When opening HTTPS URLs, does not attempt to
        validate the server certificate.

        @raise IncompleteScanError: when the helper exits with a non-zero
            return code
        """
        self.logger.log('ScannableRequest.retrieve called')
        with TimeMeasurer() as retrieve_timer:
            fileid = str(uuid.uuid4())
            filepath = self.config.scanning.download_location.format(id=fileid)
            targetdir = os.path.dirname(filepath)
            if not os.path.exists(targetdir):
                os.makedirs(targetdir)
            self.fileid = fileid

            # grab file with a subprocess...
            script_name = 'urlretrieve.py'
            script_path = os.path.join(frameworkdir, script_name)
            proc = subprocess.Popen(["python", script_path, self.url, filepath],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            # communicate() drains both pipes while waiting; wait() alone
            # can deadlock once the child fills a pipe buffer.
            out, err = proc.communicate()
            if proc.returncode != 0:
                error = err.splitlines(True)
                self.logger.log("Error downloading url %s for scanning: %s "
                                % (self.url, error))
                raise IncompleteScanError

            lines = [line.strip() for line in out.split('\n')]
            # NOTE(review): pickle.loads on the helper's stdout is only safe
            # as long as urlretrieve.py is trusted local code.
            filepath, headers = pickle.loads('\n'.join(lines))
            self.downloaded_filepath = filepath
            self.headers = dict(headers)
        # Read after the block has exited -- assumes TimeMeasurer finalizes
        # .total on exit; TODO confirm.
        # time.time() uses seconds, not ms
        self.retrievems = int(retrieve_timer.total * 1000.0)

    @property
    def filepath(self):
        """
        Ensure we have downloaded the file when trying to use filepath.
        """
        if not self.downloaded_filepath:
            self.retrieve()
        return self.downloaded_filepath

    @staticmethod
    def _requestTarget(path, query):
        """
        Build the request target for an HTTP request from parsed url parts.

        @param path: path component of the url (may be empty)
        @param query: query component of the url (may be empty)
        @return: C{path} (or C{"/"} when empty) with C{?query} appended
            when a query is present
        """
        target = path or "/"
        if query:
            target = "%s?%s" % (target, query)
        return target

    def retrieveHeaders(self):
        """
        Retrieve the headers from the url via HTTP HEAD, if they are not
        already stored, then derive C{downloaded_filesize} and
        C{objectage} from them when those are still unknown.
        """
        if not self.headers:
            oururl = urlparse.urlparse(self.url)
            if oururl.scheme == "http":
                conn = httplib.HTTPConnection(oururl.netloc)
            else:
                conn = httplib.HTTPSConnection(oururl.netloc)
            try:
                # Include the query string and never send an empty target,
                # so the HEAD hits the same resource a GET would.
                conn.request("HEAD",
                             self._requestTarget(oururl.path, oururl.query))
                response = conn.getresponse()
                self.headers = dict(response.getheaders())
            finally:
                conn.close()

        if self.downloaded_filesize < 0:
            try:
                content_length = self.headers["content-length"]
                self.downloaded_filesize = int(content_length)
            except KeyError:
                self.logger.log("url returned no content-length: %r" % (self.url))
            except ValueError:
                self.logger.log("url returned invalid filesize: %r" % content_length)

        if not self.objectage:
            try:
                age = self.headers['last-modified']
                self.objectage = datetime.strptime(age, '%a, %d %b %Y %H:%M:%S %Z')
            except KeyError:
                self.logger.log('url returned no last-modified: %r' % (self.url))
            except ValueError:
                self.logger.log('url returned invalid last-modified: %r' % age)
        # message-length should be handled too:
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.4

    @property
    def filesize(self):
        """
        Ensure we have the filesize when we try to use it.

        @raise IncompleteScanError: when neither the headers nor the
            downloaded file yield a size
        """
        if self.downloaded_filesize < 0:
            self.retrieveHeaders()

        # If it is still invalid, see what the os can give us
        if self.downloaded_filesize < 0:
            try:
                self.downloaded_filesize = os.path.getsize(self.filepath)
            except (IOError, OSError) as e:
                # os.path.getsize raises OSError (WindowsError is a subclass
                # of it and is not defined on other platforms).  Log the
                # stored path so the handler cannot re-trigger a download.
                msg = "Error while processing %s (file stats for file at %s): %s"
                self.logger.log(msg % (self.url, self.downloaded_filepath, e))
                raise IncompleteScanError
        return self.downloaded_filesize
class SocialScanRPCCommands(BaseRPCCommands):
    """
    SocialScan RPC functions that are offered to other peers.
    """

    def __init__(self, config, session):
        """
        @param config: configuration object, passed to the base class
        @param session: database session, passed to the base class
        """
        super(SocialScanRPCCommands, self).__init__(config, session)
        self.logger = Logger("RPCFunctions")

    def xmlrpc_scanlogOffer(self, peername, url):
        """
        Called by a peer to offer a scanlog. Stores the offer so that a
        worker can retrieve it later (due to the large size scanlogs often
        reach).

        @param peername: name of the offering peer
        @param url: url of the offered scanlog
        """
        # NOTE(review): the result of xmlrpc_logOffer is discarded, so the
        # calling peer always receives None -- confirm that is intended.
        self.xmlrpc_logOffer(peername, url, logtype='scan')

    def xmlrpc_scanRequest(self, peername, url, key):
        """
        Called by a peer to request a scan of C{url}; stores a
        C{QueuedRequest} for it.

        @param peername: name of the requesting peer
        @param url: url the peer wants scanned
        @param key: key the peer associates with this request
        @return: C{"success"}, C{"peer not known"} or C{"exception"}
        """
        try:
            peer = self._getpeer(peername, "requested a scan on url %s" % url)
            if not peer:
                return "peer not known"

            request = QueuedRequest(self.config.owner, "active-scan", peer, url, key=key)
            self.session.add(request)
            self.session.commit()
            self.logger.log("Scan request %r: %r" % (key, request))
            return "success"
        except Exception:
            # RPC boundary: report the failure to the peer instead of
            # letting it propagate.
            self.logger.exception()
            return "exception"

    def xmlrpc_scanResult(self, peername, url, hash, key, malicious, scannervv,
                          sigversion, sigdatestr):
        """
        Called by a peer to return the result of a scan we requested from
        it; stores the result as a C{Scan}.

        @param peername: name of the peer returning the result
        @param url: url that was scanned
        @param hash: hash of the scanned content; any falsy value is
            stored as C{None}
        @param key: key of the C{SentScanRequest} this result answers
        @param malicious: scan verdict, passed through to C{Scan}
        @param scannervv: first C{SigInfo} argument (presumably scanner
            vendor/version -- confirm against C{SigInfo})
        @param sigversion: second C{SigInfo} argument
        @param sigdatestr: signature date as a unix timestamp; converted
            with C{int()} and interpreted as UTC
        @return: C{"success"}, C{"peer not known"}, C{"no such request"}
            or C{"exception"}
        """
        try:
            peer = self._getpeer(peername, "returned a scan on url %s" % url)
            if not peer:
                return "peer not known"

            request = (
                self.session.query(SentScanRequest)
                .filter(SentScanRequest.owner == self.config.owner)
                .filter(SentScanRequest.key == key)
                .filter(SentScanRequest.peer == peer)
                .filter(SentScanRequest.url == url)
                .first()
            )
            if not request:
                # Must go through self.logger: this class has no log()
                # method of its own, so self.log raised AttributeError and
                # the peer was told "exception" instead.
                self.logger.log("Peer %r attempted to return a scan result for "
                                "url %r with key %r, but no such scan was requested"
                                % (peer, url, key))
                return "no such request"

            sigdate = datetime.datetime.utcfromtimestamp(int(sigdatestr))
            hash = hash or None  # if the hash is empty or similar, replace with None
            scan = Scan(self.config.owner, "social-active", url, malicious,
                        siginfo=SigInfo(scannervv, sigversion, sigdate),
                        hash=hash, sentrequest=request, peer=peer)
            self.session.add(scan)
            self.session.commit()
            self.logger.log("Scan result %r: %r" % (key, scan))
            return "success"
        except Exception:
            # RPC boundary: report the failure to the peer instead of
            # letting it propagate.
            self.logger.exception()
            return "exception"
class Redirector(basic.LineOnlyReceiver):
    """
    Redirector protocol class. Implements the squid redirector protocol,
    agnostic of where it is used from.

    Currently only uses the url field from the squid redirector protocol,
    so leaving out the other fields will have no effect.

    @ivar logger: 'Redirector' logger
    @type logger: L{Logger}

    @ivar config: config instance
    @type config: L{AttributeConfig}

    @ivar endpoint: endpoint location of Core server on localhost
    @type endpoint: C{twisted.internet.endpoints.TCP4ClientEndpoint}
    """
    # Timestamps of the most recent request (begin) and its answer (end).
    begin = None
    end = None
    delimiter = "\n"

    def __init__(self, config):
        """
        @param config: config instance; C{config.scanning._core_port} is
            the local port the Core server is reached on
        """
        self.logger = Logger('Redirector')
        self.config = config
        self.endpoint = TCP4ClientEndpoint(reactor, "127.0.0.1",
                                           int(config.scanning._core_port))

    def parseLine(self, line):
        """
        Parse a line from squid

        @param line: line received from squid
        @type line: C{str}

        @return: url from the line, or C{""} when the line carries none
        @rtype: C{str}
        """
        # IDnum URLstr ip/fqdn ident method key=value key=value
        # or
        # URLstr ip/fqdn ident method key=value key=value
        # E.g., http://www.google.com 192.168.100.1/- user2 GET myip=192.168.100.1 myport=3128
        fields = line.split(" ")
        first = fields[0]

        # the following block deals with the ID number being optional
        try:
            int(first)
        except ValueError:
            return first

        # The first field was a channel ID, so the url is the next field.
        # A line holding only an ID has no url; report it as empty instead
        # of raising StopIteration.
        if len(fields) > 1:
            return fields[1]
        return ""

    def stop(self):
        """
        Stop running; provided in case this protocol is subclassed.
        Stops the reactor.
        """
        reactor.stop()

    @property
    def totaltime(self):
        """
        Time spent on the current request.

        @return: C{end - begin} once both are set, the time elapsed since
            C{begin} while a request is in flight, otherwise a zero delta
        @rtype: C{datetime.timedelta}
        """
        if self.begin and self.end:
            return self.end - self.begin
        elif self.begin:
            return datetime.now() - self.begin
        else:
            return timedelta(0)

    def callback(self, result):
        """
        Callback provided to CoreClientFactory. Logs how long the decision
        took and writes the result line to the transport.

        @param result: url to redirect to, or C{""}.
        @type result: C{str}
        """
        self.end = datetime.now()
        msg = 'Decision took %s seconds; URL result: %s'
        self.logger.log(msg % (self.totaltime.total_seconds(), result))
        self.transport.write("%s\n" % result)

    def dataReceived(self, data):
        """
        A hack to make sure this protocol will work regardless of whether
        it is fed \\r\\n newlines or \\n newlines.
        """
        basic.LineOnlyReceiver.dataReceived(self, data.replace("\r", ""))

    def lineReceived(self, line):
        """
        Handle a received line.

        An empty line stops the reactor; a line without a url is ignored;
        anything else is handed to a C{CoreClientFactory} connected through
        C{self.endpoint}.

        @type line: C{str}
        @param line: line received
        """
        self.begin = datetime.now()
        self.logger.log("Got a new request: [%s]" % line.replace("\n", ""))

        if not line:
            self.logger.log("Line empty, exiting: %r" % line)
            self.stop()
            # Nothing left to process once we have asked the reactor to stop.
            return

        url = self.parseLine(line)
        if not url:
            self.logger.log("URL empty, ignoring: %r" % url)
            return

        factory = CoreClientFactory(url, self.callback)
        self.endpoint.connect(factory)
class ContainerManager(object):
    """
    Container manager object which stores and manages loaded containers,
    and runs jobs related to them.

    For example, containers may be of type ScanLogFile or ScanDigestFile

    @ivar logger: "ContainerManager" logger
    @type logger: L{Logger}

    @ivar config: socialscan configuration
    @type config: L{AttributeConfig}

    @ivar session: SQLAlchemy database session
    @type session: C{sqlalchemy.orm.session.Session}

    @ivar loadlimit: C{int} version of C{config.container_manager.loadlimit}
    @type loadlimit: C{int}

    @ivar containers: foreign containers currently loaded
    @type containers: C{list} of L{ContainerMixin}

    @ivar ourcontainer: the container that is currently being built by this
        container manager
    @type ourcontainer: L{ContainerMixin}

    @ivar announcequeue: local containers to announce to peers
    @type announcequeue: C{list} of L{ContainerMixin}

    @ivar container: the container class this manager queries for
    @ivar cname: lower-cased name of that class, used in log messages
    @ivar name: C{"<container class name>Manager"}, used as the logger name
    @ivar loaded: C{True} once an existing incomplete container of our own
        has been loaded successfully
    """

    def __init__(self, config, session, container_mixin):
        """
        Create the share/storage directories if needed, load up to
        C{loadlimit} containers created by others, and load our own
        incomplete container (or start a new one).

        @param config: socialscan configuration; C{config.owner} and
            C{config.container_manager} are read
        @param session: database session used for the container queries
        @param container_mixin: container class to manage
        """
        self.container = container_mixin
        self.cname = self.container.__name__.lower()
        self.name = "%sManager" % (self.container.__name__)
        self.logger = Logger("%s" % self.name)
        self.logger.log("initializing %s" % (self.name))
        self.config = config
        self.session = session
        self.loaded = False

        # Only the directory part of each configured location is created.
        sharedir = os.path.dirname(config.container_manager.share_location)
        storedir = os.path.dirname(config.container_manager.storage_location)
        if not os.path.exists(sharedir):
            self.logger.log("creating share dir %r" % sharedir)
            os.makedirs(sharedir)
        if not os.path.exists(storedir):
            self.logger.log("creating storage dir %r" % storedir)
            os.makedirs(storedir)

        self.loadlimit = int(config.container_manager.loadlimit)
        self.announcequeue = []
        self.containers = []

        # Containers we own that were created by someone else.
        allcontainers = (
            session.query(self.container)
            .filter(self.container.owner == config.owner)
            .filter(self.container.creator != config.owner)
            .all()
        )
        # TODO: get the below filter working
        # .filter(self.container.tainted == False)\

        # NOTE(review): sorted() is ascending, so the slice below keeps the
        # loadlimit containers with the SMALLEST usefulness values --
        # confirm that lower means more useful, otherwise this should sort
        # in reverse.
        sortedcontainers = sorted(allcontainers, key=lambda container: container.usefulness)
        for container in sortedcontainers[: self.loadlimit]:
            try:
                # NOTE(review): eval() resolves the type name carried by the
                # container record in this module's scope -- confirm that
                # value can never be attacker-controlled.
                container.container_type = eval(container.container_type_name)
                self.containers.append(container.load())
            except (ValueError, AttributeError, IOError), error:
                # A container that fails to load is logged and skipped.
                msg = "Error while loading %s %s: %s"
                self.logger.log(msg % (self.cname, container, error))

        # Most recent container of our own that is not complete yet, if any.
        self.ourcontainer = (
            session.query(self.container)
            .filter(self.container.owner == config.owner)
            .filter(self.container.creator == config.owner)
            .filter(self.container.complete == False)
            .order_by(self.container.date.desc())
            .first()
        )
        if self.ourcontainer:
            try:
                # NOTE(review): same eval() concern as in the loop above.
                self.ourcontainer.container_type = eval(self.ourcontainer.container_type_name)
                self.ourcontainer.load()
            except:
                # NOTE(review): bare except also catches SystemExit and
                # KeyboardInterrupt; any failure drops the container so a
                # new one is started below.
                self.logger.exception()
                self.ourcontainer = None
            else:
                # Only reached when the load above raised nothing.
                self.loaded = True

        if not self.ourcontainer:
            self._newcontainer(None)