class TestMagic(unittest.TestCase):
    """Exercise the Magic wrapper against the bundled testdata files."""

    # Subclasses flip this to True to run the same file table in MIME mode.
    mime = False

    def setUp(self):
        self.m = Magic(mime=self.mime)

    def testFileTypes(self):
        """Each testdata file must match via both from_buffer and from_file."""
        for filename, desc, mime in testfile:
            filename = path.join(path.dirname(__file__), "testdata", filename)
            target = mime if self.mime else desc
            # Read in binary mode and close the handle deterministically;
            # the original leaked the file object and used text mode, which
            # can raise UnicodeDecodeError on binary testdata.
            with open(filename, 'rb') as f:
                self.assertEqual(target, self.m.from_buffer(f.read(1024)))
            self.assertEqual(target, self.m.from_file(filename), filename)

    def testErrors(self):
        self.assertRaises(IOError, self.m.from_file, "nonexistent")
        self.assertRaises(MagicException, Magic, magic_file="noneexistent")
        os.environ['MAGIC'] = '/nonexistetn'
        try:
            self.assertRaises(MagicException, Magic)
        finally:
            # Restore the environment even if the assertion above fails.
            del os.environ['MAGIC']
class TestHandler(HandlerTestCase):
    """Tests for the ffmpeg-based audio/video Handler."""

    def afterSetUp(self):
        # Load the sample video once; close the fixture handle deterministically.
        with open("./data/test.ogv") as f:
            self.data = f.read()
        self.kw = dict(env=dict(PATH=self.env_path))
        self.input = Handler(self.tmp_url, self.data, "ogv", **self.kw)
        self.file_detector = Magic(mime=True)

    def testConvertVideo(self):
        """Test coversion of video to another format"""
        output_data = self.input.convert("mpeg")
        file_format = self.file_detector.from_buffer(output_data)
        # assertEquals is a deprecated alias of assertEqual
        self.assertEqual(file_format, 'video/mpeg')

    def testgetMetadata(self):
        """Test if metadata is extracted from"""
        output_metadata = self.input.getMetadata()
        self.assertEqual(output_metadata, {'Encoder': 'Lavf52.64.2'})

    def testsetMetadata(self):
        """Test if metadata are inserted correclty"""
        metadata_dict = {"title": "Set Metadata Test", "creator": "cloudooo"}
        output = self.input.setMetadata(metadata_dict)
        handler = Handler(self.tmp_url, output, "ogv", **self.kw)
        metadata = handler.getMetadata()
        self.assertEqual(metadata["Title"], "Set Metadata Test")
        self.assertEqual(metadata["Creator"], "cloudooo")

    def testConvertAudio(self):
        """Test coversion of audio to another format"""
        with open("./data/test.ogg") as f:
            self.data = f.read()
        self.input = Handler(self.tmp_url, self.data, "ogg", **self.kw)
        output_data = self.input.convert("wav")
        file_format = self.file_detector.from_buffer(output_data)
        # XXX this might expect 'audio/vnd.wave' but magic only got 'audio/x-wav'
        self.assertEqual(file_format, 'audio/x-wav')
def scan_directory(repo, slab_repo, directory):
    """Walk *directory* and record file state in *repo*.

    Files whose (mtime, size) match the repo's active record are merely
    confirmed as seen; new or changed files are re-segmented via
    *slab_repo* and re-typed with libmagic.  Files no longer on disk are
    marked deleted.
    """
    m = Magic(True)
    active = repo.active_files()
    updated = {}
    confirmed = set()
    slab_repo.open_slab()
    try:
        for dirpath, dirnames, filenames in walk(directory):
            rel_path = relpath(dirpath, directory) if directory != dirpath else ''
            for file_name in filenames:
                full_name = join(dirpath, file_name)
                file_name = join(rel_path, file_name)
                stats = stat(full_name)
                confirmed.add(file_name)
                seen_ts, seen_size = active.get(file_name, (None, None))
                if seen_ts != long(stats.st_mtime) or seen_size != long(stats.st_size):
                    segments = slab_repo.process_file(full_name)
                    # Sniff the MIME type from the first 32 KiB.  Open in
                    # binary mode so the raw bytes reach libmagic unmangled
                    # (text mode could decode/translate them).
                    with open(full_name, 'rb') as f:
                        mime = m.from_buffer(f.read(2**15))
                    updated[file_name] = long(stats.st_mtime), long(stats.st_size), mime, segments
    finally:
        slab_repo.close_slab()
    repo.mark_deleted(set(active) - confirmed)
    repo.mark_seen(confirmed - set(updated))
    repo.mark_updated(updated)
class IMedia(metaclass=ABCMeta):
    """Interface for processing of attached media"""

    @abstractmethod
    def __init__(self):
        self.magic = Magic(mime=True)

    @abstractmethod
    def get_adapted_image(self, image_io):
        raise DontwiNotImplementedError

    @abstractmethod
    def get_adapted_video(self, video_io):
        raise DontwiNotImplementedError

    def get_mime(self, media_io):
        """Return the MIME type of *media_io*.

        Accepts a BytesIO-like object (has getvalue) or any seekable
        stream; raises DontwiMediaError otherwise.
        """
        if hasattr(media_io, "getvalue"):
            head = media_io.getvalue()
        elif hasattr(media_io, "seekable") and media_io.seekable():
            head = media_io.read(1024)
            # Rewind to the start.  The original `seek(SEEK_SET)` passed
            # SEEK_SET (== 0) as the *offset*, which only worked by accident.
            media_io.seek(0, SEEK_SET)
        else:
            raise DontwiMediaError
        return self.magic.from_buffer(head)

    @staticmethod
    def get_media_size(media_io):
        """Return the total byte length of *media_io* without consuming it."""
        if hasattr(media_io, "getvalue"):
            size = len(media_io.getvalue())
        elif hasattr(media_io, "seekable") and media_io.seekable():
            media_io.seek(0, SEEK_END)
            size = media_io.tell()
            media_io.seek(0, SEEK_SET)
        else:
            raise DontwiMediaError
        return size
def determineDataType(data):
    """Best-effort classification of *data*.

    Dicts report their explicit 'type' key (or 'dict' when absent);
    anything else is MIME-sniffed with libmagic, falling back to "" on
    any failure.
    """
    # If data is a dictionary and contains type key,
    # we can directly derive the data_type
    if isinstance(data, dict):
        if 'type' in data:
            data_type = data['type']
        else:
            data_type = 'dict'
    else:
        # If data is not a dictionary, we try to guess MIME type
        # by using magic library
        try:
            from magic import Magic
            mime_checker = Magic(mime=True)
            data_type = mime_checker.from_buffer(data)  # noqa
        except Exception:
            # A bare `except:` would also swallow SystemExit and
            # KeyboardInterrupt; catch Exception instead.
            register_exception(
                stream="warning",
                prefix="BibWorkflowObject.determineDataType:"
                " Impossible to resolve data type."
            )
            data_type = ""
    return data_type
class TestMagicMimeEncoding(unittest.TestCase):
    """Check libmagic text-encoding detection against the testdata table."""

    def setUp(self):
        self.m = Magic(mime_encoding=True)

    def testFileEncoding(self):
        for filename, encoding in testFileEncoding:
            filename = path.join(path.dirname(__file__), "testdata", filename)
            # Read raw bytes: text mode would decode (and possibly fail on)
            # exactly the encodings this test is supposed to detect.  Also
            # close the handle instead of leaking it.
            with open(filename, 'rb') as f:
                self.assertEqual(encoding, self.m.from_buffer(f.read(1024)))
            self.assertEqual(encoding, self.m.from_file(filename), filename)
def get_mime_type(resource_path):
    """
    Return the MIME type of a local file.

    :raises: IOError
    """
    mime = Magic(mime=True)
    # Close the handle deterministically instead of leaking it until GC.
    with open(resource_path, 'rb') as resource:
        return mime.from_buffer(resource.read())
def assert_uploaded_file_mime(file_instance, allowed_mimes):
    """Return True when the sniffed MIME type of a Django upload is allowed.

    Supports both on-disk (TemporaryUploadedFile) and in-memory
    (InMemoryUploadedFile) uploads; anything else is rejected outright.
    """
    detector = Magic(mime=True)
    if isinstance(file_instance, TemporaryUploadedFile):
        detected = detector.from_file(file_instance.temporary_file_path())
    elif isinstance(file_instance, InMemoryUploadedFile):
        detected = detector.from_buffer(file_instance.file.getvalue())
    else:
        raise Exception('Provided file is not a valid django upload file. \
            Use util.assert_file_mime instead.')
    return detected in allowed_mimes
class FSContainer(object):
    """A named GridFS sub-container of files attached to one document."""

    def __init__(self, container_name, obj):
        self._container_name = container_name
        self._obj = obj
        self._fs = GridFS(self._obj.db)
        # libmagic is optional; without it content_type stays None on writes.
        if Magic:
            self._magic = Magic(mime=True)

    def __getitem__(self, key):
        """Read and return the whole content of the contained file *key*."""
        f = self.open(key)
        try:
            return f.read()
        finally:
            # Close even if read() raises (the original leaked on error).
            f.close()

    def __setitem__(self, key, value):
        """Store *value* under *key*, sniffing its content type when possible."""
        content_type = None
        if value and Magic:
            content_type = self._magic.from_buffer(value)
        f = self.open(key, 'w')
        try:
            f.content_type = content_type
            f.write(value)
        except TypeError:
            # message typo fixed ("mus be" -> "must be")
            raise TypeError("GridFS value must be string not %s" % type(value))
        finally:
            f.close()

    def __delitem__(self, key):
        spec = {'metadata.doc_id': self._obj['_id'],
                'metadata.container': self._container_name,
                'metadata.name': key}
        self._fs.remove(spec, collection=self._obj.collection.name)

    def open(self, name, mode='r'):
        """Open contained file *name*; 'r' requires existence, 'w' creates or
        overwrites the existing GridFile."""
        search_spec = {'metadata.name': name,
                       'metadata.container': self._container_name,
                       'metadata.doc_id': self._obj['_id']}
        if mode == 'r':
            try:
                return GridFile(search_spec, self._obj.db, 'r',
                                self._obj.collection.name)
            except IOError:
                raise IOError('"%s" is not found in the database' % name)
        existing = self._obj.collection.files.find_one(search_spec)
        if existing:
            return GridFile({'_id': ObjectId(existing['_id'])}, self._obj.db,
                            'w', self._obj.collection.name)
        write_spec = {'metadata': {'name': name,
                                   'container': self._container_name,
                                   'doc_id': self._obj['_id']}}
        return GridFile(write_spec, self._obj.db, 'w',
                        self._obj.collection.name)

    def __iter__(self):
        """Yield the names of all files stored in this container."""
        for metafile in self._obj.collection.files.find(
                {'metadata.container': self._container_name,
                 'metadata.doc_id': self._obj['_id']}):
            yield metafile['metadata']['name']

    def list(self):
        return [i for i in self]

    def __repr__(self):
        return "<%s '%s'>" % (self.__class__.__name__, self._container_name)
def contentType(self):
    """Sniff, validate and cache the MIME type of self.data.

    Raises FlogMediaError when the detected type is not in
    self.allowed_content_types.
    """
    # `is None` instead of `== None` (identity check, PEP 8)
    if self.content_type is None:
        mime = Magic(mime=True)
        content_type = mime.from_buffer(self.data.read(1024))
        if content_type in self.allowed_content_types:
            self.content_type = content_type
        else:
            raise FlogMediaError(' '.join(['content_type', str(content_type), 'is not allowed']))
    return self.content_type
def save_bits_to_file(filepath, bits):
    """Decode *bits* (a binary string) and write it into *filepath*,
    deriving the file extension from the sniffed MIME type."""
    payload = Bits(bin=bits)
    detector = Magic(mime=True)
    detected = detector.from_buffer(payload.tobytes())
    extension = mimetypes.guess_extension(type=detected)
    with open(f"{filepath}/file{extension}", "wb") as out:
        payload.tofile(out)
def mimetype():
    """Guess the MIME type of this item's contents.

    The stdlib mimetypes table is consulted first; libmagic is used as a
    fallback for unknown or generic text results.
    """
    # XXX performance penalty here getting the entire file?
    # XXX should provide a way to allow other packages to add
    # more mimetypes.
    data = contents()
    if not isinstance(data, basestring):
        return None
    guessed = mimetypes.guess_type(path)[0]
    if guessed is None or guessed.startswith('text/'):
        detector = Magic(mime=True)
        guessed = detector.from_buffer(data[:4096])
    return guessed
def determine_figure_type(buff):
    """
    Attempt to determine the figure type of an image file stored in a buffer.

    Uses the `magic` module to try to determine the MIME type of the image
    and then converts that to a Hedwig `FigureType` enum value.

    Raises a `UserError` exception if the determined MIME type is not
    recognised as a Hedwig figure type.
    """
    detector = Magic(mime=True)
    mime_type = detector.from_buffer(buff)
    return FigureType.from_mime_type(mime_type)
async def examineFile(f, fileType: str) -> Tuple[bool, List[str]]:
    """Compare an upload's sniffed and declared top-level media types.

    Returns (all types equal *fileType*, [sniffed type, declared type]).
    """
    await f.seek(0)
    magic = Magic(mime=True, keep_going=True)
    # Mp3 files pose interesting problems in mimetype detection. Let the audio
    # loading mechanism tell us if it actually is audio.  The extension is
    # compared case-insensitively so "song.MP3" gets the same treatment
    # (the original check missed upper-case extensions).
    if f.filename.split('.')[-1].lower() == 'mp3':
        typeGuess = typeIncluded = 'audio'
    else:
        typeGuess = magic.from_buffer(await f.read(2048)).split('/')[0]
        typeIncluded = f.content_type.split('/')[0]
        await f.seek(0)
    types = [typeGuess, typeIncluded]
    status = all(fileType == mimetype for mimetype in types)
    return (status, types)
def test_get_media_ios(self):
    """Media attachments of dummy toots should come back as readable BytesIO
    objects whose sniffed MIME type has a top-level prefix."""
    statuses = [
        TootStatus(a_dc) for a_dc in dummy_toot_with_media(YOUR_MASTODON_FQDN)
    ]
    connector = MastodonConnector(
        self.config.items["endpoint your_mastodon"])
    detector = Magic(mime=True)
    for status in statuses:
        for media_io in connector.get_media_ios(status.get_medias()):
            self.assertIsInstance(media_io, connector.MediaIo)
            self.assertIsInstance(media_io.io, BytesIO)
            head = media_io.io.read(261)
            mime_prefix = detector.from_buffer(head).split("/")[0]
            # NOTE(review): assertIsNotNone with a msg argument — possibly
            # assertEqual(mime_prefix, media_io.type) was intended; verify.
            self.assertIsNotNone(mime_prefix, media_io.type)
def save_bits_to_file(file_path, bits):
    """Decode *bits* (a binary string) to bytes and write them to disk.

    When *file_path* is None a default name of the form ``file<ext>`` is
    used, with the extension derived from the sniffed MIME type.
    """
    bitstring = Bits(bin=bits)
    mime = Magic(mime=True)
    mime_type = mime.from_buffer(bitstring.tobytes())
    # If filepath not passed in use default,
    # otherwise use the passed-in filepath.
    # (`is None` instead of `== None` — identity check, PEP 8)
    if file_path is None:
        filepath = f"file{mimetypes.guess_extension(type=mime_type)}"
    else:
        filepath = file_path
    with open(filepath, "wb") as f:
        bitstring.tofile(f)
class TestHandler(HandlerTestCase):
    """Tests for the pdf Handler (conversion and metadata round-trips)."""

    def afterSetUp(self):
        self.kw = dict(env=dict(PATH=self.env_path))
        self.file_detector = Magic(mime=True)

    def _read(self, name):
        # Read a fixture file, closing the handle deterministically
        # (the original leaked one handle per test).
        with open(name) as f:
            return f.read()

    def testConvertPDFtoText(self):
        """Test conversion of pdf to txt"""
        pdf_document = self._read("data/test.pdf")
        handler = Handler(self.tmp_url, pdf_document, "pdf", **self.kw)
        txt_document = handler.convert("txt")
        self.assertTrue(txt_document.startswith("UNG Docs Architecture"))

    def testConvertPStoPDF(self):
        """Test conversion of ps to pdf"""
        ps_document = self._read("data/test.ps")
        handler = Handler(self.tmp_url, ps_document, "ps", **self.kw)
        pdf_document = handler.convert("pdf")
        mimetype = self.file_detector.from_buffer(pdf_document)
        # assertEquals is a deprecated alias of assertEqual
        self.assertEqual(mimetype, "application/pdf")

    def testgetMetadata(self):
        """Test if the metadata are extracted correctly"""
        pdf_document = self._read("data/test.pdf")
        handler = Handler(self.tmp_url, pdf_document, "pdf", **self.kw)
        metadata = handler.getMetadata()
        self.assertEqual(type(metadata), DictType)
        self.assertNotEqual(metadata, {})
        self.assertEqual(metadata["title"], 'Free Cloud Alliance Presentation')

    def testsetMetadata(self):
        """Test if the metadata is inserted correctly"""
        pdf_document = self._read("data/test.pdf")
        handler = Handler(self.tmp_url, pdf_document, "pdf", **self.kw)
        metadata_dict = {"title": "Set Metadata Test", "creator": "gabriel\'@"}
        new_document = handler.setMetadata(metadata_dict)
        handler = Handler(self.tmp_url, new_document, "pdf", **self.kw)
        metadata = handler.getMetadata()
        self.assertEqual(metadata["title"], 'Set Metadata Test')
        self.assertEqual(metadata['creator'], 'gabriel\'@')
def mutate(self, info, **kwargs):
    """Process file input."""
    finding_id = kwargs.get('finding_id')
    project = integrates_dao.get_finding_project(finding_id)
    file_input = info.context.FILES.get('document', None)
    # Sniff the real MIME type rather than trusting the client header.
    detector = Magic(mime=True)
    if isinstance(file_input, TemporaryUploadedFile):
        mime_type = detector.from_file(file_input.temporary_file_path())
    elif isinstance(file_input, InMemoryUploadedFile):
        mime_type = detector.from_buffer(file_input.file.getvalue())
    else:
        mime_type = ''
    mib = 1048576
    allowed_types = ['text/x-yaml', 'text/plain', 'text/html']
    if file_input and mime_type in allowed_types:
        if file_input.size < 1 * mib:
            try:
                success = process_file(file_input, finding_id, info)
            except (InvalidRange, InvalidSchema, InvalidPort):
                raise
        else:
            success = False
            raise InvalidFileSize()
    else:
        success = False
        raise InvalidFileType()
    ret = UploadFile(success=success)
    if success:
        update_last_vuln_date(finding_id)
        util.cloudwatch_log(
            info.context, 'Security: Uploaded file in {project} \
            project succesfully'.format(project=project))
    else:
        # NOTE(review): this log text mentions "delete" inside an upload
        # mutation — looks like a copy/paste; confirm before changing.
        util.cloudwatch_log(
            info.context, 'Security: Attempted to delete file \
            from {project} project'.format(project=project))
    util.invalidate_cache(finding_id)
    util.invalidate_cache(project)
    return ret
def determineDataType(data):
    """Best-effort classification of *data*.

    Dicts report their explicit 'type' key (or 'dict' when absent);
    anything else is MIME-sniffed with libmagic, falling back to "" when
    sniffing fails.
    """
    # If data is a dictionary and contains type key,
    # we can directly derive the data_type
    if isinstance(data, dict):
        if 'type' in data:
            data_type = data['type']
        else:
            data_type = 'dict'
    else:
        from magic import Magic
        mime_checker = Magic(mime=True)
        # If data is not a dictionary, we try to guess MIME type
        # by using magic library
        try:
            data_type = mime_checker.from_buffer(data)  # noqa
        except Exception:
            # A bare `except:` would also swallow SystemExit and
            # KeyboardInterrupt; catch Exception instead.
            register_exception(stream="warning",
                               prefix="BibWorkflowObject.determineDataType:" +
                               " Impossible to resolve data type.")
            data_type = ""
    return data_type
def handle_uploaded_file(f):
    """Validate an uploaded plain-text recipe file and store it.

    Updates the existing Recipe when the parsed data carries a known
    UUID, otherwise creates a new one.  Returns the saved Recipe.
    """
    if not f.size < 1e6:
        raise MemoryError('File too big!')
    mime = Magic(mime=True)
    if mime.from_buffer(f.read()) != 'text/plain':
        raise ValueError('Passed file is not a text file.')
    f.file.seek(0)
    parsed = parse_file(TextIOWrapper(f.file))
    output = parsed.to_djangodb()
    # If recipe with same UUID exists, try to update it, else create a new one.
    if output.get('uuid'):
        try:
            existing = Recipe.objects.get(pk=output['uuid'])
            log.info('Replacing currently existing recipe with new one.')
            for field in existing.__dict__:
                if field in output:
                    existing.__dict__[field] = output[field]
            existing.save()
            return existing
        except Recipe.DoesNotExist:
            pass
    log.info('Creating new recipe entry for {}'.format(output['name']))
    recipe = Recipe(**output)
    recipe.save()
    return recipe
def mutate(self, info, **kwargs):
    """Process file input."""
    finding_id = kwargs.get('finding_id')
    origin = kwargs.get('origin', '')
    project = finding_domain.get_project(finding_id)
    file_input = info.context.FILES['1']
    # Sniff the real MIME type rather than trusting the client header.
    detector = Magic(mime=True)
    if isinstance(file_input, TemporaryUploadedFile):
        mime_type = detector.from_file(file_input.temporary_file_path())
    elif isinstance(file_input, InMemoryUploadedFile):
        mime_type = detector.from_buffer(file_input.file.getvalue())
    else:
        mime_type = ''
    mib = 1048576
    # Guard clauses instead of nested if/else.
    if not (file_input and mime_type in ['text/x-yaml', 'text/plain', 'text/html']):
        raise InvalidFileType()
    if not file_input.size < 1 * mib:
        raise InvalidFileSize()
    success = process_file(file_input, finding_id, info, origin)
    ret = UploadFile(success=success)
    if success:
        update_last_vuln_date(finding_id)
        util.invalidate_cache(finding_id)
        util.invalidate_cache(project)
        util.cloudwatch_log(
            info.context, 'Security: Uploaded file in {project} \
            project succesfully'.format(project=project))
    else:
        util.cloudwatch_log(
            info.context, 'Security: Attempted to delete file \
            from {project} project'.format(project=project))
        raise ErrorUploadingFileS3()
    return ret
def _getFileType(self, output_data):
    """Return the MIME type of base64-encoded *output_data*."""
    detector = Magic(mime=True)
    return detector.from_buffer(decodestring(output_data))
class FS(object):
    """GridFS-backed file accessor for a document object.

    Declared containers become FSContainer attributes; declared files are
    readable and writable both as items (fs['name']) and as attributes
    (fs.name).
    """

    def __init__(self, gridfs, obj):
        self._gridfs = gridfs
        for container in self._gridfs.get('containers', []):
            self.__dict__[container] = FSContainer(container, obj)
        self._obj = obj
        self._fs = GridFS(self._obj.db)
        # libmagic is optional; without it content_type stays None on writes.
        if Magic:
            self._magic = Magic(mime=True)

    def __getitem__(self, key):
        """Read and return the whole content of declared file *key*."""
        f = self.open(key)
        try:
            return f.read()
        finally:
            # Close even if read() raises (the original leaked on error).
            f.close()

    def __setitem__(self, key, value):
        """Store *value* under *key*, sniffing its content type when possible."""
        content_type = None
        if value and Magic:
            content_type = self._magic.from_buffer(value)
        f = self.open(key, 'w')
        try:
            f.content_type = content_type
            f.write(value)
        except TypeError:
            # message typo fixed ("mus be" -> "must be")
            raise TypeError("GridFS value must be string not %s" % type(value))
        finally:
            f.close()

    def __getattr__(self, key):
        # Declared (non-container) files are exposed as attributes.
        if key not in ['_gridfs', '_obj', '_fs', '_containers', '_magic']:
            if key not in self._gridfs.get('containers', []) and key in self._gridfs.get('files', []):
                return self[key]
        return super(FS, self).__getattribute__(key)

    def __setattr__(self, key, value):
        # Assignments to declared files go to GridFS; the internal slots
        # listed below are stored as normal attributes.
        if key not in ['_gridfs', '_obj', '_fs', '_containers', '_magic']:
            if key not in self._gridfs.get('containers', []) and key in self._gridfs.get('files', []):
                self[key] = value
        else:
            super(FS, self).__setattr__(key, value)

    def __delitem__(self, key):
        self._fs.remove({'metadata.doc_id': self._obj['_id'],
                         'metadata.name': key},
                        collection=self._obj.collection.name)

    def __delattr__(self, key):
        del self[key]

    def open(self, name, mode='r'):
        """Open declared file *name*; 'r' requires existence, 'w' creates or
        overwrites the existing GridFile."""
        assert name in self._gridfs.get('files', []), "%s is not declared in gridfs" % name
        search_spec = {'metadata.name': name, 'metadata.doc_id': self._obj['_id']}
        if mode == 'r':
            try:
                return GridFile(search_spec, self._obj.db, 'r',
                                self._obj.collection.name)
            except IOError:
                raise IOError('"%s" is not found in the database' % name)
        # renamed from `file`, which shadowed the builtin
        existing = self._obj.collection.files.find_one(search_spec)
        if existing:
            return GridFile({'_id': ObjectId(existing['_id'])}, self._obj.db,
                            'w', self._obj.collection.name)
        write_spec = {'metadata': {'name': name, 'doc_id': self._obj['_id']}}
        return GridFile(write_spec, self._obj.db, 'w',
                        self._obj.collection.name)

    def __iter__(self):
        """Yield 'container/name' (or bare 'name') for every stored file."""
        for i in self._obj.collection.files.find({'metadata.doc_id': self._obj['_id']}):
            container, name = i['metadata'].get('container'), i['metadata']['name']
            if container:
                name = "%s/%s" % (container, name)
            yield name

    def list(self):
        return [i for i in self]

    def __repr__(self):
        return "<%s of object '%s'>" % (self.__class__.__name__, self._obj['_id'])
class GnuPG_Decryptor:
    """
    Class representing the native application of the GnuPG_Decryptor browser
    extension.

    The native application is responsible for accessing private keys and
    decrypting content of a web page.
    """

    def __init__(self):
        self._passwords = dict()   # key uid -> passphrase ('' when unknown)
        self._gui = None
        self._QApp = None
        self._sudo = None          # sudo password, or None when sudo unused
        self._homedir = None       # gpg --homedir value, or None
        self.MAX_MESSAGE_SIZE = 750 * 1024
        self.mimeResolver = Magic(mime=True)
        self._lock = Lock()        # serialises writes to stdout

    def show(self):
        """
        Method displays GUI window for user
        """
        # If Gui is not defined yet, construct it
        if self._gui is None:
            self._QApp = QApplication(sys.argv)
            initKeys = []
            for keyId, password in self._passwords.items():
                initKeys.append({'id': keyId, 'password': password})
            self._gui = GnuPG_Decryptor_GUI(self, initKeys, self._sudo,
                                            self._homedir)
        # show the window
        self._gui.show()
        return self._QApp.exec_()

    def keyList(self, settings):
        """
        Method returns list of secret keys based on sudo and homedir settings
        """
        stdin = ''
        args = []
        # use sudo
        if settings['sudo']['use']:
            # sudo argument
            args.append('sudo')
            # do not remember password
            args.append('-Sk')
            # add password to stdin
            stdin += settings['sudo']['password'] + '\n'
        # gpg call
        args.append('gpg')
        # use homedir
        if settings['home']['use']:
            args.append('--homedir')
            args.append(settings['home']['homedir'])
        # command to list secret keys
        args.append('--list-secret-keys')
        # call subprocess
        process = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        stdout, _ = process.communicate(stdin.encode())
        retcode = process.returncode
        ids = []
        # if success
        if retcode == 0:
            stdout = stdout.decode().splitlines()
            ids = [{
                'id': line[25:].strip(),
                'password': ''
            } for line in stdout if line.startswith('uid')]
        return {'returnCode': retcode, 'keys': ids}

    def setPasswords(self, config):
        """
        Method sets new keys and passwords.
        """
        # clear current keys and passwords
        self._passwords = dict()
        # set new keys and password
        for key in config['keys']:
            self._passwords[key['id']] = key['password']
        # set sudo
        if config['sudo']['use']:
            self._sudo = config['sudo']['password']
        else:
            self._sudo = None
        # set homedir parameter
        if config['home']['use']:
            self._homedir = config['home']['homedir']
        else:
            self._homedir = None
        # notify background script about changes
        self.updateKeys()

    def getKeyUidFromId(self, keyId):
        """
        From key id (or fingerprint if you prefer) get the UID using the gpg
        application.
        """
        args = ['gpg']
        # if homedir parameter should be used
        if self._homedir is not None:
            args.append('--homedir')
            args.append(self._homedir)
        # add gpg argumnets
        args.append('--list-public-keys')
        args.append('--fingerprint')
        args.append(keyId)
        # call subprocess
        process = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        stdout, _ = process.communicate()
        retcode = process.returncode
        uid = None
        # if success
        if retcode == 0:
            stdout = stdout.decode().splitlines()
            uids = [
                line[25:].strip() for line in stdout if line.startswith('uid')
            ]
            if uids:
                uid = uids[0]
        return uid

    def getKeyUidFromData(self, data):
        """
        Method finds out, which keys were used for data encryption.
        """
        # command line arguments
        args = ['gpg', '--list-packets', '--list-only']
        # call gpg
        process = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        stdout, _ = process.communicate(data)
        retcode = process.returncode
        keys = []
        # if success
        if retcode == 0:
            stdout = stdout.decode().splitlines()
            # keep only the packet lines naming the encryption key
            # (the old comment claimed "gpg: encrypted" lines on stderr,
            # but the code reads ':pubkey' lines from stdout)
            filtered = [line for line in stdout if line.startswith(':pubkey')]
            for line in filtered:
                # find where ID/fingerprint is
                idx1 = line.find('keyid ') + 6
                idx2 = line.find(',', idx1)
                if idx2 == -1:
                    idx2 = len(line)
                # get uid from id/fingerprint
                uid = self.getKeyUidFromId(line[idx1:idx2])
                if uid is not None:
                    keys.append(uid)
        return keys

    @staticmethod
    def get_message():
        """
        Reads message from background script
        """
        raw_length = sys.stdin.buffer.read(4)
        if not raw_length:
            sys.exit(0)
        message_length = unpack('=I', raw_length)[0]
        message = sys.stdin.buffer.read(message_length).decode("utf-8")
        return loads(message)

    @staticmethod
    def encode_message(message_content):
        """
        Encode a message for transmission, given its content.
        """
        encoded_content = dumps(message_content).encode("utf-8")
        encoded_length = pack('=I', len(encoded_content))
        return {
            'length': encoded_length,
            'content': pack(str(len(encoded_content)) + "s", encoded_content)
        }

    def send_message(self, encoded_message):
        """
        Sends an encoded message to background script.
        """
        with self._lock:
            sys.stdout.buffer.write(encoded_message['length'])
            sys.stdout.buffer.write(encoded_message['content'])
            sys.stdout.buffer.flush()

    def debug(self, messageString):
        """
        Sends debug message to background script
        """
        self.send_message(
            GnuPG_Decryptor.encode_message({
                'message': messageString,
                'type': 'debug'
            }))

    def loadKeys(self):
        """
        Asks background scripts for stored keys.
        """
        self.send_message(
            GnuPG_Decryptor.encode_message({'type': 'getKeysRequest'}))

    def updateKeys(self):
        """
        Update keys in background scripts
        """
        keys = self._passwords.copy()
        # never leak passphrases back to the extension
        for key in keys.keys():
            keys[key] = ''
        message = {'type': 'updateKeysRequest', 'keys': keys}
        if self._sudo is not None:
            message['sudo'] = 1
        else:
            message['sudo'] = 0
        if self._homedir is not None:
            message['homedir'] = self._homedir
        self.send_message(GnuPG_Decryptor.encode_message(message))

    def decrypt(self, rawData, keys, messageId, tabId):
        """
        Decrypts the data and sends decrypted content to the content script.
        """
        err = b''
        retcode = 0
        for key in keys:
            args = []
            sudoPass = ''
            keyPass = self._passwords[key]
            # if sudo should be used
            if self._sudo is not None:
                args.append('sudo')
                args.append('-Sk')
                sudoPass = self._sudo + '\n'
            # gpg argument
            args.append('gpg')
            # if homedir should be used
            if self._homedir is not None:
                args.append('--homedir')
                args.append(self._homedir)
            # be quiet as possible
            args.append('--quiet')
            # use password if we know it
            if keyPass:
                args.append('--no-tty')
                args.append('--pinentry-mode=loopback')
                args.append('--passphrase')
                args.append(keyPass)
            # decrypt command for gpg
            args.append('--decrypt')
            # call subprocess
            process = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE)
            decrypted, err = process.communicate(sudoPass.encode() + rawData)
            retcode = process.returncode
            # if decryption failed, try next key
            if retcode != 0:
                continue
            # get mimeType of data
            mimeType = self.mimeResolver.from_buffer(decrypted)
            # encode data using base64
            decrypted = b64encode(decrypted)
            # split data into blocks
            blocks = [
                decrypted[i:i + self.MAX_MESSAGE_SIZE]
                for i in range(0, len(decrypted), self.MAX_MESSAGE_SIZE)
            ]
            # get last block of data
            lastBlock = blocks.pop()
            # prepare response
            response = {
                'messageId': messageId,
                'success': 1,
                'message': '',
                'type': 'decryptResponse',
                'data': '',
                'encoding': 'base64',
                'mimeType': mimeType,
                'lastBlock': 0,
                'tabId': tabId
            }
            # send all blocks, except last one
            for block in blocks:
                response['data'] = block.decode()
                self.send_message(GnuPG_Decryptor.encode_message(response))
            # send last block
            response['data'] = lastBlock.decode()
            response['lastBlock'] = 1
            self.send_message(GnuPG_Decryptor.encode_message(response))
            break
        if retcode != 0:
            errorMessage = 'Unable to decrypt data: ' + err.decode()
            self.send_message(
                GnuPG_Decryptor.encode_message({
                    'messageId': messageId,
                    'success': 0,
                    'message': errorMessage,
                    'type': 'decryptResponse',
                    'data': '',
                    'tabId': tabId
                }))
        elif not keys:
            errorMessage = 'Unable to decrypt data: Required key is not present'
            self.send_message(
                GnuPG_Decryptor.encode_message({
                    'messageId': messageId,
                    'success': 0,
                    'message': errorMessage,
                    'type': 'decryptResponse',
                    'data': '',
                    'tabId': tabId
                }))

    def main(self):
        """
        Reads messages from background scripts and create responses.
        """
        largeRequests = dict()
        # load stored keys
        self.loadKeys()
        while True:
            # read message
            message = GnuPG_Decryptor.get_message()
            errorMessage = str()
            if message['type'] == 'decryptRequest' and 'tabId' in message:
                # message contains encrypted data; get id of sender
                tabId = message['tabId']
                # decode data
                if message['encoding'] == 'base64':
                    rawData = b64decode(message['data'])
                elif message['encoding'] == 'ascii':
                    rawData = message['data'].encode()
                else:
                    errorMessage = 'Invalid encoding: ' + message['encoding']
                    self.send_message(
                        GnuPG_Decryptor.encode_message({
                            'messageId': message['messageId'],
                            'success': 0,
                            'message': errorMessage,
                            'type': 'decryptResponse',
                            'data': '',
                            'tabId': tabId
                        }))
                    continue
                # data are split into blocks, join those blocks
                if message['lastBlock'] == 0:
                    largeRequests[message['messageId']] = largeRequests[
                        message['messageId']] + rawData if (
                            message['messageId'] in largeRequests) else rawData
                    continue
                elif message['messageId'] in largeRequests:
                    rawData = largeRequests[message['messageId']] + rawData
                    del largeRequests[message['messageId']]
                # get keys that were used for encryption
                keys = self.getKeyUidFromData(rawData)
                # use only keys that are available
                keys = [key for key in keys if key in self._passwords]
                # decrypt on a worker thread so the read loop stays responsive
                t1 = Thread(target=self.decrypt,
                            args=(rawData, keys, message['messageId'], tabId))
                t1.start()
            elif message['type'] == 'displayWindow':
                # User clicked on icon - display window
                self.show()
            elif message['type'] == 'getKeysResponse':
                # Set new keys
                self._passwords = message['keys']
                self._homedir = message[
                    'homedir'] if 'homedir' in message else None
                self._sudo = '' if 'sudo' in message and message[
                    'sudo'] else None
def _get_file_encoding_type(file_path: str) -> str:
    """Return the text encoding of *file_path* as detected by libmagic."""
    # Close the handle deterministically (the original left the file
    # object for the garbage collector to reclaim).
    with open(file_path, "rb") as fp:
        blob = fp.read()
    m = Magic(mime_encoding=True)
    return m.from_buffer(blob)
class FilesStorage:
    """Local-disk document storage with MD5 addressing, forbidden-content
    filtering and optional replication to slave document-service APIs."""

    def __init__(self, settings):
        self.web_root = settings['files.web_root'].strip()
        self.archive_web_root = self.web_root + '.archive'
        self.save_path = settings['files.save_path'].strip()
        self.secret_key = settings['files.secret_key'].strip()
        self.disposition = settings.get('files.disposition', 'inline')
        forbidden_ext = settings.get('files.forbidden_ext', DANGEROUS_EXT)
        self.forbidden_ext = set(
            [s.strip().upper() for s in forbidden_ext.split(',') if s.strip()])
        self.forbidden_mime = DANGEROUS_MIME_TYPES
        self.forbidden_hash = set(['md5:d41d8cd98f00b204e9800998ecf8427e'
                                   ])  # empty file
        if 'files.forbidden_mime' in settings:
            with open(settings['files.forbidden_mime']) as fp:
                self.forbidden_mime = set([
                    s.strip().lower() for s in fp.readlines()
                    if '/' in s.strip()
                ])
        if 'files.forbidden_hash' in settings:
            with open(settings['files.forbidden_hash']) as fp:
                self.forbidden_hash = set([
                    s.strip().lower() for s in fp.readlines()
                    if s.startswith("md5:")
                ])
        if 'files.get_url_expire' in settings:
            # dirty monkey patching
            from openprocurement.documentservice import views
            self.old_EXPIRES = views.EXPIRES
            views.EXPIRES = int(settings['files.get_url_expire'])
            # log-message typo fixed ("Chagne" -> "Change")
            LOGGER.warning(
                "Change default expire for get_url from {} to {}".format(
                    self.old_EXPIRES, views.EXPIRES))
        self.replica_apis = list()
        if 'files.replica_api' in settings:
            self.replica_apis = [
                s.strip() for s in settings['files.replica_api'].split(',')
                if s.strip()
            ]
        self.require_replica_upload = settings.get(
            'files.require_replica_upload', True)
        self.replica_timeout = 300
        self.magic = Magic(mime=True)
        self.session = Session()
        self.dir_mode = 0o2710
        self.file_mode = 0o440
        self.meta_mode = 0o400

    def web_location(self, key, archived=False):
        """Return the X-Accel-Redirect path for *key* (bytes)."""
        web_root = self.web_root if not archived else self.archive_web_root
        return os.path.join(web_root, key[-2:], key[-4:], key).encode()

    def file_path(self, key):
        """Return (directory, full file name) for storage key *key*."""
        path = os.path.join(self.save_path, key[-2:], key[-4:])
        return path, os.path.join(path, key)

    def hash_to_uuid(self, md5hash):
        # keyed hash so document UUIDs can't be forged without secret_key
        return hashlib.sha1(md5hash + ':uuid:' + self.secret_key).hexdigest()

    def uuid_to_file(self, uuid):
        return hashlib.sha1(uuid + ':file:' + self.secret_key).hexdigest()

    def save_meta(self, uuid, meta, overwrite=False):
        """Persist *meta* as JSON next to the stored file.

        Raises ContentUploaded when the meta file exists and overwrite is
        False.
        """
        key = self.uuid_to_file(uuid)
        path, name = self.file_path(key)
        name += '.meta'
        if not overwrite and os.path.exists(name):
            raise ContentUploaded(uuid)
        meta['modified'] = get_now().isoformat()
        if not os.path.exists(path):
            os.makedirs(path, mode=self.dir_mode)
        # write-then-rename keeps readers from ever seeing a partial file
        with open(name + '~', 'wt') as fp:
            flock(fp, LOCK_EX | LOCK_NB)
            json.dump(meta, fp)
        os.rename(name + '~', name)
        os.chmod(name, self.meta_mode)

    def read_meta(self, uuid):
        """Load and return the stored metadata dict for *uuid*."""
        key = self.uuid_to_file(uuid)
        path, name = self.file_path(key)
        name += '.meta'
        if not os.path.exists(name):
            raise KeyNotFound(uuid)  # pragma: no cover
        with open(name) as fp:
            return json.load(fp)

    def check_forbidden(self, filename, content_type, fp):
        """Return True when name/extension, declared or sniffed MIME type —
        or, for ZIP archives, any member name — is blacklisted."""
        for ext in filename.rsplit('.', 2)[1:]:
            if ext.upper() in self.forbidden_ext:
                return True
        if content_type.lower() in self.forbidden_mime:
            return True
        fp.seek(0)
        magic_type = self.magic.from_buffer(fp.read(2048))
        if magic_type.lower() in self.forbidden_mime:
            return True
        if filename.upper().endswith('.ZIP') or \
                'application/zip' in (content_type, magic_type):
            fp.seek(0)
            try:
                zipobj = zipfile.ZipFile(fp)
            except zipfile.BadZipfile:
                return
            # renamed from `filename`, which shadowed the parameter
            for member in zipobj.namelist():
                for ext in member.rsplit('.', 2)[1:]:
                    if ext.upper() in self.forbidden_ext:
                        return True

    def compute_md5(self, in_file, blocksize=0x10000):
        """Return 'md5:<hex>' of *in_file*, reading it in blocks."""
        in_file.seek(0)
        md5hash = hashlib.md5()
        while True:
            block = in_file.read(blocksize)
            # `not block` already covers the empty read; the extra
            # `not len(block)` was redundant
            if not block:
                break
            md5hash.update(block)
        return "md5:" + md5hash.hexdigest()

    def upload_to_replicas(self, post_file, uuid, max_retry=10):
        """Push *post_file* to every configured replica, retrying with
        linear backoff; re-raises after the last failed attempt."""
        filename = post_file.filename
        content_type = post_file.type
        in_file = post_file.file
        for replica in self.replica_apis:
            auth = None
            schema = "http"
            if "://" in replica:
                schema, replica = replica.split("://", 1)
            if "@" in replica:
                auth, replica = replica.split('@', 1)
                auth = tuple(auth.split(':', 1))
            post_url = "{}://{}/upload".format(schema, replica)
            timeout = self.replica_timeout
            replica_uuid = None
            for n in range(max_retry):
                try:
                    in_file.seek(0)
                    files = {'file': (filename, in_file, content_type, {})}
                    res = self.session.post(post_url,
                                            auth=auth,
                                            files=files,
                                            timeout=timeout)
                    res.raise_for_status()
                    if res.status_code == 200:
                        data = res.json()
                        get_url, get_params = data['get_url'].split('?', 1)
                        get_host, replica_uuid = get_url.rsplit('/', 1)
                        if uuid != replica_uuid:  # pragma: no cover
                            raise ValueError(
                                "Salve uuid mismatch, verify secret_key")
                        LOGGER.info("Upload {} to replica {}".format(
                            uuid, post_url))
                        break
                except Exception as e:  # pragma: no cover
                    LOGGER.warning("Error {}/{} upload {} to {}: {}".format(
                        n + 1, max_retry, uuid, post_url, e))
                    if n >= max_retry - 1:
                        raise
                    sleep(n + 1)

    def register(self, md5hash):
        """Reserve a uuid for *md5hash*; idempotent when already registered."""
        if md5hash in self.forbidden_hash:
            raise StorageUploadError('forbidden_file ' + md5hash)
        now_iso = get_now().isoformat()
        uuid = self.hash_to_uuid(md5hash)
        meta = dict(uuid=uuid, hash=md5hash, created=now_iso)
        try:
            self.save_meta(uuid, meta)
        except ContentUploaded:
            pass
        return uuid

    def upload(self, post_file, uuid=None):
        """Store an uploaded file, returning (uuid, md5hash, content_type,
        filename).  Verifies hash for pre-registered uuids, rejects
        forbidden content, and replicates when configured."""
        now_iso = get_now().isoformat()
        filename = get_filename(post_file.filename)
        content_type = post_file.type
        in_file = post_file.file
        md5hash = self.compute_md5(in_file)
        if md5hash in self.forbidden_hash:
            LOGGER.warning("Forbidden file by hash {}".format(md5hash))
            raise StorageUploadError('forbidden_file ' + md5hash)
        if uuid is None:
            uuid = self.hash_to_uuid(md5hash)
            meta = dict(uuid=uuid, hash=md5hash, created=now_iso)
        else:
            meta = self.read_meta(uuid)
            if not compare_digest(meta['hash'], md5hash):
                raise HashInvalid(meta['hash'] + "/" + md5hash)
        key = self.uuid_to_file(uuid)
        path, name = self.file_path(key)
        if os.path.exists(name):
            # same content already stored; just record an alternative name
            meta = self.read_meta(uuid)
            if meta['filename'] != filename:
                if 'alternatives' not in meta:
                    meta['alternatives'] = list()
                meta['alternatives'].append({
                    'created': now_iso,
                    'filename': filename
                })
                self.save_meta(uuid, meta, overwrite=True)
            return uuid, md5hash, content_type, filename
        if self.check_forbidden(filename, content_type, in_file):
            LOGGER.warning("Forbidden file {} {} {} {}".format(
                filename, content_type, uuid, md5hash))
            raise StorageUploadError('forbidden_file ' + md5hash)
        meta['filename'] = filename
        meta['Content-Type'] = content_type
        meta['Content-Disposition'] = build_header(
            filename,
            disposition=self.disposition,
            filename_compat=quote(filename.encode('utf-8')))
        self.save_meta(uuid, meta, overwrite=True)
        in_file.seek(0)
        # write-then-rename keeps readers from seeing a partial file
        with open(name + '~', 'wb') as out_file:
            flock(out_file, LOCK_EX | LOCK_NB)
            copyfileobj(in_file, out_file)
        os.rename(name + '~', name)
        os.chmod(name, self.file_mode)
        try:
            if self.replica_apis:
                self.upload_to_replicas(post_file, uuid)
        except Exception as e:  # pragma: no cover
            LOGGER.error("Replica failed {}, remove file {} {}".format(
                e, uuid, md5hash))
            if self.require_replica_upload:
                os.rename(name, name + '~')
                raise StorageUploadError('replica_failed')
        return uuid, md5hash, content_type, filename

    def get(self, uuid):
        """Return the metadata for *uuid* plus its X-Accel-Redirect path."""
        meta = self.read_meta(uuid)
        if meta['uuid'] != uuid:
            raise KeyNotFound(uuid)  # pragma: no cover
        if meta['hash'] in self.forbidden_hash:
            raise KeyNotFound(uuid)  # pragma: no cover
        key = self.uuid_to_file(uuid)
        meta['X-Accel-Redirect'] = self.web_location(key, meta.get('archived'))
        return meta
def render(old_path, old_blob, new_path, new_blob):
    """Render a diff box for one changed file.

    Chooses between a side-by-side image diff (when both blobs sniff as
    image/*), a two-sided text diff, a one-sided creation/deletion view,
    or a "no data" placeholder, then delegates to render_box with a
    matching icon and a deferred do_render callback.

    NOTE(review): Python-2-era code (b64encode on raw blobs, str types).
    """
    old_type = None
    new_type = None
    try:
        # python-magic is optional; without it we fall through to the
        # text-diff path.
        from magic import Magic
        mag = Magic(mime=True)
        if old_blob:
            old_type = mag.from_buffer(old_blob)
        if new_blob:
            new_type = mag.from_buffer(new_blob)
    except ImportError:
        pass
    types = []
    if old_type:
        types.append(old_type)
    if new_type:
        types.append(new_type)
    # Only use the image renderer when BOTH sides are images.
    if len([t for t in types if t.startswith('image/')]) > 1:
        """
        Image diff.
        """
        icon = 'picture'
        def do_render(out):
            # NOTE(review): `out.extend(...)` is called with strings —
            # presumably `out` is a project buffer type, not a plain list
            # (list.extend would append one character at a time); confirm.
            # NOTE(review): a `</tr>` is emitted below without a matching
            # `<tr>` opener — looks like an upstream markup bug.
            out.extend('<table class="diff-any diff-sidebyside diff-image">')
            out.extend('<td class="left old num"></td>')
            out.extend('<td class="left old line">')
            if old_blob:
                # Inline the image as a base64 data: URI.
                data = "data:%s;base64,%s" % (old_type, b64encode(old_blob))
                out.extend('<img alt="" src="%s">' % smart_str(escape(data)))
            out.extend('</td>')
            out.extend('<td class="right new num"></td>')
            out.extend('<td class="right new line">')
            if new_blob:
                data = "data:%s;base64,%s" % (new_type, b64encode(new_blob))
                out.extend('<img alt="" src="%s">' % smart_str(escape(data)))
            out.extend('</td>')
            out.extend('</tr>')
            out.extend('</table>')
    else:
        """
        Text diff.
        """
        old_token_lines = ()
        new_token_lines = ()
        try:
            """
            Syntax highlighting for the old text.
            """
            # Lexer guessing may fail (ClassNotFound) — highlighting is
            # best-effort and falls back to the empty tuple above.
            if old_blob:
                old_lexer = guess_lexer_for_filename(old_path, old_blob)
                old_tokens = old_lexer.get_tokens(old_blob)
                old_token_lines = TokenLineStream(old_tokens)
        except ClassNotFound:
            pass
        try:
            """
            Syntax highlighting for the new text.
            """
            if new_blob:
                new_lexer = guess_lexer_for_filename(new_path, new_blob)
                new_tokens = new_lexer.get_tokens(new_blob)
                new_token_lines = TokenLineStream(new_tokens)
        except ClassNotFound:
            pass
        if old_blob is not None and new_blob is not None:
            """
            Normal two sided diff
            """
            icon = 'edit'
            def do_render(out):
                diff_line_stream = diff(old_blob, new_blob)
                render_side_diff(out, diff_line_stream,
                                 (old_token_lines, new_token_lines))
        elif old_blob is not None:
            """
            One sided deletion diff.
            """
            icon = 'trash'
            def do_render(out):
                render_blob(out, -1, old_blob, old_token_lines)
        elif new_blob is not None:
            """
            One sided creation diff.
            """
            icon = 'file'
            def do_render(out):
                render_blob(out, 1, new_blob, new_token_lines)
        else:
            """
            No sided diff. eg: Metadata change.
            """
            icon = 'question-sign'
            def do_render(out):
                out.extend("No data")
    return render_box(old_path, new_path, icon, do_render)
def get(self, request, key=None, **kwargs): image_fp = _get_image_fp(key) magic = Magic(mime=True) content_type = magic.from_buffer(image_fp.read(1024)) image_fp.seek(0) return HttpResponse(image_fp, content_type=content_type)
def get_content_type(self, file_): """ Returns the content type of the file using python magic """ magic = Magic(mime=True) return magic.from_buffer(file_.read(1024))
def from_buffer(self, buf, mime=True): return _Magic.from_buffer(self, buf)
try: file=openers[protocol]( url, offset ) bytes = gunzip( file ) finally: file.close() warc = WarcRecord( bytes ) if not isinstance( warc.block, HttpResponse ): raise Exception, 'WARC record is not an HTTP response' # TODO: Check file-type if warc.block.type != 'text/html': # Pass 'True' to get Mime-types rather than descriptive names. magic = Magic(True) print magic.from_buffer( warc.block.body ) exit(2) html=Html( warc.block.body, warc.block.charset ) if not html.doc or html.doc.getroot() is None: raise Exception, 'Error parsing HTML' root=html.doc.getroot() xml = DefaultTransformer( html ).transform( ) print etree.tostring( xml, method='xml', encoding=unicode ).encode('utf-8') except Exception, message: log( '500', offset, url.geturl(), message )
def file_mng(upload_dir, col, username):
    """Streamlit page for managing sar files: show, add or delete them.

    Args:
        upload_dir: directory holding uploaded sar files and their cached
            ``.df`` pickles.
        col: layout column handle (kept for interface compatibility;
            columns are re-created via visf below).
        username: current user; used to build the per-user redis key.
    """
    col1, col2, col3, col4 = visf.create_columns(4, [0, 1, 1, 1])
    manage_files = ['Show Sar Files', 'Add Sar Files', 'Delete Sar Files']
    sar_files = [x for x in os.listdir(upload_dir)
                 if os.path.isfile(f'{upload_dir}/{x}')]
    sar_files_uploaded = [x for x in sar_files if not x.endswith('.df')]
    # BUGFIX: the original x.rstrip('\.df') strips any run of trailing
    # '\\', '.', 'd', 'f' characters (e.g. 'sard.df' -> 'sar'); slice off
    # the exact '.df' suffix instead.
    sar_files = [x[:-len('.df')] for x in sar_files if x.endswith('.df')]
    sar_files.extend(sar_files_uploaded)
    managef_options = col1.selectbox('Show/Add/Delete', manage_files)
    st.markdown('___')
    if managef_options == 'Add Sar Files':
        st.set_option('deprecation.showfileUploaderEncoding', False)
        sar_files = [col1.file_uploader(
            "Please upload your SAR files", key='sar_uploader',
            accept_multiple_files=True)]
        if col1.button('Submit'):
            if sar_files:
                for multi_files in sar_files:
                    for u_file in multi_files:
                        if u_file is None:
                            continue
                        # Validate content type before accepting the file.
                        f_check = Magic()
                        bytes_data = u_file.read()
                        res = f_check.from_buffer(bytes_data)
                        if "ASCII text" not in res:
                            col1.warning(
                                f'File is not a valid sar ASCII data file. Instead {res}')
                            continue
                        # TODO check if Linux Header is present and if sar
                        # sections are present
                        col1.write(f"Sar file is valid. Renaming {u_file.name}")
                        with open(f'{upload_dir}/{u_file.name}', 'wb') as targetf:
                            targetf.write(bytes_data)
                        col1, col2 = visf.create_columns(2, [0, 1])
                        renamed_name = helpers.rename_sar_file(
                            f'{upload_dir}/{u_file.name}', col=col1)
                        # Drop any stale cached data for this file: redis
                        # entry first (best-effort), then the on-disk pickle.
                        r_hash = f"{Config.rkey_pref}:{username}"
                        r_key = f"{renamed_name}_df"
                        try:
                            redis_mng.delete_redis_key(r_hash, r_key)
                        except Exception:
                            print(f'{renamed_name} key not in redis db or redis db offline')
                        # BUGFIX: use os.remove instead of os.system('rm ...')
                        # so filenames with spaces/shell metacharacters work.
                        df_file = f'{upload_dir}/{renamed_name}.df'
                        try:
                            os.remove(df_file)
                        except FileNotFoundError:
                            pass
    elif managef_options == 'Delete Sar Files':
        if sar_files:
            dfiles_ph = col1.empty()
            dfiles = dfiles_ph.multiselect(
                'Choose your Files to delete', sar_files)
            if col1.button('Delete selected Files'):
                for file in dfiles:
                    r_item = f'{file}_df'
                    # Remove both the cached pickle and the raw sar file;
                    # missing files are ignored (matches 'rm -f' semantics
                    # without going through a shell).
                    for victim in (f'{upload_dir}/{file}.df',
                                   f'{upload_dir}/{file}'):
                        try:
                            os.remove(victim)
                        except FileNotFoundError:
                            pass
                    # BUGFIX: bind rkey before the try block so the except
                    # handler's message cannot hit an unbound name.
                    rkey = f"{Config.rkey_pref}:{username}"
                    try:
                        print(f'delete {rkey}, {r_item} from redis at '
                              f'{datetime.now().strftime("%m/%d/%y %H:%M:%S")}')
                        redis_mng.del_redis_key_property(rkey, r_item)
                    except Exception:
                        print(f'{rkey}, {r_item} not available in redis db or redis '
                              'db not online')
                # Refresh the selectable list after deletion.
                sar_files = os.listdir(upload_dir)
                # Same exact-suffix strip as above (rstrip bug fix).
                sar_files = [x[:-len('.df')] for x in sar_files
                             if x.endswith('.df')]
                dfiles = dfiles_ph.multiselect(
                    'Choose your Files to delete', sar_files, default=None)
        else:
            col1.write("You currently have no sar files")
    elif managef_options == 'Show Sar Files':
        col1.empty()
        col1.write(df.DataFrame(sar_files, columns=['Files']))
def get_mimetype(fobject): mime = Magic(mime=True) mimetype = mime.from_buffer(fobject.read(1024)) fobject.seek(0) return mimetype
def detect_mime_type(uploaded_file: UploadedFile) -> str: """Detect mime type of an uploaded file""" magic = Magic(mime=True) chunk = next(uploaded_file.chunks(chunk_size=2048)) return magic.from_buffer(chunk)
def post(self):
    """Handle an upload POST: validate form fields and attachments, then
    persist files to disk and metadata to the database.

    Validation happens for ALL attachments before any file is written, so
    a late failure aborts the request without leaving partial files or
    committed rows behind. Returns (json, 201) on success; aborts with
    413/422 on validation failures.
    """
    params = request.form.to_dict(flat=True)
    try:
        u = self.upload_schema_full.load(params, session=db.session)
    except ValidationError as e:
        return abort(
            422, str(e)
        )  # This is not a return actually, I just want PyCharm to shut up
    db.session.add(u)
    # Start handling of files only if there is any
    if request.files:
        if len(request.files) > current_app.config['MAX_FILES_IN_UPLOAD']:
            return abort(422, "Too many files")
        # Handle attachments if needed
        files_to_save = []
        m = Magic(mime=True)
        for field_name, attachment in request.files.items():
            original_filename = secure_filename(
                os.path.basename(attachment.filename))
            md5_hash = hashlib.md5(attachment.read()).hexdigest()
            # After the full read above, tell() equals the file size.
            size = attachment.tell()
            if size > current_app.config['MAX_SINGLE_FILE_LENGTH']:
                # This could be moved to the schema checking as well,
                # But we don't want to process a big file any further to save resources
                return abort(413, f"{field_name} is too big")
            # MD5 calculating read the file to the end, (because .read())
            # so we have to seek back to its beginning to actually save it
            attachment.seek(0, 0)
            # Apparently .doc requires to be fully read in order to being identified properly
            mime = m.from_buffer(attachment.read())
            attachment.seek(0, 0)
            # prepare file to be saved
            target_file_extension = os.path.splitext(original_filename)[-1]
            # We don't want filenames ending with a dot
            if target_file_extension == '.':
                target_file_extension = ''
            # Content-addressed name: hash + size makes collisions unlikely
            # and lets identical uploads share one file on disk.
            target_filepath = os.path.join(
                current_app.config["UPLOAD_FOLDER"],
                f"{md5_hash}_{size}{target_file_extension}")
            # Files will be saved later, after all validated successfully
            files_to_save.append((attachment, target_filepath))
            # Store meta data
            file_params = {
                'original_filename': original_filename,
                'md5_hash': md5_hash,
                'size': size,
                'mime': mime
            }
            try:
                f = self.file_schema_full.load(file_params, session=db.session)
            except ValidationError as e:
                # This is not a return actually, I just want PyCharm to stop complaining
                return abort(422, f"{field_name}: {e}")
            f.upload = u
            db.session.add(f)
        # before saving, ensure a containing directory
        os.makedirs(current_app.config["UPLOAD_FOLDER"], 0o755, exist_ok=True)
        # All single validation succeeded so far
        # But database constraints are not yet enforced
        # We just save the files here, so that if saving fails, the data won't be committed to the db
        for attachment, target_filepath in files_to_save:
            if not os.path.isfile(target_filepath):
                attachment.save(target_filepath)
    # Commit all that stuff to database
    db.session.commit()
    return jsonify(self.upload_schema_simple.dump(u)), 201
def run_campaign(self, mail, base_url, url):
    """Send one batch of campaign emails from a scheduler thread.

    Starts the worker web server on the first run (status 'Scheduled'),
    then emails up to batch_size recipients whose result status is still
    'Scheduled', recording a Sent/Error event per recipient. When no
    unsent results remain, the scheduled job removes itself.
    """
    # Since this function is in a different thread, it doesn't have the app's context by default
    with app.app_context():
        unsent_results = [
            x for x in Campaign.query.filter_by(id=self.id).first().results
            if x.status == 'Scheduled'
        ]
        campaign = Campaign.query.filter_by(id=self.id).first(
        )  # since in diff thread, references to self will not update the database
        # start the worker and send emails
        job_id = str(self.id)
        if self is None:
            # NOTE(review): `self is None` can never be true for a bound
            # method — this guard looks dead; presumably `campaign is None`
            # was intended. Confirm before changing.
            sched.remove_job(job_id)
            app.logger.info(
                f'Campaign ID {job_id} does not exist - Campaign will not start, scheduled job will be removed'
            )
            return
        # Before sending emails, ensure the web server starts on the worker
        # If the worker gives an issue, kill off the campaign and log the error
        if campaign.status == 'Scheduled':
            worker_response = self.start_worker()
            if not worker_response['success']:
                msg = worker_response['msg']
                campaign.status = msg
                db.session.commit()
                app.logger.error(
                    f'Failed to start campaign {self.name} (ID: {self.id}) - Worker web server failed to start on server {self.server.alias} (IP: {self.server.ip}) - Reason: {msg}'
                )
                sched.remove_job(job_id)
                return
            else:
                app.logger.info(
                    f'Campaign {self.name} (ID: {self.id}) successfully started web server on {self.server.alias} (IP: {self.server.ip})'
                )
                campaign.status = 'Active'
                db.session.commit()
        # Send at most batch_size emails per scheduler tick.
        for _ in range(int(self.batch_size)):
            if unsent_results:
                result = unsent_results.pop()
                recipient = result.person
                msg = Message(subject=self.email.subject,
                              sender=self.profile.from_address,
                              recipients=[recipient.email])
                msg.html = self.email.prep_html(base_url=base_url,
                                                target=recipient,
                                                result=result,
                                                url=url)
                msg.body = html2text.html2text(msg.html.decode())
                if self.attachment:
                    # Determine mimetype of attachment from bytes
                    mime = Magic(mime=True)
                    mimetype = mime.from_buffer(self.attachment)
                    # attach the file
                    msg.attach(self.attachment_name, mimetype,
                               self.attachment)
                status = ''
                try:
                    mail.send(msg)
                except Exception as e:
                    status = 'Error'
                    app.logger.exception(
                        f'Error sending email to {recipient.email} for {self.name} (ID: {self.id}) - {e}'
                    )
                else:
                    # NOTE(review): 'succesflly' typo below is in a runtime
                    # log string; left unchanged here.
                    status = 'Sent'
                    app.logger.info(
                        f'Email succesflly sent to {recipient.email} for campaign {self.name} (ID: {self.id})'
                    )
                # Updates email's status in database
                result.status = status
                event = Event(action=status, time=datetime.now(),
                              ip_address='N/A')
                result.events.append(event)
                db.session.commit()
            # When all targets have been emailed, the job has to be explicitly removed
            else:
                sched.remove_job(job_id=job_id)
                app.logger.info(
                    f'Finished sending emails for campaign {self.name} (ID: {self.id})'
                )
                return
        return