import logging
import sys

import filetype as filemagic


def _valid_file(self, filename, filetype):
    logging.debug('_valid_file( %s, %s )', filename, filetype)
    # Delegate file validation to the filetype module.
    # Aliased to filemagic on import - already using a variable
    # called filetype throughout.
    # The filetype module does file magic checking in pure python,
    # so no deps or bindings on libmagic or anything else.
    # Interface is a bit clunky; considered duplicating its validation
    # logic in each plugin, but placing the one very awkward call here
    # means it will apply to every supported filetype (read: plugin)
    # without any additional work.
    #
    # 1. Retrieve a filemagic object of the type we want to validate
    #    against. Bit of a nightmare, as get_type() compares the
    #    "filetype" argument passed in against an enumerated list of
    #    filetypes using is().
    #    is() does not play nicely if the filetype argument takes the
    #    form of a key from a dictionary - never matching against what
    #    look like identical strings. Need to jump through an intern()
    #    hoop to get the "correct string" for comparison.
    # 2. Using the filemagic object returned from get_type() in step
    #    one, call its instance method match(), passing in the
    #    appropriate bytes from the file being tested - this is where
    #    the magic happens.
    #    The public API is at least helpful here, providing a utility
    #    function: get_signature_bytes(filename).
    # Return from match() is Boolean, so just pass it back to the caller.
    return filemagic.get_type(None, sys.intern(filetype)).match(
        filemagic.utils.get_signature_bytes(filename))
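# A minimal sketch of the intern() quirk described above, assuming the
# filetype library's get_type(): it compares extensions with is(), so a
# string built at runtime (for example a dict key read from config) may
# never match the interned literal even though the two compare equal.
import sys
import filetype as filemagic

ext = "".join(["p", "n", "g"])                    # equal to 'png', but a distinct object
print(filemagic.get_type(None, ext))              # may print None: is() comparison fails
print(filemagic.get_type(None, sys.intern(ext)))  # matches the registered PNG type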
import filetype
import piexif


def set_exif_date(path, date):
    # Only JPEG files carry EXIF data we can rewrite here.
    if filetype.guess(path) != filetype.get_type("image/jpeg"):
        return
    tags = piexif.load(path)
    datestr = date.strftime('%Y:%m:%d %H:%M:%S')
    tags['Exif'][piexif.ExifIFD.DateTimeDigitized] = datestr
    tags['Exif'][piexif.ExifIFD.DateTimeOriginal] = datestr
    piexif.insert(piexif.dump(tags), path)
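# Hypothetical usage of set_exif_date above; 'photo.jpg' is an assumed
# local JPEG. Note that EXIF timestamps use ':' as the date separator,
# hence the '%Y:%m:%d %H:%M:%S' format string.
from datetime import datetime

set_exif_date("photo.jpg", datetime(2020, 5, 1, 12, 30))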
import os
from bz2 import BZ2File
from gzip import GzipFile


def decompress(fname):
    ftype = ft.get_type(fname)
    if ftype == "gz":
        ext = GzipFile(fname, 'rb')
    elif ftype == "bz2":
        ext = BZ2File(fname, 'rb')
    else:
        raise ValueError("unsupported compression type: %s" % ftype)
    filedata = ext.read()
    ext.close()
    # Decompressed data is bytes, so write in binary mode.
    new_name = get_new_name(fname[:fname.rfind(".")])
    with open(new_name, "wb") as w:
        w.write(filedata)
    # If the decompressed payload itself has a recognizable type
    # (e.g. a tar inside a gz), restore that extension on the name.
    new_type = ft.get_type(new_name)
    if new_type:
        new_plus_type = get_new_name(new_name + "." + new_type)
        os.rename(new_name, new_plus_type)
        return new_plus_type
    return new_name
import os
import tarfile
from rarfile import RarFile
from zipfile import ZipFile


def unarchive(fname):
    ftype = ft.get_type(fname)
    if ftype == "rar":
        ext = RarFile(fname)
    elif ftype == "tar":
        ext = tarfile.open(fname)
    elif ftype == "zip":
        ext = ZipFile(fname)
    else:
        raise ValueError("unsupported archive type: %s" % ftype)
    new_path = get_new_name(fname[:fname.rfind(".")] + "_extracted")
    if not os.path.exists(new_path):
        os.makedirs(new_path)
    ext.extractall(path=new_path)
    return new_path
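# Hypothetical usage of the two helpers above, assuming 'dump.tar.gz'
# exists and that ft.get_type / get_new_name behave as the code expects:
# strip the gzip layer first, then extract the inner tar archive.
inner = decompress("dump.tar.gz")    # e.g. returns 'dump.tar'
extracted = unarchive(inner)         # e.g. returns 'dump_extracted'
print(extracted)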
def check_document_format(self):
    """Check if the downloaded document has the filetype that the
    downloader expects. If the downloader does not expect any special
    filetype, accept anything, because there is no way to know whether
    it is correct.

    :returns: True if the document is of the expected type, False otherwise
    :rtype: bool
    """
    def print_warning():
        self.logger.error(
            "The downloaded data does not seem to be of "
            "the correct type (%s)" % self.expected_document_extension
        )

    if self.expected_document_extension is None:
        return True

    kind = filetype.guess(self.get_document_data())
    if kind is None:
        print_warning()
        return False

    if not isinstance(self.expected_document_extension, list):
        self.expected_document_extension = [
            self.expected_document_extension]

    # Accept the document if it matches any of the expected extensions.
    result = False
    for expected_document_extension in self.expected_document_extension:
        expected_kind = filetype.get_type(ext=expected_document_extension)
        if expected_kind is None:
            raise Exception(
                "I can't understand the expected extension {0}".format(
                    expected_document_extension
                )
            )
        if kind.mime == expected_kind.mime:
            result = True
            break
    if not result:
        print_warning()
    return result
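# The comparison performed above, in isolation and assuming a local
# 'paper.pdf': guess() sniffs the actual bytes, get_type(ext=...)
# resolves the expected extension, and the MIME strings are compared
# rather than the Type objects themselves.
import filetype

kind = filetype.guess("paper.pdf")
expected = filetype.get_type(ext="pdf")
if kind is not None and expected is not None:
    print(kind.mime == expected.mime)   # True for a genuine PDF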
def upload_photo(self, request, **kwargs):  # pragma: no cover
    file = request.FILES.get("file")
    serializer = UserPhotoSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    if filetype.is_image(file):
        user = self.request.user
        user.photo = file
        user.save(update_fields=["photo"])

        # Update avatar on integrations
        celery_app.send_task(
            "update_user_photo",
            args=[user.email, self._get_photo_url(user)])

        # Update avatar in all registered Rocket.Chat services
        for authorization in user.authorizations_user.all():
            for project in authorization.organization.project.all():
                for service_status in project.service_status.filter(
                        service__service_type=Service.SERVICE_TYPE_CHAT):
                    upload_photo_rocket(
                        server_rocket=service_status.service.url,
                        jwt_token=self.request.auth,
                        avatar_url=user.photo.url,
                    )

        return Response({"photo": user.photo.url})

    # Not an image: include the detected extension in the error when the
    # MIME type is recognized, otherwise fall back to a generic message.
    kind = filetype.get_type(file.content_type)
    raise UnsupportedMediaType(
        kind.extension if kind else None,
        detail=_("We accept images only in the formats: .png, .jpeg, .gif"),
    )
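# The validation used above, in isolation: filetype.is_image() accepts a
# path, raw bytes, or a readable object and checks magic bytes rather
# than trusting the client-supplied content type. 'avatar.png' is an
# assumed local file.
import filetype

with open("avatar.png", "rb") as f:
    print(filetype.is_image(f.read()))   # True for real PNG/JPEG/GIF data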
import random

import filetype
import requests


def download_image(cls, url):
    if url is None:
        return

    raw_img = None
    ext = None
    session_id = random.random()
    proxy_url = (
        'http://%s-country-cn-session-%s:%[email protected]:%d'
        % (LUMINATI_USERNAME, session_id, LUMINATI_PASSWORD, PORT))
    proxies = {
        'http': proxy_url,
        'https': proxy_url,
    }
    # if DEBUG:
    #     proxies = None
    if url:
        img_response = requests.get(
            url,
            headers={
                'accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
                'accept-encoding': 'gzip, deflate',
                'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,fr-FR;q=0.6,fr;q=0.5,zh-TW;q=0.4',
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 Edg/81.0.416.72',
                'cache-control': 'no-cache',
                'dnt': '1',
            },
            proxies=proxies,
            timeout=TIMEOUT,
        )
        if img_response.status_code == 200:
            raw_img = img_response.content
            content_type = img_response.headers.get('Content-Type')
            # Map the response MIME type to a file extension via its
            # registered matcher.
            ext = filetype.get_type(
                mime=content_type.partition(';')[0].strip()).extension
    return raw_img, ext
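# The Content-Type -> extension mapping used above, in isolation.
# get_type(mime=...) returns None for MIME types the library does not
# register, so guarding before taking .extension avoids an AttributeError.
import filetype

content_type = "image/webp; charset=binary"
kind = filetype.get_type(mime=content_type.partition(";")[0].strip())
print(kind.extension if kind else None)   # 'webp'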
def main():
    print("setting up directories...")
    dt.clear_or_create(DIRECTORIES["temp"])
    dt.create_directory(DIRECTORIES["archives"])
    print("done setting up directories")

    ftype = ft.get_type(unpack_file)

    print("unpacking and flattening files...")
    unpack.unpack(unpack_file, DIRECTORIES["temp"])
    unpack.flatten_folder(DIRECTORIES["temp"])
    # flatten_folder could return a list of files in the directory, so that
    # we wouldn't have to search through the directory every time for
    # specific files, since os.walk is slow with directories with large files
    print("done unpacking and flattening")

    sp = SchemaProps(SCHEMA_URL)
    file_details = {
        "file": unpack_file,
        "process_time": process_time,
        "file_timestamp": file_timestamp,
    }
    election_details = {}
    vip_id = None
    election_id = None

    print("converting to db style flat files....")
    if dt.file_by_name(CONFIG_FILE, DIRECTORIES["temp"]):
        file_details.update(process_config(
            DIRECTORIES["temp"], DIRECTORIES["temp"] + CONFIG_FILE, sp))
    if len(dt.files_by_extension(".txt", DIRECTORIES["temp"])) > 0:
        file_details.update(process_flatfiles(DIRECTORIES["temp"], sp))
    print("processing xml files...")
    xml_files = dt.files_by_extension(".xml", DIRECTORIES["temp"])
    if len(xml_files) >= 1:
        ftff.feed_to_db_files(DIRECTORIES["temp"], xml_files[0],
                              sp.full_header_data("db"), sp.version)
        os.remove(xml_files[0])
        if "valid_files" in file_details:
            file_details["valid_files"].append(xml_files[0])
        else:
            file_details["valid_files"] = [xml_files[0]]
    print("done processing xml files")

    print("getting feed details...")
    db = EasySQL("localhost", "vip", "username", "password")
    try:
        with open(DIRECTORIES["temp"] + "source.txt", "r") as f:
            reader = csv.DictReader(f)
            row = next(reader)
            vip_id = row["vip_id"]
            election_details["vip_id"] = vip_id
        with open(DIRECTORIES["temp"] + "election.txt", "r") as f:
            reader = csv.DictReader(f)
            row = next(reader)
            election_details["election_date"] = row["date"]
            election_details["election_type"] = row["election_type"]
    except Exception:
        er.report_summary(vip_id, election_id, file_details, election_details)
        return
    election_id = get_election_id(election_details, db)
    election_details["election_id"] = election_id
    print("done getting feed details")

    print("converting to full db files....")
    element_counts, error_data, warning_data = convert_to_db_files(
        vip_id, election_id, file_details["file_timestamp"],
        DIRECTORIES["temp"], sp)
    print("done converting to full db files")

    er.report_summary(vip_id, election_id, file_details, election_details,
                      element_counts)
    if len(error_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], error_data,
                       "error")
    if len(warning_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], warning_data,
                       "warning")
    update_data(vip_id, election_id, file_details["file_timestamp"], db,
                element_counts, DIRECTORIES["temp"], DIRECTORIES["archives"])
    db_validations(vip_id, election_id, db, sp)
    generate_feed(file_details)
def testFileTypes(self):
    for filename, type_response in file_list:
        filename = path.join(path.dirname(__file__), "testdata", filename)
        self.assertEqual(type_response, ft.get_type(filename))
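# file_list is assumed to hold (filename, expected_type) pairs relative
# to the testdata directory, along these lines:
file_list = [
    ("sample.png", "png"),
    ("sample.tar.gz", "gz"),
]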
def public_file_name(self):
    file_type = get_type(mime=self.content_type)
    return f'{self.thesis.registration_number or slugify(self.thesis.title)}-' \
           f'{slugify(self.type_attachment.identifier)}.{file_type.extension}'
# Example showing use of the filetype library
import sys

import filetype

# Identify the file given as argument 1 by its magic bytes
print(filetype.match(sys.argv[1]))

# Test the same file against one specific matcher: read its leading
# bytes and hand them to the FLAC type's match()
with open(sys.argv[1], 'rb') as f:
    buf = f.read(256)
print(filetype.get_type(None, 'flac').match(buf))
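# The same check via the higher-level guess() API, which reads the
# signature bytes itself and returns a matching Type or None:
kind = filetype.guess(sys.argv[1])
print(kind.mime if kind else "unknown")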