Example #1
    def _valid_file(self, filename, filetype):
        logging.debug('_valid_file( %s, %s )', filename, filetype)
        # Delegate file validation to the filetype module.
        # Aliased to filemagic on import - already using a variable
        # called filetype throughout.

        # The filetype module does file magic checking in pure python,
        # so no deps or bindings on libmagic or anything else.
        # The interface is a bit clunky; I considered duplicating its validation
        # logic in each plugin, but placing the one very awkward call here
        # means it will apply to every supported filetype (read: plugin)
        # without any additional work.
        #
        # 1. Retrieve a filemagic object of the type we want
        # to validate against.  Bit of a nightmare, as get_type()
        # compares the "filetype" argument passed in against an
        # enumerated list of filetypes using is().
        # is() does not play nicely if the filetype argument takes the form
        # of a key from a dictionary - never matching against what looks like
        # identical strings.  Need to jump through an intern() hoop to get the
        # "correct string" for comparison.
        # 2. Using the filemagic object returned from step one above via
        # get_type(), call its instance method match(), passing in the
        # appropriate bytes from the file being tested - this is where
        # the magic happens.
        # The public API is at least helpful here, providing a utility function:
        # get_signature_bytes(filename)
        # Return from match() is Boolean, so just pass it back to caller.
        return filemagic.get_type(None, sys.intern(filetype)).match(
            filemagic.utils.get_signature_bytes(filename))
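For reference, a minimal standalone sketch of the same two-step call, assuming the PyPI filetype package imported under the filemagic alias used above; the file name and extension passed in at the bottom are placeholders:

import sys
import filetype as filemagic  # same alias as in the snippet above


def valid_file(filename, ext):
    # Step 1: look up the matcher object for the target extension.
    # sys.intern() mirrors the workaround described in the comments above;
    # whether it is still needed depends on the installed filetype version.
    kind = filemagic.get_type(None, sys.intern(ext))
    if kind is None:
        return False  # extension not known to the library
    # Step 2: match the file's signature bytes against that type.
    return kind.match(filemagic.utils.get_signature_bytes(filename))


print(valid_file('example.png', 'png'))  # placeholder path and extension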
Example #2
def set_exif_date(path, date):
    if filetype.guess(path) != filetype.get_type("image/jpeg"):
        return

    tags = piexif.load(path)
    datestr = date.strftime('%Y:%m:%d %H:%M:%S')
    tags['Exif'][piexif.ExifIFD.DateTimeDigitized] = datestr
    tags['Exif'][piexif.ExifIFD.DateTimeOriginal] = datestr
    piexif.insert(piexif.dump(tags), path)
Example #3
def decompress(fname):
    ftype = ft.get_type(fname)

    if ftype == "gz":
        ext = GzipFile(fname, 'rb')
    elif ftype == "bz2":
        ext = BZ2File(fname, 'rb')

    filedata = ext.read()
    new_name = get_new_name(fname[:fname.rfind(".")])
    with open(new_name, "w") as w:
        w.write(filedata)

    new_type = ft.get_type(new_name)
    if new_type:
        new_plus_type = get_new_name(new_name + "." + new_type)
        os.rename(new_name, new_plus_type)
        return new_plus_type
    return new_name
Example #4
def unarchive(fname):
    ftype = ft.get_type(fname)

    if ftype == "rar":
        ext = RarFile(fname)
    elif ftype == "tar":
        ext = tarfile.open(fname)
    elif ftype == "zip":
        ext = ZipFile(fname)

    new_path = get_new_name(fname[:fname.rfind(".")] + "_extracted")
    if not os.path.exists(new_path):
        os.makedirs(new_path)
    ext.extractall(path=new_path)
    return new_path
Example #5
    def check_document_format(self):
        """Check if the downloaded document has the filetype that the
        downloader expects. If the downloader does not expect any special
        filetype, accept anything because there is no way to know if it is
        correct.

        :returns: True if it is of the right type, False otherwise
        :rtype:  bool
        """
        def print_warning():
            self.logger.error(
                "The downloaded data does not seem to be of"
                "the correct type (%s)" % self.expected_document_extension
            )

        if self.expected_document_extension is None:
            return True

        result = None
        kind = filetype.guess(self.get_document_data())

        if kind is None:
            print_warning()
            return False

        if not isinstance(self.expected_document_extension, list):
            self.expected_document_extension = [
                self.expected_document_extension]

        for expected_document_extension in self.expected_document_extension:

            expected_kind = filetype.get_type(ext=expected_document_extension)
            if expected_kind is None:
                raise Exception(
                    "I can't understand the expected extension {0}".format(
                        expected_document_extension
                    )
                )

            result = kind.mime == expected_kind.mime
            if result:
                break

        if not result:
            print_warning()

        return result
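A condensed sketch of the same check for a single expected extension, assuming the PyPI filetype package and in-memory document bytes; the function and variable names here are illustrative, not part of the original class:

import filetype


def has_expected_format(data, expected_ext):
    # Sniff the actual type from the downloaded bytes.
    kind = filetype.guess(data)
    if kind is None:
        return False
    # Resolve the type the downloader expects from its extension.
    expected = filetype.get_type(ext=expected_ext)
    if expected is None:
        raise ValueError('unknown expected extension: %s' % expected_ext)
    # Compare by MIME type, exactly as the method above does.
    return kind.mime == expected.mime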
Example #6
    def upload_photo(self, request, **kwargs):  # pragma: no cover
        file = request.FILES.get("file")

        serializer = UserPhotoSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        if filetype.is_image(file):
            user = self.request.user
            user.photo = file
            user.save(update_fields=["photo"])

            # Update avatar on integrations
            celery_app.send_task("update_user_photo",
                                 args=[user.email,
                                       self._get_photo_url(user)])

            # Update avatar in all rocket chat registered
            for authorization in user.authorizations_user.all():
                for project in authorization.organization.project.all():
                    for service_status in project.service_status.filter(
                            service__service_type=Service.SERVICE_TYPE_CHAT):
                        upload_photo_rocket(
                            server_rocket=service_status.service.url,
                            jwt_token=self.request.auth,
                            avatar_url=user.photo.url,
                        )

            return Response({"photo": user.photo.url})
        try:
            raise UnsupportedMediaType(
                filetype.get_type(file.content_type).extension,
                detail=_(
                    "We accept images only in the formats: .png, .jpeg, .gif"),
            )
        except Exception:
            raise UnsupportedMediaType(
                None,
                detail=_(
                    "We accept images only in the formats: .png, .jpeg, .gif"),
            )
Example #7
 def download_image(cls, url):
     if url is None:
         return
     raw_img = None
     session_id = random.random()
     proxy_url = (
         'http://%s-country-cn-session-%s:%[email protected]:%d' %
         (LUMINATI_USERNAME, session_id, LUMINATI_PASSWORD, PORT))
     proxies = {
         'http': proxy_url,
         'https': proxy_url,
     }
     # if DEBUG:
     #     proxies = None
     if url:
         img_response = requests.get(
             url,
             headers={
                 'accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
                 'accept-encoding': 'gzip, deflate',
                 'accept-language':
                 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,fr-FR;q=0.6,fr;q=0.5,zh-TW;q=0.4',
                 'user-agent':
                 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 Edg/81.0.416.72',
                 'cache-control': 'no-cache',
                 'dnt': '1',
             },
             proxies=proxies,
             timeout=TIMEOUT,
         )
         if img_response.status_code == 200:
             raw_img = img_response.content
             content_type = img_response.headers.get('Content-Type')
             ext = filetype.get_type(
                 mime=content_type.partition(';')[0].strip()).extension
         else:
             ext = None
     return raw_img, ext
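Note that the chained .extension access above raises AttributeError when get_type() does not recognise the Content-Type; below is a minimal, more defensive sketch of the same MIME-to-extension lookup, assuming the PyPI filetype package (the sample header value is made up):

import filetype


def extension_for(content_type):
    # Strip any parameters such as "; charset=binary" before the lookup.
    mime = content_type.partition(';')[0].strip()
    # get_type() returns None for MIME types the library does not know,
    # so guard against it instead of chaining .extension directly.
    # (Older filetype releases compared the mime argument by identity,
    # as noted in the comments of Example #1.)
    kind = filetype.get_type(mime=mime)
    return kind.extension if kind is not None else None


print(extension_for('image/jpeg; charset=binary'))  # made-up header value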
Example #8
 def testFileTypes(self):
     for filename, type_response in file_list:
         filename = path.join(path.dirname(__file__), "testdata", filename)
         self.assertEqual(type_response, ft.get_type(filename))
Example #9
 def public_file_name(self):
     file_type = get_type(mime=self.content_type)
     return f'{self.thesis.registration_number or slugify(self.thesis.title)}-' \
            f'{slugify(self.type_attachment.identifier)}.{file_type.extension}'
Example #10
def main():

    print "setting up directories..."

    dt.clear_or_create(DIRECTORIES["temp"])
    dt.create_directory(DIRECTORIES["archives"])

    print "done setting up directories"

    ftype = ft.get_type(unpack_file)

    print "unpacking and flattening files..."

    unpack.unpack(unpack_file, DIRECTORIES["temp"])
    unpack.flatten_folder(DIRECTORIES["temp"])
    # I could have flatten_folder return a list of files in the directory, so that
    # we wouldn't have to search through the directory everytime for specific files
    # since os.walk is slow with directories with large files

    print "done unpacking and flattening"

    sp = SchemaProps(SCHEMA_URL)
    file_details = {
        "file": unpack_file,
        "process_time": process_time,
        "file_timestamp": file_timestamp
    }
    election_details = {}
    vip_id = None
    election_id = None

    print "converting to db style flat files...."

    if dt.file_by_name(CONFIG_FILE, DIRECTORIES["temp"]):
        file_details.update(
            process_config(DIRECTORIES["temp"],
                           DIRECTORIES["temp"] + CONFIG_FILE, sp))
    if dt.files_by_extension(".txt", DIRECTORIES["temp"]) > 0:
        file_details.update(process_flatfiles(DIRECTORIES["temp"], sp))
    print "processing xml files..."
    xml_files = dt.files_by_extension(".xml", DIRECTORIES["temp"])
    if len(xml_files) >= 1:
        ftff.feed_to_db_files(DIRECTORIES["temp"], xml_files[0],
                              sp.full_header_data("db"), sp.version)
        os.remove(xml_files[0])
        if "valid_files" in file_details:
            file_details["valid_files"].append(xml_files[0])
        else:
            file_details["valid_files"] = [xml_files[0]]

    print "done processing xml files"

    print "getting feed details..."
    db = EasySQL("localhost", "vip", "username", "password")
    try:
        with open(DIRECTORIES["temp"] + "source.txt", "r") as f:
            reader = csv.DictReader(f)
            row = reader.next()
            vip_id = row["vip_id"]
            election_details["vip_id"] = vip_id
        with open(DIRECTORIES["temp"] + "election.txt", "r") as f:
            reader = csv.DictReader(f)
            row = reader.next()
            election_details["election_date"] = row["date"]
            election_details["election_type"] = row["election_type"]
    except:
        er.report_summary(vip_id, election_id, file_details, election_details)
        return

    election_id = get_election_id(election_details, db)
    election_details["election_id"] = election_id
    print "done getting feed details"

    print "converting to full db files...."
    element_counts, error_data, warning_data = convert_to_db_files(
        vip_id, election_id, file_details["file_timestamp"],
        DIRECTORIES["temp"], sp)
    print "done converting to full db files"

    er.report_summary(vip_id, election_id, file_details, election_details,
                      element_counts)
    if len(error_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], error_data,
                       "error")
    if len(warning_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], warning_data,
                       "warning")

    update_data(vip_id, election_id, file_details["file_timestamp"], db,
                element_counts, DIRECTORIES["temp"], DIRECTORIES["archives"])

    db_validations(vip_id, election_id, db, sp)

    generate_feed(file_details)
Example #11
# Example showing use of the filetype library

import sys
import filetype

# guess the type of the file given as argument 1
print(filetype.match(sys.argv[1]))

# open the input file and read its leading bytes for an explicit match
f = open(sys.argv[1], 'rb')

buf = f.read(256)
print(filetype.get_type(None, 'flac').match(buf))
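For comparison, filetype.guess() infers the type from the same signature bytes without naming an expected type up front; a minimal sketch, assuming the same filetype package:

import sys
import filetype

kind = filetype.guess(sys.argv[1])
if kind is None:
    print('unknown or unsupported file type')
else:
    print(kind.extension, kind.mime)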