Example #1
 def mime_type(self):
     m = MimeTypes()
     m.read(LIASIS_DIR + MIME_TYPES)
     # guess_type() returns a (type, encoding) tuple, which is truthy even when
     # the type is None, so check the guessed type itself.
     guessed, _ = m.guess_type(self.tfile)
     return guessed or "text/plain"
Example #2
 def getMimeType(self, buffre, url, mtype):
     # Strip any query string before guessing from the path.
     if '?' in url:
         url = url.split('?')[0]

     mime = MimeTypes()
     ext = os.path.splitext(url)[1]
     if mtype == 'text/html' and ext == '':
         if url[-1] == '/':
             url = url[0:-1]
         url = url + '/index.html'
         ext = '.html'

     mime_type = mime.guess_type(url)
     if ext:
         u = urlparse.urlparse(url)
         # Default to the root directory; append the host when one is set.
         root_dir = self.root_dir
         if self.host:
             root_dir = self.root_dir + "/" + self.host

         file_path = os.path.join(root_dir, u.netloc + u.path)
         makeDir(os.path.dirname(file_path))
         f = open(file_path, "wb")
         f.write(buffre)
         f.close()
     return mime_type[0]
Example #3
    def upload_attachment(self, file_path):
        __, file_name = os.path.split(file_path)
        mime = MimeTypes()
        url = urllib.pathname2url(file_path)
        mime_type, __ = mime.guess_type(url)

        data = {
            'file_name': file_name,
            'file_type': mime_type
        }
        url = urljoin(self.api_url, 'attachment/upload')
        response = self.post(url, data=data)

        with open(file_path, 'rb') as fh:
            file_data = fh.read()

        upload_response = requests.put(
            response['upload_url'],
            data=file_data,
            headers={'content-type': mime_type},
            params={'file': file_path}
        )
        upload_response.raise_for_status()

        return {
            'file_url': response['file_url'],
            'file_type': mime_type,
        }
Example #4
def download(request, filename):
    #down_file = File.objects.get(name = filename)
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    DOWNLOAD_URL = BASE_DIR+"/download/"
    file_path = DOWNLOAD_URL + filename
    file_name = filename
    fp = open(file_path, 'rb')
    response = HttpResponse(fp.read())
    fp.close()
    mime = MimeTypes()
    content_type, encoding = mime.guess_type(file_name)
    if content_type is None:
        content_type = 'application/octet-stream'
    response['Content-Type'] = content_type
    response['Content-Length'] = str(os.stat(file_path).st_size)
    if encoding is not None:
        response['Content-Encoding'] = encoding
    if u'WebKit' in request.META['HTTP_USER_AGENT']:
        filename_header = 'filename=%s' % file_name.encode('utf-8')
    elif u'MSIE' in request.META['HTTP_USER_AGENT']:
        filename_header = ''
    else:
        filename_header = 'filename*=UTF-8\'\'%s' % urllib.quote(file_name.encode('utf-8'))
    response['Content-Disposition'] = 'attachment; ' + filename_header
    # Log the system event
    if is_event_open(request):
        log = Log(user_id=request.user.id, event=u'下載檔案<'+filename+'>')
        log.save()
    return response
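The view above targets Python 2 (urllib.quote). On Python 3, the RFC 5987 filename* form can be built with the standard library alone; a minimal sketch, with an illustrative file name:

from urllib.parse import quote

file_name = "報告.pdf"  # any Unicode file name
disposition = "attachment; filename*=UTF-8''{}".format(quote(file_name, safe=''))
print(disposition)  # attachment; filename*=UTF-8''%E5%A0%B1%E5%91%8A.pdf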
Example #5
 def update_community(self, properties):
     pc = api.portal.get_tool('portal_catalog')
     brain = pc.unrestrictedSearchResults(portal_type='ulearn.community',
                                          community_hash=self.params['community'])
     if not brain:
         brain = pc.unrestrictedSearchResults(portal_type='ulearn.community',
                                              gwuuid=self.params['community'])
     if brain:
         community = brain[0].getObject()
         if properties['title'] is not None:
             community.title = properties['title']
         if properties['description'] is not None:
             community.description = properties['description']
         if properties['image'] is not None:
             imageObj = ''
             mime = MimeTypes()
             mime_type = mime.guess_type(properties['image'])
             imgName = (properties['image'].split('/')[-1]).decode('utf-8')
             imgData = requests.get(properties['image']).content
             imageObj = NamedBlobImage(data=imgData,
                                       filename=imgName,
                                       contentType=mime_type[0])
             community.image = imageObj
         if properties['activity_view'] is not None:
             community.activity_view = properties['activity_view']
         if properties['twitter_hashtag'] is not None:
             community.twitter_hashtag = properties['twitter_hashtag']
         if properties['notify_activity_via_push'] is not None:
             community.notify_activity_via_push = True if properties['notify_activity_via_push'] == 'True' else None
         if properties['notify_activity_via_push_comments_too'] is not None:
             community.notify_activity_via_push_comments_too = True if properties['notify_activity_via_push_comments_too'] == 'True' else None
         community.reindexObject()
         return True
     else:
         return False
Example #6
def get_mime_type(path):
    mime = MimeTypes()

    mime_type = mime.guess_type(path)[0]
    if not mime_type:
        # Fall back to the bare extension, without the leading dot.
        mime_type = "text/{0}".format(os.path.splitext(path)[1].lstrip('.'))
    return mime_type
Example #7
def GetRFC(rfc_id,file_name,peer_ip,peer_port):
    global LRFCdata
    global Server
    
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(None)
    peer_ipaddr=peer_ip
    s.connect((peer_ipaddr,int(peer_port)))
    
    message = ["GETRFC",str(rfc_id),"P2P-DI/1.0","Host: ",IP,"Port: ",PORT,"Title: ",file_name,"Client ID: ",str(self_id)]
    s.send(pickle.dumps(message))

    filename = file_name + ".pdf"

    q = s.recv(4096)
    q = pickle.loads(q)

    if 'FILE NOT FOUND' in q:
        code = '404'
        phrase = 'FILE NOT FOUND'
        reply = ["P2P-DI/1.0 ", str(code), str(phrase)]
        PrintResponse(reply, 'FILERESPN')
    else:
        if 'FILE FOUND' in q:
            # Only create the local file once the peer confirms it has the RFC.
            file1 = open(filename, 'wb')
            last_modified = q[2]
            message = ["OK"]
            s.send(pickle.dumps(message))
            while True:
                q=s.recv(4096)
                if q:
                    file1.write(q)
                else:
                    code = '200'
                    phrase = 'OK'
                    mime = MimeTypes()
                    filesize = os.stat(filename).st_size
                    mime_type = mime.guess_type(filename)
                    reply = ["P2P-DI/1.0 ",str(code),str(phrase),"Last-Modified: ",str(last_modified),"Content-Length: ",str(filesize),"Content-Type: ",str(mime_type[0])]
                    PrintResponse(reply,'FILESENT')
                    file1.close()
                    break

            serverIP = Server[0]
            serverPort = Server[1]
            message=["PQUERY","P2P-DI/1.0 ",str(self_id),"Host: ",IP,"Port: ",str(PORT)]
            reply = client(message,serverIP,serverPort)
            PrintResponse(reply,'PQUERY')
            

            LocalRFC = LocalRFCRecord(rfc_id,file_name)
            LRFCdata.append(LocalRFC.getLocalRFC())
            Local_linked_list.append(LocalRFC.getLocalRFC())

            active_list = reply[4]    
            active_list=RFCStore(active_list,[[rfc_id,file_name]])
    
    s.close()
Example #8
    def export_warc(self):
        # by using select_for_update and checking for existence of this file,
        # we make sure that we won't accidentally try to create the file multiple
        # times in parallel.
        asset = self.assets.select_for_update().first()
        if not asset:
            return  # this is not an old-style Link
        if default_storage.exists(self.warc_storage_file()):
            return

        guid = self.guid
        out = self.open_warc_for_writing()

        def write_resource_record(file_path, url, content_type):
            self.write_warc_resource_record(
                default_storage.open(file_path),
                url.encode('utf8'),
                content_type,
                default_storage.created_time(file_path),
                out)

        def write_metadata_record(metadata, target_headers):
            concurrent_to = next(v for k, v in target_headers if k == warctools.WarcRecord.ID)
            warc_date = next(v for k, v in target_headers if k == warctools.WarcRecord.DATE)
            url = next(v for k, v in target_headers if k == warctools.WarcRecord.URL)
            self.write_warc_metadata_record(metadata, url, concurrent_to, warc_date, out)

        # write PDF capture
        if asset.pdf_capture and ('cap' in asset.pdf_capture or 'upload' in asset.pdf_capture):
            file_path = os.path.join(asset.base_storage_path, asset.pdf_capture)
            headers = write_resource_record(file_path, "file:///%s/%s" % (guid, asset.pdf_capture), 'application/pdf')
            #write_metadata_record({'role':'primary', 'user_upload':asset.user_upload}, headers)

        # write image capture (if it's not a PDF thumbnail)
        elif (asset.image_capture and ('cap' in asset.image_capture or 'upload' in asset.image_capture)):
            file_path = os.path.join(asset.base_storage_path, asset.image_capture)
            mime_type = get_mime_type(asset.image_capture)
            write_resource_record(file_path, "file:///%s/%s" % (guid, asset.image_capture), mime_type)

        if asset.warc_capture:
            # write WARC capture
            if asset.warc_capture == 'archive.warc.gz':
                file_path = os.path.join(asset.base_storage_path, asset.warc_capture)
                self.write_warc_raw_data(default_storage.open(file_path), out)

            # write wget capture
            elif asset.warc_capture == 'source/index.html':
                mime = MimeTypes()
                for root, dirs, files in default_storage.walk(os.path.join(asset.base_storage_path, 'source')):
                    rel_path = root.split(asset.base_storage_path, 1)[-1]
                    for file_name in files:
                        mime_type = mime.guess_type(file_name)[0]
                        write_resource_record(os.path.join(root, file_name),
                                              "file:///%s%s/%s" % (guid, rel_path, file_name), mime_type)

        self.close_warc_after_writing(out)

        # regenerate CDX index
        self.cdx_lines.all().delete()
Example #9
def searchImages(rootDir):
    imageList = []
    mime = MimeTypes()
    for root, subFolders, files in os.walk(rootDir):
        for file_name in files:
            mt = mime.guess_type(file_name)[0]
            if mt and mt.startswith('image/'):
                imageList.append(os.path.join(root, file_name))
    return imageList
Example #10
def get_extension(url):
    from mimetypes import MimeTypes
    mime_types = MimeTypes()

    (mime_type, encoding) = mime_types.guess_type(url)
    if mime_type is None:
        # Unknown types have no extensions to offer.
        return ''
    extensions = mime_types.guess_all_extensions(mime_type)
    extension = extensions[-1] if extensions else ''

    return extension
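Worth noting for this helper: guess_type() may return None for the type, and guess_all_extensions() returns an empty list for unregistered types, so both cases need the guards shown above. A quick sketch of that behavior:

from mimetypes import MimeTypes

mime = MimeTypes()
print(mime.guess_all_extensions('image/jpeg'))  # e.g. ['.jpe', '.jpeg', '.jpg']
print(mime.guess_all_extensions('made/up'))     # [] for unregistered types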
Example #11
def add_documents(request, category_id):
    if request.is_ajax():
        files = request.GET.getlist('files', False)
        cat = Category.objects.get(id=category_id)
        l_doc = []
        l_pdf = []
        cmds = []
        paths = []
        for f in list(files):
            mime = MimeTypes()
            path = os.path.join(settings.MEDIA_ROOT, settings.UPLOAD_DIR, f)
            m = mime.guess_type(path)[0]
            d = Document(name=f.encode('ascii', 'ignore'), owner=request.user, refer_category=cat)
            d.save()
            cat.add_doc(d)
            if m == 'application/pdf':
                l_pdf.append(([cat], path, f, [d]))
            elif m in ['image/png', 'image/jpeg', 'image/bmp']:
                im = Image.open(path)
                w, h = im.size
                new_filename = str(d.id) + '_' + f
                new_path = os.path.join(cat.get_absolute_path(), new_filename)
                shutil.copy2(path, new_path)
                d.add_page(d.get_npages() + 1, new_filename, w, h)
                for fu in FileUpload.objects.all():
                    if fu.file.path == path:
                        fu.delete()
                d.complete = True
                d.save()
                remove_fileupload([path])
            elif m in ['application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document']:
                p = re.compile(r'\.[Dd][Oo][Cc][xX]?$')
                new_f = p.sub('.pdf', f)
                new_path = path.replace(f, new_f)
                cmd = 'soffice --headless --convert-to pdf  %s --outdir %s/upload' % (path, settings.MEDIA_ROOT)
                cmds.append(cmd)
                paths.append(path)
                l_doc.append(([cat], new_path, new_f, [d]))
            elif m in ['application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']:
                p = re.compile(r'\.[Xx][Ll][Ss][xX]?$')
                new_f = p.sub('.pdf', f)
                new_path = path.replace(f, new_f)
                cmd = 'soffice --headless --convert-to pdf  %s --outdir %s/upload' % (path, settings.MEDIA_ROOT)
                cmds.append(cmd)
                paths.append(path)
                l_doc.append(([cat], new_path, new_f, [d]))
            else:
                print 'FILE FORMAT ERROR'
        if len(l_doc):
            thread1 = Timer(0, manage_convert_doc_to_pdf, (cmds, paths, l_doc,))
            thread1.start()
        if len(l_pdf):
            thread = Timer(0, manage_convert_pdf_to_jpg, (l_pdf,))
            thread.start()
        results = {'doc_list': [d.as_json() for d in cat.get_docs()], 'n': cat.count_docs()}
        return HttpResponse(json.dumps(results))
Example #12
 def uploadFileToS3(self, filename, content):
     try:
         # guess_type() is an instance method, so build a MimeTypes() first.
         mime = MimeTypes().guess_type(filename)[0]
         k = Key(S3UploadManager.S3Bucket)
         k.key = filename
         k.set_metadata("Content-Type", mime)
         k.set_contents_from_string(content)
         k.set_acl("public-read")
     except Exception as e:
         raise type(e)(str(e))
Example #13
File: pyle.py Project: hrkfdn/pyle
def showfile(file):
    mime = MimeTypes()
    mime_type = mime.guess_type(file)
    if mime_type[0] is not None and mime_type[0].startswith("image"):
        return render_template("image.html",
                title="pyle :: preview",
                filepath="/data/" + file,
                filename=os.path.basename(file))
    else:
        return readfile(file)
Example #14
 def mime(self):
     if self.mMimeType is not None:
         return self.mMimeType
     
     mime = MimeTypes()
     url = urllib.pathname2url(self.mFullPathName)
     self.mMimeType = mime.guess_type(url)[0]
     if self.mMimeType is None:
         self.mMimeType = "Unknown/None"
     return self.mMimeType
Example #15
 def make_headers(filename):
     mime = MimeTypes()
     mime.add_type('text/plain', '.log')
     mime.add_type('text/x-yaml', '.yaml')
     content_type, encoding = mime.guess_type(filename)
     headers = {"Content-Type": "application/octet-stream"}
     if content_type:
         headers['Content-Type'] = content_type
     if encoding:
         headers['Content-Encoding'] = encoding
     return headers
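The add_type() calls above register the extra extensions only on that MimeTypes instance, which keeps the mapping local to the helper. A standalone sketch of the same idea:

from mimetypes import MimeTypes

mime = MimeTypes()
mime.add_type('text/x-yaml', '.yaml')
mime.add_type('text/plain', '.log')

print(mime.guess_type('deploy.yaml'))  # ('text/x-yaml', None)
print(mime.guess_type('build.log'))    # ('text/plain', None)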
Example #16
    def get_mime_type(self, file_path):
        # Get the MIME type, defaulting to text/plain on any failure.
        try:
            mime = MimeTypes()
            murl = urllib.pathname2url(file_path)
            mime_type = mime.guess_type(murl)[0]
            if mime_type is None:
                mime_type = 'text/plain'
        except Exception:
            mime_type = 'text/plain'

        return mime_type
Example #17
 def show_media(self, url, title = "", desc ="", loop = 0):
     mime = MimeTypes()
     mime_type = mime.guess_type(url)
     payload = {
         "target": url,
         "title":title,
         "description":desc,
         "mimeType": mime_type[0],
         "iconSrc":"",
         "loop":loop}
     command = '{"id":"images","type":"request","uri":"ssap://media.viewer/open","payload":'+ json.dumps(payload)+'}'
     self.send_command(command)
Example #18
	def addFileOutput(self, name, filepath):
		data = open(filepath, 'rb').read()
		filename = os.path.basename(filepath)
		mime = MimeTypes()
		mime_guess = mime.guess_type(filename)[0]

		self.result[name] = {
			"filename": filename,
			"content-type": mime_guess,
			"data": base64.b64encode(data)
		}
		return self
Example #19
    def transform_import_values(self, value):
        '''
        # TODO: Following commented code can be used if user does not already have file in final location using django ORM:

        request = HttpRequest()
        # request.FILES['file-list_' + str(nodeid)] = None
        files = []
        # request_list = []

        for val in value.split(','):
            val_dict = {}
            val_dict['content'] = val
            val_dict['name'] = val.split('/')[-1].split('.')[0]
            val_dict['url'] = None
            # val_dict['size'] = None
            # val_dict['width'] = None
            # val_dict['height'] = None
            files.append(val_dict)
            f = open(val, 'rb')
            django_file = InMemoryUploadedFile(f,'file',val.split('/')[-1].split('.')[0],None,None,None)
            request.FILES.appendlist('file-list_' + str(nodeid), django_file)
        print request.FILES
        value = files
        '''

        mime = MimeTypes()
        tile_data = []
        for file_path in value.split(','):
            # Build the tile entry first, then try to add optional stat info.
            tile_file = {}
            try:
                file_stats = os.stat(file_path)
                tile_file['lastModified'] = file_stats.st_mtime
                tile_file['size'] = file_stats.st_size
            except OSError:
                pass
            tile_file['file_id'] = str(uuid.uuid4())
            tile_file['status'] = ""
            tile_file['name'] =  file_path.split('/')[-1]
            tile_file['url'] =  settings.MEDIA_URL + 'uploadedfiles/' + str(tile_file['name'])
            # tile_file['index'] =  0
            # tile_file['height'] =  960
            # tile_file['content'] =  None
            # tile_file['width'] =  1280
            # tile_file['accepted'] =  True
            tile_file['type'] = mime.guess_type(file_path)[0] or ''
            tile_data.append(tile_file)
            file_path = 'uploadedfiles/' + str(tile_file['name'])
            fileid = tile_file['file_id']
            models.File.objects.get_or_create(fileid=fileid, path=file_path)

        result = json.loads(json.dumps(tile_data))
        return result
Example #20
    def update_data(self, data, content_type, size):
        """

        """
        if not data:
            # still index
            self.index_object()
            return

        passed, virus_name = DataVirusCheckAdapter(data).process()

        if not passed:
            log.warn('found virus %s, rejecting file' % virus_name)
            raise XWFFileError('found virus %s, rejecting file' % virus_name)

        # this isn't necessarily an error, on init we get called without data
        base_files_dir = self._base_files_dir
        self.size = size
        self.set_modificationTime()

        if base_files_dir:
            fileId = os.path.join(base_files_dir, self.getId())
            f = file(fileId, 'wb+')

            if hasattr(data, '__class__') and data.__class__ is Pdata:
                while data is not None:
                    f.write(data.data)
                    data = data.next
            else:
                f.write(data)

            f.close()

        # fix the title
        title = to_unicode_or_bust(self.title)
        title = removePathsFromFilenames(title)
        self.title = title

        if content_type is not None:
            self.content_type = content_type
        else:
            # guess_type() returns a (type, encoding) tuple; store just the type.
            mtypes = MimeTypes()
            mime_type = mtypes.guess_type(title)[0]
            if mime_type:
                self.content_type = mime_type

        self.ZCacheable_invalidate()
        self.ZCacheable_set(None)
        self.http__refreshEtag()

        # index ourselves into the catalog
        self.index_object()
Example #21
def get_base64_image(url):
    template_image = ''
    mime = MimeTypes()
    mime_type = mime.guess_type(url)[0]
    # Stream the image from the url
    request = requests.get(url)
    image_buffer = StringIO(request.content)

    if image_buffer:
        template_image = image_buffer.getvalue().encode('base64')
        template_image = u'data:%s;base64,%s' % (mime_type, template_image)

    return template_image
Example #22
    def batch_upload_response(self, request):
        output_fields = flatten_fieldsets(self.fieldsets)
        
        media_file_name = get_media_file_name(self, self.model)


        #Disabling exception handling here per @olivierdalang's feedback:
        # try:
        latest_log_entry = LogEntry.objects.filter(action_flag=ADDITION).order_by('-action_time')[0]
        ct = ContentType.objects.get_for_id(latest_log_entry.content_type_id)
        obj = ct.get_object_for_this_type(pk=latest_log_entry.object_id)
        
        if obj:

            object_data = {}

            mime = MimeTypes()
            media_file = get_media_file(self, self.model, obj)
            media_file_url = media_file.url #urllib.pathname2url(media_file.url) #Not sure why i had this, but it's escaping the URL
            
            mime_type = mime.guess_type(media_file_url)
            edit_url = reverse('admin:%s_%s_change' %(obj._meta.app_label,  obj._meta.model_name),  args=[obj.id] )

            object_data['media_file_url'] = media_file_url
            object_data['media_file_size'] = media_file.size
            object_data['media_file_type'] = mime_type[0]
            object_data['edit_url'] = mark_safe(edit_url)

            field_values = {}

            for output_field in output_fields:
                value = str(self.get_field_contents(output_field, obj))
                label = str(label_for_field(output_field, self.model, self))

                field_values[output_field] = {
                    'label':label,
                    'value':value
                }

            object_data['field_values'] = field_values
            

            data = {
                "success":True,
                "files":[
                    object_data
                ]
            }
            json_dumped = json.dumps(data)

            return HttpResponse(json_dumped, content_type='application/json')
Example #23
 def write(self, filename, contents, encoding='utf8'):
     """
     Write a file to the data store.
     """
     mime = MimeTypes()
     mime.add_type('text/x-yaml', '.yaml')
     content_type, _ = mime.guess_type(filename)
     key = self.bucket.new_key(self._path(filename))
     key.set_contents_from_string(contents.encode(encoding), {
         'Content-Type': content_type or 'text/plain',
         'Content-Encoding': encoding,
     })
     if self.public:
         key.set_canned_acl('public-read')
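For comparison, a roughly equivalent sketch using boto3 instead of the boto 2 Key API shown above; the bucket name and the standalone helper are illustrative, not part of the original class:

import boto3
from mimetypes import MimeTypes

s3 = boto3.client('s3')
mime = MimeTypes()
mime.add_type('text/x-yaml', '.yaml')

def write(bucket, filename, contents, encoding='utf8'):
    # Mirror the original: guess the type from the name, default to text/plain.
    content_type, _ = mime.guess_type(filename)
    s3.put_object(
        Bucket=bucket,
        Key=filename,
        Body=contents.encode(encoding),
        ContentType=content_type or 'text/plain',
        ContentEncoding=encoding,
        ACL='public-read',
    )

write('my-data-store', 'reports/summary.yaml', 'status: ok\n')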
Example #24
    def upload(self, filepath):
        filename = filepath.split("/")[-1]
        mime = MimeTypes()
        mime_type, _ = mime.guess_type(filename)
        data = {"filename": filename, "mime_type": mime_type}
        response = self.session.post("%s/files/" % self.url, data=json.dumps(data), headers=HEADERS)

        if response.status_code == requests.codes.created:
            f = File(self, response.json())
            with open(filepath, "rb") as localfile:
                response = requests.put(f.upload_url, data=localfile)
                return f
        else:
            print(response.text)
Example #25
def filetype(file):
    name = ''
    mime = MimeTypes()
    # guess_type() returns (type, encoding); 'gzip' appears as the encoding.
    mime_type = mime.guess_type(file)

    if "application/x-zip-compressed" in mime_type:
        name = "zip"
    elif "gzip" in mime_type:
        name = "gzip"
    elif "application/x-tar" in mime_type:
        name = "tar"
    return name
Example #26
def Email(subject, body, path=0):
    msg = Message(subject,
        sender="*****@*****.**",
        recipients=["*****@*****.**"])
    msg.body = body
    if path != 0:
        mime = MimeTypes()
        for fn in os.listdir(path):
            mime_type = mime.guess_type(fn)
            with app.open_resource(path + "\\" + fn) as fp:
                msg.attach(fn, mime_type[0], fp.read())
        shutil.rmtree(path)
    mail.send(msg)
Example #27
    def POST(self):
        params = {}
        params['nom'] = self.params.pop('title')
        params['community_type'] = self.params.pop('community_type')
        params['description'] = self.params.pop('description', None)
        params['image'] = self.params.pop('image', None)
        params['activity_view'] = self.params.pop('activity_view', None)
        params['twitter_hashtag'] = self.params.pop('twitter_hashtag', None)
        params['notify_activity_via_push'] = self.params.pop('notify_activity_via_push', None)
        params['notify_activity_via_push_comments_too'] = self.params.pop('notify_activity_via_push_comments_too', None)

        pc = api.portal.get_tool('portal_catalog')
        nom = safe_unicode(params['nom'])
        util = queryUtility(IIDNormalizer)
        id_normalized = util.normalize(nom, max_length=500)
        result = pc.unrestrictedSearchResults(portal_type='ulearn.community',
                                              id=id_normalized)

        imageObj = ''
        if params['image']:
            mime = MimeTypes()
            mime_type = mime.guess_type(params['image'])
            imgName = (params['image'].split('/')[-1]).decode('utf-8')
            imgData = requests.get(params['image']).content
            imageObj = NamedBlobImage(data=imgData,
                                      filename=imgName,
                                      contentType=mime_type[0])

        if result:
            # community = result[0].getObject()
            success_response = 'Community already exists.'
            status = 200
        else:
            new_community_id = self.context.invokeFactory('ulearn.community', id_normalized,
                                                          title=params['nom'],
                                                          description=params['description'],
                                                          image=imageObj,
                                                          community_type=params['community_type'],
                                                          activity_view=params['activity_view'],
                                                          twitter_hashtag=params['twitter_hashtag'],
                                                          notify_activity_via_push=True if params['notify_activity_via_push'] == 'True' else None,
                                                          notify_activity_via_push_comments_too=True if params['notify_activity_via_push_comments_too'] == 'True' else None,
                                                          checkConstraints=False)
            new_community = self.context[new_community_id]
            success_response = 'Created community "{}" with hash "{}".'.format(new_community.absolute_url(), sha1(new_community.absolute_url()).hexdigest())
            status = 201
        logger.info(success_response)
        return ApiResponse.from_string(success_response, code=status)
Example #28
    def post_media(self, user_id, file_path, sent_by_maker=False):
        role = "appUser"
        if sent_by_maker:
            role = "appMaker"

        data = {"role": role}

        mime = MimeTypes()
        mime_type, _ = mime.guess_type(file_path)

        file_name = os.path.basename(file_path)
        files = {'source': (file_name, open(file_path, 'rb'), mime_type)}

        url = 'appusers/{0}/conversation/images'.format(user_id)

        return self.ask(url, data, 'post', files)
Example #29
	def handleFile(self, filename):
		fd = open(filename, 'rb')
		content = fd.read()
		fd.close()
		
		self.send_response(200)
		
		mime = MimeTypes()
		mimetype = mime.guess_type(filename)
		self.send_header("Content-Type", mimetype[0])
		if mimetype[1]:
			self.send_header("Content-Encoding", mimetype[1])
		self.send_header("Content-Length", str(len(content)))
		self.end_headers()
		self.wfile.write(content)
		self.wfile.flush()
Example #30
    def view(self, request, **kwargs):
        '''
        Allow a file to be viewed rather than downloaded. This is particularly needed when a video file is stored
        in the fileservice and the user wants to view it in place instead of having to download it first.
        Serving the file is handed off to nginx/apache, which returns the proper headers so that, for example,
        the HTML5 video player's 'seek' indicator/knob works; otherwise the video can only be played sequentially.

        Note that nginx/apache need to be configured accordingly. nginx for example:
        location /var/lib/geoserver_data/file-service-store/ {
           # forces requests to be authorized
           internal;
           alias   /var/lib/geoserver_data/file-service-store/;
        }

        for apache, need to install xsendfile module, enable it, set the path and then
        XSendFile on
        XSendFilePath /var/lib/geoserver_data/file-service-store

        example use:
        /fileservice/view/med.mp4
        or
        /fileservice/med.mp4/view

        Note that media players tend to require the route to end with the filename like /fileservice/view/med.mp4
        '''
        # method check to avoid bad requests
        self.method_check(request, allowed=['get'])
        # Must be done otherwise endpoint will be wide open
        self.is_authenticated(request)

        response = None
        file_item_name = kwargs.get('name', None)
        if file_item_name:
            mime = MimeTypes()
            url = urllib.pathname2url(file_item_name)
            mime_type = mime.guess_type(url)
            response = HttpResponse(content_type=mime_type[0])
            file_with_route = smart_str('{}{}'.format(helpers.get_fileservice_dir(), file_item_name))
            # apache header
            response['X-Sendfile'] = file_with_route
            # nginx header
            response['X-Accel-Redirect'] = file_with_route

        if not response:
            response = self.create_response(request, {'status': 'filename not specified'})

        return response
Example #31
s3 = boto3.client('s3',
                  aws_access_key_id=os.environ['PARROT_AWS_KEY_ID'],
                  aws_secret_access_key=os.environ['PARROT_AWS_KEY_SECRET'])
cf = boto3.client('cloudfront',
                  aws_access_key_id=os.environ['PARROT_AWS_KEY_ID'],
                  aws_secret_access_key=os.environ['PARROT_AWS_KEY_SECRET'])
# Read our targets
bucket = os.environ['PARROT_CDN_BUCKET']
dist = os.environ['PARROT_CDN_DIST']

print('Uploading files...')
mime = MimeTypes()
for filename in tqdm(glob.glob('frontend/build/**/*', recursive=True)):
    if not os.path.isdir(filename):
        # Try to fill the MIME type based on the filename. If we cannot make a
        # guess, then we assume it's a binary file.
        mime_type = mime.guess_type(filename)[0]
        if mime_type is None:
            mime_type = 'application/octet-stream'
        # Upload the file to the s3 bucket with read permission
        s3.upload_file(
            filename,
            bucket,
            # The new filename on s3. We want to remove the frontend/build/
            # prefix from the name.
            filename.split('frontend/build/')[-1],
            ExtraArgs={
                'ACL': 'public-read',
                'ContentType': mime_type,
            })
print('Done.')
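The cf client and dist distribution id are created above but unused in this excerpt; presumably the upload is followed by a CloudFront cache invalidation, roughly like the sketch below (it reuses cf and dist from the snippet above):

import time

cf.create_invalidation(
    DistributionId=dist,
    InvalidationBatch={
        'Paths': {'Quantity': 1, 'Items': ['/*']},
        'CallerReference': str(time.time()),  # any unique string per request
    })
print('Invalidation requested.')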
Example #32
	return print('Message saved')

def saveSent(Mailbox, emailObj={}):
	Mailbox.select_folder(IMAP_SENT_FOLDER, readonly=False)
	msg =  createMsg(emailObj).as_string()
	print(Mailbox.append(IMAP_SENT_FOLDER, msg))
	return print('Message saved')


#=============Data for test to be deleted=====
mime = MimeTypes()
file = None
AttachFilepath = '/Users/macbookpro15/Documents/ReactJS/QuickMail/myapi/рома.png'
with open(AttachFilepath, 'rb') as fp:
	fileData = fp.read()
	fileMIME = mime.guess_type(AttachFilepath)[0]
	mainType = fileMIME.split('/')[0]
	subType = fileMIME.split('/')[1]
	fileSize=fp.tell()
	fileName=basename(AttachFilepath)

fileObj = {'name': fileName, 'file':fileData , 'size':fileSize, 'MIME':fileMIME}

emailObj = {
	'id': 1234567,
	'Date': datetime.now(),
	'From':{'name':'roman.reshetnyak', 'email':'*****@*****.**'},
	'To': [{'name':'vian.vinos', 'email':'*****@*****.**'}],
	'Cc': [{'name':'dima.vinos', 'email':'*****@*****.**'}],
	'Subject':'На дворе вечерело',
	'body':'<div><b>Hello Привет мир</b>версия 2.6</div>',
Example #33
class ResponseTypes(object):
    CLASSES = {
        'text/html': 'scrapy.http.HtmlResponse',
        'application/atom+xml': 'scrapy.http.XmlResponse',
        'application/rdf+xml': 'scrapy.http.XmlResponse',
        'application/rss+xml': 'scrapy.http.XmlResponse',
        'application/xhtml+xml': 'scrapy.http.HtmlResponse',
        'application/vnd.wap.xhtml+xml': 'scrapy.http.HtmlResponse',
        'application/xml': 'scrapy.http.XmlResponse',
        'application/json': 'scrapy.http.TextResponse',
        'application/x-json': 'scrapy.http.TextResponse',
        'application/json-amazonui-streaming': 'scrapy.http.TextResponse',
        'application/javascript': 'scrapy.http.TextResponse',
        'application/x-javascript': 'scrapy.http.TextResponse',
        'text/xml': 'scrapy.http.XmlResponse',
        'text/*': 'scrapy.http.TextResponse',
    }

    def __init__(self):
        self.classes = {}
        self.mimetypes = MimeTypes()
        mimedata = get_data('scrapy', 'mime.types').decode('utf8')
        self.mimetypes.readfp(StringIO(mimedata))
        for mimetype, cls in six.iteritems(self.CLASSES):
            self.classes[mimetype] = load_object(cls)

    def from_mimetype(self, mimetype):
        """Return the most appropriate Response class for the given mimetype"""
        if mimetype is None:
            return Response
        elif mimetype in self.classes:
            return self.classes[mimetype]
        else:
            basetype = "%s/*" % mimetype.split('/')[0]
            return self.classes.get(basetype, Response)

    def from_content_type(self, content_type, content_encoding=None):
        """Return the most appropriate Response class from an HTTP Content-Type
        header """
        if content_encoding:
            return Response
        mimetype = to_native_str(content_type).split(';')[0].strip().lower()
        return self.from_mimetype(mimetype)

    def from_content_disposition(self, content_disposition):
        try:
            filename = to_native_str(
                content_disposition, encoding='latin-1',
                errors='replace').split(';')[1].split('=')[1]
            filename = filename.strip('"\'')
            return self.from_filename(filename)
        except IndexError:
            return Response

    def from_headers(self, headers):
        """Return the most appropriate Response class by looking at the HTTP
        headers"""
        cls = Response
        if b'Content-Type' in headers:
            cls = self.from_content_type(
                content_type=headers[b'Content-type'],
                content_encoding=headers.get(b'Content-Encoding'))
        if cls is Response and b'Content-Disposition' in headers:
            cls = self.from_content_disposition(
                headers[b'Content-Disposition'])
        return cls

    def from_filename(self, filename):
        """Return the most appropriate Response class from a file name"""
        mimetype, encoding = self.mimetypes.guess_type(filename)
        if mimetype and not encoding:
            return self.from_mimetype(mimetype)
        else:
            return Response

    def from_body(self, body):
        """Try to guess the appropriate response based on the body content.
        This method is a bit magic and could be improved in the future, but
        it's not meant to be used except for special cases where response types
        cannot be guessed using more straightforward methods."""
        chunk = body[:5000]
        chunk = to_bytes(chunk)
        if not binary_is_text(chunk):
            return self.from_mimetype('application/octet-stream')
        elif b"<html>" in chunk.lower():
            return self.from_mimetype('text/html')
        elif b"<?xml" in chunk.lower():
            return self.from_mimetype('text/xml')
        else:
            return self.from_mimetype('text')

    def from_args(self, headers=None, url=None, filename=None, body=None):
        """Guess the most appropriate Response class based on
        the given arguments."""
        cls = Response
        if headers is not None:
            cls = self.from_headers(headers)
        if cls is Response and url is not None:
            cls = self.from_filename(url)
        if cls is Response and filename is not None:
            cls = self.from_filename(filename)
        if cls is Response and body is not None:
            cls = self.from_body(body)
        return cls
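Recent Scrapy versions expose a module-level instance of this class, so the lookups can be tried directly; a short usage sketch:

from scrapy.responsetypes import responsetypes

print(responsetypes.from_mimetype('text/html'))                      # HtmlResponse
print(responsetypes.from_filename('feed.xml'))                       # XmlResponse
print(responsetypes.from_args(url='https://example.com/data.json'))  # TextResponse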
Example #34
 def _check_file_mime_type(path):
     mime = MimeTypes()
     return mime.guess_type(path)
Example #35
#!/usr/bin/env python
import sys
import json
import urllib
import base64
from mimetypes import MimeTypes
import os
dir = os.path.dirname(__file__)

if len(sys.argv) < 4:
    print 'Must give the node uuid, image file-name, and output file.'
    print sys.argv
    sys.exit(1)

mime = MimeTypes()
url = urllib.pathname2url(sys.argv[2])
mime_type = mime.guess_type(url)

image_path = os.path.join(os.getcwd(), sys.argv[2])
with open(image_path, "rb") as image_file:
    encoded_string = base64.b64encode(image_file.read())

data = {
    "uuid": sys.argv[1],
    "file": encoded_string,
    "mimetype": mime_type[0],
}

output_path = os.path.join(os.getcwd(), sys.argv[3])
with open(output_path, 'w') as outfile:
    json.dump(data, outfile)
Example #36
    def handle_client_request(self, resp, addr):
        #print("Handling client request")
        #print(resp)
        try:
            #while True:
             #   data = server.recv(2048)
            #resp = binascii.a2b_uu(resp)
            #resp = self.new_break_req(resp)
            resp = bytes(resp).decode("utf-8")
            #print(resp)
            #print(bytes(resp).decode("utf-8"))
            if self.debugging:
                print("Processing a new request for the server:")
             #       print(data)
             #   if not data:
              #      break
            #self.lock.acquire()
            self.break_req(resp)# break the client request for server to understand
            self.checksecurity()
            if self.error_code != 400:
                self.directory = self.curr_directory + self.path
                try:
                    if self.action == "GET":
                        self.make_file_name()
                        if os.path.exists(self.directory):
                            flagorig = True
                            flagnew = False
                        else:
                            flagorig = False
                            if os.path.exists(self.file_directory):
                                flagnew = True
                                self.directory = self.file_directory
                            else:
                                flagnew = False
                        if flagorig or flagnew:
                            if self.debugging:
                                print("Valid Path!")
                            self.isDirectory = os.path.isdir(self.directory)
                            if self.isDirectory:
                                self.error_code = 200
                                files = os.listdir(self.directory)
                                self.req_body = json.dumps(files)
                                if self.debugging:
                                    print("Returning a list of the current files in the current data directory!",
                                            self.directory)
                                    print("Files returned from the server: ")
                                    print(self.req_body)
                            else:
                                self.isFile = os.path.isfile(self.directory)
                                if self.isFile:
                                    if self.debugging:
                                        print("Returning the content of the file in the data directory",
                                                self.directory)
                                    file_read = open(self.directory, "r")
                                    self.req_body = file_read.read()
                                    file_read.close()
                        else:
                            if self.debugging:
                                print("Path could not be found : ", self.directory)
                            self.error_code = 404

                    elif self.action == "POST":
                        pathlib.Path(os.path.dirname(self.directory)).mkdir(parents=True, exist_ok=True)
                        self.make_file_name()
                        if self.patheditflag:
                            self.directory = self.file_directory
                        file_o = open(self.directory, "w")
                        file_o.write(self.client_body)
                        file_o.close()
                        self.error_code = 200
                        self.req_body = self.client_body
                        self.isFile = True

                except OSError as err:
                    if self.debugging:
                        print(err)
                    self.error_code = 400
                    self.req_body = ""
                except SystemError as err:
                    if self.debugging:
                        print(err)
                    self.error_code = 400
                    self.req_body = ""
            if self.isDirectory:
                self.header_dict["Content-Type"] = "application/json"
            elif self.isFile:
                if self.patheditflag:
                    self.directory = self.file_directory
                    #print("File type")
                mimes_all = MimeTypes()
                mime_type = mimes_all.guess_type(self.directory)
                #print(mime_type[0])
                self.header_dict["Content-Type"] = mime_type[0]

            resp = httplib(self.error_code, self.req_body, self.header_dict)
            self.response = resp.response_head() + self.req_body
            if self.debugging:
                print('Response is :\n',self.response)
                #print('\nrequest body is: \n' + self.req_body)
            return self.response
                #server.sendall(response.encode("ascii"))
            #self.makepacket.makepackets(response, addr)
            #TODO: check what needs to be passed to send packets back to client

            #self.error_code = 200
            #self.header_dict = {}
            #self.lock.release()
        finally:
            print("")
Example #37
    def sendFile(self):
        #If we don't need to send files, return
        if self.mRequestQueue.empty():
            return

        #Get some values
        request = self.mRequestQueue.get()
        filepath = request.getURL()
        connection = "close"
        if request.containsKey("Connection"):
            connection = request.getParam("Connection")

        #Check if file exists
        if not os.path.isfile(filepath):
            print filepath
            self.sendNotFound()
            if connection.lower() == "close":
                self.mShouldStop = True
            return

        try:
            #Open file
            file = open(filepath, "rb")
            size = os.path.getsize(filepath)
            #Get mime type
            mime = MimeTypes()
            mime_type = mime.guess_type(filepath)

            if file.closed:
                self.sendNotFound()
                if connection.lower() == "close":
                    self.mShouldStop = True
                    return

            #Send header
            self.mSocket.send("HTTP/1.1 200 OK\r\nContent-Length: " +
                              str(size) + "\r\nContent-Type: " +
                              str(mime_type[0]) + "\r\n" + "Connection: " +
                              connection + "\r\n\r\n")

            #Init
            bufferData = file.read(5024)
            self.mSocket.send(bufferData)
            #Keep sending as long as we have data
            while bufferData:
                bufferData = file.read(5024)
                if not bufferData:
                    break

                self.mSocket.send(bufferData)
                #Do a non blocking read to support more than one get request per socket
                self.mSocket.setblocking(0)
                try:
                    data = self.mSocket.recv(1024)
                    #If we have data, try to parse requests
                    if data:
                        self.mBuffer = self.mBuffer + data
                        self.parseReceive()
                except:
                    data = False

                #Enable blocking again
                self.mSocket.setblocking(1)

            file.close()
            if connection.lower() == "close":
                print "Shutting down connection because of connection: close"
                self.mSocket.close()
                self.mShouldStop = True
        except:
            self.sendBadRequest()
            print "Exception in send."

        #Work off the files recursive because we check for more requests in the send loop
        self.sendFile()
Example #38
class Manager(Queue, EncoderListener):
    def __init__(self):
        super().__init__()
        self._listeners = None
        self._files = None
        self._encoder = None
        self._mime = None
        self._parameters = settings.parameters
        self._max_cores = utils.cpu_count()
        self._encoder = None
        self._lock = Lock()
        self._working = False

    def add_file(self, path: Path):
        #print(path.absolute())
        if not self._files:
            self._files = []
        if path.is_file() and path not in self._files and self._is_supported(
                path):
            print('supported')
            self._files.append(path)
            for listener in self._listeners:
                listener.on_file_add(path, self._files.__len__())

    def remove_file(self, path: Path):
        if self._files is None:
            return
        index = -1
        for i, p in enumerate(self._files):
            if p == path:
                index = i
                break
        if index != -1:
            self.remove_file_at(index)

    def remove_file_at(self, index: int):
        path = self._files.pop(index)
        for listener in self._listeners:
            listener.on_file_remove(path, self._files.__len__())

    def add_listener(self, listener: ManagerListener):
        if not self._listeners:
            self._listeners = set()
        self._listeners.add(listener)

    def get_encoder(self) -> Encoder:
        return self._encoder

    def set_encoder(self, encoder: Encoder):
        self._encoder = encoder

    def get_parameters(self):
        return self._parameters

    def start_job(self):
        print('start job')
        with self._lock:
            if not self._files:
                print('Nothing to convert')
                return
            if not self._working:
                if self._encoder is None:
                    self._encoder = EncoderManager(self, self._parameters,
                                                   self, utils.cpu_count())
                self._encoder.do_job()
                self._working = True

    def stop_job(self):
        print('stop job')
        with self._lock:
            if self._working:
                self._encoder.abort()
                self._working = False

    def _is_supported(self, path: Path) -> bool:
        if not self._mime:
            from mimetypes import MimeTypes
            self._mime = MimeTypes()
        mime_type = self._mime.guess_type(str(path.absolute()))
        return mime_type[0] in self.supported_mime_types

    @property
    def supported_mime_types(self):
        return mime_white_list

    def _get_files(self) -> list:
        return self._files

    def on_job_done(self):
        for listener in self._listeners:
            listener.on_encoding_finish()

    def on_file_encode(self, path: Path, success: bool):
        for listener in self._listeners:
            listener.on_file_encode(path, success, len(self._files))
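mime_white_list is imported from elsewhere in the project; for reading the _is_supported() check in isolation, a hypothetical stand-in could look like this (the actual list is an assumption, not part of the source):

from pathlib import Path

# Hypothetical whitelist; the real one lives elsewhere in the project.
mime_white_list = frozenset({'audio/mpeg', 'audio/x-wav', 'video/mp4'})

# With a listener registered, Manager().add_file(Path('song.mp3')) then accepts
# the file only when guess_type() maps it to one of these types.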
Example #39
    def do_GET(self):
        print("************************************************")
        mime = MimeTypes()
        mimeType = mime.guess_type(self.path)
        print("Request received")
        urlParseData = urlparse(self.path)
        print("URLPARSEDATA****  ", urlParseData)
        urlPath = urlParseData[2]
        query = urlParseData[4]
        print("QUERY *****", query)
        if query != '':
            query = parse_qs(query)
            print(query)
            print("name ", query["nm"][0])
        newPath = ''
        if urlPath == '/' or urlPath == '/favicon.ico':
            newPath = './default/index.html'
            print("Entered the default branch")
        else:
            path = urlPath
            path = path.split("/")
            print(path[1])
            if path[1] in siteNames:
                if len(path) == 2:
                    print('./apps/' + path[1] + '/index.html')
                    if os.path.isfile('./apps/' + path[1] + '/index.html'):
                        print('index.html found')
                        # urlPath='./apps/'+path[1]+'/index.html'
                        newPath = urlPath + '/index.html'
                    else:
                        print('index.html not found')
                        if os.path.isfile('./apps/' + path[1] + '/index.htm'):
                            print('index.htm found')
                            #urlPath='./apps/'+path[1]+'/index.htm'
                            newPath = urlPath + '/index.html'
                        else:
                            print('index.htm not found')
                            if os.path.isfile('./apps/' + path[1] +
                                              '/index.py'):
                                print("index.py found")
                                # urlPath='./apps/'+path[1]+'/index.py'
                                newPath = urlPath + '/index.html'
                            else:
                                self.send_error(404, "home page not found")
                                return
                    self.send_response(301)
                    self.send_header('Location', newPath)
                    self.end_headers()
                    return
                else:
                    if path[2] == 'private':
                        print("Nothing can be done")
                        self.wfile.write(bytes("404 NOT FOUND", "utf-8"))
                    else:
                        a = self.path.find("/")
                        print("index ", a)
                        abcd = urlPath[urlPath.find("/", a + 1):]
                        print("resolved mapping key", abcd)
                        mapping = siteNames[path[1]].getMapping()
                        if abcd in mapping:
                            print("py mapping exists")
                            a = mapping[abcd].rfind(".")
                            if a != -1:
                                fileName = mapping[abcd][a + 1:]
                                print(fileName)
                                packageName = mapping[abcd][:a]
                                print(packageName)
                                finalpath = 'apps.' + path[
                                    1] + ".private." + packageName + "." + fileName
                                moduleName = __import__(finalpath,
                                                        fromlist=fileName)
                                #request=Request
                                self.send_response(200)
                                request = Request(query)
                                response = Response(self, self.wfile,
                                                    'text/html')
                                moduleName.process(request, response)
                                #self.send_header('Content-type',response.getContentType())
                                #self.end_headers()
                                return
                            else:
                                finalpath = 'apps.' + path[
                                    1] + ".private." + mapping[abcd]
                                moduleName = __import__(finalpath,
                                                        fromlist=mapping[abcd])
                                self.send_response(200)
                                request = Request(query)
                                response = Response(self, self.wfile,
                                                    'text/html')
                                moduleName.process(request, response)
                                #self.send_header('Content-type',response.getContentType())
                                #self.end_headers()
                                return
                        else:
                            newPath = './apps' + urlPath  #self.path[self.path.find("/"):]
                            mimeType = mime.guess_type(newPath)

            else:
                self.send_error(404, '%s Not Found' % (path[1]))
        try:
            #mimeType=mime.guess_type(newPath)
            print(mimeType)
            print(newPath)
            f = open(newPath, 'rb')
            self.send_response(200)
            self.send_header('Content-type', mimeType[0])
            self.end_headers()
            self.wfile.write(f.read())
            f.close()
        except IOError:
            print("File does not exist")
            self.send_error(404, 'File Not Found: %s' % urlPath)

    # print(urlPath)
        return
Example #40
class Sample:
    """Represents a document analysis.

    Uses textract to read document into a long string.  The methods are various
    sequences to get information to help make decisions for deliberate academic
    writing.
    """

    def __init__(self, path):
        """
        Create document instance for analysis.

        Opens and reads the document into the string raw_text.
        Textract interprets the document format and
        converts it to a plain text string (docx, pdf, odt, txt).

        Args:
        path (str): path to the file to open, analyze, and close


        Public attributes:
        -user: (str) optional string to set username.
        -path: (str) relative path to document.
        -abs_path: (str) the absolute path to the document.
        -file_name:  (str) the file name with extension of document (base
        name).
        -mime:  tbd
        -guessed_type:  makes best guess of mimetype of document.
        -file_type:  returns index[0] from guessed_type.
        -raw_text:  (str) plain text extracted from .txt, .odt, .pdf, .docx,
        and .doc.
        -ptext:  (str) raw text after a series of regex expressions to
        eliminate special characters.
        -text_no_feed:  (str) ptext with most new line characters eliminated
        /n/n stays intact.
        -sentence_tokens:  list of all sentences in a comma separated list
        derived by nltk.
        -sentence_count:  (int) count of sentences found in list.
        -passive_sentences:  list of passive sentences identified by the
        passive module.
        -passive_sentence_count:  count of the passive_sentences list.
        -percent_passive:  (float) ratio of passive sentences to all sentences
        in percent form.
        -be_verb_analysis:  (int) sum number of occurrences of each to be verb
        (am, is, are, was, were, be, being been).
        -be_verb_count: tbd
        -be_verb_analysis: tbd
        -weak_sentences_all:  (int) sum of be verb analysis.
        -weak_sentences_set:  (set) set of all sentences identified as
        having to be verbs.
        -weak_sentences_count:  (int) count of items in weak_sentences_set.
        -weak_verbs_to_sentences:  (float) proportion of sentences with to
        be to all sentences in percent (this might not be sound).
        -word_tokens:  list of discrete words in text that breaks
        contractions up (default nltk tokenizer).
        -word_tokens_no_punct:  list of all words in text including
        contractions but otherwise no punctuation.
        -no_punct:  (str) full text string without sentence punctuation.
        -word_tokens_no_punct:  uses white-space tokenizer to create a list
        of all words.
        readability_flesch_re:  (int) Flesch Reading Ease Score (numeric
        score) made by textstat module.
        readability_smog_index:  (int) grade level as determined by the
        SMOG algorithm made by textstat module.
        readability_flesch_kincaid_grade:  (int)  Flesch-Kincaid grade level
        of reader made by textstat module.
        readability_coleman_liau_index:  (int) grade level of reader as made by
        textstat module.
        readability_ari:  (int) grade level of reader determined by the
        automated readability index algorithm implemented by textstat.
        readability_linser_write:  (int) grade level as determined by the
        Linsear Write algorithm implemented by textstat.
        readability_dale_chall:  (int) grade level based on Dale-Chall
        readability as determined by textstat.
        readability_standard:  composite grade level based on readability
        algorithms.
        -flesch_re_key:  list for interpreting Flesch RE Score.
        -word_count:  word count of document based on the whitespace
        tokenizer; this word count should be used.
        -page_length:  (float) page length in decimal format given 250
        words per page.
        -paper_count:  (int) number of printed pages given 250 words per
        page.
        -parts_of_speech:  words with parts of speech tags.
        -pos_counts:  values in word, tag couple grouped in a list.
        -pos_total:  (int) sum of pos_counts values
        -pos_freq:  (dict) word, ratio of whole
        -doc_pages:  (float) page length based on 250 words per page
        (warning, this is the second time this attribute is defined).
        -freq_words:  word frequency count not standardized based on the
        correct word tokener (not ratio, just count).
        modal_dist:  count of auxiliary verbs based on word_tokens_no_punct.
        sentence_count (int): Count the sentence tokens
        passive_sentences (list): List of all sentences identified as passive
        passive_sentence_count (int): count of items in passive_sentences
        be_verb_count (int): count "to be" verbs in text
        word_tokens_no_punct (list): words separated, stripped of punctuation,
         made lower case
        flesch_re_key (str): reading ease score to description
        freq_words (list or dict): frequency distribution of all words
        modal_dist (list): frequency distribution of aux verbs
        """
        self.user = ""
        self.path = path
        self.abs_path = os.path.abspath(self.path)
        if os.path.isfile(self.path):
            self.file_name = os.path.basename(path)
            self.mime = MimeTypes()
            self.guessed_type = self.mime.guess_type(self.path)
            self.file_type = self.guessed_type[0]
            self.raw_text = textract.process(self.path)
            self.ptext = re.sub(u'[\u201c\u201d]', '"', self.raw_text)
            self.ptext = re.sub(u"\u2014", "--", self.ptext)
            self.ptext = re.sub(",", ",", self.ptext)
            self.ptext = re.sub("—", "--", self.ptext)
            self.ptext = re.sub("…", "...", self.ptext)
            self.text_no_feed = self.clean_new_lines(self.ptext)
            self.sentence_tokens = self.sentence_tokenize(self.text_no_feed)
            self.sentence_count = len(self.sentence_tokens)
            self.passive_sentences = passive(self.text_no_feed)
            self.passive_sentence_count = len(self.passive_sentences)
            self.percent_passive = (100 *
                                        (float(self.passive_sentence_count) /
                                         float(self.sentence_count)))

            self.be_verb_analysis = self.count_be_verbs(self.sentence_tokens)
            self.be_verb_count = self.be_verb_analysis[0]
            self.weak_sentences_all = self.be_verb_analysis[1]
            self.weak_sentences_set = set(self.weak_sentences_all)
            self.weak_sentences_count = len(self.weak_sentences_set)
            self.weak_verbs_to_sentences = 100 * float(self.weak_sentences_count) / float(self.sentence_count)
            self.word_tokens = self.word_tokenize(self.text_no_feed)
            self.word_tokens_no_punct = \
                self.word_tokenize_no_punct(self.text_no_feed)
            self.no_punct = self.strip_punctuation(self.text_no_feed)
            # use this! It make lower and strips symbols
            self.word_tokens_no_punct = self.ws_tokenize(self.no_punct)
            self.readability_flesch_re = \
                textstat.flesch_reading_ease(self.text_no_feed)
            self.readability_smog_index = \
                textstat.smog_index(self.text_no_feed)
            self.readability_flesch_kincaid_grade = \
                textstat.flesch_kincaid_grade(self.text_no_feed)
            self.readability_coleman_liau_index = \
                textstat.coleman_liau_index(self.text_no_feed)
            self.readability_ari = \
                textstat.automated_readability_index(self.text_no_feed)
            self.readability_linser_write = \
                textstat.linsear_write_formula(self.text_no_feed)
            self.readability_dale_chall = \
                textstat.dale_chall_readability_score(self.text_no_feed)
            self.readability_standard = \
                textstat.text_standard(self.text_no_feed)
            self.flesch_re_key = (
                "* 90-100 : Very Easy",
                "* 80-89 : Easy",
                "* 70-79 : Fairly Easy",
                "* 60-69 : Standard",
                "* 50-59 : Fairly Difficult",
                "* 30-49 : Difficult",
                "* 0-29 : Very Confusing"
                )
            if self.word_tokens_no_punct:
                self.word_count = len(self.word_tokens_no_punct)
                self.page_length = float(self.word_count)/float(250)
                self.paper_count = int(math.ceil(self.page_length))
                self.parts_of_speech = pos_tag(self.word_tokens_no_punct)
                self.pos_counts = Counter(tag for word, tag in
                                          self.parts_of_speech)
                self.pos_total = sum(self.pos_counts.values())
                self.pos_freq = dict((word, float(count)/self.pos_total) for
                                     word, count in self.pos_counts.items())
                self.doc_pages = float(float(self.word_count)/float(250))
                self.freq_words = \
                    self.word_frequency(self.word_tokens_no_punct)
                self.modal_dist = self.modal_count(self.word_tokens_no_punct)
                # self.ws_tokens = self.ws_tokenize(self.text_no_cr)

    def strip_punctuation(self, string_in):
        """
        Strip punctuation from string and make lower case.

        Given a string of sentences, translate string
        to remove some common symbols and convert caps
        to lower case.

        Args:
        string_in (str): Text to strip punctuation from

        return:
        str
        """
        string_in = string_in.translate(None, ',.!?\"<>{}[]--@()\'--')
        return str(string_in.lower())

    def ws_tokenize(self, text):
        """
        Given a string of words, return word tokens while keeping contractions intact.

        Other tokenizers tokenize punctuation. The WhitespaceTokenizer
        is important because of contractions.

        Args:
        text (str)

        returns:
        list
        """
        self.tokenizer = nltk.tokenize.regexp.WhitespaceTokenizer()
        return self.tokenizer.tokenize(text)

    def syllables_per_word(self, text):
        """
        Return count of syllables per word.

        Loops through all words to add word and syllable
        count to a list.

        Args:
        text (str)

        Returns:
        list
        """
        self.word_syllables = []
        for word in text:
            self.word_syllables.append([word,
                                        textstat.textstat.syllable_count(
                                            word)])
        return self.word_syllables

    def polysyllables(self, text):
        """
        Count polysyllables.

        Count words in text string that have >= 3 syllables.

        Args:
        text(str)

        Returns:
        int: polysyllable word count in the text arg

        """
        return textstat.textstat.polysyllabcount(text)

    def word_frequency(self, words):
        """
        List 50 most common words in tokenized list.

        memo: words = [word for word in words if not word.isnumeric()].

        Args:
        text(str)

        Returns:
        list
        """
        words = [word.lower() for word in words]
        self.word_dist = FreqDist(words)
        return self.word_dist.most_common(50)

    def word_tokenize_no_punct(self, text):
        """
        Make list of words without listing punctuation.

        Args:
            text (str): Plain text string

        Returns:
             list of words
        """
        tokenizer = RegexpTokenizer(r'\w+')
        return tokenizer.tokenize(text)

    def word_tokenize(self, paragraph):
        """
        Tokenize words from long string that includes sentences.

        Uses default tokenizer from nltk.

        Args:
        paragraph (str)

        Return:
        list
        """
        try:
            self.word_tokens = tokenize.word_tokenize(paragraph)
            return self.word_tokens
        except Exception:
            print("Error: Cannot perform word analyses.")
            return False

    def sentence_tokenize(self, paragraphs):
        """
        Tokenize sentences.

        Uses default sent tokenizer.

        Args:
        paragraph (str)

        Returns:
        list
        """
        try:
            self.sentences = tokenize.sent_tokenize(paragraphs)
            return self.sentences
        except Exception:
            print("Could not tokenize text.")
            return False

    def clean_new_lines(self, paragraphs):
        """Strip new line characters except for new paragraphs."""
        # Preserve paragraph breaks ("\n\n") with a sentinel, strip the
        # remaining line breaks, then restore the breaks as single newlines.
        self.text_no_cr = (paragraphs.replace("\n\n", "TOADIES")
                           .replace("\r", "")
                           .replace("\n", "")
                           .replace("TOADIES", "\n"))
        return self.text_no_cr

    def count_be_verbs(self, sentences):
        """
        Count be verbs in each sentence in a list.

        Loop through sentences to provide weak verb count.
        If count >= 1, add sentence to list.

        Args:
            sentences (str, list)

        Return:
            list of be-verb count and stand-out sentences

        """
        self.verbs = [" am ", " is ", " are ", " was ", " were ", " be ",
                      " being ", " been "]
        self.weak_sentences = []
        self.verb_count = 0
        for sentence in sentences:
            for verb in self.verbs:
                if verb in sentence:
                    self.verb_count = self.verb_count + 1
                    self.weak_sentences.append(sentence)

        return [self.verb_count, self.weak_sentences]

    def syllable_count(self, word):
        """
        Count syllables in a word.

        Uses NLTK dictionary to find word syllabication.

        Args:
            word (string)

        Returns:
            int syllable count
        """
        self.d = cmudict.dict()
        return min([len([y for y in x if isdigit(y[-1])])
                    for x in self.d[str(word).lower()]])

    def modal_count(self, text):
        """
        Return FreqDist of modal verbs in text.

        Args:
            text (str)

        Return:
            list
        """
        fdist = FreqDist(w.lower() for w in text)
        modals = ['can', 'could', 'shall', 'should', 'will', 'would', 'do',
                  'does', 'did', 'may', 'might', 'must', 'has', 'have', 'had']
        modals_freq = []
        for m in modals:
            modals_freq.append(str(m + ': ' + str(fdist[m])))
        return modals_freq
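A minimal usage sketch for the Sample class above, assuming textract, nltk, and textstat are installed and that 'essay.docx' is a hypothetical local document containing text:

# Hypothetical usage sketch; 'essay.docx' is a placeholder path.
doc = Sample('essay.docx')
print(doc.file_type)             # guessed mimetype of the document
print(doc.word_count)            # count based on the whitespace tokenizer
print(doc.readability_standard)  # composite grade level from textstat
print(doc.percent_passive)       # percent of sentences flagged as passive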
Exemplo n.º 41
0
    def populateFileInfo(self, scanOption):
        '''Runs the two scanners and parses their output into the fileInfo object'''
        ''' Get File Type'''
        mime = MimeTypes()
        self.fileType = mime.guess_type(self.filePath)[0]

        if self.fileType == None:
            self.fileType = 'Unknown'
        '''Check to see if file is cached.'''
        cached = self.isCached()
        '''If it isn't cached, run scans, else get file from database.'''
        if cached == -1:
            if scanOption == 'fossology':
                '''Run fossology'''
                '''Fossology doesn't return an exit code of 0 so we must always catch the output.'''
                try:
                    fossOutput = subprocess.check_output(
                        [settings.FOSSOLOGY_PATH, self.filePath])
                except OSError as ose:
                    print "Error running FOSSology nomos, check your path to nomos in settings.py"
                    fossOutput = ''
                except Exception as e:
                    fossOutput = str(e.output)
                '''Parse outputs'''
                (fileName, fossLicense) = output_parser.foss_parser(fossOutput)
                self.licenseInfoInFile.append(fossLicense)
                self.licenseComments = "#FOSSology "
                self.licenseComments += fossLicense
            else:
                '''Scan to find licenses'''
                '''Run Ninka'''
                ninkaOutput = subprocess.check_output(
                    [settings.NINKA_PATH, self.filePath],
                    preexec_fn=lambda: signal(SIGPIPE, SIG_DFL))
                '''Run fossology'''
                '''Fossology doesn't return an exit code of 0 so we must always catch the output.'''
                try:
                    fossOutput = subprocess.check_output(
                        [settings.FOSSOLOGY_PATH, self.filePath])
                except OSError as ose:
                    print "Error running FOSSology nomos, check your path to nomos in settings.py"
                    fossOutput = ''
                except Exception as e:
                    fossOutput = str(e.output)
                '''Parse outputs'''
                (fileName,
                 ninkaLicense) = output_parser.ninka_parser(ninkaOutput)
                (fileName, fossLicense) = output_parser.foss_parser(fossOutput)
                '''Get extracted text from ninka "senttok" file'''
                try:
                    with open(self.filePath + ".senttok", 'r') as f:
                        for line in f:
                            if ninkaLicense in line:
                                line_tok = line.split(';')
                                self.extractedText += line_tok[3] + "\n"
                                self.extractedText += line_tok[4]
                except Exception as e:
                    '''Do nothing, we just wont have extracted text for this license.'''
                '''License merging logic.'''
                fossLicense = fossLicense.upper().strip()
                ninkaLicense = ninkaLicense.upper().strip()
                match = output_parser.lic_compare(fossLicense, ninkaLicense)

                if match and fossLicense != 'ERROR':
                    self.licenseInfoInFile.append(fossLicense)
                elif match and fossLicense == 'ERROR':
                    self.licenseInfoInFile.append(ninkaLicense)
                elif not match and fossLicense == 'UNKNOWN':
                    self.licenseInfoInFile.append(ninkaLicense)
                else:
                    self.licenseInfoInFile.append("NO ASSERTION")

                self.licenseComments = "#FOSSology "
                self.licenseComments += fossLicense
                self.licenseComments += " #Ninka "
                self.licenseComments += ninkaLicense
        else:
            with MySQLdb.connect(host=settings.database_host,
                                 user=settings.database_user,
                                 passwd=settings.database_pass,
                                 db=settings.database_name) as dbCursor:
                self.getChecksum()
                self.getFileInfoFromChecksum(dbCursor)
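The license-merging branch above can be read as a small decision function. A standalone sketch of that logic, where lic_compare stands in for output_parser.lic_compare:

def merge_license_results(foss_license, ninka_license, lic_compare):
    # Mirrors the merging branch above: prefer FOSSology unless it errored,
    # fall back to Ninka when the scanners agree or FOSSology is unsure,
    # otherwise record no assertion.
    foss_license = foss_license.upper().strip()
    ninka_license = ninka_license.upper().strip()
    match = lic_compare(foss_license, ninka_license)
    if match and foss_license != 'ERROR':
        return foss_license
    if match and foss_license == 'ERROR':
        return ninka_license
    if not match and foss_license == 'UNKNOWN':
        return ninka_license
    return 'NO ASSERTION'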
Exemplo n.º 42
0
def get_image():
    image_path = request.args.get('image_path')
    mime = MimeTypes()
    mimetype, _ = mime.guess_type(image_path)
    return send_file(os.path.join('..', image_path), mimetype=mimetype)
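One detail worth noting for the route above: guess_type() returns (None, None) for unrecognized extensions, in which case send_file may not be able to pick a mimetype on its own. A hedged variant with an explicit fallback, assuming a Flask app object (route and parameter names here are illustrative):

import os
from mimetypes import MimeTypes
from flask import Flask, request, send_file

app = Flask(__name__)

@app.route('/image')
def get_image_with_fallback():
    image_path = request.args.get('image_path')
    mimetype, _ = MimeTypes().guess_type(image_path)
    # Fall back to a generic binary type when the extension is unknown.
    return send_file(os.path.join('..', image_path),
                     mimetype=mimetype or 'application/octet-stream')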
Exemplo n.º 43
0
def create_wagtail_image(img_src: str,
                         image_name: str = None,
                         collection_name: str = None) -> Union[None, Image]:
    """
    Create a Wagtail Image from a given source. It takes an optional file name
    and collection name.

    If the collection name is provided, but a collection is not found, a new collection
    will be created.

    Examples:
        create_wagtail_image('/app/source/images/myimage.jpg')
        create_wagtail_image('/app/source/images/myimage.jpg', image_name='Same Image.jpg')
        create_wagtail_image('/app/source/images/myimage.jpg', collection_name='Dev test collection')
    """

    mime = MimeTypes()
    # guess_type() returns a (type, encoding) tuple; the type element may be None.
    mime_type, _ = mime.guess_type(img_src)

    if mime_type:
        # e.g. 'image/jpeg' -> 'JPEG', the format name PIL expects.
        mime_type = mime_type.split('/')[1].upper()
    else:
        # Default to a JPEG mimetype.
        mime_type = 'JPEG'

    f = BytesIO()

    # Copy the image to the local machine before converting it to a Wagtail image.
    if img_src.startswith("http"):
        # Download the image from a URL. Requires the requests package.
        response = requests.get(img_src, stream=True)
        if response.status_code == 200:
            # Create an image out of the Cloudinary URL and write it to a PIL Image.
            pil_image = PILImage.open(response.raw)
            pil_image.save(f, mime_type)
        else:
            # Image URL didn't 200 for us. Nothing we can do about that. Return early.
            print(f"Could not generate image from url {img_src}")
            return
    else:
        # Save the image from a local source. The requests package is not needed.
        pil_image = PILImage.open(img_src)
        pil_image.save(f, mime_type)

    # If the image is supposed to be part of a collection, look for the collection or create it.
    collection = Collection.get_first_root_node()
    if collection_name:
        specific_collection = Collection.objects.filter(
            name=collection_name).first()
        # Use the specific collection if it's found. Otherwise create a new collection
        # based on the `collection_name` parameter.
        collection = specific_collection if specific_collection else collection.add_child(
            name=collection_name)

    # If an image name was not provided, create one from the img_src.
    if not image_name:
        image_name = ntpath.basename(img_src)

    # Create the Wagtail Image and return it
    wagtail_image = Image.objects.create(title=image_name,
                                         file=ImageFile(f, name=image_name),
                                         collection=collection)
    return wagtail_image
Exemplo n.º 44
0
f_path = os.path.dirname(sys.argv[1])
if f_path != '':
    f_path = f_path + '/'
soup = BeautifulSoup(f, 'html.parser')

# encode favicon, styles, images, videos, audios, and scripts
for tag, attr in tags.iteritems():
    encode(tag, attr)

# encode background images
styled_tags = soup.find_all(style=True)
for tag in styled_tags:
    if 'background-image' in tag['style']:
        b_img_url = re.findall(r'background-image:url\((.*?)\);',
                               tag['style'])[0]
        mimetype = mime.guess_type(b_img_url)
        with open(f_path + b_img_url, 'rb') as b_img_file:
            encoded_uri = 'data:' + mimetype[
                0] + ';base64,' + base64.b64encode(b_img_file.read())
        tag['style'] = tag['style'].replace(b_img_url, encoded_uri)

# encode fonts (they need to be included in an internal CSS style with the "fonts" ID)
font_css = soup.find('style', {'id': 'fonts'})
if font_css:
    fonts_url = re.findall(r'url\((.*?)\)', font_css.contents[0])
    for font_url in fonts_url:
        mimetype = mime.guess_type(font_url)
        with open(f_path + font_url, 'rb') as font_file:
            encoded_uri = 'data:' + mimetype[
                0] + ';base64,' + base64.b64encode(font_file.read())
        font_css.string = font_css.contents[0].replace(font_url, encoded_uri)
Exemplo n.º 45
0
    def EscanearArquivo(self, event):
        processo = ValidadorCodigoBarras()
        scanner = zbar.Scanner()
        try:
            codigos_barras_arquivo = []
            temp_url = urllib.pathname2url(event.src_path)
            mimetypes = MimeTypes()
            mimetype_file = mimetypes.guess_type(temp_url)
            del(mimetypes)
            if mimetype_file[0] != 'application/pdf':
                EnviarEmailERRO('Formato de arquivo recebido nao reconhecido ou nao aceito. Arquivo: ' + event.src_path + ' - Mimetype: ' + str(mimetype_file[0]))
                return

            arquivo_ftp = event.src_path

            arquivo_pdf_temp = re.sub('(?i)'+re.escape(diretorio_ftp), lambda m: diretorio_temp, arquivo_ftp)
            pdfWriter = PyPDF2.PdfFileWriter()
            pdfFileObj = open(arquivo_ftp, 'rb')
            pdfReader = PyPDF2.PdfFileReader(pdfFileObj)

            pageObj = pdfReader.getPage(0)
            pdfWriter.addPage(pageObj)

            with open(arquivo_pdf_temp, 'wb') as output:
                pdfWriter.write(output)

            pdfFileObj.close()

            del(pdfWriter)
            del(pdfFileObj)
            del(pdfReader)

            arquivo_imagem = re.sub('(?i)'+re.escape('.pdf'), lambda m: '.jpg', arquivo_pdf_temp)

            imagem_temp = Image(filename=arquivo_pdf_temp, resolution=(400,400))
            imagem_temp = imagem_temp.convert('jpeg')
            imagem_temp.save(filename=arquivo_imagem)
            del(imagem_temp)

            processo.nome_arquivo_pdf = arquivo_ftp
            processo.nome_arquivo_thumbnail = arquivo_imagem

            # color
            #print(arquivo_ftp + ' - colorido - sem efeito')
            codigos_barras = []
            imagem_temp = PI.open(arquivo_imagem).convert('L')
            imagem_array_temp = np.array(imagem_temp)
            del(imagem_temp)
            codigo_barras_temp = scanner.scan(imagem_array_temp)
            del(imagem_array_temp)
            for codigo_barras in codigo_barras_temp:
                if codigo_barras.type == 'QR-Code':
                    continue
                codigos_barras.append(codigo_barras.data)
            if len(codigos_barras) > 0:
                for codigo_barras in codigos_barras:
                    processo.codigo_barras = codigo_barras
                    if processo.Processar(True):
                        logger.info(arquivo_ftp + ' - colorido - sem efeito - ' + codigo_barras)
                        os.remove(arquivo_ftp)
                        os.remove(arquivo_pdf_temp)
                        os.remove(arquivo_imagem)
                        return
                    else:
                        if codigo_barras not in codigos_barras_arquivo:
                            codigos_barras_arquivo.append(codigo_barras)

            #print(arquivo_ftp + ' - colorido - gaussian blur')
            codigos_barras = []
            imagem_temp = cv2.imread(arquivo_imagem)
            imagem_temp = cv2.GaussianBlur(imagem_temp, (5, 5), 0)
            imagem_temp = PI.fromarray(imagem_temp).convert('L')
            imagem_temp = np.array(imagem_temp)
            codigo_barras_temp = scanner.scan(imagem_temp)
            for codigo_barras in codigo_barras_temp:
                if codigo_barras.type == 'QR-Code':
                    continue
                codigos_barras.append(codigo_barras.data)
            del(imagem_temp)
            if len(codigos_barras) > 0:
                for codigo_barras in codigos_barras:
                    processo.codigo_barras = codigo_barras
                    if processo.Processar(True):
                        logger.info(arquivo_ftp + ' - colorido - gaussian blur - ' + codigo_barras)
                        os.remove(arquivo_ftp)
                        os.remove(arquivo_pdf_temp)
                        os.remove(arquivo_imagem)
                        return
                    else:
                        if codigo_barras not in codigos_barras_arquivo:
                            codigos_barras_arquivo.append(codigo_barras)

            #print(arquivo_ftp + ' - colorido - median blur')
            codigos_barras = []
            imagem_temp = cv2.imread(arquivo_imagem)
            imagem_temp = cv2.medianBlur(imagem_temp, 5)
            imagem_temp = PI.fromarray(imagem_temp).convert('L')
            imagem_temp = np.array(imagem_temp)
            codigo_barras_temp = scanner.scan(imagem_temp)
            for codigo_barras in codigo_barras_temp:
                if codigo_barras.type == 'QR-Code':
                    continue
                codigos_barras.append(codigo_barras.data)
            del(imagem_temp)
            if len(codigos_barras) > 0:
                for codigo_barras in codigos_barras:
                    processo.codigo_barras = codigo_barras
                    if processo.Processar(True):
                        logger.info(arquivo_ftp + ' - colorido - median blur - ' + codigo_barras)
                        os.remove(arquivo_ftp)
                        os.remove(arquivo_pdf_temp)
                        os.remove(arquivo_imagem)
                        return
                    else:
                        if codigo_barras not in codigos_barras_arquivo:
                            codigos_barras_arquivo.append(codigo_barras)

            for angle in range(5, 90, 5):
                #print(arquivo_ftp + ' - colorido - angulo - ' + str(angle))
                codigos_barras = []
                imagem_temp = cv2.imread(arquivo_imagem)
                (oldY,oldX) = imagem_temp.shape[:-1]
                rotation_matrix = cv2.getRotationMatrix2D((oldX/2,oldY/2),angle,1)
                (newX,newY) = (oldX*1,oldY*1)
                r = np.deg2rad(angle)
                (newX,newY) = (abs(np.sin(r)*newY)+abs(np.cos(r)*newX),abs(np.sin(r)*newX)+abs(np.cos(r)*newY))
                (tx,ty) = ((newX-oldX)/2,(newY-oldY)/2)
                rotation_matrix[0,2] += tx
                rotation_matrix[1,2] += ty
                imagem_temp = cv2.warpAffine(imagem_temp, rotation_matrix, dsize=(int(newX),int(newY)), flags=cv2.INTER_LINEAR)
                imagem_temp = PI.fromarray(imagem_temp).convert('L')
                imagem_temp = np.array(imagem_temp)
                codigo_barras_temp = scanner.scan(imagem_temp)
                for codigo_barras in codigo_barras_temp:
                    if codigo_barras.type == 'QR-Code':
                        continue
                    codigos_barras.append(codigo_barras.data)
                del(imagem_temp)
                if len(codigos_barras) > 0:
                    for codigo_barras in codigos_barras:
                        processo.codigo_barras = codigo_barras
                        if processo.Processar(True):
                            logger.info(arquivo_ftp + ' - colorido - angulo - ' + str(angle) + ' - ' + codigo_barras)
                            os.remove(arquivo_ftp)
                            os.remove(arquivo_pdf_temp)
                            os.remove(arquivo_imagem)
                            return
                        else:
                            if codigo_barras not in codigos_barras_arquivo:
                                codigos_barras_arquivo.append(codigo_barras)

            # black and white
            #print(arquivo_ftp + ' - preto e branco - sem efeito')
            codigos_barras = []
            imagem_temp = cv2.imread(arquivo_imagem)
            imagem_temp = cv2.cvtColor(imagem_temp, cv2.COLOR_RGB2GRAY)
            cv2.imwrite(arquivo_imagem, imagem_temp)
            imagem_temp = PI.open(arquivo_imagem).convert('L')
            imagem_temp = np.array(imagem_temp)
            codigo_barras_temp = scanner.scan(imagem_temp)
            for codigo_barras in codigo_barras_temp:
                if codigo_barras.type == 'QR-Code':
                    continue
                codigos_barras.append(codigo_barras.data)
            del(imagem_temp)
            if len(codigos_barras) > 0:
                for codigo_barras in codigos_barras:
                    processo.codigo_barras = codigo_barras
                    if processo.Processar(True):
                        logger.info(arquivo_ftp + ' - preto e branco - sem efeito - ' + codigo_barras)
                        os.remove(arquivo_ftp)
                        os.remove(arquivo_pdf_temp)
                        os.remove(arquivo_imagem)
                        return
                    else:
                        if codigo_barras not in codigos_barras_arquivo:
                            codigos_barras_arquivo.append(codigo_barras)

            #print(arquivo_ftp + ' - preto e branco - gaussian blur')
            codigos_barras = []
            imagem_temp = cv2.imread(arquivo_imagem)
            imagem_temp = cv2.GaussianBlur(imagem_temp, (5, 5), 0)
            imagem_temp = PI.fromarray(imagem_temp).convert('L')
            imagem_temp = np.array(imagem_temp)
            codigo_barras_temp = scanner.scan(imagem_temp)
            for codigo_barras in codigo_barras_temp:
                if codigo_barras.type == 'QR-Code':
                    continue
                codigos_barras.append(codigo_barras.data)
            del(imagem_temp)
            if len(codigos_barras) > 0:
                for codigo_barras in codigos_barras:
                    processo.codigo_barras = codigo_barras
                    if processo.Processar(True):
                        logger.info(arquivo_ftp + ' - preto e branco - gaussian blur - ' + codigo_barras)
                        os.remove(arquivo_ftp)
                        os.remove(arquivo_pdf_temp)
                        os.remove(arquivo_imagem)
                        return
                    else:
                        if codigo_barras not in codigos_barras_arquivo:
                            codigos_barras_arquivo.append(codigo_barras)

            #print(arquivo_ftp + ' - preto e branco - median blur')
            codigos_barras = []
            imagem_temp = cv2.imread(arquivo_imagem)
            imagem_temp = cv2.medianBlur(imagem_temp, 5)
            imagem_temp = PI.fromarray(imagem_temp).convert('L')
            imagem_temp = np.array(imagem_temp)
            codigo_barras_temp = scanner.scan(imagem_temp)
            for codigo_barras in codigo_barras_temp:
                if codigo_barras.type == 'QR-Code':
                    continue
                codigos_barras.append(codigo_barras.data)
            del(imagem_temp)
            if len(codigos_barras) > 0:
                for codigo_barras in codigos_barras:
                    processo.codigo_barras = codigo_barras
                    if processo.Processar(True):
                        logger.info(arquivo_ftp + ' - preto e branco - median blur - ' + codigo_barras)
                        os.remove(arquivo_ftp)
                        os.remove(arquivo_pdf_temp)
                        os.remove(arquivo_imagem)
                        return
                    else:
                        if codigo_barras not in codigos_barras_arquivo:
                            codigos_barras_arquivo.append(codigo_barras)

            for angle in range(5, 90, 5):
                #print(arquivo_ftp + ' - preto e branco - angulo - ' + str(angle))
                codigos_barras = []
                imagem_temp = cv2.imread(arquivo_imagem)
                (oldY,oldX) = imagem_temp.shape[:-1]
                rotation_matrix = cv2.getRotationMatrix2D((oldX/2,oldY/2),angle,1)
                (newX,newY) = (oldX*1,oldY*1)
                r = np.deg2rad(angle)
                (newX,newY) = (abs(np.sin(r)*newY)+abs(np.cos(r)*newX),abs(np.sin(r)*newX)+abs(np.cos(r)*newY))
                (tx,ty) = ((newX-oldX)/2,(newY-oldY)/2)
                rotation_matrix[0,2] += tx
                rotation_matrix[1,2] += ty
                imagem_temp = cv2.warpAffine(imagem_temp, rotation_matrix, dsize=(int(newX),int(newY)), flags=cv2.INTER_LINEAR)
                imagem_temp = PI.fromarray(imagem_temp).convert('L')
                imagem_temp = np.array(imagem_temp)
                codigo_barras_temp = scanner.scan(imagem_temp)
                for codigo_barras in codigo_barras_temp:
                    if codigo_barras.type == 'QR-Code':
                        continue
                    codigos_barras.append(codigo_barras.data)
                del(imagem_temp)
                if len(codigos_barras) > 0:
                    for codigo_barras in codigos_barras:
                        processo.codigo_barras = codigo_barras
                        if processo.Processar(True):
                            logger.info(arquivo_ftp + ' - preto e branco - angulo - ' + str(angle) + ' - ' + codigo_barras)
                            os.remove(arquivo_ftp)
                            os.remove(arquivo_pdf_temp)
                            os.remove(arquivo_imagem)
                            return
                        else:
                            if codigo_barras not in codigos_barras_arquivo:
                                codigos_barras_arquivo.append(codigo_barras)

            imagem_temp = Image(filename=arquivo_pdf_temp, resolution=(400,400))
            imagem_temp.convert('jpeg')
            imagem_temp.save(filename=arquivo_imagem)
            del(imagem_temp)

            processo.codigo_barras = '#-#'.join(str(codigo_barras) for codigo_barras in codigos_barras_arquivo)
            if processo.Processar(False):
                logger.info(arquivo_ftp + ' - ultimo recurso - ' + processo.codigo_barras)
                os.remove(arquivo_ftp)
                os.remove(arquivo_pdf_temp)
                os.remove(arquivo_imagem)
                return
        finally:
            if isinstance(scanner,zbar.Scanner):
                del(scanner)
            if isinstance(processo,ValidadorCodigoBarras):
                del(processo)
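The example above repeats the same rotate-and-expand transform for several angles before rescanning. A standalone sketch of that transform (the input is assumed to be an OpenCV image array):

import numpy as np
import cv2

def rotate_expanded(image, angle):
    # Rotate `image` by `angle` degrees around its center, expanding the
    # canvas so no corner is cropped, as done repeatedly in the example above.
    (old_y, old_x) = image.shape[:2]
    matrix = cv2.getRotationMatrix2D((old_x / 2, old_y / 2), angle, 1)
    r = np.deg2rad(angle)
    new_x = abs(np.sin(r) * old_y) + abs(np.cos(r) * old_x)
    new_y = abs(np.sin(r) * old_x) + abs(np.cos(r) * old_y)
    matrix[0, 2] += (new_x - old_x) / 2
    matrix[1, 2] += (new_y - old_y) / 2
    return cv2.warpAffine(image, matrix, dsize=(int(new_x), int(new_y)),
                          flags=cv2.INTER_LINEAR)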
Exemplo n.º 46
0
    def run(self, path):
        if path == "/":
            f = open(os.path.join(config.get_runtime_path(), "platformcode", "template", "page.html"), "rb")
            self.handler.send_response(200)
            self.handler.send_header('Content-type', 'text/html')
            self.handler.end_headers()
            respuesta = f.read()
            self.handler.wfile.write(respuesta)
            f.close()

        elif path.startswith("/local/"):
            import base64
            if PY3:
                import urllib.parse as urllib
            else:
                import urllib
            Path = path.replace("/local/", "").split("/")[0]
            Path = base64.b64decode(urllib.unquote_plus(Path)).decode()
            Size = int(os.path.getsize(Path))
            f = open(Path, "rb")
            # Default to the whole file; override when a parsable Range header arrives.
            Inicio = 0
            Fin = Size - 1
            range_header = self.handler.headers.get("range")
            if range_header is not None and "=" in str(range_header) and "-" in str(range_header):
                Inicio = int(str(range_header).split("=")[1].split("-")[0])
                if str(range_header).split("=")[1].split("-")[1] != "":
                    Fin = int(str(range_header).split("=")[1].split("-")[1])

            if not Fin > Inicio: Fin = Size - 1

            if self.handler.headers.get("range") == None:
                logger.info("-------------------------------------------------------")
                logger.info("Solicitando archivo local: " + Path)
                logger.info("-------------------------------------------------------")

                self.handler.send_response(200)
                self.handler.send_header("Content-Disposition", "attachment; filename=video.mp4")
                self.handler.send_header('Accept-Ranges', 'bytes')
                self.handler.send_header('Content-Length', str(Size))
                self.handler.send_header("Connection", "close")
                self.handler.end_headers()
                while True:
                    time.sleep(0.2)
                    buffer = f.read(1024 * 250)
                    if not buffer:
                        break
                    self.handler.wfile.write(buffer)
                self.handler.wfile.close()
                f.close()
            else:
                logger.info("-------------------------------------------------------")
                logger.info("Solicitando archivo local: " + Path)
                logger.info("Rango: " + str(Inicio) + "-" + str(Fin) + "/" + str(Size))
                logger.info("-------------------------------------------------------")
                f.seek(Inicio)

                self.handler.send_response(206)
                self.handler.send_header("Content-Disposition", "attachment; filename=video.mp4")
                self.handler.send_header('Accept-Ranges', 'bytes')
                self.handler.send_header('Content-Length', str(Fin - Inicio))
                self.handler.send_header('Content-Range', str(Inicio) + "-" + str(Fin) + "/" + str(Size))
                self.handler.send_header("Connection", "close")

                self.handler.end_headers()
                while True:
                    time.sleep(0.2)
                    buffer = f.read(1024 * 250)
                    if not buffer:
                        break
                    self.handler.wfile.write(buffer)
                self.handler.wfile.close()
                f.close()
        elif path.startswith("/media/"):
            file = os.path.join(config.get_runtime_path(), "platformcode", "template", path[7:])
            from mimetypes import MimeTypes
            mime = MimeTypes()
            mime_type = mime.guess_type(file)
            # guess_type() returns a (type, encoding) tuple; fall back to an empty string.
            mim = mime_type[0] or ""
            f = open(file, "rb")
            self.handler.send_response(200)
            self.handler.send_header('Content-type', mim)
            self.handler.end_headers()
            self.handler.wfile.write(f.read())
            f.close()
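The /local/ branch above hand-parses the HTTP Range header. For reference, a small standalone sketch of that parsing, defaulting to the whole file when the header is absent or malformed (header values are hypothetical):

def parse_range_header(range_header, size):
    """Return (start, end) byte offsets for a "bytes=start-end" Range header."""
    start, end = 0, size - 1
    if range_header and "=" in range_header and "-" in range_header:
        spec = range_header.split("=")[1]
        first, last = spec.split("-")[0], spec.split("-")[1]
        if first:
            start = int(first)
        if last:
            end = int(last)
    return start, end

print(parse_range_header("bytes=100-499", 1000))  # -> (100, 499)
print(parse_range_header(None, 1000))             # -> (0, 999)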
Exemplo n.º 47
0
    def handle(self, *args, **options):
        products = Product.objects.all()
        buyersguide_page = self.get_or_create_buyers_guide()

        for product in products:
            # 1. Create ProductPage out of this product
            product = product.specific  # Get the specific class

            # Always refresh the buyersguide_page to update treebeards pathing
            buyersguide_page.refresh_from_db()

            # Check if ProductPage exists. If it does, continue on.
            # This check will allow us to run this script more than once if needed
            if ProductPage.objects.filter(slug=product.slug).exists():
                self.debug_print(
                    f"Product '{product.slug}' already exists, skipping.")
                continue

            if isinstance(product, SoftwareProduct):
                new_product_page = SoftwareProductPage()
                specific_fields = [
                    'medical_privacy_compliant', 'easy_to_learn_and_use',
                    'handles_recordings_how', 'recording_alert',
                    'recording_alert_helptext',
                    'medical_privacy_compliant_helptext', 'host_controls',
                    'easy_to_learn_and_use_helptext'
                ]
            elif isinstance(product, GeneralProduct):
                new_product_page = GeneralProductPage()
                specific_fields = [
                    'camera_device', 'camera_app', 'microphone_device',
                    'microphone_app', 'location_device', 'location_app',
                    'personal_data_collected', 'biometric_data_collected',
                    'social_data_collected', 'how_can_you_control_your_data',
                    'data_control_policy_is_bad', 'track_record_choices',
                    'company_track_record', 'track_record_is_bad',
                    'track_record_details', 'offline_capable',
                    'offline_use_description', 'uses_ai',
                    'ai_uses_personal_data', 'ai_is_transparent', 'ai_helptext'
                ]
            self.debug_print(
                f"Treating '{product.slug}' as {new_product_page.__class__.__name__}"
            )

            # Apply the fields that are different or may cause issues if copied directly from one model to another
            new_product_page.slug_en = product.slug
            new_product_page.title = product.name
            new_product_page.title_en = product.name
            new_product_page.product_url = product.url
            new_product_page.cloudinary_image = product.cloudinary_image
            new_product_page.live = not product.draft  # If product is draft, it shall not be live.

            # These are the common fields between SoftwareProductPages and GeneralProductPages
            fields = specific_fields + [
                'slug', 'privacy_ding', 'adult_content', 'uses_wifi',
                'uses_bluetooth', 'review_date', 'company', 'blurb', 'price',
                'worst_case', 'signup_requires_email', 'signup_requires_phone',
                'signup_requires_third_party_account',
                'signup_requirement_explanation',
                'how_does_it_use_data_collected',
                'data_collection_policy_is_bad',
                'user_friendly_privacy_policy',
                'show_ding_for_minimum_security_standards',
                'meets_minimum_security_standards', 'uses_encryption',
                'uses_encryption_helptext', 'security_updates',
                'security_updates_helptext', 'strong_password',
                'strong_password_helptext', 'manage_vulnerabilities',
                'manage_vulnerabilities_helptext', 'privacy_policy',
                'privacy_policy_helptext', 'phone_number', 'live_chat',
                'email', 'twitter'
            ]

            self.debug_print("\tSetting fields:")
            for field in fields:
                # Loop through every field for this product and copy the value
                # from the Product model to the Page model.
                self.debug_print("\t\t", field, " as ",
                                 getattr(product, field))
                setattr(new_product_page, field, getattr(product, field))

            self.debug_print(f"Product has image? {bool(product.image)}")
            self.debug_print(
                f"Product has cloudinary image? {bool(product.cloudinary_image)}"
            )

            # Get the image file field, and convert it into a WagtailImage object
            if product.image:
                # Check if there is an image file. If there isn't one, don't try to copy the
                # FieldFile to a WagtailImage object.
                try:
                    image_file = product.image.file
                except FileNotFoundError:
                    image_file = None

                if image_file:
                    mime = MimeTypes()
                    # guess_type() returns ('image/jpeg', None); the type element may be None.
                    mime_type, _ = mime.guess_type(product.image.file.name)
                    if mime_type:
                        # e.g. 'image/jpeg' -> 'JPEG', the format name PIL expects.
                        mime_type = mime_type.split('/')[1].upper()
                    else:
                        # Default to a JPEG mimetype.
                        mime_type = 'JPEG'
                    # Create an image out of the FileField.
                    pil_image = PILImage.open(product.image.file)
                    f = BytesIO()
                    pil_image.save(f, mime_type)
                    # Store the image as a WagtailImage object
                    new_image_name = ntpath.basename(product.image.file.name)
                    wagtail_image = WagtailImage.objects.create(
                        title=new_image_name,
                        file=ImageFile(f, name=new_image_name))
                    # Associate new_product_page.image with wagtail_image
                    new_product_page.image = wagtail_image

            # Add the new page as a child to BuyersGuidePage. This will add a
            # `path` to the new_product_page and place it in the Wagtail Tree
            # using Django Treebeard
            buyersguide_page.add_child(instance=new_product_page)

            # Save revision and/or publish so we can add Orderables to this page.
            new_product_page.save()
            new_product_page.save_revision()

            self.debug_print("\tCreated", new_product_page)

            # Loop through all the m2ms and create Orderable objects for this new page type
            # Add privacy policy links
            for privacy_link in product.privacy_policy_links.all():
                new_orderable = ProductPagePrivacyPolicyLink()
                new_orderable.page = new_product_page
                new_orderable.label = privacy_link.label
                new_orderable.url = privacy_link.url
                new_orderable.save()
                new_product_page.privacy_policy_links.add(new_orderable)
                self.debug_print("\tPrivacy Orderables added")
            # Add product categories
            for category in product.product_category.all():
                new_orderable = ProductPageCategory()
                new_orderable.product = new_product_page
                new_orderable.category = category
                new_orderable.save()
                new_product_page.product_categories.add(new_orderable)
                self.debug_print("\tCategory Orderables added")
            # Add updates
            for update in product.updates.all():
                new_orderable = ProductUpdates()
                new_orderable.page = new_product_page
                new_orderable.update = update
                new_orderable.save()
                new_product_page.updates.add(new_orderable)
                self.debug_print("\tUpdate Orderables added")

            # Attach a Votes object to each page if `Page.get_or_create_votes()` exists.
            # Default vote values so the assignment below never hits an unbound name.
            values = [0, 0, 0, 0, 0]
            product_total = 0
            if hasattr(new_product_page, 'get_or_create_votes'):
                new_product_page.get_or_create_votes()
                # Use .to_dict() to pull out the old aggregated votes
                product_dict = product.to_dict()
                votes = product_dict.get('votes', None)
                if votes:
                    votes = votes.get('creepiness').get('vote_breakdown')
                    self.debug_print(votes)
                    values = [x for (i, x) in sorted(votes.items())]
                    product_total = sum([
                        x * ((i + 1) * 20 - 10) for i, x in enumerate(values)
                    ])
                    self.debug_print(
                        f'\tOriginal votes: {values} (total score: {product_total})'
                    )
                else:
                    # Default vote "bin"
                    values = [0, 0, 0, 0, 0]
                    product_total = 0

            new_product_page.votes.set_votes(values)
            new_product_page.creepiness_value = product_total
            new_product_page.save()
            self.debug_print(
                f'\tNew product votes: {new_product_page.get_or_create_votes()}'
            )

            if not product.draft:
                new_product_page.live = True
                new_product_page.save_revision().publish()
            else:
                new_product_page.save_revision()

            # Always good to fresh from db when using Django Treebeard.
            buyersguide_page.refresh_from_db()

        time.sleep(1)

        # Once all the ProductPages are added, add related_products
        # By writing a secondary for loop we can avoid attaching a legacy_product
        # to each ProductPage because they'll have slugs in common.
        self.debug_print("\nFinal step: Adding related products\n")

        # Loop through every ProductPage we now have.
        for product_page in ProductPage.objects.all():
            # Fetch the PNI Product that this page was created from.
            try:
                product = Product.objects.get(slug=product_page.slug)
            except Product.DoesNotExist:
                self.debug_print(
                    f"Skipping {product_page} because a ProductPage.slug={product_page.slug} was not found"
                )  # noqa
                continue
            # Loop through all the Product.related_products
            for related_product in product.related_products.all():
                try:
                    # Find the related ProductPage based on the correct slug.
                    related_page = ProductPage.objects.get(
                        slug=related_product.slug)
                except ProductPage.DoesNotExist:
                    self.debug_print("Missing product page", product_page)
                    continue
                # Create a new Orderable for the Related Product. This provides
                # a higher quality editing experience for Wagtail editors/admins.
                new_related_product = RelatedProducts()
                new_related_product.page = product_page
                new_related_product.related_product = related_page
                new_related_product.save()
                product_page.related_product_pages.add(new_related_product)
                self.debug_print("\tAdded related product page:", related_page)
Exemplo n.º 48
0
def run_campaign(campaign, domain='demo.django-crm.io', protocol='https'):
    try:
        campaign = Campaign.objects.get(id=campaign)
        attachments = []
        if campaign.attachment:
            file_path = campaign.attachment.path
            file_name = file_path.split("/")[-1]
            with open(file_path, 'rb') as attachment_file:
                content = attachment_file.read()
            mime = MimeTypes()
            mime_type = mime.guess_type(file_path)
            attachments.append((file_name, content, mime_type[0]))
        subject = campaign.subject

        contacts = Contact.objects.filter(
            contact_list__in=[each_list for each_list in campaign.contact_lists.all()])
        default_html = campaign.html_processed
        for each_contact in contacts:
            html = default_html
            campaign_log = CampaignLog.objects.create(contact=each_contact,
                                                      campaign=campaign)
            if campaign.reply_to_email:
                reply_to_email = campaign.reply_to_email
            else:
                message_id = get_campaign_message_id(campaign_log)
                campaign_log.message_id = message_id
                campaign_log.save()
                domain_name = 'django-crm.com'
                if campaign.from_email is not None:
                    from_email = campaign.from_email
                else:
                    from_email = campaign.created_by.email
                reply_to_email = str(from_email) + ' <' + \
                    str(message_id + '@' + domain_name + '') + '>'
            if not (each_contact.is_bounced or each_contact.is_unsubscribed):
                # domain_url = settings.URL_FOR_LINKS
                domain_url = protocol + '://' + domain
                img_src_url = domain_url + reverse('marketing:campaign_open', kwargs={
                    'campaign_log_id': campaign_log.id, 'email_id': each_contact.id})
                # images can only be accessed over https
                link = '<img src={img_src_url} alt="company_logo" title="company_logo" height="1" width="1" />'.format(
                    img_src_url=img_src_url)
                # link = '<img src="' + domain_url + '/m/cm/track-email/' + \
                #     str(campaign_log.id) + '/contact/' + \
                #     str(each_contact.id) + '/" height="1" width="1" alt="company_logo" + \
                #     title="company_logo"/>'

                unsubscribe_from_campaign_url = reverse(
                    'marketing:unsubscribe_from_campaign', kwargs={'contact_id': each_contact.id,
                                                                   'campaign_id': campaign.id})
                unsubscribe_from_campaign_html = "<br><br/><a href={}>Unsubscribe</a>".format(
                    domain_url + unsubscribe_from_campaign_url)
                names_dict = {'company_name': each_contact.company_name if each_contact.company_name else '',
                              'last_name': each_contact.last_name if each_contact.last_name else '',
                              'city': each_contact.city if each_contact.city else '',
                              'state': each_contact.state if each_contact.state else '',
                              'first_name': each_contact.name,
                              'email': each_contact.email, 'email_id': each_contact.id,
                              'unsubscribe_from_campaign_url': unsubscribe_from_campaign_url}

                html = Template(html).render(Context(names_dict))
                mail_html = html + link + unsubscribe_from_campaign_html
                from_email = str(campaign.from_name) + "<" + \
                    str(campaign.from_email) + '>'
                to_email = [each_contact.email]
                send_campaign_mail(
                    subject, mail_html, from_email, to_email, [], [reply_to_email], attachments)
    except Exception as e:
        print(e)
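The attachments list above collects (file_name, content, mime_type) triples. Assuming send_campaign_mail ultimately hands them to Django's email machinery, that is the same triple accepted by EmailMessage.attach(); a minimal sketch of that assumption (helper name is a placeholder, and to_email is expected to be a list of addresses):

from mimetypes import MimeTypes
from django.core.mail import EmailMessage

def build_message_with_attachment(subject, html, from_email, to_email, file_path):
    # Guess the attachment's mimetype the same way the task above does.
    mime_type, _ = MimeTypes().guess_type(file_path)
    with open(file_path, 'rb') as fh:
        content = fh.read()
    message = EmailMessage(subject, html, from_email, to_email)
    message.content_subtype = 'html'
    # attach() takes the same (file_name, content, mime_type) triple the task builds.
    message.attach(file_path.split('/')[-1], content, mime_type)
    return message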
Exemplo n.º 49
0
class SauceNao(object):
    """"
    small script to work with SauceNao locally
    """

    SEARCH_POST_URL = 'http://saucenao.com/search.php'

    # basic account allows currently 20 images within 30 seconds
    # you can increase this value if you have a premium account
    LIMIT_30_SECONDS = 20

    # 0=html, 2=json; the json output omits some important data but includes more data about authors
    # taken from the API documentation(requires login): https://saucenao.com/user.php?page=search-api
    API_HTML_TYPE = 0
    API_JSON_TYPE = 2

    CONTENT_CATEGORY_KEY = 'Material'
    CONTENT_CHARACTERS_KEY = 'Characters'

    mime = None
    logger = None

    def __init__(self, directory, databases=999, minimum_similarity=65, combine_api_types=False, api_key=None,
                 exclude_categories='', move_to_categories=False, output_type=API_HTML_TYPE, start_file=None,
                 log_level=logging.ERROR, title_minimum_similarity=90):
        """Initializing function

        :type directory: str
        :type databases: int
        :type minimum_similarity: float
        :type combine_api_types: bool
        :type api_key: str
        :type exclude_categories: str
        :type move_to_categories: bool
        :type start_file: str
        :type log_level: int
        :type title_minimum_similarity: float
        """
        self._directory = directory
        self._databases = databases
        self._minimum_similarity = minimum_similarity
        self._combine_api_types = combine_api_types
        self._api_key = api_key
        self._exclude_categories = exclude_categories
        self._move_to_categories = move_to_categories
        self._output_type = output_type
        self._start_file = start_file
        self._title_minimum_similarity = title_minimum_similarity

        self._previous_status_code = None

        self.mime = MimeTypes()
        logging.basicConfig(level=log_level)
        self.logger = logging.getLogger("saucenao_logger")

    def check_file(self, file_name: str) -> list:
        """Check the given file for results on SauceNAO

        :type file_name: str
        :return:
        """
        self.logger.info("checking file: {0:s}".format(file_name))
        if self._combine_api_types:
            result = self.check_image(file_name, self.API_HTML_TYPE)
            sorted_results = self.parse_results_json(result)

            additional_result = self.check_image(file_name, self.API_JSON_TYPE)
            additional_sorted_results = self.parse_results_json(additional_result)
            sorted_results = self.merge_results(sorted_results, additional_sorted_results)
        else:
            result = self.check_image(file_name, self._output_type)
            sorted_results = self.parse_results_json(result)

        filtered_results = self.filter_results(sorted_results)
        return filtered_results

    def get_http_data(self, file_path: str, output_type: int):
        """Prepare the HTTP-relevant data (files, headers, params) for the given file path and output type

        :param file_path:
        :param output_type:
        :return:
        """
        with open(file_path, 'rb') as file_object:
            files = {'file': file_object.read()}

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/63.0.3239.84 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-DE,en-US;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'DNT': '1',
            'Connection': 'keep-alive'
        }

        params = {
            'file': file_path,
            'Content-Type': self.mime.guess_type(file_path)[0],  # guess_type() returns a (type, encoding) tuple
            # parameters taken from form on main page: https://saucenao.com/
            'url': None,
            'frame': 1,
            'hide': 0,
            # parameters taken from API documentation: https://saucenao.com/user.php?page=search-api
            'output_type': output_type,
            'db': self._databases,
        }

        if self._api_key:
            params['api_key'] = self._api_key

        return files, params, headers

    def check_image(self, file_name: str, output_type: int) -> str:
        """Check the possible sources for the given file

        :type output_type: int
        :type file_name: str
        :return:
        """
        file_path = os.path.join(self._directory, file_name)

        files, params, headers = self.get_http_data(file_path=file_path, output_type=output_type)
        link = requests.post(url=self.SEARCH_POST_URL, files=files, params=params, headers=headers)

        code, msg = http.verify_status_code(link, file_name)

        if code == http.STATUS_CODE_SKIP:
            self.logger.error(msg)
            return json.dumps({'results': []})
        elif code == http.STATUS_CODE_REPEAT:
            if not self._previous_status_code:
                self._previous_status_code = code
                self.logger.info("Received an unexpected status code, repeating after 10 seconds...")
                time.sleep(10)
                return self.check_image(file_name, output_type)
            else:
                raise UnknownStatusCodeException(msg)
        else:
            self._previous_status_code = None

        if output_type == self.API_HTML_TYPE:
            return self.parse_results_html_to_json(link.text)

        return link.text

    @staticmethod
    def parse_results_html_to_json(html: str) -> str:
        """Parse the HTML results into the basic format of the json API response

        :type html: str
        :return:
        """
        soup = Soup(html, 'html.parser')
        # basic format of json API response
        results = {'header': {}, 'results': []}

        for res in soup.find_all('td', attrs={"class": "resulttablecontent"}):  # type: element.Tag
            # optional field in SauceNao
            title_tag = res.find_next('div', attrs={"class": "resulttitle"})
            if title_tag:
                title = title_tag.text
            else:
                title = ''

            # mandatory field in SauceNao
            similarity = res.find_next('div', attrs={"class": "resultsimilarityinfo"}).text.replace('%', '')
            alternate_links = [a_tag['href'] for a_tag in
                               res.find_next('div', attrs={"class": "resultmiscinfo"}).find_all('a', href=True)]
            content_column = []
            content_column_tags = res.find_all('div', attrs={"class": "resultcontentcolumn"})
            for content_column_tag in content_column_tags:
                for br in content_column_tag.find_all('br'):
                    br.replace_with('\n')
                content_column.append(content_column_tag.text)

            result = {
                'header': {
                    'similarity': similarity
                },
                'data': {
                    'title': title,
                    'content': content_column,
                    'ext_urls': alternate_links
                }
            }
            results['results'].append(result)

        return json.dumps(results)

    @staticmethod
    def parse_results_json(text: str) -> list:
        """Parse the results and sort them descending by similarity

        :type text: str
        :return:
        """
        result = json.loads(text)
        results = [res for res in result['results']]
        return sorted(results, key=lambda k: float(k['header']['similarity']), reverse=True)

    def filter_results(self, sorted_results) -> list:
        """Return results with a similarity greater than or equal to the minimum similarity given in the arguments (default 65%)

        :type sorted_results: list|tuple|Generator
        :return:
        """
        filtered_results = []
        for res in sorted_results:
            if float(res['header']['similarity']) >= float(self._minimum_similarity):
                filtered_results.append(res)
            else:
                # we can break here since the results are sorted by similarity anyways
                break
        return filtered_results

    @staticmethod
    def get_content_value(results, key: str):
        """Return the first match for the given key (e.g. Material) in the content section;
        multiple sites provide a categorisation which SauceNao passes through into the content section

        :type results: list|tuple|Generator
        :type key: str
        :return:
        """
        for result in results:
            if 'content' in list(result['data'].keys()):
                for content in result['data']['content']:
                    if re.match('{0:s}: .*'.format(key), content):
                        return ''.join(re.split(r'{0:s}: '.format(key), content)[1:]).rstrip("\n").split('\n')
        return ''

    @staticmethod
    def merge_two_dicts(x: dict, y: dict) -> dict:
        """Take x dictionary and insert/overwrite y dictionary values

        :type x: dict
        :type y: dict
        :return:
        """
        z = x.copy()
        z.update(y)
        return z

    def merge_results(self, result: list, additional_result: list) -> list:
        """Merge two result arrays

        :type result: list
        :type additional_result: list
        :return:
        """
        if len(result) <= len(additional_result):
            length = len(result)
        else:
            length = len(additional_result)

        for i in range(length):
            for key in list(result[i].keys()):
                result[i][key] = self.merge_two_dicts(result[i][key], additional_result[i][key])

        return result
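A minimal usage sketch for the class above, assuming it is importable together with its dependencies (requests, BeautifulSoup and the http/exception helpers it references); the directory and file name below are placeholders:

# hypothetical usage: scan one file and print matches above the similarity threshold
import logging

sauce = SauceNao(directory='/tmp/images', minimum_similarity=65, log_level=logging.INFO)
for match in sauce.check_file('example.jpg'):
    print(match['header']['similarity'], match['data'].get('title', ''))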
Exemplo n.º 50
0
class ResponseTypes:

    CLASSES = {
        "text/html": "scrapy.http.HtmlResponse",
        "application/atom+xml": "scrapy.http.XmlResponse",
        "application/rdf+xml": "scrapy.http.XmlResponse",
        "application/rss+xml": "scrapy.http.XmlResponse",
        "application/xhtml+xml": "scrapy.http.HtmlResponse",
        "application/vnd.wap.xhtml+xml": "scrapy.http.HtmlResponse",
        "application/xml": "scrapy.http.XmlResponse",
        "application/json": "scrapy.http.TextResponse",
        "application/x-json": "scrapy.http.TextResponse",
        "application/json-amazonui-streaming": "scrapy.http.TextResponse",
        "application/javascript": "scrapy.http.TextResponse",
        "application/x-javascript": "scrapy.http.TextResponse",
        "text/xml": "scrapy.http.XmlResponse",
        "text/*": "scrapy.http.TextResponse",
    }

    def __init__(self):
        self.classes = {}
        self.mimetypes = MimeTypes()
        mimedata = get_data("scrapy", "mime.types").decode("utf8")
        self.mimetypes.readfp(StringIO(mimedata))
        for mimetype, cls in self.CLASSES.items():
            self.classes[mimetype] = load_object(cls)

    def from_mimetype(self, mimetype):
        """Return the most appropriate Response class for the given mimetype"""
        if mimetype is None:
            return Response
        elif mimetype in self.classes:
            return self.classes[mimetype]
        else:
            basetype = f"{mimetype.split('/')[0]}/*"
            return self.classes.get(basetype, Response)

    def from_content_type(self, content_type, content_encoding=None):
        """Return the most appropriate Response class from an HTTP Content-Type
        header"""
        if content_encoding:
            return Response
        mimetype = to_unicode(content_type).split(";")[0].strip().lower()
        return self.from_mimetype(mimetype)

    def from_content_disposition(self, content_disposition):
        try:
            filename = (to_unicode(
                content_disposition, encoding="latin-1",
                errors="replace").split(";")[1].split("=")[1].strip("\"'"))
            return self.from_filename(filename)
        except IndexError:
            return Response

    def from_headers(self, headers):
        """Return the most appropriate Response class by looking at the HTTP
        headers"""
        cls = Response
        if b"Content-Type" in headers:
            cls = self.from_content_type(
                content_type=headers[b"Content-Type"],
                content_encoding=headers.get(b"Content-Encoding"),
            )
        if cls is Response and b"Content-Disposition" in headers:
            cls = self.from_content_disposition(
                headers[b"Content-Disposition"])
        return cls

    def from_filename(self, filename):
        """Return the most appropriate Response class from a file name"""
        mimetype, encoding = self.mimetypes.guess_type(filename)
        if mimetype and not encoding:
            return self.from_mimetype(mimetype)
        else:
            return Response

    def from_body(self, body):
        """Try to guess the appropriate response based on the body content.
        This method is a bit magic and could be improved in the future, but
        it's not meant to be used except for special cases where response types
        cannot be guessed using more straightforward methods."""
        chunk = body[:5000]
        chunk = to_bytes(chunk)
        if not binary_is_text(chunk):
            return self.from_mimetype("application/octet-stream")
        elif b"<html>" in chunk.lower():
            return self.from_mimetype("text/html")
        elif b"<?xml" in chunk.lower():
            return self.from_mimetype("text/xml")
        else:
            return self.from_mimetype("text")

    def from_args(self, headers=None, url=None, filename=None, body=None):
        """Guess the most appropriate Response class based on
        the given arguments."""
        cls = Response
        if headers is not None:
            cls = self.from_headers(headers)
        if cls is Response and url is not None:
            cls = self.from_filename(url)
        if cls is Response and filename is not None:
            cls = self.from_filename(filename)
        if cls is Response and body is not None:
            cls = self.from_body(body)
        return cls
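A small sketch of how the resolver above can be exercised directly, assuming Scrapy and the helpers it imports (Response, load_object, to_unicode, binary_is_text, get_data) are available; Scrapy itself normally uses a module-level instance rather than instantiating the class by hand:

# hypothetical usage: resolve a Response subclass from synthetic inputs
rt = ResponseTypes()
print(rt.from_content_type('text/html; charset=utf-8'))              # HtmlResponse
print(rt.from_filename('feed.rss.gz'))                               # gzip encoding -> base Response
print(rt.from_args(headers={b'Content-Type': b'application/json'}))  # TextResponse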
Exemplo n.º 51
0
 def setanalyzedFileType(self, path):
     mime = MimeTypes()
     url = urllib.pathname2url(path)
     mime_type = mime.guess_type(url)
     self.fileType = mime_type
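Note that MimeTypes.guess_type() returns a (type, encoding) tuple rather than a plain string, which is why most of the other snippets index [0]; the method above stores the whole tuple in self.fileType. A minimal illustration:

from mimetypes import MimeTypes

mime = MimeTypes()
print(mime.guess_type('report.csv'))      # ('text/csv', None)
print(mime.guess_type('archive.tar.gz'))  # ('application/x-tar', 'gzip')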
Exemplo n.º 52
0
def run_campaign(campaign, domain="demo.django-crm.io", protocol="https"):
    blocked_domains = BlockedDomain.objects.values_list("domain", flat=True)
    blocked_emails = BlockedEmail.objects.values_list("email", flat=True)
    try:
        campaign = Campaign.objects.get(id=campaign)
        attachments = []
        if campaign.attachment:
            file_path = campaign.attachment.path
            file_name = file_path.split("/")[-1]
            content = open(file_path, "rb").read()
            mime = MimeTypes()
            mime_type = mime.guess_type(file_path)
            attachments.append((file_name, content, mime_type[0]))
        subject = campaign.subject

        contacts = Contact.objects.filter(contact_list__in=[
            each_list for each_list in campaign.contact_lists.all()
        ])
        default_html = campaign.html_processed
        for each_contact in contacts:
            html = default_html
            campaign_log = CampaignLog.objects.create(contact=each_contact,
                                                      campaign=campaign)
            if campaign.reply_to_email:
                reply_to_email = campaign.reply_to_email
            else:
                message_id = get_campaign_message_id(campaign_log)
                campaign_log.message_id = message_id
                campaign_log.save()
                domain_name = "django-crm.com"
                if campaign.from_email is not None:
                    from_email = campaign.from_email
                else:
                    from_email = campaign.created_by.email
                reply_to_email = (str(from_email) + " <" +
                                  str(message_id + "@" + domain_name + "") +
                                  ">")
            if not (each_contact.is_bounced or each_contact.is_unsubscribed):
                if (each_contact.email not in blocked_emails) and (
                        each_contact.email.split("@")[-1]
                        not in blocked_domains):
                    # domain_url = settings.URL_FOR_LINKS
                    domain_url = protocol + "://" + domain
                    img_src_url = domain_url + reverse(
                        "marketing:campaign_open",
                        kwargs={
                            "campaign_log_id": campaign_log.id,
                            "email_id": each_contact.id,
                        },
                    )
                    # images can only be accessed over https
                    link = '<img src={img_src_url} alt="company_logo" title="company_logo" height="1" width="1" />'.format(
                        img_src_url=img_src_url)
                    # link = '<img src="' + domain_url + '/m/cm/track-email/' + \
                    #     str(campaign_log.id) + '/contact/' + \
                    #     str(each_contact.id) + '/" height="1" width="1" alt="company_logo" + \
                    #     title="company_logo"/>'

                    unsubscribe_from_campaign_url = reverse(
                        "marketing:unsubscribe_from_campaign",
                        kwargs={
                            "contact_id": each_contact.id,
                            "campaign_id": campaign.id,
                        },
                    )
                    unsubscribe_from_campaign_html = "<br><br/><a href={}>Unsubscribe</a>".format(
                        domain_url + unsubscribe_from_campaign_url)
                    names_dict = {
                        "company_name":
                        each_contact.company_name
                        if each_contact.company_name else "",
                        "last_name":
                        each_contact.last_name
                        if each_contact.last_name else "",
                        "city":
                        each_contact.city if each_contact.city else "",
                        "state":
                        each_contact.state if each_contact.state else "",
                        "first_name":
                        each_contact.name,
                        "email":
                        each_contact.email,
                        "email_id":
                        each_contact.id,
                        "name":
                        each_contact.name + " " + each_contact.last_name
                        if each_contact.last_name else each_contact.name,
                        "unsubscribe_from_campaign_url":
                        unsubscribe_from_campaign_url,
                    }

                    html = Template(html).render(Context(names_dict))
                    mail_html = html + link + unsubscribe_from_campaign_html
                    from_email = (str(campaign.from_name) + "<" +
                                  str(campaign.from_email) + ">")
                    to_email = [each_contact.email]
                    send_campaign_mail(
                        subject,
                        mail_html,
                        from_email,
                        to_email,
                        [],
                        [reply_to_email],
                        attachments,
                    )
    except Exception as e:
        print(e)
        pass
Exemplo n.º 53
0
def upload_file(request):
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():

            mime = MimeTypes()
            uploaded_file = request.FILES['file']
            mime_type = mime.guess_type(uploaded_file.name)

            # Get mime type string from tuple
            if mime_type[0]:
                mime_type = mime_type[0]
            else:
                return HttpResponseBadRequest(
                    json.dumps({
                        'status': 'failed',
                        'reason': 'Invalid file.'
                    }), 'application/json')

            if validate_upload_file(
                    uploaded_file, mime_type
            ) and uploaded_file.size <= settings.MAX_ARCHIVE_FILE_SIZE:
                link = Link(submitted_url=form.cleaned_data['url'],
                            submitted_title=form.cleaned_data['title'],
                            created_by=request.user)
                link.save()

                asset = Asset(link=link)
                file_name = 'cap' + mime.guess_extension(mime_type)
                file_path = os.path.join(asset.base_storage_path, file_name)

                uploaded_file.file.seek(0)
                file_name = default_storage.store_file(uploaded_file,
                                                       file_path)

                if mime_type == 'application/pdf':
                    asset.pdf_capture = file_name
                else:
                    asset.image_capture = file_name
                asset.save()

                response_object = {
                    'status': 'success',
                    'linky_id': link.guid,
                    'linky_hash': link.guid
                }

                return HttpResponse(json.dumps(response_object),
                                    'application/json',
                                    201)  # '201 Created' status
            else:
                return HttpResponseBadRequest(
                    json.dumps({
                        'status': 'failed',
                        'reason': 'Invalid file.'
                    }), 'application/json')
        else:
            return HttpResponseBadRequest(
                json.dumps({
                    'status': 'failed',
                    'reason': 'Missing file.'
                }), 'application/json')

    return HttpResponseBadRequest(
        json.dumps({
            'status': 'failed',
            'reason': 'No file submitted.'
        }), 'application/json')
Exemplo n.º 54
0
def send_campaign_email_to_admin_contact(campaign,
                                         domain="demo.django-crm.io",
                                         protocol="https"):
    try:
        campaign = Campaign.objects.get(id=campaign)
        attachments = []
        if campaign.attachment:
            file_path = campaign.attachment.path
            file_name = file_path.split("/")[-1]
            content = open(file_path, "rb").read()
            mime = MimeTypes()
            mime_type = mime.guess_type(file_path)
            attachments.append((file_name, content, mime_type[0]))
        subject = campaign.subject
        contacts = ContactEmailCampaign.objects.all()
        default_html = campaign.html_processed
        blocked_domains = BlockedDomain.objects.values_list("domain",
                                                            flat=True)
        blocked_emails = BlockedEmail.objects.values_list("email", flat=True)
        for each_contact in contacts:
            if (each_contact.email not in blocked_emails) and (
                    each_contact.email.split("@")[-1] not in blocked_domains):
                html = default_html
                if campaign.reply_to_email:
                    reply_to_email = campaign.reply_to_email
                else:
                    domain_name = "django-crm.com"
                    if campaign.from_email is not None:
                        from_email = campaign.from_email
                    else:
                        from_email = campaign.created_by.email
                    reply_to_email = (str(from_email) + " <" +
                                      str(settings.EMAIL_HOST_USER + "@" +
                                          domain_name + "") + ">")

                # domain_url = settings.URL_FOR_LINKS
                domain_url = protocol + "://" + domain
                # img_src_url = domain_url + reverse('marketing:campaign_open', kwargs={
                #     'campaign_log_id': campaign_log.id, 'email_id': each_contact.id})
                # # images can only be accessed over https
                # link = '<img src={img_src_url} alt="company_logo" title="company_logo" height="1" width="1" />'.format(
                #     img_src_url=img_src_url)
                # link = '<img src="' + domain_url + '/m/cm/track-email/' + \
                #     str(campaign_log.id) + '/contact/' + \
                #     str(each_contact.id) + '/" height="1" width="1" alt="company_logo" + \
                #     title="company_logo"/>'

                # unsubscribe_from_campaign_url = reverse(
                #     'marketing:unsubscribe_from_campaign', kwargs={'contact_id': each_contact.id,
                #                                                     'campaign_id': campaign.id})
                # unsubscribe_from_campaign_html = "<br><br/><a href={}>Unsubscribe</a>".format(
                #     domain_url + unsubscribe_from_campaign_url)

                # names_dict = {'company_name': '', 'city': '', 'state': '',
                #                 'last_name': each_contact.last_name if each_contact.last_name else '',
                #                 'email': each_contact.email, 'email_id': each_contact.id,
                #                 'name': each_contact.name + ' ' + each_contact.last_name if each_contact.last_name else '',
                #             }

                # mail_html = html + link + unsubscribe_from_campaign_html
                html = Template(html).render(
                    Context({"email_id": each_contact.id}))
                mail_html = html
                from_email = (str(campaign.from_name) + "<" +
                              str(campaign.from_email) + ">")
                to_email = [each_contact.email]
                send_campaign_mail(
                    subject,
                    mail_html,
                    from_email,
                    to_email,
                    [],
                    [reply_to_email],
                    attachments,
                )
    except Exception as e:
        print(e)
        pass
Exemplo n.º 55
0
 def _get_mime_type(local_file):
     mime = MimeTypes()
     url = urllib.pathname2url(local_file)
     mime_type = mime.guess_type(url)
     return mime_type[0]
Exemplo n.º 56
0
def getFileContent():
    try:
        #Initialize
        result = {'file' : '',
                   'mime' : '',
                   'message' : '',
                   'message_css' : '',
                   'download' : '',
                   'showDownload' : False,
                   'containerScroll' : True,
                   'content' : '',
                   'keywords' : ''}
    
        #json_data = request.json
        
        json_data = request.get_json(force=True)
        
        #file = json_data['path']
        file = os.path.join(CURRENT_DIR, json_data['path'].replace("../",""))
        
        mime = MimeTypes()
        guessed_type = mime.guess_type(urllib.pathname2url(file))[0]
        mime_type = guessed_type if guessed_type else 'text/plain'
        result['file'] = file
        result['mime'] = mime_type
        download_html = ''
        download_html+='<div class="">'
        download_html+='<a class="btn-download btn btn-lg btn-primary" href="' + file + '" target="_blank"><i class="glyphicon glyphicon-download-alt"></i> Download</a>'
        download_html+='</div>'
        result['download'] = download_html
        
        supported_application = ['application/xml', 'text/x-php', 'text/plain', 'text/xml', 'text/csv']
        mime = mime_type.split('/')
        file_mime = mime[0]
        if mime_type in supported_application:
            content = ''
            words = ''
            
            file_ext = file.split('.')
            
            if file_ext[len(file_ext)-1] == "properties":
                with open(file) as f:
                    for line in f:
                        content += line
            elif file_ext[len(file_ext)-1] == "feature":
                content = open(file).read()
                x = open(file, "r")
                words = [w for w in x.read().split() if w[0] == "@"]
            elif file_ext[len(file_ext)-1] == "csv":
                with open(file, 'rb') as csvfile:
                    spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
                    for row in spamreader:
                        line = ' '.join(row)
                        content += line + "\n"
            else:
                content = open(file).read()
                content = unicode(content, errors='ignore')

            result['content'] = content
            
            result['containerScroll'] = True
            result['showDownload'] = False
            result['keywords'] = words
            
        elif (file_mime == 'text'):
            html_content = ''
            html_content+= '<object data="' + file + '" type="' + mime_type + '" id="fileObject" style="width:100% ">'
            html_content+='<embed type="' + mime_type + '" src="' + file + '" style="width:100%">'
            html_content+='</object>'
            result['content'] = html_content
            result['containerScroll'] = False
            result['showDownload'] = False
        else:
            html_content = ''
            html_content+= '<object data="' + file + '" type="' + mime_type + '" id="fileObject" style="width:100% " class="hidden">'
            html_content+='<embed type="' + mime_type + '" src="' + file + '" style="width:100%">'
            html_content+='</object>'
            result['content'] = html_content
            result['containerScroll'] = False
            result['showDownload'] = True
       
        return jsonify(status='OK',result=result)
    except Exception as e:
        return jsonify(status='ERROR',message=str(e))
Exemplo n.º 57
0
def get_file_mime():
    mime = MimeTypes()
    file_path = r'C:\Users\rzhang\Pictures\33.png'
    print(mime.guess_type(file_path))
Exemplo n.º 58
0
def editFileContent():
    try:
        #Initialize
        result = {'file' : '',
                   'mime' : '',
                   'message' : '',
                   'message_css' : '',
                   'download' : '',
                   'showDownload' : False,
                   'containerScroll' : True,
                   'content' : '',
                   'keywords' : ''}
    
        #json_data = request.json
        
        json_data = request.get_json(force=True)
        
        #file = json_data['path']
        file = os.path.join(CURRENT_DIR, json_data['path'].replace("../",""))
        fileContent = json_data['content'].replace("<br>", "\n")
        
        # cleanr = re.compile('<.*?>')
        # cleantext = re.sub(cleanr, '', fileContent)
        #print "584", cleantext
        
        # fileContent = cleantext
        
        # Providing Read, Write and Execute permission to selected featureFiles 
        exePermissionStr = "sudo " + "find " + file + " -type " + "f " + "-exec " + "chmod " + "ugo+rwx " + "{} " +"\;"
        #print "569", exePermissionStr
        ffPermissionChange = subprocess.Popen(exePermissionStr, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = ffPermissionChange.communicate()
        temp = ffPermissionChange.returncode
        #print "572", temp

        if temp == 0:
            with open(file, "w") as f:
                #print "575"
                f.write(fileContent)
        else: 
            print("Write permission not granted to the feature file")

        mime = MimeTypes()
        guessed_type = mime.guess_type(urllib.pathname2url(file))[0]
        mime_type = guessed_type if guessed_type else 'text/plain'
        result['file'] = file
        result['mime'] = mime_type
        download_html = ''
        download_html+='<div class="">'
        download_html+='<a class="btn-download btn btn-lg btn-primary" href="' + file + '" target="_blank"><i class="glyphicon glyphicon-download-alt"></i> Download</a>'
        download_html+='</div>'
        result['download'] = download_html
        
        supported_application = ['application/xml', 'text/x-php', 'text/plain', 'text/xml', 'text/csv' ]
        mime = mime_type.split('/')
        file_mime = mime[0]
        if mime_type in supported_application:
            content = ''
            words = ''
            
            file_ext = file.split('.')
            
            if file_ext[len(file_ext)-1] == "properties":
                with open(file) as f:
                    for line in f:
                        content += line
            elif file_ext[len(file_ext)-1] == "feature":
                #print "569"
                content = open(file).read()
                x = open(file, "r")
                words = [w for w in x.read().split() if w[0] == "@"]   
                
            else:
                content = open(file).read()

            result['content'] = content
            
            result['containerScroll'] = True
            result['showDownload'] = False
            result['keywords'] = words
            
        elif (file_mime == 'text'):
            html_content = ''
            html_content+= '<object data="' + file + '" type="' + mime_type + '" id="fileObject" style="width:100% ">'
            html_content+='<embed type="' + mime_type + '" src="' + file + '" style="width:100%">'
            html_content+='</object>'
            result['content'] = html_content
            result['containerScroll'] = False
            result['showDownload'] = False
        else:
            html_content = ''
            html_content+= '<object data="' + file + '" type="' + mime_type + '" id="fileObject" style="width:100% " class="hidden">'
            html_content+='<embed type="' + mime_type + '" src="' + file + '" style="width:100%">'
            html_content+='</object>'
            result['content'] = html_content
            result['containerScroll'] = False
            result['showDownload'] = True
       
        return jsonify(status='OK',result=result)
        
    except Exception as e:
        return jsonify(status='ERROR',message=str(e))
Exemplo n.º 59
0
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import base64
from mimetypes import MimeTypes

try:
    pathfile = str(input("Enter the file path: "))
except:
    print("Ex: /home/user/Desktop/img.jpeg")
else:
    with open(pathfile, "rb") as Image:
        mime = MimeTypes()
        mime_type = mime.guess_type(pathfile)
        # decode() strips the b'...' bytes wrapper cleanly instead of string-replacing it away
        base64definer = base64.b64encode(Image.read()).decode("ascii")
        print(f"data:{mime_type[0]};base64,{base64definer}")
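As a quick round-trip check for the script above (a minimal sketch; data_uri is assumed to hold the printed string):

import base64

header, _, payload = data_uri.partition(';base64,')
mime_type = header[len('data:'):]      # e.g. 'image/jpeg'
raw_bytes = base64.b64decode(payload)  # original file content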
Exemplo n.º 60
0
     continue
 prev_req_name = req_name
 prev_req_time = curr_req_time
 prev_addr = addr[0]
 logger.info("Connection Accepted: Client: " + str(addr))
 # use the below print statement to print the request ( for debugging )
 # print(data)
 
 logger.info("Requested  File: " + urllib.parse.unquote(req_name))
 # ? Check whether the request is to download
 if(req_name[-3:] == "get"):
     file_path = home_dir + urllib.parse.unquote(req_name[:-3])
     # print file path ( for debugging )
     # print("file Path: ", file_path)
     mime = MimeTypes()
     mime_type = mime.guess_type(file_path)[0]
     if(mime_type == None):
         mime_type = 'text/plain'
     # ? Check whether the request is directory
     if(os.path.isdir(file_path)):
         create_zip(file_path.split('/')[-1], file_path)
         data = ''
         with open(file_path.split('/')[-1] + '.zip', 'rb') as f:
             data = f.read()     # reading the file in the binary format
         zip_path = os.getcwd() + '/' + file_path.split('/')[-1] + ".zip"
         mime_type = mime.guess_type(zip_path)[0]
         if(mime_type == None):
             mime_type = 'text/plain'
         response = get_header(200, 'download', mime_type, os.path.getsize(zip_path), file_path.split('/')[-1] + ".zip").encode()
         response += data    # attaching the binary format of the text to the http response
         os.remove(zip_path)