Пример #1
0
 def __call__(self, environ, start_response):
     """Respond to a request when called in the usual WSGI way.

     Only GET and HEAD are accepted; any other method gets the
     ``method_not_allowed`` response.  A path starting with ``/exec``
     runs the command given by the ``args`` query parameter (in the
     directory given by ``cwd``, default ``'.'``) and returns its
     combined stdout/stderr as ``text/plain``.  Every other path is
     served as a static file, honouring ``If-Modified-Since`` and
     ``If-None-Match``.

     Returns the WSGI response iterable.
     """
     if environ['REQUEST_METHOD'] not in ('GET', 'HEAD'):
         headers = [('Allow', 'GET, HEAD')]
         return self.method_not_allowed(environ, start_response, headers)
     path_info = environ.get('PATH_INFO', '')
     full_path = self._full_path(path_info)
     if path_info.startswith('/exec'):
         # SECURITY(review): this runs an arbitrary command taken straight
         # from the query string of an unauthenticated request.  It is kept
         # because callers may depend on it, but it must never be reachable
         # from an untrusted network.
         from subprocess import Popen, PIPE, STDOUT
         import urllib
         # QUERY_STRING may be absent from the environ altogether.
         query = environ.get('QUERY_STRING') or ''
         args = []
         cwd = '.'
         for var in query.split('&'):
             # partition() copes with parameters that carry no '='.
             name, _, value = var.partition('=')
             if name == 'args':
                 args = urllib.unquote_plus(value).split(' ')
             elif name == 'cwd':
                 # Decode like args so directories with spaces work.
                 cwd = urllib.unquote_plus(value)
         print(cwd)
         print(args)
         if not args or not args[0]:
             # Nothing to run: answer explicitly instead of letting
             # Popen fail on an empty argument list.
             start_response("400 Bad Request",
                            [('Content-Type', 'text/plain')])
             return ['missing "args" query parameter\n']
         proc = Popen(args, stdout=PIPE, stderr=STDOUT, cwd=cwd)
         # communicate() drains the pipe while waiting.  wait() followed
         # by a read deadlocks once the child fills the pipe buffer.
         output, _ = proc.communicate()
         headers = [('Date', rfc822.formatdate(time.time())),
                    ('Content-Type', 'text/plain')]
         start_response("200 OK", headers)
         return [output]
     if not self._is_under_root(full_path):
         return self.not_found(environ, start_response)
     if path.isdir(full_path):
         if full_path[-1] != '/' or full_path == self.root:
             # Redirect directory requests to the slash-terminated URL,
             # keeping any query string.
             location = util.request_uri(environ, include_query=False) + '/'
             if environ.get('QUERY_STRING'):
                 location += '?' + environ.get('QUERY_STRING')
             headers = [('Location', location)]
             return self.moved_permanently(environ, start_response, headers)
         else:
             full_path = self._full_path(path_info + self.index_file)
     content_type = self._guess_type(full_path)
     try:
         etag, last_modified = self._conditions(full_path, environ)
         headers = [('Date', rfc822.formatdate(time.time())),
                    ('Last-Modified', last_modified), ('ETag', etag)]
         if_modified = environ.get('HTTP_IF_MODIFIED_SINCE')
         if if_modified and (rfc822.parsedate(if_modified) >=
                             rfc822.parsedate(last_modified)):
             return self.not_modified(environ, start_response, headers)
         if_none = environ.get('HTTP_IF_NONE_MATCH')
         if if_none and (if_none == '*' or etag in if_none):
             return self.not_modified(environ, start_response, headers)
         file_like = self._file_like(full_path)
         headers.append(('Content-Type', content_type))
         start_response("200 OK", headers)
         if environ['REQUEST_METHOD'] == 'GET':
             return self._body(full_path, environ, file_like)
         else:
             # HEAD: headers only.
             return ['']
     except (IOError, OSError) as e:
         print(e)
         return self.not_found(environ, start_response)
Пример #2
0
 def process_users(self, items):
     """Replace the user dicts embedded in *items* with shared cached copies.

     For every tweet author (``user``), retweeted author
     (``retweeted_status.user``) and direct-message ``sender`` /
     ``recipient`` found in an item, the user data is merged into
     ``self.session.users`` (keyed by the stringified user id) when the
     item is newer than the cached ``_last_update``.  The embedded dict
     is then replaced by the cached one.

     Writes to the cache happen under ``self.session.storage_lock``.
     Returns the same *items* sequence, modified in place.
     """
     users = self.session.users
     lock = self.session.storage_lock

     def merge(container, key, created_at):
         """Fold container[key] into the cache and point it at the cached dict."""
         user = container[key]
         user_id = unicode(user['id'])
         if user_id not in users:
             with lock:
                 users[user_id] = {}
         # Only overwrite cached data with data from a newer item.
         if ('_last_update' not in users[user_id] or
                 time.mktime(rfc822.parsedate(created_at)) >
                 time.mktime(rfc822.parsedate(users[user_id]['_last_update']))):
             user['_last_update'] = created_at
             with lock:
                 users[user_id].update(user)
         container[key] = users[user_id]

     for item in items:
         # Tweets
         if 'user' in item:
             merge(item, 'user', item['created_at'])
         # Retweets: dated by the original status, not the retweet
         if 'retweeted_status' in item and 'user' in item['retweeted_status']:
             status = item['retweeted_status']
             merge(status, 'user', status['created_at'])
         # Direct messages
         if 'sender' in item:
             merge(item, 'sender', item['created_at'])
         if 'recipient' in item:
             merge(item, 'recipient', item['created_at'])
     return items
Пример #3
0
 def process_users(self, items):
     """Replace the user dicts embedded in *items* with shared cached copies.

     For every tweet author (``user``), retweeted author
     (``retweeted_status.user``) and direct-message ``sender`` /
     ``recipient`` found in an item, the user data is merged into
     ``self.session.users`` (keyed by the stringified user id) when the
     item is newer than the cached ``_last_update``.  The embedded dict
     is then replaced by the cached one.

     Writes to the cache happen under ``self.session.storage_lock``.
     Returns the same *items* sequence, modified in place.
     """
     users = self.session.users
     lock = self.session.storage_lock

     def merge(container, key, created_at):
         """Fold container[key] into the cache and point it at the cached dict."""
         user = container[key]
         user_id = unicode(user['id'])
         if user_id not in users:
             with lock:
                 users[user_id] = {}
         # Only overwrite cached data with data from a newer item.
         if ('_last_update' not in users[user_id] or
                 time.mktime(rfc822.parsedate(created_at)) >
                 time.mktime(rfc822.parsedate(users[user_id]['_last_update']))):
             user['_last_update'] = created_at
             with lock:
                 users[user_id].update(user)
         container[key] = users[user_id]

     for item in items:
         # Tweets
         if 'user' in item:
             merge(item, 'user', item['created_at'])
         # Retweets: dated by the original status, not the retweet
         if 'retweeted_status' in item and 'user' in item['retweeted_status']:
             status = item['retweeted_status']
             merge(status, 'user', status['created_at'])
         # Direct messages
         if 'sender' in item:
             merge(item, 'sender', item['created_at'])
         if 'recipient' in item:
             merge(item, 'recipient', item['created_at'])
     return items
Пример #4
0
 def __call__(self, environ, start_response):
     """Respond to a request when called in the usual WSGI way.

     Serves static files for GET and HEAD, honouring
     ``If-Modified-Since`` and ``If-None-Match``.  A PUT with a
     positive ``CONTENT_LENGTH`` whose path matches one of the globs in
     ``self.puttable`` writes the request body to the resolved file.
     Any other method gets the ``method_not_allowed`` response.

     Returns the WSGI response iterable.
     """
     path_info = environ.get('PATH_INFO', '')
     full_path = self._full_path(path_info)
     if not self._is_under_root(full_path):
         return self.not_found(environ, start_response)
     if path.isdir(full_path):
         if full_path[-1] != '/' or full_path == self.root:
             # Redirect directory requests to the slash-terminated URL,
             # keeping any query string.
             location = util.request_uri(environ, include_query=False) + '/'
             if environ.get('QUERY_STRING'):
                 location += '?' + environ.get('QUERY_STRING')
             headers = [('Location', location)]
             return self.moved_permanently(environ, start_response, headers)
         else:
             full_path = self._full_path(path_info + self.index_file)
     # A missing, empty or non-numeric CONTENT_LENGTH counts as "no body".
     try:
         sz = int(environ['CONTENT_LENGTH'])
     except (KeyError, TypeError, ValueError):
         sz = 0
     if environ['REQUEST_METHOD'] == 'PUT' and sz > 0:
         for putglob in self.puttable:
             if fnmatch(path_info, putglob):
                 data = environ['wsgi.input'].read(sz)
                 try:
                     with open(full_path, "wb") as f:
                         f.write(data)
                 except Exception as e:
                     # Request boundary: report any write failure as a
                     # server error rather than letting it escape.
                     print(e)
                     return self.server_error(environ, start_response)
                 # Built outside the try so a failure while responding is
                 # not mistaken for a failed write.
                 return self.success_no_content(environ, start_response)
     if environ['REQUEST_METHOD'] not in ('GET', 'HEAD'):
         headers = [('Allow', 'GET, HEAD')]
         return self.method_not_allowed(environ, start_response, headers)
     content_type = self._guess_type(full_path)
     try:
         etag, last_modified = self._conditions(full_path, environ)
         headers = [('Date', rfc822.formatdate(time.time())),
                    ('Last-Modified', last_modified),
                    ('ETag', etag)]
         if_modified = environ.get('HTTP_IF_MODIFIED_SINCE')
         if if_modified and (rfc822.parsedate(if_modified)
                             >= rfc822.parsedate(last_modified)):
             return self.not_modified(environ, start_response, headers)
         if_none = environ.get('HTTP_IF_NONE_MATCH')
         if if_none and (if_none == '*' or etag in if_none):
             return self.not_modified(environ, start_response, headers)
         file_like = self._file_like(full_path)
         headers.append(('Content-Type', content_type))
         start_response("200 OK", headers)
         if environ['REQUEST_METHOD'] == 'GET':
             return self._body(full_path, environ, file_like)
         else:
             # HEAD: headers only.
             return ['']
     except (IOError, OSError) as e:
         print(e)
         return self.not_found(environ, start_response)
Пример #5
0
 def __call__(self, environ, start_response):
     """Respond to a request when called in the usual WSGI way.

     Serves static files for GET and HEAD, honouring
     ``If-Modified-Since`` and ``If-None-Match``.  A PUT with a
     positive ``CONTENT_LENGTH`` whose path matches one of the globs in
     ``self.puttable`` writes the request body to the resolved file.
     Any other method gets the ``method_not_allowed`` response.

     Returns the WSGI response iterable.
     """
     path_info = environ.get('PATH_INFO', '')
     full_path = self._full_path(path_info)
     if not self._is_under_root(full_path):
         return self.not_found(environ, start_response)
     if path.isdir(full_path):
         if full_path[-1] != '/' or full_path == self.root:
             # Redirect directory requests to the slash-terminated URL,
             # keeping any query string.
             location = util.request_uri(environ, include_query=False) + '/'
             if environ.get('QUERY_STRING'):
                 location += '?' + environ.get('QUERY_STRING')
             headers = [('Location', location)]
             return self.moved_permanently(environ, start_response, headers)
         else:
             full_path = self._full_path(path_info + self.index_file)
     # A missing, empty or non-numeric CONTENT_LENGTH counts as "no body".
     try:
         sz = int(environ['CONTENT_LENGTH'])
     except (KeyError, TypeError, ValueError):
         sz = 0
     if environ['REQUEST_METHOD'] == 'PUT' and sz > 0:
         for putglob in self.puttable:
             if fnmatch(path_info, putglob):
                 data = environ['wsgi.input'].read(sz)
                 try:
                     with open(full_path, "wb") as f:
                         f.write(data)
                 except Exception as e:
                     # Request boundary: report any write failure as a
                     # server error rather than letting it escape.
                     print(e)
                     return self.server_error(environ, start_response)
                 # Built outside the try so a failure while responding is
                 # not mistaken for a failed write.
                 return self.success_no_content(environ, start_response)
     if environ['REQUEST_METHOD'] not in ('GET', 'HEAD'):
         headers = [('Allow', 'GET, HEAD')]
         return self.method_not_allowed(environ, start_response, headers)
     content_type = self._guess_type(full_path)
     try:
         etag, last_modified = self._conditions(full_path, environ)
         headers = [('Date', rfc822.formatdate(time.time())),
                    ('Last-Modified', last_modified), ('ETag', etag)]
         if_modified = environ.get('HTTP_IF_MODIFIED_SINCE')
         if if_modified and (rfc822.parsedate(if_modified) >=
                             rfc822.parsedate(last_modified)):
             return self.not_modified(environ, start_response, headers)
         if_none = environ.get('HTTP_IF_NONE_MATCH')
         if if_none and (if_none == '*' or etag in if_none):
             return self.not_modified(environ, start_response, headers)
         file_like = self._file_like(full_path)
         headers.append(('Content-Type', content_type))
         start_response("200 OK", headers)
         if environ['REQUEST_METHOD'] == 'GET':
             return self._body(full_path, environ, file_like)
         else:
             # HEAD: headers only.
             return ['']
     except (IOError, OSError) as e:
         print(e)
         return self.not_found(environ, start_response)
Пример #6
0
def guardar(RSS, resource, num, usuario):
    """Fetch a feed and store its channel and entries in the database.

    The feed URL is *RSS* when *resource* equals ``"rss"``, otherwise
    *resource* itself.  If the feed exposes a title and link, the matching
    ``Channels`` row (looked up by title) is created or refreshed, every
    entry is created or refreshed as a ``CanalesNum`` row, and the channel's
    ``NumMensaje`` counter is increased by the number of newly created
    entries.

    Returns an ``HttpResponseRedirect`` to the channel page when *resource*
    is not ``"rss"``; otherwise returns ``None``.  *usuario* is not used.
    """
    rss = RSS if resource == "rss" else resource
    try:
        d = feedparser.parse(rss)
        title = d.feed.title
        url = d.feed.link
        Rss = True
    except Exception:
        # Anything that does not yield a title and link is not treated as
        # a feed; the function then only performs the redirect below.
        Rss = False
    if Rss:
        try:
            # NOTE(review): the href comes from the remote feed and is put
            # into HTML unescaped.
            logo = '<img src="'+d.feed.image.href+'">'
        except (AttributeError, KeyError, TypeError):
            # The feed carries no usable image.
            logo = ""
        date = d.feed.published
        date = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(date)))
        # Store new channels for /canales
        try:
            T_Canal = Channels.objects.get(Title = title)
            T_Canal.Logo = logo
            T_Canal.RSS = rss
            T_Canal.Date = date
            T_Canal.Url = url
        except Channels.DoesNotExist:
            T_Canal = Channels(Title = title, Logo = logo, RSS = rss, Date = date, Url = url, NumMensaje = 0)
        T_Canal.save()
        numNoticias = 0
        # Store the new entries for /canales/num
        for N_noticia in d.entries:
            titulo = N_noticia.title
            link = N_noticia.link
            date2 = N_noticia.published
            date2 = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(date2)))
            descripcion = N_noticia.description
            titular = d.feed.title
            try:
                T_num = CanalesNum.objects.get(Titulo = titulo, CanalId = T_Canal.id)
                T_num.Link = link
                T_num.Date = date2
                T_num.Descripcion = descripcion
                T_num.Titular = titular
            except CanalesNum.DoesNotExist:
                T_num = CanalesNum(Titulo = titulo, CanalId = T_Canal.id, Link = link, Date = date2, Descripcion = descripcion, Titular = titular)
                # Only entries created just now count as new messages.
                numNoticias += 1
            T_num.save()
        T_Canal.NumMensaje = T_Canal.NumMensaje + numNoticias
        T_Canal.save()
    # Reached via the refresh button on /canales/num
    if resource != "rss":
        return HttpResponseRedirect("http://localhost:1234/canales/"+num)
Пример #7
0
    def index(self, req):
        """Handle GET and HEAD requests for static files.

        Directory requests, requests without a usable ``path`` URL variable
        and paths that resolve outside the static directory are answered
        with ``HTTPForbidden``.  Conditional requests that match the file's
        modification date or ETag are answered with ``HTTPNotModified``.

        Returns the populated ``Response`` for an existing file, or ``None``
        when the file does not exist or the method is neither GET nor HEAD.
        """
        static_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '../static/'))

        # filter out ..
        try:
            static_path = req.urlvars['path'].replace('/..', '')
        except (KeyError, AttributeError, TypeError):
            return HTTPForbidden()

        path = os.path.join(static_dir, static_path)
        # The textual filter above misses a leading ".." and an absolute
        # path makes os.path.join drop static_dir, so check containment on
        # the normalised result.
        resolved = os.path.normpath(path)
        if resolved != static_dir and not resolved.startswith(static_dir + os.sep):
            return HTTPForbidden()
        if os.path.isdir(path):
            return HTTPForbidden()

        if req.method not in ('GET', 'HEAD'):
            return None
        if not os.path.isfile(path):
            return None

        etag, modified, mime_type, size = self._get_stat(path)

        res = Response()
        res.headers['content-type'] = mime_type
        res.date = rfc822.formatdate(time.time())
        res.last_modified = modified
        res.etag = etag

        # CGI-style HTTP_* keys live in the WSGI environ, not in the header
        # mapping.
        if_modified_since = req.environ.get('HTTP_IF_MODIFIED_SINCE')
        if if_modified_since:
            since = rfc822.parsedate(if_modified_since)
            # NOTE(review): assumes _get_stat returns `modified` as an
            # RFC 822 date string - confirm.  The comparison is skipped
            # when it cannot be parsed.
            try:
                current = rfc822.parsedate(modified)
            except (TypeError, AttributeError):
                current = None
            if since is not None and current is not None and since >= current:
                return HTTPNotModified()

        if_none_match = req.environ.get('HTTP_IF_NONE_MATCH')
        if if_none_match:
            if if_none_match == '*' or etag in if_none_match:
                return HTTPNotModified()

        # set the response body
        if req.method == 'GET':
            fd = open(path, 'rb')
            if 'wsgi.file_wrapper' in req.environ:
                res.app_iter = req.environ['wsgi.file_wrapper'](fd)
            else:
                # Stream in 8 KiB chunks until read() returns ''.
                res.app_iter = iter(lambda: fd.read(8192), '')
            res.content_length = size
        else:
            res.body = ''

        return res
Пример #8
0
    def index(self, req):
        """Handle GET and HEAD requests for static files.

        Directory requests, requests without a usable ``path`` URL variable
        and paths that resolve outside the static directory are answered
        with ``HTTPForbidden``.  Conditional requests that match the file's
        modification date or ETag are answered with ``HTTPNotModified``.

        Returns the populated ``Response`` for an existing file, or ``None``
        when the file does not exist or the method is neither GET nor HEAD.
        """
        static_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '../static/'))

        # filter out ..
        try:
            static_path = req.urlvars['path'].replace('/..', '')
        except (KeyError, AttributeError, TypeError):
            return HTTPForbidden()

        path = os.path.join(static_dir, static_path)
        # The textual filter above misses a leading ".." and an absolute
        # path makes os.path.join drop static_dir, so check containment on
        # the normalised result.
        resolved = os.path.normpath(path)
        if resolved != static_dir and not resolved.startswith(static_dir + os.sep):
            return HTTPForbidden()
        if os.path.isdir(path):
            return HTTPForbidden()

        if req.method not in ('GET', 'HEAD'):
            return None
        if not os.path.isfile(path):
            return None

        etag, modified, mime_type, size = self._get_stat(path)

        res = Response()
        res.headers['content-type'] = mime_type
        res.date = rfc822.formatdate(time.time())
        res.last_modified = modified
        res.etag = etag

        # CGI-style HTTP_* keys live in the WSGI environ, not in the header
        # mapping.
        if_modified_since = req.environ.get('HTTP_IF_MODIFIED_SINCE')
        if if_modified_since:
            since = rfc822.parsedate(if_modified_since)
            # NOTE(review): assumes _get_stat returns `modified` as an
            # RFC 822 date string - confirm.  The comparison is skipped
            # when it cannot be parsed.
            try:
                current = rfc822.parsedate(modified)
            except (TypeError, AttributeError):
                current = None
            if since is not None and current is not None and since >= current:
                return HTTPNotModified()

        if_none_match = req.environ.get('HTTP_IF_NONE_MATCH')
        if if_none_match:
            if if_none_match == '*' or etag in if_none_match:
                return HTTPNotModified()

        # set the response body
        if req.method == 'GET':
            fd = open(path, 'rb')
            if 'wsgi.file_wrapper' in req.environ:
                res.app_iter = req.environ['wsgi.file_wrapper'](fd)
            else:
                # Stream in 8 KiB chunks until read() returns ''.
                res.app_iter = iter(lambda: fd.read(8192), '')
            res.content_length = size
        else:
            res.body = ''

        return res
Пример #9
0
def guardar(RSS, resource, num, usuario):
    """Fetch a feed and store its channel and entries in the database.

    The feed URL is *RSS* when *resource* equals ``"rss"``, otherwise
    *resource* itself.  The matching ``Channels`` row (looked up by title)
    is created or refreshed with ``NumMensaje`` set to the number of entries
    in the feed, and every entry is created or refreshed as a ``CanalNum``
    row keyed by entry title and feed title.

    Returns an ``HttpResponseRedirect`` to the channel page when *resource*
    is not ``"rss"``; otherwise returns ``None``.  *usuario* is not used.
    """
    rss = RSS if resource == "rss" else resource
    try:
        d = feedparser.parse(rss)
        Rss = True
    except Exception:
        # If the feed cannot be fetched or parsed, skip storing anything.
        Rss = False
    if Rss:
        title = d.feed.title
        url = d.feed.link
        Num_Mensaje = len(d.entries)
        try:
            # NOTE(review): the href comes from the remote feed and is put
            # into HTML unescaped.
            logo = '<img src="'+d.feed.image.href+'">'
        except (AttributeError, KeyError, TypeError):
            # The feed carries no usable image.
            logo = ""
        date = d.feed.published
        date = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(date)))
        try:
            T_Canal = Channels.objects.get(Title = title)
            T_Canal.Logo = logo
            T_Canal.RSS = rss
            T_Canal.Date = date
            T_Canal.Url = url
            T_Canal.NumMensaje = Num_Mensaje
        except Channels.DoesNotExist:
            T_Canal = Channels(Title = title, Logo = logo, RSS = rss, Date = date, Url = url, NumMensaje = Num_Mensaje)
        T_Canal.save()
        for N_noticia in d.entries:
            titulo = N_noticia.title
            link = N_noticia.link
            # The loop variable is N_noticia; the original read these two
            # fields from an undefined name.
            fecha = N_noticia.published
            fecha = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(fecha)))
            descripcion = N_noticia.description
            titular = d.feed.title
            try:
                T_num = CanalNum.objects.get(Titulo = titulo, Titular = titular)
                T_num.Link = link
                T_num.Date = fecha
                T_num.Descripcion = descripcion
            except CanalNum.DoesNotExist:
                T_num = CanalNum(Titulo = titulo, Titular = titular, Link = link, Date = fecha, Descripcion = descripcion)
            T_num.save()
    if resource != "rss":
        return HttpResponseRedirect("http://localhost:1234/canales/"+num)
Пример #10
0
def update_rssfeed():
    """Regenerate the RDF feed file from the pickled planet data.

    Loads the pickled object stored at ``planetfile``, takes the 15 entries
    with the greatest ``pubDatedate``, renders each through
    ``templates.rdf_item`` and writes the assembled ``templates.rdf``
    document, UTF-8 encoded, to ``rssfeedfile``.

    Side effects: sets ``subjects`` and ``dcdate`` on each selected entry
    and prints ``rss done`` when finished.
    """
    # NOTE(review): unpickling runs arbitrary code if the file can be
    # tampered with; planetfile is assumed to be trusted, locally produced
    # data.
    with open(planetfile, 'rb') as pf:
        data = pickle.load(pf)

    # Newest first, capped at 15 entries.
    entries = sorted((x.pubDatedate, x) for x in data.entries)
    entries.reverse()
    entries = entries[:15]

    rdf_li_resources = '\n'.join(
        '<rdf:li resource="%s" />' % entry.link for _, entry in entries)
    template_item = templates.rdf_item
    template_subject = '<dc:subject>%s</dc:subject>'
    template_date = '<dc:date>%s</dc:date>'
    template_items = []
    for _, entry in entries:
        subjects = ''
        if entry.categories:
            subjects = '\n'.join(template_subject % x for x in entry.categories)
        entry.subjects = subjects
        date = rfc822.parsedate(entry.publishedDate)
        # NOTE(review): the UTC offset is hard-coded to -07:00 regardless of
        # the offset in publishedDate.
        entry.dcdate = template_date % time.strftime("%Y-%m-%dT%H:%M:%S-07:00", date)
        template_items.append(template_item % entry.__dict__)
    items_rdf_about = '\n'.join(template_items)

    template = templates.rdf % (rdf_li_resources, items_rdf_about)
    with open(rssfeedfile, 'w') as rff:
        rff.write(template.encode('utf-8', 'ignore'))
    print('rss done')
Пример #11
0
    def connect(self,):
        """Create the WebDAV client and measure the clock skew to the server.

        Builds ``self.wc`` from the host and scheme of ``self.url``,
        configures basic authentication from ``self.login`` and
        ``self.passwd``, issues one OPTIONS request for ``/`` and stores the
        difference between the local UTC time and the server's ``Date``
        header, in seconds, in ``self.timedelta``.  Finally calls
        ``self._check_notes_folder()``.

        Returns ``None``; the measured skew is only available through
        ``self.timedelta``.
        """
        urlparsed = requests.utils.urlparse(self.url)

        self.wc = WebDAVClient(host=urlparsed.netloc,
                               protocol=urlparsed.scheme)

        self.wc.setbasicauth(self.login.encode('utf-8'),
                             self.passwd.encode('utf-8'))

        local_time = datetime.datetime.utcnow()

        # One round-trip is enough: try both spellings of the header name
        # on the same response.
        headers = self.wc.options('/').headers
        response = headers.get('date')
        if response is None:
            response = headers.get('Date')

        # NOTE(review): if the server sends no Date header, the mktime()
        # call below fails.
        remote_datetime = rfc822.parsedate(response)

        # Both tuples are UTC and both go through mktime(), so its
        # local-time interpretation cancels out in the difference.
        self.timedelta = time.mktime(local_time.utctimetuple()) \
            - time.mktime(remote_datetime)

        self._check_notes_folder()

        return None
Пример #12
0
		def saveData(feed):
			"""Return the recent entries of *feed* as ``{'created_at', 'title'}`` dicts.

			Each entry's ``text`` is UTF-8 encoded, passed through
			``strip_accents``, flattened to one line, stripped of a trailing
			link or "AO VIVO" suffix and of a trailing " - Via ..." credit,
			and cut to 90 characters.  Only entries whose ``created_at`` lies
			within the last 12 hours are kept.
			"""
			link_suffix = re.compile('^(.*)(: http://.*)$')
			live_suffix = re.compile('^(.*)( Siga AO VIVO por aqui!)$')
			via_credit = re.compile('^(.*) - Via .*$')
			twelve_hours = 60*60*12

			santos = []
			for entry in feed:
				title = strip_accents(entry['text'].encode('utf-8'))
				title = title.replace("\n", " ")

				# The live-game suffix takes precedence over the link suffix.
				found = live_suffix.search(title) or link_suffix.search(title)
				if found is not None:
					title = found.group(1)[:90]
				else:
					title = title[:90]

				credited = via_credit.search(title)
				if credited is not None:
					title = credited.group(1)[:90]

				created_at = rfc822.parsedate(entry['created_at'])
				if time.mktime(created_at) < time.time() - twelve_hours:
					continue
				santos.append({'created_at': created_at, 'title': title})
			return santos
Пример #13
0
    def parse_headers(self, header, meta):
        """Copy the relevant fields of a message *header* mapping into *meta*.

        Sets ``meta.title`` from ``Subject`` and ``meta.foreign_id`` from
        ``Message-Id``.  Sets ``meta.author`` and registers the address from
        ``From``, registers every address in ``To``/``CC``/``BCC``, adds the
        parsed ``Date`` (if it parses) and stores all headers, stringified,
        in ``meta.headers``.  Returns *meta*.
        """
        meta.title = header.get("Subject")

        message_id = header.get("Message-Id")
        if message_id:
            meta.foreign_id = unicode(message_id)

        sender = header.get("From")
        if sender:
            parsed = address.parse(sender)
            if parsed is not None:
                meta.author = parsed.to_unicode()
                meta.add_email(parsed.address)

        for field in ("To", "CC", "BCC"):
            recipients = header.get(field)
            if not recipients:
                continue
            for recipient in address.parse_list(recipients):
                meta.add_email(recipient.address)

        # parsedate() yields None for a missing or unparseable Date header.
        sent = rfc822.parsedate(header.get("Date"))
        if sent is not None:
            meta.add_date(datetime.fromtimestamp(mktime(sent)))

        meta.headers = dict(
            (name, unicode(value)) for name, value in header.items())
        return meta
Пример #14
0
    def getRemainingRateLimit(self):
        """Return ``[seconds_to_wait, remaining_calls]`` for the ids endpoint.

        Queries the rate-limit status through ``self.makeApiCall`` and reads
        the entry for ``'/<requestType>/ids'``.

        Returns:
            ``[None, None]`` when the API call reports an error or the
            status has no entry for the endpoint; ``[5, 1]`` when the entry
            carries no reset time; otherwise the number of seconds until
            the reset time plus ten minutes (never negative) and the
            remaining call count as an int.
        """
        ## rate_limit_status = self.api.GetRateLimitStatus()
        (ret_code,
         rate_limit_status) = self.makeApiCall(self.api.GetRateLimitStatus,
                                               self.requestType)

        ## if there is an error
        if ret_code != 0:
            return [None, None]
        friend_id_limit = rate_limit_status.get(
            '/' + self.requestType + '/ids', None)
        if friend_id_limit is None:
            ## no data for this endpoint: report it like a failed call
            return [None, None]
        reset_time = friend_id_limit.get('reset', None)
        limit = friend_id_limit.get('remaining', None)

        if not reset_time:
            return [5, 1]

        # Put the reset time into a datetime object.  Only the first six
        # fields (year .. second) are meaningful in parsedate()'s tuple.
        reset = datetime.datetime(*rfc822.parsedate(reset_time)[:6])

        # Difference between now and the reset time + 10 minutes.
        delta = reset + datetime.timedelta(
            minutes=10) - datetime.datetime.utcnow()

        # timedelta.seconds ignores the day component (and wraps for
        # negative deltas), so build the total and never report a negative
        # wait.
        wait = delta.days * 86400 + delta.seconds
        return [max(0, int(wait)), int(limit)]
Пример #15
0
    def on_data(self, data):
        """Called when raw data is received from the connection.

        *data* is decoded as JSON.  A status that is not a retweet (neither
        a native one nor one whose text starts with "RT") is converted
        into a drop dict and put on ``self.drop_queue`` together with the
        current time.  Receiving such a status also disconnects and clears
        ``self.firehose`` if one is set.  ``delete`` and ``limit`` notices
        are forwarded to ``self.on_delete`` / ``self.on_limit``.

        Any exception raised while handling the message is swallowed.
        Always returns ``None``.
        """
        # Local import: only needed to normalise the tweet date below.
        import calendar

        try:
            payload = json.loads(data)

            if ('in_reply_to_status_id' in payload and
                    'retweeted_status' not in payload):
                if self.firehose is not None:
                    # Disconnect the current firehose connection and kill
                    # reference to the firehose worker thread
                    self.firehose.disconnect_firehose(self.predicate_type)
                    self.firehose = None

                # Twitter uses RFC822 dates, parse them as such.
                # parsedate() leaves the weekday field at 0, so passing its
                # tuple straight to strftime('%a') always printed "Mon".
                # Round-trip through timegm()/gmtime() to get a complete
                # UTC struct_time.
                created = rfc822.parsedate(payload['created_at'])
                droplet_date_pub = time.strftime(
                    '%a, %d %b %Y %H:%M:%S +0000',
                    time.gmtime(calendar.timegm(created)))

                # Filter out non-standard Twitter RTs
                droplet_content = payload['text'].strip()
                is_manual_rt = re.search(r'^(RT\:?)\s*', droplet_content,
                                         re.I) is not None
                if not is_manual_rt:
                    screen_name, status_id = payload['user']['screen_name'], \
                        payload['id_str']

                    # Permalink for the tweet
                    tweet_url = "https://twitter.com/%s/statuses/%s" % \
                        (screen_name, status_id)

                    links = [{'url': tweet_url, 'original_url': True}]
                    drop = {
                        'channel': 'twitter',
                        'identity_orig_id': payload['user']['id_str'],
                        'in_reply_to_user_id': payload['in_reply_to_user_id_str'],
                        'identity_name': payload['user']['name'],
                        'identity_username': screen_name,
                        'identity_avatar': payload['user']['profile_image_url'],
                        'droplet_orig_id': status_id,
                        'droplet_type': 'original',
                        'droplet_title': droplet_content,
                        'droplet_content': droplet_content,
                        'droplet_raw': droplet_content,
                        'droplet_locale': payload['user']['lang'],
                        'droplet_date_pub': droplet_date_pub,
                        'links': links}
                    # Non-blocking put.
                    self.drop_queue.put((time.time(), drop), False)

            elif 'delete' in payload:
                status = payload['delete']['status']
                self.on_delete(status['id_str'], status['user_id_str'])
            elif 'limit' in payload:
                track = payload['limit']['track']
                self.on_limit(track)

        except Exception:
            # Best effort: data that is not valid JSON, or lacks an expected
            # field, is dropped silently.
            # NOTE(review): this also hides genuine bugs in the handlers
            # above; consider logging here.
            pass
Пример #16
0
    def guessSentTime(self, default=None):
        """
        Try to determine the time this message claims to have been sent by
        analyzing various headers.

        The C{Date} header is tried first.  If it is missing or does not
        parse, the C{Received} headers are examined from last to first and
        the date after the final C{'; '} on each header's last line is
        tried.

        @return: a L{Time} instance, or C{default} if no header yields a
            usable date.
        """
        try:
            sentHeader = self.getHeader(u'date')
        except equotient.NoSuchHeader:
            pass
        else:
            try:
                return Time.fromRFC2822(sentHeader)
            except ValueError:
                # Unparseable Date header: fall back to Received headers.
                pass

        for received in reversed(list(self.getHeaders(u'received'))):
            lines = received.value.splitlines()
            if not lines:
                continue
            # The timestamp follows the last '; ' on the final line.
            candidate = lines[-1].split('; ')[-1]
            try:
                when = rfc822.parsedate(candidate)
            except ValueError:
                continue
            if when is not None:
                return Time.fromStructTime(when)

        return default
Пример #17
0
def _fake_fetch(url_path, headers):
    """'Fetches' when using the fake kake-server.

    Paths under ``/genfiles`` are (re)built through ``kake.make.build``
    first; any other path must already exist below ``project_root``.

    Returns a ``(body, status, headers)`` tuple:
      * ``(message, 400, {})`` when the build rejects the request,
      * ``(None, 500, {})`` when the build fails,
      * ``(None, 404, {})`` when a non-generated file does not exist,
      * ``('', 304, {})`` when the file has not changed (the build reports
        no change, or the file is not newer than ``If-Modified-Since``),
      * ``(contents, 200, {})`` otherwise.
    """
    # We late-import here since these are not always ok to import in prod
    import calendar
    import kake.make
    abs_filename = project_root.join(url_path[1:])
    if url_path.startswith('/genfiles'):
        try:
            file_has_changed = kake.make.build(url_path[1:])
        except kake.make.BadRequestFailure as failure:
            logging.error(failure.message)
            return (failure.message, 400, {})
        except (IOError, kake.make.CompileFailure) as why:
            logging.error('Unable to build %s: %s' % (url_path[1:], why))
            return (None, 500, {})
        logging.info('Building %s' % url_path[1:])
    else:
        if not os.path.isfile(abs_filename):
            return (None, 404, {})

        file_has_changed = True
        # Header names are matched case-insensitively.
        ims = [
            v for (k, v) in headers.items() if k.lower() == 'if-modified-since'
        ]
        if ims:
            parsed_ims = rfc822.parsedate(ims[0])
            # A malformed date is treated like an absent header: serve the
            # file instead of failing.
            if parsed_ims is not None:
                # HTTP dates are GMT, so convert with timegm(); mktime()
                # would read the tuple as local time and skew the
                # comparison by the UTC offset.
                if os.path.getmtime(abs_filename) <= calendar.timegm(parsed_ims):
                    file_has_changed = False

    if not file_has_changed:
        return ('', 304, {})

    with open(abs_filename) as f:
        return (f.read(), 200, {})
Пример #18
0
def collect_tweets():
    """Collect new tweets about Firefox.

    Runs one Twitter search through tweepy and stores each result that
    survives _filter_tweet() as a Tweet row.  The whole run is timed with
    the 'customercare.tweets.time_elapsed' statsd timer.

    Returns None in every case; it returns early when the Twitter request
    fails or when the response carries no results.
    """
    with statsd.timer('customercare.tweets.time_elapsed'):
        # Authenticate with the application and access credentials taken
        # from settings.
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)

        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)

        # RawParser makes the API hand back the undecoded response, which is
        # parsed with json.loads() below.
        api = tweepy.API(auth, parser=RawParser())

        search_options = {
            'q': 'firefox OR #fxinput OR @firefoxbrasil OR #firefoxos',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' % (
                settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets
        try:
            raw_data = json.loads(str(api.search(**search_options)))
        except tweepy.TweepError, e:
            # A failed request is logged and the run is abandoned.
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                # A falsy result from the filter means the tweet is skipped.
                continue

            # parsedate() drops the zone offset and timegm() reads the
            # remaining fields as UTC.
            # NOTE(review): parsedate() returns None for an unparseable
            # 'created_at', which would make timegm() raise -- confirm the
            # API always sends a valid date.
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))

            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # presumably a duplicate tweet_id; the row is skipped -- TODO
                # confirm against the Tweet model's constraints.
                pass
Пример #19
0
def date_conversion(val):
    """Convert a date string in a recognised format to epoch seconds.

    Recognised formats:
        2012-12-31, 31/12/2012, 31.12.2012, 20121231
        and RFC 822 style dates such as
        Jan 24 2003 15:26:20 +0000
        Sat, 13 May 2006 06:15 +0000
        Mon, 01 Jul 2002 18:38:25
        26 Aug 2009 02:07:34 +0400

    The date fields are interpreted with time.mktime(), i.e. as local
    time; a zone offset in an RFC 822 style date is ignored.

    Returns:
        A float timestamp when ``val`` matches a known format, otherwise
        ``val`` unchanged (including when it looks like an RFC 822 date
        but cannot be parsed).

    Raises:
        ValueError: if ``val`` matches a pattern but holds impossible
            values (for example month 13).
    """
    # (pattern, strptime format) for the purely numeric layouts, tried in
    # this order.
    numeric_formats = (
        (r'^\d{4}\-\d\d\-\d\d$', '%Y-%m-%d'),    # 2012-12-31
        (r'^\d\d?\/\d\d\/\d{4}$', '%d/%m/%Y'),   # 31/12/2012
        (r'^\d\d?\.\d\d\.\d{4}$', '%d.%m.%Y'),   # 31.12.2012
        (r'^\d{4}\d{2}\d{2}$', '%Y%m%d'),        # 20121231
    )
    for pattern, fmt in numeric_formats:
        if re.match(pattern, val):
            return time.mktime(datetime.strptime(val, fmt).timetuple())

    # The second pattern covers dates that start with the month name.  Its
    # year part must be written {2,4}; "{2-4}" is not a quantifier and only
    # matched those literal characters, so such dates were never recognised.
    looks_like_rfc822 = (
        re.match(r'^(\w+\,\s+)?\d+ \w+ \d{4}\s+\d\d?\:\d\d(:\d\d)?', val) or
        re.match(r'^\w+,?\s+?\d+ \d{2,4}\s+\d\d?\:\d\d(:\d\d)?', val))
    if looks_like_rfc822:
        # email.utils is the maintained replacement for the rfc822 module.
        from email.utils import parsedate
        parsed = parsedate(val)
        if parsed is None:
            # Matched the loose pattern but is not a real date: leave the
            # value untouched, like any other unrecognised input.
            return val
        # [year, month, day, hour, min, sec]
        yyyy, mm, dd, hh, mins, ss = parsed[:6]
        return time.mktime(datetime(yyyy, mm, dd, hh, mins, ss).timetuple())

    return val
Пример #20
0
Файл: views.py Проект: mitnk/mc
def my_tweets(request):
    """Show this year's stored tweets, or import new ones on POST.

    Non-POST requests render "webapps/favo_tweets.html" with the MyTweet
    rows whose ``added`` year is the current year.

    POST requests answer with a plain HttpResponse.  When the posted
    ``action`` is "save", up to 50 timeline entries (optionally bounded by
    the posted ``max_id``) are fetched; entries whose text starts with "@"
    or whose id is already stored are skipped.  The body is then the number
    of rows created followed by a newline; for any other action it is
    "blank".
    """
    if request.method != "POST":
        messages = MyTweet.objects.filter(added__year=datetime.date.today().year)
        return render_to_response("webapps/favo_tweets.html",
            {'messages': messages},
            context_instance=RequestContext(request))

    token = settings.TWITCN_PRIVATE_TOKEN
    api = getPrivateApi(token)
    info = "blank"
    if request.POST.get('action') == "save":
        max_id = request.POST.get('max_id', None)
        timeline = api.GetUserTimeline(user='******', max_id=max_id, count=50)
        saved = 0
        for status in timeline:
            # Skip replies first; the database is only queried for the rest.
            if status.text[0] == "@":
                continue
            if MyTweet.objects.filter(tweet_id=status.id).count() > 0:
                continue
            date_fields = rfc822.parsedate(status.created_at)[:6]
            MyTweet.objects.create(name=status.user.screen_name,
                                   text=smart_str(status.text),
                                   tweet_id=status.id,
                                   added=datetime.datetime(*date_fields))
            saved += 1
        info = "%s\n" % saved
    return HttpResponse(info)
Пример #21
0
    def guessSentTime(self, default=None):
        """
        Guess when this message claims to have been sent, using its headers.

        The C{date} header is tried first.  If it is missing or cannot be
        parsed, the C{received} headers are walked in reverse order and the
        text after the last C{'; '} on each header's final line is parsed as
        a date; the first one that parses is used.

        @param default: value to return when no header yields a time.

        @return: a L{Time} instance, or C{default} if we don't have a guess.
        """
        try:
            sentHeader = self.getHeader(u'date')
        except equotient.NoSuchHeader:
            sentHeader = None
        else:
            try:
                return Time.fromRFC2822(sentHeader)
            except ValueError:
                # Unparseable date header: fall back to received headers.
                pass

        for received in reversed(list(self.getHeaders(u'received'))):
            headerLines = received.value.splitlines()
            if not headerLines:
                continue
            stampParts = headerLines[-1].split('; ')
            if not stampParts:
                continue
            try:
                when = rfc822.parsedate(stampParts[-1])
            except ValueError:
                continue
            if when is None:
                continue
            return Time.fromStructTime(when)

        return default
Пример #22
0
    def verify_online(self):
        """Check with a HEAD request that the download URL is being served.

        Progress and warnings (status line, unexpected content type,
        last-modified time) are written to stderr.

        Returns:
            (True, None) when the URL answers 200 and does not report a
            content length below 4000 bytes; otherwise (False, reason),
            where reason is the HTTPError or a message string.
        """
        url = self.download_url()

        try:
            res = urllib2.urlopen(HeadRequest(url))
        except urllib2.HTTPError as e:
            return (False, e)

        # try/finally so the response is closed on every return path.
        try:
            if res.code != 200:
                return (False, "%d %s" % (res.code, res.msg))

            sys.stderr.write("HEAD %s returned %d %s\n" % (url, res.code, res.msg))

            content_type = res.headers['content-type']
            if APK_CONTENT_TYPE != content_type:
                sys.stderr.write("warning: content type returned by %s should be %s, not %s\n" % (url, APK_CONTENT_TYPE, content_type))

            last_modified = res.headers.get('last-modified', None)
            if last_modified:
                parsed = rfc822.parsedate(last_modified)
                # parsedate() returns None for a malformed date; in that
                # case the raw header text is reported instead.
                if parsed is not None:
                    last_modified = datetime.datetime.fromtimestamp(time.mktime(parsed))
            sys.stderr.write("last modified %s\n" % last_modified)

            # Header values are strings; convert before comparing, otherwise
            # the size check can never trigger.
            size = res.headers.get('content-length', None)
            try:
                size = int(size) if size else None
            except ValueError:
                size = None
            if size is not None and size < 4000:
                return (False, "content length of %s was less than 4k." % url)

            return (True, None)
        finally:
            res.close()
Пример #23
0
def get_message(msg):
    """Extract the interesting headers of a mail message into a dict.

    Arguments:
        msg: a message object supporting header lookup by name, ``get()``
            and ``as_string()``.

    Returns:
        A dict with the keys "from" (result of rfc822.parseaddr), "to" and
        "cc" (address lists), "subject", "url" (Archived-At, falling back
        to X-Archived-At, with surrounding angle brackets removed; None if
        neither header is present), "date" (a datetime, or None when the
        Date header is missing or unparseable), "message_id",
        "in_reply_to" and "raw" (the full message text).
    """
    fr = rfc822.parseaddr(msg['from'])
    to = rfc822.AddressList(msg['to']).addresslist
    cc = rfc822.AddressList(msg['cc']).addresslist
    subject = msg['subject']

    # parsedate() returns None when it cannot parse the header.
    parsed_date = rfc822.parsedate(msg['date'])
    if parsed_date is None:
        date = None
    else:
        date = datetime.datetime(*parsed_date[:6])

    url = msg['Archived-At']
    if not url:
        url = msg['X-Archived-At']
    if url:
        url = url.strip("<>")
    else:
        url = None

    message_id = msg['Message-ID']
    in_reply_to = msg.get('In-Reply-To', None)

    return {
        "from": fr,
        "subject": subject,
        "to": to,
        "cc": cc,
        "url": url,
        "date": date,
        "message_id": message_id,
        "in_reply_to": in_reply_to,
        "raw": msg.as_string()
    }
Пример #24
0
def BuildText(tweet):
    """Render one tweet dict as an HTML fragment.

    Newlines become ``<br />``; when the tweet has an 'entities' entry its
    URLs, hashtags and user mentions are replaced by links built with
    CreateLink().  The result is wrapped in a "twitter-tweet" div that ends
    with a permalink labelled with the creation time (HH:MM:SS).
    """
    body = tweet['text'].replace('\r','').replace('\n','<br />')

    if 'entities' in tweet:
        for url_info in tweet['entities']['urls']:
            short_url = url_info['url']
            body = body.replace(
                short_url, CreateLink(short_url, url_info['expanded_url']))
        for tag_info in tweet['entities']['hashtags']:
            hashtag = '#' + tag_info['text']
            body = body.replace(
                hashtag,
                CreateLink("//twitter.com/search/%23"+tag_info['text'], hashtag))
        for mention_info in tweet['entities']['user_mentions']:
            handle = mention_info['screen_name']
            body = body.replace(
                '@' + handle, CreateLink("//twitter.com/"+handle, '@'+handle))

    html = '<div class="twitter-tweet"><span class="twitter-text">' + body + "</span>"

    # Seconds since the epoch, reading the parsed date fields as UTC.
    created_epoch = calendar.timegm(rfc822.parsedate(tweet['created_at']))
    created_at = datetime.datetime.fromtimestamp(created_epoch)

    screen_name = tweet['user']['screen_name']
    id_str = tweet['id_str']

    permalink = CreateLink(
        "//twitter.com/%s/status/%s"%(screen_name, id_str),
        created_at.strftime("%H:%M:%S"),
        "twitter-permalink")
    html += " " + permalink + "</div>"

    return html
Пример #25
0
  def validate(self):
    """Classify self.value as an RFC 2822 date and log exactly one event.

    Values matching rfc2822_re are logged as IncorrectDOW (weekday name
    disagrees with the date), InvalidRFC2822Date (the date fields are
    rejected by datetime.date), ImplausibleDate or ValidRFC2822Date.
    Other values have backslash escapes and parenthesised comments removed
    and are logged as ProblematicalRFC822Date when the remainder matches
    rfc822_re, else as InvalidRFC2822Date.
    """
    def report(event, value):
      # Every event carries the same parent/element context.
      self.log(event({"parent":self.parent.name, "element":self.name, "value":value}))

    if not self.rfc2822_re.match(self.value):
      previous, stripped = '', re.sub(r'[\\](.)','',self.value)
      # Remove parenthesised comments innermost-first until nothing changes.
      while previous != stripped:
        previous, stripped = stripped, re.sub('\([^(]*?\)',' ',stripped)
      if self.rfc822_re.match(stripped.strip().lower()):
        report(ProblematicalRFC822Date, self.value)
      else:
        report(InvalidRFC2822Date, self.value)
      return

    # NOTE(review): calendar is imported but not referenced in this method.
    import calendar
    value = parsedate(self.value)

    try:
      if value[0] > 1900:
        dow = datetime.date(*value[:3]).strftime("%a")
        has_weekday = self.value.find(',')>0
        if has_weekday and dow.lower() != self.value[:3].lower():
          report(IncorrectDOW, self.value[:3])
          return
    except ValueError as e:
      report(InvalidRFC2822Date, str(e))
      return

    if implausible_822(self.value):
      report(ImplausibleDate, self.value)
    else:
      report(ValidRFC2822Date, self.value)
Пример #26
0
    def getPage(self, request, response):
        """TODO: Change this to access the Indexer instead of the Store

        Resolve the request path to a page and render it.

        An empty path maps to 'HomePage'.  The hit is registered with the
        indexer, then in order: a conditional request may be answered with
        304, a standing redirect may be issued, a missing page is either
        redirected to an approximate alias or rendered as "meta/EmptyPage",
        and a page carrying an 'x-redirect' header is redirected.

        Returns:
            None when the response was completed here (304 or redirect),
            otherwise a (page.headers, rendered page) tuple.
        """
        path = urllib.unquote((request.getPathInfo())[1:])
        # replace the above with:
        # path = '/'.join(request.getRequestURL().split('/')[2:])
        # if the snakelet matches an arbitrary pattern
        if path == '':
            path = 'HomePage'
        a = self.getWebApp()
        ac = self.getAppContext()
        ac.indexer.registerHit(path)

        # The header comes from the client, so it may be malformed:
        # parsedate() then returns None and mktime() may reject odd values.
        # Such a header is ignored rather than failing the request.
        since = None
        ims_header = request.getHeader('If-Modified-Since')
        if ims_header is not None:
            parsed = rfc822.parsedate(ims_header)
            if parsed is not None:
                try:
                    since = time.mktime(parsed)
                except (ValueError, OverflowError):
                    since = None

        if since is not None:
            try:
                # see if our page has been rendered and has a modification time
                our = ac.cache.mtime('soup:' + path)
                if (since > our):
                    # Reset some headers
                    response.setHeader("Cache-Control", '')
                    response.setHeader("Pragma", '')
                    response.setHeader("Expires", '')
                    # Say bye bye
                    response.setResponse(304, "Not Modified")
                    return None
            except KeyError:
                pass

        # Check for any standing redirects
        redirect = self.checkRedirects(ac, path)
        if redirect:
            response.HTTPredirect(ac.base + redirect)
            return None

        # Check for a URL variant
        try:
            page = ac.store.getRevision(path)
        except IOError:
            alias = ac.indexer.resolveAlias(path,
                                            True)  # go for approximate matches
            if alias != path:
                response.HTTPredirect(ac.base + alias)
                return
            else:
                page = ac.store.getRevision("meta/EmptyPage")
                return (page.headers,
                        renderPage(ac, page, request, response,
                                   ac.indexer.done))
        if 'x-redirect' in page.headers.keys():
            uri = page.headers['x-redirect']
            (schema, netloc, path, parameters, query,
             fragment) = urlparse.urlparse(uri)
            # Known schemas are redirected to as-is; anything else is
            # treated as a path below the application base.
            if schema in self.i18n['uri_schemas'].keys():
                path = uri
            else:
                path = ac.base + path
            response.HTTPredirect(path)
            return
        return (page.headers,
                renderPage(ac, page, request, response, ac.indexer.done))
Пример #27
0
    def test_1(self):
        """
        Check that the handler answers with an accurate Last-Modified header.
        """

        start_response_spy = mock.Mock()

        # Drive the handler with a minimal environ for the stylesheet.
        self.sh({'PATH_INFO': '/static/pitz.css'}, start_response_spy)

        assert start_response_spy.called

        # Positional arguments of the recorded call: (status, headers).
        status = start_response_spy.call_args[0][0]
        assert status == '200 OK', status

        response_headers = start_response_spy.call_args[0][1]

        # Exactly one Last-Modified header must be present.
        last_modified_values = [
            value for (name, value) in response_headers
            if name == 'Last-Modified']
        assert len(last_modified_values) == 1, response_headers

        parsed = rfc822.parsedate(last_modified_values[0])
        assert self.modified_time == datetime.datetime(*parsed[:6])
Пример #28
0
def get_messages(host, user, password):
    """Download all messages (e.g. pages) from the mailbox

    Keyword arguments:
    host -- IMAP server
    user -- login connection information
    password -- password connection information

    Returns a list with one create_event('IMAP', from, to, subject, date)
    result per UNSEEN message, where date is the Date header converted to
    integer epoch seconds with time.mktime().

    The mailbox is closed and the connection logged out even when fetching
    or parsing a message raises.

    See http://stackoverflow.com/questions/315362/properly-formatted-example-for-python-imap-email-access
    """
    M = imaplib.IMAP4_SSL(host)
    try:
        M.login(user, password)

        M.select()
        try:
            typ, data = M.search(None, 'UNSEEN')
            messages = []
            for num in data[0].split():
                typ, fetched = M.fetch(num, '(RFC822)')
                # fetched[0][1] holds the raw message text.
                raw_message = StringIO.StringIO(fetched[0][1])
                message = rfc822.Message(raw_message)

                msgDate = time.mktime(rfc822.parsedate(message['date']))
                messages.append(
                    create_event('IMAP', message['from'], message['to'],
                                 message['subject'], int(msgDate)))
        finally:
            # close() is only valid once a mailbox has been selected.
            M.close()
    finally:
        M.logout()

    log.info('Found %d unseen messages at %s', len(messages), host)
    return messages
Пример #29
0
def BuildPost(timeline, day):
    """Build the HTML for all tweets in ``timeline`` created on ``day``.

    Arguments:
        timeline: list of timeline entries; entries lacking 'user',
            'created_at' or 'text' are ignored.
        day: object with ``year``, ``month`` and ``day`` attributes.

    Returns:
        None when ``timeline`` is not a list; otherwise the BuildText()
        output of the matching tweets, concatenated in reverse timeline
        order (an empty string when nothing matches).
    """
    if not isinstance(timeline, list):
        return None

    required_keys = ('user', 'created_at', 'text')
    rendered = []

    for entry in timeline:
        # Anything without the basic tweet fields is not a tweet.
        if not all(key in entry for key in required_keys):
            continue

        created_epoch = calendar.timegm(rfc822.parsedate(entry['created_at']))
        created_at = datetime.datetime.fromtimestamp(created_epoch)

        # Keep only tweets created on the requested day.
        same_day = (day.year == created_at.year and
                    day.month == created_at.month and
                    day.day == created_at.day)
        if same_day:
            rendered.append(BuildText(entry))

    return "".join(rendered[::-1])
Пример #30
0
def has_modified_header(headers):
    """
    Check request header for 'if-modified-since'.

    When the header is present, the mtime of TMPFILE_DATA_TIMESTAMP is read
    (the file is created if it is missing), DATA_LAST_MODIFIED is refreshed
    from it, and the attribute values are reloaded if that value changed.

    Return True if content wasn't modified (According to the timestamp),
    False otherwise -- including when the header is absent or cannot be
    parsed as a date.
    """
    global DATA_LAST_MODIFIED

    modified = headers.get('if-modified-since')
    if not modified:
        return False

    oldLastModified = DATA_LAST_MODIFIED
    try:
        mtime = path.getmtime(TMPFILE_DATA_TIMESTAMP)
    except OSError:
        # Timestamp file is missing: create it and use the current time.
        with open(TMPFILE_DATA_TIMESTAMP, 'a'):
            utime(TMPFILE_DATA_TIMESTAMP, None)
        mtime = time()
    DATA_LAST_MODIFIED = email.utils.formatdate(mtime, usegmt=True)
    if DATA_LAST_MODIFIED != oldLastModified:
        # reload attributes if index changed
        get_attributes_values('ind_name_exact', CHECK_ATTR_FILTER)

    # Compare epoch seconds.  parsedate_tz()/mktime_tz() honour the zone in
    # the header (GMT, as produced by formatdate above); converting both
    # sides to naive local datetimes mixed local and GMT wall-clock values.
    parsed = email.utils.parsedate_tz(modified)
    if parsed is None:
        return False
    try:
        modified_since = email.utils.mktime_tz(parsed)
    except (ValueError, OverflowError):
        return False

    # The header has one-second resolution, so drop the fractional part of
    # the file time before comparing.
    return int(mtime) <= modified_since
Пример #31
0
    def parse_headers(self, header, meta):
        """Copy information from mail headers onto ``meta`` and return it.

        Sets the title from Subject, the foreign id from Message-Id, the
        author and an e-mail entry from From, further e-mail entries from
        To/CC/BCC, a date from Date (when it parses), and finally stores
        all headers as a dict of string values in ``meta.headers``.
        """
        meta.title = header.get('Subject')

        message_id = header.get('Message-Id')
        if message_id:
            meta.foreign_id = string_value(message_id)

        sender = header.get('From')
        if sender:
            parsed_sender = address.parse(sender)
            if parsed_sender is not None:
                meta.author = parsed_sender.to_unicode()
                meta.add_email(parsed_sender.address)

        for field in ('To', 'CC', 'BCC'):
            recipients = header.get(field)
            if not recipients:
                continue
            for recipient in address.parse_list(recipients):
                meta.add_email(recipient.address)

        # parsedate() yields None for a missing or unparseable Date.
        parsed_date = rfc822.parsedate(header.get('Date'))
        if parsed_date is not None:
            meta.add_date(datetime.fromtimestamp(mktime(parsed_date)))

        meta.headers = dict(
            (name, string_value(value)) for name, value in header.items())
        return meta
Пример #32
0
def object_app(environ, start_response):
    """WSGI application that serves one file from below DATAROOT.

    The response carries Content-Type, Content-Length, Last-Modified, a
    one-year Expires and an ETag derived from mtime and size, plus one
    'x-attr-<key>' header per item returned by diamond_textattr().

    Conditional requests are answered with "304 Not Modified" when
    If-Modified-Since is not older than the file's mtime, or when
    If-None-Match is '*' or contains the ETag.  Requests whose PATH_INFO
    resolves outside DATAROOT are answered with "404 Not Found".

    Returns the WSGI response iterable; for a full response this is
    environ['wsgi.file_wrapper'] around the open file.
    """
    path = os.path.join(DATAROOT, environ['PATH_INFO'][1:])

    # PATH_INFO is client-controlled: refuse anything that escapes DATAROOT
    # (for example through '..' segments) before touching the filesystem.
    root = os.path.abspath(DATAROOT)
    resolved = os.path.abspath(path)
    if resolved != root and \
            not resolved.startswith(root.rstrip(os.sep) + os.sep):
        start_response("404 Not Found", [('Content-Type', 'text/plain')])
        return ["Not Found"]

    f = open(path, 'rb')
    stat = os.fstat(f.fileno())
    expire = datetime.utcnow() + timedelta(days=365)
    expirestr = expire.strftime('%a, %d %b %Y %H:%M:%S GMT')
    etag = '"' + str(stat.st_mtime) + "_" + str(stat.st_size) + '"'
    headers = [('Content-Type', guess_mime_type(path)),
               ('Content-Length', str(stat.st_size)),
               ('Last-Modified', rfc822.formatdate(stat.st_mtime)),
               ('Expires', expirestr), ('ETag', etag)]

    for key, value in diamond_textattr(path):
        # we probably should filter out invalid characters for HTTP headers
        key = 'x-attr-' + key
        headers.append((key, value))

    if_modified = environ.get('HTTP_IF_MODIFIED_SINCE')
    if_none = environ.get('HTTP_IF_NONE_MATCH')

    not_modified = False
    if if_modified:
        # Convert the header to epoch seconds before comparing; comparing
        # the parsed time tuple with the float mtime never compared times.
        # An unparseable header is ignored.
        parsed = rfc822.parsedate_tz(if_modified)
        if parsed is not None:
            try:
                # Last-Modified has one-second resolution, so compare
                # against the truncated mtime.
                not_modified = rfc822.mktime_tz(parsed) >= int(stat.st_mtime)
            except (ValueError, OverflowError):
                not_modified = False
    if not not_modified and if_none:
        not_modified = (if_none == '*' or etag in if_none)

    if not_modified:
        # No body is sent, so the file is not handed to the server; close
        # it here.
        f.close()
        start_response("304 Not Modified", headers)
        return [""]

    start_response("200 OK", headers)
    # wrap the file object in an iterator that reads the file in 64KB blocks
    # instead of line-by-line.
    return environ['wsgi.file_wrapper'](f, 65536)
Пример #33
0
    def serve_static(self, fs_path, ims):
        """Given a filesystem path to a static resource, serve it.

        This is factored out for easier reuse.

        fs_path is the file to serve; ims is the request's
        If-Modified-Since value (falsy when absent).  The response is
        delivered by raising a Response: 304 without a body when running in
        deploy mode and the file is not newer than ims, otherwise 200 with
        the file contents.  An ims value that cannot be parsed is ignored.

        """

        # Get basic info from the filesystem and start building a response.
        # =================================================================

        mtime = os.stat(fs_path)[stat.ST_MTIME]
        content_type = mimetypes.guess_type(fs_path)[0] or 'text/plain'
        response = Response(200)

        # Support 304s, but only in deployment mode.
        # ==========================================

        if self.deploy_mode:
            if ims:
                # parsedate() returns None for a malformed header; slicing
                # it would raise, so such a header is skipped.
                mod_since = rfc822.parsedate(ims)
                last_modified = time.gmtime(mtime)
                if mod_since is not None and \
                        last_modified[:6] <= mod_since[:6]:
                    response.code = 304

        # Finish building the response and raise it.
        # ========================================

        response.headers['Last-Modified'] = rfc822.formatdate(mtime)
        response.headers['Content-Type'] = content_type
        if response.code != 304:
            # Close the file explicitly instead of relying on garbage
            # collection.
            static_file = open(fs_path, 'rb')
            try:
                response.body = static_file.read()
            finally:
                static_file.close()
        raise response
Пример #34
0
    def test_1(self):
        """
        Check that the handler answers with an accurate Last-Modified header.
        """

        start_response_spy = mock.Mock()

        # Drive the handler with a minimal environ for the stylesheet.
        self.sh({'PATH_INFO': '/static/pitz.css'}, start_response_spy)

        assert start_response_spy.called

        # Positional arguments of the recorded call: (status, headers).
        status = start_response_spy.call_args[0][0]
        assert status == '200 OK', status

        response_headers = start_response_spy.call_args[0][1]

        # Exactly one Last-Modified header must be present.
        last_modified_values = [
            value for (name, value) in response_headers
            if name == 'Last-Modified']
        assert len(last_modified_values) == 1, response_headers

        parsed = rfc822.parsedate(last_modified_values[0])
        assert self.modified_time == datetime.datetime(*parsed[:6])
Пример #35
0
    def CreatedAtInSeconds(self):
        """Return the posting time of this direct message as epoch seconds.

        Returns:
          The value of self.created_at parsed as a date and converted to
          seconds since the epoch, reading the date fields as UTC.
        """
        created_fields = parsedate(self.created_at)
        return timegm(created_fields)
Пример #36
0
    def connect(self, ):
        """Open the WebDAV connection and record the clock offset.

        Creates ``self.wc`` from the host and scheme of ``self.url``, sets
        basic-auth credentials, and stores in ``self.timedelta`` the local
        UTC time minus the time from the server's Date header, in seconds.
        Finishes by calling ``self._check_notes_folder()``.

        Returns the local ``time_delta``, which is never assigned anything
        but None.
        """
        url_parts = requests.utils.urlparse(self.url)
        self.wc = WebDAVClient(host=url_parts.netloc,
                               protocol=url_parts.scheme)
        self.wc.setbasicauth(self.login.encode('utf-8'),
                             self.passwd.encode('utf-8'))

        time_delta = None
        local_now = datetime.datetime.utcnow()

        # Try the lower-case header name first, then the capitalised one
        # (which issues a second OPTIONS request).
        date_header = self.wc.options('/').headers.get('date')
        if date_header is None:
            date_header = self.wc.options('/').headers.get('Date')

        remote_fields = rfc822.parsedate(date_header)

        local_seconds = time.mktime(local_now.utctimetuple())
        remote_seconds = time.mktime(remote_fields)
        self.timedelta = local_seconds - remote_seconds

        self._check_notes_folder()

        return time_delta
Пример #37
0
    def verify_online(self):
        """Check with a HEAD request that the download URL is being served.

        Progress and warnings (status line, unexpected content type,
        last-modified time) are written to stderr.

        Returns:
            (True, None) when the URL answers 200 and does not report a
            content length below 4000 bytes; otherwise (False, reason),
            where reason is the HTTPError or a message string.
        """
        url = self.download_url()

        try:
            res = urllib2.urlopen(HeadRequest(url))
        except urllib2.HTTPError as e:
            return (False, e)

        # try/finally so the response is closed on every return path.
        try:
            if res.code != 200:
                return (False, "%d %s" % (res.code, res.msg))

            sys.stderr.write("HEAD %s returned %d %s\n" %
                             (url, res.code, res.msg))

            content_type = res.headers['content-type']
            if APK_CONTENT_TYPE != content_type:
                sys.stderr.write(
                    "warning: content type returned by %s should be %s, not %s\n" %
                    (url, APK_CONTENT_TYPE, content_type))

            last_modified = res.headers.get('last-modified', None)
            if last_modified:
                parsed = rfc822.parsedate(last_modified)
                # parsedate() returns None for a malformed date; in that
                # case the raw header text is reported instead.
                if parsed is not None:
                    last_modified = datetime.datetime.fromtimestamp(
                        time.mktime(parsed))
            sys.stderr.write("last modified %s\n" % last_modified)

            # Header values are strings; convert before comparing, otherwise
            # the size check can never trigger.
            size = res.headers.get('content-length', None)
            try:
                size = int(size) if size else None
            except ValueError:
                size = None
            if size is not None and size < 4000:
                return (False, "content length of %s was less than 4k." % url)

            return (True, None)
        finally:
            res.close()
Пример #38
0
 def import_buffer (tablename, buffername):
  """Merge a converted table into the named session buffer.

  The table is loaded and converted through ``new``, appended to the
  buffer's storage, the storage is sorted by each item's 'created_at'
  time, and duplicates are then removed by item 'id'.
  """
  def created_epoch(item):
   # Sort key: 'created_at' as epoch seconds, fields read as UTC.
   return calendar.timegm(rfc822.parsedate(item['created_at']))

  converted = new.convert_table(new.load_table(tablename))
  target = self.session.get_buffer_by_name(buffername)
  target.storage.extend(converted)
  target.storage.sort(key=created_epoch)
  target.storage = misc.RemoveDuplicates(target.storage, lambda x: x['id'])
Пример #39
0
    def getLastModified(self):
        """
        Return the last modification time of a resource, or C{None}.

        The value of the last-modified property is parsed as an RFC 822
        date first and, if that yields nothing, as an ISO 8601 string.
        C{None} is returned when the property is missing or empty, or when
        parsing raises C{ValueError} (which is logged at debug level).

        @rtype: C{time.struct_time}
        """
        xml = self.properties.get(Constants.PROP_LAST_MODIFIED)
        datetimeString = xml.textof() if xml else None
        if not datetimeString:
            return None

        try:
            parsed = rfc822.parsedate(datetimeString)
            if parsed is None:
                # Some servers like Tamino use ISO 8601
                parsed = _parseIso8601String(datetimeString)
            return time.struct_time(parsed)
        except ValueError:
            self._logger.debug(
                "Invalid date format: "
                "The server must provide a RFC822 or ISO8601 formatted date string.",
                exc_info=True)
        return None
Пример #40
0
 def _date(self, datefield):
     t = rfc822.parsedate(datefield)
     if t == None:
         print 'Warning!  Could not parse ' + str(
             datefield) + '.  Skipping.'
         return None
     return datetime.datetime.fromtimestamp(time.mktime(t))
Пример #41
0
    def update_cache(self, key, value, headers):
        """Store ``value`` under ``key`` unless the headers forbid caching.

        Arguments:
            key: cache key.
            value: object to cache.
            headers: mapping of response headers; 'Expires' and
                'Cache-Control' are consulted.

        The entry is stored in ``self.cache`` as ``(expires, value)`` with
        ``expires`` in epoch seconds.  The lifetime defaults to one hour,
        is taken from 'Expires' when present (an unparseable value counts
        as already expired), and is overridden by a 'max-age' or
        'min-fresh' directive.  Nothing is stored when Cache-Control
        contains 'private', 'no-cache', 'no-store' or 'must-revalidate'.
        """
        # Local import: only this method needs calendar.
        import calendar

        cache = True
        now = int(time())
        expires = now + 3600

        expires_header = headers.get('Expires', None)
        if expires_header:
            parsed = parsedate(expires_header)
            try:
                # HTTP dates are GMT, so convert with timegm(); mktime()
                # would read the fields as local time.  parsedate() gives
                # None for garbage, which makes timegm() raise TypeError.
                expires = calendar.timegm(parsed)
            except (TypeError, ValueError, OverflowError):
                # Invalid Expires value: treat the response as already
                # expired.
                expires = now

        cache_control = headers.get('Cache-Control', '')
        for control in cache_control.split(','):
            # Split "name=argument"; directives without "=" get an empty
            # argument.  The name must be a string here -- comparing the
            # split list itself never matched any directive.
            name, _, argument = control.strip(' ').partition('=')
            name = name.strip().lower()

            if name in ('private', 'no-cache', 'no-store', 'must-revalidate'):
                cache = False

            if name in ('max-age', 'min-fresh'):
                try:
                    expires = now + int(argument)
                except ValueError:
                    # Missing or non-numeric argument: keep the previous
                    # expiry.
                    pass

        if cache:
            self.cache[key] = (expires, value)
Пример #42
0
def download_message(mail, data):
    """Process every named attachment of one raw e-mail message.

    ``data`` is the raw message text; ``mail`` is not used here.  Each
    non-multipart part that has a Content-Disposition header and a
    decodable file name is passed to process_file() together with the
    sender's name, address and phone number and the parsed Date header.

    Returns True if process_file() reported success for at least one
    attachment, otherwise False.
    """
    msg = email.message_from_string(data)

    logging.debug("Message-ID is %s" % msg["Message-ID"])

    sender_name, sender_addr = decode_sender(msg.get_all("From")[0])
    sender_phone = get_phone_number(msg)
    date = rfc822.parsedate(msg["Date"])

    status = False
    for part in msg.walk():
        # Containers and parts without a disposition are not attachments.
        is_container = part.get_content_maintype() == "multipart"
        if is_container or part.get("Content-Disposition") is None:
            continue

        name = decode_file_name(part.get_filename())
        if name is None:
            logging.debug("Message part has no name, skipped.")
            continue

        payload = part.get_payload(decode=True)
        if process_file(name, payload, sender_name, sender_addr,
                        sender_phone, date):
            status = True

    return status
Пример #43
0
    def CreatedAtInSeconds(self):
        """Return the posting time of this direct message as epoch seconds.

        Returns:
          The value of self.created_at parsed as a date and converted to
          seconds since the epoch, reading the date fields as UTC.
        """
        created_fields = parsedate(self.created_at)
        return timegm(created_fields)
Пример #44
0
def has_modified_header(headers):
    """
    Check request header for 'if-modified-since'.

    When the header is present, the mtime of TMPFILE_DATA_TIMESTAMP is read
    (the file is created if it is missing), DATA_LAST_MODIFIED is refreshed
    from it, and the attribute values are reloaded if that value changed.

    Return True if content wasn't modified (According to the timestamp),
    False otherwise -- including when the header is absent or cannot be
    parsed as a date.
    """
    global DATA_LAST_MODIFIED

    modified = headers.get('if-modified-since')
    if not modified:
        return False

    oldLastModified = DATA_LAST_MODIFIED
    try:
        mtime = path.getmtime(TMPFILE_DATA_TIMESTAMP)
    except OSError:
        # Timestamp file is missing: create it and use the current time.
        with open(TMPFILE_DATA_TIMESTAMP, 'a'):
            utime(TMPFILE_DATA_TIMESTAMP, None)
        mtime = time()
    DATA_LAST_MODIFIED = email.utils.formatdate(mtime, usegmt=True)
    if DATA_LAST_MODIFIED != oldLastModified:
        # reload attributes if index changed
        get_attributes_values('ind_name_exact', CHECK_ATTR_FILTER)

    # Compare epoch seconds.  parsedate_tz()/mktime_tz() honour the zone in
    # the header (GMT, as produced by formatdate above); converting both
    # sides to naive local datetimes mixed local and GMT wall-clock values.
    parsed = email.utils.parsedate_tz(modified)
    if parsed is None:
        return False
    try:
        modified_since = email.utils.mktime_tz(parsed)
    except (ValueError, OverflowError):
        return False

    # The header has one-second resolution, so drop the fractional part of
    # the file time before comparing.
    return int(mtime) <= modified_since
Пример #45
0
def get_messages(host, user, password):
    """Download all messages (e.g. pages) from the mailbox

    Keyword arguments:
    host -- IMAP server
    user -- login connection information
    password -- password connection information

    Returns a list with one create_event('IMAP', from, to, subject, date)
    result per UNSEEN message, where date is the Date header converted to
    integer epoch seconds with time.mktime().

    The mailbox is closed and the connection logged out even when fetching
    or parsing a message raises.

    See http://stackoverflow.com/questions/315362/properly-formatted-example-for-python-imap-email-access
    """
    M = imaplib.IMAP4_SSL(host)
    try:
        M.login(user, password)

        M.select()
        try:
            typ, data = M.search(None, 'UNSEEN')
            messages = []
            for num in data[0].split():
                typ, fetched = M.fetch(num, '(RFC822)')
                # fetched[0][1] holds the raw message text.
                raw_message = StringIO.StringIO(fetched[0][1])
                message = rfc822.Message(raw_message)

                msgDate = time.mktime(rfc822.parsedate(message['date']))
                messages.append(create_event('IMAP',
                                              message['from'],
                                              message['to'],
                                              message['subject'],
                                              int(msgDate)))
        finally:
            # close() is only valid once a mailbox has been selected.
            M.close()
    finally:
        M.logout()

    log.info('Found %d unseen messages at %s', len(messages), host)
    return messages
Пример #46
0
  def validate(self):
    """Classify self.value as an RFC 2822 date and log exactly one event.

    Values matching rfc2822_re are logged as IncorrectDOW (weekday name
    disagrees with the date), InvalidRFC2822Date (the date fields are
    rejected by datetime.date), ImplausibleDate or ValidRFC2822Date.
    Other values have backslash escapes and parenthesised comments removed
    and are logged as ProblematicalRFC822Date when the remainder matches
    rfc822_re, else as InvalidRFC2822Date.
    """
    def report(event, value):
      # Every event carries the same parent/element context.
      self.log(event({"parent":self.parent.name, "element":self.name, "value":value}))

    if not self.rfc2822_re.match(self.value):
      previous, stripped = '', re.sub(r'[\\](.)','',self.value)
      # Remove parenthesised comments innermost-first until nothing changes.
      while previous != stripped:
        previous, stripped = stripped, re.sub('\([^(]*?\)',' ',stripped)
      if self.rfc822_re.match(stripped.strip().lower()):
        report(ProblematicalRFC822Date, self.value)
      else:
        report(InvalidRFC2822Date, self.value)
      return

    # NOTE(review): calendar is imported but not referenced in this method.
    import calendar
    value = parsedate(self.value)

    try:
      if value[0] > 1900:
        dow = datetime.date(*value[:3]).strftime("%a")
        has_weekday = self.value.find(',')>0
        if has_weekday and dow.lower() != self.value[:3].lower():
          report(IncorrectDOW, self.value[:3])
          return
    except ValueError as e:
      report(InvalidRFC2822Date, str(e))
      return

    if implausible_822(self.value):
      report(ImplausibleDate, self.value)
    else:
      report(ValidRFC2822Date, self.value)
Пример #47
0
    def parse_headers(self, msg, meta):
        """Copy information from a mail message onto ``meta`` and return it.

        Sets the title from the message subject, the foreign id from
        Message-Id, the author from From, recipients from To/CC/BCC, a
        date from Date (when it parses), and finally stores all headers as
        a dict of unicode values in ``meta.headers``.
        """
        meta.title = msg.subject

        message_id = msg.headers.get('Message-Id')
        if message_id:
            meta.foreign_id = unicode(message_id)

        sender = msg.headers.get('From')
        if sender:
            parsed_sender = address.parse(sender)
            if parsed_sender is not None:
                meta.author = parsed_sender.to_unicode()

        for field in ('To', 'CC', 'BCC'):
            recipients = msg.headers.get(field)
            if not recipients:
                continue
            for recipient in address.parse_list(recipients):
                meta.add_recipient(recipient.to_unicode())

        # parsedate() yields None for a missing or unparseable Date.
        parsed_date = rfc822.parsedate(msg.headers.get('Date'))
        if parsed_date is not None:
            meta.add_date(datetime.fromtimestamp(mktime(parsed_date)))

        meta.headers = dict(
            (name, unicode(value)) for name, value in msg.headers.items())
        return meta
Пример #48
0
def load_tweets(**kwargs):
    """Run a Twitter search and store every returned status in the database.

    The query defaults to ``count=20`` and ``q=Search_key``; any keyword
    argument overrides or extends those search parameters.

    When the API answers with an error payload:

    * code 88 (rate limit) is retried after sleeping if the module-level
      ``repeat`` flag is set, otherwise ``OverflowError`` is raised;
    * codes 32, 89 and 99 raise ``InvalidTokenError``;
    * anything else raises a plain ``Exception``.

    Returns the number of statuses that were inserted.
    """
    args = dict(count=20, q=Search_key)
    args.update(**kwargs)
    url = 'https://api.twitter.com/1.1/search/tweets.json?' + urlencode(args)

    # Retry in a loop instead of recursing so a long rate-limit streak
    # cannot exhaust the interpreter's recursion limit.
    while True:
        user_timeline = TweetOuth.tweet_req(url)
        tweets = json.loads(user_timeline.decode('utf-8'))
        if not (isinstance(tweets, dict) and u'errors' in tweets):
            break

        # Only the first reported error decides what happens next.
        error = tweets[u'errors'][0]
        code = error["code"]
        message = error["message"]
        if code == 88:
            if repeat:
                print(message, file=sys.stderr)
                time.sleep(1000)
                continue
            raise OverflowError(message)
        if code in (32, 89, 99):
            raise InvalidTokenError(message)
        raise Exception(message)

    statuses = tweets[u'statuses']
    for twit in statuses:
        user = twit[u'user']
        # NOTE(review): parsedate() drops the "+0000" offset and mktime()
        # then reads the tuple as local time -- confirm whether the stored
        # value is meant to be a true UTC epoch (calendar.timegm).
        created = time.mktime(rfc822.parsedate(twit['created_at']))
        c.execute('INSERT INTO tweet (user, tweet_id, created, text, source, screan_name, description) VALUES (?, ?, ?, ?, ?, ?, ?)',
                  (user[u'name'],
                   twit['id'],
                   created,
                   twit['text'],
                   twit['source'],
                   user[u'screen_name'],
                   user[u'description']))
    c.commit()
    return len(statuses)
Пример #49
0
def holvagytok(cookie):
  """Return the venues found in the check-ins of the user owning `cookie`.

  The stored OAuth token belonging to `cookie` is used to query Foursquare.
  Check-ins are grouped by venue name; each entry of the returned list is a
  dict with the keys 'name', 'geolat', 'geolong', 'lastseen' (the greatest
  formatted check-in time seen for that venue) and 'here' (the users who
  checked in there).  Venues without a 'geolat' key are left out.
  """
  cookietoken = CookieToken.all().filter('cookie = ', cookie).get()
  credentials = foursquare.OAuthCredentials(oauth_key, oauth_secret)
  user_token = oauth.OAuthToken(cookietoken.token, cookietoken.secret)
  credentials.set_access_token(user_token)
  fs = foursquare.Foursquare(credentials)
  fscheckins = fs.checkins()['checkins']

  venyuz = []
  # Name -> entry index, so each check-in is matched in O(1) instead of
  # rebuilding and scanning a list of names on every iteration.
  venues_by_name = {}
  for checkin in fscheckins:
    if 'venue' not in checkin:
      continue
    venue = checkin['venue']
    user = checkin['user']
    checked_in_at = datetime.fromtimestamp(time.mktime(parsedate(checkin['created'])))
    # Despite the name this is the decoded string produced by date_diff().
    checkintimetuple = date_diff(checked_in_at).decode("utf-8")

    ezittmost = venues_by_name.get(venue['name'])
    if ezittmost is not None:
      # The venue is already listed: only update the date and who is there.
      ezittmost['here'].append(user)
      if checkintimetuple > ezittmost['lastseen']:
        ezittmost['lastseen'] = checkintimetuple
    elif 'geolat' in venue:
      # Not listed yet: add it (only venues that carry coordinates).
      entry = {
        'name': venue['name'],
        'geolat': venue['geolat'],
        'geolong': venue['geolong'],
        'lastseen': checkintimetuple,
        'here': [user]
      }
      venyuz.append(entry)
      venues_by_name[venue['name']] = entry
  return venyuz
Пример #50
0
 def touch(self, filename):
     """Set last modified date on filename.

     Does nothing when ``self.lastModified`` is empty or cannot be parsed.
     The zone given in the date string (e.g. ``GMT`` or ``+0100``) is
     honoured; a date without any zone is interpreted as local time.
     """
     if not self.lastModified:
         return
     # parsedate_tz() keeps the zone offset that parsedate() throws away.
     parsed = rfc822.parsedate_tz(self.lastModified)
     if not parsed:
         return
     # mktime_tz() applies that offset; time.mktime() would read the tuple
     # as local time and shift the result by the local UTC offset.
     mtime = rfc822.mktime_tz(parsed)
     os.utime(filename, (mtime, mtime))
Пример #51
0
  def GetCreatedAtInSeconds(self):
    '''Convert this status message's creation date to epoch seconds.

    The string in self.created_at is parsed into a time tuple, which is
    then read as UTC.

    Returns:
      The time this status message was posted, in seconds since the epoch.
    '''
    parsed_created_at = rfc822.parsedate(self.created_at)
    return timegm(parsed_created_at)
Пример #52
0
def seconds_since_epoch_of_checkin(c):
  """Return the creation time of check-in `c` in seconds since the epoch.

  `c` is a mapping whose 'created' entry holds an RFC 822 style date string.
  The parsed time tuple is converted with time.mktime().

  Raises:
    FourMapperException: (status 500) when the date is missing or cannot be
      parsed; the underlying error is logged first.
  """
  try:
    from rfc822 import parsedate
  except ImportError:
    # rfc822 no longer exists on Python 3; email.utils.parsedate is the
    # standard-library successor.
    from email.utils import parsedate
  try:
    # The original computed this value but never returned it, so callers
    # always received None.
    return time.mktime(parsedate(c['created']))
  except Exception as e:
    logging.error("Unable to parse date of checkin %s: %s", repr(c), e)
    raise FourMapperException(500, 'Unable to parse date in checkin')
Пример #53
0
def collect_tweets():
    """Collect new tweets about Firefox.

    Searches Twitter for the configured Firefox terms, restricted to tweets
    newer than the latest stored one when there is one, runs every result
    through ``_filter_tweet`` and saves the survivors as ``Tweet`` rows.
    Tweets that hit an ``IntegrityError`` on save are skipped.  Does nothing
    when ``settings.STAGE`` is set.
    """
    # Don't (ab)use the twitter API from dev and stage.
    if settings.STAGE:
        return

    with statsd.timer('customercare.tweets.time_elapsed'):
        t = Twython(settings.TWITTER_CONSUMER_KEY,
                    settings.TWITTER_CONSUMER_SECRET,
                    settings.TWITTER_ACCESS_TOKEN,
                    settings.TWITTER_ACCESS_TOKEN_SECRET)

        search_options = {
            'q': ('firefox OR #fxinput OR @firefoxbrasil OR #firefoxos '
                  'OR @firefox_es'),
            'count': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.',
                      settings.CC_TWEETS_PERPAGE)
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s', latest_tweet.tweet_id)

        # Retrieve Tweets
        results = t.search(**search_options)

        statuses = results['statuses']
        if not statuses:
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in statuses:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item,
                                 allow_links='#fxinput' in item['text'])
            if not item:
                continue

            # timegm() reads the parsed tuple as UTC, matching the
            # utcfromtimestamp() conversion applied to its result.
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))

            item_lang = item['metadata'].get('iso_language_code', 'en')

            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                # Save rejected by a database constraint; skip this tweet.
                pass
Пример #54
0
def dump_tweets(q, since_id=0, verbose=True, rpp=100, result_type = 'mixed', db_cursor=False, db_table=False):
    """Page through Twitter search results, print them and optionally store them.

    Args:
        q: search query string.
        since_id: ``since_id`` search parameter sent with the first request.
        verbose: when true, each requested URL is written to stderr.
        rpp: results per page requested from the API.
        result_type: ``result_type`` search parameter.
        db_cursor: MySQLdb cursor used to insert each tweet; ``False`` (or
            ``None``) disables database storage.
        db_table: name of the destination table.  It is interpolated into
            the statement and must therefore come from trusted code, never
            from user input.

    At most 14 pages are fetched, following the ``next_page`` link of each
    response.  A tweet that fails to insert is reported on stderr and
    skipped.
    """
    base_url = "http://search.twitter.com/search.json"
    query = "?" + urllib.urlencode({'q' : q,
                                    'since_id'        : since_id,
                                    'rpp'             : rpp,
                                    'result_type'     : result_type,
                                    'page'            : 1,
                                    'include_entities': 1
                                    })

    store_rows = db_cursor is not False and db_cursor is not None
    if store_rows:
        # Only the table name is interpolated; every value is bound as a
        # parameter so tweet content cannot break or inject into the SQL.
        insert_sql = (
            "insert into %s (id, from_user, timestamp, text, "
            "iso_language_code, geo_lat, geo_long) "
            "values (%%s, %%s, %%s, %%s, %%s, %%s, %%s)" % db_table)

    # NOTE(review): max_id and counter are tracked but never returned by the
    # visible code -- confirm whether callers were meant to receive them.
    max_id = counter = 0

    for _page in range(1, 15):
        url = base_url + query
        if verbose:
            sys.stderr.write("%s\n" % url)
        raw_response = urllib2.urlopen(url)
        try:
            json_response = json.load(raw_response)
        finally:
            # Release the connection even when the body is not valid JSON.
            raw_response.close()
        max_id = json_response["max_id"]

        all_tweets = json_response["results"]
        counter = counter + len(all_tweets)

        for tweet in all_tweets:
            print(tweet)
            tweet_id = tweet["id"]
            timestamp = calendar.timegm(rfc822.parsedate(tweet["created_at"]))
            from_user = clean_string(tweet["from_user"])
            text = clean_string(tweet["text"])
            iso_language_code = tweet["iso_language_code"]

            ## "geo":{"coordinates":[48.748530,2.448800],"type":"Point"}
            if tweet["geo"]:
                geo_lat = tweet["geo"]["coordinates"][0]
                geo_long = tweet["geo"]["coordinates"][1]
            else:
                geo_lat = 0.0
                geo_long = 0.0
            row = str(tweet_id) + " : " + str(timestamp) + " : " + from_user + " : " + text + " : " + iso_language_code
            print(row.encode('utf8'))
            if store_rows:
                # Text fields are sent as UTF-8 bytes, the same encoding the
                # previously string-built statement was sent in.
                params = (tweet_id,
                          from_user.encode('utf8'),
                          timestamp,
                          text.encode('utf8'),
                          iso_language_code.encode('utf8'),
                          geo_lat,
                          geo_long)
                try:
                    db_cursor.execute(insert_sql, params)
                    db_cursor.connection.commit()
                except MySQLdb.Error as e:
                    sys.stderr.write("Error %d: %s\n" % (e.args[0], e.args[1]))
                    sys.stderr.write("Skipping inserting this tweet to the DB\n")

        if "next_page" in json_response:
            query = json_response["next_page"]
        else:
            break
Пример #55
0
 def check_modified_since(self, cache):
     """Answer a conditional GET based on the If-Modified-Since header.

     Returns an ``HttpResponseNotModified`` when the request carries an
     ``If-Modified-Since`` date that is not older than
     ``cache.original_file_time()``.  Returns ``None`` in every other case,
     including a missing, malformed or out-of-range header, so the caller
     can go on to serve the full response.
     """
     modified_since_str = self.request.META.get("HTTP_IF_MODIFIED_SINCE")
     if not modified_since_str:
         return None

     # The header is client-controlled: parsedate() returns None for
     # garbage, and mktime() rejects dates outside the platform range.
     parsed = parsedate(modified_since_str)
     if parsed is None:
         return None
     try:
         modified_since = time.mktime(parsed)
     except (OverflowError, ValueError):
         return None

     file_time = time.mktime(cache.original_file_time())
     if modified_since >= file_time:
         return HttpResponseNotModified()
     return None
Пример #56
0
 def import_buffer(tablename, buffername):
     """Merge the converted table `tablename` into the buffer `buffername`.

     The table is loaded and converted through `new`, appended to the
     storage of the session buffer, sorted by each item's 'created_at'
     date and finally stripped of duplicates keyed on 'id'.
     """
     def created_at_epoch(item):
         # Sort key: the item's creation date as UTC epoch seconds.
         return calendar.timegm(rfc822.parsedate(item['created_at']))

     converted = new.convert_table(new.load_table(tablename))
     target = self.session.get_buffer_by_name(buffername)
     target.storage.extend(converted)
     target.storage.sort(key=created_at_epoch)
     target.storage = misc.RemoveDuplicates(target.storage,
                                            lambda x: x['id'])
Пример #57
0
    def created_at_in_seconds(self):
        """ Convert ``self.created_at`` to seconds since the epoch
        (1 Jan 1970), reading the parsed date as UTC.

        Returns:
            int: The time this status message was posted, in seconds since
            the epoch.
        """
        posted = parsedate(self.created_at)
        return timegm(posted)