def schedularRunner():
    """Fetch the Ada Derana hot-news feed and store the items not yet saved.

    Reads the most recent stored entry with ``dao.selectLast('derananews')``,
    walks the aggregated feed until an item whose publish date parses to the
    same value is reached, runs the preprocessor and the classifier over the
    collected items and inserts every item that has a non-empty summary via
    ``dao.insertNews``.
    """
    latestNews = dao.selectLast('derananews')

    print(latestNews)

    aggrigater = DeranaNewsAggregator()
    classifier = MultinomialNBClassifier()

    feedItems = aggrigater.aggriagteNews("http://sinhala.adaderana.lk/rsshotnews.php")

    # Start from an empty list: seeding it with the News class itself pushed
    # a non-item through preprocessing, classification and the insert loop.
    latestNewsList = []

    # NOTE(review): assumes the feed is ordered newest first and that
    # latestNews is an RFC 822 date string -- confirm against the dao.
    for news in feedItems:
        if rfc822.parsedate_tz(news.publishDate) == rfc822.parsedate_tz(latestNews):
            break
        latestNewsList.append(news)

    if not latestNewsList:
        # nothing new in the feed, so there is nothing to classify or store
        return

    preprocessor.prepocessor(latestNewsList)

    classifier.classify(latestNewsList)

    for news in latestNewsList:
        description = news.summary
        if description == '':
            # items without a summary are not stored
            continue
        dao.insertNews(news.title,
                       news.link,
                       description,
                       news.imageLink,
                       str(news.publishDate),
                       news.category[0],
                       str(news.newsSite))
Пример #2
0
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Malformed values never make an entry fresh: an unparseable Date
    yields "STALE", an unparseable Expires or a non-integer max-age
    counts as a freshness lifetime of 0, and a non-integer min-fresh
    is ignored.

    Both arguments are header mappings looked up by lower-case name.
    request_headers gains a 'cache-control: no-cache' entry when the
    request carries 'Pragma: no-cache' but no Cache-Control header.

    Returns one of "TRANSPARENT", "STALE" or "FRESH".
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if 'pragma' in request_headers and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif 'no-cache' in cc:
        retval = "TRANSPARENT"
    elif 'no-cache' in cc_response:
        retval = "STALE"
    elif 'only-if-cached' in cc:
        retval = "FRESH"
    elif 'date' in response_headers:
        parsed_date = rfc822.parsedate_tz(response_headers['date'])
        if parsed_date is None:
            # Without a usable Date the age cannot be computed.
            return retval
        date = calendar.timegm(parsed_date)
        now = time.time()
        current_age = max(0, now - date)
        if 'max-age' in cc_response:
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif 'expires' in response_headers:
            expires = rfc822.parsedate_tz(response_headers['expires'])
            if expires is None:
                # e.g. "Expires: 0" or "-1": already expired
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if 'max-age' in cc:
            try:
                freshness_lifetime = min(freshness_lifetime, int(cc['max-age']))
            except ValueError:
                freshness_lifetime = 0
        if 'min-fresh' in cc:
            try:
                current_age += int(cc['min-fresh'])
            except ValueError:
                pass
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
Пример #3
0
def get_delivery_time(msg):
    """Return the delivery time of msg in seconds since the epoch, or None.

    A "Delivery-date" header wins (eg. "Thu, 12 Jul 2001 08:47:20 -0400"
    becomes 994942040).  Otherwise the envelope "From " line is parsed;
    when neither is usable the result is None.
    """
    if msg.has_key("Delivery-date"):
        return mktime_tz(parsedate_tz(msg["Delivery-date"]))

    if not msg.unixfrom:
        return None

    # Parse eg.
    #   "From [email protected] Thu Jul 12 08:47:20 2001"
    # -- the "From " line format used by Exim; hopefully other MTAs
    # write the same thing.
    from_match = re.match(r'^From (\S+) +(\w{3} \w{3}\s+\d\d? \d\d:\d\d:\d\d \d{4})$',
                          msg.unixfrom)
    if not from_match:
        warn("warning: could not parse \"From \" line: %s" % msg.unixfrom)
        return None

    # The envelope date carries no timezone, so this value may differ from
    # what a Delivery-date header for the same message would give.
    stamp_text = from_match.group(2)
    delivered = mktime(strptime(stamp_text, "%c"))

    # Attempt to detect and correct for DST differences: format the result
    # back and, if the text changed, shift by the round-trip difference.
    roundtrip_text = ctime(delivered)
    if roundtrip_text != stamp_text:
        delivered -= mktime(strptime(roundtrip_text, "%c")) - delivered

    return delivered
Пример #4
0
    def execute(self, observation):
        """Store an observation in DynamoDB unless its composite key exists.

        Adds 'ObservationTime' (seconds since the epoch), 'StationId' and
        'CompositeKey' ("<station_id>_<ObservationTime>") to ``observation``,
        then looks the key up in the 'VocalPelicanObservation' table.

        Returns the result of ``put_item`` when the key was not found, and
        False when the item already exists or the lookup failed with a
        JSONResponseError.
        """
        station_id = observation['station_id']

        raw_time = observation['observation_time_rfc822']
        # mktime_tz already returns seconds since the epoch in UTC.  Going
        # through a local-time datetime and subtracting a UTC epoch shifted
        # the value by the host's UTC offset.
        delta = int(rfc822.mktime_tz(rfc822.parsedate_tz(raw_time)))

        observation['ObservationTime'] = delta
        observation['StationId'] = station_id

        composite_key = "%s_%d" % (station_id, delta)
        observation['CompositeKey'] = composite_key

        region = os.environ['AWS_DEFAULT_REGION']
        accessKey = os.environ['AWS_ACCESS_KEY']
        secretKey = os.environ['AWS_SECRET_KEY']

        try:
            connx = boto.dynamodb2.connect_to_region(
                region,
                aws_access_key_id=accessKey,
                aws_secret_access_key=secretKey)
            obs_table = Table('VocalPelicanObservation', connection=connx)
            # only the lookup's outcome matters, not the row itself
            obs_table.get_item(CompositeKey=composite_key)
        except JSONResponseError as responseError:
            # authentication problem
            print(responseError)
        except boto.dynamodb2.exceptions.ItemNotFound:
            # not found implies safe to add
            return obs_table.put_item(observation)

        return False
Пример #5
0
 def parse_term(term):
     """Convert a term dictionary into a URIRef, Literal or BNode.

     ``term`` carries a 'type' and a 'value' key and, for literals, an
     optional 'datatype' key.  An 'IRI' term becomes a URIRef and a
     'literal' term a Literal; any other type is treated as a blank node
     whose id is the part of 'value' after the first ':'.

     xsd:dateTime literal values are converted to datetime objects, trying
     an epoch number first, then ISO 8601, then an RFC 822 date.
     """
     if term['type'] == 'IRI':
         return URIRef(term['value'])
     elif term['type'] == 'literal':
         # Only wrap the datatype when there is one: URIRef(None) does not
         # mean "no datatype".
         datatype = term.get('datatype')
         if datatype is not None:
             datatype = URIRef(datatype)
         if datatype == XSD.dateTime:
             try:
                 # the value is replaced only once the conversion succeeded
                 term['value'] = datetime.utcfromtimestamp(float(term['value']))
             except Exception:
                 try:
                     term['value'] = isodate.parse_datetime(term['value'])
                 except Exception:
                     timestamp = mktime_tz(parsedate_tz(term['value']))
                     # NOTE(review): this branch yields local time while the
                     # epoch branch above yields UTC -- confirm which is wanted.
                     term['value'] = datetime.fromtimestamp(timestamp)
         if datatype == RDFS.Literal:
             datatype = None
             try:
                 term['value'] = float(term['value'])
             except Exception:
                 # not numeric: keep the value as it is
                 pass
         return Literal(term['value'], datatype=datatype)
     else:
         bid = term['value'].split(':')[1]
         # the same source id always maps to the same generated node id
         if bid not in bid_map:
             bid_map[bid] = shortuuid.uuid()
         return BNode(bid_map[bid])
Пример #6
0
    def get_contents_to_filename(self, filename, headers=None,
                                 cb=None, num_cb=10,
                                 torrent=False,
                                 version_id=None,
                                 res_download_handler=None,
                                 response_headers=None):
        """
        Retrieve an object from S3 using the name of the Key object as the
        key in S3.  Store contents of the object to a file named by 'filename'.
        See get_contents_to_file method for details about the
        parameters.

        The file is closed even when the download raises.  Afterwards, if
        ``self.last_modified`` is set, the file's access and modification
        times are set from it on a best-effort basis.

        :type filename: string
        :param filename: The filename of where to put the file contents

        :type headers: dict
        :param headers: Any additional headers to send in the request

        :type cb: function
        :param cb: a callback function that will be called to report
                   progress on the transfer.  The callback should accept
                   two integer parameters, the first representing the
                   number of bytes that have been successfully
                   transmitted and the second representing the
                   size of the to be transmitted object.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with
                       the cb parameter this parameter determines the
                       granularity of the callback by defining
                       the maximum number of times the callback will
                       be called during the file transfer.

        :type torrent: bool
        :param torrent: If True, returns the contents of a torrent file
                        as a string.

        :type version_id: string
        :param version_id: Passed through to get_contents_to_file.

        :type res_download_handler: ResumableDownloadHandler
        :param res_download_handler: If provided, this handler will
                                     perform the download.

        :type response_headers: dict
        :param response_headers: A dictionary containing HTTP headers/values
                                 that will override any headers associated with
                                 the stored object in the response.
                                 See http://goo.gl/EWOPb for details.
        """
        fp = open(filename, 'wb')
        try:
            self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
                                      version_id=version_id,
                                      res_download_handler=res_download_handler,
                                      response_headers=response_headers)
        finally:
            # do not leak the handle when the download fails
            fp.close()
        # if last_modified date was sent from s3, try to set file's timestamp
        if self.last_modified is not None:
            try:
                modified_tuple = rfc822.parsedate_tz(self.last_modified)
                modified_stamp = int(rfc822.mktime_tz(modified_tuple))
                os.utime(fp.name, (modified_stamp, modified_stamp))
            except Exception:
                # best effort only: a bad date or a failing utime must not
                # turn a successful download into an error
                pass
Пример #7
0
def process_reply_file(current, fname):
    """Append the reply stored in file ``fname`` to ``current["notes"]``.

    The file is parsed as an RFC 822 message.  The new note's 'text' is the
    From header followed by the message body, and its 'timestamp' is the
    tuple returned by ``rfc822.parsedate_tz`` for the Date header.
    """
    new_note = {}
    reply = open(fname, "r")
    try:
        msg = rfc822.Message(reply)
        # the body is read through msg.fp, so the file must still be open
        new_note['text'] = "%s\n%s" % (msg['From'], msg.fp.read())
        new_note['timestamp'] = rfc822.parsedate_tz(msg['Date'])
    finally:
        # the handle used to be left open
        reply.close()
    current["notes"].append(new_note)
Пример #8
0
def createEvent(evType, tstamp, name = None, contextList = None,
                entityList = None):
    """
    Create an XML element representing an event. Returns the XML object

    It expects:
    evType: Enum
    tstamp: datetime object (None means the current time)
    name : string
    contextList: List of context elements (None means no elements)
    entityList: List of entity elements (None means no elements)

    The element's 'id' attribute is the SHA-1 hex digest of the serialized
    element, entity elements first, then context elements.
    """
    # None instead of shared mutable default lists
    if contextList is None:
        contextList = []
    if entityList is None:
        entityList = []

    result = etree.Element('event')

    result.attrib['type'] = eventName(evType)
    if tstamp is None:
        tstamp = datetime.datetime.now()
    # The formatted timestamp has no zone, so it is read back as local time
    # and re-emitted by formatdate.
    result.attrib['datetime'] = rfc822.formatdate(rfc822.mktime_tz(rfc822.parsedate_tz(tstamp.strftime("%a, %d %b %Y %H:%M:%S"))))
    if name is not None:
        result.attrib['name'] = name

    for el in entityList + contextList:
        result.append(el)

    # Create the ID
    m = hashlib.sha1()
    m.update(etree.tostring(result))
    result.attrib['id'] = m.hexdigest()

    return result
Пример #9
0
    def post(self):
        """Handle a mesh notification by queueing a download of the URL.

        The request body is JSON with 'url', 'metadata', 'proxy_ip' and
        'proxy_port'.  When a proxy backend is available the entry is handed
        to ``backend.download_entries``, which is given the callback that
        writes the ``{"result": "ok"}`` response; without a backend that
        response is sent immediately.
        """
        backend = get_proxy_backend()

        def our_finish():
            self.add_header("Content-type", "application/json")
            self.write(json.dumps({"result": "ok"}))
            self.finish()

        if backend is not None:
            data = self.request.body
            obj = json.loads(data)

            url = obj["url"]
            metadata = obj["metadata"]
            proxy_ip = obj["proxy_ip"]
            proxy_port = obj["proxy_port"]

            print("GOT MESH-NOTIFY from %s:%d %s" % (proxy_ip, proxy_port, url))

            last_modified = None
            for key, val in metadata["headers"]:
                if key.lower() == "last-modified":
                    parsed = rfc822.parsedate_tz(val)
                    # A malformed date from the peer is treated like a
                    # missing header instead of failing in mktime_tz(None).
                    if parsed is not None:
                        last_modified = rfc822.mktime_tz(parsed)
                    break

            entry = {"url": url, "last_modified": last_modified}

            tracker = LimitTracker(NOTIFY_SIMULTANEOUS_DOWNLOADS)
            backend.download_entries(proxy_ip, proxy_port, [entry], our_finish, tracker)
        else:
            our_finish()
Пример #10
0
	def matches_value( self, v ):
		"""Return True when the RFC 822 date ``v`` is more than ``self.age`` seconds old."""
		# sample the clock before parsing
		now = time.time()
		parsed_epoch = rfc822.mktime_tz( rfc822.parsedate_tz( v ) )
		elapsed = now - parsed_epoch
		return elapsed > self.age
Пример #11
0
 def _onsuccess(response):
     if response.status == 200:
         checksum = response.headers['Etag'].strip('"')
         last_modified = response.headers['Last-Modified']
         modified_tuple = rfc822.parsedate_tz(last_modified)
         modified_stamp = int(rfc822.mktime_tz(modified_tuple))
         return {'checksum': checksum, 'last_modified': modified_stamp}
Пример #12
0
    def open(self):
        """Request the 'data' URL and return the raw response stream.

        Stores the response on ``self._data_response`` and records the
        Content-Length (``self._size``, int or None), the Last-Modified time
        (``self._modified``, epoch seconds or None) and the Content-Type
        (``self._mimetype``, 'application/octet-stream' when absent).
        """
        # XXX in future add support for compression
        request_headers = {'Accept-Encoding': ''}

        # version '0' is called with prefetch=False, anything else with
        # stream=True
        if _requests_version == '0':
            stream_option = {'prefetch': False}
        else:
            stream_option = {'stream': True}
        self._data_response = self._session.get(self._url('data'),
                                                headers=request_headers,
                                                **stream_option)
        self._validate_response(self._data_response)

        response_headers = self._data_response.headers

        length = response_headers.get('Content-Length')
        self._size = None if length is None else int(length)

        stamp = response_headers.get('Last-Modified')
        if stamp is None:
            self._modified = None
        else:
            self._modified = rfc822.mktime_tz(rfc822.parsedate_tz(stamp))

        self._mimetype = response_headers.get('Content-Type',
                                              'application/octet-stream')

        return self._data_response.raw
Пример #13
0
def _parse_midmo_date(datestring):
    """
    returns a local datetime corresponding to 
    the datestring given.
    """
    # these appear to be rfc822/2822, not documented.
    return datetime.fromtimestamp(rfc822.mktime_tz(rfc822.parsedate_tz(datestring)))
Пример #14
0
    def _parse_sibling(self, sibling, headers, data):
        """
        Parses a single sibling out of a response.
        """

        sibling.exists = True

        # Parse the headers...
        for header, value in headers:
            header = header.lower()
            if header == "content-type":
                sibling.content_type, sibling.charset = self._parse_content_type(value)
            elif header == "etag":
                sibling.etag = value
            elif header == "link":
                sibling.links = self._parse_links(value)
            elif header == "last-modified":
                sibling.last_modified = mktime_tz(parsedate_tz(value))
            elif header.startswith("x-riak-meta-"):
                metakey = header.replace("x-riak-meta-", "")
                sibling.usermeta[metakey] = value
            elif header.startswith("x-riak-index-"):
                field = header.replace("x-riak-index-", "")
                reader = csv.reader([value], skipinitialspace=True)
                for line in reader:
                    for token in line:
                        token = decode_index_value(field, token)
                        sibling.add_index(field, token)
            elif header == "x-riak-deleted":
                sibling.exists = False

        sibling.encoded_data = data

        return sibling
Пример #15
0
	def populate(self, sub):
		"""Load the 'index' file of sub-archive ``sub`` into ``self.msgs``.

		The index is read two lines at a time; every pair matching
		``_rx_index`` becomes one entry in ``self.msgs`` keyed by message
		number.  A date that cannot be parsed gets the previous entry's
		timestamp plus one second.  The file is closed even if reading
		fails part-way.
		"""
		index_file = open(os.path.join(self.archdir, str(sub), 'index'))
		try:
			linepair = index_file.readline() + index_file.readline()
			prev_timestamp = 0
			while linepair:
				match = _rx_index.match(linepair.rstrip())
				if match:
					g = match.groups()
					msgnum = int(g[0])
					try:
						timestamp = rfc822.mktime_tz(rfc822.parsedate_tz(g[3]))
					except Exception:
						# unparseable date: place this entry one second
						# after the previous one
						timestamp = prev_timestamp + 1
					prev_timestamp = timestamp
					localtime = time.localtime(timestamp)
					self.msgs[msgnum] = {
						MSGNUM: msgnum,
						THREADID: g[1],
						SUBJECT: g[2],
						DATE: g[3],
						TIMESTAMP: timestamp,
						AUTHORID: g[4],
						AUTHOR: g[5],
						# year * 100 + month, e.g. 200107
						MONTH: localtime[0] * 100 + localtime[1],
						}
				linepair = index_file.readline() + index_file.readline()
		finally:
			index_file.close()
Пример #16
0
def _readdate(txt):
    """Return txt parsed as a date in seconds since the epoch, or None."""
    import rfc822
    parsed = rfc822.parsedate_tz(txt.strip())
    if parsed is None:
        # not a recognizable date
        return None
    return rfc822.mktime_tz(parsed)
Пример #17
0
def parse_pubdate(text):
    """Convert a publication date string into an integer Unix timestamp

    RFC 822 style dates are tried first, then the first 19 characters are
    read as 'YYYY-MM-DDTHH:MM:SS' in local time. Empty or unparseable
    input gives 0 (unparseable input is also logged as an error).

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0
    """
    if not text:
        return 0

    seconds = None
    rfc_tuple = parsedate_tz(text)
    if rfc_tuple is not None:
        seconds = int(mktime_tz(rfc_tuple))
    else:
        # TODO: Fully RFC 3339-compliant parsing (w/ timezone)
        try:
            iso_struct = time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')
            if iso_struct is not None:
                seconds = int(time.mktime(iso_struct))
        except Exception:
            seconds = None

    if seconds is None:
        logger.error('Cannot parse date: %s', repr(text))
        return 0
    return seconds
Пример #18
0
 def _spew_message(self, id, msg, flags, uid):
     bits = []
     if uid:
         bits.append('UID %s' % msg.uid)
     for flag in flags:
         if flag == 'FLAGS':
             bits.append('FLAGS (%s)' % ' '.join(msg.flags))
         elif flag == 'INTERNALDATE':
             idate = msg.get_internal_date()
             ttup = rfc822.parsedate_tz(idate)
             odate = time.strftime("%d-%b-%Y %H:%M:%S ", ttup[:9])
             if ttup[9] is None:
                 odate = odate + "+0000"
             else:
                 if ttup[9] >= 0:
                     sign = "+"
                 else:
                     sign = "-"
                 odate = odate + sign + str(((abs(ttup[9]) / 3600) * 100 + (abs(ttup[9]) % 3600) / 60)).zfill(4)
             bits.append('INTERNALDATE ' + _quote(odate))
         elif flag == 'RFC822.SIZE':
             bits.append('RFC822.SIZE %d' % len(msg.body))
         elif flag == 'ENVELOPE':
             bits.append('ENVELOPE ' + collapseNestedLists([getEnvelope(msg.headers)]))
         elif flag == 'BODY.PEEK[]':
             bits.append('BODY[] ' + _literal(msg.body))
         else:
             raise ValueError("Unsupported flag '%s'" % flag)
     self.send_untagged_response("%d FETCH (%s)" % (id, " ".join(bits)))
Пример #19
0
    def _parse_sibling(self, sibling, headers, data):
        """
        Parses a single sibling out of a response.
        """

        sibling.exists = True

        # Parse the headers...
        for header, value in headers:
            header = header.lower()
            if header == 'content-type':
                sibling.content_type, sibling.charset = \
                    self._parse_content_type(value)
            elif header == 'etag':
                sibling.etag = value
            elif header == 'link':
                sibling.links = self._parse_links(value)
            elif header == 'last-modified':
                sibling.last_modified = mktime_tz(parsedate_tz(value))
            elif header.startswith('x-riak-meta-'):
                metakey = header.replace('x-riak-meta-', '')
                sibling.usermeta[metakey] = value
            elif header.startswith('x-riak-index-'):
                field = header.replace('x-riak-index-', '')
                reader = csv.reader([value], skipinitialspace=True)
                for line in reader:
                    for token in line:
                        token = decode_index_value(field, token)
                        sibling.add_index(field, token)
            elif header == 'x-riak-deleted':
                sibling.exists = False

        sibling.encoded_data = data

        return sibling
Пример #20
0
def process_reply_file(current, fname):
    """Append the reply stored in file ``fname`` to ``current["notes"]``.

    The file is parsed as an RFC 822 message.  The new note's 'text' is the
    From header followed by the message body, and its 'timestamp' is the
    tuple returned by ``rfc822.parsedate_tz`` for the Date header.
    """
    new_note = {}
    reply = open(fname, "r")
    try:
        msg = rfc822.Message(reply)
        # the body is read through msg.fp, so the file must still be open
        new_note['text'] = "%s\n%s" % (msg['From'], msg.fp.read())
        new_note['timestamp'] = rfc822.parsedate_tz(msg['Date'])
    finally:
        # the handle used to be left open
        reply.close()
    current["notes"].append(new_note)
Пример #21
0
    def execute(self, observation):
        """Store an observation in DynamoDB unless its composite key exists.

        Adds 'ObservationTime' (seconds since the epoch), 'StationId' and
        'CompositeKey' ("<station_id>_<ObservationTime>") to ``observation``,
        then looks the key up in the 'VocalPelicanObservation' table.

        Returns the result of ``put_item`` when the key was not found, and
        False when the item already exists or the lookup failed with a
        JSONResponseError.
        """
        station_id = observation['station_id']

        raw_time = observation['observation_time_rfc822']
        # mktime_tz already returns seconds since the epoch in UTC.  Going
        # through a local-time datetime and subtracting a UTC epoch shifted
        # the value by the host's UTC offset.
        delta = int(rfc822.mktime_tz(rfc822.parsedate_tz(raw_time)))

        observation['ObservationTime'] = delta
        observation['StationId'] = station_id

        composite_key = "%s_%d" % (station_id, delta)
        observation['CompositeKey'] = composite_key

        region = os.environ['AWS_DEFAULT_REGION']
        accessKey = os.environ['AWS_ACCESS_KEY']
        secretKey = os.environ['AWS_SECRET_KEY']

        try:
            connx = boto.dynamodb2.connect_to_region(region, aws_access_key_id=accessKey, aws_secret_access_key=secretKey)
            obs_table = Table('VocalPelicanObservation', connection = connx)
            # only the lookup's outcome matters, not the row itself
            obs_table.get_item(CompositeKey=composite_key)
        except JSONResponseError as responseError:
            # authentication problem
            print(responseError)
        except boto.dynamodb2.exceptions.ItemNotFound:
            # not found implies safe to add
            return obs_table.put_item(observation)

        return False
Пример #22
0
    def getReceiveTime(self, message):
        """Return the message's Date (or Sent) value as seconds since the epoch.

        Falls back to the current time, with a warning, when the value
        cannot be parsed.
        """
        # This is tricky...  the parsed date carries an offset: the number
        # of seconds between the parsed timezone and UTC.  The events
        # database wants seconds since the epoch and treats them as UTC, so
        # the conversion goes through the datetime class because the time
        # module functions do all kinds of conversions "to be helpful".
        timestamp = message.get('Date', message.get('Sent'))
        parsed = rfc822.parsedate_tz(timestamp)
        if parsed is None:
            log.warn("Unable to process timestamp '%s' -- defaulting to now",
                     timestamp)
            return time.time()

        year, month, day, hour, minute, second = parsed[:6]
        offset_secs = parsed[-1]
        if offset_secs is None:
            log.warn("Timezone not specified in '%s' -- defaulting to local timezone",
                     timestamp)
            tz = None
        else:
            # FixedOffset is given minutes, the parsed offset is in seconds
            tz = FixedOffset(offset_secs / 60, "Unknown")

        # Combine the date, the time and the timezone, then convert to UTC
        dt = datetime(year, month, day, hour, minute, second, 0, tz)
        secs = calendar.timegm(dt.utctimetuple())
        log.debug('Timestamp of the event (should be in UTC): %s -> %f',
                  timestamp, secs)
        return secs
Пример #23
0
def check_last_modified(url):
    """Return the Last-Modified time of ``url`` as a naive datetime.

    The datetime holds the date and time fields exactly as written in the
    header; the header's timezone offset is not applied.  Raises IndexError
    when the response has no Last-Modified header.
    """
    u = urlopen(url)
    try:
        last_modified = u.info().getheaders("Last-Modified")[0]
    finally:
        # only the headers are needed, so release the connection right away
        u.close()
    # Take year..second only: a seventh element would put the tuple's
    # weekday slot into datetime's microsecond argument.
    modified = datetime(*parsedate_tz(last_modified)[:6])
    return modified
Пример #24
0
def fix_date(string):
	"""Normalize a date header to "YYYY-MM-DD HH:MM:SS +ZZZZ".

	Known misspellings are corrected before parsing.  A missing or zero
	offset, or one of more than 12 hours, is written as +0000; any other
	offset is kept, minutes included.  Returns None when the string cannot
	be parsed as a date and raises FixDateError when the parsed fields
	cannot be formatted.
	"""
	replacements = (
			("Sab", "Sat"),
			)
	dateformat = "%Y-%m-%d %H:%M:%S"

	# first, fix up some common mistakes
	for repl in replacements:
		string = string.replace(repl[0], repl[1])

	# try normal date parsing
	date = rfc822.parsedate_tz(string)

	# if it succeeded...
	if date:
		offset = date[9]
		# check that the time zone is present and sensible
		if not offset or abs(offset) > 12*60*60:
			# use UTC if not
			dateformat += " +0000"
		else:
			# time zone is sensible, use it; work on the absolute value so
			# that negative half-hour zones are not rounded away from zero
			if offset < 0:
				sign = "-"
			else:
				sign = "+"
			hours, minutes = divmod(abs(offset) // 60, 60)
			dateformat += " %s%02d%02d" % (sign, hours, minutes)
		# return properly formatted date string; if formatting is not
		# possible, fail
		try:
			return time.strftime(dateformat, date[:-1])
		except ValueError as e:
			raise FixDateError(e)
Пример #25
0
	def loadfrommessage(self, msg):
		"""Populate this object's fields from the parsed message ``msg``.

		Sets the To/From/Cc/Subject/annotation fields, marks the message
		"Unread", takes the arrival date from the Received headers, and
		stores the raw headers and the body.  ``self.date`` is 0 when there
		is no Received header with a ';' or its date cannot be parsed.
		"""
		self.tofield = msg.getaddrlist("To")
		f = msg.getaddr("From")
		self.fromfield = f[1]
		self.realfromfield = f[0]
		if not self.realfromfield:
			# no display name: fall back to the bare address
			self.realfromfield = self.fromfield
		self.ccfield = msg.getaddrlist("Cc")
		if not self.ccfield:
			self.ccfield = ()
		self.subjectfield = msg.getheader("Subject")
		if not self.subjectfield:
			self.subjectfield = ""
		self.annotation = msg.getheader("X-SQmaiL-Annotation")
		if not self.annotation:
			self.annotation = ""
		self.readstatus = "Unread"

		# Work out the date the message arrived: it is whatever follows the
		# first ';' in the concatenated Received headers.

		r = "".join(msg.getallmatchingheaders("Received"))
		p = r.find(";")
		self.date = 0
		if p != -1:
			parsed = rfc822.parsedate_tz(r[p+1:])
			# an unparseable date leaves the date at 0 instead of failing
			# inside mktime_tz
			if parsed is not None:
				self.date = rfc822.mktime_tz(parsed)

		self.headers = "".join(msg.headers)
		self.body = msg.fp.read()
Пример #26
0
def was_modified_since(header=None, mtime=0, size=0):
    """
    Was something modified since the user last downloaded it?

    header
      This is the value of the If-Modified-Since header.  If this is None,
      I'll just return True.  A header that does not match the expected
      layout, or whose date cannot be parsed, also gives True.

    mtime
      This is the modification time of the item we're talking about.

    size
      This is the size of the item we're talking about.
    """
    try:
        if header is None:
            raise ValueError
        matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
                           re.IGNORECASE)
        # matches is None for a malformed header; the resulting
        # AttributeError is handled below
        header_date = rfc822.parsedate_tz(matches.group(1))
        if header_date is None:
            # unparseable date: treat the item as modified
            raise ValueError
        header_mtime = rfc822.mktime_tz(header_date)
        header_len = matches.group(3)
        if header_len and int(header_len) != size:
            raise ValueError
        if mtime > header_mtime:
            raise ValueError
    except (AttributeError, ValueError, OverflowError):
        return True
    return False
Пример #27
0
    def get_contents_to_filename(self, filename, headers=None,
                                 cb=None, num_cb=10,
                                 torrent=False,
                                 version_id=None,
                                 res_download_handler=None,
                                 response_headers=None):
        """
        Retrieve an object from S3 using the name of the Key object as the
        key in S3.  Store contents of the object to a file named by 'filename'.
        See get_contents_to_file method for details about the
        parameters.

        The file is closed even when the download raises.  Afterwards, if
        ``self.last_modified`` is set, the file's access and modification
        times are set from it on a best-effort basis.

        :type filename: string
        :param filename: The filename of where to put the file contents

        :type headers: dict
        :param headers: Any additional headers to send in the request

        :type cb: function
        :param cb: a callback function that will be called to report
                   progress on the transfer.  The callback should accept
                   two integer parameters, the first representing the
                   number of bytes that have been successfully
                   transmitted and the second representing the
                   size of the to be transmitted object.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with
                       the cb parameter this parameter determines the
                       granularity of the callback by defining
                       the maximum number of times the callback will
                       be called during the file transfer.

        :type torrent: bool
        :param torrent: If True, returns the contents of a torrent file
                        as a string.

        :type version_id: string
        :param version_id: Passed through to get_contents_to_file.

        :type res_download_handler: ResumableDownloadHandler
        :param res_download_handler: If provided, this handler will
                                     perform the download.

        :type response_headers: dict
        :param response_headers: A dictionary containing HTTP headers/values
                                 that will override any headers associated with
                                 the stored object in the response.
                                 See http://goo.gl/EWOPb for details.
        """
        fp = open(filename, 'wb')
        try:
            self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
                                      version_id=version_id,
                                      res_download_handler=res_download_handler,
                                      response_headers=response_headers)
        finally:
            # do not leak the handle when the download fails
            fp.close()
        # if last_modified date was sent from s3, try to set file's timestamp
        if self.last_modified is not None:
            try:
                modified_tuple = rfc822.parsedate_tz(self.last_modified)
                modified_stamp = int(rfc822.mktime_tz(modified_tuple))
                os.utime(fp.name, (modified_stamp, modified_stamp))
            except Exception:
                # best effort only: a bad date or a failing utime must not
                # turn a successful download into an error
                pass
Пример #28
0
def _readdate(txt):
    """Return txt parsed as a date in seconds since the epoch, or None."""
    import rfc822
    parsed = rfc822.parsedate_tz(txt.strip())
    if parsed is None:
        # not a recognizable date
        return None
    return rfc822.mktime_tz(parsed)
Пример #29
0
    def log(self, parent=None, limit=100):
        """Yield a RevisionResult for each entry of the repository log.

        ``parent`` (optional) is passed as the ``-r`` revision and ``limit``
        (optional) as ``--limit``.  Empty branch lists become ['default'],
        and empty or all-zero parent ids are dropped.
        """
        # TODO(dcramer): we should make this streaming
        cmd = ['log', '--template=%s' % (LOG_FORMAT,)]
        if parent:
            cmd.append('-r %s' % (parent,))
        if limit:
            cmd.append('--limit=%d' % (limit,))
        output = self.run(cmd)

        null_sha = '0' * 40
        for chunk in BufferParser(output, '\x02'):
            # fields are separated by \x01 inside each \x02-delimited record
            (sha, author, raw_date, raw_parents, raw_branches, message) = chunk.split('\x01')

            branch_names = [b for b in raw_branches.split(' ') if b] or ['default']
            parent_shas = [p for p in raw_parents.split(' ') if p and p != null_sha]

            epoch_seconds = mktime_tz(parsedate_tz(raw_date))

            yield RevisionResult(
                id=sha,
                author=author,
                author_date=datetime.utcfromtimestamp(epoch_seconds),
                message=message,
                parents=parent_shas,
                branches=branch_names,
            )
Пример #30
0
def _parse_date_rfc822(dateString):
    '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date

    The string is normalized token by token before being handed to
    rfc822.parsedate_tz.  Returns a UTC time.struct_time (from
    time.gmtime), or None when the string is empty or cannot be parsed.
    '''
    data = dateString.split()
    if not data:
        return None
    # Drop a leading day name (or any first token ending in ',' or '.').
    if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames:
        del data[0]
    if len(data) == 4:
        # Four tokens: split a '+' suffix off the fourth token into its own
        # token, otherwise append an empty fifth token.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')
        dateString = " ".join(data)
    # Account for the Etc/GMT timezone by stripping 'Etc/'
    elif len(data) == 5 and data[4].lower().startswith('etc/'):
        data[4] = data[4][4:]
        dateString = " ".join(data)
    # Fewer than five tokens: append a midnight GMT time.
    if len(data) < 5:
        dateString += ' 00:00:00 GMT'
    tm = rfc822.parsedate_tz(dateString)
    if tm:
        # Jython doesn't adjust for 2-digit years like CPython does,
        # so account for it by shifting the year so that it's in the
        # range 1970-2069 (1970 being the year of the Unix epoch).
        if tm[0] < 100:
            tm = (tm[0] + (1900, 2000)[tm[0] < 70],) + tm[1:]
        return time.gmtime(rfc822.mktime_tz(tm))
Пример #31
0
    def getReceiveTime(self, message):
        """Return the message's Date (or Sent) value as seconds since the epoch.

        Falls back to the current time, with a warning, when the value
        cannot be parsed.
        """
        # This is tricky...  the parsed date carries an offset: the number
        # of seconds between the parsed timezone and UTC.  The events
        # database wants seconds since the epoch and treats them as UTC, so
        # the conversion goes through the datetime class because the time
        # module functions do all kinds of conversions "to be helpful".
        timestamp = message.get('Date', message.get('Sent'))
        parsed = rfc822.parsedate_tz(timestamp)
        if parsed is None:
            log.warn("Unable to process timestamp '%s' -- defaulting to now",
                     timestamp)
            return time.time()

        year, month, day, hour, minute, second = parsed[:6]
        offset_secs = parsed[-1]
        if offset_secs is None:
            log.warn("Timezone not specified in '%s' -- defaulting to local timezone",
                     timestamp)
            tz = None
        else:
            # FixedOffset is given minutes, the parsed offset is in seconds
            tz = FixedOffset(offset_secs / 60, "Unknown")

        # Combine the date, the time and the timezone, then convert to UTC
        dt = datetime(year, month, day, hour, minute, second, 0, tz)
        secs = calendar.timegm(dt.utctimetuple())
        log.debug('Timestamp of the event (should be in UTC): %s -> %f',
                  timestamp, secs)
        return secs
Пример #32
0
def parse_pubdate(text):
    """Parse a date string into a Unix timestamp

    RFC 822 dates are tried first, then 'YYYY-MM-DDTHH:MM:SS' stamps.  A
    trailing 'Z' or '+HH:MM' / '-HH:MM' designator is honoured; a stamp
    with no (or an unrecognised) designator is interpreted as local time.
    Empty, unparseable or out-of-range input yields 0.

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('2003-12-13T00:00:00+02:00')
    1071266400

    >>> parse_pubdate('2003-12-13T18:30:02Z')
    1071340202

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0
    """
    # Imported here so the function does not depend on a module-level import.
    import re

    if not text:
        return 0

    parsed = parsedate_tz(text)
    if parsed is not None:
        try:
            return int(mktime_tz(parsed))
        except (OverflowError, ValueError):
            # The date parsed, but lies outside the range the platform's
            # time functions can represent.
            logger.warning('Date out of range: %s', repr(text))
            return 0

    # TODO: fractional seconds (e.g. '...T18:30:02.25Z') are not recognised
    try:
        parsed = time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')
        zone = re.match(r'^(?:Z|([+-])([0-9]{2}):([0-9]{2}))$', text[19:])
        if zone is None:
            # No usable designator: fall back to local time.
            return int(time.mktime(parsed))

        offset = 0  # 'Z' means UTC
        if zone.group(1):
            offset = 3600 * int(zone.group(2)) + 60 * int(zone.group(3))
            if zone.group(1) == '-':
                offset = -offset
        # mktime_tz() takes the nine time fields plus the UTC offset.
        return int(mktime_tz(tuple(parsed) + (offset,)))
    except Exception:
        # Any failure here just means the text is not in this format.
        pass

    logger.error('Cannot parse date: %s', repr(text))
    return 0
Пример #33
0
    def open(self):
        """Request the 'data' URL and return the response's raw stream.

        Side effects: stores the response on self._data_response and fills
        self._size (Content-Length as an int, or None), self._modified
        (Last-Modified as seconds since the epoch, or None) and
        self._mimetype (Content-Type, defaulting to
        'application/octet-stream').
        """
        # XXX in future add support for compression
        request_headers = {'Accept-Encoding': ''}

        # The keyword passed to get() depends on the detected requests
        # version: prefetch=False for version '0', stream=True otherwise.
        if _requests_version == '0':
            body_option = {'prefetch': False}
        else:
            body_option = {'stream': True}
        self._data_response = self._session.get(self._url('data'),
                                                headers=request_headers,
                                                **body_option)
        self._validate_response(self._data_response)

        content_length = self._data_response.headers.get('Content-Length', None)
        self._size = None if content_length is None else int(content_length)

        last_modified = self._data_response.headers.get('Last-Modified', None)
        if last_modified is None:
            self._modified = None
        else:
            self._modified = rfc822.mktime_tz(
                rfc822.parsedate_tz(last_modified))

        self._mimetype = self._data_response.headers.get(
            'Content-Type', 'application/octet-stream')

        return self._data_response.raw
Пример #34
0
Файл: jb2bz.py Проект: EQ4/h5vcc
def process_reply_file(current, fname):
    """Append the reply stored in the file *fname* to current["notes"].

    The file is parsed as an RFC 822 message.  The appended note is a dict
    with:
      "text"      -- the From header, a newline, then the rest of the file
      "timestamp" -- the Date header as returned by rfc822.parsedate_tz()
                     (None when the header cannot be parsed)
    """
    reply = open(fname, "r")
    try:
        msg = rfc822.Message(reply)
        new_note = {
            # msg.fp is positioned after the headers, so read() is the body.
            "text": "%s\n%s" % (msg["From"], msg.fp.read()),
            "timestamp": rfc822.parsedate_tz(msg["Date"]),
        }
    finally:
        # Release the file handle even when parsing fails.
        reply.close()
    current["notes"].append(new_note)
Пример #35
0
def was_modified_since(header=None, mtime=0, size=0):
    """
    Was something modified since the user last downloaded it?

    header
      This is the value of the If-Modified-Since header.  If this is None,
      I'll just return True.

    mtime
      This is the modification time of the item we're talking about.

    size
      This is the size of the item we're talking about.

    Returns True when the header is missing or malformed, when its
    optional "; length=N" part disagrees with size, or when mtime is newer
    than the date in the header; returns False otherwise.
    """
    if header is None:
        return True

    matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
                       re.IGNORECASE)
    if matches is None:
        return True

    try:
        header_mtime = rfc822.mktime_tz(rfc822.parsedate_tz(
            matches.group(1)))
    except (TypeError, ValueError, OverflowError):
        # parsedate_tz() returns None for text it cannot parse, which makes
        # mktime_tz() raise TypeError; out-of-range dates raise the others.
        # Either way the header is unusable, so report "modified".
        return True

    header_len = matches.group(3)
    if header_len and int(header_len) != size:
        return True

    return mtime > header_mtime
Пример #36
0
 def parse(self, *args, **kwargs):
     """ return the time value (in seconds since 1970)

     The header value is obtained by calling self with the given
     arguments.  Returns None when that value is empty; raises
     HTTPBadRequest when it cannot be converted to a timestamp.
     """
     value = self.__call__(*args, **kwargs)
     if value:
         try:
             return mktime_tz(parsedate_tz(value))
         except (OverflowError, TypeError, ValueError):
             # TypeError: parsedate_tz() returned None (unparseable text).
             # OverflowError / ValueError: the date parsed but is outside
             # the range the time conversion can represent.
             raise HTTPBadRequest(("Received an ill-formed timestamp for %s: %s\r\n") % (self.name, value))
Пример #37
0
  def convert_rfc822(self, date_string):
    """ Turn an RFC 822 date string into a naive datetime in local time """
    # parsedate_tz() keeps the UTC offset so mktime_tz() can produce a true
    # epoch value; fromtimestamp() then renders it in the local timezone.
    epoch_seconds = rfc822.mktime_tz(rfc822.parsedate_tz(date_string))
    return datetime.datetime.fromtimestamp(epoch_seconds)
Пример #38
0
def twitter():
    """Yield the 'results' entries fetched from TWITTER_URL.

    Each entry gets two extra keys before it is yielded:
      'timestamp' -- its 'created_at' date converted with time.mktime()
      'template'  -- always 'twitter/tweet.html'

    Nothing is yielded when the HTTP request does not succeed.
    """
    response = requests.get(TWITTER_URL)
    if not response.ok:
        # A plain return ends the generator.  Raising StopIteration inside
        # a generator body is turned into RuntimeError under PEP 479.
        return
    for obj in json.loads(response.content)['results']:
        # NOTE(review): the UTC offset from parsedate_tz() is dropped and
        # the remaining fields go to time.mktime(), which reads them as
        # local time -- confirm that this is intended.
        obj['timestamp'] = time.mktime(rfc822.parsedate_tz(obj['created_at'])[:-1])
        obj['template'] = 'twitter/tweet.html'
        yield obj
 def _normalize_rfc822_date( self, date_string ):
     return  datetime.fromtimestamp(
                 rfc822.mktime_tz(
                     rfc822.parsedate_tz(
                         date_string
                     )
                 )
             )
Пример #40
0
def numericTime(s):
    """Return time.mktime() seconds for the date in the first 20 chars of s.

    Only s[:20] is parsed, so anything after that (such as a timezone
    suffix) is ignored and the fields are interpreted as local time.
    """
    # The locale is deliberately left untouched: setting it is not
    # supported under Windows, and the dates are in English anyway.
    time_fields = rfc822.parsedate_tz(s[:20])
    # Drop the trailing UTC-offset entry; mktime() takes nine fields.
    return time.mktime(time_fields[:9])
Пример #41
0
def numericTime(s):
    """Convert the leading 20 characters of s to a local-time timestamp.

    The text is parsed with rfc822.parsedate_tz() and the nine time fields
    are handed to time.mktime(); any zone information is not used.
    """
    # No locale switching here: it is not supported under Windows and the
    # input is English anyway.
    leading = s[:20]
    parsed = rfc822.parsedate_tz(leading)
    nine_fields = parsed[:9]
    return time.mktime(nine_fields)
Пример #42
0
    def addMail(self, mailString):
        """ Store mail as news item
            Returns created item

            The mail is split into header and body, its subject becomes the
            item's title (and, normalized, its id), and the item text is
            built from today's date, the sender and the mail body.  After
            creation the 'publish' workflow action is attempted; a failure
            of that action is ignored.
        """

        # NOTE(review): 'archive' is assigned but not read again in this
        # method.
        archive = self.context
        pw = self.context.portal_workflow
        
        (header, body) = splitMail(mailString)

        # if 'keepdate' is set, get date from mail,
        # NOTE(review): the local name 'time' is assigned in both branches
        # but not read again in this method.
        if self.getValueFor('keepdate'):
            timetuple = rfc822.parsedate_tz(header.get('date'))
            time = DateTime(rfc822.mktime_tz(timetuple))
        # ... take our own date, clients are always lying!
        else:
            time = DateTime()

        (TextBody, ContentType, HtmlBody, Attachments) = unpackMail(mailString)

        # Test: date stamp placed in front of the sender in the item text
        from datetime import date
        today = date.today()
        mydate = today.strftime("%d.%m.%Y")





        # let's create the news item

        subject = mime_decode_header(header.get('subject', 'No Subject'))
        sender = mime_decode_header(header.get('from','No From'))
        # previous title format: "%s / %s" % (subject, sender)
        title = "%s"  % (subject)

        new_id = IUserPreferredURLNormalizer(self.request).normalize(title)
        id = self._findUniqueId(new_id)
        # ContentType is only set for the TextBody
        if ContentType:
            body = TextBody
        else:
            body = self.HtmlToText(HtmlBody)

        # as a provisional solution: the description is the first 60
        # characters of the body followed by "..."
        desc = "%s..." % (body[:60])
        uni_aktuell_body = "<p><strong>%s: %s</strong></p> <p>&nbsp;</p><pre>%s</pre>" % (mydate, sender, body)
        # previous text format: '<p>&nbsp;</p>' + body

        objid = self.context.invokeFactory(NewsItem.meta_type, id=id, title=title, text=uni_aktuell_body, description=desc)

        mailObject = getattr(self.context, objid)
        try:
            # originally: pw.doActionFor(mailObject, 'hide')
            pw.doActionFor(mailObject, 'publish')
        except:
            # Any failure of the workflow action is swallowed; the item is
            # returned regardless.
            pass
        return mailObject
Пример #43
0
    def log(self, parent=None, branch=None, author=None, offset=0, limit=100, paths=None):
        """ Yields the repository's commit log as RevisionResult objects.

        The 'log' command is run once with a template; its output is split
        into records on '\\x02' and into fields on '\\x01'.  The first
        ``offset`` records are skipped.  A revision with no branch recorded
        is reported with branches ['default'], and all-zero parent hashes
        are dropped.

        ``parent`` and ``branch`` are mutually exclusive (ValueError).

        See documentation for the base for general information on this function.
        """
        start_time = time()

        # TODO(dcramer): we should make this streaming
        cmd = ['log', '--template=%s' % (LOG_FORMAT,)]

        if parent and branch:
            raise ValueError('Both parent and branch cannot be set')

        if branch:
            cmd.append('-b{0}'.format(branch))

        # Assemble the revset given to -r from the parent and author filters.
        revset = ('ancestors(%s)' % parent) if parent else None
        if author:
            if revset:
                revset = '({r}) and author("{0}")'.format(author, r=revset)
            else:
                revset = 'author("{0}")'.format(author)
        if revset:
            cmd.append('-r reverse({0})'.format(revset))

        if limit:
            # Skipped records still have to be fetched, hence offset + limit.
            cmd.append('--limit=%d' % (offset + limit,))

        if paths:
            cmd.extend("glob:" + path.strip() for path in paths)

        output = self.run(cmd)

        self.log_timing('log', start_time)

        null_sha = '0' * 40
        for index, record in enumerate(BufferParser(output, '\x02')):
            if index < offset:
                continue

            (sha, commit_author, raw_date, parent_field, branch_field,
             message) = record.split('\x01')

            branch_names = [b for b in branch_field.split(' ') if b] or ['default']
            parent_shas = [p for p in parent_field.split(' ')
                           if p and p != null_sha]

            commit_date = datetime.utcfromtimestamp(
                mktime_tz(parsedate_tz(raw_date)))

            yield RevisionResult(
                id=sha,
                author=commit_author,
                author_date=commit_date,
                message=message,
                parents=parent_shas,
                branches=branch_names,
            )
Пример #44
0
    def _item_publish_date(self, item):
        """Return the item's publication time as a UTC datetime.

        item['pubDate'] is parsed as an RFC 822 date string and converted
        to a timezone-aware datetime in pytz.utc.

        """
        epoch_seconds = rfc822.mktime_tz(rfc822.parsedate_tz(item['pubDate']))
        return datetime.fromtimestamp(epoch_seconds, pytz.utc)
Пример #45
0
def improve_date(input):
    """Prefix a date string with the same instant re-expressed by formatdate().

    The string is returned unchanged when its last space-separated token
    (with its first and last character removed) is one of time.tzname,
    when that token equals get_zone(), or when no non-zero UTC offset can
    be parsed from it.  Otherwise the result is "<converted> (<input>)".
    """
    zone_token = input.split(" ")[-1].strip()
    if zone_token[1:-1] in time.tzname or zone_token == get_zone():
        return input

    parsed = rfc822.parsedate_tz(input)
    if not parsed or not parsed[9]:
        return input

    # Shift the parsed wall-clock time by its own UTC offset and by the
    # local zone's offset before formatting.
    shifted = time.mktime(parsed[:9]) - parsed[9] - (time.timezone)
    return "%s (%s)" % (formatdate(shifted, True), input)
Пример #46
0
 def _onsuccess(boto_key):
     checksum = boto_key.etag.strip('"')
     last_modified = boto_key.last_modified
     modified_tuple = rfc822.parsedate_tz(last_modified)
     modified_stamp = int(rfc822.mktime_tz(modified_tuple))
     return {'checksum': checksum,
             'last_modified': modified_stamp,
             'width': boto_key.metadata.width,
             'height': boto_key.metadata.height}
Пример #47
0
    def _item_publish_date_tz(self, item):
        """Returns the date that the comic strip was published.

        The original timezone is preserved.

        """
        parts = rfc822.parsedate_tz(item['pubDate'])
        timestamp = rfc822.mktime_tz(parts)
        return datetime.fromtimestamp(timestamp)
Пример #48
0
def parse_pubdate(text):
    """Convert a publication date string to integer seconds since the epoch

    RFC 822 dates are tried first.  Otherwise the first 19 characters are
    read as 'YYYY-MM-DDTHH:MM:SS'; a trailing 'Z' or '+HH:MM' / '-HH:MM'
    gives the UTC offset, and without one the stamp is taken as local
    time.  Empty, unparseable or out-of-range input yields 0.

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('2003-12-13T00:00:00+02:00')
    1071266400

    >>> parse_pubdate('2003-12-13T18:30:02Z')
    1071340202

    >>> parse_pubdate('Mon, 02 May 1960 09:05:01 +0100')
    -305049299

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0
    """
    if not text:
        return 0

    rfc_fields = parsedate_tz(text)
    if rfc_fields is not None:
        try:
            return int(mktime_tz(rfc_fields))
        except (OverflowError, ValueError):
            logger.warning(
                'bad pubdate %s is before epoch or after end of time (2038)',
                rfc_fields)
            return 0

    try:
        iso_fields = time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')
        zone = re.match(r'^(?:Z|([+-])([0-9]{2})[:]([0-9]{2}))$', text[19:])
        if not zone:
            # No recognised designator: treat the stamp as local time.
            return int(time.mktime(iso_fields))

        sign, hours, minutes = zone.groups()
        offset = 0  # a bare 'Z' leaves all groups empty and means UTC
        if sign:
            offset = 3600 * int(hours) + 60 * int(minutes)
            if sign == '-':
                offset = -offset
        # mktime_tz() takes the nine time fields followed by the offset.
        return int(mktime_tz(tuple(iso_fields) + (offset,)))
    except Exception:
        pass

    logger.error('Cannot parse date: %s', repr(text))
    return 0
def convert_created_at(line, created_at_format):
    """ fix the created_at time since it is 'RFC 2822'

    Rewrites line['created_at'] using created_at_format and stores the
    parsed UTC offset (in seconds, as reported by parsedate_tz) under
    line['created_at_shift'].  The date/time fields are passed to mktime()
    as they stand, without applying that offset.

    Mutates and returns ``line``.  Raises TypeError when the date cannot
    be parsed.
    """
    parsed = parsedate_tz(line['created_at'])
    # mktime() wants the nine time fields as a tuple (Python 3 rejects a
    # list), so slice the parsed tuple; the last entry is the UTC offset.
    dt = datetime.datetime.fromtimestamp(mktime(parsed[:9]))
    line['created_at'] = dt.strftime(created_at_format)
    line['created_at_shift'] = parsed[-1]
    return line
Пример #50
0
def DateHeader_parse(self, *args, **kwargs):
    """ return the header's date as seconds since 1970

    The header value is obtained by calling self with the given arguments.
    Returns None when that value is empty; raises HTTPBadRequest when it
    cannot be converted to a timestamp.
    """
    value = self.__call__(*args, **kwargs)
    if not value:
        return None
    try:
        seconds = mktime_tz(parsedate_tz(value))
    except (OverflowError, TypeError):
        raise HTTPBadRequest(
            "Received an ill-formed timestamp for %s: %s\r\n" %
            (self.name, value))
    return seconds
Пример #51
0
def mirror_refdata(refdataTopdir='https://kbase.us/refdata/', refdataDiskdir='refdata'):
    """Mirror every module/version directory listed under refdataTopdir.

    refdataTopdir
      Base URL; its JSON listing names the modules, and each module's
      listing names the versions.

    refdataDiskdir
      Local directory that receives <module>/<version>/ trees.

    A version is skipped when the remote __READY__ file's Last-Modified
    time is older than the local __READY__ file's mtime.  Otherwise the
    version directory (plus any directories named in its optional
    .dotfiles listing) is fetched with retrieve_dir(), and the __READY__
    file is downloaded last.
    """
    refdataReq = requests.get(refdataTopdir)
    modules = refdataReq.json()

    for module in modules:
        moduledir = refdataTopdir + module['name']
        moduleReq = requests.get(moduledir)
        versions = moduleReq.json()
        for version in versions:
            versiondir = moduledir + '/' + version['name']
            versionDiskPath = refdataDiskdir + '/' + module['name'] + '/' + version['name']

            # for retrieving dot files (requires building .dotfile manually
            # at the source); for now just get the .dotfiles file
            remotedotfilesls = []
            remotedotfilesreq = requests.get(versiondir + '/.dotfiles')
            try:
                remotedotfilesls = remotedotfilesreq.json()
            except ValueError:
                # No usable .dotfiles listing: carry on without dot dirs.
                pass

            readyHeadReq = requests.head(versiondir + '/__READY__')
            print(readyHeadReq.headers['Last-Modified'])
            mirrorDatestamp = rfc822.mktime_tz(
                rfc822.parsedate_tz(readyHeadReq.headers['Last-Modified']))
            readyFile = versionDiskPath + '/__READY__'
            if os.path.isfile(readyFile):
                fileDatestamp = os.path.getmtime(readyFile)
                print(mirrorDatestamp)
                print(fileDatestamp)
                if mirrorDatestamp < fileDatestamp:
                    print("mirror __READY__ older than local file, skipping " + versiondir)
                    continue

            try:
                os.makedirs(versionDiskPath)
                print('created dir ' + versionDiskPath)
            except OSError:
                # An already-existing directory is fine; anything else is
                # a real error.
                if not os.path.isdir(versionDiskPath):
                    raise
            retrieve_dir(versiondir, versionDiskPath)

            # hacky support for dot files: for now only dot dirs are
            # supported (file retrieval would need its own method)
            for dotfile in remotedotfilesls:
                retrieve_dir(versiondir + '/' + dotfile['name'],
                             versionDiskPath + '/' + dotfile['name'])

            # if this works, retrieve __READY__ file
            print('retrieve ' + versiondir + ' succeeded, retrieving __READY__ file')
            filereq = requests.get(versiondir + '/__READY__', timeout=5, stream=True)
            with open(versionDiskPath + '/__READY__', 'wb') as fd:
                for chunk in filereq.iter_content(1024):
                    fd.write(chunk)
Пример #52
0
 def __init__(self, id, spamHeader, dateHeader, headers):
     """Record a message's id, spam score, date and headers.

     score and required come from scoreFromHeader(spamHeader); a warning
     is issued when no score is found.  self.date is only set when
     dateHeader is non-empty, as a naive local-time datetime.
     """
     self.id = id
     self.score, self.required = scoreFromHeader(spamHeader)
     if self.score is None:
         warn('no score on msg id=%s header="%s"' % (self.id, spamHeader))
     if dateHeader:
         epoch_seconds = rfc822.mktime_tz(rfc822.parsedate_tz(dateHeader))
         self.date = datetime.fromtimestamp(epoch_seconds)
     self.headers = headers
     self.data = {}
     self.flags = None
def s3_has_uptodate_file(bucket, transfer_file, s3_key_name):
    """Tell whether the bucket already holds a current copy of a local file.

    Returns True only when the key exists, its size equals the local
    file's size and its last_modified time is not older than the local
    file's mtime; returns False when the key is missing.
    """
    remote = bucket.get_key(s3_key_name)
    if not remote:
        return False

    remote_size = remote.size
    local_size = os.path.getsize(transfer_file)
    remote_mtime = rfc822.mktime_tz(rfc822.parsedate_tz(remote.last_modified))
    local_mtime = os.path.getmtime(transfer_file)

    same_size = remote_size == local_size
    return same_size and remote_mtime >= local_mtime
Пример #54
0
def from_internaldate(date):
    """Convert a date string to the value produced by gmtime.mkgmtime().

    The string is parsed with rfc822.parsedate_tz() and the result is
    handed to gmtime.mkgmtime().  Returns 0 when the conversion fails for
    any reason.
    """
    try:
        return gmtime.mkgmtime(rfc822.parsedate_tz(date))
    except Exception:
        # Best effort: a bad date maps to 0.  Catching Exception (rather
        # than everything) lets KeyboardInterrupt / SystemExit propagate.
        return 0
Пример #55
0
 def got_contents_to_filename(response):
     """Close the downloaded file, stamp its mtime, then notify the caller.

     When self.last_modified is set, the file's access and modification
     times are set to that date; a failure to do so is ignored.  The
     callback, if callable, is invoked with the response.
     """
     fp.close()
     # if last_modified date was sent from s3, try to set file's timestamp
     if self.last_modified is not None:
         try:
             modified_tuple = rfc822.parsedate_tz(self.last_modified)
             modified_stamp = int(rfc822.mktime_tz(modified_tuple))
             os.utime(fp.name, (modified_stamp, modified_stamp))
         except Exception:
             # Setting the timestamp is best effort only; the download
             # itself has already succeeded.
             pass
     if callable(callback):
         callback(response)
Пример #56
0
def _parse_date(value):
    if not value:
        return None
    t = parsedate_tz(value)
    if t is None:
        # Could not parse
        return None
    if t[-1] is None:
        # No timezone given.  None would mean local time, but we'll force UTC
        t = t[:9] + (0, )
    t = mktime_tz(t)
    return datetime.fromtimestamp(t, UTC)