def schedularRunner():
    """Fetch the Ada Derana hot-news feed and store the items not yet saved.

    The publish date of the last stored 'derananews' item is read through
    ``dao``. Feed items are collected in feed order until one carrying that
    same publish date is met; the collected items are preprocessed,
    classified and inserted. Items with an empty summary are not inserted.
    """
    latestNews = dao.selectLast('derananews')
    print(latestNews)
    aggrigater = DeranaNewsAggregator()
    classifier = MultinomialNBClassifier()
    feedItems = aggrigater.aggriagteNews(
        "http://sinhala.adaderana.lk/rsshotnews.php")

    # The stored date does not change while scanning, so parse it once.
    latestStamp = rfc822.parsedate_tz(latestNews)

    # Start from an empty list: seeding it with the News class itself (as
    # the previous code did) sends the class object through preprocessing,
    # classification and the insert loop as if it were a feed item.
    latestNewsList = []
    for news in feedItems:
        if rfc822.parsedate_tz(news.publishDate) == latestStamp:
            break
        latestNewsList.append(news)

    if not latestNewsList:
        # Nothing new since the last run.
        return

    preprocessor.prepocessor(latestNewsList)
    classifier.classify(latestNewsList)

    for news in latestNewsList:
        description = news.summary
        if description == '':
            continue
        dao.insertNews(news.title, news.link, description, news.imageLink,
                       str(news.publishDate), news.category[0],
                       str(news.newsSite))
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think because we are
    operating as a private (non-shared) cache. This lets us ignore
    's-maxage'. We can also ignore 'proxy-invalidate' since we aren't a
    proxy. We will never return a stale document as fresh as a design
    decision, and thus the non-implementation of 'max-stale'. This also lets
    us safely ignore 'must-revalidate' since we operate as if every server
    has sent 'must-revalidate'. Since we are private we get to ignore both
    'public' and 'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations. The 'no-store' parameter is handled at
    a higher level. So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns one of the strings "STALE", "FRESH" or "TRANSPARENT". A request
    'Pragma: no-cache' without a Cache-Control header adds
    'cache-control: no-cache' to ``request_headers`` in place.

    Malformed input is treated conservatively: an unparsable Date leaves the
    entry STALE, an unparsable Expires or a non-numeric max-age/min-fresh
    counts as zero seconds.
    """

    def _seconds(text):
        # Header values come from the network; garbage counts as 0 seconds.
        try:
            return int(text)
        except (TypeError, ValueError):
            return 0

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if ('pragma' in request_headers
            and request_headers['pragma'].lower().find('no-cache') != -1):
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif 'no-cache' in cc:
        retval = "TRANSPARENT"
    elif 'no-cache' in cc_response:
        retval = "STALE"
    elif 'only-if-cached' in cc:
        retval = "FRESH"
    elif 'date' in response_headers:
        date_tuple = rfc822.parsedate_tz(response_headers['date'])
        if date_tuple is not None:
            date = calendar.timegm(date_tuple)
            now = time.time()
            current_age = max(0, now - date)
            if 'max-age' in cc_response:
                freshness_lifetime = _seconds(cc_response['max-age'])
            elif 'expires' in response_headers:
                expires = rfc822.parsedate_tz(response_headers['expires'])
                if expires is None:
                    freshness_lifetime = 0
                else:
                    freshness_lifetime = max(
                        0, calendar.timegm(expires) - date)
            else:
                freshness_lifetime = 0
            if 'max-age' in cc:
                freshness_lifetime = min(freshness_lifetime,
                                         _seconds(cc['max-age']))
            if 'min-fresh' in cc:
                current_age += _seconds(cc['min-fresh'])
            if freshness_lifetime > current_age:
                retval = "FRESH"
    return retval
def get_delivery_time(msg):
    """Figure out the delivery time of *msg*.

    Returns seconds since the epoch, or None when neither a
    "Delivery-date" header nor a parsable "From " line is available.
    """
    if "Delivery-date" in msg:
        # eg. "Thu, 12 Jul 2001 08:47:20 -0400" to 994942040 (seconds
        # since epoch in UTC)
        return mktime_tz(parsedate_tz(msg["Delivery-date"]))

    if not msg.unixfrom:
        return None

    # Parse eg.
    #   "From user@example.com Thu Jul 12 08:47:20 2001"
    # -- this is the "From " line format used by Exim; hopefully other
    # MTAs do the same!
    m = re.match(r'^From (\S+) +(\w{3} \w{3}\s+\d\d? \d\d:\d\d:\d\d \d{4})$',
                 msg.unixfrom)
    if not m:
        warn("warning: could not parse \"From \" line: %s" % msg.unixfrom)
        return None

    dtime_str = m.group(2)
    # Eg. "Thu Jul 12 08:47:20 2001" -> 994945640 -- note that
    # this might be different from what we get parsing the same
    # date string above, because this one doesn't include the
    # timezone.  Sigh.
    dtime = mktime(strptime(dtime_str, "%c"))

    # Attempt to detect and correct for DST differences.
    # (This works if we parsed a summer time during the winter;
    # what about the inverse?)
    dtime_str_curtz = ctime(dtime)
    if dtime_str_curtz != dtime_str:
        dtime_curtz = mktime(strptime(dtime_str_curtz, "%c"))
        dtime -= dtime_curtz - dtime
    return dtime
def execute(self, observation):
    """Add *observation* to the 'VocalPelicanObservation' table if it is new.

    The observation dict is extended in place with 'ObservationTime'
    (seconds since the epoch), 'StationId' and 'CompositeKey'
    ("<station_id>_<ObservationTime>").

    Returns the result of ``put_item`` when no item with the composite key
    exists yet; returns False when the item already exists or a
    JSONResponseError was reported.
    """
    station_id = observation['station_id']
    raw_time = observation['observation_time_rfc822']

    # mktime_tz() already yields seconds since the epoch. Going through a
    # local naive datetime and subtracting a UTC epoch (as before) shifts
    # the value by the host's UTC offset on any machine not running in UTC.
    delta = int(rfc822.mktime_tz(rfc822.parsedate_tz(raw_time)))

    observation['ObservationTime'] = delta
    observation['StationId'] = station_id
    composite_key = "%s_%d" % (station_id, delta)
    observation['CompositeKey'] = composite_key

    region = os.environ['AWS_DEFAULT_REGION']
    accessKey = os.environ['AWS_ACCESS_KEY']
    secretKey = os.environ['AWS_SECRET_KEY']

    try:
        connx = boto.dynamodb2.connect_to_region(
            region,
            aws_access_key_id=accessKey,
            aws_secret_access_key=secretKey)
        obs_table = Table('VocalPelicanObservation', connection=connx)
        # Only used as an existence probe; the row itself is not needed.
        obs_table.get_item(CompositeKey=composite_key)
    except JSONResponseError as responseError:
        # authentication problem
        print(responseError)
    except boto.dynamodb2.exceptions.ItemNotFound:
        # not found implies safe to add
        return obs_table.put_item(observation)

    return False
def parse_term(term):
    """Convert a term description dict into a URIRef, Literal or BNode.

    ``term['type']`` selects the node kind:

    * 'IRI'     -> ``URIRef(term['value'])``
    * 'literal' -> ``Literal`` with the optional ``term['datatype']``.
      xsd:dateTime values are converted to datetime objects (epoch seconds,
      then ISO 8601, then RFC 822 are tried in that order); values typed
      rdfs:Literal are emitted untyped and converted to float when possible.
    * anything else -> ``BNode``; the part after the first ':' of the value
      is mapped to a stable short uuid through the module-level ``bid_map``.

    The input dict is not modified.
    """
    if term['type'] == 'IRI':
        return URIRef(term['value'])

    if term['type'] == 'literal':
        value = term['value']
        raw_datatype = term.get('datatype')
        # Only wrap a datatype that is actually present: URIRef(None) either
        # fails or yields a bogus 'None' datatype depending on the rdflib
        # version.
        if raw_datatype is not None:
            datatype = URIRef(raw_datatype)
        else:
            datatype = None

        if datatype == XSD.dateTime:
            try:
                value = datetime.utcfromtimestamp(float(term['value']))
            except Exception:
                try:
                    value = isodate.parse_datetime(term['value'])
                except Exception:
                    timestamp = mktime_tz(parsedate_tz(term['value']))
                    value = datetime.fromtimestamp(timestamp)

        if datatype == RDFS.Literal:
            datatype = None
            try:
                value = float(value)
            except (TypeError, ValueError):
                pass

        return Literal(value, datatype=datatype)

    bid = term['value'].split(':')[1]
    if bid not in bid_map:
        bid_map[bid] = shortuuid.uuid()
    return BNode(bid_map[bid])
def get_contents_to_filename(self, filename, headers=None, cb=None,
                             num_cb=10, torrent=False, version_id=None,
                             res_download_handler=None,
                             response_headers=None):
    """
    Retrieve an object from S3 using the name of the Key object as the
    key in S3. Store contents of the object to a file named by 'filename'.
    See get_contents_to_file method for details about the
    parameters.

    :type filename: string
    :param filename: The filename of where to put the file contents

    :type headers: dict
    :param headers: Any additional headers to send in the request

    :type cb: function
    :param cb: a callback function that will be called to report
               progress on the transfer. The callback should accept
               two integer parameters, the first representing the
               number of bytes that have been successfully
               transmitted and the second representing the size of
               the to be transmitted object.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
                   parameter this parameter determines the granularity
                   of the callback by defining the maximum number of
                   times the callback will be called during the file
                   transfer.

    :type torrent: bool
    :param torrent: If True, returns the contents of a torrent file
                    as a string.

    :type res_download_handler: ResumableDownloadHandler
    :param res_download_handler: If provided, this handler will
                                 perform the download.

    :type response_headers: dict
    :param response_headers: A dictionary containing HTTP headers/values
                             that will override any headers associated with
                             the stored object in the response.
                             See http://goo.gl/EWOPb for details.
    """
    # The with-block closes the file even when the download raises.
    with open(filename, 'wb') as fp:
        self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
                                  version_id=version_id,
                                  res_download_handler=res_download_handler,
                                  response_headers=response_headers)

    # if last_modified date was sent from s3, try to set file's timestamp
    if self.last_modified is not None:
        try:
            modified_tuple = rfc822.parsedate_tz(self.last_modified)
            modified_stamp = int(rfc822.mktime_tz(modified_tuple))
            os.utime(fp.name, (modified_stamp, modified_stamp))
        except Exception:
            # Best effort only: a bad date must not fail the download.
            pass
def process_reply_file(current, fname):
    """Append the message stored in file *fname* to ``current["notes"]``.

    The new note is a dict with:
      'text'      -- the From header, a newline, then the message body
      'timestamp' -- the Date header as returned by rfc822.parsedate_tz
                     (None when it cannot be parsed)
    """
    new_note = {}
    # The with-block closes the file; the handle was previously leaked.
    with open(fname, "r") as reply:
        msg = rfc822.Message(reply)
        # Message() has consumed the headers, so fp now yields the body.
        new_note['text'] = "%s\n%s" % (msg['From'], msg.fp.read())
        new_note['timestamp'] = rfc822.parsedate_tz(msg['Date'])
    current["notes"].append(new_note)
def createEvent(evType, tstamp, name=None, contextList=None, entityList=None):
    """
    Create an XML element representing an event. Returns the XML object

    It expects:
    evType: Enum
    tstamp: datetime object (None means "now")
    name : string
    contextList: List of context elements (None means empty)
    entityList: List of entity elements (None means empty)

    Entity elements are appended before context elements. The 'id'
    attribute is the SHA-1 hex digest of the serialized element as it
    stands before the id is added.
    """
    import time

    result = etree.Element('event')
    result.attrib['type'] = eventName(evType)

    if tstamp is None:
        tstamp = datetime.datetime.now()

    # Treat tstamp's wall-clock fields as local time (isdst unknown). This
    # matches what formatting and re-parsing a zone-less RFC 822 string
    # produced, without depending on the locale's day and month names.
    seconds = time.mktime(tuple(tstamp.timetuple()[:8]) + (-1,))
    result.attrib['datetime'] = rfc822.formatdate(seconds)

    if name is not None:
        result.attrib['name'] = name

    for el in list(entityList or []) + list(contextList or []):
        result.append(el)

    # Create the ID
    m = hashlib.sha1()
    m.update(etree.tostring(result))
    result.attrib['id'] = m.hexdigest()

    return result
def post(self):
    """Handle a mesh-notify request.

    The JSON body must provide 'url', 'metadata' (with a 'headers' list of
    (key, value) pairs), 'proxy_ip' and 'proxy_port'. When a proxy backend
    is available the entry is handed to ``backend.download_entries`` and the
    {"result": "ok"} response is sent by its completion callback; without a
    backend the response is sent immediately.
    """
    backend = get_proxy_backend()

    def our_finish():
        self.add_header("Content-type", "application/json")
        self.write(json.dumps({"result": "ok"}))
        self.finish()

    if backend is None:
        our_finish()
        return

    obj = json.loads(self.request.body)
    url = obj["url"]
    metadata = obj["metadata"]
    proxy_ip = obj["proxy_ip"]
    proxy_port = obj["proxy_port"]
    print("GOT MESH-NOTIFY from %s:%d %s" % (proxy_ip, proxy_port, url))

    last_modified = None
    for key, val in metadata["headers"]:
        if key.lower() == "last-modified":
            # The header comes from a peer; an unparsable value is treated
            # like a missing one instead of raising TypeError.
            parsed = rfc822.parsedate_tz(val)
            if parsed is not None:
                last_modified = rfc822.mktime_tz(parsed)
            break

    entry = {"url": url, "last_modified": last_modified}
    tracker = LimitTracker(NOTIFY_SIMULTANEOUS_DOWNLOADS)
    backend.download_entries(proxy_ip, proxy_port, [entry], our_finish,
                             tracker)
def matches_value(self, v):
    """Return True when the RFC 822 date *v* is older than ``self.age``.

    "Older" means the number of seconds between now and the parsed date
    exceeds ``self.age``.
    """
    now = time.time()
    stamp = rfc822.mktime_tz(rfc822.parsedate_tz(v))
    elapsed = now - stamp
    return elapsed > self.age
def _onsuccess(response): if response.status == 200: checksum = response.headers['Etag'].strip('"') last_modified = response.headers['Last-Modified'] modified_tuple = rfc822.parsedate_tz(last_modified) modified_stamp = int(rfc822.mktime_tz(modified_tuple)) return {'checksum': checksum, 'last_modified': modified_stamp}
def open(self):
    """Start the 'data' download and return the raw response stream.

    Records the response on ``self._data_response`` and fills in
    ``self._size`` (Content-Length as int, or None), ``self._modified``
    (Last-Modified as epoch seconds, or None) and ``self._mimetype``
    (Content-Type, defaulting to 'application/octet-stream').
    """
    # XXX in future add support for compression
    request_headers = {'Accept-Encoding': ''}

    # The keyword that requests an unread body depends on _requests_version.
    if _requests_version == '0':
        body_option = {'prefetch': False}
    else:
        body_option = {'stream': True}
    self._data_response = self._session.get(
        self._url('data'), headers=request_headers, **body_option)
    self._validate_response(self._data_response)

    response_headers = self._data_response.headers

    length = response_headers.get('Content-Length', None)
    self._size = int(length) if length is not None else None

    last_modified = response_headers.get('Last-Modified', None)
    if last_modified is None:
        self._modified = None
    else:
        self._modified = rfc822.mktime_tz(
            rfc822.parsedate_tz(last_modified))

    self._mimetype = response_headers.get('Content-Type',
                                          'application/octet-stream')
    return self._data_response.raw
def _parse_midmo_date(datestring): """ returns a local datetime corresponding to the datestring given. """ # these appear to be rfc822/2822, not documented. return datetime.fromtimestamp(rfc822.mktime_tz(rfc822.parsedate_tz(datestring)))
def _parse_sibling(self, sibling, headers, data): """ Parses a single sibling out of a response. """ sibling.exists = True # Parse the headers... for header, value in headers: header = header.lower() if header == "content-type": sibling.content_type, sibling.charset = self._parse_content_type(value) elif header == "etag": sibling.etag = value elif header == "link": sibling.links = self._parse_links(value) elif header == "last-modified": sibling.last_modified = mktime_tz(parsedate_tz(value)) elif header.startswith("x-riak-meta-"): metakey = header.replace("x-riak-meta-", "") sibling.usermeta[metakey] = value elif header.startswith("x-riak-index-"): field = header.replace("x-riak-index-", "") reader = csv.reader([value], skipinitialspace=True) for line in reader: for token in line: token = decode_index_value(field, token) sibling.add_index(field, token) elif header == "x-riak-deleted": sibling.exists = False sibling.encoded_data = data return sibling
def populate(self, sub):
    """Load the message index of sub-archive *sub* into ``self.msgs``.

    The index file ``<archdir>/<sub>/index`` is read two lines at a time;
    every pair matching ``_rx_index`` yields one entry keyed by message
    number. When the date of an entry cannot be converted, the previous
    entry's timestamp plus one second is used instead.
    """
    index_path = os.path.join(self.archdir, str(sub), 'index')
    prev_timestamp = 0
    # The with-block closes the file even if parsing an entry raises.
    with open(index_path) as index_file:
        linepair = index_file.readline() + index_file.readline()
        while linepair:
            match = _rx_index.match(linepair.rstrip())
            if match:
                g = match.groups()
                msgnum = int(g[0])
                try:
                    timestamp = rfc822.mktime_tz(rfc822.parsedate_tz(g[3]))
                except (TypeError, ValueError, OverflowError):
                    # Unparsable or out-of-range date.
                    timestamp = prev_timestamp + 1
                prev_timestamp = timestamp
                localtime = time.localtime(timestamp)
                self.msgs[msgnum] = {
                    MSGNUM: msgnum,
                    THREADID: g[1],
                    SUBJECT: g[2],
                    DATE: g[3],
                    TIMESTAMP: timestamp,
                    AUTHORID: g[4],
                    AUTHOR: g[5],
                    # year * 100 + month, e.g. 200107
                    MONTH: localtime[0] * 100 + localtime[1],
                }
            linepair = index_file.readline() + index_file.readline()
def _readdate(txt): """Interpret the string as a date value.""" import rfc822 date = rfc822.parsedate_tz(txt.strip()) if date is not None: return rfc822.mktime_tz(date) return None
def parse_pubdate(text):
    """Parse a date string into a Unix timestamp

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0

    RFC 822 style dates are tried first; otherwise the first 19 characters
    are read as 'YYYY-MM-DDTHH:MM:SS' in local time. Anything that cannot be
    converted is logged and yields 0.
    """
    if not text:
        return 0

    parsed = parsedate_tz(text)
    if parsed is not None:
        try:
            return int(mktime_tz(parsed))
        except (OverflowError, ValueError):
            # Syntactically valid date outside the representable range;
            # fall through to the common "cannot parse" exit.
            pass
    else:
        # TODO: Fully RFC 3339-compliant parsing (w/ timezone)
        try:
            return int(time.mktime(
                time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')))
        except Exception:
            pass

    logger.error('Cannot parse date: %s', repr(text))
    return 0
def _spew_message(self, id, msg, flags, uid): bits = [] if uid: bits.append('UID %s' % msg.uid) for flag in flags: if flag == 'FLAGS': bits.append('FLAGS (%s)' % ' '.join(msg.flags)) elif flag == 'INTERNALDATE': idate = msg.get_internal_date() ttup = rfc822.parsedate_tz(idate) odate = time.strftime("%d-%b-%Y %H:%M:%S ", ttup[:9]) if ttup[9] is None: odate = odate + "+0000" else: if ttup[9] >= 0: sign = "+" else: sign = "-" odate = odate + sign + str(((abs(ttup[9]) / 3600) * 100 + (abs(ttup[9]) % 3600) / 60)).zfill(4) bits.append('INTERNALDATE ' + _quote(odate)) elif flag == 'RFC822.SIZE': bits.append('RFC822.SIZE %d' % len(msg.body)) elif flag == 'ENVELOPE': bits.append('ENVELOPE ' + collapseNestedLists([getEnvelope(msg.headers)])) elif flag == 'BODY.PEEK[]': bits.append('BODY[] ' + _literal(msg.body)) else: raise ValueError("Unsupported flag '%s'" % flag) self.send_untagged_response("%d FETCH (%s)" % (id, " ".join(bits)))
def _parse_sibling(self, sibling, headers, data): """ Parses a single sibling out of a response. """ sibling.exists = True # Parse the headers... for header, value in headers: header = header.lower() if header == 'content-type': sibling.content_type, sibling.charset = \ self._parse_content_type(value) elif header == 'etag': sibling.etag = value elif header == 'link': sibling.links = self._parse_links(value) elif header == 'last-modified': sibling.last_modified = mktime_tz(parsedate_tz(value)) elif header.startswith('x-riak-meta-'): metakey = header.replace('x-riak-meta-', '') sibling.usermeta[metakey] = value elif header.startswith('x-riak-index-'): field = header.replace('x-riak-index-', '') reader = csv.reader([value], skipinitialspace=True) for line in reader: for token in line: token = decode_index_value(field, token) sibling.add_index(field, token) elif header == 'x-riak-deleted': sibling.exists = False sibling.encoded_data = data return sibling
def execute(self, observation):
    """Insert *observation* into DynamoDB unless its key is already present.

    Adds 'ObservationTime' (epoch seconds), 'StationId' and 'CompositeKey'
    ("<station_id>_<ObservationTime>") to the observation dict, then looks
    the composite key up in the 'VocalPelicanObservation' table.

    Returns whatever ``put_item`` returns when the item was missing and has
    been added; False when it already existed or a JSONResponseError was
    printed.
    """
    station_id = observation['station_id']
    raw_time = observation['observation_time_rfc822']

    # mktime_tz() returns epoch seconds directly. The earlier detour via
    # datetime.fromtimestamp() (local time) minus utcfromtimestamp(0) added
    # the host's UTC offset to the result on non-UTC machines.
    delta = int(rfc822.mktime_tz(rfc822.parsedate_tz(raw_time)))

    observation['ObservationTime'] = delta
    observation['StationId'] = station_id
    composite_key = "%s_%d" % (station_id, delta)
    observation['CompositeKey'] = composite_key

    region = os.environ['AWS_DEFAULT_REGION']
    accessKey = os.environ['AWS_ACCESS_KEY']
    secretKey = os.environ['AWS_SECRET_KEY']

    try:
        connx = boto.dynamodb2.connect_to_region(
            region,
            aws_access_key_id=accessKey,
            aws_secret_access_key=secretKey)
        obs_table = Table('VocalPelicanObservation', connection=connx)
        # Existence probe only; the fetched row is not used.
        obs_table.get_item(CompositeKey=composite_key)
    except JSONResponseError as responseError:
        # authentication problem
        print(responseError)
    except boto.dynamodb2.exceptions.ItemNotFound:
        # not found implies safe to add
        return obs_table.put_item(observation)

    return False
def getReceiveTime(self, message):
    """Return the message's Date (or Sent) header as seconds since the epoch.

    Falls back to the current time, with a warning, when the header cannot
    be parsed.
    """
    # This is tricky...  date comes in with an offset value that
    # represents the number of seconds of difference between the
    # parsed timezone and UTC.  The events database wants all time
    # as seconds since the epoch and treats it as UTC.  As a
    # result we have to use the datetime class to do the
    # conversion because the functions in the time module do all
    # kinds of conversions "to be helpful"
    timestamp = message.get('Date', message.get('Sent'))
    t = rfc822.parsedate_tz(timestamp)
    if t is None:
        log.warn("Unable to process timestamp '%s' -- defaulting to now",
                 timestamp)
        return time.time()

    offset_secs = t[-1]
    if offset_secs is not None:
        # Convert the offset in seconds to minutes.  calendar wants minutes.
        # Floor division keeps this an integer even under true division.
        offset_mins = offset_secs // 60
        tz = FixedOffset(offset_mins, "Unknown")
    else:
        log.warn("Timezone not specified in '%s' -- defaulting to local timezone",
                 timestamp)
        # NOTE(review): with tz=None the naive datetime below goes through
        # utctimetuple() unchanged, i.e. the fields are read as UTC rather
        # than local time -- confirm which of the two is intended.
        tz = None

    # Construct dt using the date and time as well as the timezone
    dt = datetime(t[0], t[1], t[2], t[3], t[4], t[5], 0, tz)
    secs = calendar.timegm(dt.utctimetuple())
    log.debug('Timestamp of the event (should be in UTC): %s -> %f',
              timestamp, secs)
    return secs
def check_last_modified(url):
    """Return the Last-Modified header of *url* as a naive datetime.

    The header's date and time fields are used as given; its timezone
    offset is not applied.

    Raises IndexError when the response has no Last-Modified header.
    """
    u = urlopen(url)
    try:
        last_modified = u.info().getheaders("Last-Modified")[0]
    finally:
        # Only the headers are needed; release the connection.
        u.close()
    # modified = datetime.strptime(last_modified, '%a, %d %b %Y %H:%M:%S GMT')
    # Year..second only: the seventh field is the weekday slot and must not
    # be passed on as microseconds.
    return datetime(*parsedate_tz(last_modified)[:6])
def fix_date(string):
    """Normalise an RFC 822 style date to 'YYYY-MM-DD HH:MM:SS +HHMM'.

    Known misspellings are repaired first. The date and time fields are kept
    as given; the zone is appended as parsed, or as '+0000' when it is
    absent, zero or more than twelve hours from UTC.

    Returns None when the string cannot be parsed at all and raises
    FixDateError when the parsed fields cannot be formatted.
    """
    replacements = (
        ("Sab", "Sat"),
    )
    dateformat = "%Y-%m-%d %H:%M:%S"

    # first, fix up some common mistakes
    for wrong, right in replacements:
        string = string.replace(wrong, right)

    # try normal date parsing
    date = rfc822.parsedate_tz(string)
    if not date:
        return None

    # check that the time zone is present and sensible
    offset = date[9]
    if not offset or abs(offset) > 12 * 60 * 60:
        # use UTC if not
        dateformat += " +0000"
    else:
        # Build sign, hours and minutes separately so that half-hour zones
        # (+0530) and negative zones (-0330) come out correctly.
        sign = "-" if offset < 0 else "+"
        hours, remainder = divmod(abs(offset), 3600)
        dateformat += " %s%02d%02d" % (sign, hours, remainder // 60)

    # return properly formatted date string; if formatting is not
    # possible, fail
    try:
        return time.strftime(dateformat, tuple(date[:-1]))
    except ValueError as e:
        raise FixDateError(e)
def loadfrommessage(self, msg):
    """Populate this object's fields from the parsed message *msg*.

    Sets tofield, fromfield, realfromfield (falls back to the address when
    there is no display name), ccfield (empty tuple when absent),
    subjectfield and annotation (empty string when absent), readstatus
    ("Unread"), date, headers and body.

    ``date`` is the epoch time parsed from the text following the first
    ';' in the Received headers, or 0 when there is no such text or it is
    not a valid date.
    """
    self.tofield = msg.getaddrlist("To")

    f = msg.getaddr("From")
    self.fromfield = f[1]
    self.realfromfield = f[0]
    if not self.realfromfield:
        self.realfromfield = self.fromfield

    self.ccfield = msg.getaddrlist("Cc")
    if not self.ccfield:
        self.ccfield = ()

    self.subjectfield = msg.getheader("Subject")
    if not self.subjectfield:
        self.subjectfield = ""

    self.annotation = msg.getheader("X-SQmaiL-Annotation")
    if not self.annotation:
        self.annotation = ""

    self.readstatus = "Unread"

    # Work out the date the message arrived.
    received = "".join(msg.getallmatchingheaders("Received"))
    self.date = 0
    sep = received.find(";")
    if sep != -1:
        parsed = rfc822.parsedate_tz(received[sep + 1:])
        # An unparsable date falls back to 0, like a missing one.
        if parsed is not None:
            self.date = rfc822.mktime_tz(parsed)

    self.headers = "".join(msg.headers)
    self.body = msg.fp.read()
def was_modified_since(header=None, mtime=0, size=0):
    """
    Was something modified since the user last downloaded it?

    header
      This is the value of the If-Modified-Since header.  If this is None,
      I'll just return True.

    mtime
      This is the modification time of the item we're talking about.

    size
      This is the size of the item we're talking about.

    A header that is malformed or carries an unparsable or out-of-range
    date counts as "modified", i.e. True is returned.
    """
    try:
        if header is None:
            raise ValueError
        matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
                           re.IGNORECASE)
        # matches is None for a malformed header (AttributeError below);
        # parsedate_tz() returns None for a bad date, which makes
        # mktime_tz() raise TypeError.
        header_mtime = rfc822.mktime_tz(rfc822.parsedate_tz(matches.group(1)))
        header_len = matches.group(3)
        if header_len and int(header_len) != size:
            raise ValueError
        if mtime > header_mtime:
            raise ValueError
    except (AttributeError, TypeError, ValueError, OverflowError):
        return True
    return False
def log(self, parent=None, limit=100):
    """Yield one RevisionResult per entry of the ``log`` command output.

    *parent* is passed on as ``-r <parent>`` and *limit* as
    ``--limit=<limit>`` when they are truthy. Entries without a branch are
    reported on ['default']; empty and all-zero parent ids are dropped.
    """
    # TODO(dcramer): we should make this streaming
    args = ['log', '--template=%s' % (LOG_FORMAT,)]
    if parent:
        args.append('-r %s' % (parent,))
    if limit:
        args.append('--limit=%d' % (limit,))
    output = self.run(args)

    null_sha = '0' * 40
    for chunk in BufferParser(output, '\x02'):
        (sha, author, raw_date, raw_parents,
         raw_branches, message) = chunk.split('\x01')

        branch_names = [b for b in raw_branches.split(' ') if b]
        if not branch_names:
            branch_names = ['default']
        parent_ids = [p for p in raw_parents.split(' ')
                      if p and p != null_sha]
        committed = datetime.utcfromtimestamp(
            mktime_tz(parsedate_tz(raw_date)))

        yield RevisionResult(
            id=sha,
            author=author,
            author_date=committed,
            message=message,
            parents=parent_ids,
            branches=branch_names,
        )
def _parse_date_rfc822(dateString): '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' data = dateString.split() if not data: return None if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames: del data[0] if len(data) == 4: s = data[3] i = s.find('+') if i > 0: data[3:] = [s[:i], s[i+1:]] else: data.append('') dateString = " ".join(data) # Account for the Etc/GMT timezone by stripping 'Etc/' elif len(data) == 5 and data[4].lower().startswith('etc/'): data[4] = data[4][4:] dateString = " ".join(data) if len(data) < 5: dateString += ' 00:00:00 GMT' tm = rfc822.parsedate_tz(dateString) if tm: # Jython doesn't adjust for 2-digit years like CPython does, # so account for it by shifting the year so that it's in the # range 1970-2069 (1970 being the year of the Unix epoch). if tm[0] < 100: tm = (tm[0] + (1900, 2000)[tm[0] < 70],) + tm[1:] return time.gmtime(rfc822.mktime_tz(tm))
def getReceiveTime(self, message):
    """Convert the message's Date (or Sent) header to epoch seconds.

    When the header cannot be parsed a warning is logged and the current
    time is returned.
    """
    # This is tricky...  date comes in with an offset value that
    # represents the number of seconds of difference between the
    # parsed timezone and UTC.  The events database wants all time
    # as seconds since the epoch and treats it as UTC.  As a
    # result we have to use the datetime class to do the
    # conversion because the functions in the time module do all
    # kinds of conversions "to be helpful"
    timestamp = message.get('Date', message.get('Sent'))
    t = rfc822.parsedate_tz(timestamp)
    if t is None:
        log.warn("Unable to process timestamp '%s' -- defaulting to now",
                 timestamp)
        return time.time()

    offset_secs = t[-1]
    if offset_secs is None:
        log.warn("Timezone not specified in '%s' -- defaulting to local timezone",
                 timestamp)
        # NOTE(review): a naive datetime passes through utctimetuple()
        # unchanged, so the fields are effectively read as UTC here, not as
        # local time -- confirm the intended behaviour.
        tz = None
    else:
        # Convert the offset in seconds to minutes.  calendar wants minutes.
        # '//' keeps the value an integer under true division as well.
        tz = FixedOffset(offset_secs // 60, "Unknown")

    # Construct dt using the date and time as well as the timezone
    dt = datetime(t[0], t[1], t[2], t[3], t[4], t[5], 0, tz)
    secs = calendar.timegm(dt.utctimetuple())
    log.debug('Timestamp of the event (should be in UTC): %s -> %f',
              timestamp, secs)
    return secs
def process_reply_file(current, fname):
    """Read the reply message in *fname* and add it to ``current["notes"]``.

    The appended note dict holds "text" (From header, newline, message
    body) and "timestamp" (the Date header as parsed by
    rfc822.parsedate_tz, None if unparsable).
    """
    new_note = {}
    # Use a with-block so the file is closed; it used to stay open.
    with open(fname, "r") as reply:
        msg = rfc822.Message(reply)
        # After the headers have been parsed, msg.fp is positioned at the
        # start of the body.
        new_note["text"] = "%s\n%s" % (msg["From"], msg.fp.read())
        new_note["timestamp"] = rfc822.parsedate_tz(msg["Date"])
    current["notes"].append(new_note)
def was_modified_since(header=None, mtime=0, size=0):
    """
    Was something modified since the user last downloaded it?

    header
      This is the value of the If-Modified-Since header.  If this is None,
      I'll just return True.

    mtime
      This is the modification time of the item we're talking about.

    size
      This is the size of the item we're talking about.

    True is also returned when the header is malformed or its date cannot
    be parsed or converted.
    """
    try:
        if header is None:
            raise ValueError
        matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
                           re.IGNORECASE)
        # A malformed header leaves matches as None (AttributeError); an
        # unparsable date makes parsedate_tz() return None, on which
        # mktime_tz() raises TypeError.
        header_mtime = rfc822.mktime_tz(rfc822.parsedate_tz(
            matches.group(1)))
        header_len = matches.group(3)
        if header_len and int(header_len) != size:
            raise ValueError
        if mtime > header_mtime:
            raise ValueError
    except (AttributeError, TypeError, ValueError, OverflowError):
        return True
    return False
def parse(self, *args, **kwargs):
    """
    return the time value (in seconds since 1970)

    The header value is obtained by calling this object with the given
    arguments. Returns None when the value is empty.

    Raises HTTPBadRequest when the value is not a valid timestamp or is
    outside the representable range.
    """
    value = self.__call__(*args, **kwargs)
    if not value:
        return None
    try:
        return mktime_tz(parsedate_tz(value))
    except (OverflowError, TypeError, ValueError):
        # TypeError: parsedate_tz() returned None for garbage input.
        # OverflowError/ValueError: well-formed but out-of-range date.
        raise HTTPBadRequest((
            "Received an ill-formed timestamp for %s: %s\r\n") %
            (self.name, value))
def convert_rfc822(self, date_string):
    """Helper turning an RFC 822 date string into a naive local datetime."""
    fields = rfc822.parsedate_tz(date_string)
    epoch_seconds = rfc822.mktime_tz(fields)
    return datetime.datetime.fromtimestamp(epoch_seconds)
def twitter():
    """Yield the result objects fetched from TWITTER_URL.

    Each yielded dict gets two extra keys: 'timestamp' (its 'created_at'
    date as seconds since the epoch) and 'template'
    ('twitter/tweet.html'). Nothing is yielded when the request fails.
    """
    response = requests.get(TWITTER_URL)
    if not response.ok:
        # A plain return ends the generator; raising StopIteration inside
        # a generator body becomes a RuntimeError under PEP 479.
        return
    for obj in json.loads(response.content)['results']:
        # mktime_tz() applies the UTC offset carried by created_at;
        # time.mktime() read the fields as local time and ignored it.
        obj['timestamp'] = rfc822.mktime_tz(
            rfc822.parsedate_tz(obj['created_at']))
        obj['template'] = 'twitter/tweet.html'
        yield obj
def _normalize_rfc822_date( self, date_string ): return datetime.fromtimestamp( rfc822.mktime_tz( rfc822.parsedate_tz( date_string ) ) )
def numericTime(s):
    """Return the date in the first 20 characters of *s* as epoch seconds.

    The fields are interpreted as local time.
    """
    # local not supported under windows
    # since we are using english anyway we skip that
    # locale.setlocale(locale.LC_ALL, 'en_GB')
    # ts = time.mktime(time.strptime(s[:20], '%d %b %Y %H:%M:%S'))
    fields = rfc822.parsedate_tz(s[:20])
    return time.mktime(fields[:9])
def addMail(self, mailString):
    """ Store mail as news item
        Returns created item

    The item title is the decoded Subject; its text shows today's date,
    the decoded sender and the plain-text mail body. Publishing the new
    item through the workflow is attempted but not required to succeed.
    """
    pw = self.context.portal_workflow

    (header, body) = splitMail(mailString)

    # if 'keepdate' is set, get date from mail,
    # NOTE(review): mail_time is computed but never used below; the item
    # text always carries today's date. Confirm whether it was meant to be
    # applied to the created item.
    if self.getValueFor('keepdate'):
        timetuple = rfc822.parsedate_tz(header.get('date'))
        if timetuple is not None:
            mail_time = DateTime(rfc822.mktime_tz(timetuple))
        else:
            # Missing or unparsable Date header: use our own date.
            mail_time = DateTime()
    # ... take our own date, clients are always lying!
    else:
        mail_time = DateTime()

    (TextBody, ContentType, HtmlBody, Attachments) = unpackMail(mailString)

    # Test: date shown in front of the sender
    from datetime import date
    today = date.today()
    mydate = today.strftime("%d.%m.%Y")

    # let's create the news item
    subject = mime_decode_header(header.get('subject', 'No Subject'))
    sender = mime_decode_header(header.get('from','No From'))
    #title = "%s / %s" % (subject, sender)
    title = "%s" % (subject)

    new_id = IUserPreferredURLNormalizer(self.request).normalize(title)
    item_id = self._findUniqueId(new_id)
    # ContentType is only set for the TextBody
    if ContentType:
        body = TextBody
    else:
        body = self.HtmlToText(HtmlBody)

    # provisional solution
    desc = "%s..." % (body[:60])
    uni_aktuell_body = "<p><strong>%s: %s</strong></p> <p> </p><pre>%s</pre>" % (mydate, sender, body)

    objid = self.context.invokeFactory(NewsItem.meta_type, id=item_id,
                                       title=title, text=uni_aktuell_body,
                                       description=desc)

    mailObject = getattr(self.context, objid)
    try:
        #original pw.doActionFor(mailObject, 'hide')
        pw.doActionFor(mailObject, 'publish')
    except Exception:
        # Best effort: the item is kept even if it cannot be published.
        pass
    return mailObject
def log(self, parent=None, branch=None, author=None, offset=0, limit=100,
        paths=None):
    """ Gets the commit log for the repository.

    Each revision returned has exactly one branch name associated with it.
    This is the branch name encoded into the revision changeset description.

    See documentation for the base for general information on this function.

    Raises ValueError when both *parent* and *branch* are given. The first
    *offset* entries of the command output are skipped.
    """
    start_time = time()

    # TODO(dcramer): we should make this streaming
    cmd = ['log', '--template=%s' % (LOG_FORMAT,)]

    if parent and branch:
        raise ValueError('Both parent and branch cannot be set')

    if branch:
        cmd.append('-b{0}'.format(branch))

    # Build the -r parameter value from parent and author.
    revset = None
    if parent:
        revset = ('ancestors(%s)' % parent)
    if author:
        if revset:
            revset = '({r}) and author("{0}")'.format(author, r=revset)
        else:
            revset = 'author("{0}")'.format(author, r=revset)
    if revset:
        cmd.append('-r reverse({0})'.format(revset))

    if limit:
        cmd.append('--limit=%d' % (offset + limit,))

    if paths:
        cmd.extend(["glob:" + p.strip() for p in paths])

    result = self.run(cmd)

    self.log_timing('log', start_time)

    null_sha = '0' * 40
    for idx, chunk in enumerate(BufferParser(result, '\x02')):
        if idx < offset:
            continue

        (sha, author, raw_date, raw_parents,
         raw_branches, message) = chunk.split('\x01')

        branch_names = [b for b in raw_branches.split(' ') if b]
        if not branch_names:
            branch_names = ['default']
        parent_ids = [x for x in raw_parents.split(' ')
                      if x and x != null_sha]
        committed = datetime.utcfromtimestamp(
            mktime_tz(parsedate_tz(raw_date)))

        yield RevisionResult(
            id=sha,
            author=author,
            author_date=committed,
            message=message,
            parents=parent_ids,
            branches=branch_names,
        )
def _item_publish_date(self, item):
    """Returns the UTC date that the comic strip was published

    Converts the RFC822 string in item['pubDate'] to a datetime created
    with pytz.utc as its timezone.
    """
    fields = rfc822.parsedate_tz(item['pubDate'])
    epoch_seconds = rfc822.mktime_tz(fields)
    return datetime.fromtimestamp(epoch_seconds, pytz.utc)
def improve_date(input):
    """Prefix a date string with its formatdate() rendering when useful.

    The string is returned unchanged when its last space-separated token
    (first and last character removed) is one of ``time.tzname``, when that
    token equals ``get_zone()``, or when no non-zero UTC offset can be
    parsed from it. Otherwise "<converted> (<input>)" is returned.
    """
    zone_token = input.split(" ")[-1].strip()
    if zone_token[1:-1] in time.tzname or zone_token == get_zone():
        return input

    parsed = rfc822.parsedate_tz(input)
    if not parsed or not parsed[9]:
        return input

    shifted = time.mktime(parsed[:9]) - parsed[9] - (time.timezone)
    converted = formatdate(shifted, True)
    return "%s (%s)" % (converted, input)
def _onsuccess(boto_key): checksum = boto_key.etag.strip('"') last_modified = boto_key.last_modified modified_tuple = rfc822.parsedate_tz(last_modified) modified_stamp = int(rfc822.mktime_tz(modified_tuple)) return {'checksum': checksum, 'last_modified': modified_stamp, 'width': boto_key.metadata.width, 'height': boto_key.metadata.height}
def _item_publish_date_tz(self, item): """Returns the date that the comic strip was published. The original timezone is preserved. """ parts = rfc822.parsedate_tz(item['pubDate']) timestamp = rfc822.mktime_tz(parts) return datetime.fromtimestamp(timestamp)
def parse_pubdate(text):
    """Parse a date string into a Unix timestamp

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('2003-12-13T00:00:00+02:00')
    1071266400

    >>> parse_pubdate('2003-12-13T18:30:02Z')
    1071340202

    >>> parse_pubdate('Mon, 02 May 1960 09:05:01 +0100')
    -305049299

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0
    """
    if not text:
        return 0

    # First choice: RFC 822 style dates.
    rfc_fields = parsedate_tz(text)
    if rfc_fields is not None:
        try:
            return int(mktime_tz(rfc_fields))
        except (OverflowError, ValueError):
            logger.warning(
                'bad pubdate %s is before epoch or after end of time (2038)',
                rfc_fields)
            return 0

    # Second choice: 'YYYY-MM-DDTHH:MM:SS' followed by 'Z' or '+HH:MM'.
    try:
        stamp = time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')
        zone = re.match(r'^(?:Z|([+-])([0-9]{2})[:]([0-9]{2}))$', text[19:])
        if zone is None:
            # No recognisable zone suffix: interpret as local time.
            return int(time.mktime(stamp))
        sign, hours, minutes = zone.groups()
        if sign:
            offset = 3600 * int(hours) + 60 * int(minutes)
            if sign == '-':
                offset = 0 - offset
        else:
            offset = 0
        return int(mktime_tz(tuple(stamp) + (offset,)))
    except Exception:
        pass

    logger.error('Cannot parse date: %s', repr(text))
    return 0
def convert_created_at(line, created_at_format):
    """Fix the created_at time since it is 'RFC 2822'.

    Rewrites ``line['created_at']`` using *created_at_format* (the parsed
    date and time fields are formatted without applying the UTC offset)
    and stores the offset in seconds, or None when absent, under
    ``line['created_at_shift']``. Returns the same dict.
    """
    parsed = parsedate_tz(line['created_at'])
    # Split off the timezone offset; the remaining nine fields must be
    # handed to mktime() as a tuple -- a list is rejected on Python 3.
    tz = parsed[-1]
    dt = datetime.datetime.fromtimestamp(mktime(tuple(parsed[:-1])))
    line['created_at'] = dt.strftime(created_at_format)
    line['created_at_shift'] = tz
    return line
def DateHeader_parse(self, *args, **kwargs):
    """
    return the time value (in seconds since 1970)

    The header value comes from calling this object with the given
    arguments; None is returned when it is empty. An unparsable or
    out-of-range value raises HTTPBadRequest.
    """
    value = self.__call__(*args, **kwargs)
    if not value:
        return None
    try:
        seconds = mktime_tz(parsedate_tz(value))
    except (OverflowError, TypeError):
        raise HTTPBadRequest(
            ("Received an ill-formed timestamp for %s: %s\r\n") %
            (self.name, value))
    return seconds
def mirror_refdata(refdataTopdir='https://kbase.us/refdata/',
                   refdataDiskdir='refdata'):
    """Mirror every module/version directory below *refdataTopdir*.

    The remote listing is expected to be JSON: a list of modules, each
    listing its versions (both identified by their 'name'). A version is
    skipped when the remote __READY__ file is older than the local copy;
    otherwise the directory (plus any directories named in the optional
    '.dotfiles' listing) is fetched with ``retrieve_dir`` and the
    __READY__ file is downloaded last.
    """
    refdataReq = requests.get(refdataTopdir)
    modules = refdataReq.json()

    for module in modules:
        moduledir = refdataTopdir + module['name']
        moduleReq = requests.get(moduledir)
        versions = moduleReq.json()

        for version in versions:
            versiondir = moduledir + '/' + version['name']
            versionDiskPath = (refdataDiskdir + '/' + module['name'] + '/' +
                               version['name'])

            # for retrieving dot files (requires building .dotfile manually
            # at the source)
            remotedotfilesls = []
            remotedotfilesreq = requests.get(versiondir + '/.dotfiles')
            # for now just get the .dotfiles file
            try:
                remotedotfilesls = remotedotfilesreq.json()
            except ValueError:
                # No (valid) .dotfiles listing: nothing extra to fetch.
                pass

            readyHeadReq = requests.head(versiondir + '/__READY__')
            print(readyHeadReq.headers['Last-Modified'])
            mirrorDatestamp = rfc822.mktime_tz(rfc822.parsedate_tz(
                readyHeadReq.headers['Last-Modified']))

            readyFile = versionDiskPath + '/__READY__'
            if os.path.isfile(readyFile):
                fileDatestamp = os.path.getmtime(readyFile)
                print(mirrorDatestamp)
                print(fileDatestamp)
                if mirrorDatestamp < fileDatestamp:
                    print("mirror __READY__ older than local file, skipping " + versiondir)
                    continue

            try:
                os.makedirs(versionDiskPath)
                print('created dir ' + versionDiskPath)
            except OSError:
                # An already existing directory is fine; anything else is not.
                if not os.path.isdir(versionDiskPath):
                    raise

            retrieve_dir(versiondir, versionDiskPath)

            # hacky support for dot files
            for dotfile in remotedotfilesls:
                # for now only support for dot dirs
                # (need to break out the file retrieval into a separate
                # method to do files here)
                retrieve_dir(versiondir + '/' + dotfile['name'],
                             versionDiskPath + '/' + dotfile['name'])

            # if this works, retrieve __READY__ file
            print('retrieve ' + versiondir + ' succeeded, retrieving __READY__ file')
            filereq = requests.get(versiondir + '/__READY__', timeout=5,
                                   stream=True)
            with open(versionDiskPath + '/__READY__', 'wb') as fd:
                for chunk in filereq.iter_content(1024):
                    fd.write(chunk)
def __init__(self, id, spamHeader, dateHeader, headers):
    """Record one message with its spam score and date.

    id         -- message identifier
    spamHeader -- header text handed to scoreFromHeader() to obtain
                  ``score`` and ``required``; a warning is issued when no
                  score is found
    dateHeader -- RFC 822 date; stored in ``date`` as a naive local
                  datetime, or None when the header is empty
    headers    -- stored as given
    """
    self.id = id
    self.score, self.required = scoreFromHeader(spamHeader)
    if self.score is None:
        warn('no score on msg id=%s header="%s"' % (self.id, spamHeader))

    # Always define the attribute so that reading it later cannot raise
    # AttributeError for messages without a date header.
    self.date = None
    if dateHeader:
        self.date = datetime.fromtimestamp(
            rfc822.mktime_tz(rfc822.parsedate_tz(dateHeader)))

    self.headers = headers
    self.data = {}
    self.flags = None
def s3_has_uptodate_file(bucket, transfer_file, s3_key_name):
    """Check if S3 has an existing, up to date version of this file.

    True when the key exists, has the same size as the local file and a
    last-modified time that is not older than the local file's mtime.
    """
    remote = bucket.get_key(s3_key_name)
    if not remote:
        return False

    remote_size = remote.size
    local_size = os.path.getsize(transfer_file)
    remote_time = rfc822.mktime_tz(rfc822.parsedate_tz(remote.last_modified))
    local_time = os.path.getmtime(transfer_file)

    if remote_size != local_size:
        return False
    return remote_time >= local_time
def from_internaldate(date):
    """Convert a date string to a timestamp via ``gmtime.mkgmtime``.

    The string is parsed with rfc822.parsedate_tz and the resulting tuple
    is handed to ``gmtime.mkgmtime``. Returns 0 when parsing or conversion
    fails.
    """
    try:
        return gmtime.mkgmtime(rfc822.parsedate_tz(date))
    except Exception:
        # Deliberate best effort, but no longer swallowing
        # KeyboardInterrupt/SystemExit as a bare except did.
        return 0
def got_contents_to_filename(response):
    """Finish a download: close the file, stamp it, notify the caller.

    Closes ``fp``, tries to set the file's access and modification time
    from ``self.last_modified`` (failures are ignored) and finally passes
    *response* to ``callback`` when that is callable.
    """
    fp.close()

    # if last_modified date was sent from s3, try to set file's timestamp
    last_modified = self.last_modified
    if last_modified is not None:
        try:
            stamp = int(rfc822.mktime_tz(rfc822.parsedate_tz(last_modified)))
            os.utime(fp.name, (stamp, stamp))
        except Exception:
            pass

    if callable(callback):
        callback(response)
def _parse_date(value): if not value: return None t = parsedate_tz(value) if t is None: # Could not parse return None if t[-1] is None: # No timezone given. None would mean local time, but we'll force UTC t = t[:9] + (0, ) t = mktime_tz(t) return datetime.fromtimestamp(t, UTC)