Пример #1
0
def get_data():
	page = html.parse(url)
	root = page.getroot()

	datenames = root.find_class('datename')
	missiondatas = root.find_class('missiondata')
	missdescrips = root.find_class('missdescrip')

	launches = zip(datenames, missiondatas, missdescrips)

	res = []

	curtime = time.gmtime()
	year = curyear = curtime.tm_year
	curmon = curtime.tm_mon
	prev_mon = -1

	for name, data, desc in launches:
		datestr = name.find_class('launchdate')[0].text.strip()
		mission = name.find_class('mission')[0].text.strip()
		desc = desc.text_content().strip()

		timestr, site = data.text_content().strip().split('\n')

		site = rm_pref(site)
		timestr = rm_pref(timestr)

		mon, day = parse_date(datestr)
		tm1, tm2 = parse_time(timestr)

		if mon > 0:
			if mon < prev_mon:
				year += 1
			elif mon < curmon - 2 and curyear == year:
				year = curyear + 1

			prev_mon = mon

		stamp = None
		if mon > 0 and day > 0:
			date = '%02d-%02d-%04d' % (day, mon, year)
			if tm1:
				dt = '%s %s' % (date, tm1)
				stamp = get_stamp(time.strptime(dt, '%d-%m-%Y %H:%M'))
		else:
			date = '%s %s' % (datestr, year)

		res.append({'tm1': tm1,
					'tm2': tm2,
					'timestr': timestr,
					'date': date,
					'stamp': stamp,
					'mission': mission,
					'site': site,
					'desc': desc})
	return res
Пример #2
0
def _parse_duration_combined(durationstr):
    #Period of the form P<date>T<time>

    #Split the string in to its component parts
    datepart, timepart = durationstr[1:].split('T') #We skip the 'P'

    datevalue = parse_date(datepart)
    timevalue = parse_time(timepart)

    totaldays = datevalue.year * 365 + datevalue.month * 30 + datevalue.day

    return datetime.timedelta(days=totaldays, hours=timevalue.hour, minutes=timevalue.minute, seconds=timevalue.second, microseconds=timevalue.microsecond)
Пример #3
0
def parse_duration_combined(durationstr):
    #Period of the form P<date>T<time>

    #Split the string in to its component parts
    datepart, timepart = durationstr[1:].split('T') #We skip the 'P'

    datevalue = parse_date(datepart)
    timevalue = parse_time(timepart)

    totaldays = datevalue.year * 365 + datevalue.month * 30 + datevalue.day

    return datetime.timedelta(days=totaldays, hours=timevalue.hour, minutes=timevalue.minute, seconds=timevalue.second, microseconds=timevalue.microsecond)
Пример #4
0
def parse_datetime(isodatetimestr, delimiter='T'):
    #Given a string in ISO8601 date time format, return a datetime.datetime
    #object that corresponds to the given date time.
    #By default, the ISO8601 specified T delimiter is used to split the
    #date and time (<date>T<time>). Fixed offset tzdata will be included
    #if UTC offset is given in the input string.

    isodatestr, isotimestr = isodatetimestr.split(delimiter)

    datepart = parse_date(isodatestr)
    timepart = parse_time(isotimestr)

    return datetime.datetime.combine(datepart, timepart)
Пример #5
0
def parse_datetime(isodatetimestr, delimiter='T'):
    #Given a string in ISO8601 date time format, return a datetime.datetime
    #object that corresponds to the given date time.
    #By default, the ISO8601 specified T delimiter is used to split the
    #date and time (<date>T<time>). Fixed offset tzdata will be included
    #if UTC offset is given in the input string.

    isodatestr, isotimestr = isodatetimestr.split(delimiter)

    datepart = parse_date(isodatestr)
    timepart = parse_time(isotimestr)

    return datetime.datetime.combine(datepart, timepart)
Пример #6
0
def parse_interval(isointervalstr, intervaldelimiter='/', datetimedelimiter='T'):
    #Given a string representing an ISO8601 interval, return a
    #tuple of datetime.date or date.datetime objects representing the beginning
    #and end of the specified interval. Valid formats are:
    #
    #<start>/<end>
    #<start>/<duration>
    #<duration>/<end>
    #
    #The <start> and <end> values can represent dates, or datetimes,
    #not times.
    #
    #The format:
    #
    #<duration>
    #
    #Is expressly not supported as there is no way to provide the addtional
    #required context.

    firstpart, secondpart = isointervalstr.split(intervaldelimiter)

    if firstpart[0] == 'P':
        #<duration>/<end>
        #Notice that these are not returned 'in order' (earlier to later), this
        #is to maintain consistency with parsing <start>/<end> durations, as
        #well as making repeating interval code cleaner. Users who desire
        #durations to be in order can use the 'sorted' operator.

        #We need to figure out if <end> is a date, or a datetime
        if secondpart.find(datetimedelimiter) != -1:
            #<end> is a datetime
            duration = parse_duration(firstpart)
            enddatetime = parse_datetime(secondpart, delimiter=datetimedelimiter)

            return (enddatetime, enddatetime - duration)
        else:
            #<end> must just be a date
            duration = parse_duration(firstpart)
            enddate = parse_date(secondpart)

            #See if we need to upconvert to datetime to preserve resolution
            if firstpart.find(datetimedelimiter) != -1:
                return (enddate, datetime.combine(enddate, datetime.min.time()) - duration)
            else:
                return (enddate, enddate - duration)
    elif secondpart[0] == 'P':
        #<start>/<duration>
        #We need to figure out if <start> is a date, or a datetime
        if firstpart.find(datetimedelimiter) != -1:
            #<end> is a datetime
            duration = parse_duration(secondpart)
            startdatetime = parse_datetime(firstpart, delimiter=datetimedelimiter)

            return (startdatetime, startdatetime + duration)
        else:
            #<start> must just be a date
            duration = parse_duration(secondpart)
            startdate = parse_date(firstpart)

            #See if we need to upconvert to datetime to preserve resolution
            if secondpart.find(datetimedelimiter) != -1:
                return (startdate, datetime.combine(startdate, datetime.min.time()) + duration)
            else:
                return (startdate, startdate + duration)
    else:
        #<start>/<end>
        if firstpart.find(datetimedelimiter) != -1 and secondpart.find(datetimedelimiter) != -1:
            #Both parts are datetimes
            return (parse_datetime(firstpart, delimiter=datetimedelimiter), parse_datetime(secondpart, delimiter=datetimedelimiter))
        elif firstpart.find(datetimedelimiter) != -1 and secondpart.find(datetimedelimiter) == -1:
            #First part is a datetime, second part is a date
            return (parse_datetime(firstpart, delimiter=datetimedelimiter), parse_date(secondpart))
        elif firstpart.find(datetimedelimiter) == -1 and secondpart.find(datetimedelimiter) != -1:
            #First part is a date, second part is a datetime
            return (parse_date(firstpart), parse_datetime(secondpart, delimiter=datetimedelimiter))
        else:
            #Both parts are dates
            return (parse_date(firstpart), parse_date(secondpart))
Пример #7
0
def parse_interval(isointervalstr,
                   intervaldelimiter='/',
                   datetimedelimiter='T'):
    #Given a string representing an ISO8601 interval, return a
    #tuple of datetime.date or date.datetime objects representing the beginning
    #and end of the specified interval. Valid formats are:
    #
    #<start>/<end>
    #<start>/<duration>
    #<duration>/<end>
    #
    #The <start> and <end> values can represent dates, or datetimes,
    #not times.
    #
    #The format:
    #
    #<duration>
    #
    #Is expressly not supported as there is no way to provide the addtional
    #required context.

    firstpart, secondpart = isointervalstr.split(intervaldelimiter)

    if firstpart[0] == 'P':
        #<duration>/<end>
        #Notice that these are not returned 'in order' (earlier to later), this
        #is to maintain consistency with parsing <start>/<end> durations, as
        #well asmaking repeating interval code cleaner. Users who desire
        #durations to be in order can use the 'sorted' operator.

        #We need to figure out if <end> is a date, or a datetime
        if secondpart.find(datetimedelimiter) != -1:
            #<end> is a datetime
            duration = parse_duration(firstpart)
            enddatetime = parse_datetime(secondpart,
                                         delimiter=datetimedelimiter)

            return (enddatetime, enddatetime - duration)
        else:
            #<end> must just be a date
            duration = parse_duration(firstpart)
            enddate = parse_date(secondpart)

            return (enddate, enddate - duration)
    elif secondpart[0] == 'P':
        #<start>/<duration>
        #We need to figure out if <start> is a date, or a datetime
        if firstpart.find(datetimedelimiter) != -1:
            #<end> is a datetime
            duration = parse_duration(secondpart)
            startdatetime = parse_datetime(firstpart,
                                           delimiter=datetimedelimiter)

            return (startdatetime, startdatetime + duration)
        else:
            #<start> must just be a date
            duration = parse_duration(secondpart)
            startdate = parse_date(firstpart)

            return (startdate, startdate + duration)
    else:
        #<start>/<end>
        if firstpart.find(datetimedelimiter) != -1 and secondpart.find(
                datetimedelimiter) != -1:
            #Both parts are datetimes
            return (parse_datetime(firstpart, delimiter=datetimedelimiter),
                    parse_datetime(secondpart, delimiter=datetimedelimiter))
        elif firstpart.find(datetimedelimiter) != -1 and secondpart.find(
                datetimedelimiter) == -1:
            #First part is a datetime, second part is a date
            return (parse_datetime(firstpart, delimiter=datetimedelimiter),
                    parse_date(secondpart))
        elif firstpart.find(datetimedelimiter) == -1 and secondpart.find(
                datetimedelimiter) != -1:
            #First part is a date, second part is a datetime
            return (parse_date(firstpart),
                    parse_datetime(secondpart, delimiter=datetimedelimiter))
        else:
            #Both parts are dates
            return (parse_date(firstpart), parse_date(secondpart))