def get_week_number_as_two_digits_string(in_date, in_start_iso_weekday=1):
    # Shift Sunday-start weeks forward one day so the ISO (Monday-start) week matches
    if in_start_iso_weekday == 7:
        in_date = in_date + timedelta(days=1)
    week_iso_num = datetime.isocalendar(in_date)[1]
    iso_week_to_return = str(week_iso_num)
    if week_iso_num < 10:
        iso_week_to_return = '0' + iso_week_to_return
    return str(datetime.isocalendar(in_date)[0]), iso_week_to_return
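A quick check of the padding behaviour (a minimal sketch, assuming `from datetime import datetime, timedelta` is in scope):

year_str, week_str = get_week_number_as_two_digits_string(datetime(2021, 1, 7))
print(year_str, week_str)  # 2021 01 -- ISO week 1, zero-padded to two digits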
Example #2
def parse_mint_date(date_str):
    current_year = datetime.isocalendar(date.today())[0]
    try:
        new_date = datetime.strptime(date_str + str(current_year), '%b %d%Y')
    except ValueError:
        new_date = datetime.strptime(date_str, '%m/%d/%y')
    return new_date.date()
Example #3
File: bott-12.py Project: Bogdan7r/cs102
def get_tommorow(message):
    """ Get the schedule for the following day """
    _, group = message.text.split()

    today = datetime.today()
    week = datetime.isocalendar(today)[1]
    day = datetime.isoweekday(today) + 1
    if day == 8:  # tomorrow rolls over to Monday of the next ISO week
        day = 1
        week += 1
    week = week % 2 + 1  # collapse the ISO week number to the site's 1/2 week parity
    web_page = get_page(group, week)

    test = parse_schedule(web_page, day)

    if not test:
        resp = 'No classes'
    else:
        times_list, locations_list, classrooms_list, lessons_list = test
        resp = ''
        for time, location, classroom, lesson in zip(times_list,
                                                     locations_list,
                                                     classrooms_list,
                                                     lessons_list):
            resp += '<b>{}</b>, {}, {}, {}\n'.format(time, location, classroom,
                                                     lesson)
    bot.send_message(message.chat.id, resp, parse_mode='HTML')
Example #4
def __setitem__(self, index, items):
    '''Overload of the [] operator to change the data at a given position.'''
    self.config.set(index, items[0], items[1])
    self.config["UPDATED"] = '"%s/%s/%s"' % datetime.isocalendar(datetime.now())
    with open(self.name, 'w') as f:
        strjson = json.dumps(self.config, sort_keys=True, indent=4, separators=(',', ': '))
        f.write(strjson)
Example #5
def process_log_data(spark, input_data, output_data):

    print("--- Starting Process Log_Data ---")
    # get filepath to log data file
    log_data = os.path.join(input_data, "log_data/*/*/*.json")

    # read log data file
    df = spark.read.json(log_data)
    # filter by actions for song plays
    df = df.where(col("page") == "NextSong")

    # extract columns for users table
    # user_id, first_name, last_name, gender, level
    users_table = df['userId', 'firstName', 'lastName', 'gender', 'level',
                     'ts']
    users_table = users_table.orderBy(
        "ts", ascending=False).dropDuplicates(subset=["userId"]).drop('ts')
    # write users table to parquet files
    users_table.write.parquet(os.path.join(output_data, 'users.parquet'),
                              'overwrite')

    # create datetime column from original timestamp column
    get_datetime = udf(lambda x: datetime.fromtimestamp(int(int(x) / 1000)),
                       TimestampType())
    get_weekday = udf(lambda x: x.weekday())
    get_week = udf(lambda x: datetime.isocalendar(x)[1])
    get_hour = udf(lambda x: x.hour)
    get_day = udf(lambda x: x.day)
    get_year = udf(lambda x: x.year)
    get_month = udf(lambda x: x.month)

    df = df.withColumn('start_time', get_datetime(df.ts))
    df = df.withColumn('hour', get_hour(df.start_time))
    df = df.withColumn('day', get_day(df.start_time))
    df = df.withColumn('week', get_week(df.start_time))
    df = df.withColumn('month', get_month(df.start_time))
    df = df.withColumn('year', get_year(df.start_time))
    df = df.withColumn('weekday', get_weekday(df.start_time))
    time_table = df['start_time', 'hour', 'day', 'week', 'month', 'year',
                    'weekday']
    time_table = time_table.drop_duplicates(subset=['start_time'])

    # write time table to parquet files partitioned by year and month
    time_table.write.partitionBy('year', 'month').parquet(
        os.path.join(output_data, 'time.parquet'), 'overwrite')

    # read in song data to use for songplays table
    song_df = spark.read.parquet("output/songs.parquet")

    # extract columns from joined song and log datasets to create songplays table
    df = df.join(song_df, (song_df.title == df.song) &
                 (song_df.artist_name == df.artist))
    df = df.withColumn('songplay_id', monotonically_increasing_id())
    songplays_table = df['songplay_id', 'start_time', 'userId', 'level',
                         'song_id', 'artist_id', 'sessionId', 'location',
                         'userAgent', 'year', 'month']

    # write songplays table to parquet files partitioned by year and month
    # (year and month must be in the selection for partitionBy to work)
    songplays_table.write.partitionBy('year', 'month').parquet(
        os.path.join(output_data, 'songplays.parquet'), 'overwrite')
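The Python udfs above round-trip every row through the interpreter; Spark's built-in column functions can derive the same time table natively. A minimal sketch of that alternative (assuming the same `df` with its `start_time` column; note `dayofweek` counts 1=Sunday..7=Saturday, unlike Python's `weekday()`):

from pyspark.sql.functions import hour, dayofmonth, weekofyear, month, year, dayofweek

time_table = (df
              .withColumn('hour', hour('start_time'))
              .withColumn('day', dayofmonth('start_time'))
              .withColumn('week', weekofyear('start_time'))  # ISO week, like isocalendar()[1]
              .withColumn('month', month('start_time'))
              .withColumn('year', year('start_time'))
              .withColumn('weekday', dayofweek('start_time'))
              .select('start_time', 'hour', 'day', 'week', 'month', 'year', 'weekday')
              .drop_duplicates(subset=['start_time']))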
Example #6
def json_date_to_datetime(dateraw):
    cy = datetime.isocalendar(date.today())[0]
    try:
        newdate = datetime.strptime(dateraw + str(cy), '%b %d%Y')
    except ValueError:
        newdate = datetime.strptime(dateraw, '%m/%d/%y')
    return newdate
Example #7
def addMinutes(minutes, day=None, week=None, year=None):
    currentyear, currentweek, currentday = datetime.isocalendar(datetime.now())
    if day is None: day = currentday
    if week is None: week = currentweek
    if year is None: year = currentyear
    data = read()
    data['work_hours'][year][week][day - 1] += minutes / 60
    write(data)
Example #8
File: api.py Project: jbms/mintapi
def _dateconvert(self, dateraw):
    # Converts dates from json data
    cy = datetime.isocalendar(date.today())[0]
    try:
        newdate = datetime.strptime(dateraw + str(cy), '%b %d%Y')
    except ValueError:  # fall back to the numeric format
        newdate = datetime.strptime(dateraw, '%m/%d/%y')
    return newdate
Example #10
def menu_scrape():
    d = datetime.isocalendar(date.today())
    # this matches the format of zerocator
    today_match = str(tuple(d)).replace(', ', '-').replace('(', '').replace(')', '')
    f = soup.find_all("div", {"data-date": today_match})
    for x in f:
        menu_info(x)  # pass each matching tag, not a re-parse of the whole result set
Example #11
def menu_scrape():
    d = datetime.isocalendar(date.today())  # note: unused; matching is by ISO date below
    today_match = date.today().isoformat()
    f = soup.find_all("div", {"data-date": today_match})
    for meal_num, x in enumerate(f, start=1):
        menu_info(x.encode('utf-8'), meal_num)
Example #12
def _get_up_week_check():
    """Возвращает 0, если верхняя неделя четная, иначе 1"""
    start_week = date(config.START_YEAR, config.START_MONTH, config.START_DAY)

    start_week_number = datetime.isocalendar(start_week)[1]
    if start_week_number % 2 == 0:
        up_week_check = 0
    else:
        up_week_check = 1
    return up_week_check
Example #13
def get_current_week_type():
    """Возвращает текущий тип недели."""
    today = date.today()
    up_week_check = _get_up_week_check()
    current_week_number = datetime.isocalendar(today)[1]
    if current_week_number % 2 == 0 and up_week_check == 0:
        week_type = 1
    else:
        week_type = 0

    return week_type
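A worked run of the two helpers, assuming the configured start date falls in ISO week 36 (even, so `_get_up_week_check()` returns 0):

# today in ISO week 40 (even) and up_week_check == 0  -> week_type == 1
# today in ISO week 41 (odd)                          -> week_type == 0
print(get_current_week_type())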
Example #14
def get_monday_sunday(date):
    """Convert date into isocalendar to pull out the dates of Monday and Sunday."""
    iso_date = datetime.isocalendar(date)
    year = iso_date[0]
    week = iso_date[1]
    monday_no_time = Week(year, week).monday()
    monday = datetime.combine(monday_no_time, datetime.min.time())
    sunday_no_time = Week(year, week).sunday()
    sunday = datetime.combine(sunday_no_time, datetime.min.time())

    return monday, sunday
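A short usage sketch, assuming `Week` comes from the isoweek package and `date`/`datetime` are imported from `datetime`:

monday, sunday = get_monday_sunday(date(2021, 1, 7))
print(monday)  # 2021-01-04 00:00:00 -- Monday of ISO week 1, 2021
print(sunday)  # 2021-01-10 00:00:00 -- Sunday of the same week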
Example #15
    def gather_sentences(self, period):
        # Use for merge list element
        from itertools import chain

        if period not in self.meta_keys:  # check argv
            print('key error (period)')
            sys.exit(1)

        # get current time
        now = datetime.now()
        week = datetime.isocalendar(now)[1]
        now = (str(now.year), str(now.month), str(week))

        sents_of_period = []

        sources = super(Ngrams, self).walk_src_path()

        self.a_info['total_articles'] = len(sources)

        # separate path name by slash rule
        # sep_pn => ".*/website/year/month/week/day/.*"
        sep_pn = re.compile(r'.*/(\S+)/(\S+)/(\S+)/(\S+)/(\S+)/.*')

        # gather sentences
        for src in sources:
            with open(src, mode='r', encoding='UTF-8') as jf:
                sentences = json.load(jf)['content']
            if not sentences:
                continue
            else:
                pop = sep_pn.findall(src)[0]  # part of path
                if period == 'total':
                    self.a_info['total_zi'] += len(list(chain(*sentences)))
                    sents_of_period += sentences

                elif period == 'y':
                    if pop[1] == now[0]:
                        sents_of_period += sentences

                elif period == 'm':
                    if pop[2] == now[1]:
                        sents_of_period += sentences

                elif period == 'w':
                    if pop[3] == now[2]:
                        sents_of_period += sentences
                else:
                    return []  # default is return blank list

        gc.collect()
        #print('garbage collected')
        #print('gather sentences complete for period {0}'.format(period))
        return sents_of_period
Example #16
    def test_parse_mint_date(self):
        current_year = datetime.isocalendar(date.today())[0]
        self.assertEqual(mint.parse_mint_date('Jan 10'),
                         date(current_year, 1, 10))
        self.assertEqual(mint.parse_mint_date('Nov 30'),
                         date(current_year, 11, 30))
        self.assertEqual(mint.parse_mint_date('Oct 08'),
                         date(current_year, 10, 8))

        self.assertEqual(mint.parse_mint_date('10/8/10'), date(2010, 10, 8))
        self.assertEqual(mint.parse_mint_date('1/23/10'), date(2010, 1, 23))
        self.assertEqual(mint.parse_mint_date('6/1/01'), date(2001, 6, 1))
Example #17
def get_tomorrow(message):
    _, group = message.text.split()
    n = datetime.today()
    n = datetime.isocalendar(n)
    day_w = str(n[2]+1) + 'day'
    week = what_week(n[1], day_w)
    web_page = get_page(group, week)
    times_lst, locations_lst, lessons_lst, rooms_lst = get_schedule(web_page, day_w)
    resp = ''
    for time, location, lesson, room in zip(times_lst, locations_lst, lessons_lst, rooms_lst):
        resp += '<b>{}</b>, {}, {}, {}\n'.format(time, room, location, lesson)
    bot.send_message(message.chat.id, resp, parse_mode='HTML')
Example #18
def add_text(started, text):
    current_week = datetime.isocalendar(started)[1]

    week_dir = f'v{current_week}'
    if week_dir not in os.listdir(DATA_ROOT):
        os.mkdir(DATA_ROOT + week_dir)

    p = f"{DATA_ROOT}{week_dir}/{started.strftime('%Y-%m-%d')}.txt"

    with open(p, 'a+') as f:
        f.write(text + '\n')
Example #19
def prepare_for_timeseries_weekly(listing):
    listing.sort()
    rows = []
    year, week, day_of_week = datetime.isocalendar(listing[0])
    tmp = {"year": year, "week": week, "count": 1, "to_order": listing[0]}
    for date in listing[1:]:
        year, week, day_of_week = datetime.isocalendar(date)
        if tmp["year"] == year and tmp["week"] == week:
            tmp["count"] += 1
        else:
            rows.append(tmp)
            tmp = {"year": year, "week": week, "count": 1, "to_order": date}
    rows.append(tmp)  # keep the final week, which the original version dropped
    df = pd.DataFrame(rows)  # DataFrame.append is gone in pandas 2.x; build from rows
    df = df.drop(columns=["week", "year"])
    return df
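A small usage sketch (dates are illustrative; the result keeps one row per ISO week with its hit count and first date):

dates = [datetime(2021, 1, 4), datetime(2021, 1, 6), datetime(2021, 1, 11)]
print(prepare_for_timeseries_weekly(dates))
#    count   to_order
# 0      2 2021-01-04   <- two dates fell in ISO week 1
# 1      1 2021-01-11   <- one date in ISO week 2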
Example #20
	def getWeek(self, course, date):
		# Get week number from date
		( _, week, _ ) = datetime.isocalendar(date)
		if week > 52:
			week -= 52

		# Make weekid for cache identification
		weekid = course + str(week)

		# Download the individual page for that course and that week
		if weekid not in self.cache:
			self.cache[weekid] = self.downloadPage(self.week_template_url.format(course,week))
		
		# Return the table that actually contains the schedule
		return self.cache[weekid].xpath('/html/body/table[2]')[0]
Example #21
    def __init__(self, name):
        try:
            self.name = name
            if not os.path.exists(name):
                from datetime import datetime
                with open(name, 'w') as f:
                    f.write("""{\n\t"UPDATED":"%s/%s/%s","PLUGINS":{},\n\t"PERMI
SSIONS":{}\n}\n'""" %
                            datetime.isocalendar(datetime.now()))
            with open(name) as configFile:
                content = configFile.read()
                self.config = json.loads(content)
            io.log('loaded', name.split(os.sep)[-1])
        except Exception as error:
            io.error(error)
Example #22
File: charts.py Project: mbuthiya/lorre
    def get_data(self):
        '''
        Query the db for chart data, pack them into a dict and return it.
        '''

        this_week = datetime.isocalendar(datetime.today())[1]
        harvest = [
            harvest_season for harvest_season in Season.objects.all()
            if datetime.isocalendar(harvest_season.expected_harvest_date)[1] == this_week
        ]

        start = datetime.today() - timedelta(days=datetime.today().weekday())
        end = start + timedelta(days=6)

        dateList = pd.date_range(start, end)
        dateListConvert = list(map(pd.Timestamp.to_pydatetime, dateList))

        data = {}
        for day in dateListConvert:
            data[day.date()] = 0

        for harvests in harvest:
            if harvests.expected_harvest_date in data:
                data[harvests.expected_harvest_date] = harvests.estimated_yield
        return data
Example #23
	def getEventsFromDate(self, course, date):
		# Get weeks schedule
		week = self.getWeek(course, date)

		# Get day of week
		( _, _, day ) = datetime.isocalendar(date)
		
		# How many rows does the day span
		row_span = week.xpath('tr/td[1]//@rowspan')
		row_span = [int(x) for x in row_span]
		start_row = 2 + sum(row_span[:day-1])
		row_count = row_span[day-1]
		
		xml_events = []
		# Get the events from a specific day
		for row in range(0,row_count):
			# Get the events from a specific row
			xml_events += week.xpath('tr[' + str(start_row + row) + ']/td[table]')

		# Reverse so we can use pop and append from the end
		xml_events.reverse()

		# List of events
		events = []
		
		while xml_events:
			# Pick first event
			event = Event(xml_events.pop(), date)
			
			if xml_events:
				# Look at next event
				next_event = Event(xml_events[-1],date)
				
				# If next event is the same course as first event and it's only a small break
				if next_event.course == event.course and (next_event.starttime - event.endtime) < self.smallbreak:
					# Combine the two events
					event.endtime = next_event.endtime
					
					# Remove the next event from the queue
					xml_events.pop()
					
			# Push event to list
			events.append(event)
			
		return events
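The merging step can be read in isolation: consecutive events of the same course separated by less than `smallbreak` collapse into one. A standalone sketch of that idea (the `course`, `starttime` and `endtime` attributes mirror the Event objects above; this is an illustration, not the class's own method):

def merge_small_breaks(events, smallbreak):
    # assumes events are ordered by start time
    merged = []
    for event in events:
        if merged and merged[-1].course == event.course \
                and (event.starttime - merged[-1].endtime) < smallbreak:
            merged[-1].endtime = event.endtime  # absorb the short gap
        else:
            merged.append(event)
    return merged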
Example #24
    def __init__(self, weeknum, classcode, buildingcode, sectorcode):
        self.weeknum = datetime.isocalendar(datetime.today())[1]
        self.classcode = "HEITO19AO-A"
        self.buildingcode = "HRN"
        self.sectorcode = "ECO"

        try:
            if weeknum is not None and isinstance(weeknum, str):
                self.weeknum = weeknum
            if classcode is not None and isinstance(classcode, str):
                self.classcode = classcode
            if buildingcode is not None and isinstance(buildingcode, str):
                self.buildingcode = buildingcode
            if sectorcode is not None and isinstance(sectorcode, str):
                self.sectorcode = sectorcode
        except Exception:
            raise ClientException(
                "One of the url parameters entered is invalid")
Example #25
def submit_form():
    """Show and Process timetracker form"""
########################################Show Proper Form#######################################

    #set date
    old_date = datetime.now()
    stripped_date = datetime.date(old_date)
    date = datetime.combine(stripped_date, datetime.min.time())

    #extract day from datetime stamp
    iso_week = datetime.isocalendar(date)
    day = iso_week[2]

    #set user_id to logged in user
    user_id = session["user_id"]

    #see if there is already a response with the same day and time id in db
    test_response = (db.session.query(Response.time_interval)
                     .filter(Response.date == date,
                             Response.user_id == session["user_id"]).all())
    #create a list of the time intervals from above query
    used_times = [item[0] for item in test_response]  #list comprehension omg

    #only display times that haven't already been filled out
    if request.method == 'GET':
        return render_template("form.html", times=TIMES, used_times=used_times)
#######################################Process form###############################
    else:
        # get form variables
        hourint = request.form["hourint"]
        text = request.form["text"]
        color = request.form["color"]

        # create a new response
        new_response = (Response(user_id=user_id, color=color, date=date, day=day,
                                 time_interval=hourint, text=text))

        # add new response to database
        db.session.add(new_response)
        db.session.commit()

        return redirect("/chart?times="+",".join(str(x) for x in TIMES))
Example #26
def main():
    currentTime = datetime.now()
    year, week, day = datetime.isocalendar(currentTime)
    with open(fp, 'rb+') as f:
        data = pickle.load(f)
        if data['work_hours'].setdefault(year, {}).setdefault(
                week,
            [0, 0, 0, 0, 0, 0, 0
             ])[day - 1] == 0 and data['tracking_start'] is None:  # hack
            data['day_start'] = currentTime
            data['timestamps'] = data['timestamps'][-20:]
        if data['tracking_start'] is None:
            data['tracking_start'] = currentTime
            print('Started Tracking..')
        else:
            currentWork = (currentTime -
                           data['tracking_start']).total_seconds() / 3600
            data['work_hours'][year][week][day - 1] += currentWork
            hoursPassed = (currentTime - data['day_start']
                           ).total_seconds() / 3600 + PREDICTION_BUFFER
            dayLength = DAY_END - data['day_start'].hour - data[
                'day_start'].minute / 60 + PREDICTION_BUFFER
            dayPrediction = dayLength * (
                data['work_hours'][year][week][day - 1]) / hoursPassed
            if day == 1:
                weekAvg = (sum(data['work_hours'][year][week - 1])) / 7
            else:
                weekAvg = (sum(
                    data['work_hours'][year][week][:day - 1])) / (day - 1)
            weekprediction = (sum(data['work_hours'][year][week][:day - 1]) +
                              dayPrediction) / day
            data['timestamps'].append((data['tracking_start'], currentTime))
            data['tracking_start'] = None
            print(day, f'{currentTime.hour}:{currentTime.minute} \n')
            print(
                f'{currentWork:.2f}   {data["work_hours"][year][week][day-1]:.2f}   {dayPrediction:.2f}\n'
            )
            print(f'{weekAvg:.2f}   {weekprediction:.2f}')
        f.seek(0)
        pickle.dump(data, f)
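A worked instance of the prediction arithmetic, with illustrative values (`DAY_END` and `PREDICTION_BUFFER` are module constants; assume 22 and 0.5 here). If the day started at 9:00, 4 hours have elapsed and 2.0 work hours are logged:

# hoursPassed   = 4 + 0.5              = 4.5
# dayLength     = 22 - 9 - 0/60 + 0.5  = 13.5
# dayPrediction = 13.5 * 2.0 / 4.5     = 6.0 projected work hours for the day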
Example #27
def get_near_lesson(message):
    _, group = message.text.split()
    n = datetime.today()
    n = datetime.isocalendar(n)
    time = str(datetime.time(datetime.now()))[0:5]
    hour_now = int(time[:2])
    minute_now = int(time[3:])
    day_w = str(n[2]) + 'day'
    print(day_w)
    week = what_week(n[1], day_w)
    web_page = get_page(group, week)
    if day_w == '7day':
        # Sunday: send Monday's first lesson and stop (the original computed resp
        # here and then unconditionally overwrote it below)
        times_lst, locations_lst, lessons_lst, rooms_lst = get_schedule(web_page, '1day')
        resp = '<b>{}</b>, {}, {}, {}\n'.format(times_lst[0], rooms_lst[0], locations_lst[0], lessons_lst[0])
        bot.send_message(message.chat.id, resp, parse_mode='HTML')
        return
    times_lst, locations_lst, lessons_lst, rooms_lst = get_schedule(web_page, day_w)
    times_hour = []
    times_minute = []
    for i in range(len(times_lst)):
        t = times_lst[i][:5]
        if t[-1] == '-':
            t = t[:-1]
        h, m = t.split(':')
        times_hour.append(int(h))
        times_minute.append(int(m))
    for k in range(len(times_hour)):
        if hour_now < times_hour[k]:
            resp = '<b>{}</b>, {}, {},{}\n'.format(times_lst[k], rooms_lst[k], locations_lst[k], lessons_lst[k])
            break
        if hour_now == times_hour[k]:
            if minute_now <= times_minute[k]:
                resp = '<b>{}</b>, {}, {},{}\n'.format(times_lst[k], rooms_lst[k], locations_lst[k], lessons_lst[k])
                break
    if hour_now > times_hour[-1]:
        if day_w == '6day':
            times_lst, locations_lst, lessons_lst, rooms_lst = get_schedule(web_page, '1day')
        else:
            times_lst, locations_lst, lessons_lst, rooms_lst = get_schedule(web_page, '{}day'.format(int(day_w[0]) + 1))
        resp = '<b>{}</b>, {}, {}, {}\n'.format(times_lst[0], rooms_lst[0], locations_lst[0], lessons_lst[0])
    bot.send_message(message.chat.id, resp, parse_mode='HTML')
Example #28
def process_log_data(spark, input_data, output_data):

    log_data = os.path.join(input_data, "log_data/*/*/*.json")
    df = spark.read.json(log_data)
    df = df.where(col("page") == "NextSong")

    users_table = df['userId', 'firstName', 'lastName', 'gender', 'level', 'ts']
    users_table = users_table.orderBy("ts", ascending=False).dropDuplicates(subset=["userId"]).drop('ts')
    users_table.write.parquet(os.path.join(output_data, 'users.parquet'), 'overwrite')

    get_datetime = udf(lambda x: datetime.fromtimestamp(int(int(x) / 1000)), TimestampType())
    get_weekday = udf(lambda x: x.weekday())
    get_week = udf(lambda x: datetime.isocalendar(x)[1])
    get_hour = udf(lambda x: x.hour)
    get_day = udf(lambda x: x.day)
    get_year = udf(lambda x: x.year)
    get_month = udf(lambda x: x.month)

    df = df.withColumn('start_time', get_datetime(df.ts))
    df = df.withColumn('hour', get_hour(df.start_time))
    df = df.withColumn('day', get_day(df.start_time))
    df = df.withColumn('week', get_week(df.start_time))
    df = df.withColumn('month', get_month(df.start_time))
    df = df.withColumn('year', get_year(df.start_time))
    df = df.withColumn('weekday', get_weekday(df.start_time))
    time_table = df['start_time', 'hour', 'day', 'week', 'month', 'year', 'weekday']
    time_table = time_table.drop_duplicates(subset=['start_time'])
    time_table.write.partitionBy('year', 'month').parquet(os.path.join(output_data, 'time.parquet'), 'overwrite')

    song_df = spark.read.parquet("results/songs.parquet")

    df = df.join(song_df, (song_df.title == df.song) & (song_df.artist_name == df.artist))
    df = df.withColumn('songplay_id', monotonically_increasing_id())
    songplays_table = df['songplay_id', 'start_time', 'userId', 'level', 'song_id', 'artist_id', 'sessionId', 'location', 'userAgent']

    songplays_table.write.parquet(os.path.join(output_data, 'songplays.parquet'), 'overwrite')
Example #29
    def test_dictionary_creation(self):
        """given query response, is a json dictionary created?"""

        #create fake response
        hourint = 1
        text = "This is some text"
        color = "green"
        date_obj = datetime.strptime("2015, 2, 16", "%Y, %m, %d")
        date = datetime.date(date_obj)
        iso_week = datetime.isocalendar(date)
        day = iso_week[2]
        user_id = 5000

        new_response = Response(user_id=user_id, color=color, date=date, day=day, time_interval=hourint, text=text)

        #add and commit it to my database
        db.session.add(new_response)
        db.session.commit()

        #query for that data
        query = db.session.query(Response).filter_by(user_id = 5000).first()
        print(query)
        #write out what that query jsonified would look like

        json_dict = {
            "response_id": 1,
            "day": 2,
            "words": "This is some text",
            "hour": 1,
            "value": "green"
        }

        #pass that query data to to_d3_dict for comparison
        self.assertEqual(query.to_d3_dict()["words"], json_dict["words"])
        #rollback database to get rid of new fake response (db.session.rollback())
        db.session.rollback()
Example #30
# python -u $CINGROOT/python/cing/Scripts/interactive/mouseBuffer3.py

from datetime import datetime
from numpy import * #@UnusedWildImport
dt = datetime.now()
print datetime.isocalendar(dt)

# Below is for a memory test which can show that python doesn't like to do over 2 G in any one chunk but can go higher with multiple chunks.
# a floating point in python is implemented as a C double
# on 32 bit executable this is 64 bits per double; 8 bytes
a = ones( (1024,1024,10) ) * 1.1
aSizeInMb = a.size * 8 / ( 1024 * 1024 )
print aSizeInMb
v = ones( (1024,1024,10) ) * 1.1
del( a ) # instant release.
Example #31
def week_number(datetime):
    return datetime.isocalendar()[1]
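Note that the parameter deliberately shadows the `datetime` class, so the body is just the instance method; for example:

from datetime import datetime
print(week_number(datetime(2021, 1, 7)))  # 1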
Example #32
def macro_RecentChangeSummury(macro, year=0, week_number=0, comments_only=False):
    
    if year == 0 and week_number == 0:
        year, week_number, wd = datetime.isocalendar(datetime.today())

    return getPageListFromLog(macro,year,week_number,comments_only)
Example #33
def getPageListFromLog (macro, req_year, req_week_number,comments_only):
    request = macro.request
    pages = {}
    oldyw = -1
    passed = False

    for line in editlog.EditLog(request).reverse():

        if not request.user.may.read(line.pagename):
            continue

        line.time_tuple = request.user.getTime(wikiutil.version2timestamp(line.ed_time_usecs))
        year,wn,wd = datetime.isocalendar(datetime.fromtimestamp(time.mktime(line.time_tuple)))
        yw = '%04d%02d' % (year,wn)

        if req_year > 0 and req_week_number > 0:
            if req_week_number == wn and req_year == year:
                passed = True
            elif passed and ((req_week_number < wn and req_year == year) or req_year < year):
                break  # early exit, for performance
            else:
                continue

        if not pages.has_key(yw):
            pages[yw] = {}

        if pages[yw].has_key(line.pagename):
            pages[yw][line.pagename].append(line.comment)
        else:
            pages[yw][line.pagename] = [line.comment]


    ret = []
    for yw in reversed(sorted(pages.keys())):
        if len(pages[yw].keys()) > 0:
            ret.append("WEEK%s, %s" % (yw[-2:], yw[:4]))
            for page in reversed(sorted(pages[yw].keys(), key=lambda x:len(pages[yw][x]))):
                edit_cnt = len(pages[yw][page])
                comments = filter(lambda x:len(x)>0, pages[yw][page])


                p = Page(request, page)

                if len(comments)>0 or not comments_only:
                    if p.exists():
                        ret.append(' * [[%s]] (%s)' % (page, str(edit_cnt)))
                    else:
                        ret.append(' * `%s` (%s)' % (page, str(edit_cnt)))
                    for comment in comments:
                        ret.append('  * ' + comment)
            """
            ret.append('<b>WEEK%s, %s</b>'% (yw[-2:],yw[:4]))
            ret.append('<ol>')
            for page in reversed(sorted(pages[yw].keys(), key=lambda x:len(pages[yw][x]))):
                page_link = Page(request,page).link_to(request, '%s(%d) ' % (page,len(pages[yw][page]),), css_class="include-page-link")
                comments = filter(lambda x:len(x)>0, pages[yw][page])
                if comments_only and len(comments)>0:
                    ret.append('<li>'+page_link+'</li>')
                    ret.append('<ul>')
                    for comment in comments:
                        ret.append('<li>' + comment + '</li>')
                    ret.append('</ul>')
                elif not comments_only:
                    ret.append('<li>'+page_link+'</li>')
                    ret.append('<ul>')
                    for comment in comments:
                        ret.append('<li>' + comment + '</li>')
                    ret.append('</ul>')
            ret.append('</ol>')
            """

    macro_str = "<<%s(%s)>>" % (macro.name, macro.args)
    content_str = '\n'.join(ret)
    form = u'''<form method='post'>
    <input type='hidden' name='action' value='ReplaceTagAction'>
    <input type='hidden' name='rsv' value='0'>
    <input type='hidden' name='regexp' value='0'>
    <textarea name='tag' style='display:none'>%s</textarea>
    <textarea name='txt' style='display:none'>%s</textarea>
    <input type='submit' value='   HARDCOPY TO THIS PAGE   '>
</form>
''' % (macro_str, content_str)
    return wikiutil.renderText(request, WikiParser, wikiutil.escape(content_str)) + form
Example #34
    # extract columns for users table    
    users_table = df['userId', 'firstName', 'lastName', 'gender', 'level','ts']
    users_table = users_table.orderBy("ts",ascending=False).dropDuplicates(subset=["userId"]).drop('ts')
    
    # write users table to parquet files
    users_table.write.parquet(os.path.join(output_data, 'users.parquet'), 'overwrite')

    # create timestamp column from original timestamp column
    # (note: start_time is overwritten below by get_datetime, which uses local time)
    get_timestamp = udf(lambda x: datetime.utcfromtimestamp(int(x) / 1000), TimestampType())
    df = df.withColumn("start_time", get_timestamp("ts"))
    
    # extract columns to create time table
    get_datetime = udf(lambda x: datetime.fromtimestamp(int(int(x)/1000)), TimestampType())
    get_weekday = udf(lambda x: x.weekday())
    get_week = udf(lambda x: datetime.isocalendar(x)[1])
    get_hour = udf(lambda x: x.hour)
    get_day = udf(lambda x : x.day)
    get_year = udf(lambda x: x.year)
    get_month = udf(lambda x: x.month)
    
    df = df.withColumn('start_time', get_datetime(df.ts))
    df = df.withColumn('hour', get_hour(df.start_time))
    df = df.withColumn('day', get_day(df.start_time))
    df = df.withColumn('week', get_week(df.start_time))
    df = df.withColumn('month', get_month(df.start_time))
    df = df.withColumn('year', get_year(df.start_time))
    df = df.withColumn('weekday', get_weekday(df.start_time))
    time_table  = df['start_time', 'hour', 'day', 'week', 'month', 'year', 'weekday']
    time_table = time_table.drop_duplicates(subset=['start_time'])
    
Example #35
def process_log_data(spark, input_data, output_data):
    """
    Processing log_data 
        
    Processing song_data from S3 to local directory. 
    Creates dimension tables "users" and "time" and also the fact table "songplays"
    
    Params:
        spark: SparkSession
        input_data: Root-URL to S3 bucket 
        output_data: Path to local directory
    Returns:
        None
    """
    # get filepath to log data file
    log_data = os.path.join(input_data, "log_data/*/*/*.json")

    # read log data file
    df = spark.read.json(log_data)

    # filter by actions for song plays
    df = df.where(col("page") == "NextSong")

    # extract columns for users table
    users_table = df['userId', 'firstName', 'lastName', 'gender', 'level',
                     'ts']
    users_table = users_table.orderBy(
        "ts", ascending=False).dropDuplicates(subset=["userId"]).drop('ts')

    # write users table to parquet files
    users_table.write.parquet(os.path.join(output_data, 'users.parquet'),
                              'overwrite')

    get_datetime = udf(lambda x: datetime.fromtimestamp(int(int(x) / 1000)),
                       TimestampType())
    get_weekday = udf(lambda x: x.weekday())
    get_week = udf(lambda x: datetime.isocalendar(x)[1])
    get_hour = udf(lambda x: x.hour)
    get_day = udf(lambda x: x.day)
    get_year = udf(lambda x: x.year)
    get_month = udf(lambda x: x.month)

    df = df.withColumn('start_time', get_datetime(df.ts))
    df = df.withColumn('hour', get_hour(df.start_time))
    df = df.withColumn('day', get_day(df.start_time))
    df = df.withColumn('week', get_week(df.start_time))
    df = df.withColumn('month', get_month(df.start_time))
    df = df.withColumn('year', get_year(df.start_time))
    df = df.withColumn('weekday', get_weekday(df.start_time))
    time_table = df['start_time', 'hour', 'day', 'week', 'month', 'year',
                    'weekday']
    time_table = time_table.drop_duplicates(subset=['start_time'])
    # write time table to parquet files partitioned by year and month
    time_table.write.partitionBy('year', 'month').parquet(
        os.path.join(output_data, 'time.parquet'), 'overwrite')

    # read in song data to use for songplays table
    song_df = spark.read.parquet(os.path.join(output_data, 'songs.parquet'))

    # extract columns from joined song and log datasets to create songplays table
    df = df.join(song_df, (song_df.title == df.song) &
                 (song_df.artist_name == df.artist))
    df = df.withColumn('songplay_id', monotonically_increasing_id())
    songplays_table = df['songplay_id', 'start_time', 'userId', 'level',
                         'song_id', 'artist_id', 'sessionId', 'location',
                         'userAgent']
    # write songplays table to parquet files
    songplays_table.write.parquet(
        os.path.join(output_data, 'songplays.parquet'), 'overwrite')
Example #36
def get_week(datetime):
    year, week, weekday = datetime.isocalendar()
    return (year, week)
Example #37
def process_log_data(spark, input_data, output_data):
    """Processes the log data

    Arguments:
        spark  -- The spark session used for computation
        input_data {str} -- The URI where the data needs to be loaded from
        output_data {str} -- The URI where the transformed data is saved
    """
    # get filepath to log data file
    log_data = os.path.join(input_data, "log_data/*.json")

    # read log data file
    df = spark.read.json(log_data)

    # filter by actions for song plays
    df = df.where(df.page == 'NextSong')

    # extract columns for users table
    users_table = df['userId', 'firstName', 'lastName', 'gender', 'level',
                     'ts'].distinct()

    # write users table to parquet files
    users_table.write.parquet(output_data + "users.parquet", "overwrite")

    # create datetime column from original timestamp column
    get_datetime = udf(lambda x: datetime.fromtimestamp(int(int(x) / 1000)),
                       TimestampType())
    get_weekday = udf(lambda x: x.weekday())
    get_week = udf(lambda x: datetime.isocalendar(x)[1])
    get_hour = udf(lambda x: x.hour)
    get_day = udf(lambda x: x.day)
    get_year = udf(lambda x: x.year)
    get_month = udf(lambda x: x.month)

    df = df.withColumn('start_time', get_datetime(df.ts))
    df = df.withColumn('hour', get_hour(df.start_time))
    df = df.withColumn('day', get_day(df.start_time))
    df = df.withColumn('week', get_week(df.start_time))
    df = df.withColumn('month', get_month(df.start_time))
    df = df.withColumn('year', get_year(df.start_time))
    df = df.withColumn('weekday', get_weekday(df.start_time))
    time_table = df['start_time', 'hour', 'day', 'week', 'month', 'year',
                    'weekday']

    # write time table to parquet files partitioned by year and month
    time_table.write.partitionBy("year", "month").parquet(
        output_data + "time.parquet", "overwrite")

    # read in song data to use for songplays table
    song_df = spark.read.parquet(output_data + "songs.parquet")

    # extract columns from joined song and log datasets to create songplays table
    df = df.join(song_df, (song_df.title == df.song)
                 & (song_df.artist_name == df.artist))
    df = df.withColumn('songplay_id', monotonically_increasing_id())
    songplays_table = df['songplay_id', 'start_time', 'userId', 'level',
                         'song_id', 'artist_id', 'sessionId', 'location',
                         'userAgent'].distinct()

    # write songplays table to parquet files
    songplays_table.write.parquet(output_data + "songplays.parquet",
                                  "overwrite")
Example #38
def process_log_data(spark, input_data, output_data):
    """Process log json data
    
    Pulls the raw json log data from s3 and saves 3 tables in parquet format to local
    Creates fact table songplays, dimension tables users and time
    songplays fact table takes song.parquet file as input
    
    Args:
        spark: SparkSession Object
        input_data: s3 bucket url
        output_data: destination s3 bucket url
    
    Returns:
        None
    """
    # get filepath to log data file
    log_data = os.path.join(input_data, "log_data/*/*/*.json")

    # read log data file
    df = spark.read.json(log_data)

    # filter by actions for song plays
    df = df.where(col("page") == "NextSong")

    # extract columns for users table
    users_table = df["userId", "firstName", "lastName", "gender", "level",
                     "ts"]
    # sort table on timestamp then drop timestamp
    users_table = users_table.orderBy("ts", ascending=False).drop("ts")
    #drop duplicates
    users_table = users_table.drop_duplicates()

    # write users table to parquet files
    users_table.write.parquet(os.path.join(output_data, "users.parquet"),
                              "overwrite")

    print("users.parquet file created and saved locally")

    # create datetime column from original timestamp column
    get_datetime = udf(lambda x: datetime.fromtimestamp(int(int(x) / 1000)),
                       TimestampType())
    get_weekday = udf(lambda x: x.weekday())
    get_week = udf(lambda x: datetime.isocalendar(x)[1])
    get_hour = udf(lambda x: x.hour)
    get_day = udf(lambda x: x.day)
    get_year = udf(lambda x: x.year)
    get_month = udf(lambda x: x.month)

    df = df.withColumn("start_time", get_datetime(df.ts))

    # derive further columns from new start_time column
    df = df.withColumn("weekday", get_weekday(df.start_time))
    df = df.withColumn("week", get_week(df.start_time))
    df = df.withColumn("hour", get_hour(df.start_time))
    df = df.withColumn("day", get_day(df.start_time))
    df = df.withColumn("year", get_year(df.start_time))
    df = df.withColumn("month", get_month(df.start_time))

    # extract columns to create time table
    time_table = df["start_time", "weekday", "hour", "day", "week", "month",
                    "year"]

    #remove duplicates
    time_table = time_table.drop_duplicates()

    # write time table to parquet files partitioned by year and month
    # (the original assigned the result of .parquet(), which is None, back to time_table)
    time_table.write.partitionBy("year", "month").parquet(
        os.path.join(output_data, "time.parquet"), "overwrite")

    print("time.parquet file written")

    # read in song data to use for songplays table
    song_df = spark.read.parquet(os.path.join(output_data, "songs.parquet"))

    # extract columns from joined song and log datasets to create songplays table
    df = df.join(song_df, (song_df.title == df.song) &
                 (song_df.artist_name == df.artist))
    df = df.withColumn("songplay_id", monotonically_increasing_id())
    songplays_table = df["songplay_id", "start_time", "userId", "level",
                         "song_id", "artist_id", "sessionId", "location",
                         "userAgent", "year", "month"]

    # write songplays table to parquet files partitioned by year and month
    songplays_table.write.partitionBy("year", "month").parquet(
        os.path.join(output_data, "songplays.parquet"), "overwrite")
    print("songplays.parquet file written, job done!")
Example #39
def week_from_score(score) -> int:
    datetime = parsedate(score.findtext("DateTime"))
    week = datetime.isocalendar()[1]
    return week
Example #40
def fmtweek(datetime):
	(year, week) = datetime.isocalendar()[:2]
	return '%04dW%02d' % (year, week)
Example #41
def generate_data_by_date(apple_data_type, dataset_name, data_type):

    date_dict = dict()
    for child in root:
        attr = child.attrib

        # find the matching data type
        if child.tag == 'Record' and attr['type'] == apple_data_type:

            start_date = datetime.strptime(attr['startDate'],
                                           '%Y-%m-%d %H:%M:%S %z')
            end_date = datetime.strptime(attr['endDate'],
                                         '%Y-%m-%d %H:%M:%S %z')

            #check year
            if start_date.year == YEAR:

                # step count & date
                count = int(attr['value'])
                date = datetime.strftime(start_date, '%-m/%-d/%Y')

                # check start and end date if count happens over two or more days
                if datetime.isocalendar(start_date) != datetime.isocalendar(
                        end_date):
                    # split the count in proportion to duration of before and after midnight
                    midnight = datetime.strftime(datetime.date(end_date),
                                                 '%Y-%m-%d %H:%M:%S')
                    midnight_time = datetime.strptime(midnight,
                                                      '%Y-%m-%d %H:%M:%S')
                    till_midnight = (midnight_time -
                                     start_date.replace(tzinfo=None)).seconds
                    from_midnight = (end_date.replace(tzinfo=None) -
                                     midnight_time).seconds

                    in_the_middle = 0
                    mid_date_count = (end_date - start_date).days - 1
                    # more than one day's gap: seconds spanned by the whole middle days
                    if mid_date_count > 0:
                        in_the_middle = 60 * 60 * 24 * mid_date_count

                    count_before_midnight = round(
                        till_midnight /
                        (till_midnight + in_the_middle + from_midnight) *
                        count)
                    count_after_midnight = round(
                        from_midnight /
                        (till_midnight + in_the_middle + from_midnight) *
                        count)

                    # add count to start and end date
                    date_dict[date] = date_dict[
                        date] + count_before_midnight if date in date_dict.keys(
                        ) else count_before_midnight

                    if end_date.year == YEAR:
                        next_date = datetime.strftime(end_date, '%-m/%-d/%Y')
                        date_dict[next_date] = date_dict[
                            next_date] + count_after_midnight if next_date in date_dict.keys(
                            ) else count_after_midnight

                    # add count to the dates evenly distributed to the dates in the middle
                    for i in range(mid_date_count):
                        count_in_a_mid_day = round(
                            (count - count_before_midnight -
                             count_after_midnight) / mid_date_count)
                        mid_datetime = start_date + timedelta(days=(i + 1))
                        mid_date = datetime.strftime(mid_datetime,
                                                     '%-m/%-d/%Y')
                        if mid_datetime.year == YEAR:
                            date_dict[mid_date] = date_dict[
                                mid_date] + count_in_a_mid_day if mid_date in date_dict.keys(
                                ) else count_in_a_mid_day

                else:
                    date_dict[date] = date_dict[
                        date] + count if date in date_dict.keys() else count

    # convert dict to array
    data_of_year = []
    for d in date_dict:
        data_of_year.append(dict(date=d, value=date_dict[d]))
    # sort by date; often date isn't ordered in the original data
    data_of_year = sorted(
        data_of_year,
        key=lambda i: datetime.strptime(i['date'], '%m/%d/%Y').timestamp())
    # save data as json
    _savedatasets.save_dataset(data_of_year, _setup.NAME, dataset_name,
                               data_type)
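The midnight-splitting branch distributes one record's count in proportion to the seconds logged before the first midnight, across any whole middle days, and after the last midnight. A worked instance with illustrative numbers:

# record: 300 steps from 23:00 Jan 1 to 01:00 Jan 2 (same YEAR)
# till_midnight = 3600 s, from_midnight = 3600 s, no whole middle days
# count_before_midnight = round(3600 / 7200 * 300) = 150 -> credited to 1/1
# count_after_midnight  = round(3600 / 7200 * 300) = 150 -> credited to 1/2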