示例#1
0
    def _populate(self, year):
        # New Year's Day
        self._long_weekends("Ano novo", [year, JAN, 1])

        self._long_weekends("Dia do Início da Luta Armada", [year, FEB, 4])

        self._long_weekends("Dia Internacional da Mulher", [year, MAR, 8])

        self._long_weekends("Dia da Libertação da África Austral",
                            [year, MAR, 23])

        self._long_weekends("Dia da Paz e Reconciliação", [year, APR, 4])

        self._long_weekends("Dia Mundial do Trabalho", [year, MAY, 1])

        self._long_weekends("Dia do Herói Nacional", [year, SEP, 17])

        self._long_weekends("Dia dos Finados", [year, NOV, 2])

        self._long_weekends("Dia da Independência", [year, NOV, 11])

        # Christmas Day
        self._long_weekends("Dia de Natal e da Família", [year, DEC, 25])

        self[easter(year) - rd(days=2)] = "Sexta-feira Santa"
        self[easter(year)] = "Páscoa"
        quaresma = easter(year) - rd(days=46)
        self[quaresma - rd(weekday=TU(-1))] = "Carnaval"
示例#2
0
    def R7_parser(self):

        self.START_DATE = self.gdate
        if "MONTHS" in self.kwargs:
            self.START_DATE = delta(self.START_DATE,
                                    months=self.kwargs["MONTHS"])

        if "DAYINDEX" in self.kwargs and "DAYNAME" in self.kwargs:
            if self.kwargs["DAYINDEX"] - 2 == 0:
                self.START_DATE = self.START_DATE + relativedelta(
                    day=1, weekday=MO(self.kwargs["DAYNAME"]))
            if self.kwargs["DAYINDEX"] - 2 == 1:
                self.START_DATE = self.START_DATE + relativedelta(
                    day=1, weekday=TU(self.kwargs["DAYNAME"]))
            if self.kwargs["DAYINDEX"] - 2 == 2:
                self.START_DATE = self.START_DATE + relativedelta(
                    day=1, weekday=WE(self.kwargs["DAYNAME"]))
            if self.kwargs["DAYINDEX"] - 2 == 3:
                self.START_DATE = self.START_DATE + relativedelta(
                    day=1, weekday=TH(self.kwargs["DAYNAME"]))
            if self.kwargs["DAYINDEX"] - 2 == 4:
                self.START_DATE = self.START_DATE + relativedelta(
                    day=1, weekday=FR(self.kwargs["DAYNAME"]))
            if self.kwargs["DAYINDEX"] - 2 == 5:
                self.START_DATE = self.START_DATE + relativedelta(
                    day=1, weekday=SA(self.kwargs["DAYNAME"]))
            if self.kwargs["DAYINDEX"] - 2 == 6:
                self.START_DATE = self.START_DATE + relativedelta(
                    day=1, weekday=SU(self.kwargs["DAYNAME"]))
def set_next_relative_date(start_date, day_of_week, week_of_month,
                           separation_count):
    arg = MO(1)

    if day_of_week == 1:
        arg = MO(week_of_month)
    if day_of_week == 2:
        arg = TU(week_of_month)
    if day_of_week == 3:
        arg = WE(week_of_month)
    if day_of_week == 4:
        arg = TH(week_of_month)
    if day_of_week == 5:
        arg = FR(week_of_month)
    if day_of_week == 6:
        arg = SA(week_of_month)
    if day_of_week == 7:
        arg = SU(week_of_month)

    if week_of_month == -1:
        return start_date + relativedelta(
            day=31, months=+separation_count, weekday=arg)

    return start_date + relativedelta(
        day=1, months=+separation_count, weekday=arg)
示例#4
0
 def _get_start_of_week(self, span):
     user_lang = self.env['res.lang'].search([('code', '=', self.env.user.lang)])
     week_start_map = {'1': MO(-1), '2': TU(-1), '3': WE(-1), '4': TH(-1), '5': FR(-1), '6': SA(-1), '7': SU(-1)}
     week_start_delta = relativedelta(weekday=week_start_map.get(user_lang.week_start, MO(-1)))
     if span == 'week':
         return week_start_delta
     return START_OF[span] + week_start_delta
    def get_next_tuesday(self):
        today = datetime.date.today()
        offset = +1
        if today.isoweekday() == 2:
            # If today is Tuesday then a offset of +1 will return today's date
            # so we need to use an offset of +2 in that case to get the next tuesday
            offset = +2

        next_tuesday = today + relativedelta(weekday=TU(offset))
        return next_tuesday
示例#6
0
文件: mensa.py 项目: N-Coder/mensabot
def get_semester_dates() -> List[semester]:
    """
    Get a list of the start and end dates of semesters coming and past, including the ranges of dates which
    have no lectures according to the website. So this does not include public holidays or weekends (where the
    university is closed completely).
    """

    r = requests.get(DATES_URL)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    table = soup.find("main").find("table", class_="contenttable")
    assert list(table.find("thead").strings) == [
        'Semester', 'Beginn', 'Ende',
        'Verfügungstag der Universität Passau (vorlesungsfrei)'
    ]
    assert table.next_sibling.string == \
           "Die Vorlesungszeit wird unterbrochen vom 24. Dezember bis einschließlich 6. Januar, vom Gründonnerstag " \
           "bis einschließlich Dienstag nach Ostern sowie am Dienstag nach Pfingsten!"
    semesters = []
    for semester_tr in table.find("tbody").find_all("tr"):
        name, *dates = semester_tr.strings
        try:
            dates = [
                dtm.datetime.strptime(d, "%d.%m.%Y").date() for d in dates
            ]
        except ValueError:
            continue
        if len(dates) >= 3:
            start, end, bruecke = dates
            holidays = [(bruecke, bruecke)]
        else:
            start, end = dates
            holidays = []
        winter_sem = name.startswith("Winter")
        assert winter_sem == (start.year != end.year)

        if winter_sem:
            # 24. Dezember bis einschließlich 6. Januar
            holidays.append((dtm.date(start.year, 12,
                                      24), dtm.date(end.year, 1, 6)))
        else:
            easter_date = easter(start.year)
            # Gründonnerstag bis einschließlich Dienstag nach Ostern
            holidays.append((easter_date + relativedelta(weekday=TH(-1)),
                             easter_date + relativedelta(weekday=TU(+1))))
            # Dienstag nach Pfingsten
            holidays.append((easter_date + relativedelta(days=50),
                             easter_date + relativedelta(days=51)))

        semesters.append(semester(name, winter_sem, start, end, holidays))

    semesters.sort(key=lambda e: e.start)
    return semesters
def FourWeekCreate():
    today = date.today()
    weekArr = []
    for i in range(4):
        lastMon = -2 - i #must be -1 more than other days as it runs every monday. so it needs to look at LAST monday
        otherDays = -1 - i
        weekArr.append(today + relativedelta(weekday=MO(lastMon))) #last MON
        weekArr.append(today + relativedelta(weekday=TU(otherDays))) #last TUE
        weekArr.append(today + relativedelta(weekday=WE(otherDays))) #last WED
        weekArr.append(today + relativedelta(weekday=TH(otherDays))) #last THU
        weekArr.append(today + relativedelta(weekday=FR(otherDays))) #last FRI
        weekArr.append(today + relativedelta(weekday=SA(otherDays))) #last SAT
        weekArr.append(today + relativedelta(weekday=SU(otherDays))) #last SUN
    return weekArr
示例#8
0
def replace_days(dictionary):
    weekday = dictionary.get("weekday", None)
    if weekday:
        week_expr = dictionary["weekday"]
        week_day = week_expr[:2]
        week_int = int(week_expr.split("(")[1].split(")")[0])
        if week_day == "MO":
            dictionary["weekday"] = MO(week_int)
        elif week_day == "TU":
            dictionary["weekday"] = TU(week_int)
        elif week_day == "WE":
            dictionary["weekday"] = WE(week_int)
        elif week_day == "TH":
            dictionary["weekday"] = TH(week_int)
        elif week_day == "FR":
            dictionary["weekday"] = FR(week_int)
        elif week_day == "SA":
            dictionary["weekday"] = SA(week_int)
        elif week_day == "SU":
            dictionary["weekday"] = SU(week_int)
        else:
            logger.error("Wrong day code")
    return dictionary
示例#9
0
def parse(raw_date):
    weekday = {
        'monday': MO(-1),
        'tuesday': TU(-1),
        'wednesday': WE(-1),
        'thursday': TH(-1),
        'friday': FR(-1),
        'saturday': SA(-1),
        'sunday': SU(-1),
    }
    if raw_date == 'today':
        date = datetime.now().date()
    elif raw_date == 'yesterday':
        date = datetime.now().date() - relativedelta(days=1)
    elif raw_date in weekday:
        date = datetime.now().date() + relativedelta(weekday=weekday[raw_date])
    elif re.match('\d{4}-w\d{2}', raw_date) is not None:
        fromDate = datetime.strptime(raw_date + '-1', "%Y-W%W-%w").date()
        toDate = fromDate + relativedelta(days=7)
        return fromDate, toDate
    else:
        date = datetime.strptime(raw_date, '%Y-%m-%d')
    toDate = date + relativedelta(days=1)
    return date, toDate
示例#10
0
import vk_api
import datetime
import random
from datetime import date
import time
from dateutil.relativedelta import relativedelta, MO, WE, TH, TU, SU, FR, SA
import requests
import xml.etree.ElementTree as et
from vk_api.longpoll import VkLongPoll, VkEventType

now = datetime.datetime.now()
today = date.today()
monday = (today + relativedelta(weekday=MO(-1))).day
tuesday = (today + relativedelta(weekday=TU(-1))).day
wednesday = (today + relativedelta(weekday=WE(-1))).day
thursday = (today + relativedelta(weekday=TH(-1))).day
friday = (today + relativedelta(weekday=FR(-1))).day
saturday = (today + relativedelta(weekday=SA(-1))).day
mon = now.month
moth = ""
user_login = {}
user_password = {}
if mon == 1:
    moth = "Январь"
if mon == 2:
    moth = "Феварль"
if mon == 3:
    moth = "Март"
if mon == 4:
    moth = "Апрель"
if mon == 5:
示例#11
0
    def _populate(self, year):
        # New Year's Day
        self[date(year, JAN, 1)] = "Ano novo"

        self[date(year, APR, 21)] = "Tiradentes"

        self[date(year, MAY, 1)] = "Dia Mundial do Trabalho"

        self[date(year, SEP, 7)] = "Independência do Brasil"

        self[date(year, OCT, 12)] = "Nossa Senhora Aparecida"

        self[date(year, NOV, 2)] = "Finados"

        self[date(year, NOV, 15)] = "Proclamação da República"

        # Christmas Day
        self[date(year, DEC, 25)] = "Natal"

        self[easter(year) - rd(days=2)] = "Sexta-feira Santa"

        self[easter(year)] = "Páscoa"

        self[easter(year) + rd(days=60)] = "Corpus Christi"

        quaresma = easter(year) - rd(days=46)
        self[quaresma] = "Quarta-feira de cinzas (Início da Quaresma)"

        self[quaresma - rd(weekday=TU(-1))] = "Carnaval"

        if self.state == 'AC':
            self[date(year, JAN, 23)] = "Dia do evangélico"
            self[date(year, JUN, 15)] = "Aniversário do Acre"
            self[date(year, SEP, 5)] = "Dia da Amazônia"
            self[date(year, NOV, 17)] = "Assinatura do Tratado de" \
                                        " Petrópolis"

        if self.state == 'AL':
            self[date(year, JUN, 24)] = "São João"
            self[date(year, JUN, 29)] = "São Pedro"
            self[date(year, SEP, 16)] = "Emancipação política de Alagoas"
            self[date(year, NOV, 20)] = "Consciência Negra"

        if self.state == 'AP':
            self[date(year, MAR, 19)] = "Dia de São José"
            self[date(year, JUL, 25)] = "São Tiago"
            self[date(year, OCT, 5)] = "Criação do estado"
            self[date(year, NOV, 20)] = "Consciência Negra"

        if self.state == 'AM':
            self[date(year, SEP, 5)] = "Elevação do Amazonas" \
                " à categoria de província"
            self[date(year, NOV, 20)] = "Consciência Negra"
            self[date(year, DEC, 8)] = "Dia de Nossa Senhora da Conceição"

        if self.state == 'BA':
            self[date(year, JUL, 2)] = "Independência da Bahia"

        if self.state == 'CE':
            self[date(year, MAR, 19)] = "São José"
            self[date(year, MAR, 25)] = "Data Magna do Ceará"

        if self.state == 'DF':
            self[date(year, APR, 21)] = "Fundação de Brasília"
            self[date(year, NOV, 30)] = "Dia do Evangélico"

        if self.state == 'ES':
            self[date(year, OCT, 28)] = "Dia do Servidor Público"

        if self.state == 'GO':
            self[date(year, OCT, 28)] = "Dia do Servidor Público"

        if self.state == 'MA':
            self[date(year, JUL, 28)] = "Adesão do Maranhão" \
                " à independência do Brasil"
            self[date(year, DEC, 8)] = "Dia de Nossa Senhora da Conceição"

        if self.state == 'MT':
            self[date(year, NOV, 20)] = "Consciência Negra"

        if self.state == 'MS':
            self[date(year, OCT, 11)] = "Criação do estado"

        if self.state == 'MG':
            self[date(year, APR, 21)] = "Data Magna de MG"

        if self.state == 'PA':
            self[date(year, AUG, 15)] = "Adesão do Grão-Pará" \
                " à independência do Brasil"

        if self.state == 'PB':
            self[date(year, AUG, 5)] = "Fundação do Estado"

        if self.state == 'PE':
            self[date(year, MAR, 6)] = "Revolução Pernambucana (Data Magna)"
            self[date(year, JUN, 24)] = "São João"

        if self.state == 'PI':
            self[date(year, MAR, 13)] = "Dia da Batalha do Jenipapo"
            self[date(year, OCT, 19)] = "Dia do Piauí"

        if self.state == 'PR':
            self[date(year, DEC, 19)] = "Emancipação do Paraná"

        if self.state == 'RJ':
            self[date(year, APR, 23)] = "Dia de São Jorge"
            self[date(year, OCT, 28)] = "Dia do Funcionário Público"
            self[date(year, NOV, 20)] = "Zumbi dos Palmares"

        if self.state == 'RN':
            self[date(year, JUN, 29)] = "Dia de São Pedro"
            self[date(year, OCT, 3)] = "Mártires de Cunhaú e Uruaçuu"

        if self.state == 'RS':
            self[date(year, SEP, 20)] = "Revolução Farroupilha"

        if self.state == 'RO':
            self[date(year, JAN, 4)] = "Criação do estado"
            self[date(year, JUN, 18)] = "Dia do Evangélico"

        if self.state == 'RR':
            self[date(year, OCT, 5)] = "Criação de Roraima"

        if self.state == 'SC':
            self[date(year, AUG, 11)] = "Criação da capitania," \
                " separando-se de SP"

        if self.state == 'SP':
            self[date(year, JUL, 9)] = "Revolução Constitucionalista de 1932"

        if self.state == 'SE':
            self[date(year, JUL, 8)] = "Autonomia política de Sergipe"

        if self.state == 'TO':
            self[date(year, JAN, 1)] = "Instalação de Tocantins"
            self[date(year, SEP, 8)] = "Nossa Senhora da Natividade"
            self[date(year, OCT, 5)] = "Criação de Tocantins"
示例#12
0
 def handle(self) -> date:
     return self.result + relativedelta(weekday=TU(self.number))
示例#13
0
文件: explorer.py 项目: michal3141/ed
import pymongo
import matplotlib.pyplot as plt
import os
from collections import namedtuple, defaultdict, deque
from datetime import date
from dateutil.relativedelta import relativedelta, MO, TU, WE, TH, FR, SA, SU
from graphviz import Graph, Digraph
from itertools import groupby, product
from operator import itemgetter
from utils import _sanitize_username, _sanitize_question

# This is needed when obtaining date for last Monday, Tuesday, etc.
WEEKDAYS = {
    'Mon': MO(-1),
    'Tue': TU(-1),
    'Wed': WE(-1),
    'Thu': TH(-1),
    'Fri': FR(-1),
    'Sat': SA(-1),
    'Sun': SU(-1)
}

# Ordering months.
MONTHS = {
    'Jan': 1,
    'Feb': 2,
    'Mar': 3,
    'Apr': 4,
    'May': 5,
    'Jun': 6,
    def create_next_event(self):
        current = fields.Date.today()
        current_date = datetime.strptime(current, '%Y-%m-%d')
        current_day = current_date.weekday()
        dpth = 1

        event_ids = self.env['event.event'].search([('is_template', '=', True),
                                                    ('recurrency', '=', True),
                                                    ('state', '!=', 'done')])
        for event in event_ids:
            if event.end_type == 'count':
                event.counter += 1
            week_list = []
            while dpth <= event.depth:

                if event.mo:
                    event_date = current_date + relativedelta(weekday=MO(dpth))
                    week_list.append(event_date)
                if event.tu:
                    event_date = current_date + relativedelta(weekday=TU(dpth))
                    week_list.append(event_date)
                if event.we:
                    event_date = current_date + relativedelta(weekday=WE(dpth))
                    week_list.append(event_date)
                if event.th:
                    event_date = current_date + relativedelta(weekday=TH(dpth))
                    week_list.append(event_date)
                if event.fr:
                    event_date = current_date + relativedelta(weekday=FR(dpth))
                    week_list.append(event_date)
                if event.sa:
                    event_date = current_date + relativedelta(weekday=SA(dpth))
                    week_list.append(event_date)
                if event.su:
                    event_date = current_date + relativedelta(weekday=SU(dpth))
                    week_list.append(event_date)

                event_tmpl_start_date = event.date_begin
                event_tmpl_start_time = datetime.strptime(
                    event_tmpl_start_date, "%Y-%m-%d %H:%M:%S").time()

                event_tmpl_end_date = event.date_end
                event_tmpl_end_time = datetime.strptime(
                    event_tmpl_end_date, "%Y-%m-%d %H:%M:%S").time()

                dpth += 1

                for event_day in week_list:

                    event_start_date = datetime.combine(
                        event_day, event_tmpl_start_time)
                    event_end_date = datetime.combine(event_day,
                                                      event_tmpl_end_time)

                    date = event_start_date.strftime('%Y-%m-%d %H:%M:%S')
                    final_date = event.final_date
                    day = event_day.strftime('%Y-%m-%d')
                    event_find = self.env['event.event'].search([
                        ('template_id', '=', event.id),
                        ('date_begin', '=', date)
                    ])
                    if event.end_type == 'end_date' and day > event.final_date:
                        event.state = 'done'
                        return True
                    elif event.end_type == 'count' and event.counter > event.count:
                        event.state = 'done'
                        return True
                    elif not event_find:
                        new_event_id = event.copy(
                            default={
                                'is_template': False,
                                'date_begin': event_start_date,
                                'date_end': event_end_date,
                                'recurrency': False,
                                'template_id': event.id,
                            })
def main():

    #MAIN IDEA:
    #Identify the sales volume and ROAS per margin and display as a bubble chart in data studio
    c = CurrencyConverter()

    print('------------Product Margins------------')
    #analytics dates to pull individually
    #NEED TO REWORK THIS TO HAVE LESS API REQUESTS
    today = date.today()
    mon = today + relativedelta(
        weekday=MO(-2)
    )  #last MON: -2 because it will run every monday so we need to look 2 mondays back
    tue = today + relativedelta(weekday=TU(-1))  #last TUE
    wed = today + relativedelta(weekday=WE(-1))  #last WED
    thu = today + relativedelta(weekday=TH(-1))  #last THU
    fri = today + relativedelta(weekday=FR(-1))  #last FRI
    sat = today + relativedelta(weekday=SA(-1))  #last SAT
    sun = today + relativedelta(weekday=SU(-1))  #last SUN
    weekArr = [mon, tue, wed, thu, fri, sat, sun]

    analyticsCSV = ""
    #pull for each day of the week - avoids data sampling this way but takes a bit longer
    for day in weekArr:
        analyticsCSV += analytics_pull.main(str(day))
        print("Pulled date: " + str(day))
    dfAnalytics = csv_parser.parseToCSV(analyticsCSV, "ANL")

    dfAnalytics['date'] = dfAnalytics.apply(
        lambda row: match_maker.CheckDateFormatAnalytics(row), axis=1)
    dfAnalytics = dfAnalytics[dfAnalytics.date != -1]
    dfAnalytics['date'] = dfAnalytics.apply(lambda row: formatDate(row),
                                            axis=1)

    print('-----Analytics Dataframe-----')

    #adwords
    adwordsCSV = adwords_pull.get_raw_report()

    dfAdwords = csv_parser.parseToCSV(adwordsCSV, "ADW")

    #REMOVE THE SPACE AFTER THE GID - WILL VARY DEPENDING ON CLIENTS
    GIDnew = []
    for dfAdwordsIndex, dfAdwordsRow in dfAdwords.iterrows():
        newGID = dfAdwordsRow['GID'].partition(" ")[0]

        GIDnew.append(newGID)

    dfAdwords = dfAdwords.drop(columns="GID")
    dfAdwords.loc[:, "GID"] = GIDnew

    print('-----Adwords Dataframe-----')

    sql = '''
    SELECT dfAdwords.Date, SUM(dfAdwords.Cost) AS Cost, dfAdwords.GID
    FROM dfAdwords
    GROUP BY dfAdwords.Date, dfAdwords.GID
    '''
    dfAdwords = pandasql.sqldf(sql, locals())

    print('-----Grouped Adwords Dataframe-----')

    #XML
    dfXML = process_xml.main(margins=True)

    dfXML['GID'] = dfXML['GID'].str.lower()
    dfAnalytics['productSku'] = dfAnalytics['productSku'].str.lower()

    print('-----Default XML Dataframe-----')

    sql = '''
    SELECT dfXML.Margin, dfXML.GID, dfXML.Brand
    FROM dfXML
    GROUP BY dfXML.Margin, dfXML.GID, dfXML.Brand
    '''

    dfXML = pandasql.sqldf(sql, locals())

    print('-----Grouped XML Dataframe-----')

    sql = '''
    SELECT dfXML.Margin, dfXML.Brand, dfAnalytics.date, dfAnalytics.itemQuantity, dfAnalytics.productSku,
    dfAnalytics.productName, dfAnalytics.itemRevenue
    FROM dfAnalytics
    INNER JOIN dfXML ON dfXML.GID = dfAnalytics.productSku
    '''
    dfXML = pandasql.sqldf(sql, locals())

    print('-----Joined XML Dataframe-----')
    print(dfXML)

    sql = '''
    SELECT dfXML.Margin, dfXML.Brand, dfXML.date, dfXML.itemQuantity, dfXML.productSku,
    dfXML.productName, dfXML.itemRevenue, dfAdwords.Cost
    FROM dfXML
    INNER JOIN dfAdwords ON dfXML.Date = dfAdwords.Date AND dfXML.productSku = dfAdwords.GID
    '''

    dfXML = pandasql.sqldf(sql, locals())
    print('-----Joined XML Dataframe-----')
    print(dfXML)

    newCost = []
    newROAS = []
    for dfXMLIndex, dfXMLRow in dfXML.iterrows():
        newVal = c.convert(dfXMLRow['Cost'] / 1000000, 'USD', 'YOUR_CURRENCY')
        newCost.append(newVal)
        if float(newVal) <= 0:
            newROAS.append(0)
        else:
            newROAS.append(float(dfXMLRow['itemRevenue']) / float(newVal))

    dfXML.loc[:, "newCost"] = newCost
    dfXML.loc[:, "newROAS"] = newROAS

    print('-----Final XML Dataframe-----')

    sql = '''
    SELECT 
    date,
    Margin,
    (SUM(CAST(itemRevenue as float64)) / sum(CAST(newCost as float64))) as ROAS,
    SUM(CAST(itemQuantity as int64)) as salesVolume
    FROM dfXML
    GROUP BY Margin, date
    '''
    dfXML = pandasql.sqldf(sql, locals())

    dfXML.to_gbq('DATASET_NAME.TABLE_NAME',
                 project_id='YOUR_PROJECT_ID',
                 chunksize=None,
                 if_exists='append',
                 table_schema=None,
                 location='LOCATION',
                 progress_bar=True,
                 credentials=google_auth.getCreds())
示例#16
0
from datetime import date
from dateutil.relativedelta import relativedelta, TU

format = "%Y-%m-%d"
today = date.today()
sunday = today + relativedelta(weekday=TU(-1))
date = sunday.strftime(format)
print(date)
def extract_target_location():
    last_monday = date.today() + relativedelta(weekday=TU(-2))
    s3_target_location = f"week_start={last_monday}"
    return s3_target_location
        '/usr/local/bin/run-pan-bbc-churn-predictions.sh',
    ]
}
aws_job_submission = AWSBatchOperator(
    task_id='aws-batch-job-submission',
    job_name='airflow-job-submission-and-run-' +
    datetime.today().strftime('%Y-%m-%d'),
    job_definition='live-Airflow-Pan-BBC-Churn-Classification-final',
    job_queue='live-Airflow-Pan-BBC-Churn--JobQueue',
    overrides=command,
    aws_conn_id='aws_default',
    dag=dag)

## Copy shap values and propensity scores to historical storage

last_monday = date.today() + relativedelta(weekday=TU(-2))
historical_storage = 'historical-storage'
s3_target_location = f"{historical_storage}/week_start={last_monday}/"
'''
export AWS_DEFAULT_REGION=eu-west-1
ENV=${ENV:-int}

current_date=`python get_current_week_start.py`
echo ${current_date}

'''
last_monday = date.today() + relativedelta(weekday=MO(-1))
s3_target_location = f"week_start={last_monday}"

source_propensity_scores_path = "data/output/loyalty-propensity-scores/"
target_propensity_scores_path = "historical-storage/" + s3_target_location + "/loyalty-propensity-scores/"
示例#19
0
# 날짜의 차이를 계산
from dateutil.relativedelta import relativedelta
from datetime import datetime, date
now = datetime.now()  # 현재 일시를 얻음
today = date.today()  # 현재 날자를 얻음
print('한달후=', now + relativedelta(months=+1))  # 한달후
print('한달전의 일주일전=', now + relativedelta(months=-1, weeks=+1))  # 한달전의 일주일전
print('한달후 10시=', today + relativedelta(months=+1, hour=10))  # 한달후 10시   10시지정

print('{:->100}'.format(''))
# 요일 지정
from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU
print('다음 금요일=', today + relativedelta(weekday=FR))  # 다음 금요일  요일 계산처리
print('이번달의 마지막 금요일=',
      today + relativedelta(day=31, weekday=FR(-1)))  # 이번달의 마지막 금요일
print('다음 화요일=', today + relativedelta(weekday=TU(+1)))
print('오늘을 제외한 다음 화요일=',
      today + relativedelta(days=+1, weekday=TU(+1)))  #오늘이 화요일이여도 무조건 다음주 화요일

print('{:->100}'.format(''))
print('2015년의 100일째 =', date(2015, 1, 1) + relativedelta(yearday=100))
print('2015년의 100일째(날짜상관없이 연도부터 센다) =',
      date(2015, 10, 11) + relativedelta(yearday=100))
print('2012년 100일째=', date(2012, 1, 1) + relativedelta(yearday=100))
print('2012년 윤일제외한 일자=', date(2012, 1, 1) + relativedelta(nlyearday=100))
print(relativedelta(date(2015, 1, 1), today))

print('{:->100}'.format(''))
# rrule은 달력 어플등에서 반복을 지정하기 위해 자주 사용,
from dateutil.rrule import rrule
from dateutil.rrule import DAILY, MONTHLY, WEEKLY
示例#20
0
)
USIndependenceDay = Holiday(
    'July 4th',
    month=7,
    day=4,
    start_date=Timestamp('1954-01-01'),
    observance=nearest_workday,
)
# http://www.tradingtheodds.com/nyse-full-day-closings/
USElectionDay1848to1967 = Holiday(
    'Election Day',
    month=11,
    day=2,
    start_date=Timestamp('1848-1-1'),
    end_date=Timestamp('1967-12-31'),
    offset=DateOffset(weekday=TU(1)),
)
# http://www.tradingtheodds.com/nyse-full-day-closings/
USElectionDay1968to1980 = Holiday(
    'Election Day',
    month=11,
    day=2,
    start_date=Timestamp('1968-01-01'),
    end_date=Timestamp('1980-12-31'),
    observance=following_tuesday_every_four_years_observance)
# http://www.tradingtheodds.com/nyse-full-day-closings/
USVeteransDay1934to1953 = Holiday(
    'Veteran Day',
    month=11,
    day=11,
    start_date=Timestamp('1934-1-1'),
示例#21
0
def main(filePath):
    c = CurrencyConverter()
    today = date.today()

    mon = today + relativedelta(weekday=MO(-2)) #last MON
    tue = today + relativedelta(weekday=TU(-1)) #last TUE
    wed = today + relativedelta(weekday=WE(-1)) #last WED
    thu = today + relativedelta(weekday=TH(-1)) #last THU
    fri = today + relativedelta(weekday=FR(-1)) #last FRI
    sat = today + relativedelta(weekday=SA(-1)) #last SAT
    sun = today + relativedelta(weekday=SU(-1)) #last SUN
    weekArr = [mon,tue,wed,thu,fri,sat,sun]

    print("Executing for dates: ")
    for day in weekArr:
        print(day)
        print(day.strftime("%m"))
    
    countryArr = {"US": "ACCOUNT NAME"}
    
    print('loading XML dataframe...')
    dfXML = process_xml.main()

    print('loading adwords csv...')
    adwordsCSV = adwords_pull.get_raw_report()

    print('parsing adwords into dataframe...')
    dfCSV = csv_parser.parseToCSV(adwordsCSV,"ADW")

    print('parsing analytics into dataframe...')
    analyticsCSV = ""
    #pull for each day of the week - avoids data sampling this way but takes a bit longer
    #THIS NEEDS TO BE REFACTORED IN THE FUTURE
    for day in weekArr:#
        analyticsCSV += analytics_pull.main(str(day))
        print("Pulled date: " + str(day))
    dfAnalytics = csv_parser.parseToCSV(analyticsCSV,"ANL")
    
    dfAnalytics['date'] = dfAnalytics.apply(lambda row: CheckDateFormatAnalytics(row), axis=1)
    dfAnalytics = dfAnalytics[dfAnalytics.date != -1]

    #add month and Year to analytics
    dfAnalytics['monthNum'] = dfAnalytics.apply(lambda row: ApplyMonth(row,"ANL"), axis=1)
    dfAnalytics['yearNum'] = dfAnalytics.apply(lambda row: ApplyYear(row,"ANL"), axis=1)

    print("Done Processing APIs...")
    print("Processing Data...")

    dfXML['ID'] = dfXML['ID'].str.lower()
    dfCSV['GID'] = dfCSV['GID'].str.lower()
    dfXML['GID'] = dfXML['GID'].str.lower()
    dfCSV['OfferID'] = dfCSV['OfferID'].str.lower()


    #FILER US ONLY
    sql = f'''
    SELECT *
    FROM dfCSV 
    WHERE dfCSV.Account = "{countryArr["US"]}"
    '''
    dfCSV = pandasql.sqldf(sql, locals())

    dfCSV['monthNum'] = dfCSV.apply(lambda row: ApplyMonth(row), axis=1)
    dfCSV['yearNum'] = dfCSV.apply(lambda row: ApplyYear(row), axis=1)  

    #REMOVE THE SPACE AFTER THE GID
    GIDnew = []
    for dfCSVIndex, dfCSVRow in dfCSV.iterrows(): 
        newGID = dfCSVRow['GID'].partition(" ")[0]

        GIDnew.append(newGID)
    print(len(GIDnew))
    print(dfCSV.shape)
    dfCSV=dfCSV.drop(columns="GID")
    dfCSV.loc[:,"GID"] = GIDnew
    print("processing csv to xml matches for linking table")
    #take the XML feed and the adwords report and EXACT match the OfferIDs together
    #ATTACH GTINS TO PRODUCTS
    #   GOAL IS TO MATCH GTIN ON EVERY GROUP ID FROM ADWORDS
    #   BEST WAY TO DO IT IS GROUP ADWORDS DF BY GID TO MAKE THE FILE SMALLER
    sqlGroupAdwords = '''
    SELECT dfCSV.GID,
    SUM(dfCSV.Impressions) AS Impressions,
    SUM(dfCSV.Clicks) AS Clicks,
    SUM(dfCSV.Cost) AS Cost,
    SUM(dfCSV.ConversionValue) AS ConversionValue,
    dfCSV.monthNum as monthNum, 
    dfCSV.yearNum as yearNum,
    dfCSV.Account as Account
    FROM dfCSV
    GROUP BY dfCSV.GID, dfCSV.Account, dfCSV.monthNum, dfCSV.yearNum
    '''
    # DROP UNUSED DF
    dfCSV.iloc[0:0]
    # WRITE NEW DF
    adwordsGrpDF = pandasql.sqldf(sqlGroupAdwords, locals())

    sql = '''
    SELECT dfXML.GTIN as GTIN, 
    dfXML.ID as ID,
    dfXML.GID as GID,
    dfXML.IMG as IMG, 
    dfXML.BRAND as BRAND, 
    adwordsGrpDF.monthNum as monthNum, 
    adwordsGrpDF.yearNum as yearNum,
    adwordsGrpDF.Impressions as Impressions, 
    adwordsGrpDF.Clicks as Clicks, 
    adwordsGrpDF.ConversionValue as ConversionValue,
    adwordsGrpDF.Cost as Cost
    FROM dfXML, adwordsGrpDF 
    WHERE dfXML.GID = adwordsGrpDF.GID
    '''
    # DROP UNUSED DF
    adwordsGrpDF.iloc[0:0]
    dfXML.iloc[0:0]

    # WRITE NEW DF
    linkingTable = pandasql.sqldf(sql, locals())
    
    print("-------XML AND ADWORDS MATCHED ON OfferID VIA SQL-------")
    print("-------LINKS GTIN TO THE OFFER ID === THEREFORE GTIN TO THE Adwords Metrics-------")

    #At this point the linkingTable DF holds the adwords stats and the gtin of the products

    #popular products CVS from import into df
    #imported CSV in dataframe form
    print('parsing imported CSV into dataframe...')
    print('----------------impoted CSV dataframe----------------')
    dfICSV = process_csv.main(filePath)
    #print(dfICSV)

    #match popular products with linkingTable
    dfICSV.rename(columns={'PopularityRank' : 'Pop'},inplace=True)

    #order by popularity
    dfICSV.sort_values(by=['Pop'])

    #TAKE THE FIRST 2000 RECORDS FROM THE POPULAR PRODUCTS CSV INTO A NEW DF - This can be any number between 0 and about 10k
    dfPopM = dfICSV[:2000]

    print("-----GET PRODUCTS THEY DONT SELL-----")
    sqlNotSold = '''
    SELECT Pop, ChangeFromLastWeek, Title, GTIN, Brand, PriceRangeStart, PriceRangeEnd, WeekNumber
    FROM dfPopM
    WHERE ProductInventoryStatus = "Not in inventory"
    ORDER BY Pop
    '''
    dfNotSold = pandasql.sqldf(sqlNotSold, locals())

    #match popular products with linkingTable (adwords stats)
    count = 0
    print("-----Processing matches-----")

    sql = '''
    SELECT linkingTable.GID,
    linkingTable.Impressions,
    linkingTable.Cost,
    linkingTable.Clicks,
    linkingTable.ConversionValue,
    linkingTable.IMG,
    linkingTable.BRAND,
    linkingTable.monthNum,
    linkingTable.yearNum,
    dfPopM.Pop
    FROM linkingTable, dfPopM
    WHERE SUBSTR(linkingTable.GTIN,0,LENGTH(linkingTable.GTIN) - 1) = SUBSTR(dfPopM.GTIN,0,LENGTH(dfPopM.GTIN) - 3)

    '''
    dfPopM.iloc[0:0]

    dfPopXmlMatch = pandasql.sqldf(sql, locals())

    sqlGroupMatchTable = '''
    SELECT dfPopXmlMatch.Pop,
    dfPopXmlMatch.GID
    FROM dfPopXmlMatch
    GROUP BY dfPopXmlMatch.Pop,dfPopXmlMatch.GID
    '''
    dfPopXmlMatch = pandasql.sqldf(sqlGroupMatchTable, locals())
    
    sqlMatchAll = '''
    SELECT linkingTable.GID,
    linkingTable.Impressions,
    linkingTable.Cost,
    linkingTable.Clicks,
    linkingTable.ConversionValue,
    linkingTable.IMG,
    linkingTable.BRAND,
    linkingTable.monthNum,
    linkingTable.yearNum,
    dfPopXmlMatch.Pop
    FROM linkingTable, dfPopXmlMatch
    WHERE dfPopXmlMatch.GID = linkingTable.GID
    GROUP BY linkingTable.GID,
    linkingTable.Impressions,
    linkingTable.Cost,
    linkingTable.Clicks,
    linkingTable.ConversionValue,
    linkingTable.IMG,
    linkingTable.monthNum,
    linkingTable.yearNum,
    dfPopXmlMatch.Pop
    '''
    linkingTable.iloc[0:0]
    dfPopXmlMatch = pandasql.sqldf(sqlMatchAll, locals())

    #group the results by date and IDNew while adding the metrics together
    sqlGroup = '''
    SELECT dfPopXmlMatch.GID as GID, 
    SUM(dfPopXmlMatch.Impressions) as Impressions, 
    SUM(dfPopXmlMatch.Cost) as Cost, 
    SUM(dfPopXmlMatch.Clicks) as Clicks, 
    SUM(dfPopXmlMatch.ConversionValue) as ConversionValue, 
    dfPopXmlMatch.IMG as IMG,
    dfPopXmlMatch.BRAND as BRAND,
    dfPopXmlMatch.monthNum as monthNum,
    dfPopXmlMatch.yearNum as yearNum,
    dfPopXmlMatch.Pop
    FROM dfPopXmlMatch 
    GROUP BY dfPopXmlMatch.GID, dfPopXmlMatch.IMG, dfPopXmlMatch.Pop, dfPopXmlMatch.monthNum , dfPopXmlMatch.yearNum 
    '''
    dfPopXmlMatch = pandasql.sqldf(sqlGroup, locals())
    print('---------GROUPED THE ABOVE---------')

    sqlGroupAnalytics = '''
    SELECT dfAnalytics.productName as productName, 
    dfAnalytics.productSku as productSku,
    dfAnalytics.country as country,
    dfAnalytics.monthNum as monthNum,
    dfAnalytics.yearNum as yearNum,
    SUM(dfAnalytics.itemRevenue) as itemRevenue,
    AVG(dfAnalytics.buyToDetailRate) as buyToDetailRate
    FROM dfAnalytics
    WHERE dfAnalytics.country = "United States"
    GROUP BY dfAnalytics.country, dfAnalytics.productSku, dfAnalytics.productName, dfAnalytics.monthNum , dfAnalytics.yearNum
    '''
    dfAnalytics = pandasql.sqldf(sqlGroupAnalytics, locals())
    sql = '''
    SELECT dfPopXmlMatch.GID as ID, 
    dfPopXmlMatch.Impressions as Impressions, 
    dfPopXmlMatch.Cost as Cost, 
    dfPopXmlMatch.Clicks as Clicks, 
    dfPopXmlMatch.ConversionValue as ConversionValue, 
    dfPopXmlMatch.IMG as IMG, 
    dfPopXmlMatch.BRAND as BRAND, 
    dfPopXmlMatch.Pop as Pop,
    dfAnalytics.itemRevenue as itemRev, 
    dfAnalytics.buyToDetailRate as buyToDetailRate, 
    dfAnalytics.productName as productName, 
    dfAnalytics.country as country,
    dfAnalytics.monthNum as monthNum, 
    dfAnalytics.yearNum as yearNum
    FROM dfPopXmlMatch, dfAnalytics 
    WHERE LOWER(dfPopXmlMatch.GID) = LOWER(dfAnalytics.productSku) 
    AND dfAnalytics.monthNum = dfPopXmlMatch.monthNum
    AND dfAnalytics.yearNum = dfPopXmlMatch.yearNum
    '''
    dfPopXmlMatch.iloc[0:0]
    dfFinal = pandasql.sqldf(sql, locals())

    ######### currency converting start #########
    #COMMENT OUT THE BELOW IF YOU DONT NEED TO CONVER CURRENCY - MORE EFFICIENT WAY IN product_sizes.py - make into method in futurre updates
    newCost = []
    newROAS = []
    for dfFinalIndex, dfFinalRow in dfFinal.iterrows(): 
        newVal = c.convert(dfFinalRow['Cost'] / 1000000, 'USD', 'YOUR_CURRENCY')
        newCost.append(newVal)
        if float(newVal) <= 0:
            newROAS.append(0)
        else:
            newROAS.append(float(dfFinalRow['itemRev']) / float(newVal))

    dfFinal.loc[:,"newCost"] = newCost
    dfFinal.loc[:,"newROAS"] = newROAS
    ######### currency converting end #########

    #add a new column to df and fill it with the above week number
    weekNo = int(mon.strftime("%W")) + 1
    dfFinal["WeekNumber"] = str(weekNo) #if not string, data studio gives an error

    print("Processing Complete...")
    
    #Products they dont sell dfNotSold
    dfNotSold.to_gbq('DATASET_NAME.TABLE_NAME',
                            project_id='YOUR_PROJECT_ID', 
                            chunksize=None, 
                            if_exists='append', 
                            table_schema=None, 
                            location='LOCATION', 
                            progress_bar=True, 
                            credentials=google_auth.getCreds())

    #linked table with what they have sold dfFinal
    dfFinal.to_gbq('DATASET_NAME.TABLE_NAME', #TABLE_NAME here is refered to as CLIENT_DO_SELL in the SQL files
                            project_id='YOUR_PROJECT_ID', 
                            chunksize=None, 
                            if_exists='append', 
                            table_schema=None, 
                            location='LOCATION', 
                            progress_bar=True, 
                            credentials=google_auth.getCreds())

    #the csv from google mc
    dfICSV = dfICSV[:2000] #clip it if needed here 
    dfICSV.to_gbq('DATASET_NAME.TABLE_NAME',
                            project_id='YOUR_PROJECT_ID', 
                            chunksize=None, 
                            if_exists='append', 
                            table_schema=None, 
                            location='LOCATION', 
                            progress_bar=True, 
                            credentials=google_auth.getCreds())
    dfFinal.iloc[0:0]
    dfICSV.iloc[0:0]
示例#22
0
def following_tuesday_every_four_years_observance(dt):
    return dt + DateOffset(years=(4 - (dt.year % 4)) % 4, weekday=TU(1))
示例#23
0
# This file contains all the functions that the workflow
# uses for specialised dates.
from math import floor

from arrow.arrow import datetime, timedelta
# The DAY_MAP is specific to relative delta
from date_format_mappings import DATE_MAPPINGS, TIME_MAPPINGS, DATE_TIME_MAPPINGS
from dateutil.relativedelta import relativedelta, MO, TU, WE, TH, FR, SA, SU
from dateutil.rrule import rrule, YEARLY, DAILY

DAY_MAP = {
    "mon": relativedelta(days=+1, weekday=MO(+1)),
    "tue": relativedelta(days=+1, weekday=TU(+1)),
    "wed": relativedelta(days=+1, weekday=WE(+1)),
    "thu": relativedelta(days=+1, weekday=TH(+1)),
    "fri": relativedelta(days=+1, weekday=FR(+1)),
    "sat": relativedelta(days=+1, weekday=SA(+1)),
    "sun": relativedelta(days=+1, weekday=SU(+1)),
    "prev mon": relativedelta(days=-1, weekday=MO(-1)),
    "prev tue": relativedelta(days=-1, weekday=TU(-1)),
    "prev wed": relativedelta(days=-1, weekday=WE(-1)),
    "prev thu": relativedelta(days=-1, weekday=TH(-1)),
    "prev fri": relativedelta(days=-1, weekday=FR(-1)),
    "prev sat": relativedelta(days=-1, weekday=SA(-1)),
    "prev sun": relativedelta(days=-1, weekday=SU(-1)),
}

DAYS_OF_WEEK_ABBREVIATIONS = {
    "mon": "monday",
    "tue": "tuesday",
    "wed": "wednesday",