Example #1
    def prepare_education_train_data(self, cv_data):
        education = cv_data.get('education', {})
        schools = education.get('SchoolOrInstitution', [])

        educations = []
        for school in schools:
            school_name = school['School'][0].get(
                'SchoolName', '') if school.get('School', []) else ''

            degrees = school.get('Degree', [])
            for degree in degrees:
                degree_name = degree.get('DegreeName', '')
                comment = re.sub(r'\s', ' ', degree.get('Comments', ''))

                date_of_attendance = degree['DatesOfAttendance'][
                    0] if degree.get('DatesOfAttendance', []) else {}
                start_date, end_date = get_dates(date_of_attendance)

                entities = []
                label_entity(comment, entities, degree_name, 'DEGREE')
                label_entity(comment, entities, school_name, 'ORG')
                label_entity(comment, entities, start_date, 'START_DATE')
                label_entity(comment, entities, end_date, 'END_DATE')
                educations.append((comment, {'entities': entities}))
        return educations
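The snippet above relies on two helpers that the listing does not show. A minimal sketch consistent with the call sites, where every name and field is an assumption rather than the project's actual code:

# Hypothetical sketches, inferred only from how the helpers are called above.
def label_entity(text, entities, value, label):
    # Append a (start, end, label) span if `value` occurs in `text`.
    start = text.find(value) if value else -1
    if start != -1:
        entities.append((start, start + len(value), label))

def get_dates(date_of_attendance):
    # Assumed: extract start/end date strings from one
    # DatesOfAttendance entry of the parsed CV.
    return (date_of_attendance.get('StartDate', ''),
            date_of_attendance.get('EndDate', ''))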
Example #2
def analyze_enis(account_id,
                 bucket,
                 prefix,
                 enis,
                 ips,
                 start,
                 end,
                 store_dir,
                 ipdb=None,
                 cmdb=None,
                 ipranges=None,
                 region=None,
                 reject=None,
                 targets=None,
                 ports=None,
                 tz=None,
                 sample_count=20):

    logging.basicConfig(level=logging.INFO)
    logging.getLogger('botocore').setLevel(logging.WARNING)
    ports = list(map(int, ports))  # materialize: a bare map() iterator would be exhausted after the first ENI
    start, end = get_dates(start, end, tz)
    client = boto3.client('s3')
    log_prefix = "%s/%s/flow-log/%s/%s" % (
        prefix.rstrip('/'), account_id, start.strftime('%Y/%m/%d'),
        "00000000-0000-0000-0000-000000000000")

    resolver = IPResolver(ipdb, cmdb, ipranges)

    agg_in_traffic = Counter()
    agg_out_traffic = Counter()
    agg_inport_traffic = Counter()
    agg_outport_traffic = Counter()

    for eni, ip in zip(enis, ips):
        files = eni_download_flows(client, bucket, log_prefix, start, end, eni,
                                   store_dir)

        in_traffic, out_traffic, inport_traffic, outport_traffic = eni_log_analyze(
            set(ips),
            eni_flow_stream(files, start, end),
            start=start,
            end=end,
            reject=reject,
            target_ips=targets,
            ports=ports)
        agg_in_traffic.update(in_traffic)
        agg_out_traffic.update(out_traffic)
        agg_inport_traffic.update(inport_traffic)
        agg_outport_traffic.update(outport_traffic)

    print("Inbound %d Most Commmon" % sample_count)
    for ip, bcount in resolve_ip_address(agg_in_traffic, resolver, start,
                                         end).most_common(sample_count):
        print("%s %s" % ip, human_size(bcount))

    print("Outbound %d Most Common" % sample_count)
    for ip, bcount in resolve_ip_address(agg_out_traffic, resolver, start,
                                         end).most_common(sample_count):
        print("%s %s" % ip, human_size(bcount))
Example #3
    def write(self):
        """
        Writes the simulated arrival times and scores of
        archaeological dates to csv files.
        """
        timestamp = int(time())
        sim_file = './results/sim{}.csv'.format(timestamp)

        with open(sim_file, 'w') as file:
            file.write('x,y,bp\n')
            for coords in self.grid:
                if self.grid[coords]['arrival_time']:
                    bp = self.grid[coords]['arrival_time']
                    x, y = to_lonlat(transform_coords(coords))
                    file.write(str(x) + ',' + str(y) + ',' + str(bp) + '\n')

        date_file = './results/dates{}.csv'.format(timestamp)
        dates = get_dates()

        with open(date_file, 'w') as file:
            file.write('x,y,score\n')
            for coords in dates:
                sim_date = self.grid[coords]['arrival_time']
                if sim_date in dates[coords]:
                    score = (dates[coords][sim_date] /
                             max(dates[coords].values()))
                else:
                    score = 0
                x, y = to_lonlat(transform_coords(coords))
                file.write(str(x) + ',' + str(y) + ',' + str(score) + '\n')
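The scoring logic implies that this project's zero-argument `get_dates()` maps grid coordinates to a probability distribution over calibrated dates. A toy stand-in with invented values:

# Illustrative only; the coordinates and probabilities are made up.
def get_dates():
    return {
        (500, 1200): {11000: 0.2, 10800: 0.5, 10600: 0.3},
        (742, 980): {9500: 0.6, 9400: 0.4},
    }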
Example #4
    def prepare_employment_train_data(self, cv_data):
        employment = cv_data.get('employment', {})
        employers = employment.get('EmployerOrg', [])

        employments = []
        for employer in employers:
            positions = employer.get('PositionHistory', [])
            for position in positions:
                title = position.get('Title', '')
                organization = position.get('OrgName',
                                            {}).get('OrganizationName', '')

                date_of_attendance = position['DatesOfAttendance'][
                    0] if position.get('DatesOfAttendance', []) else {}
                start_date, end_date = get_dates(date_of_attendance)

                if title:
                    employments.append((title, {
                        'entities': [(0, len(title), "DESIGNATION")]
                    }))
                if organization:
                    employments.append((organization, {
                        'entities': [(0, len(organization), "ORG")]
                    }))
                if start_date:
                    employments.append((start_date, {
                        'entities': [(0, len(start_date), "START_DATE")]
                    }))
                if end_date:
                    employments.append((end_date, {
                        'entities': [(0, len(end_date), "END_DATE")]
                    }))
        return employments
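Both preparation methods emit `(text, {'entities': [(start, end, label), ...]})` pairs, which is the training-data format of spaCy 2.x NER. Assuming that target (the listing does not say), a sketch of a consuming training loop:

# Sketch of a spaCy 2.x training loop (the v3 API differs).
import random
import spacy

train_data = [
    ('Software Engineer', {'entities': [(0, 17, 'DESIGNATION')]}),
]  # in practice: the output of prepare_employment_train_data(cv_data)

nlp = spacy.blank('en')
ner = nlp.create_pipe('ner')
nlp.add_pipe(ner)
for label in ('DESIGNATION', 'ORG', 'START_DATE', 'END_DATE'):
    ner.add_label(label)

optimizer = nlp.begin_training()
for _ in range(10):
    random.shuffle(train_data)
    for text, annotations in train_data:
        nlp.update([text], [annotations], sgd=optimizer, drop=0.35)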
Example #5
def back_populator():
    # Start date of season
    start_date = datetime.date(2020, 12, 22)
    end_date, _ = get_dates()
    while start_date <= end_date:
        Report().generate_reports(start_date)
        Scores(start_date).poll()
        start_date = start_date + datetime.timedelta(days=1)
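This snippet (and the later polling examples) unpacks a two-element result from a zero-argument `get_dates()`. Judging from how the halves are used, a plausible shape is (today, yesterday):

# Assumed implementation; the project's actual helper is not shown.
import datetime

def get_dates():
    today = datetime.date.today()
    return today, today - datetime.timedelta(days=1)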
Example #6
    def __init__(self):
        self.states = get_states()
        self.reports_file = 'reports.json'
        self.reports_dir = 'reports/'
        self.image_dir = 'static/images/'
        self.api_endpoint = 'https://covid-api.com/api/'
        self.dates_file = 'dates.json'
        self.first_date = '2020-04-16'
        self.dates = get_dates(self.first_date)
        self.reports = []
        self.reports_parsed = []

        dump_json(self.dates_file, self.dates)
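Here `get_dates(self.first_date)` is dumped straight to JSON, so it presumably yields ISO date strings from the first date through today. A sketch under that assumption:

# Assumed behaviour, inferred from dump_json(self.dates_file, self.dates).
import datetime

def get_dates(first_date):
    start = datetime.date.fromisoformat(first_date)
    days = (datetime.date.today() - start).days
    return [(start + datetime.timedelta(days=n)).isoformat()
            for n in range(days + 1)]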
Example #7
def animate_better_barchart(data_dict: Dict[str, np.ndarray],
                            metadata: Dict[str, Any],
                            savedir: str):
    dates = get_dates(Message.initial_date, Message.final_date)
    df = pd.DataFrame.from_dict(data_dict)
    df.index = dates
    bcr.bar_chart_race(
        df,
        n_bars=metadata['max_names_per_graph'],
        steps_per_period=metadata['animation']['fps'],
        title=metadata['title'],
        filename=os.path.join(savedir, f'{metadata["title"]}.mp4'),
        period_length=metadata['animation']['bcr_period']
    )
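In this variant `get_dates` spans two datetimes and must produce one entry per DataFrame row. A simple day-granularity sketch (an assumption; the real helper may use a different resolution):

# Illustrative implementation only.
import datetime

def get_dates(initial_date, final_date):
    days = (final_date - initial_date).days
    return [initial_date + datetime.timedelta(days=n)
            for n in range(days + 1)]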
Example #8
def search(question):
    # get_urls googles the query (in utils.py) and returns a list of
    # suitable URLs
    urls = utils.get_urls(question)
    names_dict = {}
    if utils.getqtype(question) == "who":
        names_dict = utils.get_names(urls, question)
    elif utils.getqtype(question) == "when":
        names_dict = utils.get_dates(urls)
    answer = None
    number = 0
    for i in names_dict:
        if names_dict[i] > number:
            answer = i
            number = names_dict[i]
    return render_template("search.html",
                           question=question,
                           urls=urls,
                           names_dict=names_dict,
                           answer=answer)
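For "when" questions, `utils.get_dates(urls)` evidently returns a mapping from candidate date to a count that the loop maximizes. One way such a helper could look (hypothetical; the regex and the fetching strategy are assumptions):

# Hypothetical helper: count ISO-style dates across the fetched pages.
import re
from collections import Counter

import requests

DATE_RE = re.compile(r'\b\d{4}-\d{2}-\d{2}\b')

def get_dates(urls):
    counts = Counter()
    for url in urls:
        text = requests.get(url, timeout=10).text
        counts.update(DATE_RE.findall(text))
    return counts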
Example #9
    def eval(self):
        """
        Returns a score from 0 to 1 of model fitness based on match
        with archaeological dates.
        """
        total_score = 0

        dates = get_dates()

        for coords in dates:
            score = 0
            sim_date = self.grid[coords]['arrival_time']
            if sim_date and sim_date in dates[coords]:
                # Normalize by the probability of the most likely date
                score += (dates[coords][sim_date] /
                          max(dates[coords].values()))

            total_score += score

        return total_score / len(dates)
Example #10
def update3(input6, input7, input8, input31):
    city1 = get_given_city_data(city_df, input6)
    city2 = get_given_city_data(city_df, input7)
    dfresult = city1
    dfresult[input6] = city1[input8]
    dfresult[input7] = city2[input8]
    # Map the requested chart type to its plotly express constructor
    # instead of repeating the same call six times.
    plotters = {
        'plot': px.line,
        'bar': px.bar,
        'histogram': px.histogram,
        'area': px.area,
        'funnel': px.funnel,
        'scatter': px.scatter,
    }
    fig = plotters[input31](dfresult,
                            x=get_dates(city_df),
                            y=[input6, input7],
                            title="Covid-19: Cases comparison between %s and %s." %
                            (input6, input7),
                            labels={
                                "x": "Time",
                                "value": "Number of Cases"
                            },
                            template="presentation")
    fig.update_layout(font_family="Courier New",
                      font_color="blue",
                      title_font_family="Times New Roman",
                      title_font_color="red",
                      legend_title_font_color="green",
                      transition_duration=500)
    return fig
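All of the dashboard callbacks in this group pass `get_dates(city_df)` (or `get_dates(state_df)`) as the x-axis, so the helper is presumably just the date axis of the wide DataFrame. A one-line sketch, assuming a 'Date' column:

# Assumed helper; the column name 'Date' is a guess.
def get_dates(df):
    return df['Date']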
Example #11
def update_figure(sta1, input2):
    state = get_given_state_data(state_df, sta1)
    # Map the requested chart type to its plotly express constructor
    # instead of repeating the same call six times.
    plotters = {
        'plot': px.line,
        'bar': px.bar,
        'area': px.area,
        'histogram': px.histogram,
        'funnel': px.funnel,
        'scatter': px.scatter,
    }
    fig = plotters[input2](state,
                           x=get_dates(state_df),
                           y=['Confirmed', 'Active', 'Recovered', 'Deceased'],
                           title="Covid-19 data of state.",
                           labels={
                               "x": "Time",
                               "value": "Number of Cases"
                           },
                           template="presentation")
    fig.update_layout(font_family="Courier New",
                      font_color="blue",
                      title_font_family="Times New Roman",
                      title_font_color="red",
                      legend_title_font_color="green",
                      transition_duration=500)
    return fig
Example #12
def update2(input3, input4, input5, input21):
    state1 = get_given_state_data(state_df, input3)
    state2 = get_given_state_data(state_df, input4)
    dfresult = state1
    dfresult[input3] = state1[input5]
    dfresult[input4] = state2[input5]
    # Map the requested chart type to its plotly express constructor
    # instead of repeating the same call six times.
    plotters = {
        'plot': px.line,
        'bar': px.bar,
        'histogram': px.histogram,
        'area': px.area,
        'funnel': px.funnel,
        'scatter': px.scatter,
    }
    fig = plotters[input21](dfresult,
                            x=get_dates(state_df),
                            y=[input3, input4],
                            title="Covid-19: Cases comparison between %s and %s." %
                            (input3, input4),
                            labels={
                                "x": "Time",
                                "value": "Number of Cases"
                            },
                            template="presentation")
    fig.update_layout(font_family="Courier New",
                      font_color="blue",
                      title_font_family="Times New Roman",
                      title_font_color="red",
                      legend_title_font_color="green",
                      transition_duration=500)
    return fig
Example #13
#!/usr/bin/python

import utils
from stockmarket import Stock, Market

sap_tickers = utils.get_tickers() # Standard & Poor's tickers
stocks = utils.get_stocks_from_tickerslist(sap_tickers)
dates = utils.get_dates(stocks[0].ticker) 
market = Market(stocks)

spread = []
market_performance = []
dates_shown = []
STEP = 30

for i in range(STEP, len(stocks[0].values) - STEP, STEP):
    previous = i - STEP
    now = i
    nxt = i + STEP  # avoid shadowing the builtin next()
    strong_stocks = [s for s in stocks
                     if s.getOffensive(previous, now) >= 100 and s.getDefensive(previous, now) <= 100]
    weak_stocks = [s for s in stocks
                   if s.getOffensive(previous, now) <= 100 and s.getDefensive(previous, now) >= 100]

    mean_strong_performance = utils.mean([s.getPerformance(now, nxt) for s in strong_stocks])
    mean_weak_performance = utils.mean([s.getPerformance(now, nxt) for s in weak_stocks])
    spread.append(mean_strong_performance - mean_weak_performance)
    market_performance.append(market.getPerformance(previous, now))  # to look for correlations with the spread
    dates_shown.append(dates[now])  # dates shown on the X axis

# format the data properly and save it in a file
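`utils.get_dates(ticker)` supplies the date labels aligned with a ticker's value series, indexable by position. A sketch assuming the dates live in a per-ticker CSV (the path and column name are invented):

# Hypothetical helper; 'data/<ticker>.csv' and the 'Date' column are assumptions.
import csv

def get_dates(ticker):
    with open('data/{}.csv'.format(ticker)) as f:
        return [row['Date'] for row in csv.DictReader(f)]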
Example #14
def analyze_app(
        app, env, account_id,
        bucket, prefix, store_dir,
        resources, ipdb, ipranges,
        start, end, tz,
        sink, period, sample_count,
        debug):
    """Analyze flow log records for application and generate metrics per period"""
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('botocore').setLevel(logging.WARNING)

    executor = MainThreadExecutor if debug else ThreadPoolExecutor
    start, end = get_dates(start, end, tz)
    resolver = IPResolver(ipdb, ipdb, ipranges)

    for rtype_name in resources:
        rtype = Resource.get_type(rtype_name)
        resource_map = {
            rtype.id(r): r for r
            in rtype.get_resources(ipdb, start, end, app, env)}
        log.info("App:%s Env:%s Type:%s Found:%d",
                 app, env, rtype_name, len(resource_map))

        with sqlite3.connect(ipdb) as db:
            db.row_factory = row_factory
            cursor = db.cursor()
            cursor.execute(
                'select * from enis where resource_type in (%s)' % (
                    ", ".join(["'%s'" % r for r in resource_map.keys()])))
            enis = list(cursor)
            eni_map = {e['eni_id']: e for e in enis}

        # TODO: Download should be doing date bits here across the range of days.
        log_prefix = "%s/%s/flow-log/%s/%s" % (
            prefix.rstrip('/'),
            account_id,
            start.strftime('%Y/%m/%d'),
            "00000000-0000-0000-0000-000000000000")

        f_downloads = {}
        f_metrics = {}
        files = {}

        # should probably just queue this out to distributed worker pool
        with executor(max_workers=5) as w:
            client = boto3.client('s3')
            for e in enis:
                f_downloads[
                    w.submit(
                        eni_download_flows,
                        client, bucket,
                        log_prefix, start, end,
                        e['eni_id'], store_dir)] = e

            for f in as_completed(f_downloads):
                if f.exception():
                    log.warning(
                        "error processing eni %s download: %s",
                        eni_map[f_downloads[f]],
                        f.exception())
                    continue
                e = f_downloads[f]
                files[e['eni_id']] = f.result()

            ipset = {e['ip_address'] for e in enis}

            for eni_id, eni_files in files.items():
                stream = eni_flow_stream(eni_files, start, end)
                f_metrics[w.submit(
                    process_eni_metrics,
                    eni_map[eni_id], ipset,
                    stream,
                    start, end, period, sample_count,
                    resolver, sink)] = eni_id

            for f in as_completed(f_metrics):
                if f.exception():
                    log.warning(
                        "error processing eni %s download %s",
                        eni_map[f_metrics[f]],
                        f.exception())
                    continue
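The `MainThreadExecutor if debug else ThreadPoolExecutor` switch runs every submission inline when debugging, so breakpoints and tracebacks behave. A minimal executor compatible with the `submit`/`as_completed` usage above (a sketch, not the project's class):

# Sketch: synchronous stand-in for ThreadPoolExecutor used in debug mode.
from concurrent.futures import Future

class MainThreadExecutor:
    def __init__(self, max_workers=None):
        pass

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        return False

    def submit(self, fn, *args, **kwargs):
        # Run immediately on the calling thread and wrap the outcome.
        f = Future()
        try:
            f.set_result(fn(*args, **kwargs))
        except Exception as e:
            f.set_exception(e)
        return f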
Example #15
def poll():
    _, yesterday = get_dates()
    print(f"Polling for date: {yesterday}")
    Report().generate_reports(yesterday)
    Scores(yesterday).poll()
Example #16
def draw_line_chart(
    data_dict: Union[List[Tuple[str, np.ndarray]], Dict[str, np.ndarray]],
    init_date: datetime,
    final_date: datetime,
    metadata: Dict[str, Any],
    fig=None,
    ax=None
) -> None:

    anex_data = get_anex_data(data_dict)
    if anex_data:
        (init_date, final_date) = anex_data

    (start, final) = fit_dates_to_history_time_length(init_date, final_date)

    plt.clf()
    if not fig or not ax:
        fig, ax = plt.subplots(figsize=(15, 8))
    ax.clear()

    locator = mdates.AutoDateLocator(minticks=3, maxticks=10)
    formatter = mdates.ConciseDateFormatter(locator)
    dates = get_dates(init_date, final_date)

    max_names_per_graph = metadata.get('max_names_per_graph', 0)

    curves_to_plot = metadata.get('plot_keys', [''])
    if not isinstance(curves_to_plot, list):
        raise SystemExit('The names to plot should be a list of strings')
    if isinstance(data_dict, dict):
        data_dict = [(k, v) for k, v in data_dict.items()]

    filtered_data_dicts: List[Tuple[str, np.ndarray]]

    if metadata.get('plot_total'):
        curves_to_plot.append('TOTAL')

    filtered_data_dicts = filter_data_to_plot(
        sorted(data_dict,
               key=lambda kv: np.sum(kv[1]),
               reverse=True
               )[:max_names_per_graph],
        curves_to_plot,
        max_names_per_graph
    )

    if metadata.get('plot_total'):
        ax2 = ax.twinx()
        ax2.clear()
        ax2.set_ylabel('Total')

    delay = len(dates[:final]) - len(filtered_data_dicts[0][1][start:final])  # type: ignore # noqa
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(formatter)
    for name, info in filtered_data_dicts:
        if name == 'TOTAL':
            ax2.xaxis.set_major_locator(locator)
            ax2.xaxis.set_major_formatter(formatter)
            ax2.plot(dates[delay:final],
                     info[start:final],
                     label=name,
                     color='k',
                     linewidth=1)
            continue
        ax.xaxis.set_major_locator(locator)
        ax.xaxis.set_major_formatter(formatter)
        ax.plot(dates[delay:final], info[start:final], label=name)
    ax.set_title(metadata['title'])
    ax.grid(True)
    fig.legend()
Example #17
def test_get_Dates():
    data = get_state_data()[0]
    expected_output = '2020-07-07'
    assert expected_output in get_dates(data)
Example #18
def main():
    """
    e.g.
    python download.py -n rthk -s 2019-09-01 -e 2019-09-30
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n',
                        '--name',
                        type=str,
                        help='name of source',
                        required=True)
    parser.add_argument('-c',
                        '--channels',
                        type=str,
                        help='name of audio channels',
                        default='all')
    parser.add_argument('-d',
                        '--dir',
                        type=str,
                        help='directory for downloaded files')
    # only for rthk
    parser.add_argument('-s', '--start', type=str, help='start date')
    parser.add_argument('-e', '--end', type=str, help='end date')

    parser.add_argument('--n_speaker',
                        type=int,
                        help='number of speakers in the youtube channel',
                        default=2)

    arg = parser.parse_args()
    name = arg.name

    if arg.channels == 'all':
        channels = list(BASE_URL[name].keys())
    else:
        channels = arg.channels.split(',')

    if arg.dir is None:
        download_dir = DEFAULT_DIR_ROOT + '/' + name + '/wav'
    else:
        download_dir = arg.dir

    n_speaker = arg.n_speaker

    params = {}
    if name == 'rthk':
        download_helper = mapper[name](name, BASE_URL, download_dir)
        start_date = arg.start
        end_date = arg.end
        dates = utils.get_dates(start_date, end_date)

        for channel in channels:
            params['channel'] = channel
            for date in dates:
                params['date'] = date
                done = False
                while not done:
                    try:
                        start_time = time.perf_counter()
                        download_helper.download(params)
                        print('Time needed for this sample: {}'.format(
                            time.perf_counter() - start_time))
                        done = True  # turn off the loop
                    except ConnectionResetError:
                        print('Sleeping for 5 secs to avoid being blocked...')
                        time.sleep(5)
                    except Exception as exe:
                        print('download.py')
                        traceback.print_tb(exe.__traceback__)
    elif name == 'youtube':
        download_helper = mapper[name](name,
                                       BASE_URL,
                                       download_dir,
                                       api_key=GOOGLE_API_KEY,
                                       n_per_playlist=N_PER_PLAYLIST)
        playlist_ids = MULTI_SPEAKER_PLAYLIST if n_speaker > 1 else SINGLE_SPEAKER_PLAYLIST
        for playlist_id in playlist_ids:
            download_helper.download(playlist_id)
Example #19
                int(game['awayTeam']['teamId']), game['gameStatusText'],
                int(game['homeTeam']['score']),
                int(game['awayTeam']['score']),
                int(game['homeTeam']['wins']),
                int(game['awayTeam']['wins']),
                int(game['homeTeam']['losses']),
                int(game['awayTeam']['losses']),
                f"{game['awayTeam']['teamName']} at {game['homeTeam']['teamName']}"
            ]
            data.append(obj)
        return data

    def upload_data(self, parsed_data):
        rows = self.upload_to_db(parsed_data)
        return parsed_data

    def poll(self):
        raw_data = self.get_data()
        parsed_data = self.parse_data(raw_data)
        data = self.upload_data(parsed_data)


if __name__ == '__main__':
    # print(Scores('04/2/2021').poll())
    start_date = datetime.date(2020, 12, 22)
    # start_date = datetime.date(2021, 4, 2)
    end_date, _ = get_dates()
    while start_date <= end_date:
        Scores(start_date).poll()
        start_date = start_date + datetime.timedelta(days=1)
Example #20
    args = parser.parse_args()

    maybe_make_dir('weights')
    maybe_make_dir('portfolio_val')

    timestamp = time.strftime('%Y%m%d%H%M')

    stocks = Stocks()
    portfolio = Portfolio()

    data = np.around(get_data(ticker=args.ticker))
    dataSplit = data.shape[1] - args.testingDays
    train_data = data[:, :dataSplit]
    test_data = data[:, dataSplit:]

    dates = get_dates(ticker=args.ticker)
    datesSplit = dates.shape[1] - args.testingDays
    train_dates = dates[:, :datesSplit]
    test_dates = dates[:, datesSplit:]

    env = TradingEnv(stocks, train_data, train_dates, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)

    portfolio_value = []

    if args.mode == 'test':
        # remake the env with test data
        env = TradingEnv(stocks, test_data, test_dates, args.initial_invest)