Example #1
def buzz(mute_alerts,
         msg,
         username='******',
         channel='@david',
         icon=':bee:'):
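    # Relay the message to Slack (with the given username, channel, and icon)
    # unless alerts are muted.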
    if not mute_alerts:
        send_to_slack(msg, username, channel, icon)
Example #2
def p_watch(self):
    bars, header = self.projects()
    msg = '\n'.join(bars)
    send_to_slack(msg,
                  username='******',
                  channel='@david',
                  icon=':film_projector:')
Example #3
def main(schema, **kwparams):
    # Scrape location of zip file (and designation of the election):
    r = requests.get(
        "http://www.alleghenycounty.us/elections/election-results.aspx")
    tree = html.fromstring(r.content)
    #title_kodos = tree.xpath('//div[@class="custom-form-table"]/table/tbody/tr[1]/td[2]/a/@title')[0] # Xpath to find the title for the link
    # As the title is human-generated, it can differ from the actual text shown on the web page.
    # In one instance, the title was '2019 Primary', while the link text was '2019 General'.
    election_index = 1  # Manually increment this to re-pull older elections
    title_kodos = tree.xpath(
        '//div[@class="custom-form-table"]/table/tbody/tr[{}]/td[2]/a/text()'.
        format(election_index))[0]  # Xpath to find the text for the link
    # to the MOST RECENT election (e.g., "2017 General Election").

    url = tree.xpath(
        '//div[@class="custom-form-table"]/table/tbody/tr[{}]/td[2]/a'.format(
            election_index))[0].attrib['href']
    # But this looks like this:
    #   'http://results.enr.clarityelections.com/PA/Allegheny/71801/Web02/#/'
    # so it still doesn't get us that other 6-digit number needed for the
    # full path, leaving us to scrape that too, and it turns out that
    # such scraping is necessary since the directory where the zipped CSV
    # files are found changes too.

    path = dname + "/tmp"
    # If this path doesn't exist, create it.
    if not os.path.exists(path):
        os.makedirs(path)

    # Worse than that, the page content is rendered client-side (note the "#/" in the
    # URL), so one must use something like Selenium to find out what the download link is.
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    chrome_options = webdriver.ChromeOptions()
    prefs = {'download.default_directory': path}
    chrome_options.add_experimental_option('prefs', prefs)
    chromedriver_path = "/usr/local/bin/chromedriver"
    try:
        # Enable headless mode to allow the ETL job to run when the screen is locked.
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920x1080")
        driver = webdriver.Chrome(chromedriver_path,
                                  chrome_options=chrome_options)
    except:
        driver = webdriver.Chrome("/Users/drw/Apps/Internet/chromedriver",
                                  chrome_options=chrome_options)
        # This is just a different location to check for chromedriver. The path
        # could be moved to a local preferences file.

    driver.get(url)
    # At this point, it's not possible to get the link since
    # the page is generated and loaded too slowly.
    # "the webdriver will wait for a page to load by default. It does
    # not wait for loading inside frames or for ajax requests. It means
    # when you use .get('url'), your browser will wait until the page
    # is completely loaded and then go to the next command in the code.
    # But when you are posting an ajax request, webdriver does not wait
    # and it's your responsibility to wait an appropriate amount of time
    # for the page or a part of page to load; so there is a module named
    # expected_conditions."
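    # A minimal sketch of that explicit-wait approach (left commented out, and
    # assuming the download links keep the "pl-2" class used below; the fixed
    # sleep below remains the actual behavior here):
    #   from selenium.webdriver.common.by import By
    #   from selenium.webdriver.support.ui import WebDriverWait
    #   from selenium.webdriver.support import expected_conditions as EC
    #   WebDriverWait(driver, 15).until(
    #       EC.presence_of_element_located((By.CLASS_NAME, "pl-2")))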
    delay = 15  # seconds
    time.sleep(delay)

    download_class = "pl-2"
    download_entities = fetch_download_entities(driver, download_class)
    if len(download_entities) == 0:
        # Fall back to older download_class (2019 Primary election and earlier
        # [yes, the HTML can change from election to election]).
        download_class = "list-download-link"
        download_entities = fetch_download_entities(driver, download_class)

    if len(download_entities) == 0:
        send_to_slack(
            "countermeasures can no longer find the part of the DOM that contains the download links.",
            username='******',
            channel='@david',
            icon=':satellite_antenna:')
        driver.quit()
        raise RuntimeError(
            "Screen-scraping error. Nothing found in class {}.".format(
                download_class))

    summary_file_url = download_entities[0].get_attribute("href")

    # Download ZIP file
    #r = requests.get("http://results.enr.clarityelections.com/PA/Allegheny/63905/188108/reports/summary.zip") # 2016 General Election file URL
    #election_type = "Primary"
    #r = requests.get("http://results.enr.clarityelections.com/PA/Allegheny/68994/188052/reports/summary.zip") # 2017 Primary Election file URL

    election_type = "General"
    #path_for_current_results = "http://results.enr.clarityelections.com/PA/Allegheny/71801/189912/reports/"
    #summary_file_url = path_for_current_results + "summary.zip"
    r = requests.get(summary_file_url)
    # (The commented-out URLs above and below are left over from when these file URLs
    # were hard-coded for particular elections; election_type is still hard-coded for now.)
    #xml_file_url = path_for_current_results + "detailxml.zip"
    xml_index = 2  # Previously this was 3
    #xml_file_url = driver.find_elements_by_class_name(download_class)[xml_index].get_attribute("href")
    xml_file_url = download_entities[xml_index].get_attribute("href")
    found = True
    if re.search("xml", xml_file_url) is None:
        xml_index = 1
        found = False
        #list_download_links = driver.find_elements_by_class_name(download_class)
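        # Scan the remaining download links for one whose URL mentions "xml".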
        while xml_index < len(download_entities) and not found:
            xml_file_url = download_entities[xml_index].get_attribute("href")
            found = re.search("xml", xml_file_url) is not None
            xml_index += 1

    driver.quit()

    print("xml_file_url = {}".format(xml_file_url))
    if not found:
        notify_admins(
            "Scraping Failure: Unable to find an XML file. Countermeasures terminated."
        )
        raise ValueError(
            "This ETL job is broken on account of scraping failure.")

    # Save result from requests to zip_file location.
    zip_file = dname + '/tmp/summary.zip'
    with open(zip_file, 'wb') as f:
        f.write(r.content)

    print("zip_file = {}".format(zip_file))
    today = datetime.now()

    # Make name of hash database dependent on the server
    # as a very clear way of differentiating test and production
    # datasets.
    server = kwparams.get('server', "test")
    db = dataset.connect('sqlite:///{}/hashes-{}.db'.format(dname, server))
    table = db['election']

    # with open(os.path.dirname(os.path.abspath(__file__))+'/ckan_settings.json') as f: # The path of this file needs to be specified.
    with open(ELECTION_RESULTS_SETTINGS_FILE) as f:
        settings = json.load(f)
    site = settings['loader'][server]['ckan_root_url']
    package_id = settings['loader'][server]['package_id']
    API_key = settings['loader'][server]['ckan_api_key']

    changed, last_hash_entry, last_modified = is_changed(
        table, zip_file, title_kodos)
    if not changed:
        print(
            "The Election Results summary file for {} seems to be unchanged.".
            format(title_kodos))
        return
    else:
        print(
            "The Election Results summary file for {} does not match a previous file."
            .format(title_kodos))
        election_type = None  # Change this to force a particular election_type to be used, but it's
        # basically irrelevant since r_name_kang is not being used.
        r_name_kang = build_resource_name(today, last_modified, election_type)
        #r_name_kodos = re.sub(" Results"," Election Results",title_kodos)
        # Sample names from titles of links:
        # Special Election for 35th Legislative District
        # 2017 General Results
        # Election Results: 2014 Primary
        # Election Results: 2014 General Election
        # 2012 Special 40th State Sen Results

        # Since there's so much variation in these names, maybe it's best just
        # to use them without modifying them and accept that the resource
        # names will vary a little. They can always be cleaned up after the election.
        r_name_kodos = title_kodos

        print("Inferred name = {}, while scraped name = {}".format(
            r_name_kang, r_name_kodos))

        r_chosen_name = r_name_kodos  # Using the scraped name seems better.

        if r_name_kang != r_name_kodos:
            resource_id = find_resource_id(site,
                                           package_id,
                                           r_chosen_name,
                                           API_key=API_key)
            if resource_id is None:
                send_to_slack(
                    "countermeasures has found two conflicting names for the resource: {} and {}. Neither can be found in the dataset. {} is being used as the default.\nThis is your reminder to move the new resources to the top of the list."
                    .format(r_name_kodos, r_name_kang, r_name_kodos),
                    username='******',
                    channel='@david',
                    icon=':satellite_antenna:')
                # The first time this notification fired, the Kodos name was "Special Election for 35th Legislative District" and the Kang name was "2018 General Election Results".
                # The second name was (incorrectly) used for storing the CSV file, while the first name was used for storing the zipped XML file.

    # Unzip the file
    filename = "summary.csv"
    zf = PyZipFile(zip_file).extract(filename, path=path)
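    # (ZipFile.extract returns the path of the extracted file, not a file object,
    # so zf here is just a string.)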
    target = "{}/{}".format(path, filename)
    print("target = {}".format(target))
    specify_resource_by_name = True
    if specify_resource_by_name:
        kwargs = {'resource_name': r_chosen_name}
    #else:
    #kwargs = {'resource_id': ''}

    # Code below stolen from prime_ckan/*/open_a_channel() but really
    # from utility_belt/gadgets

    print(
        "Preparing to pipe data from {} to resource {} (package ID = {}) on {}"
        .format(target,
                list(kwargs.values())[0], package_id, site))
    time.sleep(1.0)

    pipeline = pl.Pipeline('election_results_pipeline',
                           'Pipeline for the County Election Results',
                           log_status=False,
                           settings_file=ELECTION_RESULTS_SETTINGS_FILE,
                           settings_from_file=True,
                           start_from_chunk=0
                           ) \
        .connect(pl.FileConnector, target, encoding='utf-8') \
        .extract(pl.CSVExtractor, firstline_headers=True) \
        .schema(schema) \
        .load(pl.CKANDatastoreLoader, server,
              fields=fields_to_publish,
              #package_id=package_id,
              #resource_id=resource_id,
              #resource_name=resource_name,
              key_fields=['line_number'],
              method='upsert',
              **kwargs).run()

    update_hash(db, table, zip_file, r_chosen_name, last_modified)

    # Also update the zipped XML file.

    r_xml = requests.get(xml_file_url)
    xml_file = dname + '/tmp/detailxml.zip'
    with open(xml_file, 'wb') as g:
        g.write(r_xml.content)

    xml_name = r_chosen_name + ' by Precinct (zipped XML file)'

    ckan = RemoteCKAN(site, apikey=API_key)
    resource_id = find_resource_id(site, package_id, xml_name, API_key=API_key)
    if resource_id is None:
        ckan.action.resource_create(
            package_id=package_id,
            url='dummy-value',  # ignored but required by CKAN<2.6
            name=xml_name,
            upload=open(xml_file, 'rb'))
    else:
        ckan.action.resource_update(
            package_id=package_id,
            url='dummy-value',  # ignored but required by CKAN<2.6
            id=resource_id,
            upload=open(xml_file, 'rb'))

    log = open(dname + '/uploaded.log', 'w+')
    if specify_resource_by_name:
        print("Piped data to {}".format(kwargs['resource_name']))
        log.write("Finished upserting {}\n".format(kwargs['resource_name']))
    else:
        print("Piped data to {}".format(kwargs['resource_id']))
        log.write("Finished upserting {}\n".format(kwargs['resource_id']))
    log.close()

    # Delete temp file after extraction.
    delete_temporary_file(zip_file)
    delete_temporary_file(path + '/' + filename)
Example #4
def notify_admins(msg):
    print(msg)
    send_to_slack(msg,
                  username='******',
                  channel='#other-notifications',
                  icon=':satellite_antenna:')
Example #5
    try:
        if len(sys.argv) > 1:
            server = sys.argv[1]
            # When invoking this function from the command line, the
            # argument 'production' must be given to push data to
            # a public repository. Otherwise, it will default to going
            # to a test directory.
            main(schema, server=server)
            # Note that the hash database is currently unaware of which
            # server a file is saved to, so if it's first saved to
            # the test server and you run the ETL script again for the
            # production server, if the file hasn't changed, the script
            # will not push the data to the production server.
        else:
            main(schema)
    except:
        e = sys.exc_info()[0]
        print("Error: {} : ".format(e))
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        traceback_msg = ''.join('!! ' + line for line in lines)
        print(traceback_msg)  # Log it or whatever here
        msg = "countermeasures ran into an error: {}.\nHere's the traceback:\n{}".format(
            e, traceback_msg)
        mute_alerts = False  #kwargs.get('mute_alerts',False)
        if not mute_alerts:
            send_to_slack(msg,
                          username='******',
                          channel='@david',
                          icon=':satellite_antenna:')
Example #6
            # https://github.com/WPRDC/data-guide/blob/master/docs/metadata_extras.md
            # The format is like this:
            #       u'extras': [{u'key': u'dcat_issued', u'value': u'2014-01-07T15:27:45.000Z'}, ...
            # not a dict, but a list of dicts.
            extras = {d['key']: d['value'] for d in extras_list}
            #if 'dcat_issued' not in extras:
            if 'time_field' in extras:
                time_field_lookup = json.loads(extras['time_field'])
                fix_temporal_coverage(package['id'], time_field_lookup, just_testing)

from credentials import production
try:
    if __name__ == '__main__':
        just_testing = False
        if len(sys.argv) > 1:
            if sys.argv[1] == 'True':
                just_testing = True
            elif sys.argv[1] == 'False':
                just_testing = False
        main(just_testing=just_testing)
except:
    e = sys.exc_info()[0]
    msg = "Error: {} : \n".format(e)
    exc_type, exc_value, exc_traceback = sys.exc_info()
    lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
    msg += ''.join('!! ' + line for line in lines)
    msg = "watchdog.py failed for some reason.\n" + msg
    print(msg) # Log it or whatever here
    if not just_testing and production:
        send_to_slack(msg, username='******', channel='@david', icon=':doge:')
Example #7
def main(mute_alerts=True,
         check_private_datasets=False,
         skip_watchdog=False,
         test_mode=False):
    if not skip_watchdog:
        watchdog.main(just_testing=False)
    if False:  # [ ] The code in this branch can be eliminated.
        host = "data.wprdc.org"
        url = "https://{}/api/3/action/current_package_list_with_resources?limit=999999".format(
            host)
        r = requests.get(url)
        response = r.json()
        if not response['success']:
            msg = "Unable to get the package list."
            print(msg)
            raise ValueError(msg)

        packages = response['result']
    else:
        from credentials import site, ckan_api_key as API_key
        if not check_private_datasets:
            API_key = None
        ckan = ckanapi.RemoteCKAN(site, apikey=API_key)
        try:
            packages = ckan.action.current_package_list_with_resources(
                limit=999999)
        except:
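            # Retry once if the first request raises (e.g., a transient API error).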
            packages = ckan.action.current_package_list_with_resources(
                limit=999999)

    period = {
        'Annually': timedelta(days=366),
        'Bi-Annually': timedelta(days=183),
        'Quarterly': timedelta(days=31 + 30 + 31),
        'Bi-Monthly': timedelta(days=31 + 30),
        'Monthly': timedelta(days=31),
        'Bi-Weekly': timedelta(days=14),
        'Weekly': timedelta(days=7),  # 'Weekdays' could be another period, though it seems I'm coding exceptions into the no_updates_on metadata field.
        'Daily': timedelta(days=1),
        'Hourly': timedelta(hours=1),
        'Multiple Times per Hour': timedelta(minutes=30)
    }

    # Some datasets are showing up as stale for one day because
    # (for instance) the County doesn't post jail census data
    # on a given day to their FTP server; our ETL script runs
    # but it doesn't update the metadata_modified.

    # One better solution to this would be to create a package-
    # (and maybe also resource-) level metadata field called
    # etl_job_last_ran.

    # [ ] These hard-coded exceptions can now be moved to package-level metadata.
    extensions = {}
    extensions['d15ca172-66df-4508-8562-5ec54498cfd4'] = {
        'title': 'Allegheny County Jail Daily Census',
        'extra_time': timedelta(days=1),
        'actual_data_source_reserve': timedelta(days=15)
    }
    extensions['046e5b6a-0f90-4f8e-8c16-14057fd8872e'] = {
        'title': 'Police Incident Blotter (30 Day)',
        'extra_time': timedelta(days=1)
    }

    nonperiods = ['', 'As Needed', 'Not Updated (Historical Only)']

    packages_with_frequencies = 0
    stale_count = 0
    stale_packages = {}
    for i, package in enumerate(packages):
        if 'frequency_publishing' in package.keys():
            title = package['title']
            package_id = package['id']
            dataset_url = "https://data.wprdc.org/dataset/{}".format(
                package['name'])
            metadata_modified = datetime.strptime(package['metadata_modified'],
                                                  "%Y-%m-%dT%H:%M:%S.%f")
            publishing_frequency = package['frequency_publishing']
            data_change_rate = package['frequency_data_change']
            publisher = package['organization']['title']
            private = package['private']
            if private:
                title = "(private) " + title

            # Check for 'time_field' and an auto-updated temporal_coverage field.
            temporal_coverage_end_date = temporal_coverage_end(package)

            if publishing_frequency in period:
                publishing_period = period[publishing_frequency]
            else:
                publishing_period = None
                if publishing_frequency not in nonperiods:
                    raise ValueError(
                        "{}) {}: {} is not a known publishing frequency".format(
                            i, title, publishing_frequency))
            #print("{} ({}) was last modified {} (according to its metadata). {}".format(title,package_id,metadata_modified,package['frequency_publishing']))

            if publishing_period is not None:
                no_updates_on = get_scheduled_gaps(package)

                # Include no_updates_on here if the ETL jobs get rescheduled to match
                # actual data updates (rather than the stated update frequency).
                lateness = compute_lateness(extensions, package, package_id,
                                            publishing_period, metadata_modified)
                if temporal_coverage_end_date is not None:
                    temporal_coverage_end_dt = datetime.strptime(
                        temporal_coverage_end_date, "%Y-%m-%d"
                    ) + timedelta(days=1)  # [ ] This has no time zone associated with it.
                    # [ ] Change this to use parser.parse to allow times to be included, but also think more carefully about adding that offset.

                    # Note that temporal_coverage_end_dt is advanced by one day (to be the first day after the temporal coverage) and
                    # also is technically a datetime but is actually just date information, with the time information thrown out.
                    data_lateness = compute_lateness(extensions, package,
                                                     package_id,
                                                     publishing_period,
                                                     temporal_coverage_end_dt,
                                                     no_updates_on)
                else:
                    data_lateness = timedelta(seconds=0)

                # Either kind of lateness triggers the listing of another stale package.
                if lateness.total_seconds() > 0 or data_lateness.total_seconds() > 0:
                    stale_packages[package_id] = {
                        'publishing_frequency': publishing_frequency,
                        'data_change_rate': data_change_rate,
                        'publisher': publisher,
                        'json_index': i,
                        'title': title,
                        'package_id': package_id,
                        'package_url': dataset_url,
                        'upload_method': infer_upload_method(package),
                        'url': dataset_url
                    }
                    if lateness.total_seconds() > 0:
                        stale_packages[package_id]['cycles_late'] = (
                            lateness.total_seconds() / publishing_period.total_seconds())
                        stale_packages[package_id]['last_modified'] = metadata_modified
                        stale_packages[package_id]['days_late'] = (
                            lateness.total_seconds() / (60.0 * 60 * 24))
                    else:
                        stale_packages[package_id]['cycles_late'] = 0
                        stale_packages[package_id]['last_modified'] = metadata_modified
                        stale_packages[package_id]['days_late'] = 0.0

                    #if temporal_coverage_end_date is not None:
                    if data_lateness.total_seconds() > 0:
                        stale_packages[package_id]['temporal_coverage_end'] = temporal_coverage_end_date  # This is a string.
                        stale_packages[package_id]['data_cycles_late'] = (
                            data_lateness.total_seconds() / publishing_period.total_seconds())

                    # Describe the evidence that the package is stale.
                    output = "{}) {} updates {}".format(
                        i, title, package['frequency_publishing'])
                    if lateness.total_seconds() > 0 and data_lateness.total_seconds() > 0:
                        output += " but metadata_modified = {} and temporal_coverage_end_date = {} making it DOUBLE STALE!".format(
                            metadata_modified, temporal_coverage_end_date)
                    elif lateness.total_seconds() > 0:
                        output += " but metadata_modified = {} making it STALE!".format(
                            metadata_modified)
                    elif data_lateness.total_seconds() > 0:
                        output += " but temporal_coverage_end_date = {} making it STALE!".format(
                            temporal_coverage_end_date)
                    stale_packages[package_id]['output'] = output

                    stale_count += 1
            packages_with_frequencies += 1

    # Sort stale packages by relative tardiness so the most recently tardy ones
    # appear at the bottom of the output and the most egregiously late ones
    # at the top.
    #stale_ps_sorted = sorted(stale_packages.iteritems(), key=lambda(k,v): -v['cycles_late'])
    #Note that in Python 3, key=lambda(k,v): v['position'] must be written as key=lambda k_v: k_v[1]['position']
    stale_ps_sorted = sorted(stale_packages.items(),
                             key=lambda k_v: -k_v[1]['cycles_late'])

    print("\nDatasets by Staleness: ")
    print_table(stale_ps_sorted)

    stale_ps_by_recency = sorted(stale_packages.items(),
                                 key=lambda k_v: -k_v[1]['days_late'])
    print("\n\nStale Datasets by Lateness: ")
    print_table(stale_ps_by_recency)

    stale_ps_by_data_lateness = {
        p_id: sp
        for p_id, sp in stale_packages.items() if 'temporal_coverage_end' in sp
    }
    stale_ps_by_data_lateness = sorted(
        stale_ps_by_data_lateness.items(),
        key=lambda k_v: -k_v[1]['data_cycles_late'])
    if len(stale_ps_by_data_lateness) > 0:
        print("\n\nStale Datasets by Data-Lateness: ")
        print_table(stale_ps_by_data_lateness, 'data-lateness')
    else:
        print("No datasets are stale by data-lateness.")

    coda = "Out of {} packages, only {} have specified publication frequencies. {} are stale (past their refresh-by date), according to the metadata_modified field.".format(
        len(packages), packages_with_frequencies, stale_count)
    print(textwrap.fill(coda, 70))

    # Store list of stale packages in a JSON file as a record of the last
    # glance (with the intent of sending notifications whenever new ones show up).
    currently_stale = []

    previously_stale = load_from_json()
    previously_stale_ids = [x['id'] for x in previously_stale]
    newly_stale = []
    for sp in stale_ps_by_recency:
        r = {'id': sp[0], 'title': sp[1]['title']}
        currently_stale.append(r)

        if sp[0] not in previously_stale_ids:
            newly_stale.append(sp)

    wprdc_datasets = [
        '22fe57da-f5b8-4c52-90ea-b10591a66f90',  # Liens
        'f2141a79-c0b9-4cf9-b4d2-d591b4aaa8e6'  # Foreclosures
    ]  # These are WPRDC-maintained datasets.

    if len(newly_stale) > 0:
        printable_stale_items = [
            "{} ({})".format(sp[1]['title'], sp[1]['package_url'])
            for sp in newly_stale
        ]
        linked_stale_items = [
            "<{}|{}> ({})".format(sp[1]['package_url'], sp[1]['title'],
                                  sp[1]['upload_method']) for sp in newly_stale
        ]
        includes_etl_string = " (includes ETL job)" if any(
            [sp[1]['upload_method'] == 'etl' for sp in newly_stale]) else ""

        msg = "NEWLY STALE{}: {}".format(
            includes_etl_string,
            ', '.join(linked_stale_items))  # formatted for Slack
        printable_msg = "NEWLY STALE{}: {}".format(
            includes_etl_string, ', '.join(printable_stale_items))
        print(printable_msg)
        if not mute_alerts:
            send_to_slack(msg,
                          username='******',
                          channel='#stale-datasets',
                          icon=':illuminati:')
            other_notifications = [{
                'publisher': 'Allegheny County',
                'medium': 'Slack',
                'channel': '#county-stale-datasets',
                'slack_group': 'wprdc-and-friends',
                'slack-config': 'something'
            }]

            for other in other_notifications:
                if other['publisher'] in [
                        sp[1]['publisher'] for sp in newly_stale
                ]:
                    publisher_stale_sets = []
                    for sp in newly_stale:
                        if (other['publisher'] == sp[1]['publisher']
                                and sp[0] not in wprdc_datasets):
                            publisher_stale_sets.append(sp)

                    publisher_stale_ones = [
                        "<{}|{}>".format(sp[1]['url'], sp[1]['title'])
                        for sp in publisher_stale_sets
                    ]
                    if len(publisher_stale_ones) > 0:
                        printable_publisher_stale_ones = [
                            sp[1]['title'] for sp in publisher_stale_sets
                        ]
                        multiple = len(publisher_stale_ones) != 1
                        publisher_msg = "Hey there! I just noticed {} newly stale {}: {}".format(
                            len(publisher_stale_ones),
                            pluralize("dataset", publisher_stale_ones, False),
                            ', '.join(publisher_stale_ones))
                        #send_to_different_slack: wprdc-and-friends
                        print(publisher_msg)
                        send_to_slack(publisher_msg,
                                      username='******',
                                      channel='#county-stale-datasets',
                                      slack_group=other['slack_group'])
                        #send_to_slack(publisher_msg,username='******',channel='#boring-tests',slack_group=other['slack_group'])
        else:
            print("[Slack alerts are muted.]")

    store_as_json(currently_stale)
Example #8
            elif arg in ['test']:
                test_mode = True
                args.remove(arg)
            elif arg in ['production']:
                test_mode = False
                args.remove(arg)
            elif arg in ['private']:
                check_private_datasets = True
                args.remove(arg)
            elif arg in ['skip', 'snooze']:
                skip_watchdog = True
                args.remove(arg)
        if len(args) > 0:
            print("Unused command-line arguments: {}".format(args))

        main(mute_alerts, check_private_datasets, skip_watchdog, test_mode)

except:
    e = sys.exc_info()[0]
    msg = "Error: {} : \n".format(e)
    exc_type, exc_value, exc_traceback = sys.exc_info()
    lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
    msg += ''.join('!! ' + line for line in lines)
    msg = "pocket_watch/glance.py failed for some reason.\n" + msg
    print(msg)  # Log it or whatever here
    if production:
        send_to_slack(msg,
                      username='******',
                      channel='@david',
                      icon=':illuminati:')