def deploy_checks(request=None):
    """
    Run the deployment sanity checks and report which ones passed or failed.

    Each check appends a ``(label, message)`` tuple to either the ``passed`` or
    the ``failed`` list; most expected error conditions are caught and turned
    into a ``failed`` entry rather than being raised.

    :param request: not used by any active check in this function (only the
        commented-out Piwik check refers to it).
    :return: a ``(passed, failed)`` pair of lists of ``(label, message)`` tuples.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection', "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # non-BMP Unicode in database
    try:
        entry = LogEntry.objects.create(userid='ggbaker', description='Test Unicode \U0001F600',
                                        related_object=Semester.objects.first())
    except OperationalError:
        # NOTE(review): bare OperationalError must be imported at file level; the
        # other handlers here use django.db.utils.OperationalError -- confirm they match.
        failed.append(('Unicode handling in database', 'non-BMP character not supported by connection'))
    else:
        # re-fetch so we test what was actually stored, not the in-memory value
        entry = LogEntry.objects.get(id=entry.id)
        if '\U0001F600' in entry.description:
            passed.append(('Unicode handling in database', 'okay'))
        else:
            failed.append(('Unicode handling in database', 'non-BMP character not stored correctly'))

    # Celery tasks
    celery_okay = False
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(('Celery task', "Couldn't import task: probably missing MySQLdb module"))
            else:
                try:
                    t = ping.apply_async()
                except kombu.exceptions.OperationalError:
                    failed.append(('Celery task', 'Kombu error. Probably RabbitMQ not running.'))
                else:
                    res = t.get(timeout=5)
                    if res == True:
                        passed.append(('Celery task', 'okay'))
                        celery_okay = True
                    else:
                        failed.append(('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(('Celery task', "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery disabled'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # celery beat
    try:
        from coredata.tasks import BEAT_TEST_FILE, BEAT_FILE_MAX_AGE
        beatfile_age = time.time() - os.stat(BEAT_TEST_FILE).st_mtime
        if beatfile_age < BEAT_FILE_MAX_AGE:
            passed.append(('Celery beat', 'okay'))
        else:
            failed.append(('Celery beat', 'marker file is old: celery beat likely not processing tasks'))
    except OSError:
        failed.append(('Celery beat', 'marker file is missing: celery beat likely not processing tasks'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    subprocess.call(['python3', 'manage.py', 'check_things', '--cache_subcall'])
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        failed.append(('Django cache',
                       'other processes not sharing cache: dummy/local probably being used instead of memcached'))
    elif res is None:
        failed.append(('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=301355288", ())
        result = list(db)
        if len(result) == 0:
            # must be tested before indexing into result: otherwise this branch is
            # unreachable and an empty result escapes as an IndexError.
            failed.append(('Reporting DB connection', 'query inexplicably returned nothing'))
        else:
            # whoever this is, they have non-ASCII in their name: let's hope they don't change it.
            lname = result[0][0]
            if not isinstance(lname, str):
                failed.append(('Reporting DB connection', 'string result not a string: check Unicode decoding'))
            elif lname[1:2] != '\u00e4':
                # slice (not index) so a too-short name is reported instead of raising
                failed.append(('Reporting DB connection', 'returned incorrectly-decoded Unicode'))
            else:
                passed.append(('Reporting DB connection', 'okay'))
    except SIMSProblem as e:
        failed.append(('Reporting DB connection', 'SIMSProblem, %s' % (str(e))))
    except ImportError:
        failed.append(('Reporting DB connection', "couldn't import DB2 module"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append(('Haystack search',
                           'nothing found: maybe update_index, or wait for search server to fully start'))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res:  # I don't know who 301222726 is, but he/she is real.
                failed.append(('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib.error.HTTPError as e:
            failed.append(('Photo fetching', 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, str) and not emplid.startswith('2000'):
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # Piwik API
    #if not request:
    #    failed.append(('Piwik API', "can only check in web frontend with valid request object"))
    #elif not settings.PIWIK_URL or not settings.PIWIK_TOKEN:
    #    failed.append(('Piwik API', "not configured in secrets.py"))
    #else:
    #    # try to re-log this request in piwik and see what happens
    #    from piwik_middleware.tracking import PiwikTrackerLogic, urllib_errors
    #    tracking_logic = PiwikTrackerLogic()
    #    kwargs = tracking_logic.get_track_kwargs(request)
    #    try:
    #        tracking_logic.do_track_page_view(fail_silently=False, **kwargs)
    #    except urllib_errors as e:
    #        failed.append(('Piwik API', "API call failed: %s" % (e)))
    #    else:
    #        passed.append(('Piwik API', 'okay'))

    # Backup server
    #if not settings.BACKUP_SERVER or not settings.BACKUP_USER or not settings.BACKUP_PATH or not settings.BACKUP_PASSPHRASE:
    #    failed.append(('Backup server', 'Backup server settings not all present'))
    #else:
    #    from coredata.management.commands.backup_remote import do_check
    #    try:
    #        do_check()
    #    except RuntimeError as e:
    #        failed.append(('Backup server', unicode(e)))
    #    passed.append(('Backup server', 'okay'))

    # certificates
    bad_cert = 0
    res = _check_cert('/etc/stunnel/stunnel.pem')
    if res:
        failed.append(('Stunnel cert', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.pem')
    if res:
        failed.append(('SSL PEM', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.key')
    if res:
        failed.append(('SSL KEY', res))
        bad_cert += 1

    if bad_cert == 0:
        passed.append(('Certificates',
                       'All okay, but maybe check http://www.digicert.com/help/ or https://www.ssllabs.com/ssltest/'))

    # file creation in the necessary places
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT, 'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200, 9300,  # elasticsearch
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(('Ports listening externally',
                       'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))

    # is the server time close to real-time?
    import ntplib
    try:
        c = ntplib.NTPClient()
        response = c.request('0.ca.pool.ntp.org')
        if abs(response.offset) > 0.1:
            failed.append(('Server time', 'Time is %g seconds off NTP pool.' % (response.offset, )))
        else:
            passed.append(('Server time', 'okay'))
    except ntplib.NTPException as e:
        # an unanswered NTP query should be a failed check, not abort the remaining checks
        failed.append(('Server time', 'Unable to query NTP pool: %s' % (e, )))

    # library sanity
    err = bitfield_check()
    if err:
        failed.append(('Library sanity', 'django-bitfield: ' + err))
    else:
        err = cache_check()
        if err:
            failed.append(('Library sanity', 'django cache: ' + err))
        else:
            passed.append(('Library sanity', 'okay'))

    # github-flavoured markdown subprocess
    from courselib.markup import markdown_to_html
    try:
        # checks that script runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html('test *markup*\n\n```python\nprint(1)\n```\n\u2605\U0001F600')
        if html.strip() == '<p>test <em>markup</em></p>\n<pre lang="python"><code>print(1)\n</code></pre>\n<p>\u2605\U0001F600</p>':
            passed.append(('Markdown subprocess', 'okay'))
        else:
            failed.append(('Markdown subprocess', 'markdown script returned incorrect markup'))
    except OSError:
        failed.append(('Markdown subprocess', 'failed to start ruby command: ruby package probably not installed'))
    except RuntimeError:
        failed.append(('Markdown subprocess', 'markdown script failed'))

    return passed, failed
def fetch_photos_task(emplids):
    """
    Fetch photos for the given emplids by delegating to do_photo_fetch.

    :param emplids: passed through unchanged to do_photo_fetch.
    :return: whatever do_photo_fetch returns for those emplids.
    """
    fetched = do_photo_fetch(emplids)
    return fetched
def deploy_checks(request=None):
    """
    Run the deployment sanity checks and report which ones passed or failed.

    Each check appends a ``(label, message)`` tuple to either the ``passed`` or
    the ``failed`` list; most expected error conditions are caught and turned
    into a ``failed`` entry rather than being raised.

    :param request: not used anywhere in this function body.
    :return: a ``(passed, failed)`` pair of lists of ``(label, message)`` tuples.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection', "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # non-BMP Unicode in database
    try:
        entry = LogEntry.objects.create(userid='ggbaker', description='Test Unicode \U0001F600',
                                        related_object=Semester.objects.first())
    except OperationalError:
        # NOTE(review): bare OperationalError must be imported at file level; the
        # other handlers here use django.db.utils.OperationalError -- confirm they match.
        failed.append(('Unicode handling in database', 'non-BMP character not supported by connection'))
    else:
        # re-fetch so we test what was actually stored, not the in-memory value
        entry = LogEntry.objects.get(id=entry.id)
        if '\U0001F600' in entry.description:
            passed.append(('Unicode handling in database', 'okay'))
        else:
            failed.append(('Unicode handling in database', 'non-BMP character not stored correctly'))

    # check that all database tables are utf8mb4, if mysql
    if settings.DATABASES['default']['ENGINE'].endswith('.mysql'):
        from django.apps import apps
        from django.db import connection
        CORRECT_CHARSET = 'utf8mb4'
        CORRECT_COLLATION = 'utf8mb4_unicode_ci'
        db_name = settings.DATABASES['default']['NAME']
        with connection.cursor() as cursor:
            # check database defaults
            cursor.execute("SELECT @@character_set_database, @@collation_database;")
            row = cursor.fetchone()
            if row != (CORRECT_CHARSET, CORRECT_COLLATION):
                failed.append((
                    'MySQL database charset',
                    'database default CHARACTER SET and COLLATION incorrect (it is %s): consider "ALTER DATABASE %s CHARACTER SET %s COLLATE %s;"'
                    % (row, db_name, CORRECT_CHARSET, CORRECT_COLLATION)))

            # check each table
            table_names = [model._meta.db_table for model in apps.get_models()]
            # inspect table charset and collations, adapted from https://stackoverflow.com/a/1049958/6871666
            cursor.execute(
                '''SELECT T.table_name, CCSA.character_set_name, CCSA.collation_name
                FROM information_schema.`TABLES` T,
                    information_schema.`COLLATION_CHARACTER_SET_APPLICABILITY` CCSA
                WHERE CCSA.collation_name=T.table_collation
                    AND T.table_schema=%s
                    AND T.table_name IN %s
                ''', (db_name, table_names))
            for table, charset, collation in cursor.fetchall():
                if (charset, collation) != (CORRECT_CHARSET, CORRECT_COLLATION):
                    failed.append((
                        'MySQL database charset',
                        'table %s has incorrect CHARACTER SET and COLLATION: consider "ALTER TABLE %s CHARACTER SET=%s COLLATE=%s;"'
                        % (table, table, CORRECT_CHARSET, CORRECT_COLLATION)))

            # check each column: the query itself selects only the offending columns
            cursor.execute(
                '''SELECT table_name, column_name, character_set_name, collation_name
                FROM information_schema.`COLUMNS`
                WHERE table_schema=%s
                    AND (character_set_name IS NOT NULL OR collation_name IS NOT NULL)
                    AND (character_set_name!=%s OR collation_name!=%s);
                ''', (db_name, CORRECT_CHARSET, CORRECT_COLLATION))
            for table, column, charset, collation in cursor.fetchall():
                failed.append((
                    'MySQL database charset',
                    'table %s has incorrect CHARACTER SET and COLLATION on a column (%s and %s): consider "ALTER TABLE %s CONVERT TO CHARACTER SET %s COLLATE %s;"'
                    % (table, charset, collation, table, CORRECT_CHARSET, CORRECT_COLLATION)))

    # Celery tasks
    celery_okay = False
    sims_task = None
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(('Celery task', "Couldn't import task: probably missing MySQLdb module"))
            else:
                try:
                    task = ping.apply_async()
                except kombu.exceptions.OperationalError:
                    failed.append(('Celery task', 'Kombu error. Probably RabbitMQ not running.'))
                except amqp.exceptions.AccessRefused:
                    failed.append(('Celery task', 'AccessRefused error. Probably bad RabbitMQ auth details.'))
                else:
                    from coredata.tasks import check_sims_task
                    sims_task = check_sims_task.apply_async()  # start here, in case it's slow
                    res = task.get(timeout=5)
                    if res == True:
                        passed.append(('Celery task', 'okay'))
                        celery_okay = True
                    else:
                        failed.append(('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(('Celery task', "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery failed to start with NotImplementedError'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # celery beat
    if settings.USE_CELERY:
        try:
            from coredata.tasks import beat_time_okay
            if beat_time_okay():
                passed.append(('Celery beat', 'okay'))
            else:
                failed.append(('Celery beat', 'marker file is old: celery beat likely not processing tasks'))
        except OSError:
            failed.append(('Celery beat', 'marker file is missing: celery beat likely not processing tasks'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    subprocess.call(['python3', 'manage.py', 'check_things', '--cache_subcall'])
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        failed.append(('Django cache',
                       'other processes not sharing cache: dummy/local probably being used instead of memcached'))
    elif res is None:
        failed.append(('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=301355288", ())
        result = list(db)
        if len(result) == 0:
            # must be tested before indexing into result: otherwise this branch is
            # unreachable and an empty result is misreported by the generic handler
            # below as "list index out of range".
            failed.append(('Reporting DB connection', 'query inexplicably returned nothing'))
        else:
            # whoever this is, they have non-ASCII in their name: let's hope they don't change it.
            lname = result[0][0]
            if not isinstance(lname, str):
                failed.append(('Reporting DB connection', 'string result not a string: check Unicode decoding'))
            elif lname[1:2] != '\u00e4':
                # slice (not index) so a too-short name is reported as a decoding problem
                failed.append(('Reporting DB connection', 'returned incorrectly-decoded Unicode'))
            else:
                passed.append(('Reporting DB connection', 'okay'))
    except SIMSProblem as e:
        failed.append(('Reporting DB connection', 'SIMSProblem, %s' % (str(e))))
    except ImportError:
        failed.append(('Reporting DB connection', "couldn't import DB2 module"))
    except Exception as e:
        # deliberate catch-all: any other problem becomes a failed check, not a crash
        failed.append(('Reporting DB connection', 'Generic exception, %s' % (str(e))))

    if settings.USE_CELERY and sims_task:
        # sims_task started above, so we can double-up on any wait
        try:
            res = sims_task.get(timeout=5)
            if res:
                # a truthy task result is used as the failure message
                failed.append(('Celery Reporting DB', res))
            else:
                passed.append(('Celery Reporting DB', 'okay'))
        except celery.exceptions.TimeoutError:
            failed.append(('Celery Reporting DB',
                           "didn't get result before timeout: maybe reporting database is slow?"))
    elif sims_task is None:
        failed.append(('Celery Reporting DB', "didn't check because of Celery failure"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append(('Haystack search',
                           'nothing found: maybe update_index, or wait for search server to fully start'))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res:  # I don't know who 301222726 is, but he/she is real.
                failed.append(('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib.error.HTTPError as e:
            failed.append(('Photo fetching', 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, str) and not emplid.startswith('2000'):
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # file creation in the necessary places
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT, 'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200, 9300,  # elasticsearch
        8983,  # solr
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(('Ports listening externally',
                       'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))

    # correct serving/redirecting of production domains
    if settings.DEPLOY_MODE == 'production':
        production_host_fails = 0
        for host in settings.SERVE_HOSTS + settings.REDIRECT_HOSTS:
            # check HTTPS serving/redirect
            try:
                url = 'https://' + host + reverse('docs:list_docs')  # must be a URL that doesn't require auth
                resp = requests.get(url, allow_redirects=False, timeout=5)
                if host in settings.SERVE_HOSTS and resp.status_code != 200:
                    failed.append(('HTTPS Serving',
                                   'expected 200 okay, but got %i at %s' % (resp.status_code, url)))
                    production_host_fails += 1
                elif host in settings.REDIRECT_HOSTS and resp.status_code != 301:
                    failed.append(('HTTPS Serving',
                                   'expected 301 redirect, but got %i at %s' % (resp.status_code, url)))
                    production_host_fails += 1
            except requests.exceptions.SSLError:
                failed.append(('HTTPS Serving', 'bad SSL/TLS certificate for %s' % (url, )))
                production_host_fails += 1
            except requests.exceptions.RequestException:
                failed.append(('HTTPS Serving', 'unable to connect to request %s' % (url, )))
                production_host_fails += 1

            # check HTTP redirect
            try:
                url = 'http://' + host + reverse('docs:list_docs')  # must be a URL that doesn't require auth
                resp = requests.get(url, allow_redirects=False, timeout=5)
                if resp.status_code != 301:
                    failed.append(('HTTP Serving',
                                   'expected 301 redirect to https://, but got %i at %s' % (resp.status_code, url)))
                    production_host_fails += 1
            except requests.exceptions.RequestException:
                failed.append(('HTTP Serving', 'unable to connect to request %s' % (url, )))
                production_host_fails += 1

        if production_host_fails == 0:
            passed.append((
                'HTTPS Serving',
                'okay: certs and redirects as expected, but maybe check http://www.digicert.com/help/ or https://www.ssllabs.com/ssltest/'
            ))

    # is the server time close to real-time?
    import ntplib
    try:
        c = ntplib.NTPClient()
        response = c.request('pool.ntp.org')
        if abs(response.offset) > 0.1:
            failed.append(('Server time', 'Time is %g seconds off NTP pool.' % (response.offset, )))
        else:
            passed.append(('Server time', 'okay'))
    except ntplib.NTPException as e:
        failed.append(('Server time', 'Unable to query NTP pool: %s' % (e, )))

    # library sanity
    err = bitfield_check()
    if err:
        failed.append(('Library sanity', 'django-bitfield: ' + err))
    else:
        err = cache_check()
        if err:
            failed.append(('Library sanity', 'django cache: ' + err))
        else:
            passed.append(('Library sanity', 'okay'))

    # github-flavoured markdown
    from courselib.github_markdown import markdown_to_html_rpc, markdown_to_html_subprocess
    md = 'test *markup*\n\n```python\nprint(1)\n```\n\u2605\U0001F600'
    correct = '<p>test <em>markup</em></p>\n<pre lang="python"><code>print(1)\n</code></pre>\n<p>\u2605\U0001F600</p>'

    try:
        # checks that ruby subprocess runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html_subprocess(md, fallback=False)
        if html.strip() == correct:
            passed.append(('Markdown subprocess', 'okay'))
        else:
            failed.append(('Markdown subprocess', 'markdown script returned incorrect markup'))
    except OSError:
        failed.append(('Markdown subprocess', 'failed to start ruby command: ruby package probably not installed'))
    except RuntimeError:
        failed.append(('Markdown subprocess', 'markdown script failed'))

    try:
        # checks that docker RPC runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html_rpc(md, fallback=False)
        if html.strip() == correct:
            passed.append(('Markdown RPC', 'okay'))
        else:
            failed.append(('Markdown RPC', 'markdown script returned incorrect markup'))
    except OSError:
        failed.append(('Markdown RPC', 'unable to connect for RPC: docker container may be down'))
    except AttributeError:
        failed.append(('Markdown RPC', 'unable to connect to RabbitMQ: not configured in settings.py'))

    # MOSS subprocess
    from submission.moss import check_moss_executable
    check_moss_executable(passed, failed)  # given both result lists; presumably appends its own entry -- confirm

    # locale is UTF-8 (matters for markdown script calls, the SIMS database connection)
    import locale
    _, encoding = locale.getdefaultlocale()
    if encoding == 'UTF-8':
        passed.append(('Locale encoding', 'okay'))
    else:
        failed.append(('Locale encoding', "is %r; should be 'UTF-8'" % (encoding, )))

    return passed, failed
def deploy_checks():
    """
    Run the deployment sanity checks and report which ones passed or failed.

    Each check appends a ``(label, message)`` tuple to either the ``passed`` or
    the ``failed`` list.

    NOTE(review): this version references the Python 2 names ``unicode``,
    ``basestring`` and ``urllib2``; confirm the interpreter it is meant for.

    :return: a ``(passed, failed)`` pair of lists of ``(label, message)`` tuples.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection', "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(
            ('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # Celery tasks
    celery_okay = False
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(
                    ('Celery task', "Couldn't import task: probably missing MySQLdb module"))
            else:
                t = ping.apply_async()
                # wait at most 5 seconds for the task result
                res = t.get(timeout=5)
                if res == True:
                    passed.append(('Celery task', 'okay'))
                    celery_okay = True
                else:
                    failed.append(
                        ('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(
            ('Celery task', "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery disabled'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    subprocess.call(['python', 'manage.py', 'check_things', '--cache_subcall'])
    cache_okay = False
    res = cache.get('check_things_cache_test')
    # only randval + 1 counts as success; presumably the subcall increments the
    # cached value -- confirm against the check_things command.
    if res == randval:
        failed.append((
            'Django cache',
            'other processes not sharing cache: dummy/local probably being used instead of memcached'
        ))
    elif res is None:
        failed.append(
            ('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=200133427", ())
        n = len(list(db))
        if n > 0:
            passed.append(('Reporting DB connection', 'okay'))
        else:
            failed.append(('Reporting DB connection', 'query inexplicably returned nothing'))
    except SIMSProblem as e:
        failed.append(
            ('Reporting DB connection', 'SIMSProblem, %s' % (unicode(e))))
    except ImportError:
        failed.append(
            ('Reporting DB connection', "couldn't import DB2 module"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append((
                'Haystack search',
                'nothing found: maybe update_index, or wait for search server to fully start'
            ))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    # (only attempted when both the cache and celery checks above succeeded)
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res:  # I don't know who 301222726 is, but he/she is real.
                failed.append(
                    ('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib2.HTTPError as e:
            failed.append(
                ('Photo fetching', 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(
            ('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, basestring) and not emplid.startswith('2000'):
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # certificates
    # (a truthy _check_cert result is used as the failure message)
    bad_cert = 0
    res = _check_cert('/etc/stunnel/stunnel.pem')
    if res:
        failed.append(('Stunnel cert', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.pem')
    if res:
        failed.append(('SSL PEM', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.key')
    if res:
        failed.append(('SSL KEY', res))
        bad_cert += 1

    if bad_cert == 0:
        passed.append(
            ('Certificates', 'All okay, but maybe check http://www.digicert.com/help/'))

    # SVN database
    if settings.SVN_DB_CONNECT:
        from courselib.svn import SVN_TABLE, _db_conn
        import MySQLdb
        try:
            db = _db_conn()
            db.execute('SELECT count(*) FROM ' + SVN_TABLE, ())
            # first column of the first row is the record count
            n = list(db)[0][0]
            if n > 0:
                passed.append(('SVN database', 'okay'))
            else:
                failed.append(('SVN database', "couldn't access records"))
        except MySQLdb.OperationalError:
            failed.append(('SVN database', "can't connect to database"))
    else:
        failed.append(('SVN database', 'SVN_DB_CONNECT not set in secrets.py'))

    # AMAINT database
    if settings.AMAINT_DB_PASSWORD:
        from coredata.importer import AMAINTConn
        import MySQLdb
        try:
            db = AMAINTConn()
            db.execute("SELECT count(*) FROM idMap", ())
            # first column of the first row is the record count
            n = list(db)[0][0]
            if n > 0:
                passed.append(('AMAINT database', 'okay'))
            else:
                failed.append(('AMAINT database', "couldn't access records"))
        except MySQLdb.OperationalError:
            failed.append(('AMAINT database', "can't connect to database"))
    else:
        failed.append(
            ('AMAINT database', 'AMAINT_DB_PASSWORD not set in secrets.py'))

    # file creation in the necessary places
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT, 'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        # None means success; any other result is used as the failure message
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200, 9300,  # elasticsearch
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            # a successful connection means the port is reachable via the hostname
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(
            ('Ports listening externally', 'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))

    return passed, failed
def deploy_checks(request=None):
    """
    Run the deployment sanity checks and report which ones succeeded.

    Each check appends a ``(check name, message)`` tuple to one of two lists.
    The checks run in a fixed order and a failing check never stops the ones
    after it.

    :param request: accepted for the web frontend; only the (currently
        disabled) Piwik check would use it, so it is otherwise ignored.
    :return: ``(passed, failed)`` -- two lists of ``(check name, message)``
        tuples.
    """
    passed = []
    failed = []

    # cache something now to see if it's still there further down.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    try:
        n = Semester.objects.all().count()
        if n > 0:
            passed.append(('Main database connection', 'okay'))
        else:
            failed.append(('Main database connection', "Can't find any coredata.Semester objects"))
    except django.db.utils.OperationalError:
        failed.append(('Main database connection', "can't connect to database"))
    except django.db.utils.ProgrammingError:
        failed.append(('Main database connection', "database tables missing"))

    # non-BMP Unicode in database
    try:
        entry = LogEntry.objects.create(userid='ggbaker', description='Test Unicode \U0001F600',
                                        related_object=Semester.objects.first())
    except OperationalError:
        failed.append(('Unicode handling in database', 'non-BMP character not supported by connection'))
    else:
        # re-read the row so we test what was actually stored, not the in-memory value
        entry = LogEntry.objects.get(id=entry.id)
        if '\U0001F600' in entry.description:
            passed.append(('Unicode handling in database', 'okay'))
        else:
            failed.append(('Unicode handling in database', 'non-BMP character not stored correctly'))

    # Celery tasks
    celery_okay = False
    try:
        if settings.USE_CELERY:
            try:
                from coredata.tasks import ping
            except ImportError:
                failed.append(('Celery task', "Couldn't import task: probably missing MySQLdb module"))
            else:
                try:
                    t = ping.apply_async()
                except kombu.exceptions.OperationalError:
                    failed.append(('Celery task', 'Kombu error. Probably RabbitMQ not running.'))
                else:
                    res = t.get(timeout=5)
                    if res == True:
                        passed.append(('Celery task', 'okay'))
                        celery_okay = True
                    else:
                        failed.append(('Celery task', 'got incorrect result from task'))
        else:
            failed.append(('Celery task', 'celery disabled in settings'))
    except celery.exceptions.TimeoutError:
        failed.append(('Celery task', "didn't get result before timeout: celeryd maybe not running"))
    except socket.error:
        failed.append(('Celery task', "can't communicate with broker"))
    except NotImplementedError:
        failed.append(('Celery task', 'celery disabled'))
    except django.db.utils.ProgrammingError:
        failed.append(('Celery task', 'celery DB tables missing'))
    except django.db.utils.OperationalError:
        failed.append(('Celery task', 'djkombu tables missing: try migrating'))

    # celery beat
    try:
        from coredata.tasks import BEAT_TEST_FILE, BEAT_FILE_MAX_AGE
        beatfile_age = time.time() - os.stat(BEAT_TEST_FILE).st_mtime
        if beatfile_age < BEAT_FILE_MAX_AGE:
            passed.append(('Celery beat', 'okay'))
        else:
            failed.append(('Celery beat', 'marker file is old: celery beat likely not processing tasks'))
    except OSError:
        failed.append(('Celery beat', 'marker file is missing: celery beat likely not processing tasks'))

    # Django cache
    # (has a subprocess do something to make sure we're in a persistent shared cache, not DummyCache)
    subprocess.call(['python3', 'manage.py', 'check_things', '--cache_subcall'])
    cache_okay = False
    res = cache.get('check_things_cache_test')
    if res == randval:
        failed.append(('Django cache', 'other processes not sharing cache: dummy/local probably being used instead of memcached'))
    elif res is None:
        failed.append(('Django cache', 'unable to retrieve anything from cache'))
    elif res != randval + 1:
        failed.append(('Django cache', 'unknown result'))
    else:
        passed.append(('Django cache', 'okay'))
        cache_okay = True

    # Reporting DB connection
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=301355288", ())
        result = list(db)
        if not result:
            # must be tested before indexing: an empty result would otherwise raise IndexError
            failed.append(('Reporting DB connection', 'query inexplicably returned nothing'))
        else:
            # whoever this is, they have non-ASCII in their name: let's hope they don't change it.
            lname = result[0][0]
            if not isinstance(lname, str):
                failed.append(('Reporting DB connection', 'string result not a string: check Unicode decoding'))
            elif lname[1:2] != '\u00e4':
                # slice rather than index so a too-short name is reported, not raised
                failed.append(('Reporting DB connection', 'returned incorrectly-decoded Unicode'))
            else:
                passed.append(('Reporting DB connection', 'okay'))
    except SIMSProblem as e:
        failed.append(('Reporting DB connection', 'SIMSProblem, %s' % (str(e))))
    except ImportError:
        failed.append(('Reporting DB connection', "couldn't import DB2 module"))

    # compression enabled?
    if settings.COMPRESS_ENABLED:
        passed.append(('Asset compression enabled', 'okay'))
    else:
        failed.append(('Asset compression enabled', 'disabled in settings'))

    # Haystack searching
    from haystack.query import SearchQuerySet
    try:
        res = SearchQuerySet().filter(text='cmpt')
        if res:
            passed.append(('Haystack search', 'okay'))
        else:
            failed.append(('Haystack search', 'nothing found: maybe update_index, or wait for search server to fully start'))
    except IOError:
        failed.append(('Haystack search', "can't read/write index"))

    # photo fetching
    if cache_okay and celery_okay:
        try:
            res = do_photo_fetch(['301222726'])
            if '301222726' not in res:  # I don't know who 301222726 is, but he/she is real.
                failed.append(('Photo fetching', "didn't find photo we expect to exist"))
            else:
                passed.append(('Photo fetching', 'okay'))
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            failed.append(('Photo fetching', 'photo password not set'))
        except urllib.error.HTTPError as e:
            failed.append(('Photo fetching', 'failed to fetch photo (%s). Maybe wrong password?' % (e)))
    else:
        failed.append(('Photo fetching', 'not testing since memcached or celery failed'))

    # emplid/userid API
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        failed.append(('Emplid API', 'no emplid returned'))
    elif isinstance(emplid, str) and not emplid.startswith('2000'):
        failed.append(('Emplid API', 'incorrect emplid returned'))
    else:
        passed.append(('Emplid API', 'okay'))

    # Piwik API (check currently disabled)
    #if not request:
    #    failed.append(('Piwik API', "can only check in web frontend with valid request object"))
    #elif not settings.PIWIK_URL or not settings.PIWIK_TOKEN:
    #    failed.append(('Piwik API', "not configured in secrets.py"))
    #else:
    #    # try to re-log this request in piwik and see what happens
    #    from piwik_middleware.tracking import PiwikTrackerLogic, urllib_errors
    #    tracking_logic = PiwikTrackerLogic()
    #    kwargs = tracking_logic.get_track_kwargs(request)
    #    try:
    #        tracking_logic.do_track_page_view(fail_silently=False, **kwargs)
    #    except urllib_errors as e:
    #        failed.append(('Piwik API', "API call failed: %s" % (e)))
    #    else:
    #        passed.append(('Piwik API', 'okay'))

    # Backup server (check currently disabled)
    #if not settings.BACKUP_SERVER or not settings.BACKUP_USER or not settings.BACKUP_PATH or not settings.BACKUP_PASSPHRASE:
    #    failed.append(('Backup server', 'Backup server settings not all present'))
    #else:
    #    from coredata.management.commands.backup_remote import do_check
    #    try:
    #        do_check()
    #    except RuntimeError as e:
    #        failed.append(('Backup server', unicode(e)))
    #    passed.append(('Backup server', 'okay'))

    # certificates
    bad_cert = 0
    res = _check_cert('/etc/stunnel/stunnel.pem')
    if res:
        failed.append(('Stunnel cert', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.pem')
    if res:
        failed.append(('SSL PEM', res))
        bad_cert += 1
    res = _check_cert('/etc/nginx/cert.key')
    if res:
        failed.append(('SSL KEY', res))
        bad_cert += 1

    if bad_cert == 0:
        passed.append(('Certificates', 'All okay, but maybe check http://www.digicert.com/help/ or https://www.ssllabs.com/ssltest/'))

    # file creation in the necessary places
    dirs_to_check = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT, 'CACHE'), 'compressed media root'),
    ]
    for directory, label in dirs_to_check:
        res = _check_file_create(directory)
        if res is None:
            passed.append(('File creation in ' + label, 'okay'))
        else:
            failed.append(('File creation in ' + label, res))

    # are any services listening publicly that shouldn't?
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200, 9300,  # elasticsearch
    ]
    connected = []
    for p in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((hostname, p))
        except socket.error:
            # couldn't connect: good
            pass
        else:
            connected.append(p)
        finally:
            s.close()

    if connected:
        failed.append(('Ports listening externally', 'got connections to port ' + ','.join(str(p) for p in connected)))
    else:
        passed.append(('Ports listening externally', 'okay'))

    # is the server time close to real-time?
    import ntplib
    try:
        response = ntplib.NTPClient().request('0.ca.pool.ntp.org')
    except (ntplib.NTPException, socket.error) as e:
        # an unreachable NTP pool is a failed check, not a reason to lose every other result
        failed.append(('Server time', 'could not query NTP pool: %s' % (e,)))
    else:
        if abs(response.offset) > 0.1:
            failed.append(('Server time', 'Time is %g seconds off NTP pool.' % (response.offset,)))
        else:
            passed.append(('Server time', 'okay'))

    # library sanity
    err = bitfield_check()
    if err:
        failed.append(('Library sanity', 'django-bitfield: ' + err))
    else:
        err = cache_check()
        if err:
            failed.append(('Library sanity', 'django cache: ' + err))
        else:
            passed.append(('Library sanity', 'okay'))

    # github-flavoured markdown subprocess
    from courselib.markup import markdown_to_html
    try:
        # checks that script runs; does github-flavour correctly; does Unicode correctly.
        html = markdown_to_html('test *markup*\n\n```python\nprint(1)\n```\n\u2605\U0001F600')
        if html.strip() == '<p>test <em>markup</em></p>\n<pre lang="python"><code>print(1)\n</code></pre>\n<p>\u2605\U0001F600</p>':
            passed.append(('Markdown subprocess', 'okay'))
        else:
            failed.append(('Markdown subprocess', 'markdown script returned incorrect markup'))
    except OSError:
        failed.append(('Markdown subprocess', 'failed to start ruby command: ruby package probably not installed'))
    except RuntimeError:
        failed.append(('Markdown subprocess', 'markdown script failed'))

    # locale is UTF-8 (matters for markdown script calls, the SIMS database connection)
    import locale
    _, encoding = locale.getdefaultlocale()
    if encoding == 'UTF-8':
        passed.append(('Locale encoding', 'okay'))
    else:
        failed.append(('Locale encoding', "is %r; should be 'UTF-8'" % (encoding,)))

    return passed, failed
def deploy_checks():
    """
    Run every deployment sanity check in sequence.

    Returns a ``(passed, failed)`` pair. Each element is a list of
    ``(check name, message)`` tuples, in the order the checks ran. A failing
    check is recorded and the remaining checks still run.
    """
    passed = []
    failed = []

    def ok(check, message='okay'):
        # record a successful check
        passed.append((check, message))

    def bad(check, message):
        # record a failed check
        failed.append((check, message))

    # stash a value in the cache up front; the cache check below looks for it again.
    randval = random.randint(1, 1000000)
    cache.set('check_things_cache_test', randval, 60)

    # Django database
    check = 'Main database connection'
    try:
        semester_count = Semester.objects.all().count()
    except django.db.utils.OperationalError:
        bad(check, "can't connect to database")
    except django.db.utils.ProgrammingError:
        bad(check, "database tables missing")
    else:
        if semester_count > 0:
            ok(check)
        else:
            bad(check, "Can't find any coredata.Semester objects")

    # Celery tasks
    check = 'Celery task'
    celery_okay = False
    try:
        if not settings.USE_CELERY:
            bad(check, 'celery disabled in settings')
        else:
            try:
                from coredata.tasks import ping
            except ImportError:
                bad(check, "Couldn't import task: probably missing MySQLdb module")
            else:
                outcome = ping.apply_async().get(timeout=5)
                if outcome == True:
                    ok(check)
                    celery_okay = True
                else:
                    bad(check, 'got incorrect result from task')
    except celery.exceptions.TimeoutError:
        bad(check, "didn't get result before timeout: celeryd maybe not running")
    except socket.error:
        bad(check, "can't communicate with broker")
    except NotImplementedError:
        bad(check, 'celery disabled')
    except django.db.utils.ProgrammingError:
        bad(check, 'celery DB tables missing')
    except django.db.utils.OperationalError:
        bad(check, 'djkombu tables missing: try migrating')

    # celery beat: the marker file's age is compared against BEAT_FILE_MAX_AGE
    check = 'Celery beat'
    try:
        from coredata.tasks import BEAT_TEST_FILE, BEAT_FILE_MAX_AGE
        now = time.time()
        marker_age = now - os.stat(BEAT_TEST_FILE).st_mtime
        if marker_age < BEAT_FILE_MAX_AGE:
            ok(check)
        else:
            bad(check, 'marker file is old: celery beat likely not processing tasks')
    except OSError:
        bad(check, 'marker file is missing: celery beat likely not processing tasks')

    # Django cache
    # (a subprocess touches the key so we can tell a persistent shared cache from DummyCache)
    check = 'Django cache'
    subprocess.call(['python', 'manage.py', 'check_things', '--cache_subcall'])
    cache_okay = False
    cached = cache.get('check_things_cache_test')
    if cached == randval:
        bad(check, 'other processes not sharing cache: dummy/local probably being used instead of memcached')
    elif cached is None:
        bad(check, 'unable to retrieve anything from cache')
    elif cached != randval + 1:
        bad(check, 'unknown result')
    else:
        ok(check)
        cache_okay = True

    # Reporting DB connection
    check = 'Reporting DB connection'
    try:
        db = SIMSConn()
        db.execute("SELECT last_name FROM ps_names WHERE emplid=200133427", ())
        rows = list(db)
        if rows:
            ok(check)
        else:
            bad(check, 'query inexplicably returned nothing')
    except SIMSProblem as e:
        bad(check, 'SIMSProblem, %s' % (unicode(e)))
    except ImportError:
        bad(check, "couldn't import DB2 module")

    # compression enabled?
    check = 'Asset compression enabled'
    if not settings.COMPRESS_ENABLED:
        bad(check, 'disabled in settings')
    else:
        ok(check)

    # Haystack searching
    from haystack.query import SearchQuerySet
    check = 'Haystack search'
    try:
        hits = SearchQuerySet().filter(text='cmpt')
        if hits:
            ok(check)
        else:
            bad(check, 'nothing found: maybe update_index, or wait for search server to fully start')
    except IOError:
        bad(check, "can't read/write index")

    # photo fetching (only attempted when both the cache and celery checks succeeded)
    check = 'Photo fetching'
    if not (cache_okay and celery_okay):
        bad(check, 'not testing since memcached or celery failed')
    else:
        try:
            photos = do_photo_fetch(['301222726'])
            # I don't know who 301222726 is, but he/she is real.
            if '301222726' in photos:
                ok(check)
            else:
                bad(check, "didn't find photo we expect to exist")
        except (KeyError, Unit.DoesNotExist, django.db.utils.ProgrammingError):
            bad(check, 'photo password not set')
        except urllib2.HTTPError as e:
            bad(check, 'failed to fetch photo (%s). Maybe wrong password?' % (e))

    # emplid/userid API
    check = 'Emplid API'
    emplid = userid_to_emplid('ggbaker')
    if not emplid:
        bad(check, 'no emplid returned')
    elif isinstance(emplid, basestring) and not emplid.startswith('2000'):
        bad(check, 'incorrect emplid returned')
    else:
        ok(check)

    # certificates
    cert_files = [
        ('/etc/stunnel/stunnel.pem', 'Stunnel cert'),
        ('/etc/nginx/cert.pem', 'SSL PEM'),
        ('/etc/nginx/cert.key', 'SSL KEY'),
    ]
    bad_cert = 0
    for cert_path, cert_label in cert_files:
        problem = _check_cert(cert_path)
        if problem:
            bad(cert_label, problem)
            bad_cert += 1

    if bad_cert == 0:
        ok('Certificates', 'All okay, but maybe check http://www.digicert.com/help/')

    # SVN database
    check = 'SVN database'
    if not settings.SVN_DB_CONNECT:
        bad(check, 'SVN_DB_CONNECT not set in secrets.py')
    else:
        from courselib.svn import SVN_TABLE, _db_conn
        import MySQLdb
        try:
            db = _db_conn()
            db.execute('SELECT count(*) FROM ' + SVN_TABLE, ())
            record_count = list(db)[0][0]
            if record_count > 0:
                ok(check)
            else:
                bad(check, "couldn't access records")
        except MySQLdb.OperationalError:
            bad(check, "can't connect to database")

    # AMAINT database
    check = 'AMAINT database'
    if not settings.AMAINT_DB_PASSWORD:
        bad(check, 'AMAINT_DB_PASSWORD not set in secrets.py')
    else:
        from coredata.importer import AMAINTConn
        import MySQLdb
        try:
            db = AMAINTConn()
            db.execute("SELECT count(*) FROM idMap", ())
            record_count = list(db)[0][0]
            if record_count > 0:
                ok(check)
            else:
                bad(check, "couldn't access records")
        except MySQLdb.OperationalError:
            bad(check, "can't connect to database")

    # file creation in the necessary places
    writable_dirs = [
        (settings.DB_BACKUP_DIR, 'DB backup dir'),
        (settings.SUBMISSION_PATH, 'submitted files path'),
        (os.path.join(settings.COMPRESS_ROOT, 'CACHE'), 'compressed media root'),
    ]
    for directory, label in writable_dirs:
        problem = _check_file_create(directory)
        if problem is not None:
            bad('File creation in ' + label, problem)
        else:
            ok('File creation in ' + label)

    # are any services listening publicly that shouldn't?
    hostname = socket.gethostname()
    ports = [
        25,  # mail server
        #4369, # epmd, erlang port mapper daemon is okay to listen externally and won't start with ERL_EPMD_ADDRESS set. http://serverfault.com/questions/283913/turn-off-epmd-listening-port-4369-in-ubuntu-rabbitmq
        45130,  # beam? rabbitmq something
        4000,  # main DB stunnel
        50000,  # reporting DB
        8000,  # gunicorn
        11211,  # memcached
        9200, 9300,  # elasticsearch
    ]

    def accepts_connection(port):
        # True when a TCP connection to hostname:port succeeds; the socket is always closed
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((hostname, port))
        except socket.error:
            # couldn't connect: good
            return False
        else:
            return True
        finally:
            sock.close()

    connected = [p for p in ports if accepts_connection(p)]

    check = 'Ports listening externally'
    if connected:
        bad(check, 'got connections to port ' + ','.join(str(p) for p in connected))
    else:
        ok(check)

    return passed, failed