def cleanHouse():
    """Delete raw MOS files that fall outside the per-product retention list.

    For every product in the retention table, one raw filename is generated
    per listed hour offset (relative to local midnight of the current day).
    Those names form the keep-set; every file reported by
    ``mosHelper.listRawFiles`` for that product that is not in the keep-set
    is removed from the 'raw' directory given by ``mosHelper.getDirNames``.

    Returns:
        None.

    Raises:
        OSError: propagated from ``os.remove`` if a file cannot be deleted.
    """
    # Grab a reference to the existing logger.
    # This only works if the script calling this function has
    # already called mosHelper.setUpTheLogger().
    module_logger = logging.getLogger('mosgraphics.cleanHouse')
    dictDirNames = mosHelper.getDirNames()

    # TODO: mosplots.calc_dates could probably be rewritten based on the
    # work here.
    #
    # Hour offsets of the files to keep. The offset is SUBTRACTED from local
    # midnight, so positive values reach back in time. The UTC date may be in
    # the future compared to the local date at times, so some negative
    # offsets are included to be sure the early day runs (00z, 06z) won't get
    # accidentally deleted. Some of the resulting filenames may not exist
    # yet, but that's OK: the important thing is that they won't be deleted.
    hrsToKeep = {
        'MEX': [-24, -12, 0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132,
                144, 156, 168, 180, 192, 204, 216],
        'MAV': [-24, -18, -12, -6, 0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60,
                66, 72],
        'MET': [-36, -24, -12, 0, 12, 24, 36, 48, 60, 72, 84],
    }
    # Currently disabled products:
    #hrsToKeep['ECE'] = [-24, 0, 24, 48, 72, 96, 120, 144, 168, 192, 216]
    #hrsToKeep['ECS'] = [-24, 0, 24, 48, 72, 96]

    # Use 0 o'clock (local time) of the current day as the baseline.
    # It makes the math easier.
    rightnow = dt.datetime.now()
    nowish = dt.datetime(year=rightnow.year, month=rightnow.month,
                         day=rightnow.day, hour=0)

    # Iterate the dict directly: this works on Python 2 and 3 alike, whereas
    # dict.iterkeys() exists only on Python 2.
    for key in hrsToKeep:
        mostype = key.lower()

        # Build the set of filenames to keep for this product.
        keepfiles = set()
        for hr in hrsToKeep[key]:
            prev = nowish - dt.timedelta(hours=hr)
            Y, M, D, H = prev.strftime('%Y %m %d %H').split()
            # NOTE(review): 'ABCD' looks like a placeholder for an argument
            # the 'raw' filename does not depend on -- confirm against
            # mosHelper.makeFilenames.
            keepfiles.add(
                mosHelper.makeFilenames(mostype, 'ABCD', Y, M, D, H)['raw'])

        # Contents of the raw files directory for this mostype.
        rawfiles = set(mosHelper.listRawFiles(mostype))

        # Everything on disk that is not in the keep-set gets deleted.
        delme = rawfiles.difference(keepfiles)
        module_logger.info('%s are marked for deletion from %s', delme, mostype.upper())
        for fn in delme:
            os.remove(os.path.join(dictDirNames['raw'], fn))
def GrabEm():
    """Download MET, MEX and MAV raw files that are missing or look too small.

    For each product a ``MOS`` object is created and asked to check the
    primary server; if that does not return status 1, the backup server is
    tried. Each (url, local filename) pair the object then exposes is
    downloaded into the 'raw' directory given by ``mosHelper.getDirNames``,
    unless a file with that name already exists and is larger than the
    object's size threshold.

    Returns:
        list: the local filenames written during this call, in the order
        they were written. The original author notes these still need to be
        processed with ``mosHelper.parseStations``.
    """
    # Grab a reference to the existing logger.
    # This only works if the script calling this function has
    # already called mosHelper.setUpTheLogger().
    module_logger = logging.getLogger('mosgraphics.GrabEm')
    dictDirNames = mosHelper.getDirNames()
    moslist = ['MET', 'MEX', 'MAV']

    # Use this to keep track of which files were written and still need
    # to be processed with mosHelper.parseStations.
    rawfiles = []

    for mosname in moslist:
        tempObj = MOS(mosname)
        tempObj.set_filethresh()
        module_logger.info('Asking MDL for the {}'.format(mosname))

        # Compare status codes by value; identity tests against an int
        # literal only work by accident of CPython's small-int caching.
        status = tempObj.check_primary()
        if status != 1:
            module_logger.warning('Lost the connection with MDL. File was not downloaded.')
            module_logger.info('Trying another server')
            status2 = tempObj.check_backup()
            if status2 != 1:
                module_logger.warning('Bummer. Unable to download {}'.format(mosname))
            else:
                module_logger.info('Success!')

        if tempObj.fileurls is None:
            module_logger.info('A rolling stone gathers no {} MOS.'.format(mosname))
            continue

        for furl, localfilename in zip(tempObj.fileurls, tempObj.localfnames):
            fullname = os.path.join(dictDirNames['raw'], localfilename)

            # If there already exists a file with the intended local name,
            # check to see if it has an appropriate size. If the file seems
            # too small, something went wrong the last time it was
            # downloaded, so fetch it again. Otherwise leave it alone.
            # The listing is refreshed on every iteration because this loop
            # itself adds files to the directory.
            if localfilename in mosHelper.listRawFiles(mosname):
                module_logger.info('{} already exists on disk.'.format(localfilename))
                # NOTE(review): the * 1000 suggests filethresh is in
                # kilobytes -- confirm against MOS.set_filethresh.
                thresh = tempObj.filethresh * 1000
                if os.path.getsize(fullname) > thresh:
                    module_logger.info('Skipping. It\'s probably OK.')
                    continue
                module_logger.info('Downloading. The copy on disk seems too small.')

            # Only missing or undersized files reach this point.
            # try/finally guarantees the connection is closed even if the
            # read fails part-way through.
            response = urllib2.urlopen(furl)
            try:
                contents = response.read()
            finally:
                response.close()

            module_logger.info('Writing to %s', localfilename)
            # The with-block closes the file even if the write raises.
            with open(fullname, 'w') as output:
                output.write(contents)
            rawfiles.append(localfilename)

    return rawfiles