def handle_label(self, comment='', **options):
    """Request every active monitored URL several times and store each result.

    A ``URLMonitorTask`` is saved for the run, then for each iteration and
    each ``URLMonitorURL`` with ``active`` set, one ``URLMonitorScan`` row is
    saved holding the values returned by ``self.scan_url``.

    Args:
        comment: Free-text label stored on the task; must not be empty.
        **options: ``verbosity`` (values above 1 switch ``debug.DEBUG`` on)
            and ``iterations`` (number of passes over the URLs, default 10).

    Raises:
        CommandError: If no comment was supplied.
    """
    verbosity = int(options.get('verbosity', 0))
    if verbosity > 1:
        debug.DEBUG = True

    print("Scan URLs started at {0}\n".format(datetime.datetime.utcnow()))

    # Some basic checking. Any empty value (including None) is rejected here,
    # before it can reach the string concatenation below.
    if not comment:
        raise CommandError("Please specify a comment for this scan.\n\n")

    iterations = int(options.get('iterations', 10))

    # Create an error log
    # NOTE(review): nothing visible in this method saves or stops the log
    # again - confirm whether that is handled elsewhere.
    debug.errorlog_start('scan_urls')
    debug.errorlog("Log started for: " + comment)

    task = URLMonitorTask()
    task.comment = str(comment)
    task.save()

    targets = URLMonitorURL.objects.filter(active__exact=True)

    for iteration in range(1, iterations + 1):
        for target in targets:
            scan = URLMonitorScan()
            scan.url = target
            scan.task = task
            scan.iteration = iteration
            try:
                (scan.status_code, scan.time_of_request,
                 scan.ttfb, scan.ttlb) = self.scan_url(target.url)
            except urllib2.HTTPError as e:
                # An HTTP error response still gets a row: keep its status
                # code and stamp it with the current time. The two timing
                # fields are left at whatever the model defaults to.
                scan.status_code = e.code
                scan.time_of_request = datetime.datetime.utcnow()
            scan.save()
def handle(self, institution_name=YOUR_INSTITUTION, **options):
    """Scan iTunes U and record what was found, depending on ``options['mode']``.

    Modes:
        1. Scan one institution's collections and their items, writing a new
           historical record whenever something differs from what is stored,
           then flag stored records that have gone missing or reappeared.
        2. Scan the Top Collections chart and save one chart row per entry.
        3. Scan the Top Downloads chart and save one chart row per entry.
        4. Scan the list of institutions, creating or updating stored ones.

    Modes 2 and 3 re-invoke this command (modes 4 and 1) through
    ``management.call_command`` when a chart entry has no stored record.

    Args:
        institution_name: Name of the institution to scan (looked up
            case-insensitively in mode 1).
        **options: ``verbosity`` (values above 1 switch ``debug.DEBUG`` on)
            and ``mode`` (1-4, default 1).

    Returns:
        ``False`` when the arguments are rejected or the institution is not
        found; ``None`` once a scan has run to the end.
    """
    verbosity = int(options.get('verbosity', 0))
    if verbosity > 1:
        debug.DEBUG = True

    # Create an error log
    debug.errorlog_start('scan_itunes')

    # Some basic error checking
    if institution_name is None:
        debug.errorlog("Please specify the institution to scan.", display=True)
        return False

    # The same help text is shown for a non-numeric and an out-of-range mode.
    mode_help = """Please specify a valid mode for this scan.
    1) Scan an institution's collection
    2) Scan the Top Collections chart
    3) Scan the Top Downloads chart
    4) Scan the list of institutions
    """
    try:
        mode = int(options.get("mode", 1))
    except ValueError:
        debug.errorlog(mode_help, display=True)
        return False
    if mode < 1 or mode > 4:
        debug.errorlog(mode_help, display=True)
        return False

    scantime = datetime.datetime.now(pytz.utc)
    print("Scan iTunes started at " + str(scantime) + "\n")
    scanlog = ItuScanLog(mode=mode, time=scantime, comments="")
    scanlog.save()

    # Fields that must all match for a freshly scanned item to count as
    # identical to an already stored historical item record.
    item_fields_compared = (
        'name', 'itu_id', 'url', 'artist_name', 'description', 'duration',
        'explicit', 'feed_url', 'file_extension', 'kind', 'long_description',
        'playlist_id', 'playlist_name', 'popularity', 'preview_length',
        'preview_url', 'rank', 'release_date',
    )

    if mode == 1:
        # The bare excepts in this method are kept on purpose: the scan is
        # written to log a failure and carry on rather than stop part-way.
        try:
            institution = ItuInstitution.objects.filter(name__iexact=institution_name)[0]
        except:
            debug.errorlog(institution_name + u" is not a recognised institution.", display=True)
            scanlog.delete()
            return False
        scanlog.institution = institution
        scanlog.save()
        comment = u"Scan (and update) of " + institution_name + u"\'s collection from %s" % institution.url
        debug.log(u"Log started for: %s" % unicode(comment), display=True)
        print(comment)
        print("Getting information about collections...")
        collections = itunes.get_institution_collections(institution, hurry=True)
        print("Processing collection information and scanning individual items...")
        collections_spotted = []
        items_spotted = []
        for collection_itunes in collections:
            if collection_itunes:
                # Check if this collection's genre exists - if not, create it.
                genre = ItuGenre(name=collection_itunes['genre'],
                                 itu_id=int(collection_itunes['genre_id']),
                                 url=collection_itunes['genre_url'])
                genre_exists = False
                for saved_genre in ItuGenre.objects.all():
                    if (int(genre.itu_id) == int(saved_genre.itu_id)
                            and genre.name == saved_genre.name
                            and genre.url == saved_genre.url):
                        genre_exists = True
                        genre = saved_genre
                if not genre_exists:
                    debug.log(u'Created new genre ' + unicode(genre.name), display=True)
                    genre.save()

                collection_record_absolute = ItuCollection(institution=institution)
                if collection_itunes['last modified']:
                    last_modified = parse(collection_itunes['last modified']).date()
                else:
                    last_modified = None
                collection_record_historical = ItuCollectionHistorical(
                    name=collection_itunes['series'],
                    itu_id=int(collection_itunes['series_id']),
                    img170=collection_itunes['series_img_170'],
                    url=collection_itunes['series_url'],
                    language=collection_itunes['language'],
                    last_modified=last_modified,
                    contains_movies=collection_itunes['contains_movies'],
                    missing=None,
                    version=1,
                    institution=institution,
                    scanlog=scanlog,
                    genre=genre,
                    previous=None,
                    itucollection=collection_record_absolute)

                # Compared below with the stored record's rating_checksum().
                # NOTE(review): with Python 2 integer division the count term
                # is 0 for an int count below 10**9 - confirm this matches
                # ItuCollectionHistorical.rating_checksum().
                rating_checksum = 0
                for rating in collection_itunes['ratings']:
                    rating_checksum += pow(10, rating['stars']) + (rating['count'] / 1000000000)

                # Put together a list of saved collection_record_historicals that look
                # like they're the same as our collection_record_historical, really.
                # NOTE(review): nesting reconstructed to mirror the item-matching
                # loop further down - confirm against the original layout.
                similar_collection_records_historical = []
                collection_record_historical_exists = False
                for collection_record_historical_saved in ItuCollectionHistorical.objects.filter(
                        (Q(name=collection_record_historical.name)
                         & Q(contains_movies=collection_record_historical.contains_movies))
                        | Q(itu_id=collection_record_historical.itu_id)
                        | Q(url=collection_record_historical.url)):  # name AND Video/Audio
                    if collection_record_historical.url != collection_record_historical_saved.url:
                        # Don't add similar collection_record_historical if the URLs
                        # are different, but both are accessible.
                        try:
                            urllib2.urlopen(collection_record_historical.url).close()
                            urllib2.urlopen(collection_record_historical_saved.url).close()
                        except urllib2.URLError:
                            similar_collection_records_historical.append(collection_record_historical_saved)
                    elif (collection_record_historical.name == collection_record_historical_saved.name
                          and collection_record_historical.contains_movies == collection_record_historical_saved.contains_movies
                          and int(collection_record_historical.itu_id) == int(collection_record_historical_saved.itu_id)
                          and collection_record_historical.img170 == collection_record_historical_saved.img170
                          and collection_record_historical.language == collection_record_historical_saved.language
                          and rating_checksum == collection_record_historical_saved.rating_checksum()):
                        # Same URL and nothing else changed: reuse the stored record.
                        collection_record_historical_exists = True
                        collection_record_historical = collection_record_historical_saved
                    else:
                        similar_collection_records_historical.append(collection_record_historical_saved)

                if not collection_record_historical_exists:
                    if similar_collection_records_historical:
                        # Chain the new record onto the highest-versioned similar one.
                        similar_collection_records_historical.sort(
                            key=lambda this_collection_record_historical: this_collection_record_historical.version)
                        latest_similar_collection_record_historical = similar_collection_records_historical[-1]
                        collection_record_historical.previous = latest_similar_collection_record_historical
                        collection_record_historical.version = latest_similar_collection_record_historical.version + 1
                        collection_record_historical.itucollection = latest_similar_collection_record_historical.itucollection
                    else:
                        collection_record_absolute.save()
                        collection_record_historical.itucollection = collection_record_absolute
                    debug.log(u'Created new historical collection record for '
                              + unicode(collection_record_historical.name)
                              + u', version ' + unicode(collection_record_historical.version), display=True)
                    collection_record_historical.save()
                    for r in collection_itunes['ratings']:
                        try:
                            rating = ItuRating(stars=r['stars'], count=r['count'],
                                               itucollectionhistorical=collection_record_historical)
                            rating.save()
                        except:
                            debug.log(u'WARNING: Failed to save rating.', display=True)

                # Only comments whose text is not stored yet are saved.
                for comment in collection_itunes['comments']:
                    if comment and not ItuComment.objects.filter(detail=comment['detail']).exists():
                        try:
                            new_comment = ItuComment(itucollectionhistorical=collection_record_historical,
                                                     stars=comment['rating'],
                                                     date=comment['date'],
                                                     detail=comment['detail'],
                                                     source=comment['source'],
                                                     ituinstitution=institution)
                            new_comment.save()
                            debug.log(u'Saved new comment by ' + unicode(new_comment.source)
                                      + u': \"' + unicode(new_comment.detail) + u'\".', display=True)
                        except:
                            debug.log(u'WARNING: Failed to save comment.', display=True)
                collections_spotted.append(collection_record_historical)

                # Acquire the list of items for this collection.
                try:
                    items = itunes.get_collection_items(collection_record_historical.url, hurry=True)
                except:
                    debug.errorlog('Could not get items for collection ' + collection_record_historical.name + '.', display=True)
                    items = []
                for item in items:
                    # Dictionary will be blank if we have failed to retrieve data on an
                    # item. If so, don't do anything with the item. (A truthiness test is
                    # used because an identity test against a new {} is always true.)
                    if item:
                        item_record_absolute = ItuItem(institution=institution)
                        # Reset for every item so that a failed construction can never
                        # fall through with the previous item's record.
                        item_record_historical = None
                        try:
                            try:
                                # Deal with things with no duration (like PDFs...)
                                if 'duration' in item:
                                    item['duration'] = int(item['duration'])
                                else:
                                    item['duration'] = None
                                if 'songName' not in item:
                                    item['songName'] = item['playlistName'] + ' ' + str(item['rank']) + ' {UNKNOWN NAME}'
                                item_record_historical = ItuItemHistorical(
                                    name=item['songName'],
                                    itu_id=item['itemId'],
                                    url=item['url'],
                                    artist_name=item['artistName'],
                                    description=item['description'],
                                    duration=item['duration'],
                                    explicit=bool(item['explicit']),
                                    feed_url=item['feedURL'],
                                    file_extension=item['fileExtension'],
                                    kind=item['kind'],
                                    long_description=item['longDescription'],
                                    playlist_id=int(item['playlistId']),
                                    playlist_name=item['playlistName'],
                                    popularity=float(item['popularity']),
                                    preview_length=int(item['previewLength']),
                                    preview_url=item['previewURL'],
                                    rank=int(item['rank']),
                                    release_date=pytz.utc.localize(parse(item['releaseDate'], ignoretz=True)),
                                    missing=None,
                                    version=1,
                                    previous=None,
                                    ituitem=item_record_absolute,
                                    institution=institution,
                                    genre=genre,
                                    scanlog=scanlog,
                                    series=collection_record_historical)
                            except KeyError:
                                # See if we've got data from a last-ditch attempt at
                                # downloading it instead.
                                duration = 0
                                feedurl = ""
                                # offerkey is something like 'standard-audio'. This code works
                                # on the assumption that, whatever the key, we want all the
                                # items in its list.
                                for offerkey in item['store-offers'].keys():
                                    try:
                                        duration = item['store-offers'][offerkey]['duration']
                                    except KeyError:
                                        duration = None
                                    feedurl = item['store-offers'][offerkey]['asset-url']
                                item_record_historical = ItuItemHistorical(
                                    name=item['title'],
                                    itu_id=item['item-id'],
                                    url=item['url'],
                                    artist_name=item['artist-name'],
                                    description=item['description'],
                                    duration=duration,
                                    explicit=False,
                                    feed_url=feedurl,
                                    file_extension=feedurl.split('.')[-1],
                                    kind='unknown',
                                    long_description=item['long-description'],
                                    playlist_id=collection_record_historical.id,
                                    playlist_name=collection_record_historical.name,
                                    popularity=0.0,
                                    preview_length=0,
                                    preview_url='unknown',
                                    rank=int(item['track-number']),
                                    release_date=item['release-date'],
                                    missing=None,
                                    version=1,
                                    previous=None,
                                    ituitem=item_record_absolute,
                                    institution=institution,
                                    genre=genre,
                                    scanlog=scanlog,
                                    series=collection_record_historical)
                        except KeyError:
                            debug.errorlog(u'Missing key when trying to create an ItuItemHistorical. item=' + unicode(item), display=True)
                        except:
                            debug.errorlog(u'Failed to process ItuItemHistorical.', display=True)

                        if item_record_historical is None:
                            # Construction failed and has been logged; there is no
                            # record to compare or save for this item.
                            continue

                        try:  # We can't afford this bit to die in the middle of the night.
                            # Put together a list of saved item_record_historicals that look
                            # like they're the same as our item_record_historical, really.
                            similar_item_record_historicals = []
                            item_record_historical_exists = False
                            for saved_item_record_historical in ItuItemHistorical.objects.filter(
                                    Q(series__itucollection=collection_record_historical.itucollection)
                                    & (Q(name=item_record_historical.name)
                                       | Q(itu_id=item_record_historical.itu_id)
                                       | Q(url=item_record_historical.url))
                                    & Q(file_extension=item_record_historical.file_extension)):  # name AND Video/Audio
                                if item_record_historical.url != saved_item_record_historical.url:
                                    # Don't add similar item_record_historical if the URLs are
                                    # different, but both are accessible.
                                    try:
                                        urllib2.urlopen(item_record_historical.url).close()
                                        urllib2.urlopen(saved_item_record_historical.url).close()
                                    except urllib2.URLError:
                                        similar_item_record_historicals.append(saved_item_record_historical)
                                elif all(getattr(item_record_historical, field) == getattr(saved_item_record_historical, field)
                                         for field in item_fields_compared):
                                    item_record_historical_exists = True
                                    item_record_historical = saved_item_record_historical
                                else:
                                    similar_item_record_historicals.append(saved_item_record_historical)

                            if not item_record_historical_exists:
                                if similar_item_record_historicals:
                                    similar_item_record_historicals.sort(
                                        key=lambda this_item_record_historical: this_item_record_historical.version)
                                    latest_similar_item_record_historical = similar_item_record_historicals[-1]
                                    item_record_historical.previous = latest_similar_item_record_historical
                                    item_record_historical.version = latest_similar_item_record_historical.version + 1
                                    item_record_historical.ituitem = latest_similar_item_record_historical.ituitem
                                else:
                                    item_record_absolute.save()
                                    item_record_historical.ituitem = item_record_absolute
                                debug.log(u'Created new historical item record for '
                                          + unicode(item_record_historical.name)
                                          + u', version ' + unicode(item_record_historical.version), display=True)
                                item_record_historical.save()
                            items_spotted.append(item_record_historical)
                        except:
                            debug.errorlog(u'Failed to process potential historical item record.', display=True)
                    else:
                        debug.log(u'WARNING: Blank item - perhaps we couldn\'t download the appropriate page?', display=True)
            else:
                debug.log(u'WARNING: Blank category - perhaps we couldn\'t download the appropriate page?', display=True)

        print(u"Checking whether anything has gone missing or reappeared...")
        if collections:
            counter = 0
            for historical_collection_record in ItuCollectionHistorical.objects.filter(
                    Q(institution=institution) & Q(itucollection__latest=F('id'))):
                if historical_collection_record not in collections_spotted and historical_collection_record.missing is None:
                    debug.log(unicode(historical_collection_record.name)
                              + u" appears to have gone missing! We last saw it at "
                              + unicode(historical_collection_record.scanlog.time), display=True)
                    historical_collection_record.missing = scanlog
                    historical_collection_record.save()
                elif historical_collection_record in collections_spotted and historical_collection_record.missing:
                    debug.log(unicode(historical_collection_record.name)
                              + u" has reappeared! It went missing at "
                              + unicode(historical_collection_record.missing.time), display=True)
                    historical_collection_record.missing = None
                    historical_collection_record.save()
                counter += 1
                # Progress message every 100 records.
                if counter % 100 == 0:
                    print(u'Still checking... (at object ' + unicode(counter) + u')')
            for historical_item_record in ItuItemHistorical.objects.filter(
                    Q(institution=institution) & Q(ituitem__latest=F('id'))):
                if historical_item_record not in items_spotted and historical_item_record.missing is None:
                    debug.log(unicode(historical_item_record.name)
                              + u" appears to have gone missing! We last saw it at "
                              + unicode(historical_item_record.scanlog.time), display=True)
                    historical_item_record.missing = scanlog
                    historical_item_record.save()
                elif historical_item_record in items_spotted and historical_item_record.missing:
                    debug.log(unicode(historical_item_record.name)
                              + u" has reappeared! It went missing at "
                              + unicode(historical_item_record.missing.time), display=True)
                    historical_item_record.missing = None
                    historical_item_record.save()
                counter += 1
                if counter % 100 == 0:
                    print(u'Still checking... (at object ' + unicode(counter) + u')')
        else:
            debug.log(u"WARNING: No collections found. Perhaps you scanned an institution that only publishes courses?", display=True)

    elif mode == 2:
        comment = u"Scan of the Top Collections Chart..."
        debug.log(u"Log started for: %s" % unicode(comment), display=True)
        updated_institutions = False
        collections = itunes.get_topcollections()
        for collection in collections:
            if collection:
                try:
                    historical_collections = ItuCollectionHistorical.objects.filter(url=collection['series_url'])
                    if not historical_collections:
                        debug.log(u'WARNING: Couldn\'t find an historical record of collection at '
                                  + unicode(collection['series_url'])
                                  + u'. Attempting an historical scan of '
                                  + unicode(collection['institution']) + u' first...', display=True)
                        # The institution list is refreshed at most once per chart scan.
                        if not updated_institutions:
                            management.call_command('scan_itunes', mode=4)
                            updated_institutions = True
                        try:
                            management.call_command('scan_itunes', collection['institution'], mode=1)
                        except:
                            try:  # Deal with institutions which aren't listed by Apple.
                                institution = ItuInstitution(name=collection['institution'],
                                                             itu_id=int(collection['institution_id']),
                                                             url=collection['institution_url'])
                                institution.save()
                                management.call_command('scan_itunes', collection['institution'], mode=1)
                            except:
                                debug.errorlog('Failed to scan institution ' + collection['institution']
                                               + '. Perhaps this institution isn\'t listed by Apple?', display=True)
                        # Look again now that the institution has been scanned.
                        historical_collections = ItuCollectionHistorical.objects.filter(url=collection['series_url'])
                    if historical_collections.exists():
                        historical_collection = historical_collections[0].latest()
                        debug.log(u'Creating new chart row: ' + unicode(historical_collection.name)
                                  + u' Position: ' + unicode(collection['chart_position']), display=True)
                        chartrow = ItuCollectionChartScan(position=int(collection['chart_position']),
                                                          itucollection=historical_collection.itucollection,
                                                          itucollectionhistorical=historical_collection,
                                                          scanlog=scanlog,
                                                          date=scanlog.time)
                        chartrow.save()
                    else:
                        debug.errorlog(u'Couldn\'tfind an historical record of collection at '
                                       + unicode(collection['series_url'])
                                       + u' despite updating the database.', display=True)
                except KeyError:
                    debug.errorlog('WARNING: Couldn\'t access collection (KeyError):' + str(collection), display=True)

    elif mode == 3:
        comment = u"Scan of the Top Downloads Chart..."
        debug.log(u"Log started for: %s" % unicode(comment), display=True)
        updated_institutions = False
        items = itunes.get_topdownloads()
        for item in items:
            if item:
                try:
                    historical_items = ItuItemHistorical.objects.filter(name=item['item'])
                    if not historical_items:
                        debug.log(u'WARNING: Couldn\'t find an historical record of item at '
                                  + unicode(item['item_url'])
                                  + u'. Attempting an historical scan of '
                                  + unicode(item['institution']) + u' first...', display=True)
                        # The institution list is refreshed at most once per chart scan.
                        if not updated_institutions:
                            management.call_command('scan_itunes', mode=4)
                            updated_institutions = True
                        try:
                            management.call_command('scan_itunes', item['institution'], mode=1)
                        except:
                            try:  # Deal with institutions which aren't listed by Apple.
                                institution = ItuInstitution(name=item['institution'],
                                                             itu_id=int(item['institution_id']),
                                                             url=item['institution_url'])
                                institution.save()
                                management.call_command('scan_itunes', item['institution'], mode=1)
                            except:
                                debug.errorlog('Failed to scan institution ' + item['institution']
                                               + '. This is a bug.', display=True)
                        # Look again now that the institution has been scanned.
                        historical_items = ItuItemHistorical.objects.filter(name=item['item'])
                    if historical_items.exists():
                        historical_item = historical_items[0].latest()
                        debug.log(u'Created new download chart row: ' + unicode(historical_item.name)
                                  + u' Position: ' + unicode(item['chart_position']), display=True)
                        chartrow = ItuItemChartScan(position=int(item['chart_position']),
                                                    ituitem=historical_item.ituitem,
                                                    ituitemhistorical=historical_item,
                                                    scanlog=scanlog,
                                                    date=scanlog.time)
                        chartrow.save()
                    else:
                        debug.errorlog(u'Couldn\'t find an historical record of item at '
                                       + unicode(item['item_url'])
                                       + u' despite updating the database.', display=True)
                except KeyError:
                    debug.errorlog('WARNING: Couldn\'t access item (KeyError):' + str(item), display=True)

    elif mode == 4:
        comment = "Scan of list of institutions..."
        debug.log(u"Log started for: %s" % unicode(comment))
        print(comment)
        institutions = itunes.get_institutions()
        for institution_itunes in institutions:
            if institution_itunes:
                institution = ItuInstitution(name=institution_itunes['text'],
                                             itu_id=int(institution_itunes['itu_id']),
                                             url=institution_itunes['url'])
                need_update = False
                need_create = True
                # Any stored institution sharing an id, name or url is treated as the
                # same one; if it differs in some field it is overwritten and saved.
                for saved_institution in ItuInstitution.objects.filter(
                        Q(itu_id=institution.itu_id) | Q(name=institution.name) | Q(url=institution.url)):
                    if (saved_institution.itu_id == institution.itu_id
                            and saved_institution.name == institution.name
                            and saved_institution.url == institution.url):
                        need_update = False
                        need_create = False
                    else:
                        need_update = True
                        need_create = False
                        saved_institution.itu_id = institution.itu_id
                        saved_institution.name = institution.name
                        saved_institution.url = institution.url
                        institution = saved_institution
                if need_update:
                    debug.log(u'Updated institution ' + unicode(institution.name), display=True)
                    institution.save()
                elif need_create:
                    debug.log(u'Created new institution ' + unicode(institution.name), display=True)
                    institution.save()
    else:
        debug.errorlog(u"We shouldn't ever get this scan...", display=True)

    print("\nScan iTunes finished at " + str(datetime.datetime.now(pytz.utc)))

    # Write the error cache to disk
    debug.errorlog_save()
    debug.errorlog_stop()
    scanlog.complete = True
    scanlog.save()
    return None
def handle_label(self, path, **options):
    """Import new log files found under ``path``, printing statistics per file.

    The files returned by ``self._list_files(path)`` are sorted by name and
    handed to ``self._parsefile`` one at a time. Files for which
    ``self._logfile`` reports ``created`` as false are skipped without using
    up the quota. At most 21 files are imported per call, or one file when
    ``debug.DEBUG`` is on.

    Args:
        path: Passed to ``self._list_files`` to find candidate files.
        **options: ``verbosity`` (values above 1 switch ``debug.DEBUG`` on)
            and ``start_at_line`` (stored as the start line, default 1).

    Returns:
        None.
    """
    verbosity = int(options.get('verbosity', 0))
    if verbosity > 1:
        debug.DEBUG = True

    # Scan directory for files, compare them to names in the existing LogFile list.
    # Import the first X new files.
    found_files_list = self._list_files(path)
    found_files_list.sort()  # Trust the naming conventions to put a sortable date on them
    import_file_limit = min(21, len(found_files_list))
    # Have this only work on the one file when debug is switched on
    if debug.DEBUG:
        import_file_limit = 1
    print("{0} files have been found. Importing up to {1} of them now.".format(
        len(found_files_list), import_file_limit))

    for filename in found_files_list:
        if import_file_limit <= 0:
            # Quota used up: nothing further would be imported.
            break

        # Reset statistics
        self.import_stats['filename'] = filename
        self.import_stats['line_counter'] = 0
        self.import_stats['line_count'] = 0
        self.import_stats['import_starttime'] = datetime.datetime.now(pytz.utc)
        self.import_stats['import_startline'] = int(options.get('start_at_line', 1))

        # This only needs setting/getting the once per call of this function
        logfile_obj, created = self._logfile(filename, 'itu-raw')
        if not created:
            err_string = "This file has already been imported: ({0})".format(filename)
            debug.onscreen(err_string)
            continue

        import_file_limit -= 1
        print("Import of [{0}] started at {1:%Y-%m-%d %H:%M:%S}\n".format(
            filename, datetime.datetime.now(pytz.utc)))

        # Create an error log per import file
        debug.errorlog_start(filename + '_import-error.log')
        try:
            # Send the file off to be parsed
            self._parsefile(logfile_obj)

            # Final stats output at end of file. The elapsed time is built from
            # every timedelta component; `.seconds` on its own leaves out whole
            # days and the sub-second part.
            elapsed = datetime.datetime.now(pytz.utc) - self.import_stats.get('import_starttime')
            duration = elapsed.days * 86400 + elapsed.seconds + elapsed.microseconds / 1000000.0
            self.import_stats['import_duration'] = duration
            try:
                self.import_stats['import_rate'] = float(
                    self.import_stats.get('line_counter') - self.import_stats.get('import_startline')
                ) / duration
            except ZeroDivisionError:
                self.import_stats['import_rate'] = 0
        finally:
            # Write the error cache to disk, even when parsing raised.
            debug.errorlog_stop()

        print("""
        Import finished at {0:%Y-%m-%d %H:%M:%S}
        {1:d} Lines parsed over {2:.1f} seconds
        Giving a rate of {3:.3f} lines/sec
        """.format(
            datetime.datetime.now(pytz.utc),
            self.import_stats.get('line_counter'),
            self.import_stats.get('import_duration'),
            self.import_stats.get('import_rate')
        ))

    return None