def handle(self, *args, **options):
    """Import the ``.wav`` files found under ``args[0]`` into the database.

    The directory tree rooted at ``args[0]`` is walked. A ``.wav`` file is
    skipped when it is smaller than ``MIN_FILE_SIZE`` or when a Recording
    with the same MD5 already exists (``save_canonical`` is called on that
    existing Recording). Otherwise the start time is taken from the file
    name, the recorder and/or site codes from the path, and the single
    Deployment whose start/end bracket the start time is looked up. A new
    Recording is saved for that deployment and divided into Snippets.

    Files that cannot be imported are logged and skipped. A
    KeyboardInterrupt raised while a file is being matched, saved or
    split into snippets stops the whole run.
    """
    logging.debug("started")
    for root, dirs, files in os.walk(args[0]):
        for f in files:
            if not f.endswith('.wav'):
                continue
            path = os.path.join(root, f)
            if os.path.getsize(path) < MIN_FILE_SIZE:
                logging.info('small file ignored: %s', path)
                continue

            # First check to see if it exists
            md5 = get_md5(path)
            try:
                recording = Recording.objects.get(md5=md5)
            except Recording.DoesNotExist:
                logging.debug('recording not already in database: %s', path)
            else:
                logging.info('recording with same MD5 already in database: %s', path)
                save_canonical(recording)
                continue

            # Now get the start time from the file name
            try:
                starttime = get_starttime(f)
            except ValueError:
                logging.error('unable to extract date and time from filename: %s', path)
                # Without a start time no deployment can be matched; carrying
                # on would reuse the previous file's start time (or fail with
                # a NameError on the first file).
                continue

            try:
                recorder_code, site_code = get_recorder_site(path)
                logging.debug('recorder %s and site %s: %s', recorder_code, site_code, path)

                # Match on whichever of the two codes could be identified.
                lookup = {}
                if recorder_code:
                    lookup['recorder__code'] = recorder_code
                if site_code:
                    lookup['site__code'] = site_code
                if not lookup:
                    logging.error('no site or recorder identified in path: %s', path)
                    continue
                deployment = Deployment.objects.get(
                    start__lt=starttime, end__gt=starttime, **lookup)
                logging.debug('found the deployment: %s', deployment)

                try:
                    Recording.objects.get(datetime=starttime, deployment=deployment)
                except Recording.DoesNotExist:
                    pass
                else:
                    logging.error('recording already exists with the same startime (%s) and deployment (%s): %s', starttime, deployment, path)
                    continue

                recording = Recording(datetime=starttime, deployment=deployment, path=path)
                logging.debug('created the recording: %s', recording)
                recording.save()
                logging.info('added recording to database: %s', path)
                save_canonical(recording)

                # Now generate the snippets
                logging.debug('generate the snippets: %s', path)
                if not recording.snippets.count():
                    try:
                        with closing(wave.open(path, 'r')) as w:
                            frames = w.getnframes()
                            rate = w.getframerate()
                            length = frames / float(rate)
                            snippet_length = 60
                            snippet_overlap = 0
                            snippet_minimum = 59.9
                            seconds = 0
                            # A trailing piece shorter than snippet_minimum
                            # seconds does not get a snippet.
                            while seconds + snippet_minimum < length:
                                offset = max(seconds - snippet_overlap, 0)
                                duration = min(snippet_length + 2 * snippet_overlap, length - offset)
                                Snippet(recording=recording, offset=offset, duration=duration).save()
                                seconds += snippet_length
                    except wave.Error:
                        # Reported by the dedicated handler below.
                        raise
                    except Exception:
                        # KeyboardInterrupt is not an Exception, so it reaches
                        # the outer handler and stops the run.
                        logging.error('error extracting snippet: %s', path, exc_info=True)
            except Deployment.DoesNotExist:
                logging.error('no matching deployment found: %s', path)
            except Deployment.MultipleObjectsReturned:
                logging.error('multiple matching deployment found: %s', path)
            except IntegrityError:
                logging.error('integrity error when trying to save file: %s', path)
            except wave.Error:
                logging.error("doesn't seem to be a WAV file: %s", path)
            except RecorderSiteError:
                logging.error('unable to extract recorder or site from path: %s', path)
            except KeyboardInterrupt:
                # ``break`` would only leave the current directory's file
                # loop; return so that the directory walk stops as well.
                return
def handle(self, *args, **options):
    """Import ``.wav`` files under ``args[0]``, creating sites and deployments.

    File names are split on ``_``: the first two fields joined by ``_`` are
    used as the site code, the third field is parsed as ``%y%m%d`` and the
    third and fourth together as ``%y%m%d_%H%M%S.wav``. Files whose name
    cannot be parsed this way are logged and skipped.

    The ``doc`` Organisation and one Site per site code are created when
    missing. Files are processed in sorted name order; a Deployment is
    fetched or created whenever the site code changes or the date is more
    than one day after the previous file's date. Each file that is at
    least ``MIN_FILE_SIZE`` and not already known by MD5 is saved as a
    Recording and divided into Snippets. A Recording already known by MD5
    only has its ``path`` updated.
    """
    logging.debug("started")
    # In the tz database the sign of Etc/GMT zones is inverted:
    # 'Etc/GMT-12' is the fixed offset twelve hours ahead of UTC.
    tz = pytz.timezone('Etc/GMT-12')

    names = []
    for root, dirs, files in os.walk(args[0]):
        for f in files:
            if f.endswith('.wav'):
                names.append((f, os.path.join(root, f)))
    names.sort()

    # Parse every file name up front so that a single malformed name is
    # skipped instead of aborting the import part-way through.
    recordings = []
    for f, path in names:
        fields = f.split('_')
        try:
            date = tz.localize(datetime.strptime(fields[2], '%y%m%d'))
            starttime = tz.localize(
                datetime.strptime('_'.join(fields[2:4]), '%y%m%d_%H%M%S.wav'))
        except (IndexError, ValueError):
            logging.error('unable to extract date and time from filename: %s', path)
            continue
        recordings.append((path, '_'.join(fields[:2]), date, starttime))

    # First make the organisation
    doc, created = Organisation.objects.get_or_create(
        code='doc', defaults={'name': 'Department of Conservation'})
    if created:
        logging.info('Made a new organisation: doc')

    # Now make the sites
    for code in {entry[1] for entry in recordings}:
        site, created = Site.objects.get_or_create(code=code, organisation=doc)
        if created:
            logging.info('Made a new site: %s', code)

    # Now make the deployments and recordings
    old_site = ''
    last_date = tz.localize(datetime.strptime('20000101', '%Y%m%d'))
    for path, site_code, date, starttime in recordings:
        if site_code != old_site or (date - last_date).days > 1:
            old_site = site_code
            site = Site.objects.get(code=site_code, organisation=doc)
            # get_or_create already saves a newly created row.
            deployment, created = Deployment.objects.get_or_create(
                site=site, owner=doc, start=date, start_timezone='Pacific/Auckland')
            if created:
                logging.info('Made a new deployment: %s, %s', site, date)
        last_date = date

        if os.path.getsize(path) < MIN_FILE_SIZE:
            logging.info('small file ignored: %s', path)
            continue

        md5 = get_md5(path)
        try:
            recording = Recording.objects.get(md5=md5)
        except Recording.DoesNotExist:
            logging.debug('recording not already in database: %s', path)
        else:
            logging.info('recording with same MD5 already in database: %s', path)
            recording.path = path
            recording.save()
            continue

        try:
            try:
                Recording.objects.get(datetime=starttime, deployment=deployment)
            except Recording.DoesNotExist:
                pass
            else:
                logging.error('recording already exists with the same startime (%s) and deployment (%s): %s', starttime, deployment, path)
                continue

            recording = Recording(datetime=starttime, deployment=deployment, path=path)
            logging.debug('created the recording: %s', recording)
            recording.save()

            # Now generate the snippets
            logging.debug('generate the snippets: %s', path)
            if not recording.snippets.count():
                try:
                    with closing(wave.open(path, 'r')) as w:
                        frames = w.getnframes()
                        rate = w.getframerate()
                        length = frames / float(rate)
                        snippet_length = 60
                        snippet_overlap = 0
                        snippet_minimum = 59.9
                        seconds = 0
                        # A trailing piece shorter than snippet_minimum
                        # seconds does not get a snippet.
                        while seconds + snippet_minimum < length:
                            offset = max(seconds - snippet_overlap, 0)
                            duration = min(snippet_length + 2 * snippet_overlap, length - offset)
                            Snippet(recording=recording, offset=offset, duration=duration).save()
                            seconds += snippet_length
                except wave.Error:
                    # Reported by the dedicated handler below.
                    raise
                except Exception:
                    # KeyboardInterrupt is not an Exception, so it reaches
                    # the outer handler and stops the run.
                    logging.error('error extracting snippet: %s', path, exc_info=True)
        except Deployment.DoesNotExist:
            logging.error('no matching deployment found: %s', path)
        except Deployment.MultipleObjectsReturned:
            logging.error('multiple matching deployment found: %s', path)
        except IntegrityError:
            logging.error('integrity error when trying to save file: %s', path)
        except wave.Error:
            logging.error("doesn't seem to be a WAV file: %s", path)
        except RecorderSiteError:
            logging.error('unable to extract recorder or site from path: %s', path)
        except KeyboardInterrupt:
            break
def handle(self, *args, **options):
    """Import ``.wav`` files under ``args[0]``, creating sites and deployments.

    File names are split on ``_``: the first two fields joined by ``_`` are
    used as the site code, the third field is parsed as ``%y%m%d`` and the
    third and fourth together as ``%y%m%d_%H%M%S.wav``. Files whose name
    cannot be parsed this way are logged and skipped.

    The ``doc`` Organisation and one Site per site code are created when
    missing. Files are processed in sorted name order; a Deployment is
    fetched or created whenever the site code changes or the date is more
    than one day after the previous file's date. Each file that is at
    least ``MIN_FILE_SIZE`` and not already known by MD5 is saved as a
    Recording and divided into Snippets. A Recording already known by MD5
    only has its ``path`` updated.
    """
    logging.debug("started")
    # In the tz database the sign of Etc/GMT zones is inverted:
    # 'Etc/GMT-12' is the fixed offset twelve hours ahead of UTC.
    tz = pytz.timezone('Etc/GMT-12')

    wav_files = []
    for root, dirs, files in os.walk(args[0]):
        for f in files:
            if f.endswith('.wav'):
                wav_files.append((f, os.path.join(root, f)))
    wav_files.sort()

    # Parse every file name up front so that a single malformed name is
    # skipped instead of aborting the import part-way through.
    recordings = []
    for f, path in wav_files:
        parts = f.split('_')
        try:
            date = tz.localize(datetime.strptime(parts[2], '%y%m%d'))
            starttime = tz.localize(
                datetime.strptime('_'.join(parts[2:4]), '%y%m%d_%H%M%S.wav'))
        except (IndexError, ValueError):
            logging.error(
                'unable to extract date and time from filename: %s', path)
            continue
        recordings.append((path, '_'.join(parts[:2]), date, starttime))

    # First make the organisation
    doc, created = Organisation.objects.get_or_create(
        code='doc', defaults={'name': 'Department of Conservation'})
    if created:
        logging.info('Made a new organisation: doc')

    # Now make the sites
    for code in {entry[1] for entry in recordings}:
        site, created = Site.objects.get_or_create(code=code, organisation=doc)
        if created:
            logging.info('Made a new site: %s', code)

    # Now make the deployments and recordings
    old_site = ''
    last_date = tz.localize(datetime.strptime('20000101', '%Y%m%d'))
    for path, site_code, date, starttime in recordings:
        if site_code != old_site or (date - last_date).days > 1:
            old_site = site_code
            site = Site.objects.get(code=site_code, organisation=doc)
            # get_or_create already saves a newly created row.
            deployment, created = Deployment.objects.get_or_create(
                site=site,
                owner=doc,
                start=date,
                start_timezone='Pacific/Auckland')
            if created:
                logging.info('Made a new deployment: %s, %s', site, date)
        last_date = date

        if os.path.getsize(path) < MIN_FILE_SIZE:
            logging.info('small file ignored: %s', path)
            continue

        md5 = get_md5(path)
        try:
            recording = Recording.objects.get(md5=md5)
        except Recording.DoesNotExist:
            logging.debug('recording not already in database: %s', path)
        else:
            logging.info(
                'recording with same MD5 already in database: %s', path)
            recording.path = path
            recording.save()
            continue

        try:
            try:
                Recording.objects.get(datetime=starttime,
                                      deployment=deployment)
            except Recording.DoesNotExist:
                pass
            else:
                logging.error(
                    'recording already exists with the same startime (%s) and deployment (%s): %s',
                    starttime, deployment, path)
                continue

            recording = Recording(datetime=starttime,
                                  deployment=deployment,
                                  path=path)
            logging.debug('created the recording: %s', recording)
            recording.save()

            # Now generate the snippets
            logging.debug('generate the snippets: %s', path)
            if not recording.snippets.count():
                try:
                    with closing(wave.open(path, 'r')) as w:
                        frames = w.getnframes()
                        rate = w.getframerate()
                        length = frames / float(rate)
                        snippet_length = 60
                        snippet_overlap = 0
                        snippet_minimum = 59.9
                        seconds = 0
                        # A trailing piece shorter than snippet_minimum
                        # seconds does not get a snippet.
                        while seconds + snippet_minimum < length:
                            offset = max(seconds - snippet_overlap, 0)
                            duration = min(
                                snippet_length + 2 * snippet_overlap,
                                length - offset)
                            Snippet(recording=recording,
                                    offset=offset,
                                    duration=duration).save()
                            seconds += snippet_length
                except wave.Error:
                    # Reported by the dedicated handler below.
                    raise
                except Exception:
                    # KeyboardInterrupt is not an Exception, so it reaches
                    # the outer handler and stops the run.
                    logging.error('error extracting snippet: %s', path,
                                  exc_info=True)
        except Deployment.DoesNotExist:
            logging.error('no matching deployment found: %s', path)
        except Deployment.MultipleObjectsReturned:
            logging.error('multiple matching deployment found: %s', path)
        except IntegrityError:
            logging.error('integrity error when trying to save file: %s',
                          path)
        except wave.Error:
            logging.error("doesn't seem to be a WAV file: %s", path)
        except RecorderSiteError:
            logging.error(
                'unable to extract recorder or site from path: %s', path)
        except KeyboardInterrupt:
            break
def handle(self, *args, **options):
    """Import ``.wav`` files from a recordings directory into the database.

    The directory tree walked is ``args[0]`` when a positional argument is
    given, otherwise the fixed default path below. For each ``.wav`` file
    of at least ``MIN_FILE_SIZE`` the start time and the recorder and/or
    site codes are taken from the file name, the single Deployment whose
    start/end bracket the start time is looked up, and a new Recording is
    saved for it and divided into Snippets.

    No MD5 duplicate check is made in this version; a file is only treated
    as a duplicate when a Recording with the same start time and deployment
    already exists.

    Files that cannot be imported are logged and skipped. A
    KeyboardInterrupt raised while a file is being matched, saved or
    split into snippets stops the whole run.
    """
    logging.debug("started")
    if args:
        top = args[0]
    else:
        top = "/home/jasonhideki/songscape/www/recordings/recordings"

    for root, dirs, files in os.walk(top):
        for f in files:
            if not f.endswith('.wav'):
                continue
            path = os.path.join(root, f)
            if os.path.getsize(path) < MIN_FILE_SIZE:
                logging.info('small file ignored: %s', path)
                continue

            # Now get the start time from the file name
            try:
                starttime = get_starttime(f)
            except ValueError:
                logging.error(
                    'unable to extract date and time from filename: %s',
                    path)
                # Without a start time no deployment can be matched; carrying
                # on would reuse the previous file's start time.
                continue

            try:
                recorder_code, site_code = get_recorder_site(f)
                logging.debug('recorder %s and site %s: %s', recorder_code,
                              site_code, f)

                # Match on whichever of the two codes could be identified.
                lookup = {}
                if recorder_code:
                    lookup['recorder__code'] = recorder_code
                if site_code:
                    lookup['site__code'] = site_code
                if not lookup:
                    logging.error(
                        'no site or recorder identified in path: %s', path)
                    continue
                deployment = Deployment.objects.get(
                    start__lt=starttime, end__gt=starttime, **lookup)
                logging.debug('found the deployment: %s', deployment)

                try:
                    Recording.objects.get(datetime=starttime,
                                          deployment=deployment)
                except Recording.DoesNotExist:
                    pass
                else:
                    logging.error(
                        'recording already exists with the same startime (%s) and deployment (%s): %s',
                        starttime, deployment, path)
                    continue

                recording = Recording(datetime=starttime,
                                      deployment=deployment,
                                      path=path)
                logging.debug('created the recording: %s', recording)
                recording.save()
                logging.info('added recording to database: %s', path)

                # Now generate the snippets
                logging.debug('generate the snippets: %s', path)
                if not recording.snippets.count():
                    try:
                        with closing(wave.open(path, 'r')) as w:
                            frames = w.getnframes()
                            rate = w.getframerate()
                            length = frames / float(rate)
                            snippet_length = 60
                            snippet_overlap = 0
                            snippet_minimum = 59.9
                            seconds = 0
                            # A trailing piece shorter than snippet_minimum
                            # seconds does not get a snippet.
                            while seconds + snippet_minimum < length:
                                offset = max(seconds - snippet_overlap, 0)
                                duration = min(
                                    snippet_length + 2 * snippet_overlap,
                                    length - offset)
                                Snippet(recording=recording,
                                        offset=offset,
                                        duration=duration).save()
                                seconds += snippet_length
                    except wave.Error:
                        # Reported by the dedicated handler below.
                        raise
                    except Exception:
                        # KeyboardInterrupt is not an Exception, so it reaches
                        # the outer handler and stops the run.
                        logging.error('error extracting snippet: %s', path,
                                      exc_info=True)
            except Deployment.DoesNotExist:
                logging.error('no matching deployment found: %s', path)
            except Deployment.MultipleObjectsReturned:
                logging.error('multiple matching deployment found: %s', path)
            except IntegrityError:
                logging.error(
                    'integrity error when trying to save file: %s', path)
            except wave.Error:
                logging.error("doesn't seem to be a WAV file: %s", path)
            except RecorderSiteError:
                logging.error(
                    'unable to extract recorder or site from path: %s', path)
            except KeyboardInterrupt:
                # ``break`` would only leave the current directory's file
                # loop; return so that the directory walk stops as well.
                return
            except Exception:
                logging.error(
                    'Hmmm. Something weird happened with this file: %s',
                    path, exc_info=True)