def _download_to_file(url, path, chunk_size=1000):
    """Stream ``url`` into the file at ``path``.

    Returns True when the whole body was written, False on any failure.
    This is deliberately best-effort: the caller simply tries the next
    candidate format, so no exception escapes from here.
    """
    try:
        response = requests.get(url, stream=True)
        try:
            # Without this check an HTTP error page would be stored as audio.
            response.raise_for_status()
            with open(path, 'wb') as fd:
                for chunk in response.iter_content(chunk_size):
                    fd.write(chunk)
        finally:
            # Streamed responses hold their connection until closed.
            response.close()
    except Exception:
        # Do not leave a truncated file behind for later processing.
        try:
            os.remove(path)
        except OSError:
            pass
        return False
    return True


def scrape_track(name, folder):
    """Find ``name`` on YouTube and store its m4a audio stream in ``folder``.

    ``name`` must consist of exactly four '-'-separated parts; the third and
    fourth are used as artist and track to build the search string
    '<artist> - <track>'.

    Returns ``(name, path)`` with ``path = '/<folder>/<display_id>.mp3'`` when
    an audio stream was downloaded.  Returns ``(False, False)`` when the name
    is malformed, the search or metadata extraction fails, a Track with the
    same ``youtube_code`` already exists, or no m4a format could be fetched.
    """
    # TODO DETAIL in logger
    name_parts = name.split('-')
    if len(name_parts) != 4:
        print("Invalid name: %s" % (name,))
        return False, False
    artist, track = name_parts[2], name_parts[3]
    track_name = '%s - %s' % (artist, track)

    try:
        link = YouTubeExtractor.search_youtube_links(track_name)
    except Exception:
        return False, False

    try:
        # Only metadata is requested from youtube_dl (download=False); the
        # audio itself is fetched below.
        ydl = youtube_dl.YoutubeDL({'outtmpl': '%(id)s%(ext)s', 'noplaylist': True})
        # Add all the available extractors
        ydl.add_default_info_extractors()
        result = ydl.extract_info(link, download=False)

        Track.sync()
        display_id = result['display_id']
        # Skip videos whose youtube_code is already stored as a Track.
        if Track.objects.filter(youtube_code=display_id).count() > 0:
            return False, False

        # NOTE(review): the m4a stream is stored under an '.mp3' name;
        # generate_fingerprint_from_list strips '.mp3' from the filename, so
        # the suffix is kept unchanged here.
        path = '/%s/%s' % (folder, display_id + '.mp3')
        for candidate in result['formats']:
            if candidate['ext'] != 'm4a':
                continue
            if _download_to_file(candidate['url'], path):
                return name, path
        return False, False
    except Exception:
        # Missing keys, extractor errors, database errors: report "not found"
        # but let KeyboardInterrupt / SystemExit propagate.
        return False, False
def generate_fingerprint_from_list(results, file_list):
    """Fingerprint the files listed in ``file_list`` and ingest the new ones.

    Runs echoprint-codegen with ``file_list`` on stdin, writing its JSON
    output to a temporary codes file.  For every entry that carries a code
    and has no existing match (``fp.best_match_for_query``), a Track is
    created and saved, and the entry's metadata is given artist, title and
    track_id.  Entries that already match are dropped.  The remaining
    entries are written back to the codes file, parsed with
    ``parse_json_dump``, passed to ``fp.ingest`` and the file is deleted.

    ``results`` is a sequence whose items hold a label at index 0 and a file
    path at index 1; the label is '<year>-<release>-<artist>-<title>'.

    Returns True.  Raises IndexError when a fingerprinted file has no entry
    in ``results`` or its label has fewer than four parts.
    """
    # TODO: os.system is thread safe??
    # TODO: How to test this?
    # NOTE(review): the random suffix can collide between concurrent runs.
    codes_file = '/tmp/allcodes_%s.json' % (random.randint(1, 10000))
    command = '/home/vagrant/echoprint-codegen/echoprint-codegen -s 10 30 < %s > %s' % (file_list, codes_file)
    os.system(command)

    with open(codes_file, 'r') as data_file:
        data = json.load(data_file)

    # Collect survivors in a new list: removing from ``data`` while iterating
    # it would skip the entry following every duplicate.
    kept = []
    for fingerprint in data:
        code_string = fingerprint.get('code')
        if not code_string:
            # Entries without a code are passed through untouched.
            kept.append(fingerprint)
            continue

        # check fp doesn't exist in database
        response = fp.best_match_for_query(code_string)
        if response.match():
            print("This file is duplicated")
            continue

        metadata = fingerprint['metadata']
        filename = metadata['filename']
        label = [v for v in results if v[1] == filename][0][0]
        youtube_code = filename.replace('.mp3', '').replace('/tmp/', '')

        label_parts = label.split('-')
        year = label_parts[0].strip()
        release = label_parts[1].strip()
        artist = label_parts[2].strip()
        title = label_parts[3].strip()
        metadata['artist'] = artist
        metadata['title'] = title

        # Track creation
        Track.sync()
        track = Track(band=artist, release=release, name=title, year=year, youtube_code=youtube_code)
        track.save()
        # Remove all - (due to limitation in fingerprint-server track_id match)
        metadata['track_id'] = track.echoprint_id
        kept.append(fingerprint)

    # Overwrite with artist and title
    with open(codes_file, 'w') as data_file:
        data_file.write(json.dumps(kept))

    # Fastingest invoke => post all into echo-fingerprint
    codes, _ = parse_json_dump(codes_file)
    fp.ingest(codes)
    FileHandler.delete_file(codes_file)
    return True