Exemplo n.º 1
0
 def _find_client_id(self):
     """Scrape a client id from the scripts referenced by soundcloud.com.

     The home page is fetched, the ``src`` attribute of every ``<script>``
     tag is collected, and the scripts are downloaded from the last one to
     the first. The first 32-character alphanumeric ``client_id`` value
     found in a script is returned.

     Raises:
         SoundCloudException: if no script contains a client id.
     """
     home_page = get_req(url='https://soundcloud.com/', headers=self._headers)
     script_urls = re.findall(r'<script[^>]+src="([^"]+)"', home_page.text)
     # Walk the script list backwards (last <script> tag first).
     for script_url in script_urls[::-1]:
         script = get_req(url=script_url, headers=self._headers)
         if not script:
             # Falsy response: nothing to scan, try the next script.
             continue
         found = re.search(r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
                           script.text)
         if found is not None:
             return found.group(1)
     raise SoundCloudException('Unable to extract client id')
Exemplo n.º 2
0
    def _extract_info_dict_for_sets(self, *args, **kwargs):
        """Resolve a playlist (set) and download each of its tracks.

        Keyword Args:
            query: base query parameters; copied before ``client_id`` is
                added, so the caller's dict is left untouched.
            client_id: client id sent with the playlist request.
            url_info: URL requested to obtain the playlist JSON.

        The playlist title and track count are written to stdout. Then, for
        every track, its info is extracted with ``_extract_info_dict`` and
        handed to ``_download_track`` together with its 1-based position.
        """
        # ``or {}`` also covers an explicit ``query=None``.
        query = dict(kwargs.get('query') or {})
        query['client_id'] = kwargs.get('client_id')
        url_info = kwargs.get('url_info')
        response = get_req(url=url_info, headers=self._headers, params=query)
        res_json = response.json()

        _title = res_json.get('title')
        # A missing or null 'tracks' entry is treated as an empty playlist;
        # otherwise the len() call below would raise TypeError.
        _tracks = res_json.get('tracks') or []
        sys.stdout.write(fg + '[' + fc + '*' + fg +
                         '] : Playlist %s found %s track.\n' %
                         (_title, len(_tracks)))
        for ele, track in enumerate(_tracks, start=1):
            _id_track = track.get('id')
            dict_info_track = self._extract_info_dict(
                query=query,
                url_info=self._API_V2_BASE + 'resolve?url=' +
                self._API_BASE + 'tracks/' + str(_id_track),
                client_id=self._cliend_id,
                ele=ele)
            self._download_track(dict_info_track=dict_info_track,
                                 _show_all_info=self._show_all_info,
                                 ele=ele)
Exemplo n.º 3
0
 def _extract_playlist(self, url, query):
     """Request a playlist listing and return it as a dict.

     ``query`` is updated in place with the client id, a ``limit`` of
     2000000000 and ``linked_partitioning`` set to ``'1'`` before the
     request is sent.

     Args:
         url: URL to request.
         query: dict of query parameters (mutated, see above).

     Returns:
         The decoded JSON body when it is a dict, otherwise ``None``.
     """
     query.update({
         'client_id': self._cliend_id,
         'limit': 2000000000,
         'linked_partitioning': '1',
     })
     res = get_req(url=url, headers=self._headers, params=query)
     # Decode the body once instead of once for the type check and again
     # for the return value.
     payload = res.json()
     if not isinstance(payload, dict):
         return None
     return payload
Exemplo n.º 4
0
 def _get_info_user(self, url, query):
     """Request user information and return it as a dict.

     ``query`` is updated in place with the client id before the request
     is sent.

     Args:
         url: URL to request.
         query: dict of query parameters (mutated, see above).

     Returns:
         The decoded JSON body when it is a dict, otherwise ``None``.
     """
     query['client_id'] = self._cliend_id
     res = get_req(url=url, headers=self._headers, params=query)
     # Decode the body once instead of once for the type check and again
     # for the return value.
     payload = res.json()
     if not isinstance(payload, dict):
         return None
     return payload
Exemplo n.º 5
0
 def _extract_url_transcodings(self, url_trans, query):
     """Request ``url_trans`` with ``query`` and return the decoded JSON.

     Args:
         url_trans: URL to request.
         query: query parameters sent with the request.
     """
     response = get_req(url=url_trans, headers=self._headers, params=query)
     payload = response.json()
     return payload
Exemplo n.º 6
0
    def _extract_info_dict(self, *args, **kwargs):
        """Fetch a track's metadata and resolve its stream formats.

        Keyword Args:
            ele: optional track number shown in the progress text.
            query: base query parameters; copied before ``client_id`` is
                added, so the caller's dict is left untouched.
            client_id: client id sent with every request.
            url_info: URL requested to obtain the track JSON.

        Returns:
            A list whose first item is a dict with the track's ``title``,
            ``id``, ``artwork_url`` and ``description``, followed by one
            dict per usable stream holding ``url``, ``ext``, ``format_id``
            and ``protocol``, plus ``abr`` when the stream URL carries a
            bitrate.
        """
        ele = kwargs.get('ele') or ''
        text = fg + '\r[' + fc + '*' + fg + '] : Extracting info of track %s ... ' % (
            ele)
        spinner(text=text)
        query = kwargs.get('query', {}).copy()
        query['client_id'] = kwargs.get('client_id')
        url_info = kwargs.get('url_info')
        response = get_req(url=url_info, headers=self._headers, params=query)
        res_json = response.json()
        # Stream URLs already collected, used to drop duplicates.
        format_urls = set()
        formats = []

        _description = res_json.get('description')
        _title = res_json.get('title')
        _artwork_url = res_json.get('artwork_url')
        _id = res_json.get('id')

        # The first entry carries the track metadata; stream formats follow.
        formats.append({
            'title': _title,
            'id': _id,
            'artwork_url': _artwork_url,
            'description': _description,
        })

        def invalid_url(url):
            """Tell whether ``url`` is empty, already seen, or a 0-30 clip."""
            return not url or url in format_urls or re.search(
                r'/(?:preview|playlist)/0/30/', url)

        def add_format(f, protocol):
            """Complete the format dict ``f`` and append it to ``formats``."""
            # Read the URL from the format itself rather than from the
            # enclosing loop variable, so the helper does not depend on
            # when it is called.
            mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])',
                             f['url'])
            if mobj:
                # Only fill in values the caller did not already provide.
                for k, v in mobj.groupdict().items():
                    if not f.get(k):
                        f[k] = v
            format_id_list = []
            if protocol:
                format_id_list.append(protocol)
            for k in ('ext', 'abr'):
                v = f.get(k)
                if v:
                    format_id_list.append(v)
            abr = f.get('abr')
            if abr:
                f['abr'] = int(abr)
            f.update({
                'format_id':
                '_'.join(format_id_list),
                'protocol':
                'm3u8_native' if protocol == 'hls' else 'http',
            })
            formats.append(f)

        transcodings = try_get(res_json, lambda x: x['media']['transcodings'],
                               list) or []
        for trans in transcodings:
            if not isinstance(trans, dict):
                continue
            format_url = trans.get('url')
            if not format_url:
                # Without a URL there is nothing to request, and the
                # "'/hls' in format_url" test below would raise TypeError.
                continue
            stream = self._extract_url_transcodings(format_url, query)
            if not isinstance(stream, dict):
                continue
            stream_url = stream.get('url')
            spinner(text=text)
            if invalid_url(stream_url):
                continue
            format_urls.add(stream_url)
            stream_format = trans.get('format') or {}
            protocol = stream_format.get('protocol')
            # Treat the stream as HLS when the transcoding URL says so,
            # whatever protocol was declared.
            if protocol != 'hls' and '/hls' in format_url:
                protocol = 'hls'
            ext = None
            preset = trans.get('preset')
            if preset:
                ext = preset.split('_')[0]
            if ext not in KNOWN_EXTENSIONS:
                ext = mimetype2ext(stream_format.get('mime_type'))
            add_format({
                'url': stream_url,
                'ext': ext,
            }, 'http' if protocol == 'progressive' else protocol)
        sys.stdout.write(fg + '\r[' + fc + '*' + fg +
                         '] : Extracting info of track %s. (done)\n' % (ele))
        return formats