Пример #1
0
def main():
    """Log in, read the song list from a local file and run the downloader.

    The list comes from ``songsList.txt`` via ``getSongsFromLocalarea``;
    fetching it with ``getSongsFromInternet`` is a disabled alternative.
    """
    login()
    songsList = getSongsFromLocalarea("songsList.txt")
    # The positional 3 and threadNum=10 are handed to Downloader.download
    # unchanged; their exact meaning is defined by that method.
    Downloader(songsList).download(3, threadNum=10)
    print("done.")
Пример #2
0
 def test_download_to_dir(self):
   """A Downloader built with a directory reports a file located in it."""
   target_dir = tempfile.mkdtemp()
   try:
     downloader = Downloader(target_dir)
     # Download this very source file through a file:// URL while
     # util.CaptureStdout is active.
     with util.CaptureStdout(), downloader.download('file://' + __file__) as f:
       filename = f
   finally:
     # The temporary directory is removed whether or not the download worked.
     shutil.rmtree(target_dir)
   self.assertEqual(target_dir, os.path.dirname(filename))
Пример #3
0
 def test_file_removed_on_exception(self):
   """The downloaded file no longer exists after the context exits by raising."""
   try:
     downloader = Downloader()
     with util.CaptureStdout(), downloader.download('file://' + __file__) as f:
       filename = f
       # Leave the download context through an exception on purpose.
       raise TestException()
   except TestException:
     pass
   self.assertFalse(os.path.exists(filename))
Пример #4
0
def main(reparse=False):
    """Main entry point for this ETL process.

    Downloads the raw nightly files, brings the database schema up to date
    and runs the ETL over the data directory.

    This is the binary to run from a cron job.

    Args:
        reparse: Forwarded unchanged to ``Nightlies.etl_from_dir``.

    Raises:
        AssertionError: If the data directory returned by ``get_datadir``
            does not exist.
    """

    # abspath() first: when __file__ has no directory component, dirname()
    # returns '' and os.chdir('') raises.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    logger = log.logger()
    logger.info('Starting ETL of FBO Nightly data.')

    # Figure out where we put data
    datadir = get_datadir()
    dbdir = get_dbdir()
    sqlite_dir = os.path.join(dbdir, "sqlite3")
    if not os.path.exists(sqlite_dir):
        os.makedirs(sqlite_dir)

    # Get a database connection, create db if needed
    db = model.FBO(
        "development",
        db_conf_file=os.path.join(dbdir, "dbconf.yml"))

    try:
        # Make sure the db schema is up to date, create tables, etc.
        db.migrate()

        assert os.path.exists(datadir)

        # Download raw data files.
        # NOTE(review): fname_urls is not defined in this function; it is
        # presumably a module-level name - confirm.
        dloader = Downloader(datadir, db, 'nightly')
        dloader.download(fname_urls, True)

        # Do our ETL
        nights = Nightlies(db)
        nights.etl_from_dir(reparse=reparse)
    finally:
        # Close the db connection even when the download or ETL fails.
        db.close()

    # Use the same logger as the start message rather than a bare info().
    logger.info('Finished ETL of FBO data.')
Пример #5
0
def download(url='', title='', artist='', gender='', album=''):
    """Run a Downloader on the submitted form fields and send back the file.

    The keyword parameters are always overwritten with the values found in
    ``request.form``. When ``Downloader.download`` raises ``IOError`` the
    error text is returned instead of a file.

    Returns:
        The result of ``send_from_directory`` for the downloaded path
        (served from the current working directory as an attachment), or
        the ``IOError`` message as a string.
    """
    cleanMp3s()
    form = request.form
    url, title, artist, gender, album = [
        form[field]
        for field in ('url', 'title', 'artist', 'gender', 'album')
    ]
    downloader = Downloader(url, title, artist, gender, album)
    try:
        path = downloader.download()
    except IOError as e:
        return str(e)
    serve_from = os.path.abspath('.')
    return send_from_directory(serve_from, path, as_attachment=True)
Пример #6
0
def main():
    """Run the pipeline stages named on the command line.

    Every element of ``sys.argv`` is lower-cased and the stages are matched
    by exact word: ``help``, ``download``, ``tag``, ``train`` and ``test``.
    Several stages may be given at once; they run in the fixed order below.
    With no arguments at all the help text is printed.
    """
    args = [i.lower() for i in sys.argv]

    # Compare by value: `len(args) is 1` tested object identity, which only
    # works by accident of small-int caching and warns on Python 3.8+.
    if 'help' in args or len(args) == 1:
        print_help()

    if 'download' in args:
        down = Downloader()
        down.download()
        down.preprocess()
        down.write_out(train="train.dat", test="test.dat")
    if 'tag' in args:
        t = Tagger()
        t.tag("test.dat")
        t.write_out("test_tagged.dat")
    if 'train' in args:
        m = Model()
        m.train("train.dat")
        m.write_out()
    if 'test' in args:
        m = Model("model.mdl")
        m.test("test_tagged.dat")
Пример #7
0
def downloadlink(url='', title='', artist='', gender='', album=''):
    """Run a Downloader on the submitted form fields and return a link.

    The keyword parameters are always overwritten with the values found in
    ``request.form``. The downloaded file is moved into ``files/`` (created
    when missing) and an HTML anchor pointing at the new location is
    returned.

    NOTE(review): the path is inserted into the HTML without escaping;
    confirm it cannot contain user-controlled markup.
    """
    cleanMp3s()
    form = request.form
    fields = [
        form[field]
        for field in ('url', 'title', 'artist', 'gender', 'album')
    ]
    path = Downloader(*fields).download()

    target_dir = 'files/'
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    newpath = target_dir + path
    os.rename(path, newpath)
    return ''.join(['<a href="/', newpath, '">', newpath, '</a>'])
Пример #8
0
    #LC80130312013273LGN00
    prefix = 'LC8013031'
    #postfix = 'LGN01'
    images = [
        '2013273LGN00', # Sept 29th, 2013
    ]

    print "Processing {0} image archives ...".format(len(images))

    for image in images:

        imagename = "{0}{1}".format(prefix,image)

        print "Working on '{0}' ...".format(imagename)

        dler.download(imagename)

        uncomp.uncompress(imagename)

        rgb.makergb(imagename,RED=5,GREEN=6,BLUE=4)

        prev.makepreview(imagename)

        #cleanup
        os.system("rm ./{0}/*_B*.TIF".format(imagename))
        #os.system("rm ./{0}/*_PROJECTED*".format(imagename))
        #os.system('rm ./{0}/*_RGB*')

        print "Done with {0}".format(imagename)

    print "Done creating JPEG preview files for all {0} image archives.".format(len(images))
Пример #9
0
class Projects:
    """Ordered list of projects plus the operations that run over all of them.

    The order of ``self.projects`` is the order in which ``build`` processes
    them. ``self.downloader`` is a single ``Downloader`` into which every
    project's own downloader has been merged.
    """

    def __init__(self, target):
        """Create the project list, the test list and the merged downloader.

        Args:
            target: Passed to ``libgit2Project``; not used otherwise.
        """
        self.downloader = Downloader()

        solution_dir = "Makefiles/$(compiler_short_name)/"

        def make(name, env_var, solution, flag):
            # `flag` is Project's fourth positional argument; its meaning is
            # defined by Project, which is not part of this class.
            return Project(name, env_var, solution_dir + solution, flag)

        self.projects = [
            Project("pugixml", "PUGIXML", None, False),
            libgit2Project(target),
        ]
        self.projects.extend(make(*spec) for spec in (
            ("Ishiko/Errors", "ISHIKO",
             "IshikoErrors.sln", False),
            ("Ishiko/Collections", "ISHIKO",
             "IshikoCollections.sln", False),
            ("Ishiko/Process", "ISHIKO",
             "IshikoProcess.sln", False),
            ("DiplodocusDB/Core", "DIPLODOCUSDB",
             "DiplodocusDBCore.sln", False),
            ("DiplodocusDB/TreeDB/Core", "DIPLODOCUSDB",
             "DiplodocusTreeDBCore.sln", False),
            ("DiplodocusDB/TreeDB/XMLTreeDB", "DIPLODOCUSDB",
             "DiplodocusXMLTreeDB.sln", False),
            ("CodeSmithyIDE/CodeSmithy/Core", "CODESMITHY",
             "CodeSmithyCore.sln", False),
            ("CodeSmithyIDE/CodeSmithy/Make", "CODESMITHY",
             "CodeSmithyMake.sln", False),
            ("Ishiko/TestFramework/Core", "ISHIKO",
             "IshikoTestFrameworkCore.sln", True),
            ("Ishiko/WindowsRegistry", "ISHIKO",
             "IshikoWindowsRegistry.sln", True),
            ("Ishiko/FileTypes", "ISHIKO",
             "IshikoFileTypes.sln", True),
            ("CodeSmithyIDE/CodeSmithy/UICore", "CODESMITHY",
             "CodeSmithyUICore.sln", True),
        ))
        # wxWidgets sits between UICore and the UI projects that follow.
        self.projects.append(wxWidgetsProject())
        self.projects.extend(make(*spec) for spec in (
            ("CodeSmithyIDE/CodeSmithy/UIElements", "CODESMITHY",
             "CodeSmithyUIElements.sln", True),
            ("CodeSmithyIDE/CodeSmithy/UIImplementation", "CODESMITHY",
             "CodeSmithyUIImplementation.sln", True),
            ("CodeSmithyIDE/CodeSmithy/UI", "CODESMITHY",
             "CodeSmithy.sln", True),
            ("CodeSmithyIDE/CodeSmithy/Tests/Core", "CODESMITHY",
             "CodeSmithyCoreTests.sln", True),
            ("CodeSmithyIDE/CodeSmithy/Tests/Make", "CODESMITHY",
             "CodeSmithyMakeTests.sln", True),
            ("CodeSmithyIDE/CodeSmithy/Tests/UICore", "CODESMITHY",
             "CodeSmithyUICoreTests.sln", True),
        ))

        self.tests = [
            Test("CodeSmithyIDE/CodeSmithy/Tests/Core",
                 "CodeSmithyCoreTests.exe"),
        ]
        self._init_downloader()

    def get(self, name):
        """Return the first project whose ``name`` equals *name*, or None."""
        return next((project for project in self.projects
                     if project.name == name), None)

    def set_environment_variables(self, output):
        """Export one environment variable per distinct ``project.env_var``.

        The value is ``<cwd>/Build/<first path component of project.name>``.

        Raises:
            RuntimeError: If two projects sharing an ``env_var`` would
                produce different values.
        """
        print("")
        output.print_step_title("Setting environment variables")
        env = {}
        for project in self.projects:
            value = os.getcwd() + "/Build/" + project.name.split("/")[0]
            # setdefault returns the value already recorded, if any.
            old_value = env.setdefault(project.env_var, value)
            if old_value != value:
                raise RuntimeError(
                    "Conflicting values for environment variable "
                    + project.env_var + " (" + value + " vs "
                    + old_value + ")")
        for var_name, var_value in env.items():
            print("    " + var_name + ": " + var_value)
            os.environ[var_name] = var_value
        output.next_step()

    def download(self):
        """Run the merged downloader."""
        self.downloader.download()

    def build(self, build_tools, build_configuration,
              input, state, output):
        """Build every project in order, reusing earlier results if possible.

        A project is treated as already built when ``state.build_complete``
        is set, or when it is one of pugixml, libgit2 or wxWidgets and is
        listed in ``state.built_projects``. Each project is recorded in
        *state* after its step, and the whole build is marked complete at
        the end.
        """
        # For now only bypass pugixml, libgit2 and wxWidgets because they
        # are independent from the rest. More complex logic is required to
        # handle the other projects.
        # Unless we have built all projects successfully.
        independent = ["libgit2", "pugixml", "wxWidgets"]
        for project in self.projects:
            if state.build_complete:
                project.built = True
            elif project.name in independent:
                if project.name in state.built_projects:
                    project.built = True
        for project in self.projects:
            print("")
            output.print_step_title("Building " + project.name)
            if project.built:
                print("    Using previous execution")
            else:
                project.unzip(self.downloader)
                project.build(build_tools, build_configuration,
                              input, output)
            state.set_built_project(project.name)
            output.next_step()
        state.set_build_complete()

    def test(self, compiler, architecture_dir_name, input):
        """Run every registered test executable.

        When an executable exits with a non-zero status the user is asked
        (through ``input.query``) whether to launch the IDE on the failing
        project, and a ``RuntimeError`` is raised either way.

        Raises:
            RuntimeError: If a test executable reports failure.
        """
        for test_case in self.tests:
            # TODO: the VC15/x64/Debug location is hard-coded and ignores
            # the compiler and architecture arguments.
            executable_path = "Build/" + test_case.project_name + \
                              "/Makefiles/VC15/x64/Debug/" + \
                              test_case.executable
            try:
                subprocess.check_call([executable_path])
            except subprocess.CalledProcessError:
                launchIDE = input.query("    Tests failed. Do you want to"
                                        " launch the IDE?", ["y", "n"], "n")
                if launchIDE == "y":
                    self.get(test_case.project_name).launch(
                        compiler, architecture_dir_name)
                raise RuntimeError(test_case.project_name + " tests failed.")

    def _init_downloader(self):
        """Merge each project's downloader into ``self.downloader``."""
        for project in self.projects:
            project_downloader = project.create_downloader()
            self.downloader.merge(project_downloader)
Пример #10
0
class VkBot:
    """VK group bot that answers messages containing a YouTube link.

    For every new message the bot looks for a YouTube link (in the text or
    in the first attachment), passes it to a ``Downloader``, uploads the
    resulting file through ``VkUpload.audio`` and sends it back to the
    sender as an audio attachment.
    """

    def __init__(self):
        """Install the SIGTERM handler and open all VK sessions."""
        self.timeout = TIMEOUT_SECONDS
        signal.signal(signal.SIGTERM, self.catch_signal)
        self.init_connection()

    def init_connection(self):
        """Create the VK sessions, retrying for as long as the network fails.

        After each ``requests`` connection error the bot sleeps for
        ``self.timeout`` seconds and the timeout grows by one. The retry is
        a loop rather than a recursive call, so a long outage cannot
        exhaust the call stack.
        """
        while True:
            try:
                self._connect()
                return
            except requests.exceptions.ConnectionError as e:
                print("Reinitializing session data")
                print(e)
                print("Timeout:", self.timeout)
                time.sleep(self.timeout)
                self.timeout += 1

    def _connect(self):
        """Authenticate and build every API helper object used by the bot."""
        self.vk_session = vk_api.VkApi(login=os.getenv("LOGIN"),
                                       password=os.getenv("PASSW"))
        try:
            self.vk_session.auth(token_only=True)
        except vk_api.AuthError as e:
            print(e)
            sys.exit(0)
        except vk_api.exceptions.Captcha as e:
            # Show the captcha URL and read the answer from stdin.
            print("CAPTCHA")
            print(e.get_url())
            code = input()
            e.try_again(key=code)

        print("ID:", os.getpid())
        print("Got VK API Session")
        self.group_session = vk_api.VkApi(token=os.getenv("KEY"))
        print("Got Group Session")
        self.longpoll = VkBotLongPoll(self.group_session,
                                      os.getenv("GROUP_ID"))
        print("Got Longpoll Object")
        self.api = self.vk_session.get_api()
        print("Got API Object")
        self.group_api = self.group_session.get_api()
        print("Got Group API Object")
        self.upload = vk_api.VkUpload(self.vk_session)
        print("Got Upload Object")
        self.loader = Downloader()
        print("Got Downloader Object")

    def catch_signal(self, signal, frame):
        """SIGTERM handler: announce shutdown and exit with status 0."""
        print("Stopping...")
        sys.exit(0)

    def send_message(self, user_id, message, attachment=None):
        """Send *message* (and an optional attachment) through the group API."""
        self.group_api.messages.send(user_id=user_id,
                                     random_id=get_random_id(),
                                     message=message,
                                     attachment=attachment)

    @staticmethod
    def _split_title_artist(result):
        """Return ``(title, artist)`` parsed from a downloaded file path.

        The file name is expected to look like ``<title>---<artist>.<ext>``.
        The directory and the extension are removed as whole components;
        ``str.lstrip``/``str.rstrip`` must not be used for that because
        they strip *sets of characters* and would eat leading or trailing
        letters of the title and artist. When the separator is missing the
        artist is the empty string.
        """
        stem = os.path.splitext(os.path.basename(result))[0]
        title, _, artist = stem.partition("---")
        return title, artist

    def response(self, event):
        """Handle one incoming message event.

        Sends a "Wait a bit" notice, then either downloads and uploads the
        linked video or, when no link is found, sends the error picture.
        """
        self.send_message(user_id=event.obj.message["from_id"],
                          message="Wait a bit")
        link = self.find_yt(event.obj)
        if link:
            result = self.loader.download(link)
            if result:
                # Swap only the final extension for ".mp3".
                # NOTE(review): assumes the downloader leaves an .mp3 next
                # to the reported file - confirm against Downloader.
                path = os.path.splitext(result)[0] + ".mp3"
                title, artist = self._split_title_artist(result)
                self.upload_yt(event, path, title, artist)
                os.remove(path)
        else:
            self.send_error(event)
        print()

    def start(self):
        """Listen for long-poll events forever, reconnecting on read timeouts."""
        print("Start Longpoll listening")
        while True:
            try:
                for event in self.longpoll.listen():
                    if event.type == VkBotEventType.MESSAGE_NEW:
                        # pformat returns the text; pprint.pprint would
                        # print it itself and hand back None.
                        print("Event:\n", pprint.pformat(event.obj))
                        print("From:", event.obj.message["from_id"])
                        print('Message:', event.obj.message["text"])
                        self.response(event)
                    elif event.type == VkBotEventType.MESSAGE_REPLY:
                        print("From(Bot):", event.obj.peer_id)
                        print('Message(Bot):', event.obj.text)
                        print()
                    else:
                        print(event.type)
                        print()
            except requests.exceptions.ReadTimeout as e:
                print("Got exception")
                print(type(e))
                print(e)
                time.sleep(self.timeout)
                self.init_connection()
                self.timeout = TIMEOUT_SECONDS
                # The enclosing loop resumes listening; calling start()
                # again here would add a stack frame per timeout.
                print("Start Longpoll listening")

    def upload_yt(self, event, path, title, artist):
        """Upload the file at *path* as audio and send it to the sender.

        On a VK API error the error message is sent to the user instead.
        """
        try:
            audio = self.upload.audio(audio=path, title=title, artist=artist)
        except vk_api.exceptions.ApiError as e:
            self.send_message(user_id=event.obj.message["from_id"],
                              message=f"{e.error['error_msg']}")
        else:
            self.send_message(
                user_id=event.obj.message["from_id"],
                message="Your audio:",
                attachment=f"audio{audio['owner_id']}_{audio['id']}")

    def send_error(self, event):
        """Reply to the sender with the fixed error picture."""
        self.send_message(user_id=event.obj.message["from_id"],
                          message='',
                          attachment='photo-185940778_457239022')

    def find_yt(self, event):
        """Return a YouTube link found in the message, or None.

        A non-empty message text is searched with a regular expression.
        With an empty text only the first attachment is inspected: it must
        be a video whose platform is ``'YouTube'``, and its player URL is
        fetched through ``video.get``.

        Args:
            event: Object exposing the message dict as ``event.message``
                (callers pass ``event.obj``).
        """
        if event.message["text"] != '':
            pattern = r'(http(s)?:\/\/)?((w){3}.)?youtu(be|.be)?(\.com)?\/.\S*'
            result = re.search(pattern, event.message["text"])
            if result:
                return result.group()
        else:
            if event.message['attachments']:
                attachment = event.message['attachments'][0]
            else:
                return

            if attachment['type'] != 'video' or attachment['video'].get(
                    'platform') != 'YouTube':
                return

            videos = self.api.video.get(
                videos=
                f"{attachment['video']['owner_id']}_{attachment['video']['id']}"
            )

            if not videos['items']:
                return

            return videos['items'][0]['player']
Пример #11
0
class Handler(object):
    """Consume profile URLs from a queue and crawl each one.

    NOTE: the surrounding module is written for Python 2 (``oninit`` calls
    ``.decode`` on a regex match taken from ``res.content``). The syntax
    used here is valid on both Python 2.6+ and Python 3.
    """

    def __init__(self, url_q, wb_q, info_q):
        """Store the three queues and create the download/validation helpers.

        Args:
            url_q: Queue of URL suffixes to process; the string ``'end'``
                terminates ``handle_url``.
            wb_q: Queue that receives ``'end'`` when crawling is over.
            info_q: Queue that receives ``'end'`` when crawling is over.
        """
        self.url_count = 1
        self.handle_urls = set()
        # Despite the name this is prepended to every incoming URL.
        self.url_suf = 'http://weibo.com'
        self.url_q = url_q
        self.wb_q = wb_q
        self.info_q = info_q
        self.downloader = Downloader()
        self.validater = Validater()

    def oninit(self, url):
        """Download a profile page and extract the user's basic information.

        Returns:
            A dict with ``uid``, ``page_id``, ``nick``, ``domain``,
            ``location``, ``follow_num``, ``fans_num`` and ``wb_num``, or
            None when the download fails or the counters cannot be parsed.

        Raises:
            AttributeError: If one of the ``CONFIG[...]`` entries is missing
                from the page (``re.search`` returns None).
        """
        url = self.url_suf + url
        res = self.downloader.download(url)
        if not res:
            return
        cont = res.content
        uid = re.search(r"CONFIG\['oid'\]='(.*?)'", cont).group(1)
        nick = re.search(r"CONFIG\['onick'\]='(.*?)'",
                         cont).group(1).decode('utf-8')
        # Needed later for the ajax requests that fetch weibo posts.
        page_id = re.search(r"CONFIG\['page_id'\]='(.*?)'", cont).group(1)
        domain = re.search(r"CONFIG\['domain'\]='(.*?)'", cont).group(1)
        location = re.search(r"CONFIG\['location'\]='(.*?)'", cont).group(1)
        nums = Parser.parse_index(cont)
        if not nums:
            return
        follow_num, fans_num, wb_num = nums
        # Check whether this user's follow / fan / weibo counts have changed.
        # If they have, the stored info is updated, and a changed weibo count
        # means the new posts must be crawled.
        # The call returns the number of weibo posts that need crawling.
        crawl_info, wb_num = self.validater.validate_nums(page_id, nums)
        self.crawl_info = crawl_info
        return {
            'uid': uid,
            'page_id': page_id,
            'nick': nick,
            'domain': domain,
            'location': location,
            'follow_num': int(follow_num),
            'fans_num': int(fans_num),
            'wb_num': int(wb_num)
        }

    def handle_url(self):
        """Process URLs from ``url_q`` until the ``'end'`` sentinel arrives.

        For every new URL the profile is initialised with ``oninit`` and
        the ``get_wb``, ``get_url`` and ``get_info`` workers are run as
        gevent greenlets and joined before the URL is marked as handled.
        Any other exception is logged at debug level and the loop goes on.
        """
        while True:
            try:
                url = self.url_q.get()
                if url == 'end':
                    # Propagate the sentinel to the downstream consumers.
                    self.wb_q.put_nowait('end')
                    self.info_q.put_nowait('end')
                    print('crawl over')
                    break
                if url in self.handle_urls:
                    continue
                self.args = args = self.oninit(url)
                if not args:
                    continue

                # The three workers run as greenlets (an earlier
                # thread-based variant was replaced by this).
                greenlets = []
                greenlets.append(gevent.spawn(self.get_wb))
                greenlets.append(gevent.spawn(self.get_url))
                greenlets.append(gevent.spawn(self.get_info))
                gevent.joinall(greenlets)

                self.handle_urls.add(url)
            except Empty:
                sleep(1)
            except Exception as e:
                logger.debug('error in handle_url:' + str(e))
Пример #12
0
class Player:
    """Plays the songs of a playlist, downloading missing files first."""

    def __init__(self):
        """Check the playlist directory and prepare the download directory.

        Raises:
            SystemExit: With status 1 when ``PLAYLIST_DIR`` is not a
                directory.
        """
        if not os.path.isdir(PLAYLIST_DIR):
            print(f'No config found at {PLAYLIST_DIR}')
            # Same effect as exit(1) without relying on the helper that the
            # site module may or may not install.
            raise SystemExit(1)

        if not os.path.isdir(DOWNLOADS_DIR):
            os.mkdir(DOWNLOADS_DIR)

        self.downloader = Downloader(DOWNLOADS_DIR)
        self.audio = None
        # Set by play(); defined here so the attribute always exists.
        self.current_playlist = None

    def list_playlists(self):
        """Return the sorted names of all ``*.playlist`` files.

        Only the trailing ``.playlist`` is removed, so a name that contains
        the same text elsewhere is kept intact.
        """
        suffix = '.playlist'
        return sorted(
            f[:-len(suffix)]
            for f in os.listdir(PLAYLIST_DIR)
            if f.endswith(suffix)
        )

    def list_songs(self, playlist_name):
        """Return the ``songs`` of the playlist called *playlist_name*."""
        playlist = Playlist(PLAYLIST_DIR, playlist_name)
        return playlist.songs

    def play(self, playlist_name, callback, offset=0):
        """Play a playlist in an endless cycle, starting at *offset*.

        All current playback is stopped first. Each song is downloaded when
        its file is missing, then played to completion. The loop ends once
        ``self.current_playlist`` no longer refers to this call's playlist,
        i.e. after a later ``play`` call has replaced it.

        Args:
            playlist_name: Name of the playlist to load.
            callback: Receives ``on_downloading_song(song)`` and
                ``on_song_started(song)`` notifications.
            offset: Number of songs to skip before the first one played.
        """
        simpleaudio.stop_all()

        playlist = Playlist(PLAYLIST_DIR, playlist_name)
        self.current_playlist = playlist

        songs = cycle(playlist.songs)

        for song in islice(songs, offset, None):
            if self.current_playlist is not playlist:
                break

            if not self._download_exists(song):
                print(f'Downloading {song}')
                callback.on_downloading_song(song)
                self.downloader.download(song)

            print(f'Playing {song}')
            callback.on_song_started(song)
            self._play_audio(song)

    def _download_exists(self, song):
        """Return True when *song* already exists in ``DOWNLOADS_DIR``."""
        path = os.path.join(DOWNLOADS_DIR, song)
        return os.path.isfile(path)

    def _play_audio(self, file_name):
        """Decode *file_name* from ``DOWNLOADS_DIR`` and block until played."""
        path = os.path.join(DOWNLOADS_DIR, file_name)
        sound = AudioSegment.from_file(path)

        self.audio = simpleaudio.play_buffer(
            sound.raw_data,
            num_channels=sound.channels,
            bytes_per_sample=sound.sample_width,
            sample_rate=sound.frame_rate)

        self.audio.wait_done()
Пример #13
0
        '2013170LGN00',
        '2013154LGN00',
        '2013138LGN01',
        '2013122LGN01',
        '2013106LGN01',
    ]

    print "Processing {0} image archives ...".format(len(images))

    for image in images:

        imagename = "{0}{1}".format(prefix,image)

        print "Working on '{0}' ...".format(imagename)

        dler.download(imagename)

        uncomp.uncompress(imagename)

        rgb.makergb(imagename)

        prev.makepreview(imagename)

        #cleanup
        os.system("rm ./{0}/*_B*.TIF".format(imagename))
        #os.system("rm ./{0}/*_PROJECTED*".format(imagename))
        #os.system('rm ./{0}/*_RGB*')

        print "Done with {0}".format(imagename)

    print "Done creating JPEG preview files for all {0} image archives.".format(len(images))
Пример #14
0
 def test_download_to_temp_dir(self):
   """Without a directory argument the file lands in the system temp dir."""
   downloader = Downloader()
   with util.CaptureStdout(), downloader.download('file://' + __file__) as f:
     filename = f
   expected_dir = tempfile.gettempdir()
   self.assertEqual(expected_dir, os.path.dirname(filename))
Пример #15
0
 def test_download(self):
   """Downloading this source file via file:// yields identical contents."""
   downloader = Downloader()
   with util.CaptureStdout(), downloader.download('file://' + __file__) as f:
     expected = readfile(__file__)
     self.assertEqual(expected, readfile(f))