Пример #1
0
 def handle(self, *args, **options):
     """Import cleaned MP3 files from the import directory as ``Talk`` rows.

     Every ``*.mp3`` file found in the hard-coded import directory is
     processed in sorted path order:

     * a title of the form ``Imported - <name>`` is derived from the file
       name (extension removed, ``-`` and ``_`` turned into spaces);
     * files whose title already belongs to a ``Talk`` are skipped, so the
       command can be re-run without creating duplicates;
     * otherwise the file is copied to the public MP3 directory under a
       freshly generated ``tmi-archive-<id>.mp3`` name and a ``Talk``
       referencing that name is saved.

     ``args`` and ``options`` are accepted but not used.
     """
     source_pattern = '/var/www/vhosts/tmi-archive.com/mp3/import2/clean/*.mp3'
     # Loop-invariant, so it is defined once instead of on every iteration.
     new_file_dir = '/var/www/vhosts/tmi-archive.com/mp3.tmi-archive.com'

     for file in sorted(glob.glob(source_pattern)):
         original_file_name = file.split('/')[-1]
         readable_name = (original_file_name
                          .replace('.mp3', '')
                          .replace('-', ' ')
                          .replace('_', ' '))
         title = f'Imported - {readable_name}'

         # exists() only asks the database whether a matching row is
         # present instead of counting every match.
         if Talk.objects.filter(title=title).exists():
             continue

         # Short identifier: the first 12 base32 characters of a random
         # UUID's bytes.
         id_ = base64.b32encode(
             uuid.uuid4().bytes).decode("utf-8").rstrip("=\n")[:12]
         new_file_name = f"tmi-archive-{id_}.mp3"
         shutil.copy(file, new_file_dir + '/' + new_file_name)

         # The same copied file name is recorded for all three audio fields.
         talk = Talk(title=title,
                     updated_by_id=1,
                     created_by_id=1,
                     original_file_name=original_file_name,
                     audio_filename=new_file_name,
                     audio_cleaned=new_file_name,
                     audio_original=new_file_name)
         # NOTE(review): presumably stops the model from overwriting the
         # explicit created_by/updated_by ids on save -- confirm in Talk.
         talk.auto_add_user_data = False
         talk.save()
         print(title, talk.id)
Пример #2
0
    def handle(self, *args, **options):
        """Create ``Talk`` rows from scraped articles and attach their audio.

        Reads ``./_other/scraper/articles-selenium.json`` into a DataFrame
        and, for every row after the first three, does the following:

        * skips the row if a ``Talk`` with the same title already exists;
        * extracts the inner HTML of the article's ``entry-content`` div,
          removes the ``powerpress_player`` divs and ``powerpress_links``
          paragraphs, and stores the result as the talk description;
        * if the row has an audio link, looks for a file with the same base
          name under ``<audio_path>/mp3`` and ``<audio_path>/mp3-cleaned``
          and saves whichever exist to ``audio_original`` and
          ``audio_cleaned`` respectively.

        ``args`` and ``options`` are accepted but not used.
        """
        from django.core.files import File

        df = pd.read_json('./_other/scraper/articles-selenium.json')
        audio_path = '/home/script/_tmp/dhamma'
        # (sub-directory under audio_path, Talk file field it feeds)
        audio_sources = (
            ('mp3', 'audio_original'),
            ('mp3-cleaned', 'audio_cleaned'),
        )

        # NOTE(review): the first three rows are skipped deliberately by the
        # original code; the reason is not visible here -- confirm.
        for _, row in df.iloc[3:].iterrows():
            title = row.title

            # exists() avoids counting every matching row just to learn
            # whether there is at least one.
            if Talk.objects.filter(title=title).exists():
                continue
            print(title)

            content = BeautifulSoup(row.article, "html.parser").find(
                'div', {'class': 'entry-content'})
            # Drop the wrapper: remove the opening tag text and the trailing
            # 6 characters, i.e. the closing ``</div>``.
            # NOTE(review): when no entry-content div is found this leaves an
            # empty description ('None'[:-6] == '').
            content = str(content).replace('<div class="entry-content">', '')[:-6]
            content = BeautifulSoup(content, "html.parser")

            # Strip the embedded podcast player and its download links.
            for node in content.find_all("div", {'class': 'powerpress_player'}):
                node.decompose()
            for node in content.find_all("p", {'class': 'powerpress_links'}):
                node.decompose()

            talk = Talk(
                title=title,
                description=str(content),
                updated_by_id=1,
                created_by_id=1
            )
            # NOTE(review): presumably stops the model from overwriting the
            # explicit created_by/updated_by ids on save -- confirm in Talk.
            talk.auto_add_user_data = False
            talk.save()

            audio_link = row.audio_link
            # A missing link can arrive as None or NaN; only a real string
            # can be turned into a file name.
            if not isinstance(audio_link, str):
                continue

            file_name = audio_link.replace('?_=1', '').split('/')[-1]
            for subdir, field_name in audio_sources:
                local_path = '{}/{}/{}'.format(audio_path, subdir, file_name)
                if not os.path.exists(local_path):
                    continue
                # Close the source file as soon as it has been stored; the
                # original left one open handle per attached file.
                with open(local_path, 'rb') as fh:
                    getattr(talk, field_name).save('new', File(fh))