示例#1
0
文件: models.py 项目: huwf/dictogloss
    def split_file(self, seconds=DEFAULT_SEGMENT_LENGTH):
        """Split the saved base audio file into fixed-length segments.

        Creates a subdirectory (named after the segment length) next to the
        saved file, runs ffmpeg's segment muxer to cut the audio, and stores
        one ``Segment`` row per produced file.  If the subdirectory already
        exists, the file is assumed to have been split before and nothing
        is done.

        :param seconds: segment length in seconds; falsy values fall back
            to ``DEFAULT_SEGMENT_LENGTH``.
        """
        if not seconds:
            seconds = DEFAULT_SEGMENT_LENGTH

        path = os.path.dirname(self._save_path())
        output_dir = os.path.join(path, str(seconds))
        if os.path.exists(output_dir):
            # The base has already been split at this segment length.
            return
        os.makedirs(output_dir)

        # ffmpeg expands %04d into a zero-padded running counter per segment.
        ffmpeg_path = os.path.join(output_dir, f'%04d_{self.filename}')

        # TODO: Choice between different tools?
        args = [
            'ffmpeg', '-i', self._save_path(),
            '-f', 'segment',
            '-segment_time', str(seconds),
            '-c', 'copy', ffmpeg_path
        ]
        self.ffmpeg(args)

        logger.debug(f'listdir: {output_dir}')
        segments = sorted(os.listdir(output_dir))
        logger.debug('segments: %r' % segments)
        # NOTE(review): every row records length=seconds, but the final
        # segment is usually shorter — confirm whether that matters upstream.
        for idx, segment_file in enumerate(segments):
            logger.debug(f'SAVING {idx}, {segment_file}')
            s = Segment(base_id=self.id, position=idx + 1,
                        length=seconds, language=self.language)
            db.add(s)
        db.commit()
示例#2
0
def import_article():
    """Import the content of an RSS track as an ``Article``.

    Reads a JSON body containing ``id`` (the RSS track id) and ``language``,
    downloads the track's URL, parses the response with the host-specific
    parsing rules, and persists the resulting article.

    :return: ``(payload, status)`` — the created article on success, or an
        error payload carrying the upstream HTTP status code on failure.
    """
    data = request.get_json()
    logger.info('data: %s', data)

    language = data.get('language')
    rss_id = data.get('id')
    track = RSSTrack.get(rss_id)
    try:
        resp = requests.get(track.url)
        resp.raise_for_status()
    except HTTPError as e:
        # Surface the upstream failure code directly to the client.
        return {'status': 'error', 'details': str(e)}, resp.status_code

    # Parsing rules are registered per hostname.
    hostname = urlparse(track.url).hostname

    article = Article(pretty_name=track.name,
                      url=track.url,
                      rss_id=rss_id,
                      language=language)
    parser = ParsingRules.get(hostname)
    # TODO: Save in a better format!
    article.content = parser.parse(resp.content)
    db.add(article)
    db.commit()
    return {'status': 'ok', 'article': article.to_json()}, 200
示例#3
0
def add_channel():
    """Register a new RSS channel after checking the feed URL is reachable.

    Reads a JSON body with ``feed_url``, ``feed_name``, ``feed_description``
    and ``feed_type``; issues a HEAD request to validate the URL before
    persisting the channel.
    """
    data = request.get_json()
    try:
        feed_url = data.get('feed_url')
        resp = requests.head(feed_url)
        resp.raise_for_status()
    except HTTPError:
        # Server answered, but with an error status — relay it.
        detail = f'Failed to connect to {feed_url} with status code {resp.status_code}'
        return {'status': 'error', 'details': detail}, resp.status_code
    except ConnectionError:
        # Server never answered at all.
        detail = f'Server for url {feed_url} is not available'
        return {'status': 'error', 'details': detail}, 503

    channel = RSSChannel(
        url=feed_url,
        channel_name=data.get('feed_name'),
        channel_description=data.get('feed_description'),
        channel_type=data.get('feed_type'),
    )
    db.add(channel)
    db.commit()
    return {'status': 'ok', 'data': channel.to_json()}, 200
示例#4
0
def translate_segment(file, position):
    """Translate a piece of text belonging to a segment and persist it.

    NOTE: This does not necessarily translate a whole segment, merely a
    part.  To translate a whole part, we may end up translating some bits
    twice, so we'll have to do something a bit more clever.
    """
    payload = request.get_json(force=True)
    target = payload.get('target', 'en-GB')
    text = payload.get('text')
    logger.debug(f'translate file {file} position {position} text {text}')

    segment = BaseAudio.get(file).get_segment(position)
    logger.debug(f'translate segment: {segment.to_json()}')

    translation = _translate(text, target, segment)
    db.add(translation)
    db.commit()

    return {'status': 'ok', 'translation': translation.to_json(segment)}, 200
示例#5
0
def save_file():
    """Downloads the file from the URL specified in POST/PUT body and saves on the filesystem and creates
    a record in the database

    Required fields:
    source_url: The URL to download it from (must be open, no auth will be attempted)
    Optional fields:
    language: BCP 47 language code. See https://cloud.google.com/speech-to-text/docs/languages for supported languages
    Files with more than one language should use string "MULTI"
    pretty_name: The name that the file will be displayed as

    :return: A JSON representation of the file or error message as appropriate
    """
    try:
        data = request.get_json()
        source_url = data.get('source_url')
        pretty_name = data.get('pretty_name')
        language = data.get('language')

        # RSS page field
        track_id = data.get('track_id')

        # Refuse to store the same source twice.
        audio = BaseAudio.get(source_url=source_url)
        if BaseAudio.exists(audio):
            logger.info('Audio file already exists')
            return audio.to_json(), 409

        audio = BaseAudio(source_url=source_url,
                          pretty_name=pretty_name,
                          language=language)
        db.add(audio)
        # Flush so the row has an id before the download/save runs.
        db.flush()
        audio.save_file()
        db.commit()

        # If the request came from an RSS page, mark the track as added.
        if track_id:
            track = db.query(RSSTrack).filter(RSSTrack.id == track_id).first()
            if track:
                track.is_added = True
                db.commit()

    except Exception as e:
        # Top-level boundary: keep returning a 500 to the client, but log
        # the full traceback server-side instead of discarding it.
        logger.exception('save_file failed')
        return {'status': 'error', 'message': str(e)}, 500
    return audio.to_json(), 201
示例#6
0
def parse_feed():
    """Checks the content of an RSS URL and adds the episodes to the website

    Fetches the feed, creates the channel record if it does not exist yet,
    then walks the items (feeds list newest first) and stores every track
    published after the most recent one already in the database.

    :return: ``(payload, 200)`` with the newly added tracks.
    :raises ValueError: if the channel's type is neither 'audio' nor 'text'.
    """
    data = request.get_json()
    feed_url = data.get('url')
    req = requests.get(feed_url)

    soup = BeautifulSoup(req.content, 'xml')

    channel = db.query(RSSChannel).filter(RSSChannel.url == feed_url).first()
    if not channel:
        # NOTE(review): channels created here get no channel_type, so the
        # type check below will raise ValueError for them — confirm intent.
        channel = RSSChannel(url=feed_url,
                             channel_name=soup.find('title').text,
                             channel_description=soup.find('description').text)
        db.add(channel)
        db.commit()

    latest_track = db.query(RSSTrack)\
        .filter(RSSTrack.channel == channel)\
        .order_by(RSSTrack.published_date.desc())\
        .first()

    ret = []
    for item in soup.find_all('item'):
        # Feeds are inconsistent about the capitalisation of pubDate.
        try:
            date_text = item.find('pubDate').text
        except AttributeError:
            date_text = item.find('pubdate').text
        # Parse the RFC 2822 date once and reuse it (it was previously
        # parsed a second time, identically, when building the track).
        pub_date = datetime.datetime.fromtimestamp(
            email.utils.mktime_tz(email.utils.parsedate_tz(date_text)),
            pytz.utc)
        if latest_track:
            # Make the stored datetime comparable with the aware pub_date.
            latest_track.published_date = latest_track.published_date.replace(
                tzinfo=pub_date.tzinfo)
            if latest_track.published_date >= pub_date:
                logger.debug(
                    f'Reached {latest_track.name}, which is already in the database'
                )
                break
        if channel.channel_type == 'audio':
            track_url = item.find('enclosure').get('url')
        elif channel.channel_type == 'text':
            track_url = item.find('link').text
        else:
            raise ValueError('Unrecognised channel type')
        track = RSSTrack(channel_id=channel.id,
                         url=track_url,
                         name=item.find('title').text,
                         description=item.find('description').text,
                         published_date=pub_date)
        ret.append(track)
        db.add(track)

    db.commit()
    return {'status': 'OK', 'data': [track.to_json() for track in ret]}, 200