Пример #1
0
def generate(data):
    """Run the transcribe -> keywords -> scrape -> summarize pipeline.

    The video to process is taken from the module-level ``video_url``.
    Results are published through module-level globals rather than a
    return value: ``text`` (transcript), ``keywords``, ``json_result``
    (output of ``web_scrape``) and ``result`` (summary).

    Side effects: prints the keywords and writes two files in the current
    working directory -- ``keywords.txt`` (one keyword per line) and
    ``summary.txt``.

    Args:
        data: Not used by this function; kept so existing callers that
            pass an argument keep working.
    """
    global video_url, path, json_result, keywords, text, result

    # Transcription and Cleaning
    text = youtube_transcribe(video_url)

    # Keywords Extractor
    keywords = get_keywords(text, 15)
    print('\nKeywords:\n', keywords)
    # The context manager closes the file even if join()/write() raises.
    # NOTE(review): the platform default encoding is kept so the file stays
    # readable by existing consumers -- confirm whether UTF-8 is wanted.
    with open("keywords.txt", "w") as keywords_file:
        keywords_file.write("\n".join(keywords))
    json_result = web_scrape(keywords)

    # Summarization. The second argument is fixed at 50 here; the file's
    # own comments describe it as the summary percentage.
    result = summary(text, 50)
    with open("summary.txt", "w") as summary_file:
        summary_file.write(result)
Пример #2
0
from keyframes import Image_Processing
from text_recognition_and_extraction import text_recognition
import io
import pytesseract

# Path to the Tesseract executable that pytesseract should use.
# NOTE(review): this is a hard-coded, machine-specific Windows path; it must
# be edited to match the local Tesseract installation before running.
pytesseract.pytesseract.tesseract_cmd = r'G:\himanshu\Tesseract-OCR\tesseract.exe'

# Interactive command-line entry point: prompts for a video URL and a summary
# percentage, then runs each stage in order and prints the results.
# NOTE(review): youtube_transcribe, get_keywords and summary are not imported
# in the visible part of this file -- presumably imported above; confirm.
if __name__ == '__main__':

    # Transcription and Cleaning: the transcript is fetched for the URL the
    # user types in.
    url = input("Enter the URL = ")
    text = youtube_transcribe(url)

    # Keywords Extractor (second argument is fixed at 15).
    keywords = get_keywords(text, 15)
    print('\nKeywords:\n', keywords)

    # Summarization: int() raises ValueError if the user enters a
    # non-integer value; there is no validation or retry here.
    percentage = int(
        input(
            "Enter the percentage of information in text you want as summary : "
        ))
    result = summary(text, percentage)
    print('\nSummary:\n', result)

    # Keyframe Extraction: driven by the same URL and the extracted keywords.
    # The message below reports an 'out' folder as the output location.
    Image_Processing(url, keywords)
    print("Images Extracted in 'out' folder")

    # Text Recognition And Extraction