示例#1
0
def wiki_to_text(username, password, outputdir, sourcefile):
    """Write every lyric described by ``sourcefile`` to its own text file.

    The lyrics are obtained through ``kuplett_parser.get_all_lyrics`` and the
    target file names are derived from the ``"urls"`` entry of
    ``kuplett_parser.load_data(sourcefile)``: the n:th lyric is written to
    ``outputdir + "/" + get_generic_name(urls[n])``.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outputdir: Directory prefix for the written files. Missing
            directories are created.
        sourcefile: Data file handed to ``kuplett_parser``.

    Returns:
        The lyrics object returned by ``kuplett_parser.get_all_lyrics``.

    Raises:
        IndexError: If there are more lyrics than entries in ``"urls"``.
    """
    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)
    urls = kuplett_parser.load_data(sourcefile).get("urls")

    for index, lyric in enumerate(lyrics):
        filename = outputdir + "/" + kuplett_parser.get_generic_name(
            urls[index])

        # Only create the directory when the path actually has one.
        directory = os.path.dirname(filename)
        if directory.strip() and not os.path.exists(directory):
            os.makedirs(directory)

        # The context manager guarantees the file is flushed and closed,
        # even if a write fails half way through.
        with codecs.open(filename, 'w', 'utf-8') as outfile:
            for line in lyric:
                outfile.write(line + "\n")
    return lyrics
def kupletter_to_inputsong(username, password, outdirname,
                           sourcefile="data_2017.txt"):
    """Convert every lyric of ``sourcefile`` into an inputsong text file.

    One file is written per lyric, at
    ``outdirname/Akt<N>/<generic name>.txt``. Each file starts with five
    metadata lines (title, melody, author, participants, arrangement) and an
    empty line, followed by the lyric lines converted by
    ``Raw_to_inputsong_parser.parse_line_to_inputsong``.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outdirname: Root directory of the generated files.
        sourcefile: Data file handed to ``kuplett_parser``. Defaults to the
            file this function was originally hard-wired to.

    Raises:
        IndexError: If there are more lyrics than entries in ``"urls"``.
    """
    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)
    # Parse the read text and write it to an inputsong file.
    inputsong_parser = Raw_to_inputsong_parser()

    # Various short forms of the different styles.
    # Note that keys are cast to lowercase, and hence no upper case version
    # is needed.
    data = kuplett_parser.load_data(sourcefile)
    inputsong_parser.style_dictionary = data.get("dictionary")
    inputsong_parser.short_style_dictionary = data.get("shortdictionary")
    print(inputsong_parser.short_style_dictionary)

    delimiter = ':'  # separates singer from lyrics

    # Pre-compute the relative output path of every page.
    outfilenames = []
    akt = 1
    for page in data.get("urls"):
        name = kuplett_parser.get_generic_name(page)
        outfilenames.append("Akt" + str(akt) + "/" + name + ".txt")
        if "final" in name.lower():
            # This is kind of dangerous. Assumes that every song with
            # 'final' in its name is the last song of an akt (part) of
            # the spex.
            akt += 1

    for index, lyric in enumerate(lyrics):
        outfilename = outdirname + "/" + outfilenames[index]
        # Everything before the last "/" is the folder to write into; the
        # path always contains at least the "/" inserted above.
        folder = outfilename.rsplit("/", 1)[0]
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError:
                # Best effort: carry on and let opening the file fail if
                # the folder really is unusable.
                print("Skipping creation of %s because it exists already."
                      % folder)

        with codecs.open(outfilename, 'w', 'utf-8') as outfile:
            # First we go through the lyric to get metadata that has
            # potentially been sprinkled throughout the text.
            meta = get_metadata(lyric, delimiter)

            # NOTE: participants (medv) are written before the arrangement
            # (arr); keep this order, readers of the file depend on it.
            header = (meta.titel, meta.melodi, meta.forf, meta.medv,
                      meta.arr)
            for header_line in header:
                outfile.write(header_line + "\n")
            outfile.write("\n")

            # If the first line does not have a singer we interpret it and
            # the following lines as if everyone is singing.
            # NOTE: empty lines are interpreted as belonging to the previous
            # singer, but we manually override this for the first line only.
            inputsong_parser.empty_style = "Alla"

            for line in preprocess_inputsong(lyric, delimiter):
                inputsong_line = inputsong_parser.parse_line_to_inputsong(
                    line, delimiter)
                outfile.write(inputsong_line + "\n")
def kupletter_to_ass(username, password, outfilename,
                     sourcefile="data_2016.txt"):
    """Convert all lyrics of ``sourcefile`` into a single ASS file.

    For every lyric a block of ``kommentar:`` lines (padding, title, melody,
    author, arrangement, participants, padding) is emitted, followed by the
    lyric itself. Lines that ``diff_tool.find_unchanged_lines`` does not mark
    as new are copied from the data returned by ``getOldData`` when it
    reports that an old file should be used; all other lines are converted
    with ``Raw_to_ass_parser.parse_line_to_ass``.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outfilename: Path of the ASS file to write. Its directory is created
            when missing.
        sourcefile: Data file handed to ``kuplett_parser``. Defaults to the
            file this function was originally hard-wired to.

    Raises:
        IndexError: If there are more lyrics than entries in ``"urls"``, or
            if an unchanged line points outside the old content.
    """
    allowEmptyLines = True

    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)

    # Parse the read text and write it to an ASS file.
    # Increment 1 second for each new line. Start at 30 seconds.
    ass_parser = Raw_to_ass_parser(30, 1)

    # Load dictionary for mapping singers.
    data = kuplett_parser.load_data(sourcefile)
    ass_parser.style_dictionary = data.get("dictionary")
    settings = data.get("meta")
    if "multilinesplitter" in settings.keys():
        ass_parser.multi_line_keyword = settings.get("multilinesplitter")

    delimiter = ':'  # separates singer from lyrics
    padding = "kommentar:"

    # Collect the output in a list and join once at the end instead of
    # growing one big string line by line.
    parts = []
    currentOldContentOffset = 0
    for index, lyric in enumerate(lyrics):
        # Run through the lyric one time to parse metadata.
        meta = get_metadata(lyric, delimiter)

        # The lines are parsed in exactly this order; the number of header
        # lines matters to getOldData() (see the warning further down).
        header_lines = [
            padding,
            "kommentar: Titel:" + meta.titel,
            "kommentar: Melodi:" + meta.melodi,
            u"kommentar: Författare:" + meta.forf,
            "kommentar: Arr:" + meta.arr,
            "kommentar: Medverkande:" + meta.medv,
            padding,
        ]
        for header_line in header_lines:
            parts.append(ass_parser.parse_line_to_ass(
                header_line, delimiter, allowEmptyLines) + "\n")

        # If the first line does not have a singer we interpret it and the
        # following lines as if everyone is singing.
        # NOTE: empty lines are interpreted as belonging to the previous
        # singer, but we manually override this for the first line only.
        # NOTE: it is likely that the preprocessing removes these lines
        # anyway.
        ass_parser.empty_style = "ALLA"
        filename = data.get("urls")[index]

        # Read data from old file (if such exists).
        useOldFile, oldContent, oldContentOffset = getOldData(
            filename, outfilename, currentOldContentOffset)
        currentOldContentOffset = oldContentOffset
        # Find diff in files.
        diff = diff_tool.find_unchanged_lines(filename, lyric,
                                              allowEmptyLines)
        # Fetch the new data.
        newContent = preprocess_ass(lyric, delimiter)
        # Process each diff.
        for d in diff:
            if d.isNewLine or not useOldFile:
                ass_line = ass_parser.parse_line_to_ass(
                    newContent[d.line], delimiter, allowEmptyLines)
                if len(ass_line) > 0:
                    parts.append(ass_line + "\n")
            else:
                old_index = d.line + oldContentOffset
                if old_index >= len(oldContent):
                    # The lookup below raises IndexError in this case; the
                    # message explains the most likely cause first.
                    print("Dammit, now we are outside the valid intervals. Did anyone change the header size? (title etc.) See getOldData() for fix.")
                parts.append(oldContent[old_index])

    fileContent = "".join(parts)

    # Open the file to write ASS output to. This happens only after all
    # content has been assembled, since getOldData() is handed the same
    # path above.
    outdir = os.path.dirname(outfilename)
    if outdir.strip() and not os.path.exists(outdir):
        os.makedirs(outdir)
    with codecs.open(outfilename, 'w', 'utf-8') as outfile:
        # Write preamble to the ASS file, then the content.
        outfile.write(kuplett_parser.get_ass_header(sourcefile))
        outfile.write(fileContent)
def kupletter_to_ass(username, password, outfilename,
                     sourcefile="data_2017.txt"):
    """Build one ASS file out of every lyric listed in ``sourcefile``.

    Each lyric contributes a header of ``kommentar:`` lines (padding, title,
    melody, author, arrangement, participants, padding) and then its body.
    A body line is taken from the old content returned by ``getOldData``
    when that call reports an old file is usable and
    ``diff_tool.find_unchanged_lines`` does not flag the line as new;
    otherwise it is produced by ``Raw_to_ass_parser.parse_line_to_ass``.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outfilename: Path of the ASS file to write. Its directory is created
            when missing.
        sourcefile: Data file handed to ``kuplett_parser``. Defaults to the
            file this function was originally hard-wired to.

    Raises:
        IndexError: If there are more lyrics than entries in ``"urls"``, or
            if an unchanged line points outside the old content.
    """
    allowEmptyLines = True

    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)

    # Parse the read text and write it to an ASS file.
    # Increment 1 second for each new line. Start at 30 seconds.
    ass_parser = Raw_to_ass_parser(30, 1)

    # Load dictionary for mapping singers.
    data = kuplett_parser.load_data(sourcefile)
    ass_parser.style_dictionary = data.get("dictionary")
    settings = data.get("meta")
    if "multilinesplitter" in settings.keys():
        ass_parser.multi_line_keyword = settings.get("multilinesplitter")

    delimiter = ':'  # separates singer from lyrics
    padding = "kommentar:"

    def to_ass(raw_line):
        # Single place for the parser call so the arguments stay in sync.
        return ass_parser.parse_line_to_ass(raw_line, delimiter,
                                            allowEmptyLines)

    # Output chunks are gathered here and joined once, which avoids
    # rebuilding an ever-growing string for every line.
    chunks = []
    currentOldContentOffset = 0
    for song_number, lyric in enumerate(lyrics):
        # Run through the lyric one time to parse metadata.
        meta = get_metadata(lyric, delimiter)

        # Header lines, parsed in this exact order. getOldData() depends on
        # the header size, so do not add or drop lines here lightly.
        chunks.append(to_ass(padding) + "\n")
        title_line = "kommentar: Titel:" + meta.titel
        mel_line = "kommentar: Melodi:" + meta.melodi
        auth_line = u"kommentar: Författare:" + meta.forf
        arr_line = "kommentar: Arr:" + meta.arr
        medv_line = "kommentar: Medverkande:" + meta.medv
        for header_line in (title_line, mel_line, auth_line, arr_line,
                            medv_line, padding):
            chunks.append(to_ass(header_line) + "\n")

        # If the first line does not have a singer we interpret it and the
        # following lines as if everyone is singing.
        # NOTE: empty lines are interpreted as belonging to the previous
        # singer, but we manually override this for the first line only.
        # NOTE: it is likely that the preprocessing removes these lines
        # anyway.
        ass_parser.empty_style = "ALLA"
        filename = data.get("urls")[song_number]

        # Read data from old file (if such exists).
        useOldFile, oldContent, oldContentOffset = getOldData(
            filename, outfilename, currentOldContentOffset)
        currentOldContentOffset = oldContentOffset
        # Find diff in files.
        diff = diff_tool.find_unchanged_lines(filename, lyric,
                                              allowEmptyLines)
        # Fetch the new data.
        newContent = preprocess_ass(lyric, delimiter)
        # Process each diff.
        for d in diff:
            if d.isNewLine or not useOldFile:
                ass_line = to_ass(newContent[d.line])
                if len(ass_line) > 0:
                    chunks.append(ass_line + "\n")
                continue

            old_index = d.line + oldContentOffset
            if old_index >= len(oldContent):
                # The lookup below raises IndexError in this case; the
                # message explains the most likely cause first.
                print(
                    "Dammit, now we are outside the valid intervals. Did anyone change the header size? (title etc.) See getOldData() for fix."
                )
            chunks.append(oldContent[old_index])

    fileContent = "".join(chunks)

    # Open the file to write ASS output to. This is done last because
    # getOldData() is handed the same path while the content is assembled.
    target_dir = os.path.dirname(outfilename)
    if target_dir.strip() and not os.path.exists(target_dir):
        os.makedirs(target_dir)
    with codecs.open(outfilename, 'w', 'utf-8') as outfile:
        # Write preamble to the ASS file, then the content.
        outfile.write(kuplett_parser.get_ass_header(sourcefile))
        outfile.write(fileContent)
def kupletter_to_inputsong(username, password, outdirname,
                           sourcefile="data_2016.txt"):
    """Write one inputsong text file per lyric found via ``sourcefile``.

    Files end up at ``outdirname/Akt<N>/<generic name>.txt``. Every file
    begins with five metadata lines (title, melody, author, participants,
    arrangement) and a blank line; the rest is the lyric converted line by
    line with ``Raw_to_inputsong_parser.parse_line_to_inputsong``.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outdirname: Root directory of the generated files.
        sourcefile: Data file handed to ``kuplett_parser``. Defaults to the
            file this function was originally hard-wired to.

    Raises:
        IndexError: If there are more lyrics than entries in ``"urls"``.
    """
    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)
    # Parse the read text and write it to an inputsong file.
    inputsong_parser = Raw_to_inputsong_parser()

    # Various short forms of the different styles.
    # Note that keys are cast to lowercase, and hence no upper case version
    # is needed.
    data = kuplett_parser.load_data(sourcefile)
    inputsong_parser.style_dictionary = data.get("dictionary")
    inputsong_parser.short_style_dictionary = data.get("shortdictionary")
    print(inputsong_parser.short_style_dictionary)

    delimiter = ':'  # separates singer from lyrics

    # Work out the relative output path of each page up front.
    relative_paths = []
    akt = 1
    for page in data.get("urls"):
        name = kuplett_parser.get_generic_name(page)
        relative_paths.append("Akt" + str(akt) + "/" + name + ".txt")
        if "final" in name.lower():
            # This is kind of dangerous. Assumes that every song with
            # 'final' in its name is the last song of an akt (part) of
            # the spex.
            akt += 1

    for position, lyric in enumerate(lyrics):
        outfilename = outdirname + "/" + relative_paths[position]
        # The folder is everything before the last "/"; the separator
        # inserted above guarantees there is one.
        folder = outfilename.rsplit("/", 1)[0]
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError:
                # Best effort: carry on and let opening the file fail if
                # the folder really is unusable.
                print("Skipping creation of %s because it exists already."
                      % folder)

        with codecs.open(outfilename, 'w', 'utf-8') as outfile:
            # First we go through the lyric to get metadata that has
            # potentially been sprinkled throughout the text.
            meta = get_metadata(lyric, delimiter)

            # NOTE: participants (medv) come before the arrangement (arr)
            # in the written header; keep this order.
            outfile.write(meta.titel + "\n")
            outfile.write(meta.melodi + "\n")
            outfile.write(meta.forf + "\n")
            outfile.write(meta.medv + "\n")
            outfile.write(meta.arr + "\n")
            outfile.write("\n")

            # If the first line does not have a singer we interpret it and
            # the following lines as if everyone is singing.
            # NOTE: empty lines are interpreted as belonging to the previous
            # singer, but we manually override this for the first line only.
            inputsong_parser.empty_style = "Alla"

            for line in preprocess_inputsong(lyric, delimiter):
                inputsong_line = inputsong_parser.parse_line_to_inputsong(
                    line, delimiter)
                outfile.write(inputsong_line + "\n")