#!/usr/bin/python
# -*- coding: utf-8 -*-

import sample
import sys

if __name__ == "__main__":
    # Usage: <script> INPUT OUTPUT
    #   sys.argv[1] is passed unchanged to sample.get_summaries()
    #   sys.argv[2] is the path of the file to (over)write
    sentences = sample.get_summaries(sys.argv[1])
    # The context manager guarantees the handle is closed even when a
    # write fails part-way through; the manual open()/close() pair leaked
    # the handle in that case.
    with open(sys.argv[2], "w") as outFH:
        # Only the third field of each entry is written, back to back with
        # no separator added (presumably the summary text -- confirm
        # against sample.get_summaries).
        outFH.writelines(sentence[2] for sentence in sentences)
    'zwei-glorreiche-halunken.html'
    """
    regex = re.compile(ur'^(.*\.html).*$')
    return regex.match(filename).group(1)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Extracts movie summarys'
                                     + ' and movie ratings')
    parser.add_argument('--input', required=True,
                        help='input directory')
    parser.add_argument('--output', required=True,
                        help='output file name for CSV file')
    parser.add_argument('--ratings-map', required=True,
                        help='Map from file name to rating')
    args = parser.parse_args()
    sentences = sample.get_summaries(args.input)
    outFH = open(args.output, "w")
    writer = csv.writer(outFH)
    tmp = OrderedDict()
    m = load_ratings_map(args.ratings_map)
    for s in sentences:
        curFile = clean_filename(s[0])
        if not curFile in tmp:
            tmp[curFile] = []
        tmp[curFile].append(s[2].replace("\n", " "))
    notFound = 0
    invalid = 0
    for (fileName, summaryFragments) in tmp.iteritems():
        if not fileName in m:
            notFound += 1
            continue