def parse_excel_by_party(filename):
    u"""
    Parse excel files with vote statistics by party.

    One excel file per stenogram.

    Assumptions
    ===========

    - Every session lists exactly `parties_count` parties.
    - For each session, there is a line containing either `vote_marker` or
    `registration_marker`, and that gives the kind of the session. There is
    only one registration per stenogram (if several are present, the last
    one wins).
    - After this line, there are two lines we don't care about, and the next
    parties_count consecutive lines contain the vote/presence statistics by party.

    Returns
    =======

    A pair `(reg_by_party_dict, sessions)`:
        - reg_by_party_dict maps canonical party name to a
        `reg_stats_per_party_tuple(present, expected)`.
        - sessions is a list of `session_tuple(description, time, None,
        votes_by_party_dict)`, one per vote, in the order found in the sheet.

    Raises
    ======

    ValueError if the sheet holds no registration section, or if a vote
    line does not contain the topic separator.
    """
    book = xlrd.open_workbook(filename, logfile=excel_warnings)
    sheet = book.sheet_by_index(0)
    rows = sheet.nrows

    def read_party_line(rowx, n_counts):
        # One statistics line: party name in column 0, followed by
        # `n_counts` integer columns.
        party = canonical_party_name(sheet.cell_value(rowx=rowx, colx=0).strip().upper())
        counts = [int(sheet.cell_value(rowx=rowx, colx=colx))
                  for colx in range(1, n_counts + 1)]
        return party, counts

    sessions = []
    # Stays None until a registration section is seen, so that a file
    # without one is reported explicitly instead of failing with an
    # UnboundLocalError at the return statement.
    reg_by_party_dict = None
    row = 0
    while row < rows:
        first = sheet.cell_value(rowx=row, colx=0)
        # The statistics start three rows below the marker line (the two
        # rows in between are skipped).
        first_stats_row = row + 3
        if registration_marker in first:
            per_party_dict = {}
            for rowx in range(first_stats_row, first_stats_row + parties_count):
                party, counts = read_party_line(rowx, 2)  # present, expected
                per_party_dict[party] = reg_stats_per_party_tuple(*counts)
            reg_by_party_dict = per_party_dict
            row += 2 + parties_count
        elif vote_marker in first:
            description = first.split(vote_marker)[-1].strip()
            # Split on the first separator only: the topic text itself may
            # repeat the phrase, which would otherwise break the unpacking.
            when, description = description.split(u'по тема', 1)
            # The text before the separator ends in "HH:MM " (note the
            # trailing space, which the format string expects).
            when = datetime.datetime.strptime(when[-6:], '%H:%M ')
            description = description.strip()
            votes_by_party_dict = {}
            for rowx in range(first_stats_row, first_stats_row + parties_count):
                # Counts are: yes, no, abstained, total.
                party, counts = read_party_line(rowx, 4)
                votes_by_party_dict[party] = vote_stats_per_party_tuple(*counts)
            sessions.append(session_tuple(description, when, None, votes_by_party_dict))
            row += 2 + parties_count
        row += 1

    if reg_by_party_dict is None:
        raise ValueError('No registrations detected in the by-party file.')
    return reg_by_party_dict, sessions
def parse_excel_by_name(filename):
    """
    Parse excel files with vote statistics by representative.

    Assumptions
    ===========

    The .xls file starts with two lines we don't care about. All remaining lines
    contain the following fields, from left to right:
        - representative name
        - two fields we skip
        - representative's party
        - undefined number of fields containing stuff about how the
        representative voted.

    Returns
    =======

    Four parallel sequences with one entry per representative:
        - names (upper-cased, whitespace-normalized, spelling-corrected)
        - canonical party names
        - registration status, taken from the last registration column
        - a tuple with the representative's vote in every vote column
        (empty if the file holds no vote columns)

    Raises
    ======

    ValueError if no registration column is found.
    """
    # XXX Workarounds
    # Correct spelling errors in names of MPs.
    def MP_name_spellcheck(name):
        tr_dict = {u'МАРИЯНА ПЕТРОВА ИВАНОВА-НИКОЛОВА': u'МАРИАНА ПЕТРОВА ИВАНОВА-НИКОЛОВА',
                   u'ВЕНЦЕСЛАВ ВАСИЛЕВ ВЪРБАНОВ': u'ВЕНЦИСЛАВ ВАСИЛЕВ ВЪРБАНОВ',
                   u'АЛЕКСАНДЪР СТОЙЧЕВ СТОЙЧЕВ': u'АЛЕКСАНДЪР СТОЙЧЕВ СТОЙКОВ'}
        if name in tr_dict:
            logger_workaround.warning("Spelling error: %s" % name)
            return tr_dict[name]
        return name

    # Remove unregistered MPs.
    def filter_names(*args):
        to_filter_out = [u'МИХАИЛ ВЛАДИМИРОВ ВЛАДОВ', u'НИКОЛАЙ НАНКОВ НАНКОВ']
        # Decide what to keep from the names alone (args[0]).  Zipping all
        # the sequences together first would truncate to the shortest one,
        # so an empty vote list would silently disable the filter.
        keep = [name not in to_filter_out for name in args[0]]
        if all(keep):
            return args
        logger_workaround.warning("An MP was filtered out of the by-names list, because they are not registered as an MP.")
        return [tuple(item for item, kept in zip(seq, keep) if kept)
                for seq in args]
    # XXX End of Workarounds.

    # Translate the registration and vote markers.
    tr_reg = {u'О':'absent', u'П':'present', u'Р':'manually_registered'}
    tr_vote = {'+':'yes', '-':'no', '=':'abstain', '0':'absent'}

    book = xlrd.open_workbook(filename, logfile=excel_warnings)
    sheet = book.sheet_by_index(0)
    cols = sheet.ncols
    rows = sheet.nrows

    names = [MP_name_spellcheck(' '.join(sheet.cell_value(rowx=row, colx=0).upper().split()))
             for row in range(2, rows)]
    parties = [canonical_party_name(sheet.cell_value(rowx=row, colx=3).strip().upper())
               for row in range(2, rows)]

    # Classify every remaining column by the set of markers it contains.
    reg_sessions = []
    vote_sessions = []
    for col in range(4, cols):
        values = [sheet.cell_value(rowx=row, colx=col) for row in range(2, rows)]
        if all(v in tr_reg for v in values):
            reg_sessions.append([tr_reg[v] for v in values])
        elif all(v in tr_vote for v in values):
            vote_sessions.append([tr_vote[v] for v in values])
        elif all(v == '' for v in values):
            logger_excel.warning("Empty column found in the by_names excell file. Skipping it.")
        else:
            logger_excel.error("Strange column found in the by_names excell file. Skipping it.")
    # Transpose from one list per vote column to one tuple per MP.
    vote_sessions = list(zip(*vote_sessions))

    if len(reg_sessions) > 1:
        logger_excel.warning("There are more than one registration for this stenogram.")
    elif len(reg_sessions) != 1:
        raise ValueError('No registrations detected in the by-names file.')

    # With several registrations, the last one is the one reported.
    return filter_names(names, parties, reg_sessions[-1], vote_sessions)
 # Extract the MP's name, political force and e-mail from the XML profile in
 # `xml_str`; if the XML cannot be parsed, fall back to the CSV export.
 # NOTE(review): `i` and `xml_str` come from an enclosing loop that is not
 # visible here; `i` is presumably the MP id -- confirm.
 try:
     r = xmltodict.parse(xml_str)
     # Full name: the three name fields joined by single spaces, encoded to
     # UTF-8, then upper-cased and stripped.
     name = (
         " ".join(
             [
                 r["schema"]["Profile"]["Names"]["FirstName"]["@value"],
                 r["schema"]["Profile"]["Names"]["SirName"]["@value"],
                 r["schema"]["Profile"]["Names"]["FamilyName"]["@value"],
             ]
         )
         .encode("UTF-8")
         .upper()
         .strip()
     )
     # Drop the last space-separated token of the political force value
     # (presumably a trailing figure that is not part of the party name --
     # TODO confirm), then normalize the party name.
     force = " ".join(r["schema"]["Profile"]["PoliticalForce"]["@value"].split(" ")[:-1])
     force = canonical_party_name(force).encode("UTF-8")
     # In the e-mail field, turn ';' and ':' into ','.
     mail = r["schema"]["Profile"]["E-mail"]["@value"].encode("UTF-8").replace(";", ",").replace(":", ",").strip()
 except xml.parsers.expat.ExpatError:
     logger_mps.warning("Parsing the xml file for MP %s failed. Trying csv." % i)
     try:
         csv_file = urlopen("http://www.parliament.bg/export.php/bg/csv/MP/%d" % i)
         # One list of fields per line: strip the line, unescape &quot;,
         # split on ';' and drop the last field.
         data = [l.strip().replace("&quot;", '"').split(";")[:-1] for l in csv_file.readlines()]
         # Name comes from line 0; e-mail from line 9 (all fields but the
         # first); political force from the last field of line 6.
         name = " ".join([d.strip() for d in data[0]])
         mail = ", ".join([d.strip() for d in data[9][1:]])
         mail = mail.replace(";", ",").replace(":", ",")
         # Same treatment as in the XML branch: drop the last token, then
         # normalize the party name.
         force = " ".join(data[6][-1].decode("UTF-8").split(" ")[:-1])
         force = canonical_party_name(force).encode("UTF-8")
     except Exception, e:
         # Neither source could be parsed: log it and move on to the next
         # iteration of the enclosing loop.
         logger_mps.error("The csv file for MP %s is unparsable as well due to %s. Skipping this id." % (i, str(e)))
         continue
 # Reached only when one of the two parsers succeeded.
 url_list.append(original_url)