Example #1
def create():

    __create_or_delete__(path.assets('indicators'))

    overview = io.read_json(path.assets('overview.json'))

    # Start by compiling a giant data structure of every company
    companies = {}
    for d in overview:
        company_data = io.read_json(path.assets('%s.json' % d['id']))
        companies[d['id']] = company_data

    # Now aggregate the data by indicator id using the survey data
    survey = io.read_json(path.assets('survey.json'))
    for item in survey:
        indicator_id = item['id'].lower()
        indicator_data = {
            'id': item['id'],
            'name': item['name'],
            'follow': item['follow'],
            'companies': []
        }

        print indicator_id, item['follow']

        for company_id, company in companies.iteritems():
            company_data = [i for i in company if indicator_id == i['id'].lower()]
            if len(company_data) > 1:
                print 'Found too many company matches for', indicator_id

            # This might be an indicator that doesn't apply
            if not len(company_data):
                continue

            company_data = company_data[0]
            company_overview = [c for c in overview if company_id in c['id']]

            if len(company_overview) != 1:
                print 'Weirdness finding company from company overview'
            company_overview = company_overview[0]

            company_type = 'Telecommunications'
            if 'false' in company_overview['telco']:
                company_type = 'Internet'

            indicator_data['companies'].append({
                'name': company_overview['name'],
                'id': company_overview['id'],
                'display': company_overview['display'],
                'score': company_data['score'],
                'type': company_type,
                'levels': company_data['levels'],
                'services': company_data['services']
            })


        indicator_data['companies'] = sorted(indicator_data['companies'], key=lambda c: c['id'])
        io.write_json(path.assets('indicators/%s.json' % indicator_id), indicator_data)
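Example #1 depends on a helper, __create_or_delete__, that is not shown in the listing. A minimal sketch of what it presumably does (recreate the output directory before new JSON files are written); the real implementation may differ:

import os
import shutil


def __create_or_delete__(directory):
    # Hypothetical helper: recreate the output directory so stale
    # indicator JSON files from a previous run don't linger.
    if os.path.isdir(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)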
Example #2
def create_indicator_scores():
    companies = io.read_json(path.assets("services.json"))
    survey = io.read_json(path.assets("survey.json"))

    indicator_data = []
    for i in survey:
        indicator_id = i["id"].lower()
        scores = {}
        levels = {}

        for c in companies:
            c_name = c["display"]
            c_overall = c["overall"]
            if indicator_id in c_overall:
                scores[c_name] = c_overall[indicator_id]
            else:
                print "no %s in %s" % (indicator_id, " ".join(c_overall))

        print i["name"]
        indicator_data.append({"id": indicator_id, "scores": scores, "text": i["text"], "name": i["name"]})
    io.write_json(path.assets("indicator-overview.json"), indicator_data)
Example #3
def create():

    __create_or_delete__(path.assets('indicators'))

    overview = io.read_json(path.assets('overview.json'))

    # Start by compiling a giant data structure of every company
    companies = {}
    for d in overview:
        company_data = io.read_json(path.assets('%s.json' % d['id']))
        companies[d['id']] = company_data

    # Now aggregate the data by indicator id using the survey data
    survey = io.read_json(path.assets('survey.json'))
    for item in survey:
        indicator_id = item['id'].lower()
        indicator_data = {
            'id': item['id'],
            'name': item['name'],
            'follow': item['follow'],
            'companies': []
        }

        print indicator_id, item['follow']

        for company_id, company in companies.iteritems():
            company_data = [
                i for i in company if indicator_id == i['id'].lower()
            ]
            if len(company_data) > 1:
                print 'Found too many company matches for', indicator_id

            # This might be an indicator that doesn't apply
            if not len(company_data):
                continue

            company_data = company_data[0]
            company_overview = [c for c in overview if company_id in c['id']]

            if len(company_overview) != 1:
                print 'Weirdness finding company from company overview'
            company_overview = company_overview[0]

            company_type = 'Telecommunications'
            if 'false' in company_overview['telco']:
                company_type = 'Internet'

            indicator_data['companies'].append({
                'name': company_overview['name'],
                'id': company_overview['id'],
                'display': company_overview['display'],
                'score': company_data['score'],
                'type': company_type,
                'levels': company_data['levels'],
                'services': company_data['services']
            })

        indicator_data['companies'] = sorted(indicator_data['companies'],
                                             key=lambda c: c['id'])
        io.write_json(path.assets('indicators/%s.json' % indicator_id),
                      indicator_data)
Example #4
def create(filename):

    # Create a dictionary where properties are company names
    overview = io.read_json(path.assets('overview.json'))
    companies = [name.snake_case(item['display']) for item in overview]
    company_dict = {}
    for c in companies:
        company_dict[c] = -1

    # Now use that dictionary to save the index of those company names.
    raw = io.read_csv(path.raw(filename))
    raw_header = raw[0]
    for idx, item in enumerate(raw_header):
        snake_header = name.snake_case(item)
        if snake_header in company_dict:
            company_dict[snake_header] = idx

    # This should be 0 if we've matched every company
    if not_all_found(company_dict.values()):
        print 'Not all companies accounted for in services overview csv'

    # This is where we check a ref file, or create one
    ref_path = path.ref('service-column-mapping.json')
    if os.path.isfile(ref_path):
        ref = io.read_json(ref_path)
    else:
        ref = [name.snake_case(row[0]) for row in raw[1:] if row[0] != '']
        io.write_json(ref_path, ref)

    # Create a dictionary matching row number to the indicator
    indicator_dict = {}
    for indic in ref:
        indicator_dict[indic] = -1
    for idx, row in enumerate(raw):
        indicator = name.snake_case(row[0])
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    # Baselines
    tel = 'telco'
    net = 'internet company'

    output = []

    # Get a slice of all the columns that encompass each company
    stops = sorted(idx for idx in company_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx+1] if idx + 1 < len(stops) else len(raw_header)
        company_range = [item[stop:next_stop] for item in raw]
        company = {
            'display': company_range[0][0],
            'name': name.filename(company_range[0][0])
        }

        # The second item in the first row *should* be the type
        header_type = company_range[0][1].lower()
        if header_type not in [tel, net]:
            print 'No company type found. Instead, saw %s' % header_type
        company['type'] = header_type

        # The second row contains the service names
        service_names = [item for item in company_range[1]]
        services = []
        for column_number, service_name in enumerate(service_names):

            # Get each indicator value for each service using
            # the indicator mapping we defined earlier
            scores = {}
            for indicator_name, row_number in indicator_dict.iteritems():
                cell = company_range[row_number][column_number]
                scores[indicator_name] = cell

            # The first 'service' is actually just the overall
            # Do some spreadsheet format-checking here
            if column_number == 0:
                total = scores['total']
                if not len(total):
                    print 'No weighted total for %s %s' % (service_name, company['name'])
                if 'overall' not in service_name:
                    print 'Service %s != "overall"' % service_name
                company['overall'] = scores

            # The second 'service' is usually the group score;
            # No need to save this, we don't use it here.
            elif column_number == 1 and 'group' in service_name:
                continue

            # Otherwise, call it a service.
            else:
                service = {
                    'name': service_name,
                    'scores': scores
                }

                # Get service type if it's available
                service_type = company_range[0][column_number]
                if len(service_type):
                    service['type'] = service_type
                services.append(service)

        company['services'] = services
        output.append(company)

    io.write_json(path.assets('services.json'), output)
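Example #4 also relies on a few undefined helpers: not_all_found() to verify that every column mapping was filled in, and name.snake_case() / name.filename() to normalize labels. Hypothetical sketches consistent with how they are used, not the project's actual implementations:

import re


def not_all_found(indices):
    # The mapping dicts start every value at -1; any -1 left over means a
    # company or indicator was never located in the CSV.
    return any(idx == -1 for idx in indices)


# name module (sketch)
def snake_case(text):
    # e.g. 'America Movil' -> 'america_movil'
    return re.sub(r'[^a-z0-9]+', '_', text.strip().lower()).strip('_')


def filename(text):
    # e.g. 'America Movil' -> 'americamovil'
    return re.sub(r'[^a-z0-9]+', '', text.strip().lower())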
Example #5
def create(filename):

    company_name = name.filename(filename[:-4])

    all_services = io.read_json(path.assets('services.json'))
    service_data = [item for item in all_services if (company_name
        in item['name'].replace('.', ''))]

    if len(service_data) != 1:
        print 'Weird number of services found', len(service_data)

    service_data = service_data[0]

    # Create a mapping dictionary of just indicator names
    # each mapped to -1
    ref = io.read_json(path.ref('service-column-mapping.json'))
    indicator_dict = {}
    for item in ref:
        if is_number(item[1:]):
            indicator_dict[item] = -1


    # Map the indicator to the proper rows
    raw = io.read_csv(path.raw('companies/' + filename))
    for idx, row in enumerate(raw):
        indicator = row[0].lower()
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    # Use the survey data to map possible responses to position
    survey = io.read_json(path.assets('survey.json'))

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    all_indicators = []

    # Get a slice of all the rows that encompass each company
    stops = sorted(idx for idx in indicator_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx+1] if idx + 1 < len(stops) else len(raw) + 1
        indicator_range = raw[stop:next_stop]

        # Divide that slice by empty rows
        split = array.slice_arr(indicator_range, array.is_empty_row)

        # The first slice contains consolidated answers,
        # comments, and sources.
        responses = split.pop(0)

        # The first row of responses is indicator name followed by
        # service categories
        header = [item for item in responses.pop(0) if len(item)]

        indicator_name = header[0]

        # Find the survey question we're looking for
        survey_item = ([item for item in survey
            if item['id'].lower() == indicator_name.lower()])

        if len(survey_item) != 1:
            print 'Too many items in survey.json for this indicator'
            print indicator_name
            print survey_item

        indicator_data = {
            'id': indicator_name,
            'services': [],
            'levels': []
        }

        # Check if this indicator is valid before continuing
        if len(responses) == 1 and 'this indicator is n/a' in responses[0][0].lower():
            continue
        else:

            # question scores follow the response text in the split array
            scores = split.pop(0)

            # ..followed by the overall indicator score (verify this)
            indicator_score = split.pop(-1)[0][1] if ('indicator score'
                in split[-1][0][0].lower()) else []
            if not len(indicator_score):
                print '\nIndicator score not found in %s' % header[0]
                print split, '\n'
            else:
                indicator_data['score'] = indicator_score

            # ..and the same for the overall service scores
            level_scores = split.pop(-1)[0] if ('level score'
                in split[-1][0][0].lower()) else []
            if not len(level_scores):
                print '\nService score not found in %s' % header[0]
                print split, '\n'

            # Determine the comments and sources location
            comments = responses.pop(-2)
            sources = responses.pop(-1)

            if ('comments' not in comments[0].lower() or
                'sources' not in sources[0].lower()):
                print 'Comments not found in %s' % comments[0]
                print 'Sources not found in %s' % sources[0]

            # Some question texts include an if-not-then clause,
            # which throws off the count between the text and the score.
            # Record it and then delete the row.
            indicator_data['follow'] = 0
            for idx, row in enumerate(responses):
                if 'continue with B' in row[0] and len(set(row[1:])) == 1:
                    indicator_data['follow'] = 1
                    del responses[idx]
                    break

            if len(responses) != len(scores):
                print 'Length of responses and scores not matching'
                print len(responses), len(scores)


            # Save level responses, and level positions
            # Determine if this question has custom answers
            survey_levels = survey_item[0]['levels']
            for idx, level in enumerate(responses):
                level_data = []

                # Assume anything longer than 25 characters,
                # aka "no/insufficient evidence", is a custom response
                custom = 0
                survey_options = survey_levels[idx]['responses']
                for option in survey_options:
                    if len(option) > 25:
                        custom = 1

                for level_idx, level_response in enumerate(level):

                    # First level index is useless.
                    if level_idx == 0 or not len(level_response):
                        continue

                    if len(header) <= level_idx:
                        print 'No header available, this will break'

                    service = header[level_idx]

                    # Exclude group scores, operating company
                    # from indicators that don't need them
                    if (('(group)' in service or '(operating company)' in service )
                            and exclude_service(indicator_name)):
                        continue

                    # Shim issues where the response includes too much text.
                    if len(level_response) > 25 and "no/insufficient" == level_response[:15]:
                        level_response = "no/insufficient evidence"


                    # Only add to the services list if we're on the first level.
                    # Otherwise, we add too many
                    if idx == 0:

                        if 'operating company' in service.lower():
                            service_type = 'operating company'
                        elif 'group' in service.lower():
                            service_type = 'group'
                        else:
                            matching_service = [item for item in service_data['services'] if (
                                item['name'].lower() in service.lower())]
                            if len(matching_service) == 1 and 'type' in matching_service[0]:
                                service_type = matching_service[0]['type']
                            else:
                                service_type = ''

                        indicator_data['services'].append({
                            'name': scrub_service_name(service),
                            'type': service_type,
                            'comments': comments[level_idx],
                            'sources': sources[level_idx],
                            'score': level_scores[level_idx]
                        })

                    level_data.append({
                        'response': level_response,
                        'score': scores[idx][level_idx]
                    })


                indicator_data['custom'] = custom
                indicator_data['levels'].append({
                    'scores': level_data,
                    'text': survey_levels[idx]['text']
                })

        all_indicators.append(indicator_data)

    io.write_json(path.assets(company_name + '.json'), all_indicators)
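Example #5 slices each company CSV with array.slice_arr() / array.is_empty_row() and filters the reference list with is_number(), none of which appear in the listing. Hypothetical sketches, assuming slice_arr splits a block of rows on blank rows much like str.split() on blank lines:

def is_empty_row(row):
    # A row counts as empty when every cell is blank or whitespace.
    return not any(cell.strip() for cell in row)


def slice_arr(rows, is_divider):
    # Split rows into chunks wherever is_divider(row) is true,
    # dropping the divider rows themselves.
    chunks, current = [], []
    for row in rows:
        if is_divider(row):
            if current:
                chunks.append(current)
            current = []
        else:
            current.append(row)
    if current:
        chunks.append(current)
    return chunks


def is_number(value):
    # Keeps only ref entries like 'p1' or 'f11' whose tail after the
    # leading letter is numeric.
    try:
        int(value)
        return True
    except ValueError:
        return False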
Example #6
def create(filename):

    # Create a dictionary where properties are company names
    overview = io.read_json(path.assets('overview.json'))
    companies = [name.snake_case(item['display']) for item in overview]
    company_dict = {}
    for c in companies:
        company_dict[c] = -1

    # Now use that dictionary to save the index of those company names.
    raw = io.read_csv(path.raw(filename))
    raw_header = raw[0]
    for idx, item in enumerate(raw_header):
        snake_header = name.snake_case(item)
        if snake_header in company_dict:
            company_dict[snake_header] = idx

    # This should be 0 if we've matched every company
    if not_all_found(company_dict.values()):
        print 'Not all companies accounted for in services overview csv'

    # This is where we check a ref file, or create one
    ref_path = path.ref('service-column-mapping.json')
    if os.path.isfile(ref_path):
        ref = io.read_json(ref_path)
    else:
        ref = [name.snake_case(row[0]) for row in raw[1:] if row[0] != '']
        io.write_json(ref_path, ref)

    # Create a dictionary matching row number to the indicator
    indicator_dict = {}
    for indic in ref:
        indicator_dict[indic] = -1
    for idx, row in enumerate(raw):
        indicator = name.snake_case(row[0])
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    # Baselines
    tel = 'telco'
    net = 'internet company'

    output = []

    # Get a slice of all the columns that encompass each company
    stops = sorted(idx for idx in company_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx + 1] if idx + 1 < len(stops) else len(raw_header)
        company_range = [item[stop:next_stop] for item in raw]
        company = {
            'display': company_range[0][0],
            'name': name.filename(company_range[0][0])
        }

        # The second item in the first row *should* be the type
        header_type = company_range[0][1].lower()
        if header_type not in [tel, net]:
            print 'No company type found. Instead, saw %s' % header_type
        company['type'] = header_type

        # The second row contains the service names
        service_names = [item for item in company_range[1]]
        services = []
        for column_number, service_name in enumerate(service_names):

            # Get each indicator value for each service using
            # the indicator mapping we defined earlier
            scores = {}
            for indicator_name, row_number in indicator_dict.iteritems():
                cell = company_range[row_number][column_number]
                scores[indicator_name] = cell

            # The first 'service' is actually just the overall
            # Do some spreadsheet format-checking here
            if column_number == 0:
                total = scores['total']
                if not len(total):
                    print 'No weighted total for %s %s' % (service_name,
                                                           company['name'])
                if 'overall' not in service_name:
                    print 'Service %s != "overall"' % service_name
                company['overall'] = scores

            # The second 'service' is usually the group score;
            # No need to save this, we don't use it here.
            elif column_number == 1 and 'group' in service_name:
                continue

            # Otherwise, call it a service.
            else:
                service = {'name': service_name, 'scores': scores}

                # Get service type if it's available
                service_type = company_range[0][column_number]
                if len(service_type):
                    service['type'] = service_type
                services.append(service)

        company['services'] = services
        output.append(company)

    io.write_json(path.assets('services.json'), output)
Example #7
def create(filename):

    company_name = name.filename(filename[:-4])

    all_services = io.read_json(path.assets('services.json'))
    service_data = [
        item for item in all_services
        if (company_name in item['name'].replace('.', ''))
    ]

    if len(service_data) != 1:
        print 'Weird number of services found', len(service_data)

    service_data = service_data[0]

    # Create a mapping dictionary of just indicator names
    # each mapped to -1
    ref = io.read_json(path.ref('service-column-mapping.json'))
    indicator_dict = {}
    for item in ref:
        if is_number(item[1:]):
            indicator_dict[item] = -1

    # Map the indicator to the proper rows
    raw = io.read_csv(path.raw('companies/' + filename))
    for idx, row in enumerate(raw):
        indicator = row[0].lower()
        if indicator in indicator_dict:
            indicator_dict[indicator] = idx

    # Use the survey data to map possible responses to position
    survey = io.read_json(path.assets('survey.json'))

    if not_all_found(indicator_dict.values()):
        print 'Not all indicators accounted for in services overview csv'

    all_indicators = []

    # Get a slice of all the rows that encompass each company
    stops = sorted(idx for idx in indicator_dict.values())
    for idx, stop in enumerate(stops):
        next_stop = stops[idx + 1] if idx + 1 < len(stops) else len(raw) + 1
        indicator_range = raw[stop:next_stop]

        # Divide that slice by empty rows
        split = array.slice_arr(indicator_range, array.is_empty_row)

        # The first slice contains consolidated answers,
        # comments, and sources.
        responses = split.pop(0)

        # The first row of responses is indicator name followed by
        # service categories
        header = [item for item in responses.pop(0) if len(item)]

        indicator_name = header[0]

        # Find the survey question we're looking for
        survey_item = ([
            item for item in survey
            if item['id'].lower() == indicator_name.lower()
        ])

        if len(survey_item) != 1:
            print 'Too many items in survey.json for this indicator'
            print indicator_name
            print survey_item

        indicator_data = {'id': indicator_name, 'services': [], 'levels': []}

        # Check if this indicator is valid before continuing
        if (len(responses) == 1
                and 'this indicator is n/a' in responses[0][0].lower()):
            continue
        else:

            # question scores follow the response text in the split array
            scores = split.pop(0)

            # ..followed by the overall indicator score (verify this)
            indicator_score = split.pop(-1)[0][1] if (
                'indicator score' in split[-1][0][0].lower()) else []
            if not len(indicator_score):
                print '\nIndicator score not found in %s' % header[0]
                print split, '\n'
            else:
                indicator_data['score'] = indicator_score

            # ..and the same for the overall service scores
            level_scores = split.pop(-1)[0] if (
                'level score' in split[-1][0][0].lower()) else []
            if not len(level_scores):
                print '\nService score not found in %s' % header[0]
                print split, '\n'

            # Determine the comments and sources location
            comments = responses.pop(-2)
            sources = responses.pop(-1)

            if ('comments' not in comments[0].lower()
                    or 'sources' not in sources[0].lower()):
                print 'Comments not found in %s' % comments[0]
                print 'Sources not found in %s' % sources[0]

            # Some question texts include an if-not-then clause,
            # which throws off the count between the text and the score.
            # Record it and then delete the row.
            indicator_data['follow'] = 0
            for idx, row in enumerate(responses):
                if 'continue with B' in row[0] and len(set(row[1:])) == 1:
                    indicator_data['follow'] = 1
                    del responses[idx]
                    break

            if len(responses) != len(scores):
                print 'Length of responses and scores not matching'
                print len(responses), len(scores)

            # Save level responses, and level positions
            # Determine if this question has custom answers
            survey_levels = survey_item[0]['levels']
            for idx, level in enumerate(responses):
                level_data = []

                # Assume anything longer than 25 characters,
                # aka "no/insufficient evidence", is a custom response
                custom = 0
                survey_options = survey_levels[idx]['responses']
                for option in survey_options:
                    if len(option) > 25:
                        custom = 1

                for level_idx, level_response in enumerate(level):

                    # First level index is useless.
                    if level_idx == 0 or not len(level_response):
                        continue

                    if len(header) <= level_idx:
                        print 'No header available, this will break'

                    service = header[level_idx]

                    # Exclude group scores, operating company
                    # from indicators that don't need them
                    if (('(group)' in service
                         or '(operating company)' in service)
                            and exclude_service(indicator_name)):
                        continue

                    # Shim issues where the response includes too much text.
                    if (len(level_response) > 25
                            and level_response[:15] == "no/insufficient"):
                        level_response = "no/insufficient evidence"

                    # Only add to the services list if we're on the first level.
                    # Otherwise, we add too many
                    if idx == 0:

                        if 'operating company' in service.lower():
                            service_type = 'operating company'
                        elif 'group' in service.lower():
                            service_type = 'group'
                        else:
                            matching_service = [
                                item for item in service_data['services']
                                if (item['name'].lower() in service.lower())
                            ]
                            if (len(matching_service) == 1
                                    and 'type' in matching_service[0]):
                                service_type = matching_service[0]['type']
                            else:
                                service_type = ''

                        indicator_data['services'].append({
                            'name': scrub_service_name(service),
                            'type': service_type,
                            'comments': comments[level_idx],
                            'sources': sources[level_idx],
                            'score': level_scores[level_idx]
                        })

                    level_data.append({
                        'response': level_response,
                        'score': scores[idx][level_idx]
                    })

                indicator_data['custom'] = custom
                indicator_data['levels'].append({
                    'scores': level_data,
                    'text': survey_levels[idx]['text']
                })

        all_indicators.append(indicator_data)

    io.write_json(path.assets(company_name + '.json'), all_indicators)