Exemplo n.º 1
0
def publisher_timelag_sorted():
    """Return one timelag row per publisher, ordered for display.

    A row is the tuple ``(publisher, publisher_title,
    transaction_months_with_year, timelag)``, where ``publisher_title`` is
    whatever ``publisher_name.get(publisher)`` returns and the last two items
    come from the publisher's entry under
    ``./stats-calculated/current/aggregated-publisher``.

    Returns:
        list: the rows sorted by ``timelag_index(timelag)`` first and by
        ``publisher_title`` second.
    """
    publisher_timelags = [
        (publisher, publisher_name.get(publisher),
         agg['transaction_months_with_year'], agg['timelag'])
        for publisher, agg in JSONDir(
            './stats-calculated/current/aggregated-publisher').items()
    ]

    def sort_key(row):
        # Tuple-unpacking parameters (``lambda (a, b): ...``) were removed in
        # Python 3 (PEP 3113); unpacking in the body works on 2 and 3 alike.
        _publisher, publisher_title, _months, timelag = row
        return (timelag_index(timelag), publisher_title)

    return sorted(publisher_timelags, key=sort_key)
Exemplo n.º 2
0
def publisher_timelag_dict():
    """Return the timelag rows keyed by publisher.

    Each value is the tuple ``(publisher, publisher_title,
    transaction_months_with_year, timelag)`` built from the publisher's entry
    under ``./stats-calculated/current/aggregated-publisher``; the key is the
    same ``publisher`` that appears as the tuple's first item.
    """
    aggregated = JSONDir('./stats-calculated/current/aggregated-publisher')
    return {
        publisher: (
            publisher,
            publisher_name.get(publisher),
            agg['transaction_months_with_year'],
            agg['timelag'],
        )
        for publisher, agg in aggregated.items()
    }
Exemplo n.º 3
0
def publisher_frequency():
    """Yield update counts and a publishing-frequency label per publisher.

    Iterates the entries of
    './stats-calculated/gitaggregate-publisher-dated'. Entries without a
    'most_recent_transaction_date' mapping are skipped.

    Yields:
        tuple: ``(publisher, publisher_name.get(publisher),
        updates_per_month, frequency)``, emitted only for publishers present
        in ``publisher_name``. ``updates_per_month`` is a ``defaultdict(int)``
        keyed by the first seven characters of the snapshot key (presumably
        'YYYY-MM' -- confirm against the data), and ``frequency`` is one of
        'Monthly', 'Quarterly', 'Six-Monthly', 'Annual' or
        'Less than Annual'.
    """

    def update_flags(month_counts, start, stop):
        # One boolean per month in previous_months[start:stop]: True when
        # that month has at least one counted update.
        return [month in month_counts
                for month in previous_months[start:stop]]

    dated_stats = JSONDir('./stats-calculated/gitaggregate-publisher-dated')

    for publisher, agg in dated_stats.items():
        if 'most_recent_transaction_date' not in agg:
            continue

        snapshots = agg['most_recent_transaction_date']

        # Walk the snapshots in key order; every time the recorded date moves
        # forward, count one update against that snapshot's month.
        updates_per_month = defaultdict(int)
        latest_seen = datetime.date(1, 1, 1)
        for gitdate, date_text in sorted(snapshots.items()):
            parsed = parse_iso_date(date_text)
            if parsed is not None and parsed > latest_seen:
                latest_seen = parsed
                updates_per_month[gitdate[:7]] += 1

        # The smallest snapshot key is taken as the first-published date.
        first_published = parse_iso_date(sorted(snapshots)[0])

        if first_published >= previous_month_starts[2]:
            # Labelled 'Annual' regardless of how many updates were seen.
            frequency = 'Annual'
        elif first_published >= previous_month_starts[5]:
            if all(update_flags(updates_per_month, 0, 3)):
                frequency = 'Monthly'
            else:
                frequency = 'Annual'
        elif first_published >= previous_month_starts[11]:
            if update_flags(updates_per_month, 0, 6).count(True) >= 4:
                frequency = 'Monthly'
            elif (any(update_flags(updates_per_month, 0, 3))
                    and any(update_flags(updates_per_month, 3, 6))):
                frequency = 'Quarterly'
            else:
                frequency = 'Annual'
        else:
            year_flags = update_flags(updates_per_month, 0, 12)
            active_quarters = [
                any(year_flags[start:start + 3]) for start in (0, 3, 6, 9)
            ]
            if year_flags.count(True) >= 9:
                frequency = 'Monthly'
            elif active_quarters.count(True) >= 3:
                frequency = 'Quarterly'
            elif any(year_flags[:6]) and any(year_flags[6:12]):
                frequency = 'Six-Monthly'
            elif any(year_flags):
                frequency = 'Annual'
            else:
                frequency = 'Less than Annual'

        # Only display current publishers
        if publisher not in publisher_name:
            continue
        yield (publisher, publisher_name.get(publisher),
               updates_per_month, frequency)
Exemplo n.º 4
0
def publisher_frequency():
    """Generate the publisher frequency data.

    Iterates the entries of
    './stats-calculated/gitaggregate-publisher-dated'. A publisher is skipped
    when its entry has no 'most_recent_transaction_date' mapping or when its
    id is a key of ``get_registry_id_matches()``.

    Yields:
        tuple: ``(publisher, publisher_name.get(publisher),
        updates_per_month, frequency)``, emitted only for publishers present
        in ``publisher_name``. ``updates_per_month`` is a ``defaultdict(int)``
        keyed by the first seven characters of the snapshot key (presumably
        'YYYY-MM' -- confirm against the data), and ``frequency`` is one of
        'Monthly', 'Quarterly', 'Six-Monthly', 'Annual' or
        'Less than Annual'.
    """

    # Load the data from 'gitaggregate-publisher-dated'
    gitaggregate_publisher = JSONDir(
        './stats-calculated/gitaggregate-publisher-dated')

    # Publishers who have since changed their Registry ID. The lookup does not
    # depend on the publisher being processed, so fetch it once here instead
    # of once per loop iteration; the frozenset gives O(1) membership tests.
    superseded_registry_ids = frozenset(get_registry_id_matches().keys())

    # Loop over each publisher - i.e. a publisher folder within
    # 'gitaggregate-publisher-dated'
    for publisher, agg in gitaggregate_publisher.items():

        # Skip publishers with no 'most_recent_transaction_date' data
        if 'most_recent_transaction_date' not in agg:
            continue

        # Skip publishers who have since changed their Registry ID
        if publisher in superseded_registry_ids:
            continue

        updates_per_month = defaultdict(int)
        previous_transaction_date = datetime.date(1, 1, 1)

        # Walk the snapshots in key order. Each time the recorded most recent
        # transaction date moves forward, count one update against the month
        # of that snapshot.
        for gitdate, transaction_date_str in sorted(
                agg['most_recent_transaction_date'].items()):
            transaction_date = parse_iso_date(transaction_date_str)

            # If transaction date has increased
            if (transaction_date is not None
                    and transaction_date > previous_transaction_date):
                previous_transaction_date = transaction_date
                updates_per_month[gitdate[:7]] += 1

        # Find the first date that this publisher made data available (the
        # smallest snapshot key) and parse it
        first_published_string = sorted(agg['most_recent_transaction_date'])[0]
        first_published = parse_iso_date(first_published_string)

        # One flag per month of previous_months[:12]: True when that month
        # saw at least one update. Every rule below is a slice of this list.
        updated = [month in updates_per_month
                   for month in previous_months[:12]]

        # Implement the assessment logic on
        # http://dashboard.iatistandard.org/timeliness.html#h_assesment

        if first_published >= previous_month_days[2]:
            # This is a publisher of less than 3 months: always 'Annual'
            frequency = 'Annual'
        elif first_published >= previous_month_days[5]:
            # This is a publisher of less than 6 months
            frequency = 'Monthly' if all(updated[:3]) else 'Annual'
        elif first_published >= previous_month_days[11]:
            # This is a publisher of less than 12 months
            if sum(updated[:6]) >= 4:
                frequency = 'Monthly'
            elif any(updated[:3]) and any(updated[3:6]):
                frequency = 'Quarterly'
            else:
                frequency = 'Annual'
        else:
            # This is a publisher of 1 year or more
            months_updated = sum(updated)
            if months_updated >= 7 and any(updated[:2]):
                # Data updated in 7 or more of past 12 full months AND data
                # updated at least once in last 2 full months.
                frequency = 'Monthly'
            elif months_updated >= 3 and any(updated[:4]):
                # Data updated in 3 or more of past 12 full months AND data
                # updated at least once in last 4 full months.
                frequency = 'Quarterly'
            elif any(updated[:6]) and any(updated[6:12]):
                # There has been an update in both of the last two 6 month
                # periods
                frequency = 'Six-Monthly'
            elif months_updated:
                # There has been an update in at least 1 of the last 12 months
                frequency = 'Annual'
            else:
                # There has been an update in none of the last 12 months
                frequency = 'Less than Annual'

        # Only publishers in the list of current publishers are yielded
        if publisher in publisher_name:
            yield publisher, publisher_name.get(
                publisher), updates_per_month, frequency
Exemplo n.º 5
0
def publisher_frequency():
    """Generate the publisher frequency data.

    Iterates the entries of
    './stats-calculated/gitaggregate-publisher-dated'. A publisher is skipped
    when its entry has no 'most_recent_transaction_date' mapping or when its
    id is a key of ``get_registry_id_matches()``.

    Yields:
        tuple: ``(publisher, publisher_name.get(publisher),
        updates_per_month, frequency)``, emitted only for publishers present
        in ``publisher_name``. ``updates_per_month`` is a ``defaultdict(int)``
        keyed by the first seven characters of the snapshot key (presumably
        'YYYY-MM' -- confirm against the data), and ``frequency`` is one of
        'Monthly', 'Quarterly', 'Six-Monthly', 'Annual' or
        'Less than Annual'.
    """

    def count_updated(month_counts, start, stop):
        # Number of months in previous_months[start:stop] that have at least
        # one counted update.
        return sum(1 for month in previous_months[start:stop]
                   if month in month_counts)

    # Load the data from 'gitaggregate-publisher-dated'
    gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated')

    # Old Registry IDs of publishers who have since changed ID. This does not
    # vary per publisher, so it is fetched once up front rather than on every
    # loop iteration; the frozenset keeps each membership test O(1).
    changed_registry_ids = frozenset(get_registry_id_matches().keys())

    # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated'
    for publisher, agg in gitaggregate_publisher.items():

        # Skip to the next publisher if there is no data for 'most_recent_transaction_date'
        if 'most_recent_transaction_date' not in agg:
            continue

        # Skip if this publisher has since changed their Registry ID
        if publisher in changed_registry_ids:
            continue

        updates_per_month = defaultdict(int)
        previous_transaction_date = datetime.date(1, 1, 1)

        # Go through the snapshots in key order; whenever the recorded most
        # recent transaction date has increased, count an update for the
        # month of that snapshot.
        for gitdate, transaction_date_str in sorted(agg['most_recent_transaction_date'].items()):
            transaction_date = parse_iso_date(transaction_date_str)
            if transaction_date is not None and transaction_date > previous_transaction_date:
                previous_transaction_date = transaction_date
                updates_per_month[gitdate[:7]] += 1

        # Find the first date that this publisher made data available (the
        # smallest snapshot key) and parse it
        first_published_string = sorted(agg['most_recent_transaction_date'])[0]
        first_published = parse_iso_date(first_published_string)

        # Implement the assessment logic on http://dashboard.iatistandard.org/timeliness.html#h_assesment

        if first_published >= previous_month_starts[2]:
            # This is a publisher of less than 3 months: always 'Annual'
            frequency = 'Annual'
        elif first_published >= previous_month_starts[5]:
            # This is a publisher of less than 6 months
            if all(month in updates_per_month for month in previous_months[:3]):
                frequency = 'Monthly'
            else:
                frequency = 'Annual'
        elif first_published >= previous_month_starts[11]:
            # This is a publisher of less than 12 months
            if count_updated(updates_per_month, 0, 6) >= 4:
                frequency = 'Monthly'
            elif (count_updated(updates_per_month, 0, 3) > 0
                    and count_updated(updates_per_month, 3, 6) > 0):
                frequency = 'Quarterly'
            else:
                frequency = 'Annual'
        else:
            # This is a publisher of 1 year or more
            updated_in_year = count_updated(updates_per_month, 0, 12)
            if updated_in_year >= 7 and count_updated(updates_per_month, 0, 2) >= 1:
                # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months.
                frequency = 'Monthly'
            elif updated_in_year >= 3 and count_updated(updates_per_month, 0, 4) >= 1:
                # Data updated in 3 or more of past 12 full months AND data updated at least once in last 4 full months.
                frequency = 'Quarterly'
            elif (count_updated(updates_per_month, 0, 6) > 0
                    and count_updated(updates_per_month, 6, 12) > 0):
                # There has been an update in both of the last two 6 month periods
                frequency = 'Six-Monthly'
            elif updated_in_year > 0:
                # There has been an update in at least 1 of the last 12 months
                frequency = 'Annual'
            else:
                # There has been an update in none of the last 12 months
                frequency = 'Less than Annual'

        # Only publishers in the list of current publishers are yielded
        if publisher in publisher_name:
            yield publisher, publisher_name.get(publisher), updates_per_month, frequency
Exemplo n.º 6
0
def publisher_timelag_dict():
    """Return a dict mapping each publisher to its timelag row.

    The row is the tuple ``(publisher, publisher_title,
    transaction_months_with_year, timelag)`` assembled from the publisher's
    entry under ``./stats-calculated/current/aggregated-publisher``.
    """
    rows_by_publisher = {}
    source = JSONDir('./stats-calculated/current/aggregated-publisher')
    for publisher, agg in source.items():
        rows_by_publisher[publisher] = (
            publisher,
            publisher_name.get(publisher),
            agg['transaction_months_with_year'],
            agg['timelag'],
        )
    return rows_by_publisher
Exemplo n.º 7
0
def publisher_timelag_sorted():
    """Return the per-publisher timelag rows in display order.

    Every row is ``(publisher, publisher_title,
    transaction_months_with_year, timelag)``; ``publisher_title`` is the
    result of ``publisher_name.get(publisher)`` and the remaining values come
    from ``./stats-calculated/current/aggregated-publisher``.

    Returns:
        list: rows ordered by ``timelag_index(timelag)``, with ties broken by
        ``publisher_title``.
    """
    publisher_timelags = [
        (publisher, publisher_name.get(publisher),
         agg['transaction_months_with_year'], agg['timelag'])
        for publisher, agg in JSONDir(
            './stats-calculated/current/aggregated-publisher').items()
    ]
    # The original ``lambda (a, b, c, d): ...`` form is a SyntaxError on
    # Python 3 (PEP 3113), so index into the row instead:
    # row[3] is the timelag, row[1] is the publisher title.
    return sorted(
        publisher_timelags,
        key=lambda row: (timelag_index(row[3]), row[1]),
    )