def most_referenced(sample, amount):
  """Count authors in a sample and export the references of the top ones.

  Reads ``data/02-refined/<sample>-rows.json`` and updates the ``authors``
  mapping defined outside this function: for every author name it keeps
  ``{'articles': n, 'referenced': n}``, where ``articles`` counts
  appearances in ``row['article']['authors']`` and ``referenced`` counts
  appearances in the ``authors`` list of each entry of
  ``row['article']['referencelist']``.

  Then writes ``data/03-stats/<sample>-top-references.csv``: one row
  (comma-joined authors, year, original) per occurrence of one of the
  ``amount`` most-referenced authors in a reference's author list, so a
  reference shared by several top authors is written once per such author.

  NOTE(review): ``most_referenced`` is defined a second time later in this
  file; the later definition is the one that stays bound.

  Args:
    sample: name fragment used to build the input and output file paths.
    amount: how many of the most-referenced authors to keep.
  """
  # Plain 'r' instead of 'rU': the 'U' flag no longer exists in current
  # Python 3, and JSON parsing treats every newline flavour as whitespace.
  with open('data/02-refined/%s-rows.json' % sample, 'r') as f:
    rows = json.load(f)

  for row in rows:
    article = row['article']

    for author in article['authors']:
      if author not in authors:
        authors[author] = {'articles': 0, 'referenced': 0}
      authors[author]['articles'] += 1

    for reference in article['referencelist']:
      for author in reference['authors']:
        if author not in authors:
          authors[author] = {'articles': 0, 'referenced': 0}
        # Credit every author of the reference, not only the last one.
        authors[author]['referenced'] += 1

  # The ranking does not change while writing, so compute it once instead
  # of re-sorting the whole mapping for every referenced author.
  ranked = sorted(
      authors.items(), key=lambda item: item[1]['referenced'], reverse=True)
  top_authors = set(name for name, _ in ranked[:amount])

  with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
    writer = CSVUnicodeWriter(f)
    for row in rows:
      for reference in row['article']['referencelist']:
        for author in reference['authors']:
          if author in top_authors:
            writer.writerow([
                ','.join(reference['authors']),
                reference['year'],
                reference['original'],
            ])
def most_referenced(sample, amount):
    """Count authors in a sample and export the references of the top ones.

    Reads ``data/02-refined/<sample>-rows.json`` and updates the ``authors``
    mapping defined outside this function: for every author name it keeps
    ``{'articles': n, 'referenced': n}``, where ``articles`` counts
    appearances in ``row['article']['authors']`` and ``referenced`` counts
    appearances in the ``authors`` list of each entry of
    ``row['article']['referencelist']``.

    Then writes ``data/03-stats/<sample>-top-references.csv``: one row
    (comma-joined authors, year, original) per occurrence of one of the
    ``amount`` most-referenced authors in a reference's author list, so a
    reference shared by several top authors is written once per such author.

    Args:
        sample: name fragment used to build the input and output file paths.
        amount: how many of the most-referenced authors to keep.
    """
    # Plain 'r' instead of 'rU': the 'U' flag no longer exists in current
    # Python 3, and JSON parsing treats every newline flavour as whitespace.
    with open('data/02-refined/%s-rows.json' % sample, 'r') as f:
        rows = json.load(f)

    for row in rows:
        article = row['article']

        for author in article['authors']:
            counts = authors.setdefault(
                author, {'articles': 0, 'referenced': 0})
            counts['articles'] += 1

        for reference in article['referencelist']:
            for author in reference['authors']:
                counts = authors.setdefault(
                    author, {'articles': 0, 'referenced': 0})
                # Credit every author of the reference, not only the last.
                counts['referenced'] += 1

    # The ranking does not change while writing, so compute it once instead
    # of re-sorting the whole mapping for every referenced author.
    ranked = sorted(
        authors.items(),
        key=lambda item: item[1]['referenced'],
        reverse=True)
    top_authors = set(name for name, _ in ranked[:amount])

    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)
        for row in rows:
            for reference in row['article']['referencelist']:
                for author in reference['authors']:
                    if author in top_authors:
                        writer.writerow([
                            ','.join(reference['authors']),
                            reference['year'],
                            reference['original'],
                        ])
def references_by_authors_db(sample, authors):
  """Export the references found in ``db.references`` for the given authors.

  For each ``(author, stats)`` pair in ``authors`` the author is printed,
  then ``db.references.find`` is queried with ``articleset == sample`` and
  ``authors == author``, and every result is written to
  ``data/03-stats/<sample>-top-references.csv`` as one row: comma-joined
  authors, year, original.

  NOTE(review): ``db`` looks like a MongoDB handle, in which case the
  ``authors`` filter would match references whose author list contains the
  name -- confirm against where ``db`` is created.
  NOTE(review): ``references_by_authors_db`` is defined a second time later
  in this file; the later definition is the one that stays bound.

  Args:
    sample: article set name; also used to build the output file path.
    authors: iterable of ``(author, stats)`` pairs; only ``author`` is used.
  """
  # The context manager closes the file even when the query or a write
  # raises part-way through.
  with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
    writer = CSVUnicodeWriter(f)

    for author, _stats in authors:
      # Parenthesised single-argument form works as a statement and as a call.
      print(author)
      query = {
        'articleset': sample,
        'authors': author,
      }
      for reference in db.references.find(query):
        writer.writerow([
            ','.join(reference['authors']),
            reference['year'],
            reference['original'],
        ])
def references_by_authors_db(sample, authors):
    """Export the references found in ``db.references`` for the given authors.

    For each ``(author, stats)`` pair in ``authors`` the author is printed,
    then ``db.references.find`` is queried with ``articleset == sample`` and
    ``authors == author``, and every result is written to
    ``data/03-stats/<sample>-top-references.csv`` as one row: comma-joined
    authors, year, original.

    NOTE(review): ``db`` looks like a MongoDB handle, in which case the
    ``authors`` filter would match references whose author list contains the
    name -- confirm against where ``db`` is created.

    Args:
        sample: article set name; also used to build the output file path.
        authors: iterable of ``(author, stats)`` pairs; only ``author`` is
            used.
    """
    # The context manager closes the file even when the query or a write
    # raises part-way through.
    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)

        for author, _stats in authors:
            # Parenthesised single-argument form works as a statement and
            # as a call.
            print(author)
            query = {
                'articleset': sample,
                'authors': author,
            }
            for reference in db.references.find(query):
                writer.writerow([
                    ','.join(reference['authors']),
                    reference['year'],
                    reference['original'],
                ])