Пример #1
0
    def runTest(self):
        """Check the shape of a small sample returned by ``setup.sample``.

        Calls ``setup.sample(100, 85, 'id', 'abstract', 'title')`` and
        verifies that:

        * the result has exactly two entries;
        * the 'train' entry holds at most 85 items and the 'test' entry
          at most 15;
        * every item in both entries exposes each requested field as a key.
        """
        fields = ('id', 'abstract', 'title')
        sample = setup.sample(100, 85, *fields)

        self.assertEqual(len(sample), 2)
        # assertLessEqual reports both operands on failure, whereas
        # assertTrue(a <= b) only reports "False is not true".
        self.assertLessEqual(len(sample['train']), 85)
        self.assertLessEqual(len(sample['test']), 15)

        # Same per-item check for both splits, in the original order.
        for split in ('train', 'test'):
            for f in sample[split]:
                for arg in fields:
                    self.assertIn(arg, f.keys())
    def runTest(self):
        """Validate the train/test sample produced by ``setup.sample``.

        Requests a sample with ``setup.sample(100, 85, 'id', 'abstract',
        'title')`` and asserts that the result contains two entries, that
        'train' has no more than 85 items and 'test' no more than 15, and
        that each item in either entry has all three requested keys.
        """
        fields = ('id', 'abstract', 'title')
        sample = setup.sample(100, 85, *fields)

        self.assertEqual(len(sample), 2)
        # Dedicated comparison assertions print the actual sizes when they
        # fail; assertTrue on a boolean expression would hide them.
        self.assertLessEqual(len(sample['train']), 85)
        self.assertLessEqual(len(sample['test']), 15)

        # 'train' is checked before 'test', matching the original order.
        for split in ('train', 'test'):
            for f in sample[split]:
                for arg in fields:
                    self.assertIn(arg, f.keys())
#!/usr/bin/python
''' 
Script to gather sample articles from solr and write them to json
'''
import plos_classification.setup as setup, json
from datetime import datetime

# Progress messages use a single pre-formatted argument so the output is
# byte-identical to the old `print a, b` statement on Python 2 while the
# line also parses as a function call on Python 3.
print('%s gathering sample articles' % datetime.now())
# NOTE(review): the two numbers are presumably the total sample size and the
# training-set size -- confirm against setup.sample. The remaining arguments
# are the field names requested for each article.
s = setup.sample(15000, 10000, 'abstract', 'title', 'subject2_hierarchy',
                 'cross_published_journal_key')

print('%s writing sample to file' % datetime.now())
# The context manager closes (and therefore flushes) the output file as soon
# as the dump completes, instead of leaving the handle open until exit.
with open('data/sample.json', 'w') as o:
    json.dump(s, o, indent=2)

print('%s finished' % datetime.now())
#!/usr/bin/python
''' 
Script to gather sample articles from solr and write them to json
'''
import plos_classification.setup as setup, json
from datetime import datetime

# Each progress line is emitted as one formatted string: this prints exactly
# what the Python 2 `print a, b` statement printed, and is also valid syntax
# under Python 3.
print('%s gathering sample articles' % datetime.now())
# NOTE(review): 15000 / 10000 look like the overall sample size and the
# training portion -- verify against setup.sample. The string arguments are
# the article fields to fetch.
s = setup.sample(15000, 10000, 'abstract', 'title', 'subject2_hierarchy',
                 'cross_published_journal_key')

print('%s writing sample to file' % datetime.now())
# Use a with-block so the JSON file is flushed and closed deterministically
# rather than relying on interpreter shutdown to release the handle.
with open('data/sample.json', 'w') as o:
    json.dump(s, o, indent=2)

print('%s finished' % datetime.now())