def handle(self, path, **kwargs):
    """Import search results from a file holding one JSON document per line.

    Saves a ``SourceInfo`` record describing this run, creates a ``Search``
    with the requested label, then stores one ``SearchResult`` per accepted
    input line.

    Args:
        path: Location of the input; handed to ``canopener``.
        **kwargs: Command options. ``label`` names the search; when it is
            missing or empty a random 10-character hex label is used. The
            whole mapping is also passed on to ``postprocess_line``.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    # NOTE(review): the description text speaks of company information while
    # this command stores search results -- confirm the wording is intended.
    sourceinfo = models.SourceInfo(
        contributor='OpenOil',
        license='CC-BY-SA-4.0',
        info={
            'description': 'Import of company information from EDGAR',
            'source_file': path,
            'code_version': hulk.utils.get_git_revision(),
            'command_run': ' '.join(sys.argv),
        })
    sourceinfo.save()

    label = kwargs.get('label') or uuid.uuid4().hex[:10]
    search = models.Search(label, metadata={}, source=sourceinfo)
    search.save()

    fh = canopener(path)
    try:
        # sequencenum is the 1-based line number in the input, so skipped
        # lines leave gaps rather than renumbering later results.
        for linenum, line in enumerate(fh, 1):
            if not line.strip():
                # A blank (often trailing) line is not a JSON document.
                continue
            js = json.loads(line)
            try:
                js = self.postprocess_line(js, kwargs)
            except AssertionError:
                # postprocess_line rejects a record by failing an assertion.
                continue
            result = models.SearchResult(
                sequencenum=linenum, search=search, metadata=js)
            result.save()
    finally:
        # Release the input even when parsing or saving fails part-way.
        fh.close()

    print('output at http://localhost:8000/search/%s' % search.label)
def handle(self, path, **kwargs):
    """Import search results from a file holding one JSON document per line.

    Saves a ``SourceInfo`` record describing this run, creates a ``Search``
    with the requested label, then stores one ``SearchResult`` per accepted
    input line.

    Args:
        path: Location of the input; handed to ``canopener``.
        **kwargs: Command options. ``label`` names the search; when it is
            missing or empty a random 10-character hex label is used. The
            whole mapping is also passed on to ``postprocess_line``.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    # NOTE(review): the description text speaks of company information while
    # this command stores search results -- confirm the wording is intended.
    sourceinfo = models.SourceInfo(
        contributor='OpenOil',
        license='CC-BY-SA-4.0',
        info={
            'description': 'Import of company information from EDGAR',
            'source_file': path,
            'code_version': hulk.utils.get_git_revision(),
            'command_run': ' '.join(sys.argv),
        })
    sourceinfo.save()

    label = kwargs.get('label') or uuid.uuid4().hex[:10]
    search = models.Search(label, metadata={}, source=sourceinfo)
    search.save()

    fh = canopener(path)
    try:
        # sequencenum is the 1-based line number in the input, so skipped
        # lines leave gaps rather than renumbering later results.
        for linenum, line in enumerate(fh, 1):
            if not line.strip():
                # A blank (often trailing) line is not a JSON document.
                continue
            js = json.loads(line)
            try:
                js = self.postprocess_line(js, kwargs)
            except AssertionError:
                # postprocess_line rejects a record by failing an assertion.
                continue
            result = models.SearchResult(
                sequencenum=linenum, search=search, metadata=js)
            result.save()
    finally:
        # Release the input even when parsing or saving fails part-way.
        fh.close()

    print('output at http://localhost:8000/search/%s' % search.label)
def handle(self, path, **kwargs):
    """Import companies from a tab-separated file.

    Saves a ``SourceInfo`` record describing this run, then gets or creates
    one ``Company`` per non-empty input row.

    Args:
        path: Location of the input; handed to ``canopener``. Each row must
            carry exactly four tab-separated fields: cik, name, state, sic.
        **kwargs: Command options; not read by this method.

    Raises:
        ValueError: If a non-empty row does not have exactly four fields.
    """
    sourceinfo = models.SourceInfo(
        contributor='OpenOil',
        license='CC-BY-SA-4.0',
        info={
            'description': 'Import of company information from EDGAR',
            'source_file': path,
            'code_version': hulk.utils.get_git_revision(),
            'command_run': ' '.join(sys.argv),
        })
    sourceinfo.save()

    fh = canopener(path)
    try:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            cik, name, state, sic = row
            comp, _created = models.Company.objects.get_or_create(
                company_name=name.title(),
                cik=cik,
                jurisdiction='US_%s' % state,
                sic=sic,
                source=sourceinfo,
                defaults={})
            # Kept from the original so any save-time behaviour still runs
            # for rows that already existed.
            comp.save()
    finally:
        # Release the input even when a row fails to import.
        fh.close()
def handle(self, path, **kwargs):
    """Import companies from a tab-separated file.

    Saves a ``SourceInfo`` record describing this run, then gets or creates
    one ``Company`` per non-empty input row.

    Args:
        path: Location of the input; handed to ``canopener``. Each row must
            carry exactly four tab-separated fields: cik, name, state, sic.
        **kwargs: Command options; not read by this method.

    Raises:
        ValueError: If a non-empty row does not have exactly four fields.
    """
    sourceinfo = models.SourceInfo(
        contributor='OpenOil',
        license='CC-BY-SA-4.0',
        info={
            'description': 'Import of company information from EDGAR',
            'source_file': path,
            'code_version': hulk.utils.get_git_revision(),
            'command_run': ' '.join(sys.argv),
        })
    sourceinfo.save()

    fh = canopener(path)
    try:
        for row in csv.reader(fh, delimiter='\t'):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            cik, name, state, sic = row
            comp, _created = models.Company.objects.get_or_create(
                company_name=name.title(),
                cik=cik,
                jurisdiction='US_%s' % state,
                sic=sic,
                source=sourceinfo,
                defaults={})
            # Kept from the original so any save-time behaviour still runs
            # for rows that already existed.
            comp.save()
    finally:
        # Release the input even when a row fails to import.
        fh.close()
def test_s3_url(self, s3_conn_mock):
    # Opening an s3:// location must hand back a FileMock instance.
    # s3_conn_mock is not read here; presumably it is injected by a patch
    # decorator outside this view.
    with canopener('s3://blahblah/blah') as opened:
        assert isinstance(opened, FileMock)
def test_cant_write_url(self):
    # Requesting write mode on an http URL must be rejected with ValueError.
    assert_raises(ValueError, canopener, 'http://blahblah/blah', 'w')
def test_url(self):
    # Opening an http URL must hand back a FileMock instance.
    with canopener('http://blahblah/blah') as opened:
        assert isinstance(opened, FileMock)
def test_local(self):
    # Opening a plain local path must hand back a FileMock instance.
    with canopener('blahblah') as opened:
        assert isinstance(opened, FileMock)
def test_gzpath(self):
    # A path ending in .gz must be opened as a gzfile instance.
    with canopener('blahblah.gz') as opened:
        assert isinstance(opened, gzfile)
def test_bz2path(self):
    # A path ending in .bz2 is expected to come back as a FileMock instance.
    with canopener('blahblah.bz2') as opened:
        assert isinstance(opened, FileMock)