class AggregatorImportViewTest(TestCase):
    """Tests the aggregator silk-rule import view (POST of a config file)."""

    def setUp(self):
        self.aggregator = AggregatorFactory()
        self.update_url = self.aggregator.get_absolute_url() + 'post/'
        self.post_data = dict()
        # Fix: the original left this file handle open (descriptor leak);
        # register a cleanup so it is closed when each test finishes.
        rule_file = open(self._get_test_file('config.xml', 'controller'))
        self.addCleanup(rule_file.close)
        self.post_data['silk_rule_file'] = rule_file

    def test_post(self):
        """Posting a silk rule file stores the normalized rule XML."""
        self.client_login('admin')
        archiveitem = ArchiveItemFactory()
        self.post_data['archiveitems'] = [archiveitem.pk]
        response = self.client.post(self.update_url, data=self.post_data)
        self.assertRedirects(response, self.aggregator.get_absolute_url())
        aggregator = Aggregator.objects.get(pk=self.aggregator.pk)
        # newlines are stripped so the comparison is layout-insensitive
        self.assertEqual(
            aggregator.silk_rule.strip().replace("\n", ""),
            '<LinkageRule> '
            '<Compare id="unnamed_5" metric="levenshtein" required="false" '
            'threshold="0.0" weight="1"> '
            '<TransformInput function="lowerCase" id="unnamed_3"> '
            '<Input id="unnamed_1" path="?a/sd:Event#name" /> '
            '</TransformInput> <TransformInput function="lowerCase" '
            'id="unnamed_4"> '
            '<Input id="unnamed_2" path="?b/sd:Event#name" /> '
            '</TransformInput> <Param name="minChar" value="0" />'
            ' <Param name="maxChar" value="z" /> '
            '</Compare> </LinkageRule>'.strip()
        )
class AggregatorImportViewTest(TestCase):
    """Tests importing a silk rule configuration file via POST."""

    def setUp(self):
        self.aggregator = AggregatorFactory()
        self.update_url = self.aggregator.get_absolute_url() + 'post/'
        self.post_data = dict()
        # Fix: close the uploaded test file at teardown; the original
        # version never closed it (resource leak).
        rule_file = open(self._get_test_file('config.xml', 'controller'))
        self.addCleanup(rule_file.close)
        self.post_data['silk_rule_file'] = rule_file

    def test_post(self):
        """The uploaded file's LinkageRule ends up stored on the model."""
        self.client_login('admin')
        archiveitem = ArchiveItemFactory()
        self.post_data['archiveitems'] = [archiveitem.pk]
        response = self.client.post(self.update_url, data=self.post_data)
        self.assertRedirects(response, self.aggregator.get_absolute_url())
        aggregator = Aggregator.objects.get(pk=self.aggregator.pk)
        self.assertEqual(
            aggregator.silk_rule.strip().replace("\n", ""),
            '<LinkageRule> '
            '<Compare id="unnamed_5" metric="levenshtein" required="false" '
            'threshold="0.0" weight="1"> '
            '<TransformInput function="lowerCase" id="unnamed_3"> '
            '<Input id="unnamed_1" path="?a/sd:Event#name" /> '
            '</TransformInput> <TransformInput function="lowerCase" '
            'id="unnamed_4"> '
            '<Input id="unnamed_2" path="?b/sd:Event#name" /> '
            '</TransformInput> <Param name="minChar" value="0" />'
            ' <Param name="maxChar" value="z" /> '
            '</Compare> </LinkageRule>'.strip())
def setUp(self):
    """Log in as admin and compute the export/workflow URLs."""
    self.client_login('admin')
    self.aggregator = AggregatorFactory()
    aggregator_pk = self.aggregator.pk
    self.export_url = reverse('aggregator_export', args=(aggregator_pk,))
    self.workflow_url = reverse('aggregator_workflow', args=[aggregator_pk])
def setUp(self):
    """Prepare the POST url and a silk-rule file upload for each test."""
    self.aggregator = AggregatorFactory()
    self.update_url = self.aggregator.get_absolute_url() + 'post/'
    self.post_data = dict()
    # Fix: the original never closed this file handle (descriptor leak);
    # addCleanup guarantees it is closed after every test.
    rule_file = open(self._get_test_file('config.xml', 'controller'))
    self.addCleanup(rule_file.close)
    self.post_data['silk_rule_file'] = rule_file
def setUp(self):
    """Authenticate and build the aggregator's export and workflow URLs."""
    self.client_login('admin')
    self.aggregator = AggregatorFactory()
    self.export_url = reverse('aggregator_export',
                              args=(self.aggregator.pk,))
    self.workflow_url = reverse('aggregator_workflow',
                                args=[self.aggregator.pk])
def setUp(self):
    """Link a fresh archive item to an aggregator and build the del URL."""
    self.archiveitem = ArchiveItemFactory()
    self.aggregator = AggregatorFactory()
    AggregatorArchiveItem.objects.create(
        aggregator=self.aggregator,
        archiveitem=self.archiveitem,
    )
    self.url = reverse(
        'archiveitem_aggregator_del',
        args=(self.archiveitem.pk,),
    )
class AggregatorUpdateViewTest(TestCase):
    """Tests for the aggregator edit view (GET of the form, POST update)."""

    def setUp(self):
        self.aggregator = AggregatorFactory()
        self.update_url = self.aggregator.get_absolute_url() + 'edit/'
        self.post_data = dict(AggregatorFactory.attributes())

    def test_get(self):
        """The edit form shows the aggregator's current name/description."""
        self.client_login('admin')
        response = self.client.get(self.update_url)
        self.assertContains(response, self.aggregator.name)
        self.assertContains(response, self.aggregator.description)

    def test_post(self):
        """Posting updated data saves it and links the archive item."""
        self.client_login('admin')
        archiveitem = ArchiveItemFactory()
        self.post_data['archiveitems'] = [archiveitem.pk]
        # Fix: removed a leftover debug statement (`print self.post_data`)
        # that polluted the test runner's output.
        response = self.client.post(self.update_url, data=self.post_data)
        self.assertRedirects(response, self.aggregator.get_absolute_url())
        aggregator = Aggregator.objects.get(pk=self.aggregator.pk)
        self.assertEqual(aggregator.name, self.post_data['name'])
        self.assertEqual(aggregator.description, self.post_data['description'])
        self.assertIn(archiveitem, aggregator.archiveitems.all())
def setUp(self):
    """Prepare the edit URL and a default POST payload."""
    self.aggregator = AggregatorFactory()
    self.update_url = '{}edit/'.format(self.aggregator.get_absolute_url())
    self.post_data = dict(AggregatorFactory.attributes())
def setUp(self):
    """Store the creation URL and a default attribute payload."""
    self.post_data = dict(AggregatorFactory.attributes())
    self.create_url = '/c/aggregator/create/'
class AggregatorDetailViewTest(TestCase):
    """Tests for the aggregator detail page: rendered content, silk
    project export, and workflow launching."""

    def setUp(self):
        # every test runs logged-in against a fresh aggregator
        self.client_login('admin')
        self.aggregator = AggregatorFactory()
        self.export_url = reverse(
            'aggregator_export', args=(self.aggregator.pk, )
        )
        self.workflow_url = reverse(
            'aggregator_workflow', args=[self.aggregator.pk, ])

    def test_get(self):
        # the detail page shows the aggregator's name
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.name)

    def test_contains_edit_link(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(
            response, self.aggregator.get_absolute_url() + 'edit/'
        )

    def test_contains_archiveitems(self):
        # every archive item linked to the aggregator must be listed
        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator,
                archiveitem=item
            )
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, item1.get_absolute_url())
        self.assertContains(response, item2.get_absolute_url())

    def test_contains_rule(self):
        # the stored silk rule text is rendered on the page
        self.aggregator.silk_rule = 'tanto gentile e tanto onesta pare'
        self.aggregator.save()
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.silk_rule)

    def test_contains_schedulers(self):
        # a placeholder is shown until a scheduler exists, then its link
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, 'No scheduler found')
        scheduler = AggregatorSchedulerFactory(object_id=self.aggregator.pk)
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertNotContains(response, 'No scheduler found')
        self.assertContains(response, scheduler.get_absolute_url())

    def test_contains_silk_link(self):
        # the page links to the external Silk workbench
        response = self.client.get(self.aggregator.get_absolute_url())
        silk_url = 'http://{}:{}/workbench/'.format(
            settings.SILK_EXTERNAL_HOST, settings.SILK_EXTERNAL_PORT
        )
        self.assertContains(response, silk_url)

    def test_can_download_silk_project_file(self):
        # ?download forces an attachment (Content-Disposition header set)
        self.client_login('admin')
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.export_url + '?download')
        response = self.client.get(self.export_url + '?download')
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_can_run_workflow(self):
        # POSTing the workflow url creates one Scheduler and redirects
        # to the task page
        self.client_login('admin')
        Scheduler.objects.all().delete()
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.workflow_url)
        response = self.client.post(self.workflow_url)
        self.assertEqual(response.status_code, 302)
        self.assertTrue(
            response['Location'].startswith('http://testserver/s/task/')
        )
        self.assertEqual(Scheduler.objects.count(), 1)

    def test_can_view_silk_project_file_without_downloading(self):
        # without ?download the XML is served inline
        self.client_login('admin')
        response = self.client.get(self.export_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertFalse(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_silk_project_file_is_valid(self):
        # parse the exported project and verify its overall structure:
        # prefixes, datasources, and one linking task per archive item
        import xml.etree.ElementTree as ET
        self.client_login('admin')
        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator,
                archiveitem=item
            )
        response = self.client.get(self.export_url)
        tree = ET.fromstring(response.content)
        self.assertIn(
            (settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd'),
            [(x.get('namespace'), x.get('id'))
             for x in tree.findall('.//Prefix')]
        )
        # check datasources: master graph first, then one per item
        datasources = tree.findall('.//DataSource')
        self.assertEqual(len(datasources), 3)
        self.assertEqual(datasources[0].get('id'), 'master-graph')
        mastergraph = datasources[0]
        datasources = datasources[1:]
        # check datasources endpoints
        self.assertEqual(
            mastergraph.find('Param[@name="host"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER['HOST']
        )
        self.assertEqual(
            [get_virtuoso_endpoint()] * 2,
            [x.find('Param[@name="endpointURI"]').get("value")
             for x in datasources]
        )
        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"]
        )
        self.assertEqual(
            [item1.datagraph_mapped_name, item2.datagraph_mapped_name],
            [x.find('Param[@name="graph"]').get("value")
             for x in datasources]
        )
        # check tasks
        datasource_ids = [x.get('id') for x in datasources]
        tasks = tree.findall('.//LinkingTask')
        self.assertEqual(len(tasks), 2)
        self.assertEqual(
            datasource_ids,
            [x.find('.//Interlink').get('id') for x in tasks]
        )
        # check task parameters: each task links its datasource to the
        # master graph, restricted by entity type / vertex selector
        for datasource_id, task in zip(datasource_ids, tasks):
            self.assertEqual(
                task.find('.//SourceDataset').get('dataSource'),
                datasource_id
            )
            self.assertEqual(
                task.find('.//TargetDataset').get('dataSource'),
                'master-graph'
            )
            self.assertEqual(
                task.find('.//SourceDataset').find('RestrictTo').text.strip(),
                '?a rdf:type <{}> .'.format(self.aggregator.entity_type)
            )
            self.assertEqual(
                task.find('.//TargetDataset').find('RestrictTo').text.strip(),
                'b -> {}'.format(self.aggregator.vertex_selector)
            )
            # aggregator has no rule configured, so these stay empty
            self.assertIsNone(task.find('.//LinkageRule').text)
            self.assertIsNone(task.find('.//Filter').text)
            self.assertIsNone(task.find('.//Outputs').text)
            self.assertIsNone(task.find('.//PositiveEntities').text)
            self.assertIsNone(task.find('.//NegativeEntities').text)
            self.assertIsNone(
                task.find('.//Alignment/')
                .find('{}Alignment'.format('{http://knowledgeweb.'
                                           'semanticweb.org'
                                           '/heterogeneity/alignment#}')
                      ).text
            )
def setUp(self):
    """Build the POST url and attach a silk-rule test file to the payload."""
    self.aggregator = AggregatorFactory()
    self.update_url = self.aggregator.get_absolute_url() + 'post/'
    self.post_data = dict()
    # Fix: register a cleanup so the opened test file is always closed
    # (the original leaked the file descriptor).
    rule_file = open(self._get_test_file('config.xml', 'controller'))
    self.addCleanup(rule_file.close)
    self.post_data['silk_rule_file'] = rule_file
class SilkRuleXMLTestCase(TestCase):
    """Validates the silk_rules.xml template rendered for an aggregator."""

    def setUp(self):
        self.aggregator = AggregatorFactory()

    def test_can_silk_rules_file_is_valid(self):
        # render the template with a known rule and archive item, then
        # check the XML structure: prefixes, datasources, and outputs
        import xml.etree.ElementTree as ET
        from django.template.loader import render_to_string
        archive_item = ArchiveItemFactory()
        AggregatorArchiveItem.objects.create(
            aggregator=self.aggregator,
            archiveitem=archive_item
        )
        self.aggregator.silk_rule = \
            '<LinkageRule><smart data="now" /></LinkageRule>'
        self.aggregator.save()
        output_filename = 'a_really_cool_filename.thm'
        # context mirrors what the export view passes to the template
        context = {
            'aggregator': self.aggregator,
            'sd_prefix': settings.TRIPLE_DATABASE['PREFIXES']['sdv1'],
            'sparql_endpoint': get_virtuoso_endpoint(),
            'archive_item': archive_item,
            'output_filename': output_filename,
            'mastergraph_host': settings.TRIPLE_DATABASE_MASTER['HOST'],
            'mastergraph_port':
                settings.TRIPLE_DATABASE_MASTER['KWARGS']['rexpro_port'],
            'mastergraph_graphname':
                settings.TRIPLE_DATABASE_MASTER['KWARGS']['graph'],
            'resource_namespace':
                settings.TRIPLE_DATABASE_MASTER['PREFIXES']['sdres'],
        }
        tree = ET.fromstring(render_to_string(
            'controller/aggregator/silk_rules.xml', context
        ))
        self.assertIn(
            (settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd'),
            [(x.get('namespace'), x.get('id'))
             for x in tree.findall('.//Prefix')]
        )
        # check datasources: master graph plus one for the archive item
        datasources_dom = tree.findall('.//DataSource')
        self.assertEqual(len(datasources_dom), 2)
        self.assertEqual(datasources_dom[0].get('id'), 'master-graph')
        mastergraph, datasource = datasources_dom
        # check datasource endpoints
        self.assertEqual(
            get_virtuoso_endpoint(),
            datasource.find('Param[@name="endpointURI"]').get("value"),
        )
        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"]
        )
        self.assertEqual(
            archive_item.datagraph_mapped_name,
            datasource.find('Param[@name="graph"]').get("value")
        )
        # check tasks: exactly one Interlink, tied to the datasource
        datasource_id = datasource.get('id')
        rules = tree.findall('.//Interlink')
        self.assertEqual(len(rules), 1)
        self.assertEqual(datasource_id, rules[0].get('id'))
        # check rules parameters
        rule = rules[0]
        self.assertEqual(
            rule.find('.//SourceDataset').get('dataSource'), datasource_id
        )
        self.assertEqual(
            rule.find('.//TargetDataset').get('dataSource'), 'master-graph'
        )
        # the stored silk rule is embedded verbatim in the document
        self.assertEqual(
            ET.tostring(rule.find('.//LinkageRule')).strip(),
            self.aggregator.silk_rule
        )
        self.assertEqual(
            rule.find('.//SourceDataset').find('RestrictTo').text.strip(),
            '?a rdf:type <{}> .'.format(self.aggregator.entity_type)
        )
        self.assertEqual(
            rule.find('.//TargetDataset').find('RestrictTo').text.strip(),
            'b -> {}'.format(self.aggregator.vertex_selector)
        )
        self.assertIsNone(rule.find('.//Filter').text)
        # the single Output writes ntriples to the requested filename
        output = rule.find('.//Outputs').find('Output')
        self.assertEqual(output.get('type'), 'file')
        self.assertEqual(output.findall('Param')[0].get('name'), 'file')
        self.assertEqual(
            output.findall('Param')[0].get('value'), output_filename)
        self.assertEqual(output.findall('Param')[1].get('name'), 'format')
        self.assertEqual(output.findall('Param')[1].get('value'), 'ntriples')
class AggregatorDetailViewTest(TestCase):
    """Tests for the aggregator detail page: rendered content, silk
    project export, and workflow launching."""

    def setUp(self):
        # every test runs logged-in against a fresh aggregator
        self.client_login('admin')
        self.aggregator = AggregatorFactory()
        self.export_url = reverse('aggregator_export',
                                  args=(self.aggregator.pk, ))
        self.workflow_url = reverse('aggregator_workflow', args=[
            self.aggregator.pk,
        ])

    def test_get(self):
        # the detail page shows the aggregator's name
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.name)

    def test_contains_edit_link(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response,
                            self.aggregator.get_absolute_url() + 'edit/')

    def test_contains_archiveitems(self):
        # every archive item linked to the aggregator must be listed
        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(aggregator=self.aggregator,
                                                 archiveitem=item)
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, item1.get_absolute_url())
        self.assertContains(response, item2.get_absolute_url())

    def test_contains_rule(self):
        # the stored silk rule text is rendered on the page
        self.aggregator.silk_rule = 'tanto gentile e tanto onesta pare'
        self.aggregator.save()
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.silk_rule)

    def test_contains_schedulers(self):
        # a placeholder is shown until a scheduler exists, then its link
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, 'No scheduler found')
        scheduler = AggregatorSchedulerFactory(object_id=self.aggregator.pk)
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertNotContains(response, 'No scheduler found')
        self.assertContains(response, scheduler.get_absolute_url())

    def test_contains_silk_link(self):
        # the page links to the external Silk workbench
        response = self.client.get(self.aggregator.get_absolute_url())
        silk_url = 'http://{}:{}/workbench/'.format(
            settings.SILK_EXTERNAL_HOST, settings.SILK_EXTERNAL_PORT)
        self.assertContains(response, silk_url)

    def test_can_download_silk_project_file(self):
        # ?download forces an attachment (Content-Disposition header set)
        self.client_login('admin')
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.export_url + '?download')
        response = self.client.get(self.export_url + '?download')
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_can_run_workflow(self):
        # POSTing the workflow url creates one Scheduler and redirects
        # to the task page
        self.client_login('admin')
        Scheduler.objects.all().delete()
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.workflow_url)
        response = self.client.post(self.workflow_url)
        self.assertEqual(response.status_code, 302)
        self.assertTrue(
            response['Location'].startswith('http://testserver/s/task/'))
        self.assertEqual(Scheduler.objects.count(), 1)

    def test_can_view_silk_project_file_without_downloading(self):
        # without ?download the XML is served inline
        self.client_login('admin')
        response = self.client.get(self.export_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertFalse(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_silk_project_file_is_valid(self):
        # parse the exported project and verify its overall structure:
        # prefixes, datasources, and one linking task per archive item
        import xml.etree.ElementTree as ET
        self.client_login('admin')
        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(aggregator=self.aggregator,
                                                 archiveitem=item)
        response = self.client.get(self.export_url)
        tree = ET.fromstring(response.content)
        self.assertIn((settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd'),
                      [(x.get('namespace'), x.get('id'))
                       for x in tree.findall('.//Prefix')])
        # check datasources: master graph first, then one per item
        datasources = tree.findall('.//DataSource')
        self.assertEqual(len(datasources), 3)
        self.assertEqual(datasources[0].get('id'), 'master-graph')
        mastergraph = datasources[0]
        datasources = datasources[1:]
        # check datasources endpoints
        self.assertEqual(
            mastergraph.find('Param[@name="host"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER['HOST'])
        self.assertEqual([get_virtuoso_endpoint()] * 2, [
            x.find('Param[@name="endpointURI"]').get("value") for
            x in datasources
        ])
        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"])
        self.assertEqual(
            [item1.datagraph_mapped_name, item2.datagraph_mapped_name],
            [x.find('Param[@name="graph"]').get("value")
             for x in datasources])
        # check tasks
        datasource_ids = [x.get('id') for x in datasources]
        tasks = tree.findall('.//LinkingTask')
        self.assertEqual(len(tasks), 2)
        self.assertEqual(datasource_ids,
                         [x.find('.//Interlink').get('id') for x in tasks])
        # check task parameters: each task links its datasource to the
        # master graph, restricted by entity type / vertex selector
        for datasource_id, task in zip(datasource_ids, tasks):
            self.assertEqual(
                task.find('.//SourceDataset').get('dataSource'),
                datasource_id)
            self.assertEqual(
                task.find('.//TargetDataset').get('dataSource'),
                'master-graph')
            self.assertEqual(
                task.find('.//SourceDataset').find('RestrictTo').text.strip(),
                '?a rdf:type <{}> .'.format(self.aggregator.entity_type))
            self.assertEqual(
                task.find('.//TargetDataset').find('RestrictTo').text.strip(),
                'b -> {}'.format(self.aggregator.vertex_selector))
            # aggregator has no rule configured, so these stay empty
            self.assertIsNone(task.find('.//LinkageRule').text)
            self.assertIsNone(task.find('.//Filter').text)
            self.assertIsNone(task.find('.//Outputs').text)
            self.assertIsNone(task.find('.//PositiveEntities').text)
            self.assertIsNone(task.find('.//NegativeEntities').text)
            self.assertIsNone(
                task.find('.//Alignment/').find('{}Alignment'.format(
                    '{http://knowledgeweb.'
                    'semanticweb.org'
                    '/heterogeneity/alignment#}')).text)
def setUp(self):
    """Create an archive item and aggregator, and build the add-link URL."""
    self.archiveitem = ArchiveItemFactory()
    self.aggregator = AggregatorFactory()
    self.url = reverse(
        'archiveitem_aggregator_add',
        args=(self.archiveitem.pk,),
    )
def setUp(self):
    """Each test operates on a freshly created aggregator."""
    self.aggregator = AggregatorFactory()
class AggregatorSchedulerFactory(SchedulerFactory):
    """Factory building a Scheduler bound to a freshly created Aggregator."""

    # each instance lazily creates its own Aggregator to point at
    object_id = factory.LazyAttribute(lambda _obj: AggregatorFactory().pk)
    content_type = factory.LazyAttribute(
        lambda _obj: ContentType.objects.get_for_model(Aggregator)
    )
def handle(self, *args, **options):
    """ entry point """
    # Seeds the database with demo/test fixtures: the Site record, an
    # admin user, several sources with their datasets, and two
    # aggregators (board games, POI) wired to pre-hashed archive items.
    from django.contrib.sites.models import Site

    # configure the single Site object used to build absolute URLs
    site = Site.objects.get()
    site.name = 'controller'
    site.domain = 'localhost:8001'
    site.save()

    AdminFactory()

    trentinocultura = SourceFactory(
        name='trentinocultura',
        description='description description',
        scraper_name='trentinocultura',
        scraper_api_key='',
    )
    DatasetFactory(
        source=trentinocultura,
        name='Trentinocultura agenda',
        url='http://www.trentinocultura.net/asp_cat/main.asp?IDProspettiva'
            '=35&SearchType=AGENDA_SEARCH&Pag=%d&TipoVista=AGENDA&cmd=new',
        description='Eventi in Trentino',
        download='scraper:trentinocultura:trentinocultura',
        curator='Federico Scrinzi',
        license='All rights reserved',
        other_meta='{}',
        bounding_box='10.3817591116112,45.6730626059259,'
                     '12.4775685651704,47.0917759206089',
    )

    # NOTE(review): the concatenation below yields "utile pertest" —
    # looks like a missing space in the original seed text; left as-is
    # since it is runtime data.
    ingiro_local = SourceFactory(
        name='in-giro (locale)',
        description='i dati dello scraper di in-giro, ma in locale (file '
                    'webui.scheduler.tests.data/in-giro.zip) utile per'
                    'test, ma anche per boh, altro? =)',
    )
    DatasetFactory(
        source=ingiro_local,
        name='eventi-e-poi-ingiro',
        url='http://in-giro.net',
        description='Eventi e POI presi da in-giro',
        download='http://testserver/in-giro.zip',
        curator='Ciccio Pasticcio',
        license='All rights reserved',
        other_meta='{}',
    )

    # board-game sources/datasets used to exercise silk matching
    bgg_source = SourceFactory(
        name='BoardGameGeek (test)',
        description='pochi dati per testare il matching su silk'
    )
    bgt_source = SourceFactory(
        name='BoardGameTournament (test)',
        description='pochi dati per testare il matching su silk',
    )
    bgg_dataset = DatasetFactory(
        source=bgg_source,
        name='boardgamegeek-games',
        url='http://boardgamegeek.com',
        description='Lista di boardgames presi da boardgamegeek',
        download='https://dl.dropbox.com/u/3435878/boardgamegeek.csv',
        curator='Stefano Parmesan',
        license='All rights reserved',
        other_meta='{}',
    )
    bgt_dataset = DatasetFactory(
        source=bgt_source,
        name='boardgametournament-games',
        url='http://boardgametournament.com',
        description='Lista di boardgames presi da boardgametournament',
        download='https://dl.dropbox.com/u/3435878/'
                 'boardgametournament.csv',
        curator='Stefano Parmesan',
        license='All rights reserved',
        other_meta='{}',
    )
    # same data in xls/xlsx form, to exercise the spreadsheet importers
    DatasetFactory(
        source=bgt_source,
        name='boardgametournament-games-xls',
        url='http://boardgametournament.com',
        description='Lista di boardgames presi da boardgametournament',
        download='https://dl.dropbox.com/u/3435878/'
                 'boardgametournament.xls',
        curator='Stefano Parmesan',
        license='All rights reserved',
        encoding="utf8",
        other_meta='{}',
    )
    DatasetFactory(
        source=bgt_source,
        name='boardgametournament-games-xlsx',
        url='http://boardgametournament.com',
        description='Lista di boardgames presi da boardgametournament',
        download='https://dl.dropbox.com/u/3435878/'
                 'boardgametournament.xlsx',
        curator='Stefano Parmesan',
        license='All rights reserved',
        encoding="utf8",
        other_meta='{}',
    )

    # archive items carry the refine rules keyed by the file hash
    with open(get_test_file('boardgamegeek_refine_rules.json')) as fin:
        rule = ''.join(fin.readlines())
    bgg_archiveitem = ArchiveItemFactory(
        dataset=bgg_dataset,
        file_target='boardgamegeek.csv',
        file_hash='ea6ee15e9b052171db4f96743aa11425',
        rule=RuleFactory(
            hash="ea6ee15e9b052171db4f96743aa11425",
            rule=rule,
        )
    )
    with open(get_test_file('boardgametournament_refine_rules.json')) \
            as fin:
        rule = ''.join(fin.readlines())
    bgt_archiveitem = ArchiveItemFactory(
        dataset=bgt_dataset,
        file_target='boardgametournament.csv',
        file_hash='be864f716b6a7716f3b1c2254f4f5eea',
        rule=RuleFactory(
            hash="be864f716b6a7716f3b1c2254f4f5eea",
            rule=rule,
        )
    )

    # the BoardGames aggregator joins both archive items under one
    # silk rule loaded from a fixture file
    with open(get_test_file('boardgames_aggregator_silk_rules.xml')) \
            as fin:
        rule = ''.join(fin.readlines())
    aggregator = AggregatorFactory(
        name='BoardGames',
        description='Un dataset di giochi da tavolo',
        silk_rule=rule,
        entity_type='{}BoardGame'.format(
            settings.TRIPLE_DATABASE['PREFIXES']['sdv1']
        ),
        vertex_selector="g.V('type', 'sd$BoardGame')%limit.id.fill(m)",
    )
    for archiveitem in (bgg_archiveitem, bgt_archiveitem):
        AggregatorArchiveItem.objects.create(
            aggregator=aggregator,
            archiveitem=archiveitem,
        )

    # OSM fixtures used to test the slicer and the POI aggregator
    osm_source = SourceFactory(
        name='OSM (test)',
        description='pochi dati per testare lo slicer'
    )
    osm_dataset = DatasetFactory(
        source=osm_source,
        name='osm-dataset',
        url='http://openstreetmap.org',
        download='https://dl.dropbox.com/u/781790/osm-10nodes.csv',
        curator='Davide setti',
        license='CC PUCCI',
    )
    with open(get_test_file('osm-refine-rules.json')) as fin:
        rule = ''.join(fin.readlines())
    osm_archiveitem = ArchiveItemFactory(
        dataset=osm_dataset,
        file_target='osm-10nodes.csv',
        file_hash='e6f4a5c5f5fe12765f7b3ca04ab7a82d',
        rule=RuleFactory(
            hash="e6f4a5c5f5fe12765f7b3ca04ab7a82d",
            rule=rule,
        )
    )
    poi_aggregator = AggregatorFactory(
        name='POI',
        description='POI aggregator',
        entity_type=settings.TRIPLE_DATABASE['PREFIXES']['sdv1'] + 'POI',
        vertex_selector="g.V('type', 'sd$POI')%limit.id.fill(m)"
    )
    AggregatorArchiveItem.objects.create(
        aggregator=poi_aggregator,
        archiveitem=osm_archiveitem,
    )
    DatasetFactory(
        source=osm_source,
        name='Aeroporti',
        url='http://dati.trentino.it',
        description='Aeroporti del trentino, file SHP',
        download='http://testserver/aeroporti_tn.zip',
        curator='Federico Scrinzi',
        license='Open Data',
        other_meta='{}',
    )

    # datasets with odd/malformed values, for robustness tests
    strange_source = SourceFactory(
        name='Strange or malformed (test)',
        description='pochi dati con valori strani tipo None',
    )
    DatasetFactory(
        source=strange_source,
        name='strange symbols',
        url='http://testserver/',
        description='Some strange symbols',
        download='http://testserver/strangesymbols.csv',
        curator='Federico Scrinzi',
        license='Open Data',
        other_meta='{}',
    )
    DatasetFactory(
        source=strange_source,
        name='looks like ascii',
        url='http://testserver/',
        description="file that looks like ascii but it's UTF8",
        download='http://testserver/lookslikeascii.csv',
        curator='Federico Scrinzi',
        license='Open Data',
        other_meta='{}',
    )