def composeBookCreate(booksData):
    """Insert book nodes and their city relations into Neo4j in batches of 500.

    booksData: iterable of ((title, author), cities) tuples, where cities is an
    iterable of city-name strings. Clears all existing book nodes first and
    reports progress through Importer after every committed batch.
    """
    deleteAllBooks()
    driver = getNeoDriver()
    count = 0
    try:
        with driver.session() as session:
            transaction = session.begin_transaction()
            for data in booksData:
                # data: ((title, author), cities)
                title = data[0][0]
                author = data[0][1]
                # Escape single quotes so a city name like "L'Aquila" cannot
                # break (or inject into) the generated Cypher literal list.
                cities = ','.join(
                    "'" + str(city).replace("'", "\\'") + "'"
                    for city in data[1])
                # bookQuery is a module-level Cypher template with a
                # $$cities$$ placeholder; title/author go in as parameters.
                query = bookQuery.replace('$$cities$$', cities)
                transaction.run(query, {"title": title, "author": author})
                count += 1
                if (count > 500):
                    transaction.commit()
                    Importer.getInstance().updateProgress('neo', True, count)
                    count = 0
                    transaction = session.begin_transaction()
            # Commit the final partial batch and report its size.
            transaction.commit()
            Importer.getInstance().updateProgress('neo', True, count)
        print('Finished importing books in Neo4j')
    except Exception as exception:
        print('Something went wrong while inserting books - ' + str(exception))
    finally:
        # Original leaked the driver on failure; always release it.
        driver.close()
def runImport():
    """Parse every book file, extract cities and RDF metadata, then import the
    results into MongoDB and Neo4j.

    Reads book texts from booksDir and the matching Project Gutenberg RDF
    catalogue entries from catalogueDir; reports parsing progress through
    Importer roughly every 50 books.
    """
    bookPaths = os.listdir(booksDir)
    booksData = []
    bookCount = 0
    for bookFile in bookPaths:
        bookId = bookFile[:-4]  # drop the 4-char file extension
        bookCataloguePath = catalogueDir + '/' + bookId + '/pg' + bookId + '.rdf'
        try:
            with open(booksDir + '/' + bookFile, 'r', encoding='utf-8') as content_file:
                content = content_file.read()
            cities = extractCities(content)
            graph = Graph()
            graph.parse(bookCataloguePath, format='xml')
            # Stored as ((title, author), cities) — consumed by the importers.
            booksData.append((extractGraphInfo(graph), cities))
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit; a single bad book should not abort the import.
            print('Error in ' + booksDir + '/' + bookFile)
        if (bookCount > 50):
            Importer.getInstance().updateProgress(None, True, bookCount)
            bookCount = 0
        bookCount += 1
    # Report the final partial batch.
    Importer.getInstance().updateProgress(None, True, bookCount)
    print('Finished parsing books')
    mongoImporter.importCityData(citiesFile)
    mongoImporter.importBooksData(booksData)
    neo4jImporter.loadCitiesFromCSV('cities5000.csv')
    neo4jImporter.composeBookCreate(booksData)
def importCityData(path):
    """Load city geodata from a tab-separated GeoNames-style file into MongoDB.

    path: filesystem path to the CSV (e.g. '../Resources/cities5000.csv').
    Column 2 is the city name, columns 4/5 are latitude/longitude.
    Recreates the geodata collection with text and geosphere indexes, then
    inserts documents in batches of 500, reporting progress via Importer.
    """
    db.geodata.delete_many({})
    db.geodata.drop_indexes()
    db.geodata.create_index([('city', TEXT)], name='city_index',
                            default_language='english')
    db.geodata.create_index([('location', GEOSPHERE)], name='location_index')
    with open(path, 'r', encoding='utf-8', errors='replace') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter='\t')
        count = 0
        geodata = []
        for row in csv_reader:
            # Skip rows without both coordinates.
            if (row[4] != '' and row[5] != ''):
                # GeoJSON points are [longitude, latitude].
                datum = {
                    'city': str(row[2]),
                    'location': {
                        'type': 'Point',
                        'coordinates': [float(row[5]), float(row[4])]
                    }
                }
                geodata.append(datum)
                if (len(geodata) > 500):
                    db.geodata.insert_many(geodata)
                    geodata.clear()
                if (count > 500):
                    Importer.getInstance().updateProgress('mongo', False, count)
                    count = 0
                count += 1
        # pymongo raises InvalidOperation on insert_many([]) — the original
        # crashed whenever the tail batch was empty. Guard it.
        if geodata:
            db.geodata.insert_many(geodata)
    Importer.getInstance().updateProgress('mongo', False, count)
    print('Finished importing cities')
def loadCitiesFromCSV(path):
    """Bulk-load city nodes into Neo4j straight from a tab-separated CSV.

    path: CSV filename relative to Neo4j's import directory
    (e.g. 'cities5000.csv'). Column 2 is the name, 4/5 are lat/long.
    Clears existing City nodes first and reports completion via Importer.
    """
    deleteAllCities()
    # A unique-name constraint also gives us an index for the MERGE below.
    uniqueNameConstraint = (
        'CREATE CONSTRAINT ON (city:City) ASSERT city.name IS UNIQUE')
    csvLoad = (
        '''
        LOAD CSV FROM "file:///''' + path + '''" AS row FIELDTERMINATOR "\t"
        WITH row WHERE NOT row[2] IS NULL
        MERGE (city :City {name:row[2]})
        SET city.latt = toFloat(row[4]), city.long = toFloat(row[5])
        ''')
    neov(uniqueNameConstraint)
    neov(csvLoad)
    Importer.getInstance().updateProgress('neo', False, 100)
    print('Finished importing cities in Neo4j')
def postIndex():
    """Handle the query form POST: collect the selected query's inputs, run it
    against the chosen database, and render the results page (optionally with
    an embedded map)."""
    selectedQuery = request.form.get('selectedQuery')
    selectedDB = request.form.get('selectedDB')
    _showMain = False
    _showExtra = False
    _createMap = False
    _values = []
    _time = 0
    _map = ''
    if (selectedQuery == '1'):
        # Query 1: books mentioning a given city.
        _showMain = True
        _values.append(request.form.get('cityName'))
    elif (selectedQuery == '2'):
        # Query 2: cities mentioned in a given book (mapped).
        _showMain = True
        _createMap = True
        _values.append(request.form.get('bookTitle'))
    elif (selectedQuery == '3'):
        # Query 3: cities across an author's books (mapped, with extra table).
        _showMain = True
        _showExtra = True
        _createMap = True
        _values.append(request.form.get('authorName'))
    elif (selectedQuery == '4'):
        # Query 4: books mentioning cities within a radius of a point.
        _showMain = True
        _values.extend([
            request.form.get('longitude'),
            request.form.get('latitude'),
            request.form.get('radius'),
        ])
    _result, _resultExtra, _time = executeQuery(
        selectedDB, selectedQuery, _values)
    if (_createMap):
        # Shrink the embedded map so it fits the results layout.
        _map = createMap(_result).replace('60%', '48%')
    return render_template('index.html',
                           showMain=_showMain,
                           showExtra=_showExtra,
                           query=selectedQuery,
                           result=_result,
                           resultExtra=_resultExtra,
                           values=_values,
                           time=_time,
                           map=_map,
                           imported=Importer.getInstance().getImportedState())
def importBooksData(booksData):
    """Insert parsed book documents into MongoDB in batches of 500.

    booksData: iterable of ((title, author), cities) tuples.
    Clears the books collection first; reports progress via Importer.
    """
    db.books.delete_many({})
    books = []
    count = 0
    for data in booksData:
        # data: ((title, author), cities)
        book = {
            'title': data[0][0],
            'author': data[0][1],
            'cities': list(data[1])
        }
        books.append(book)
        if (len(books) > 500):
            db.books.insert_many(books)
            books.clear()
        if (count > 500):
            Importer.getInstance().updateProgress('mongo', True, count)
            count = 0
        count += 1
    # pymongo raises InvalidOperation on insert_many([]) — the original
    # crashed whenever the tail batch was empty (e.g. count divisible by 501
    # or no books at all). Guard it.
    if books:
        db.books.insert_many(books)
    Importer.getInstance().updateProgress('mongo', True, count)
    print('Finished importing books')
def getIndex():
    """Render the landing page, exposing whether the import has already run."""
    importedState = Importer.getInstance().getImportedState()
    return render_template('index.html', imported=importedState)
def startImport():
    """Run the full import unless the data is already imported.

    Returns the status string 'IMPORT OK' either way (idempotent endpoint).
    """
    # `== False` replaced with the idiomatic truthiness test.
    if not Importer.getInstance().getImportedState():
        runImport()
    return 'IMPORT OK'
def getProgress():
    """Return a JSON snapshot of per-database import progress counters."""
    details = Importer.getInstance().getImportDetails()
    (totalBooks, totalCities, parsedCount,
     bookCountMongo, cityCountMongo,
     bookCountNeo, cityCountNeo) = details
    progress = {
        'books': {
            'total': totalBooks,
            'parsed': parsedCount,
            'mongo': bookCountMongo,
            'neo': bookCountNeo
        },
        'cities': {
            'total': totalCities,
            'mongo': cityCountMongo,
            'neo': cityCountNeo
        }
    }
    return jsonify(progress)