def test_get_from_neo_with_unlimited(self):
    """Check get_from_neo with limit=None.

    An unbounded query must yield more than 1000 rows, while a query that
    carries its own Cypher LIMIT 5 must yield exactly 5.
    """
    unbounded_query = 'MATCH (n:Tweet) WHERE n.id>1000 RETURN n.id'
    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    all_rows = accessor.get_from_neo(unbounded_query, limit=None)
    assert len(all_rows) > 1000

    # Second pass uses a fresh accessor; the only limit is the one in the query text.
    capped_query = 'MATCH (n:Tweet) WHERE n.hydrated=\'FULL\' RETURN n.id LIMIT 5'
    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    capped_rows = accessor.get_from_neo(capped_query, limit=None)
    assert len(capped_rows) == 5
def test_save_enrichment_df_to_graph_wrong_parameter_types(self):
    """Check that save_enrichment_df_to_graph rejects badly typed arguments.

    Two cases are exercised, each expected to raise TypeError:
    a plain string in the label position (message mentions "label parameter"),
    and a list in the DataFrame position (message mentions "Pandas.DataFrame").
    """
    # The call is expected to raise, so its return value is never bound.
    with pytest.raises(TypeError) as excinfo:
        Neo4jDataAccess(neo4j_creds=self.creds).save_enrichment_df_to_graph(
            'test', pd.DataFrame(), 'test')
    assert "label parameter" in str(excinfo.value)

    with pytest.raises(TypeError) as excinfo:
        Neo4jDataAccess(neo4j_creds=self.creds).save_enrichment_df_to_graph(
            Neo4jDataAccess.NodeLabel.Tweet, [], 'test')
    assert "Pandas.DataFrame" in str(excinfo.value)
def test_save_enrichment_df_to_graph_new_nodes(self):
    """Save two Tweet rows via save_enrichment_df_to_graph and read them back.

    The rows are fetched again with get_tweet_by_id and their text values
    are compared, in order, against what was written.
    """
    enrichment = pd.DataFrame([
        {'id': 555, 'text': 'Tweet 123'},
        {'id': 666, 'text': 'Tweet 234'},
    ])
    writer = Neo4jDataAccess(neo4j_creds=self.creds)
    writer.save_enrichment_df_to_graph(
        Neo4jDataAccess.NodeLabel.Tweet, enrichment, 'test')

    reader = Neo4jDataAccess(neo4j_creds=self.creds)
    stored = reader.get_tweet_by_id(enrichment['id'].to_frame())

    assert len(stored) == 2
    for row, expected_text in enumerate(('Tweet 123', 'Tweet 234')):
        assert stored.at[row, 'text'] == expected_text
def test_save_enrichment_df_to_graph_multiple_properties(self):
    """Save Tweet rows carrying two properties and verify both round-trip.

    Each row has a text and a favorite_count; after saving, the rows are
    fetched with get_tweet_by_id and both columns are checked per row.
    """
    enrichment = pd.DataFrame([
        {'id': 777, 'text': 'Tweet 123', 'favorite_count': 2},
        {'id': 888, 'text': 'Tweet 234', 'favorite_count': 3},
    ])
    writer = Neo4jDataAccess(neo4j_creds=self.creds)
    writer.save_enrichment_df_to_graph(
        Neo4jDataAccess.NodeLabel.Tweet, enrichment, 'test')

    reader = Neo4jDataAccess(neo4j_creds=self.creds)
    stored = reader.get_tweet_by_id(enrichment['id'].to_frame())

    assert len(stored) == 2
    # (text, favorite_count) expected at row labels 0 and 1, checked in that order.
    expected_rows = (('Tweet 123', 2), ('Tweet 234', 3))
    for row, (expected_text, expected_favorites) in enumerate(expected_rows):
        assert stored.at[row, 'text'] == expected_text
        assert stored.at[row, 'favorite_count'] == expected_favorites
def test_save_parquet_to_graph(self):
    """Smoke-test save_parquet_df_to_graph with the bundled parquet fixture.

    The fixture is resolved relative to this test file. Nothing about the
    saved data is inspected; the test passes when no exception is raised.
    """
    test_dir = os.path.dirname(__file__)
    parquet_path = os.path.join(
        test_dir, 'data/2020_03_22_02_b1.snappy2.parquet')
    tweets = pd.read_parquet(parquet_path, engine='pyarrow')

    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    accessor.save_parquet_df_to_graph(tweets, 'dave')

    # Reaching this point without an exception is the whole pass condition.
    assert True
def test_save_enrichment_df_to_graph_property_does_not_exist(self):
    """Saving a row with the extra 'new_prop' column must raise.

    The raised exception's message is expected to contain
    "create_propertykey".
    """
    row_with_extra_prop = {'id': 777, 'text': 'Tweet 123', 'new_prop': 2}
    enrichment = pd.DataFrame([row_with_extra_prop])

    with pytest.raises(Exception) as excinfo:
        accessor = Neo4jDataAccess(neo4j_creds=self.creds)
        accessor.save_enrichment_df_to_graph(
            Neo4jDataAccess.NodeLabel.Tweet, enrichment, 'test')

    message = str(excinfo.value)
    assert "create_propertykey" in message
def test_get_tweet_by_id(self):
    """Fetch the tweet with id 1 and check its id, text and hydrated values."""
    lookup = pd.DataFrame([{'id': 1}])
    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    found = accessor.get_tweet_by_id(lookup)

    assert len(found) == 1
    # Columns are checked in this order against the first (label 0) row.
    expected_first_row = (('id', 1), ('text', 'Tweet 1'), ('hydrated', 'FULL'))
    for column, expected in expected_first_row:
        assert found.at[0, column] == expected
def test_get_tweet_by_id_with_cols(self):
    """Fetch tweet 1 with cols=['id', 'text'] and expect exactly those two columns."""
    lookup = pd.DataFrame([{"id": 1}])
    wanted_columns = ['id', 'text']
    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    found = accessor.get_tweet_by_id(lookup, cols=wanted_columns)

    assert len(found) == 1
    assert len(found.columns) == 2
    assert found.at[0, 'id'] == 1
    assert found.at[0, 'text'] == 'Tweet 1'
def test_get_tweet_hydrated_status_by_id(self):
    """Check the hydrated status returned for the ids in self.ids.

    Five rows are expected. For each id the row is selected by id and then
    read at its expected index label, so both the value and the row
    position are verified.
    """
    df = Neo4jDataAccess(
        neo4j_creds=self.creds).get_tweet_hydrated_status_by_id(self.ids)

    assert len(df) == 5
    assert df[df['id'] == 1]['hydrated'][0] == 'FULL'
    assert df[df['id'] == 2]['hydrated'][1] == 'FULL'
    # Identity check: a missing status must be the None singleton itself,
    # not merely something that compares equal to it.
    assert df[df['id'] == 3]['hydrated'][2] is None
    assert df[df['id'] == 4]['hydrated'][3] == 'PARTIAL'
    assert df[df['id'] == 5]['hydrated'][4] == 'PARTIAL'
def test_get_tweet_by_id_wrong_parameter(self):
    """get_tweet_by_id must raise TypeError when given a string instead of a frame.

    The error message is expected to mention "df".
    """
    # The call is expected to raise, so its return value is never bound.
    with pytest.raises(TypeError) as excinfo:
        Neo4jDataAccess(neo4j_creds=self.creds).get_tweet_by_id('test')
    assert "df" in str(excinfo.value)
def test_get_from_neo_with_limit_only(self):
    """Check get_from_neo with limit=1 on a query that has no LIMIT of its own.

    Exactly one row with the two returned columns is expected.
    """
    query = 'MATCH (n:Tweet) WHERE n.hydrated=\'FULL\' RETURN n.id, n.text'
    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    result = accessor.get_from_neo(query, limit=1)

    row_count = len(result)
    column_count = len(result.columns)
    assert row_count == 1
    assert column_count == 2
def test_get_from_neo(self):
    """Check get_from_neo on a query with Cypher LIMIT 5 and no limit argument.

    Five rows with the two returned columns are expected.
    """
    query = 'MATCH (n:Tweet) WHERE n.hydrated=\'FULL\' RETURN n.id, n.text LIMIT 5'
    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    result = accessor.get_from_neo(query)

    row_count = len(result)
    column_count = len(result.columns)
    assert row_count == 5
    assert column_count == 2
1219746235038474243, 1219746508955967488, 1219746544955453441] }) # DEBUG, INFO, WARNING, ERROR, CRITICAL logging.getLogger().setLevel(logging.WARNING) pd.set_option('display.max_columns', 500) pd.set_option('display.max_colwidth', None) # Test save_parquet_df_to_graph print('----') print('Testing Parquet Save') tdf = pd.read_parquet( './data/2020_03_22_02_b1.snappy2.parquet', engine='pyarrow') Neo4jDataAccess().save_parquet_df_to_graph(tdf, 'dave') print('----') # Test get_tweet_hydrated_status_by_id print('----') print('Testing get_tweet_hydrated_status_by_id') df = Neo4jDataAccess().get_tweet_hydrated_status_by_id(df) print(df) # Test get_tweet_by_id print('----') print('Testing get_tweet_by_id') df = Neo4jDataAccess().get_tweet_by_id(df) print(df) print('----') print('Testing get_tweet_by_id for cols')
def test_get_neo4j_graph_wrong_parameter(self):
    """get_neo4j_graph must raise TypeError when given a string as the role.

    The error message is expected to mention "role_type parameter".
    """
    # The call is expected to raise, so its return value is never bound.
    with pytest.raises(TypeError) as excinfo:
        Neo4jDataAccess(neo4j_creds=self.creds).get_neo4j_graph('test')
    assert "role_type parameter" in str(excinfo.value)
def test_get_neo4j_graph(self):
    """get_neo4j_graph with the READER role must return something other than None."""
    accessor = Neo4jDataAccess(neo4j_creds=self.creds)
    reader_role = Neo4jDataAccess.RoleType.READER
    graph = accessor.get_neo4j_graph(reader_role)
    assert graph is not None
# Script: run the drug-synonym ingest and merge its results into Neo4j.
# Import order is kept as-is; logging is configured only after the project
# modules have been imported.
from modules.IngestDrugSynonyms import IngestDrugSynonyms
from modules.DrugSynonymDataToNeo4j import DrugSynonymDataToNeo4j
from modules.Neo4jDataAccess import Neo4jDataAccess
import logging

logging.basicConfig(format='>>> %(message)s', level=logging.INFO)

# Step 1: run the ingest object's preparation calls in their required order.
drugSynonym = IngestDrugSynonyms()
drugSynonym.auto_get_and_clean_data()
drugSynonym.create_drug_study_links()
drugSynonym.create_url_study_links()

# Step 2: obtain a graph handle with the WRITER role and wrap it in the bridge.
_writer_graph = Neo4jDataAccess().get_neo4j_graph(
    Neo4jDataAccess.RoleType.WRITER)
neo4jBridge = DrugSynonymDataToNeo4j(graph=_writer_graph)

# Step 3: merge drugs and synonyms, then the relationships between them.
neo4jBridge.merge_drugs(drugSynonym.drugs)
neo4jBridge.merge_synonyms(drugSynonym.synonyms)
neo4jBridge.merge_drug_to_synonym_rels(drugSynonym.drug_synonym_rels)

# Step 4: merge studies and the drug-to-study relationships.
neo4jBridge.merge_studies(drugSynonym.all_studies_df)
neo4jBridge.merge_drug_to_study_rels(drugSynonym.appeared_in_edges)

# Step 5: merge URLs and the URL-to-study relationships.
neo4jBridge.merge_url(drugSynonym.urls)
neo4jBridge.merge_url_to_study_rels(drugSynonym.url_points_at_study_edges)