def fct():
    """Create the ``dfp_perso_tw`` index and bulk-load Twitter personal data.

    Connects to a local Elasticsearch node, creates the index with an
    English-analyzed text mapping for ``source``/``term``/``key``, then
    bulk-inserts ``personal_data_fixed.json`` from the Twitter data folder.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing specific types: each field is analyzed English text with
    # a raw ``keyword`` sub-field, and fielddata enabled for aggregations.
    text_field = {
        "type": "text",
        "analyzer": "my_english_analyzer",
        "fields": {"keyword": {"type": "keyword"}},
        "fielddata": True,
    }
    schema = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_english_analyzer": {
                        "type": "standard",
                        "stopwords": "_english_",
                    }
                }
            }
        },
        "mappings": {
            "properties": {
                "source": dict(text_field),
                "term": dict(text_field),
                "key": dict(text_field),
            }
        },
    }

    # Create index with a schema.
    c.createIndex('dfp_perso_tw', schema, elastic)

    inputFolder = dirpath + "/script/dataSource/json-twitter_data"
    for loadType in ["personal_data_fixed"]:
        whatFile = os.path.join(inputFolder, loadType + '.json')
        try:
            helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_perso_tw", loadType))
            print("Insert Twitter Personal Data")
        except Exception as err:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # still propagate; report the failing file and the cause.
            print("Error in Twitter : " + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_fb_vote`` index and bulk-load Facebook poll votes.

    Maps ``timestamp`` as a date (ISO or epoch-seconds) and aliases
    ``created_at`` to it, then bulk-inserts ``polls_you_voted_on.json``.
    """
    # Create a new instance of the Elasticsearch client class.
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing specific types and adding an alias so queries written
    # against ``created_at`` work on Facebook's ``timestamp`` field.
    schema = {
        "mappings": {
            "properties": {
                "timestamp": {
                    "type": "date",
                    "format": "date_optional_time||epoch_second",
                },
                "created_at": {
                    "type": "alias",
                    "path": "timestamp",
                },
            }
        }
    }

    # Create index with a schema.
    c.createIndex('dfp_fb_vote', schema, elastic)

    inputFolder = dirpath + "/script/dataSource/json-facebook_data/other_activity"
    for loadType in ["polls_you_voted_on"]:
        whatFile = os.path.join(inputFolder, loadType + '.json')
        try:
            helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_fb_vote", loadType))
            print("Insert Facebook Votes")
        except Exception as err:
            # Narrowed from a bare ``except:``; include the file and cause
            # so a failed load is actually diagnosable.
            print("Error in Facebook votes: " + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_text_tw_tweet`` index and bulk-load tweets.

    Maps tweet text with an English analyzer (plus ``keyword`` sub-field and
    fielddata), parses Twitter's ``created_at`` date format, aliases
    ``all_text`` to ``full_text``, and types the sentiment scores as floats.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing specific types and aliases for tweet documents.
    schema = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_english_analyzer": {
                        "type": "standard",
                        "stopwords": "_english_",
                    }
                }
            }
        },
        "mappings": {
            "properties": {
                "created_at": {
                    "type": "date",
                    # Twitter's native format first, then ISO / epoch fallbacks.
                    "format": "EEE MMM dd HH:mm:ss ZZ yyyy||date_optional_time||epoch_second",
                },
                "full_text": {
                    "type": "text",
                    "analyzer": "my_english_analyzer",
                    "fields": {"keyword": {"type": "keyword"}},
                    "fielddata": True,
                },
                "all_text": {"type": "alias", "path": "full_text"},
                "mySentiment": {"type": "float"},
                "sentPositive": {"type": "float"},
                "sentNegative": {"type": "float"},
            }
        },
    }

    # Create index with a schema.
    c.createIndex('dfp_text_tw_tweet', schema, elastic)

    inputFolder = dirpath + "/script/dataSource/json-twitter_data"
    for loadType in ["tweet"]:
        whatFile = os.path.join(inputFolder, loadType + '.json')
        try:
            helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_text_tw_tweet", loadType))
            print("Insert Twitter Tweets")
        except Exception as err:
            # The original had its try/except commented out, so any bad file
            # aborted the whole load; restored for consistency with the other
            # loaders, and the error is reported rather than swallowed.
            print("Error in Twitter Tweets: " + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_fb_likes`` index and bulk-load Facebook reactions.

    Maps ``timestamp`` as a date (ISO or epoch-seconds) with a ``created_at``
    alias, then bulk-inserts the three likes/reactions export files.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing specific types and adding an alias so queries written
    # against ``created_at`` work on Facebook's ``timestamp`` field.
    schema = {
        "mappings": {
            "properties": {
                "timestamp": {
                    "type": "date",
                    "format": "date_optional_time||epoch_second",
                },
                "created_at": {
                    "type": "alias",
                    "path": "timestamp",
                },
            }
        }
    }

    # Create index with a schema.
    c.createIndex('dfp_fb_likes', schema, elastic)

    inputFolder = dirpath + "/script/dataSource/json-facebook_data/likes_and_reactions"
    for loadType in ["likes_on_external_sites", "pages", "posts_and_comments"]:
        whatFile = os.path.join(inputFolder, loadType + '.json')
        try:
            helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_fb_likes", loadType))
            # NOTE(review): message says "Friends" but this loads likes data —
            # looks like a copy/paste slip; confirm before changing the string.
            print("Insert Facebook Friends")
        except Exception as err:
            # Narrowed from a bare ``except:``; include the cause in the report.
            print("Error in Facebook :" + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_people_tw_follow`` index and load follower/following.

    ``accountId`` and ``userLink`` are kept as exact-match ``keyword`` fields,
    then both Twitter follow-graph export files are bulk-inserted.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing exact-match keyword types for the follow-graph fields.
    schema = {
        "mappings": {
            "properties": {
                "accountId": {"type": "keyword"},
                "userLink": {"type": "keyword"},
            }
        }
    }

    # Create index with a schema.
    c.createIndex('dfp_people_tw_follow', schema, elastic)

    inputFolder = dirpath + "/script/dataSource/json-twitter_data"
    for loadType in ["follower", "following"]:
        whatFile = os.path.join(inputFolder, loadType + '.json')
        try:
            helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_people_tw_follow", loadType))
            print("Insert Twitter follower and following")
        except Exception as err:
            # Narrowed from a bare ``except:``; report file and cause.
            print("Error in Insert Twitter : " + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_people_li_connections`` index and load connections.

    Parses LinkedIn's ``Connected On`` field with its ``dd MMM yyyy`` format
    and aliases ``created_at`` to it, then bulk-inserts ``Connections.json``.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing the LinkedIn date format and a ``created_at`` alias so
    # cross-source queries on ``created_at`` also match this index.
    schema = {
        "mappings": {
            "properties": {
                "Connected On": {
                    "type": "date",
                    "format": "dd MMM yyyy",
                },
                "created_at": {
                    "type": "alias",
                    "path": "Connected On",
                },
            }
        }
    }

    # Create index with a schema.
    c.createIndex('dfp_people_li_connections', schema, elastic)

    inputFolder = dirpath + "/script/dataSource/json-LinkedIn_data"
    for loadType in ["Connections"]:
        whatFile = os.path.join(inputFolder, loadType + '.json')
        try:
            helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_people_li_connections", loadType))
            print("Insert LinkedIn Friends")
        except Exception as err:
            # The original left its try/except commented out as dead code;
            # restored (narrowed to Exception) so one bad file does not abort
            # the run, and the success message only prints on success.
            print("Error in " + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_sentiment`` index and load all ``*sentiment.json``.

    Walks the ``vegaFiles`` folder and bulk-inserts every file ending in
    ``sentiment.json``, typing the date and the three sentiment scores.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing the tweet-style date format and float sentiment scores.
    schema = {
        "mappings": {
            "properties": {
                "date": {
                    "type": "date",
                    "format": "EEE MMM dd HH:mm:ss ZZ yyyy||date_optional_time||epoch_second",
                },
                "mySentiment": {"type": "float"},
                "sentPositive": {"type": "float"},
                "sentNegative": {"type": "float"},
            }
        }
    }

    # Create index with a schema.
    c.createIndex('dfp_sentiment', schema, elastic)

    inputFolder = dirpath + '/vegaFiles/'
    for r, d, f in os.walk(inputFolder):
        for file in f:
            if file.endswith("sentiment.json"):
                whatFile = os.path.join(inputFolder, file)
                try:
                    # Load-type tag is the filename without its extension(s).
                    file_part = file.split(".")[0]
                    helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_sentiment", file_part))
                    print("Insert Sentiment")
                except Exception as err:
                    # Narrowed from a bare ``except:``; name the failing file.
                    print("Error in Insert Sentiment: " + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_friend`` index and load all ``*friends.json`` files.

    Walks the ``vegaFiles`` folder and bulk-inserts every file ending in
    ``friends.json`` with a fixed mapping for the friend-graph fields.
    """
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Schema forcing specific types for the friend documents.
    schema = {
        "mappings": {
            "properties": {
                "name": {"type": "keyword"},
                "screen_name": {"type": "text"},
                # NOTE(review): "number" is not a valid Elasticsearch field
                # datatype (expected e.g. "integer"/"long"/"float") — index
                # creation likely rejects this mapping; confirm and fix.
                "tags": {"type": "number"},
                "type": {"type": "text"},
            }
        }
    }

    # Create index with a schema.
    c.createIndex('dfp_friend', schema, elastic)

    inputFolder = dirpath + '/vegaFiles/'
    for r, d, f in os.walk(inputFolder):
        for file in f:
            if file.endswith("friends.json"):
                whatFile = os.path.join(inputFolder, file)
                try:
                    # Load-type tag is the filename without its extension(s).
                    file_part = file.split(".")[0]
                    helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_friend", file_part))
                    print("Insert Friends")
                except Exception as err:
                    # Narrowed from a bare ``except:``; include the cause.
                    print("Error in Insert " + whatFile + " (" + str(err) + ")")
def fct():
    """Create the ``dfp_text_fb_posts`` index and bulk-load Facebook posts.

    Maps the post timestamp (with ``created_at`` alias), English-analyzed
    text fields, float sentiment scores, nested post text under ``data``,
    and attachment place locations as ``geo_point``; then bulk-inserts
    ``your_posts_1.json``.
    """
    # Create a new instance of the Elasticsearch client class.
    elastic = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

    # Reused mapping: analyzed English text with a raw keyword sub-field and
    # fielddata enabled for aggregations.
    text_field = {
        "type": "text",
        "analyzer": "my_english_analyzer",
        "fields": {"keyword": {"type": "keyword"}},
        "fielddata": True,
    }
    schema = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_english_analyzer": {
                        "type": "standard",
                        "stopwords": "_english_",
                    }
                }
            }
        },
        "mappings": {
            "properties": {
                "timestamp": {
                    "type": "date",
                    "format": "EEE MMM dd HH:mm:ss ZZ yyyy||date_optional_time||epoch_second",
                },
                "created_at": {"type": "alias", "path": "timestamp"},
                "all_text": dict(text_field),
                "mySentiment": {"type": "float"},
                "sentPositive": {"type": "float"},
                "sentNegative": {"type": "float"},
                "data": {
                    "properties": {
                        "post": dict(text_field),
                    }
                },
                "attachments": {
                    "properties": {
                        "data": {
                            "properties": {
                                "place": {
                                    "properties": {
                                        # Attached place coordinates, queryable
                                        # as a geo point.
                                        "location": {"type": "geo_point"},
                                    }
                                }
                            }
                        }
                    }
                },
            }
        },
    }

    # Create index with a schema.
    c.createIndex('dfp_text_fb_posts', schema, elastic)

    inputFolder = dirpath + "/script/dataSource/json-facebook_data/posts"
    for loadType in ["your_posts_1"]:
        whatFile = os.path.join(inputFolder, loadType + '.json')
        try:
            helpers.bulk(elastic, bulkJsonData(whatFile, "dfp_text_fb_posts", loadType))
            print("Insert Facebook Posts")
        except Exception as err:
            # Narrowed from a bare ``except:``; name the file and the cause.
            print("Error in Facebook Posts: " + whatFile + " (" + str(err) + ")")