def import_twitter(directory="data/twitter/"):
    """Import tweets from a Twitter archive export under *directory*.

    Reads every file in ``data/js/tweets/``, strips the first line of each
    (the archive files start with a JS variable assignment before the JSON),
    downloads any attached media into ``img/``, and records one event per
    tweet, tagged as tweet / reply / retweet plus its hashtags.
    """
    # Local import — the original called the Python-2-only urllib.urlretrieve,
    # which does not exist on Python 3.
    import urllib.request

    events.prepare_import(1)
    print("Importing tweets...")
    with db.atomic():
        tweets_directory = directory + "data/js/tweets/"
        for tweet_file in [os.path.join(tweets_directory, name)
                           for name in os.listdir(tweets_directory)]:
            # Bug fix: the file handle was never closed.
            with open(tweet_file, encoding="utf8") as f:
                lines = f.read().split("\n")
            data = json.loads("\n".join(lines[1:]))
            for tweet in data:
                tweet_id = tweet["id_str"]  # renamed: "id" shadowed the builtin
                text = tweet["text"]
                is_reply = "in_reply_to_screen_name" in tweet
                is_retweet = "retweeted_status" in tweet
                time = dateutil.parser.parse(tweet["created_at"])
                images = []
                hashtags = [item["text"] for item in tweet["entities"]["hashtags"]]
                kvps = {"text": text,
                        "url": "https://twitter.com/" + tweet["user"]["screen_name"]
                               + "/status/" + tweet_id}
                # Robustness: "media" is absent from entities when a tweet has
                # no attachments, so default to an empty list instead of
                # raising KeyError.
                for image in tweet["entities"].get("media", []):
                    url = image["media_url_https"]
                    extension = url.split(".")[-1]
                    url += ":orig"  # request the original-resolution variant
                    local_address = directory + "img/" + image["id_str"] + "." + extension
                    if not os.path.isfile(local_address):
                        # Bug fix: urllib.urlretrieve is Python 2 only; Python 3
                        # moved it to urllib.request.urlretrieve.
                        urllib.request.urlretrieve(url, local_address)
                    images.append((time, local_address))
                if is_retweet:
                    events.add("Retweeted " + tweet["retweeted_status"]["user"]["name"]
                               + ": " + text, time,
                               ["twitter", "retweet"] + hashtags, kvps, images=images)
                elif is_reply:
                    events.add("Replied to " + tweet["in_reply_to_screen_name"]
                               + ": " + text, time,
                               ["twitter", "reply"] + hashtags, kvps, images=images)
                else:
                    events.add("Tweet: " + text, time,
                               ["twitter", "tweet"] + hashtags, kvps, images=images)
def import_reddit(directory="data/reddit/"):
    """Import reddit submissions and comments from local JSON dumps.

    If ``submissions.json`` or ``comments.json`` is missing, fetches the
    data via ``get_data()`` first. Every key of each record except "time"
    is stored as a kvp on the event.
    """
    events.prepare_import(3)
    if (not os.path.isfile(directory + "submissions.json")
            or not os.path.isfile(directory + "comments.json")):
        get_data(directory)
    with db.atomic():
        # Bug fix: both files were opened without ever being closed; use
        # context managers (and json.load instead of read() + json.loads).
        with open(directory + "submissions.json") as file:
            submissions = json.load(file)
        print("Importing reddit submissions...")
        for submission in submissions:
            time = datetime.datetime.fromtimestamp(submission["time"])
            events.add("Posted to " + submission["subreddit"] + ": "
                       + submission["title"],
                       time, ["reddit", "post", submission["subreddit"]],
                       kvps={k: submission[k] for k in submission if k != "time"})
        with open(directory + "comments.json") as file:
            comments = json.load(file)
        print("Importing reddit comments...")
        for comment in comments:
            time = datetime.datetime.fromtimestamp(comment["time"])
            events.add("Commented in " + comment["subreddit"] + ": "
                       + create_comment_summary(comment["message"]),
                       time, ["reddit", "comment", comment["subreddit"]],
                       kvps={k: comment[k] for k in comment if k != "time"})
def import_photos(directory="data/photos/", session_time_seconds=60 * 60):
    """Import photos, grouping shots from one camera into sessions.

    Photos taken on the same camera at most *session_time_seconds* apart
    belong to the same session; each completed session becomes one event
    via ``create_event``.
    """
    events.prepare_import(4)
    photos = find_photos(directory)
    print("Sorting photos...")
    photos = sorted(photos, key=lambda p: p.time)
    with db.atomic():
        print("Importing photos...")
        # Maps camera -> list of photos in the session currently being built.
        open_sessions = {}
        for photo in photos:
            session = open_sessions.get(photo.camera)
            if (session is not None
                    and (photo.time - session[-1].time).total_seconds()
                    <= session_time_seconds):
                # Close enough to the previous shot: same session.
                session.append(photo)
            else:
                # First photo from this camera, or the gap exceeded the
                # threshold: flush the finished session and start a new one.
                if session is not None:
                    create_event(session, photo.camera)
                open_sessions[photo.camera] = [photo]
        # Flush whatever sessions are still open at the end.
        for camera, session in open_sessions.items():
            create_event(session, camera)
def import_whatsapp(directory="data/WhatsApp/"):
    """Import WhatsApp conversations from an ``msgstore.db`` backup.

    Initializes the message database, then reads one conversation per
    contact JID found in the contacts dictionary.
    """
    events.prepare_import(12)
    messages.database.init(directory + "msgstore.db")
    contacts_dict = get_contacts_dict(directory)
    print("Reading Whatsapp conversations...")
    with db.atomic():
        # A dict iterates its keys directly; no .keys() call needed.
        for jid in contacts_dict:
            read_conversation(directory, jid, contacts_dict)
    print("Done.")
def import_steam(directory="data/steam/"):
    """Import Steam game purchases from saved license-page HTML files.

    Parses each HTML file in *directory*, walks the licenses table
    (skipping its header row), and adds one event per game.
    """
    events.prepare_import(2)
    print("Importing Steam purchases...")
    with db.atomic():
        for file_name in [os.path.join(directory, name)
                          for name in os.listdir(directory)]:
            # Bug fix: the file handle was never closed.
            with open(file_name) as f:
                tree = html.fromstring(f.read())
            table = tree.xpath('//*[@id="main_content"]/div/div/div/div/table/tbody')
            # First row of the table body is the header; skip it.
            for tr in table[0][1:]:
                time = dateutil.parser.parse(tr[0].text)
                name = tr[1].text.strip()
                # Rows with a nested element carry the name in its tail text.
                if len(tr[1]) == 1:
                    name = tr[1][-1].tail.strip()
                events.add("Added Steam game " + name + " to library.", time,
                           ["steam", "game"], kvps={"name": name})
def import_ebay(directory="data/ebay/"):
    """Import ebay purchases by delegating to the generic CSV importer."""
    events.prepare_import(14)
    print("Importing ebay purchases...")
    # Placeholders like {16} refer to CSV column indices.
    summary_format = "Purchased {16} from {6} for {14} {13}"
    column_map = {
        "product": 16,
        "category": 15,
        "status": 5,
        "amount": "{14} {13}",
        "seller": 6,
        "type": 9,
    }
    importer.generic.import_csv(directory + "ebay.csv", 4, summary_format,
                                ["ebay", "purchase"], column_map,
                                delimiter=";", dayfirst=True)
def import_linkedin(directory="data/linkedin/"):
    """Import LinkedIn connections from the exported ``Connections.csv``.

    Expected columns: first name, last name, email, company, position,
    connection date.
    """
    events.prepare_import(7)
    print("Importing LinkedIn contacts...")
    with db.atomic():
        # Bug fix: the CSV file handle was never closed; use a context manager.
        with open(directory + "Connections.csv", encoding="utf8") as file:
            reader = csv.reader(file, delimiter=",")
            next(reader, None)  # skip the header row
            for line in reader:
                name = line[0] + " " + line[1]
                email = line[2]
                company = line[3]
                position = line[4]
                time = dateparser.parse(line[5])
                events.add("Added LinkedIn contact " + name + ".", time,
                           ["linkedin", "friend"],
                           kvps={"email": email, "company": company,
                                 "position": position})
def import_facebook_data(directory="data/facebook/"):
    """Import data from a Facebook export by running each reader in turn."""
    events.prepare_import(11)
    # (progress label, reader functions) pairs, executed in order.
    readers = (
        ("app posts", (read_app_posts, read_app_installs)),
        ("comments", (read_comments,)),
        ("events", (read_events,)),
        ("friends", (read_friends,)),
        ("messages", (read_messages,)),
        ("photos", (read_photos,)),
    )
    with db.atomic():
        for label, funcs in readers:
            print("Reading Facebook " + label + "...")
            for func in funcs:
                func(directory)
def import_paypal(directory="data/paypal/"):
    """Import Paypal payments from CSV exports in *directory*.

    Column layout (0-based): 0 date (dd.mm.yyyy), 1 time, 3 counterparty
    name, 4 transaction type, 6 currency, 9 amount, 10 own account,
    11 counterparty account, 15 item description.
    """
    events.prepare_import(5)
    print("Importing Paypal payments...")
    with db.atomic():
        for file_name in [os.path.join(directory, name)
                          for name in os.listdir(directory)]:
            # Bug fix: the file handle was never closed.
            with open(file_name, encoding="utf8") as f:
                lines = f.read().split("\n")
            # Skip the header line and the trailing empty line.
            for line in lines[1:-1]:
                # Strip the outer quotes and split on the quoted-field
                # separator. (The identity list comprehension around split()
                # was redundant and has been removed.)
                data = line[1:-1].split('","')
                time = datetime.datetime.strptime(data[0] + " " + data[1],
                                                  "%d.%m.%Y %H:%M:%S")
                name = data[3]
                # Skip rows without a counterparty and generic authorizations.
                if len(name) == 0 or data[4] == "Allgemeine Autorisierung":
                    continue
                currency = data[6]
                # Normalize currency via the module-level mapping when known.
                if currency in currencies:
                    currency = currencies[currency]
                amount = data[9].replace(",", ".")  # decimal comma -> point
                if len(amount) == 0:
                    continue
                amount_positive = amount[0] != "-"
                amount_absolute = amount.replace("-", "")
                item = data[15]
                kvps = {
                    "account": data[10],
                    "message": item,
                    "recipient-name": name,
                    "recipient-account": data[11],
                    "amount": amount
                }
                if amount_positive:
                    events.add("Received " + currency + " " + amount_absolute
                               + " from " + name
                               + (" for " + item if len(item) > 0 else "")
                               + " using Paypal.",
                               time, ["money", "paypal"], kvps)
                else:
                    events.add("Paid " + currency + " " + amount_absolute
                               + " to " + name
                               + (" for " + item if len(item) > 0 else "")
                               + " using Paypal.",
                               time, ["money", "paypal"], kvps)
def import_wordpress(directory="data/wordpress/"):
    """Import articles from Wordpress XML export files in *directory*.

    Reads every export file, extracts each ``<item>``'s title, date, URL,
    body (with HTML tags stripped), and category tags, and records one
    event per article.
    """
    events.prepare_import(0)
    print("Importing Wordpress articles...")
    with db.atomic():
        for file_name in [os.path.join(directory, name)
                          for name in os.listdir(directory)]:
            tree = ElementTree.parse(file_name)
            channel = tree.find("channel")
            for item in channel:
                if item.tag != "item":
                    continue
                title = item.find("title").text
                time = dateutil.parser.parse(item.find("pubDate").text)
                url = item.find("guid").text
                # The post body lives in a namespaced <content:encoded>
                # element, so match on the tag suffix.
                content = next(element.text for element in item
                               if element.tag.endswith("encoded"))
                # Bug fix: the original pattern "[\<].*?[\>]" used invalid
                # escape sequences (\< and \>), which emit a Deprecation/
                # SyntaxWarning on modern Python and will become an error.
                # r"<.*?>" matches exactly the same spans.
                content = re.sub(r"<.*?>", "", content)
                tags = [element.text for element in item
                        if element.tag == "category"]
                events.add("Posted Wordpress article: " + title, time,
                           ["wordpress"] + list(set(tags)),
                           {"title": title, "message": content, "url": url})
def import_money(directory="data/money/"):
    """Import bank transfers from semicolon-separated CSV exports.

    Column layout (0-based, each field wrapped in quotes): 0 account,
    1 date (dd.mm.yy), 4 message, 5 recipient name, 6 recipient account,
    7 recipient bank, 8 amount, 9 currency.
    """
    events.prepare_import(6)
    print("Importing bank transfers...")
    with db.atomic():
        for file_name in [os.path.join(directory, name)
                          for name in os.listdir(directory)]:
            # Bug fix: the file handle was never closed.
            with open(file_name) as f:
                lines = f.read().split("\n")
            # Skip the header line and the trailing empty line.
            for line in lines[1:-1]:
                # Strip the surrounding quotes from each field.
                data = [value[1:-1] for value in line.split(";")]
                account = data[0]
                time = datetime.datetime.strptime(data[1], "%d.%m.%y")
                # Keep only the text after the last "+" marker.
                message = data[4].split("+")[-1]
                recipient_name = data[5]
                recipient_account = data[6]
                recipient_bank = data[7]
                amount = data[8].replace(",", ".")  # decimal comma -> point
                # Robustness: skip rows without an amount (consistent with
                # the Paypal importer); amount[0] would raise IndexError.
                if len(amount) == 0:
                    continue
                amount_positive = amount[0] != "-"
                amount_absolute = amount.replace("-", "")
                currency = data[9]
                if currency in currencies:
                    currency = currencies[currency]
                kvps = {
                    "account": account,
                    "message": message,
                    "recipient-name": recipient_name,
                    "recipient-account": recipient_account,
                    "recipient-bank": recipient_bank,
                    "amount": amount
                }
                # Rows without a recipient are internal noise; skip them.
                if len(recipient_name) == 0:
                    continue
                if amount_positive:
                    events.add("Received " + currency + " " + amount_absolute
                               + " from " + recipient_name + ": " + message,
                               time, ["money"], kvps)
                else:
                    events.add("Sent " + currency + " " + amount_absolute
                               + " to " + recipient_name + ": " + message,
                               time, ["money"], kvps)
def import_kickstarter(directory="data/kickstarter/"):
    """Import Kickstarter pledges from ``backed_projects.json``."""
    events.prepare_import(8)
    print("Importing Kickstarter pledges...")
    with db.atomic():
        # Bug fix: the file handle was never closed; use a context manager.
        with open(directory + "backed_projects.json", encoding="utf8") as file:
            data = json.load(file)
        for project in data:
            summary = ("Backed " + project["name"] + " on Kickstarter for "
                       + project["pledge_amount"] + " "
                       + project["project_currency"])
            time = dateutil.parser.parse(project["pledged_at"])
            kvps = {
                "name": project["name"],
                "description": project["description"],
                "goal": project["project_goal"],
                "status": project["project_status"],
                "amount": project["pledge_amount"],
                "currency": project["project_currency"]
            }
            events.add(summary, time, ["kickstarter"], kvps)
def import_thunderbird(directory="data/thunderbird/"):
    # Import emails from a Thunderbird global search index
    # (global-messages-db.sqlite, the "Gloda" database).
    #
    # Messages with no earlier message in the same conversation are imported
    # as new emails; all others are imported as replies (second argument to
    # read_messages distinguishes the two passes).
    events.prepare_import(13)
    thunderbird_db.db.init(directory + "global-messages-db.sqlite")
    with db.atomic():
        folder_ids = get_folder_ids()
        print("Importing emails...")
        # Self-join alias used to look for earlier messages in the same
        # conversation.
        Message2 = Message.alias()
        # First pass: messages for which NO earlier message exists in the
        # conversation. NOTE(review): "<<" here appears to be the peewee IN
        # operator; since Python's "<<" binds tighter than "&", this parses
        # as (folderID IN folder_ids) & ~EXISTS(...) — confirm against the
        # peewee version in use.
        query = Message.select(Message.folderID, Message.date, MessageContent.c1subject, MessageContent.c0body, MessageContent.c3author, MessageContent.c4recipients)\
            .join(MessageContent, on = (Message.id == MessageContent.docid).alias("content"))\
            .where(Message.folderID << folder_ids & ~fn.EXISTS(Message2.select().where((Message.date > Message2.date) & (Message.conversationID == Message2.conversationID))))\
            .order_by(Message.date)
        read_messages(query, False)  # False: treat as original emails
        print("Importing email replies...")
        # Second pass: messages for which an earlier message DOES exist in
        # the same conversation (same query with the EXISTS un-negated).
        query = Message.select(Message.folderID, Message.date, MessageContent.c1subject, MessageContent.c0body, MessageContent.c3author, MessageContent.c4recipients) \
            .join(MessageContent, on=(Message.id == MessageContent.docid).alias("content")) \
            .where(Message.folderID << folder_ids & fn.EXISTS(Message2.select().where((Message.date > Message2.date) & (Message.conversationID == Message2.conversationID)))) \
            .order_by(Message.date)
        read_messages(query, True)  # True: treat as replies
def import_google(directory="data/google/"):
    """Import data from a Google Takeout export by running each reader."""
    events.prepare_import(9)
    # (progress message, reader function) pairs, executed in order.
    # NOTE: location import (read_locations) was disabled in the original.
    steps = (
        ("Importing google calendar...", read_calendar),
        ("Importing saved places...", read_saved_places),
        ("Importing Youtube uploads...", read_youtube_uploads),
        ("Importing Youtube subscriptions...", read_youtube_subscriptions),
        ("Importing Youtube favorites...", read_youtube_favorites),
        ("Importing Youtube Likes...", read_youtube_likes),
        ("Importing Youtube Playlists...", read_youtube_playlists),
        ("Importing Google device activations...", read_device_activations),
        ("Importing Google Pay transactions...", read_transactions),
        ("Importing Google Play store activity...", read_google_play),
    )
    with db.atomic():
        for message, reader in steps:
            print(message)
            reader(directory)