def parse_file(filename):
    """Import one XML history file as a ``Chat`` plus its ``Message`` rows.

    The file is read fully and parsed with ``Sax2.Reader``. The chat type is
    "jabber" when the file name contains "JabberProtocol" and "icq"
    otherwise. The ``<head>`` element supplies the year and month (``<date>``)
    and the participating accounts (``<contact>``); every ``<msg>`` element
    becomes one saved ``Message`` attached to the newly saved ``Chat``.

    Args:
        filename: Path of the XML history file to import.

    Raises:
        IOError: If the file cannot be opened or read.
        IndexError: If ``<head>``/``<date>`` is missing or a ``time``
            attribute does not have the "day hh:mm:ss" layout.
        ValueError: If a date or time component is not an integer.
    """
    print(u"*** Parse %s file ***" % filename)

    # try/finally guarantees the handle is released even if read() fails.
    fh = open(filename, "r")
    try:
        xml_source = fh.read()
    finally:
        fh.close()
    doc = Sax2.Reader().fromString(xml_source)

    protocol = "jabber" if "JabberProtocol" in filename else "icq"

    head = doc.getElementsByTagName("head")[0]
    date_xml = head.getElementsByTagName("date")[0]

    my_from = ""
    with_account = None
    # NOTE(review): the else is taken to pair with the hasAttribute() check,
    # i.e. a contact without a "type" attribute is the remote account.
    # Confirm against real history files.
    for contact in head.getElementsByTagName("contact"):
        if contact.hasAttribute("type"):
            if contact.getAttribute("type") == "myself":
                my_from = contact.getAttribute("contactId")
        else:
            with_account = contact.getAttribute("contactId")

    # Messages only carry day and time; year and month come from the header.
    year = int(date_xml.getAttribute("year"))
    month = int(date_xml.getAttribute("month"))

    chat = Chat(account=my_from, type=protocol, with_account=with_account)
    chat.save()

    for msg_xml in doc.getElementsByTagName("msg"):
        # The "time" attribute is "<day> <hh>:<mm>:<ss>".
        day_time = msg_xml.getAttribute("time").split()
        clock = day_time[1].split(":")
        date = datetime(
            year=year,
            month=month,
            day=int(day_time[0]),
            hour=int(clock[0]),
            minute=int(clock[1]),
            second=int(clock[2]),
        )

        from_user = msg_xml.getAttribute("from")
        from_nick = msg_xml.getAttribute("nick")

        # An empty <msg/> has no text node; store an empty string rather
        # than failing or reusing the previous message's text.
        if msg_xml.hasChildNodes():
            text = msg_xml.childNodes[0].nodeValue
        else:
            text = u""

        msg = Message(
            date=date,
            text=text,
            from_user=from_user,
            from_nick=from_nick,
            myself=(from_user == my_from),
            chat=chat,
        )
        msg.save()
        print(msg)
def split_chats():
    """Split every stored chat into one chat per calendar day.

    For each ``Chat``, the messages dated on the day of its earliest message
    stay attached to it. For every later day that has messages, a new
    ``Chat`` with the same ``account``, ``type`` and ``with_account`` is
    saved and that day's messages are moved to it. Chats without messages
    are left untouched.
    """
    one_day = timedelta(days=1)

    # Materialise the chat list up front: new Chat rows are created inside
    # the loop and must not be visited themselves.
    for chat in list(Chat.objects.all()):
        messages = Message.objects.filter(chat=chat)
        stats = messages.aggregate(Max("date"), Min("date"))
        first_date = stats["date__min"]
        last_date = stats["date__max"]
        if first_date is None or last_date is None:
            continue

        first_day = first_date.replace(hour=0, minute=0, second=0,
                                       microsecond=0)

        # Start at the second day; the first day's messages keep this chat.
        # "<=" so a final message at exactly midnight is still split out.
        day_start = first_day + one_day
        while day_start <= last_date:
            day_end = day_start + one_day
            # Half-open window [day_start, day_end): a message at exactly
            # midnight belongs to the day it starts, not the previous one.
            day_messages = messages.filter(date__gte=day_start,
                                           date__lt=day_end)
            # Skip days without messages so no empty chats are created.
            if day_messages.count():
                new_chat = Chat(account=chat.account, type=chat.type,
                                with_account=chat.with_account)
                new_chat.save()
                day_messages.update(chat=new_chat)
            day_start = day_end