"""

Provide an interface to a sequence of chat events.  In theory, should
cover IRC and other chat systems, log-based or in real-time.

Mainly used (and therefore most likely to be working) is
 fromWikiRRSAgent()
    

Cool feature, someday:
   * line up multiple logs, with possible time-stamp-skew
     (flag entries that are during the time range of a log
     but only occur in some logs.)   [[ including using dirc ]]

"""

import re
import time
import sys
import time
import htmlentitydefs
import xml.sax.saxutils

import langio2

class ChatEvent(object):
    """
    One utterance in a chat log.

    fields:
        when          when it was said.  The parsers in this module pass
                      the value of time.mktime() (seconds since the
                      epoch), or None when the log line carried no time.
                      Sortable when set, but two events may well share
                      the same value.
        who           nick of the speaker
        text          what was said
        originalText  the raw log line, when the parser records it;
                      None otherwise
        lineNumber    position of that line in the log, when the parser
                      records it; None otherwise

    not (yet) represented: an id for the event, and whether the event
    is an action.

    needs to know real start time?

    """
    def __init__(self, when, who, text):
        self.when = when
        self.who = who
        self.text = text
        # Filled in after construction by parsers that keep track of them.
        self.originalText = None
        self.lineNumber = None

    def __repr__(self):
        # Debugging aid: show the three constructor arguments.
        return "ChatEvent(%r, %r, %r)" % (self.when, self.who, self.text)

# Matches message text that starts with a bracketed "[HH:MM]" stamp
# followed by a space: group 1 is the HH:MM part, group 2 the rest of
# the text.  (Presumably the stamp is the one added by dirc.)
dirc_stamp = re.compile(r'''^\[(\d\d:\d\d)\] (.*)''')

def fromXChatLog(source):
    """
    Generate a ChatEvent for each ordinary channel message in an XChat log.

    source is handed to langio2.ensure_open_source() and the result is
    read line by line.  A log entry is expected to look like

        Mon DD HH:MM:SS <kind>TAB<text>

    ie a 15-character date, one separator character, then kind and text
    separated by a tab.  The year is not part of an entry; it is taken
    from the last four characters of the most recent
    "**** BEGIN LOGGING AT" line.  If none has been seen, the current
    year is assumed and a warning is written to stderr.

    Only entries whose kind is "<nick>" produce events.  Joins, leaves,
    specials, actions, private messages, a few client notices, and
    messages whose text starts with a "[HH:MM]" stamp are skipped
    silently.  Anything else that cannot be understood is reported on
    stderr and skipped.

    Yields ChatEvent(when, who, text), with when computed by
    time.mktime(), ie the date is read as local time.
    """
    # Client notices, recognised by what follows the date column.
    noisePrefixes = ("You are being CTCP flooded", "Usage: ")
    noiseLines = ("Tcl interface unloaded", "Python interface unloaded")
    # join, leave, special, action: recognised but not implemented for now
    skippedKinds = ("-->", "<--", "---", "*")

    source = langio2.ensure_open_source(source)
    year = None
    for line in source:
        line = line.strip()
        if line == "":
            continue
        if line.startswith("****"):
            if line.startswith('**** BEGIN LOGGING AT'):
                year = line[-4:]
            continue

        date = line[0:15]
        rest = line[16:]
        if rest.startswith(noisePrefixes) or rest in noiseLines:
            continue

        try:
            (kind, text) = rest.split('\t', 1)
        except ValueError:
            sys.stderr.write('bad line: "%s".\n' % rest)
            continue

        if dirc_stamp.match(text):
            continue   # we probably don't want these...
        if kind in skippedKinds:
            continue
        if kind.startswith(">") and kind.endswith("<"):
            continue   # private message; we don't want these!

        if not (kind.startswith("<") and kind.endswith(">")):
            # Unknown kind: there is no speaker to attribute the text to,
            # so report it and move on rather than yielding an event with
            # whoever spoke last.
            sys.stderr.write('kind: "%s".\n' % kind)
            continue
        who = kind[1:-1]

        if year is None:
            sys.stderr.write('cannot determine year of log; using this year\n')
            year = time.strftime("%Y")
        try:
            parsed = time.strptime(year + " " + date, "%Y %b %d %H:%M:%S")
        except ValueError:
            sys.stderr.write('bad line: "%s".\n' % line)
            continue
        yield ChatEvent(time.mktime(parsed), who, text)

# URL of an RRSAgent log, eg http://www.w3.org/2008/01/09-owl-irc.txt.
# Groups 1-3 are the three numeric path components, which fromRRSAgent
# uses as year, month and day.  NOTE: the dots are not escaped, so each
# of them matches any character.
rrsagent_url  = re.compile(r'''^http://www.w3.org/(\d+)/(\d+)/(\d+)-.*-irc.txt$''')
# One log entry, "HH:MM:SS <nick> text": groups 1-3 are the time fields,
# group 4 the nick and group 5 the text.
rrsagent_line = re.compile(r'''^(\d\d):(\d\d):(\d\d) <([^>]*)> (.*)''')

def fromRRSAgent(source, channel="#owl"):
    '''
    Generate a ChatEvent for each message in an RRSAgent IRC log.

    eg http://www.w3.org/2008/01/09-owl-irc.txt

    source   the log; passed to langio2.ensure_open_source() and read
             line by line.  When it is a URL of the form shown above,
             the date of the log is taken from it; otherwise a warning
             is written to stderr and today's date (UTC) is used.
    channel  name of the channel being logged.  Entries whose text is
             exactly "<nick> has joined <channel>" or
             "<nick> has left <channel>" are dropped.

    Lines that do not look like "HH:MM:SS <nick> text" are reported on
    stderr and skipped.

    Yields ChatEvent(when, who, text).

    NOTE(review): when is computed with time.mktime(), which reads the
    date and time as local time.  If the times in the log are UTC (as
    they presumably are), when is off by the local UTC offset -- confirm
    before relying on absolute values.
    '''
    try:
        m = rrsagent_url.match(source)
    except TypeError:
        # source is not a string -- presumably an already-open stream,
        # which carries no URL to take the date from.
        m = None
    t = list(time.gmtime())
    if m:
        t[0] = int(m.group(1))
        t[1] = int(m.group(2))
        t[2] = int(m.group(3))
    else:
        sys.stderr.write('cannot determine date of IRC log; using today\n')

    joined = " has joined " + channel
    left = " has left " + channel

    source = langio2.ensure_open_source(source)
    for line in source:
        line = line.strip()

        m = rrsagent_line.match(line)
        if not m:
            sys.stderr.write('bad rrsagent line: "%s"\n' % line)
            continue

        who = m.group(4)
        text = m.group(5)
        if text == who + joined or text == who + left:
            continue

        t[3] = int(m.group(1))
        t[4] = int(m.group(2))
        t[5] = int(m.group(3))
        # mktime() is only documented to take a tuple or struct_time
        when = time.mktime(tuple(t))
        yield ChatEvent(when, who, text)

# Name or URL of a wiki chat-log page, eg
# http://www.w3.org/2009/sparql/wiki/Chatlog_2009-02-24.  Groups 1-3 are
# the year (20xx), month and day.
wikirrsagent_url  = re.compile(r'''.*(20\d\d)-(\d+)-(\d+).*$''')
# One log entry, "[HH:MM:SS ]<nick> text".  The timestamp is optional:
# group 1 is the whole "HH:MM:SS " prefix (None when absent), groups 2-4
# are its fields, group 5 is the nick and group 6 the text.
wikirrsagent_line = re.compile(r'''^((\d\d):(\d\d):(\d\d) )?<([^>]*)> (.*)''')

# Replacement table that unescape() hands to xml.sax.saxutils.unescape():
# maps an entity reference, written out as "&name;", to its replacement.
entities = { 
    '&apos;' : "'"   # why is this missing from htmlentitydefs?
    }
# Add an entry for every name in htmlentitydefs.entitydefs.
# NOTE(review): that presumably includes "&amp;"; if saxutils.unescape
# applies this table in dict order, text such as "&amp;lt;" could end up
# unescaped twice -- confirm.
for (key, value) in htmlentitydefs.entitydefs.items():
    entities["&"+key+";"] = value


def unescape(line):
    """
    Return line with entity references replaced by the text they stand
    for, using xml.sax.saxutils.unescape() with the module-level
    `entities` table as the extra replacements.
    """
    decode = xml.sax.saxutils.unescape
    return decode(line, entities)

def fromWikiRRSAgent(source):
    '''
    Generate a ChatEvent for each message in a chat log kept on a wiki
    page, inside <pre> ... </pre>.

    eg http://www.w3.org/2009/sparql/wiki/Chatlog_2009-02-24

    source is passed to langio2.ensure_open_source() and read line by
    line.  When it is a string containing a 20YY-MM-DD date, that is the
    date of the log; otherwise a warning is written to stderr and
    today's date (UTC) is used.

    Every line has "&nbsp;" turned into a space and is then unescaped.
    Lines starting with "#" are ignored everywhere.  Everything up to a
    line starting with "<pre>" is ignored; the rest of that line and the
    lines after it, up to a line that is exactly "</pre>", form the
    body.  Blank body lines are skipped.  Every other body line must
    look like "[HH:MM:SS ]<nick> text".

    Yields ChatEvent(when, who, text) with
        when          from time.mktime(), or None if the line carried no
                      timestamp
        lineNumber    1-based count of body lines read so far; the
                      "<pre>" line and blank lines are counted, "#" lines
                      and "</pre>" are not
        originalText  the body line, after unescaping

    Raises RuntimeError for a non-blank body line that cannot be parsed.

    NOTE(review): the "<pre>" and "</pre>" markers are looked for after
    unescaping, so an escaped "&lt;/pre&gt;" on a line of its own would
    also end the body -- confirm whether that can occur.
    '''
    try:
        m = wikirrsagent_url.match(source)
    except TypeError:
        # source is not a string -- presumably an already-open stream,
        # which carries no name to take the date from.
        m = None
    t = list(time.gmtime())
    if m:
        t[0] = int(m.group(1))
        t[1] = int(m.group(2))
        t[2] = int(m.group(3))
    else:
        sys.stderr.write('cannot determine date of IRC log; using today\n')

    source = langio2.ensure_open_source(source)
    inBody = False
    lineNumber = 0
    for line in source:
        if line.startswith("#"):
            continue
        line = line.strip()
        # I don't like the standard NBSP; just use a space
        line = line.replace("&nbsp;", " ")
        line = unescape(line)

        if inBody:
            if line == "</pre>":
                inBody = False
                continue
            # otherwise fall through and process line
        elif line.startswith("<pre>"):
            inBody = True
            line = line[5:]     # whatever follows "<pre>" is body text
        else:
            # skip this line, it's some HTML we don't care about
            continue

        lineNumber += 1
        if line == "":
            continue

        m = wikirrsagent_line.match(line)
        if not m:
            raise RuntimeError("Can't parse chatlog line %s" % repr(line))

        if m.group(2) is None:
            when = None         # no timestamp on this line
        else:
            t[3] = int(m.group(2))
            t[4] = int(m.group(3))
            t[5] = int(m.group(4))
            # mktime() is only documented to take a tuple or struct_time
            when = time.mktime(tuple(t))

        e = ChatEvent(when, m.group(5), m.group(6))
        e.lineNumber = lineNumber
        e.originalText = line
        yield e

def fromDIRCLog(source):
    '''
    Generate ChatEvents from a log -- but not really a dirc one.

    It looks like this code was just copied from fromXChatLog and then
    abandoned: it reads the same layout,

        Mon DD HH:MM:SS <kind>TAB<text>

    taking the year from the last four characters of the most recent
    "**** BEGIN LOGGING AT" line (or, with a warning on stderr, the
    current year if there has been none).

    source is passed to langio2.ensure_open_source() and read line by
    line.  Only entries whose kind is "<nick>" produce events; joins,
    leaves, specials, actions, private messages, two client notices and
    messages whose text starts with a "[HH:MM]" stamp are skipped
    silently.  Anything else that cannot be understood is reported on
    stderr and skipped.

    Yields ChatEvent(when, who, text), with when computed by
    time.mktime(), ie the date is read as local time.
    '''
    source = langio2.ensure_open_source(source)
    year = None
    for line in source:
        line = line.strip()
        if line == "":
            continue
        if line.startswith("****"):
            if line.startswith('**** BEGIN LOGGING AT'):
                year = line[-4:]
            continue

        date = line[0:15]
        rest = line[16:]
        # client notices, not chat
        if rest.startswith(("You are being CTCP flooded", "Usage: ")):
            continue

        try:
            (kind, text) = rest.split('\t', 1)
        except ValueError:
            sys.stderr.write('bad line: "%s".\n' % rest)
            continue

        if dirc_stamp.match(text):
            continue   # we probably don't want these...
        if kind in ("-->", "<--", "---", "*"):
            # join, leave, special, action: not implemented for now
            continue
        if kind.startswith(">") and kind.endswith("<"):
            continue   # private message; we don't want these!

        if not (kind.startswith("<") and kind.endswith(">")):
            # Unknown kind, so no speaker: report it (on stderr, like the
            # other diagnostics) and skip the entry instead of yielding
            # it under the previous speaker's name.
            sys.stderr.write('kind: "%s".\n' % kind)
            continue
        who = kind[1:-1]

        if year is None:
            sys.stderr.write('cannot determine year of log; using this year\n')
            year = time.strftime("%Y")
        try:
            parsed = time.strptime(year + " " + date, "%Y %b %d %H:%M:%S")
        except ValueError:
            sys.stderr.write('bad line: "%s".\n' % line)
            continue
        yield ChatEvent(time.mktime(parsed), who, text)

# does this go here?   or in meeting.py?  or what?

# A correction typed into the chat: "s/old/new", optionally followed by a
# closing slash and spaces.  Group 1 is old, group 2 is new.  Both groups
# are non-greedy and the pattern is anchored at the end, so that
# "s/old/new/" gives ("old", "new") rather than ("old/new", "").  old
# stops at the first slash after "s/"; new may itself contain slashes.
sPat = re.compile(r"""s/(.*?)/(.*?)/? *$""")
def doSubs(buffer):
    """
    Apply "s/old/new/" corrections found in buffer to earlier events.

    buffer is a list of ChatEvents, oldest first; it is modified in
    place and nothing is returned.  For every event whose text is a
    correction, the events before it are searched, most recent first,
    for one whose text contains old.  In the first one found, every
    occurrence of old is replaced by new, and the change is shown on
    stderr.  The correction's own text then gets
    " (succeeded, N lines ago)" appended, or " (failed)" if no earlier
    event contained old (or old was empty).
    """
    for i, event in enumerate(buffer):
        assert isinstance(event, ChatEvent)
        m = sPat.match(event.text)
        if not m:
            continue
        old = m.group(1)
        new = m.group(2)

        succeededAt = None
        # An empty old would "match" every line and splice new in between
        # all of its characters; treat that as a failed correction.
        if old:
            # Walk back to, and including, the very first event.
            for j in range(i - 1, -1, -1):
                earlier = buffer[j]
                assert isinstance(earlier, ChatEvent)
                if old in earlier.text:
                    fixed = earlier.text.replace(old, new)
                    sys.stderr.write("Found: %s\n" % earlier.text)
                    sys.stderr.write("Now  : %s\n" % fixed)
                    earlier.text = fixed
                    succeededAt = j
                    break

        # index 0 is a legitimate hit, so compare against None
        if succeededAt is not None:
            event.text += " (succeeded, %d lines ago)" % (i - succeededAt)
        else:
            event.text += " (failed)"

        
    
