#	$Id: ContentTable.py,v 1.1 1999/01/15 20:21:44 dieter Exp $
# Copyright (C) 1998-1999 by Dr. Dieter Maurer <dieter@handshake.de>
# D-66386 St. Ingbert, Eichendorffstr. 23, Germany
#
#			All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice and this permission
# notice appear in all copies, modified copies and in
# supporting documentation.
# 
# Dieter Maurer DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL Dieter Maurer
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
# PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
"""Add a Table of Contents to an HTML document.

Documentation can be found at:
  URL:http://www.handshake.de/~dieter/pyprojects/addContentTable.html
"""

from visitor import Visitor
from xml.dom.core import ELEMENT_NODE, TEXT_NODE
from string import join, upper
import re

class ContentTableBuilder(Visitor):
  """Build a table of contents for an HTML DOM document.

  Section headers (by default 'H2', 'H3', 'H4') are collected by
  '_collect', which is handed to the 'Visitor' base class in '__init__'
  (presumably as the per-node callback used by 'visit' -- verify against
  the 'visitor' module).  Each header receives a destination anchor
  ('<A NAME=...>') and the table of contents is built as nested
  '<UL CLASS="TOC">' lists with '<LI CLASS="TOCLINE<level>">' entries.

  State used while a table is being built:
    '_level'     -- index into 'sections' of the level currently open,
                    -1 when no level is open
    '_fragments' -- one document fragment per open level collecting the
                    'LI' entries of that level
    '_doc'       -- node factory (document); NOTE(review): it is not
                    assigned anywhere in this class, presumably
                    'Visitor.visit' sets it -- confirm.
  """
  def __init__(self, sections=('H2','H3','H4'), place='H1', numerate=1):
    """set up the builder.

    *sections* -- element names of the section headers; the position in
                  the sequence is the nesting level of the content table.
    *place*    -- default tag name of the element after which the content
                  table container is inserted.
    *numerate* -- if true, a leading section number is stripped from
                  header texts; if additionally > 0, a generated section
                  number is put in front of each header.
    """
    self.sections = list(sections)
    self.numerate = numerate
    self.dest = place
    Visitor.__init__(self, self._collect)
  #
  def addContentTable(self, dom, place=None, newplace=0):
    """add a content table to *dom*.

    The content table is placed after *place* (if there was not yet
    a content table or *newplace*).  *place* defaults to the value given
    to the constructor.

    When the document contains no section headers, the (empty) container
    is still created but nothing is appended to it.
    """
    dest = place or self.dest
    container = self.emptyContentTableContainer(dom, dest, newplace)
    table = self.getContentTable(dom)
    # 'getContentTable' returns None when no section was found;
    # appending None to the container would fail.
    if table is not None:
      container.appendChild(table)
  #
  def emptyContentTableContainer(self, dom, place='H1', newplace=0):
    """make the content table container (a <div class='TOC'>) empty; create, if necessary.

    An existing container is emptied and reused, unless *newplace* is
    true; in that case it is removed and a new one is created.  A new
    container is inserted directly after the first *place* element.

    Returns the (empty) container element.
    Raises 'ValueError' when a container must be created and *dom*
    contains no *place* element.
    """
    container = None
    for div in dom.getElementsByTagName('DIV'):
      if upper(div.getAttribute('CLASS')) == 'TOC':
        container = div
        break
    # identity tests: do not depend on how a node evaluates as a boolean
    if container is not None:
      if newplace:
        container.parentNode.removeChild(container)
        container = None
      else:
        # always remove the current first child: works whether or not
        # 'childNodes' is a live list
        for i in range(len(container.childNodes)):
          container.removeChild(container.firstChild)
    if container is None:
      matches = dom.getElementsByTagName(place)
      # check before indexing, such that the documented 'ValueError'
      # (and not an index error) reports a missing destination
      if not len(matches):
        raise ValueError(
          "destination `%s' for content table not found" % place)
      pos = matches[0]
      container = dom.createElement('DIV')
      container.setAttribute('CLASS', 'TOC')
      pos.parentNode.insertBefore(container, pos.nextSibling)
    return container
  #
  def getContentTable(self, dom):
    """add content table destination anchors to the DOM document *dom*
    and return the content table as an 'ul' element.

    Returns None when *dom* contains no section header."""
    self._level = -1
    self._fragments = []
    self.visit(dom)
    # closing all levels that are still open yields the outermost list
    return self._handleSection(-1, dom)
  #
  def _collect(self, node):
    """collect content table info for *node*.

    Nodes that are not elements or whose name is not in 'sections'
    are ignored (None is returned); for section headers the result
    of '_handleSection' is returned."""
    if node.nodeType != ELEMENT_NODE:
      return None
    try:
      level = self.sections.index(node.nodeName)
    except ValueError:
      return None
    return self._handleSection(level, node)
  #
  def _handleSection(self, newlevel, node):
    """handle transition to *newlevel* for *node*.

    Levels deeper than *newlevel* are closed: their entries are wrapped
    into an 'UL' which is appended to the last entry of the enclosing
    level.  Levels up to *newlevel* are opened, if necessary.  Then
    *node* gets a destination anchor and an entry in the content table.

    *newlevel* == -1 closes everything: the outermost 'UL' is returned,
    or None if no level was open.  Otherwise 1 is returned.
    """
    doc = self._doc
    # open missing levels
    while self._level < newlevel:
      self._level = self._level + 1
      self._fragments.append(doc.createDocumentFragment())
    # close levels that are too deep
    while self._level > newlevel:
      ul = self._makeContentTableLevel(self._level)
      ul.appendChild(self._fragments[self._level])
      del self._fragments[self._level]
      self._level = self._level - 1
      if self._level == -1:
        return ul
      parent = self._fragments[self._level].lastChild
      if parent is None:
        # a deeper section came before any section of the enclosing
        # level: create an entry without header to hold the sublist
        parent = self._makeContentTableEntry(doc.createDocumentFragment())
        self._fragments[self._level].appendChild(parent)
      parent.appendChild(ul)
    if self._level == -1:
      return None
    num = self._genNumber()
    refname = 'bct_sec_%s' % num
    child = node.firstChild  # None for an empty header element
    val = None
    if (child is not None and child.nodeType == ELEMENT_NODE
        and child.nodeName == 'A'):
      val = child.getAttribute('NAME')
      # anchor names with this prefix were generated by an earlier run:
      # renumber them; other names are kept as they are
      if val and val[:8] == 'bct_sec_':
        val = refname
        child.setAttribute('NAME', val)
    if not val:
      # no usable anchor: wrap the complete header content in a new one
      val = refname
      wrapper = doc.createElement('A')
      wrapper.setAttribute('NAME', val)
      # appending moves the node out of *node*; thus always take the
      # current first child
      for i in range(len(node.childNodes)):
        wrapper.appendChild(node.firstChild)
      node.appendChild(wrapper)
      child = wrapper
    header = self._getContentHeader(child, num)
    li = self._makeContentLine(header, val)
    self._fragments[self._level].appendChild(li)
    return 1
  #
  def _genNumber(self):
    """return structured content number.

    The number consists of one component per open level, joined by '.':
    the number of entries collected so far for each enclosing level and
    the number of entries + 1 for the innermost level."""
    parts = []
    for fragment in self._fragments[:-1]:
      parts.append(str(fragment.childNodes.get_length()))
    parts.append(str(self._fragments[-1].childNodes.get_length() + 1))
    return join(parts, '.')
  # a section number (e.g. '1.2.3') followed by white space
  _re_Numbered = re.compile(r'(\d+\.)*\d+\s+')
  #
  def _getContentHeader(self, anchor, number):
    """derive a content header line from *anchor*.

    If 'numerate' is true and *anchor* starts with text, a leading
    section number is removed from that text; if 'numerate' is > 0,
    *number* is then put in front of the anchor content.  These changes
    are made in the document itself.

    Returns a document fragment with deep copies of the children of
    *anchor*.
    Raises 'ValueError' when *anchor* has no 'NAME' attribute value.
    """
    if not anchor.getAttribute('NAME'):
      raise ValueError('missing NAME attribute for %s' % str(anchor))
    child = anchor.firstChild  # None for an empty anchor
    if self.numerate:
      starts_with_text = child is not None and child.nodeType == TEXT_NODE
      if starts_with_text:
        m = self._re_Numbered.match(child.data)
        if m:
          child.deleteData(0, m.end())
      if self.numerate > 0:
        prefix = '%s ' % number
        if starts_with_text:
          child.insertData(0, prefix)
        else:
          # for an empty anchor *child* is None; per the DOM
          # specification 'insertBefore' then appends
          anchor.insertBefore(self._doc.createTextNode(prefix), child)
    fragment = self._doc.createDocumentFragment()
    for c in anchor.childNodes:
      fragment.appendChild(c.cloneNode(1))
    return fragment
  #
  def _makeContentLine(self, header, refval):
    """return a content table entry linking to the anchor named *refval*;
    *header* becomes the content of the link."""
    href = self._doc.createElement('A')
    href.setAttribute('HREF', '#%s' % refval)
    href.appendChild(header)
    return self._makeContentTableEntry(href)
  #
  def _makeContentTableLevel(self, level):
    """return a new, empty '<UL CLASS="TOC">' element.

    *level* is currently not used."""
    ul = self._doc.createElement('UL')
    ul.setAttribute('CLASS', 'TOC')
    return ul
  #
  def _makeContentTableEntry(self, content):
    """return a new 'LI' element containing *content*; its 'CLASS' is
    'TOCLINE' followed by the current level."""
    li = self._doc.createElement('LI')
    li.setAttribute('CLASS', 'TOCLINE%d' % self._level)
    li.appendChild(content)
    return li
    
