#!/usr/bin/python
#
# This is the most horrible of hacks. Pretend you're not looking.
import cStringIO as StringIO
import re, sys
# Maps each LaTeX sectioning command to the name of the element that
# process() opens for it.  process() compares these names as plain
# strings to decide which open sections to close, so they must sort in
# nesting order: 'chapter' < 'sect1' < 'sect2' < 'sect3'.
sections = dict([
    ('chapter', 'chapter'),
    ('section', 'sect1'),
    ('subsection', 'sect2'),
    ('subsubsection', 'sect3'),
])
# Maps a LaTeX environment name (the argument of \begin / \end) to the
# name of the element process() writes in its place.  process() indexes
# this dict directly, so an environment that is not listed here is not
# handled.
envs = dict([
    ('codesample2', 'programlisting'),
    ('codesample4', 'programlisting'),
    ('enumerate', 'orderedlist'),
    ('figure', 'informalfigure'),
    ('itemize', 'itemizedlist'),
    ('note', 'note'),
    ('quote', 'blockquote'),
])
def process(ifp, ofp):
# Convert LaTeX-style input to marked-up output.
#
# ifp: iterable of input lines (iterated with `for line in ifp`).
# ofp: file-like object; output goes through `print >> ofp` and
#      `ofp.write`.
#
# Each line gets literal character substitutions, then a series of
# regex macro rewrites, and is finally handled as a sectioning command,
# a \begin/\end line, or ordinary text.
#
# NOTE(review): this copy of the function has lost its indentation, and
# the regex group names plus most of the markup inside the string
# literals appear to have been stripped (`(?P[^}]+)` is not valid `re`
# syntax, `\g` has no group name, many replacement strings are empty,
# and one raw string below spans several physical lines).  Restore the
# literals from a pristine copy before relying on this code.
print >> ofp, '\n'
# stack:  names of the section elements currently open, innermost last.
# para:   True while no paragraph is open, i.e. the next non-empty text
#         line starts a new one.
# inlist: 0 initially, 1 once a list environment has begun, 2 once an
#         \item line has been seen.
stack = []
para = True
inlist = 0
for line in ifp:
# Stop at the '%%% Local Variables:' trailer; nothing from that line on
# is converted.
if line.startswith('%%% Local Variables:'):
break
# Strip trailing whitespace, then apply literal substitutions.
# NOTE(review): several pairs below replace a string with itself
# ('&' -> '&', '<' -> '<', '>' -> '>', ...), which suggests the escaped
# forms were lost from this copy.
line = (line.rstrip()
.replace('~', ' ')
.replace('&', '&')
.replace('&emdash;', '&emdash;')
.replace('\_', '_')
.replace('\{', '{')
.replace('\}', '}')
.replace('\$', '$')
.replace('\%', '%')
.replace('\#', '#')
.replace('<', '<')
.replace('>', '>')
.replace('``', '')
.replace("''", '')
.replace('\\', '\\'))
# Remove \centering and \small together with surrounding whitespace.
line = re.sub(r'\s*\\(?:centering|small)\b\s*', '', line)
# Macro rewrites: each re.sub below targets one LaTeX macro of the form
# \name{arg} (or \name{arg}{arg}...) and substitutes its output form.
line = re.sub(r'\\(?:hgrc\\|hgrc)\b',
r' /.hgrc', line)
line = re.sub(r'\\item\[(?P[^]]+)\]', r'\item \g:', line)
line = re.sub(r'\\bug{(?P\d+)}',
r'issue \g', line)
line = re.sub(r'\\cite{([^}]+)}', r'\1', line)
line = re.sub(r'\\hggopt{(?P[^}]+)}',
r'', line)
line = re.sub(r'\\hgxopt{(?P[^}]+)}{(?P[^}]+)}{(?P[^}]+)}',
r'', line)
line = re.sub(r'\\hgxcmd{(?P[^}]+)}{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\hgext{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\hgopt{(?P[^}]+)}{(?P[^}]+)}',
r'',
line)
line = re.sub(r'\\cmdopt{(?P[^}]+)}{(?P[^}]+)}',
r'',
line)
line = re.sub(r'\\hgcmd{(?P[^}]+)}',
r'hg \g', line)
line = re.sub(r'\\caption{(?P[^}]+?)}',
r'
\g
', line)
line = re.sub(r'\\grafix{(?P[^}]+)}',
r'XXX add text', line)
line = re.sub(r'\\envar{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\rcsection{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\rcitem{(?P[^}]+)}{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\dirname{(?P[^}]+?)}',
r'\g', line)
line = re.sub(r'\\filename{(?P[^}]+?)}',
r'\g', line)
line = re.sub(r'\\tildefile{(?P[^}]+)}',
r' /\g', line)
line = re.sub(r'\\sfilename{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\sdirname{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\interaction{(?P[^}]+)}',
r'', line)
line = re.sub(r'\\excode{(?P[^}]+)}',
r'', line)
line = re.sub(r'\\pymod{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\pymodclass{(?P[^}]+)}{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\url{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\href{(?P[^}]+)}{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\command{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\option{(?P[^}]+)}',
r'', line)
line = re.sub(r'\\ref{(?P[^}]+)}', r'', line)
line = re.sub(r'\\emph{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\texttt{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\textbf{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\hook{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\tplfilter{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\tplkword{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\tplkwfilt{(?P[^}]+)}{(?P[^}]+)}',
r'\g', line)
# \verb / \Verb with an arbitrary delimiter character (group 1).
# NOTE(review): inside a character class `\1` is not a backreference,
# so `[^\1]` does not mean "anything but the delimiter"; the lazy `+?`
# followed by the real `\1` backreference is what ends the match.
line = re.sub(r'\\[vV]erb(.)(?P[^\1]+?)\1',
r'\g', line)
line = re.sub(r'\\package{(?P[^}]+)}',
r'\g', line)
line = re.sub(r'\\hgcmdargs{(?P[^}]+)}{(?P[^}]+)}',
r'hg \g \g',
line)
line = re.sub(r'\\cmdargs{(?P[^}]+)}{(?P[^}]+)}',
r'\g \g',
line)
# Sectioning command at the start of the line: pop and close every open
# section whose name compares >= the new one, then open the new one.
# The comparison is a plain string comparison and relies on the values
# of `sections` sorting in nesting order.
m = re.match(r'\\(chapter|section|subsection|subsubsection){(.*)}', line)
if m:
kind, content = m.groups()
sec = sections[kind]
while stack and stack[-1] >= sec:
close = stack.pop()
print >> ofp, '%s>' % close
stack.append(sec)
print >> ofp, '<%s>\n%s' % (sec, content)
else:
# Otherwise look for \begin{env} or \end{env}, optionally preceded by
# whitespace.
m = re.match(r'\s*\\(begin|end){(?P[^}]+)}', line)
if m:
# An environment boundary ends any paragraph that is still open.
if not para:
print >> ofp, ''
if inlist:
ofp.write('')
para = True
# state is 'begin' or 'end'; env is the LaTeX environment name.
# NOTE(review): envs[env] raises KeyError for an environment that is
# not a key of `envs`.
state, env = m.groups()
env = envs[env]
if state == 'begin':
ofp.write('<')
if env in ('itemizedlist', 'orderedlist'):
inlist = 1
else:
ofp.write('')
# NOTE(review): this compares the string `env` with a tuple using `==`,
# so it is never true and `inlist` is never reset to 0; the matching
# test a few lines above uses `in`.
if env == ('itemizedlist', 'orderedlist'):
inlist = 0
print >> ofp, env + '>'
else:
# Ordinary text line (presumably the innermost branch of the original
# layout).  '\item ' is six characters long, hence line[6:].
if line.startswith('\\item '):
if inlist > 1:
print >> ofp, ''
print >> ofp, ''
else:
inlist = 2
para = True
line = line[6:]
# A non-empty line while no paragraph is open starts one.
if line and para:
if inlist:
ofp.write('')
ofp.write('')
para = False
# An empty line while a paragraph is open ends it.
if not line and not para:
print >> ofp, ''
if inlist:
ofp.write('')
para = True
print >> ofp, line
# Close the sections still on the stack, innermost first (presumably
# after the input loop has finished).
while stack:
print >> ofp, '%s>' % stack.pop()
# Joining a one-element list yields that element, so this writes '\n'.
ofp.write('\n'.join(['\n']))
if __name__ == '__main__':
    # Convert every command-line argument ending in '.tex' and write the
    # result next to it with the suffix changed to '.xml'.  Arguments
    # with any other suffix are skipped without a message.
    for name in sys.argv[1:]:
        if not name.endswith('.tex'):
            continue
        # Drop the trailing 'tex' but keep the dot.
        newname = name[:-3] + 'xml'
        # Convert into memory first so the output can be post-processed
        # as one string before it is written out.
        ofp = StringIO.StringIO()
        ifp = open(name)
        try:
            process(ifp, ofp)
        finally:
            # Close the input even if the conversion raises.
            ifp.close()
        s = ofp.getvalue()
        # The fourth positional parameter of re.sub is `count`, not
        # `flags`; passing re.M there silently limited the number of
        # substitutions to 8.  Compile with the flag so that every match
        # is replaced.
        s = re.compile('\n+', re.M).sub('', s)
        out = open(newname, 'w')
        try:
            out.write(s)
        finally:
            # Close explicitly so the data is flushed to disk.
            out.close()