bos@552: #!/usr/bin/python
bos@552: #
bos@552: # This is the most horrible of hacks. Pretend you're not looking.
bos@552:
bos@552: import cStringIO as StringIO
bos@552: import re, sys
bos@552:
# Maps the LaTeX sectioning command names recognised by process() to the
# element name emitted for each one.
# process() keeps the emitted names on a stack and compares them with `>=`
# as plain strings to decide which open sections to close, so the
# lexicographic order of the values (chapter < sect1 < sect2 < sect3) is
# what encodes nesting depth. Keep that order if entries are added.
bos@552: sections = {
bos@552: 'chapter': 'chapter',
bos@552: 'section': 'sect1',
bos@552: 'subsection': 'sect2',
bos@552: 'subsubsection': 'sect3',
bos@552: }
bos@552:
# Maps LaTeX environment names (the argument of \begin{...} / \end{...}) to
# the element name written in their place by process().
# process() indexes this dict directly (envs[env]), so an environment that
# is not listed here raises KeyError instead of being passed through.
# Both code sample environments map to the same output element.
bos@552: envs = {
bos@552: 'codesample2': 'programlisting',
bos@552: 'codesample4': 'programlisting',
bos@552: 'enumerate': 'orderedlist',
bos@556: 'figure': 'informalfigure',
bos@552: 'itemize': 'itemizedlist',
bos@552: 'note': 'note',
bos@552: 'quote': 'blockquote',
bos@552: }
bos@552:
# process(ifp, ofp): read LaTeX source line by line from `ifp`, rewrite each
# line through a fixed chain of str.replace / re.sub substitutions, and write
# the result to `ofp` (Python 2 `print >> ofp` and ofp.write).
#   ifp -- iterable of input lines (the caller passes an open file).
#   ofp -- writable file-like object (the caller passes a StringIO buffer).
# Returns None; all output goes to `ofp`.
#
# NOTE(review): every line of this file carries a `bos@NNN:` prefix and has
# lost its indentation -- this looks like version-control annotate output,
# not the runnable script. In addition, many string literals below appear to
# have had angle-bracketed text stripped: `.replace('&', '&')` is a no-op,
# named groups read `(?P[^}]+)` with no group name, replacements use a bare
# `\g`, and several writes emit ''. Restore the literals from a pristine
# copy before running or refactoring; do not guess at them.
bos@552: def process(ifp, ofp):
bos@556: print >> ofp, '\n'
# stack  -- element names of the sections currently open (innermost last).
# para   -- True while no paragraph is open, False while inside one.
# inlist -- 0: not in a list; 1: list opened, no \item seen yet;
#           2: at least one \item has been seen in the list.
bos@552: stack = []
bos@552: para = True
bos@556: inlist = 0
bos@552: for line in ifp:
# Stop at the '%%% Local Variables:' trailer; nothing after it is converted.
bos@552: if line.startswith('%%% Local Variables:'):
bos@552: break
# Character-level clean-up: strip trailing whitespace, turn `~` into a
# space, and drop the backslash from escaped LaTeX specials (\_ \{ \} \$ \%
# \#). NOTE(review): several pairs below are identical on both sides, and the
# literal on the `''` replacement is split across two lines -- presumably the
# escaped/markup forms were lost in extraction; confirm against the original.
bos@552: line = (line.rstrip()
bos@556: .replace('~', ' ')
bos@552: .replace('&', '&')
bos@552: .replace('&emdash;', '&emdash;')
bos@552: .replace('\_', '_')
bos@552: .replace('\{', '{')
bos@552: .replace('\}', '}')
bos@552: .replace('\$', '$')
bos@552: .replace('\%', '%')
bos@552: .replace('\#', '#')
bos@552: .replace('<', '<')
bos@552: .replace('>', '>')
bos@556: .replace('``', '')
bos@556: .replace("''", '
')
bos@552: .replace('\\', '\\'))
# Drop the pure-layout commands \centering and \small together with the
# whitespace around them.
bos@552: line = re.sub(r'\s*\\(?:centering|small)\b\s*', '', line)
# Macro-by-macro rewrites. Each re.sub below targets one LaTeX macro of the
# form \name{arg} (or \name{arg}{arg}...) and replaces it with the text in
# the second argument. The order is significant wherever one macro name is a
# prefix of another or one rewrite produces text a later one would match.
# NOTE(review): as shown, the `(?P[^}]+)` groups and bare `\g` references are
# not valid `re` syntax -- the group names were presumably stripped.
bos@552: line = re.sub(r'\\(?:hgrc\\|hgrc)\b',
bos@552: r' /.hgrc', line)
bos@552: line = re.sub(r'\\item\[(?P[^]]+)\]', r'\item \g:', line)
bos@552: line = re.sub(r'\\bug{(?P\d+)}',
bos@552: r'issue \g', line)
bos@552: line = re.sub(r'\\cite{([^}]+)}', r'\1', line)
bos@552: line = re.sub(r'\\hggopt{(?P[^}]+)}',
bos@552: r'', line)
bos@552: line = re.sub(r'\\hgxopt{(?P[^}]+)}{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'', line)
bos@552: line = re.sub(r'\\hgxcmd{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\hgext{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\hgopt{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'',
bos@552: line)
bos@552: line = re.sub(r'\\cmdopt{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'',
bos@552: line)
bos@552: line = re.sub(r'\\hgcmd{(?P[^}]+)}',
bos@552: r'hg \g', line)
bos@552: line = re.sub(r'\\caption{(?P[^}]+?)}',
bos@556: r'\g', line)
# \grafix{...} is replaced by a fixed 'XXX add text' placeholder; the macro
# argument is not carried into the output as shown.
bos@552: line = re.sub(r'\\grafix{(?P[^}]+)}',
bos@552: r'XXX add text', line)
bos@552: line = re.sub(r'\\envar{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\rcsection{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\rcitem{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\dirname{(?P[^}]+?)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\filename{(?P[^}]+?)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\tildefile{(?P[^}]+)}',
bos@552: r' /\g', line)
bos@552: line = re.sub(r'\\sfilename{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\sdirname{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\interaction{(?P[^}]+)}',
bos@552: r'', line)
bos@552: line = re.sub(r'\\excode{(?P[^}]+)}',
bos@552: r'', line)
bos@552: line = re.sub(r'\\pymod{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\pymodclass{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\url{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\href{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\command{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\option{(?P[^}]+)}',
bos@552: r'', line)
bos@556: line = re.sub(r'\\ref{(?P[^}]+)}', r'', line)
bos@552: line = re.sub(r'\\emph{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\texttt{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\textbf{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\hook{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\tplfilter{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\tplkword{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\tplkwfilt{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'\g', line)
# \verb / \Verb use an arbitrary delimiter character, captured as group 1 and
# required again at the end via the \1 backreference.
# NOTE(review): inside a character class `\1` is not a backreference, so
# `[^\1]` does not mean "anything but the delimiter"; the lazy `+?` followed
# by the real `\1` is what actually ends the match.
bos@552: line = re.sub(r'\\[vV]erb(.)(?P[^\1]+?)\1',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\package{(?P[^}]+)}',
bos@552: r'\g', line)
bos@552: line = re.sub(r'\\hgcmdargs{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'hg \g \g',
bos@552: line)
bos@552: line = re.sub(r'\\cmdargs{(?P[^}]+)}{(?P[^}]+)}',
bos@552: r'\g \g',
bos@552: line)
# Structural handling, case 1: a sectioning command at the start of the line.
# re.match anchors at the beginning, so indented commands are not recognised.
bos@552: m = re.match(r'\\(chapter|section|subsection|subsubsection){(.*)}', line)
bos@552: if m:
bos@552: kind, content = m.groups()
bos@552: sec = sections[kind]
# Close every open section at the same or a deeper level before opening the
# new one. The comparison is on the element-name strings themselves and
# relies on 'chapter' < 'sect1' < 'sect2' < 'sect3' sorting in depth order.
bos@552: while stack and stack[-1] >= sec:
bos@552: close = stack.pop()
bos@552: print >> ofp, '%s>' % close
bos@552: stack.append(sec)
bos@552: print >> ofp, '<%s>\n%s' % (sec, content)
bos@552: else:
# Case 2: a \begin{env} or \end{env} line (leading whitespace allowed).
bos@552: m = re.match(r'\s*\\(begin|end){(?P[^}]+)}', line)
bos@552: if m:
# An environment boundary ends any paragraph that is still open.
bos@552: if not para:
bos@552: print >> ofp, ''
bos@552: if inlist:
bos@552: ofp.write('')
bos@552: para = True
bos@552: state, env = m.groups()
# Raises KeyError for any environment name missing from `envs`.
bos@552: env = envs[env]
bos@552: if state == 'begin':
bos@552: ofp.write('<')
bos@556: if env in ('itemizedlist', 'orderedlist'):
bos@556: inlist = 1
bos@552: else:
bos@552: ofp.write('')
# NOTE(review): `env` is a string taken from `envs`, so comparing it with
# `==` against a tuple is never true and `inlist` is never reset to 0 when a
# list ends. The matching 'begin' branch above uses `in`; that is presumably
# what was intended here too. Confirm before changing, since resetting
# `inlist` alters the output for everything after the first list.
bos@556: if env == ('itemizedlist', 'orderedlist'):
bos@556: inlist = 0
bos@552: print >> ofp, env + '>'
bos@552: else:
# Case 3: ordinary text, possibly starting a list item.
bos@552: if line.startswith('\\item '):
# For the second and later items, emit the separator output between items;
# the first item only records that an item has been seen (inlist = 2).
bos@556: if inlist > 1:
bos@556: print >> ofp, ''
bos@556: print >> ofp, ''
bos@556: else:
bos@556: inlist = 2
bos@552: para = True
# Strip the 6-character '\item ' prefix and keep the item text.
bos@552: line = line[6:]
# A non-empty line while no paragraph is open starts a new paragraph.
bos@552: if line and para:
bos@552: if inlist:
bos@552: ofp.write('')
bos@552: ofp.write('')
bos@552: para = False
# A blank line while a paragraph is open ends that paragraph.
bos@552: if not line and not para:
bos@552: print >> ofp, ''
bos@552: if inlist:
bos@552: ofp.write('')
bos@552: para = True
bos@552: print >> ofp, line
# End of input: close whatever sections are still open, innermost first.
bos@552: while stack:
bos@552: print >> ofp, '%s>' % stack.pop()
# NOTE(review): joining a single-element list just yields '\n'; the list
# presumably held more entries (a file trailer) before extraction.
bos@556: ofp.write('\n'.join(['\n']))
bos@552:
bos@552:
# Script entry point: convert every command-line argument that ends in
# '.tex', writing the result next to it with the extension changed to 'xml'.
# Arguments with any other suffix are silently skipped.
bos@552: if __name__ == '__main__':
bos@552: for name in sys.argv[1:]:
bos@552: if not name.endswith('.tex'):
bos@552: continue
# name[:-3] drops the trailing 'tex' but keeps the dot, giving '<base>.xml'.
bos@552: newname = name[:-3] + 'xml'
# Convert into an in-memory buffer first so the whole result can be
# post-processed as one string before it is written out.
bos@552: ofp = StringIO.StringIO()
# NOTE(review): the input file is opened without being closed explicitly.
bos@552: process(open(name), ofp)
bos@552: s = ofp.getvalue()
# NOTE(review): the fourth positional argument of re.sub is `count`, not
# `flags`, so `re.M` here acts as a replacement limit rather than enabling
# multiline mode. Also, the pattern as shown ('\n+') would delete runs of
# newlines outright -- presumably markup around it was lost in extraction.
# Confirm both against a pristine copy before changing this line.
bos@552: s = re.sub('\n+', '', s, re.M)
# NOTE(review): the output file is not closed explicitly either; the write
# relies on the file object being finalised.
bos@552: open(newname, 'w').write(s)