hgbook

view ja/fixhtml.py @ 890:2887b61fa4fe

Change fields to fieldsets in the Comment admin model. The 'date'
field isn't working properly for an unknown reason, so it has been
removed from the interface temporarily.
author dukebody <dukebody@gmail.com>
date Sun Oct 11 21:12:46 2009 +0200 (2009-10-11)
parents
children
line source
1 #!/usr/bin/env python
2 #
3 # This script attempts to work around some of the more bizarre and
4 # quirky behaviours of htlatex.
5 #
6 # - We've persuaded htlatex to produce UTF-8, which unfortunately
7 # causes it to use huge character sequences to represent even the
8 # safe 7-bit ASCII subset of UTF-8. We fix that up.
9 #
10 # - BUT we have to treat angle brackets (for example, redirections in
11 # shell script snippets) specially, otherwise they'll break the
12 # generated HTML. (Reported by Johannes Hoff.)
13 #
14 # - For some reason, htlatex gives a unique ID to each fancyvrb
15 # environment, which makes writing a sane, small CSS stylesheet
16 # impossible. We squish all those IDs down to nothing.
18 import os
19 import sys
20 import re
# Entities for '<' (3C) and '>' (3E); the whole entity is captured so a
# substitution callback can rewrite it.
angle_re = re.compile(r'(&#x003[CE];)')
# Four-digit hex entities for code points 0x00-0x7F.  Only upper-case hex
# digits are matched; the two hex digits are captured.
unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);')
# Numbered fancyvrb id attributes, matched case-insensitively.
fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.IGNORECASE)
# Entities U+FB00..U+FB04; the captured last digit indexes the ligature table.
ligature_re = re.compile(r'&#xFB0([0-4]);')

# Suffix appended to a file name to build its temporary output name; the
# process ID keeps it distinct between concurrently running processes.
tmpsuffix = '.tmp.%d' % os.getpid()
def hide_angle(m):
    """Return the entity captured in group 1 of *m*, converted to lower case.

    unicode_re only matches upper-case hex digits, so a lower-cased
    angle-bracket entity is left alone by that later substitution.
    """
    entity = m.group(1)
    return entity.lower()
# Characters that must not appear raw in HTML text or attribute values,
# mapped to the named entity emitted in their place.
_html_special = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
}


def fix_ascii(m):
    """Return the 7-bit ASCII character for the hex code in group 1 of *m*.

    HTML-significant characters (``&``, ``<``, ``>``, ``"``) are returned
    as their short named entity instead of the raw character.  Emitting a
    bare ``&`` would let it fuse with the following text into an
    unintended entity (``&#x0026;lt;`` would become ``&lt;``), and a bare
    quote or angle bracket could break the surrounding markup.
    """
    char = chr(int(m.group(1), 16))
    return _html_special.get(char, char)
# Plain-letter spellings of the ligatures U+FB00..U+FB04, in code-point order.
ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl']


def expand_ligature(m):
    """Return the plain-letter spelling for the ligature entity matched by *m*.

    Group 1 of *m* is the final digit of the entity's code point, which
    is used directly as an index into ``ligatures``.
    """
    index = int(m.group(1))
    return ligatures[index]
# Rewrite each file named on the command line in place.  The cleaned-up
# text is written to a per-process temporary file next to the original,
# which then replaces the original via os.rename().
for name in sys.argv[1:]:
    tmpname = name + tmpsuffix
    try:
        # open() instead of the Python 2-only file() builtin; the with
        # blocks close both handles even if a substitution or write fails.
        with open(name) as ifp:
            with open(tmpname, 'w') as ofp:
                for line in ifp:
                    # Order matters: angle-bracket entities are lower-cased
                    # first so the case-sensitive unicode_re skips them.
                    line = angle_re.sub(hide_angle, line)
                    line = unicode_re.sub(fix_ascii, line)
                    line = ligature_re.sub(expand_ligature, line)
                    line = fancyvrb_re.sub('id="fancyvrb"', line)
                    ofp.write(line)
        # Only replace the original once the output is complete and closed.
        os.rename(tmpname, name)
    except Exception:
        # Do not leave a partial temporary file behind on failure.
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise