hgbook
annotate en/fixhtml.py @ 514:db12ab3b3b25
corrected some typos on the title page.
translated a couple of index entries
translated a couple of index entries
author | Javier Rojas <jerojasro@devnull.li> |
---|---|
date | Sun Jan 18 19:45:33 2009 -0500 (2009-01-18) |
parents | 2e73abddad21 |
children |
rev | line source |
---|---|
bos@149 | 1 #!/usr/bin/env python |
bos@251 | 2 # |
bos@251 | 3 # This script attempts to work around some of the more bizarre and |
bos@251 | 4 # quirky behaviours of htlatex. |
bos@251 | 5 # |
bos@251 | 6 # - We've persuaded htlatex to produce UTF-8, which unfortunately |
bos@251 | 7 # causes it to use huge character sequences to represent even the |
bos@251 | 8 # safe 7-bit ASCII subset of UTF-8. We fix that up. |
bos@251 | 9 # |
bos@251 | 10 # - BUT we have to treat angle brackets (for example, redirections in |
bos@251 | 11 # shell script snippets) specially, otherwise they'll break the |
bos@251 | 12 # generated HTML. (Reported by Johannes Hoff.) |
bos@251 | 13 # |
bos@251 | 14 # - For some reason, htlatex gives a unique ID to each fancyvrb |
bos@251 | 15 # environment, which makes writing a sane, small CSS stylesheet |
bos@251 | 16 # impossible. We squish all those IDs down to nothing. |
bos@149 | 17 |
bos@149 | 18 import os |
bos@149 | 19 import sys |
bos@149 | 20 import re |
bos@149 | 21 |
bos@251 | 22 angle_re = re.compile(r'(&#x003[CE];)') |
bos@251 | 23 unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);') |
bos@149 | 24 fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I) |
bos@260 | 25 ligature_re = re.compile(r'&#xFB0([0-4]);') |
bos@149 | 26 |
bos@149 | 27 tmpsuffix = '.tmp.' + str(os.getpid()) |
bos@149 | 28 |
bos@251 | 29 def hide_angle(m): |
bos@251 | 30 return m.group(1).lower() |
bos@251 | 31 |
bos@149 | 32 def fix_ascii(m): |
bos@149 | 33 return chr(int(m.group(1), 16)) |
bos@149 | 34 |
bos@260 | 35 ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl'] |
bos@260 | 36 |
bos@260 | 37 def expand_ligature(m): |
bos@260 | 38 return ligatures[int(m.group(1))] |
bos@260 | 39 |
bos@149 | 40 for name in sys.argv[1:]: |
bos@149 | 41 tmpname = name + tmpsuffix |
bos@149 | 42 ofp = file(tmpname, 'w') |
bos@149 | 43 for line in file(name): |
bos@251 | 44 line = angle_re.sub(hide_angle, line) |
bos@149 | 45 line = unicode_re.sub(fix_ascii, line) |
bos@260 | 46 line = ligature_re.sub(expand_ligature, line) |
bos@149 | 47 line = fancyvrb_re.sub('id="fancyvrb"', line) |
bos@149 | 48 ofp.write(line) |
bos@149 | 49 ofp.close() |
bos@149 | 50 os.rename(tmpname, name) |