hgbook

view en/autoid.py @ 713:f87515a4a3cf

Remove dead symbolic links

for i in `find .`; do if (test -h $i); then file $i|grep broken; fi; done

./es/99book.bib: broken symbolic link to `../en/99book.bib'
./es/bookhtml.cfg: broken symbolic link to `../en/bookhtml.cfg'
./es/fixhtml.py: broken symbolic link to `../en/fixhtml.py'
./es/hgbook.css: broken symbolic link to `../en/hgbook.css'
./es/htlatex.book: broken symbolic link to `../en/htlatex.book'
author Dongsheng Song <dongsheng.song@gmail.com>
date Thu May 21 14:26:31 2009 +0800 (2009-05-21)
parents
children
line source
1 #!/usr/bin/env python
2 #
3 # Add unique ID attributes to para tags. This script should only be
4 # run by one person, since otherwise it introduces the possibility of
5 # chaotic conflicts among tags.
7 import glob, os, re, sys
# Matches a <para> tag that already carries an id of the form x_<hex>;
# group 1 captures the hex digits.
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
# Matches only a bare <para> tag with no attributes at all.
untagged = re.compile('<para>')

names = glob.glob('ch*.xml') + glob.glob('app*.xml')


def scan_tagged_ids(paths):
    """Collect the paragraph IDs already present in the given files.

    Each file is read in full and searched with ``tagged``. An ID that
    has been seen before (in the same file or an earlier one) is
    reported on stderr and counted as a duplicate.

    Returns a tuple ``(biggest, found, duplicates)``: the largest ID
    value seen (0 if there were none), the set of all ID values seen,
    and the number of duplicate occurrences.
    """
    biggest = 0
    found = set()
    duplicates = 0
    for path in paths:
        # Close the file promptly instead of leaving it to the garbage
        # collector.
        with open(path) as fp:
            text = fp.read()
        for m in tagged.finditer(text):
            i = int(m.group(1), 16)
            if i in found:
                # Report the ID in the x_<hex> form used in the XML so
                # the message can be searched for directly.
                # sys.stderr.write works on both Python 2 and Python 3.
                sys.stderr.write('%s: duplication of ID %s\n'
                                 % (path, 'x_%x' % i))
                duplicates += 1
            found.add(i)
            if i > biggest:
                biggest = i
    return biggest, found, duplicates


# First pass: find the highest-numbered paragraph ID.

biggest_id, seen, errs = scan_tagged_ids(names)
def retag(s):
    """Return an opening <para> tag carrying the next unused ID.

    Intended as the replacement callback for ``untagged.sub``; the
    argument is ignored. Every call advances the module-level
    ``biggest_id`` counter by one and formats the new value in hex.
    """
    global biggest_id
    next_id = biggest_id + 1
    biggest_id = next_id
    return '<para id="x_%x">' % next_id
# Second pass: add IDs to paragraphs that currently lack them.

for name in names:
    # Context managers make sure each handle is closed even if a read
    # or write fails part-way through.
    with open(name) as src:
        before = src.read()
    after = untagged.sub(retag, before)
    if after != before:
        # Write the new text to a sibling temp file first; the original
        # is only replaced once the new content is fully written.
        tmpname = name + '.tmp'
        with open(tmpname, 'w') as fp:
            fp.write(after)
        os.rename(tmpname, name)

# The exit status is the duplicate-ID count from the first pass. Cap it
# so a large count cannot wrap around to 0 (POSIX keeps only the low
# 8 bits) and be mistaken for success.
sys.exit(min(errs, 255))