hgbook

diff en/autoid.py @ 774:5c9552a4e552
Deep revision of Ch.14.
author: Giulio@puck
date: Tue Aug 04 18:15:35 2009 +0200 (2009-08-04)
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/en/autoid.py	Tue Aug 04 18:15:35 2009 +0200
     1.3 @@ -0,0 +1,47 @@
     1.4 +#!/usr/bin/env python
     1.5 +#
     1.6 +# Add unique ID attributes to para tags.  This script should only be
     1.7 +# run by one person, since otherwise it introduces the possibility of
     1.8 +# chaotic conflicts among tags.
     1.9 +
    1.10 +import glob, os, re, sys
    1.11 +
    1.12 +tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)  # group 1 = hex digits of an existing ID; re.M has no effect without ^/$ anchors
    1.13 +untagged = re.compile('<para>')  # matches only a bare <para>, i.e. one with no attributes at all
    1.14 +# Files to process: ch*.xml and app*.xml (presumably chapters and appendices) in the current directory.
    1.15 +names = glob.glob('ch*.xml') + glob.glob('app*.xml')
    1.16 +
    1.17 +# First pass: find the highest-numbered paragraph ID and detect duplicates.
    1.18 +# IDs are parsed as hexadecimal; every repeated ID is reported on stderr.
    1.19 +biggest_id = 0  # highest ID seen so far; stays 0 if no tagged <para> is found
    1.20 +seen = set()  # every ID value encountered, across all files
    1.21 +errs = 0  # number of duplicate IDs found; later passed to sys.exit()
    1.22 +
    1.23 +for name in names:
    1.24 +    for m in tagged.finditer(open(name).read()):  # whole file read at once; file object is never explicitly closed
    1.25 +        i = int(m.group(1),16)  # hex digits -> integer
    1.26 +        if i in seen:
    1.27 +            print >> sys.stderr, '%s: duplication of ID %s' % (name, i)  # NOTE: prints i in decimal, not the x_<hex> form used in the files
    1.28 +            errs += 1
    1.29 +        seen.add(i)
    1.30 +        if i > biggest_id:
    1.31 +            biggest_id = i
    1.32 +
    1.33 +def retag(s):  # replacement callback for untagged.sub(); s is the match object and is not used
    1.34 +    global biggest_id  # module-level counter, seeded by the first pass
    1.35 +    biggest_id += 1  # allocate one past the highest ID handed out or seen so far
    1.36 +    return '<para id="x_%x">' % biggest_id  # opening tag carrying the new ID in lowercase hex
    1.37 +
    1.38 +# Second pass: add IDs to paragraphs that currently lack them.
    1.39 +# A file is rewritten only if its text changed, via a .tmp file that is then renamed over it.
    1.40 +for name in names:
    1.41 +    f = open(name).read()  # f holds the file's text, not a file object
    1.42 +    f1 = untagged.sub(retag, f)  # retag is called once for each bare <para>
    1.43 +    if f1 != f:
    1.44 +        tmpname = name + '.tmp'
    1.45 +        fp = open(tmpname, 'w')
    1.46 +        fp.write(f1)
    1.47 +        fp.close()  # close before renaming so the temp file is completely written
    1.48 +        os.rename(tmpname, name)  # swap the updated copy into place
    1.49 +
    1.50 +sys.exit(errs)  # exit status is the duplicate-ID count from the first pass (0 = none found)
author	Giulio@puck
date	Tue Aug 04 18:15:35 2009 +0200 (2009-08-04)
parents
children