hgbook
diff fr/autoid.py @ 1019:746a888fb41b
some typo and better french translation
author | André Sintzoff <andre.sintzoff@gmail.com> |
---|---|
date | Mon Nov 30 10:57:42 2009 +0100 (2009-11-30) |
parents | c838b3975bc6 |
children |
line diff
#!/usr/bin/env python
#
# Add unique ID attributes to para tags.  This script should only be
# run by one person, since otherwise it introduces the possibility of
# chaotic conflicts among tags.

import glob, os, re, sys

# Matches a <para ...> tag that already carries an id="x_<hex>" attribute;
# group 1 is the hexadecimal number.
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
# Matches only a bare <para> tag with no attributes at all.
untagged = re.compile('<para>')


def read_file(name):
    """Return the whole content of file `name`, closing the handle."""
    with open(name) as fp:
        return fp.read()


def scan_ids(name, text, seen):
    """Collect the paragraph IDs already present in `text`.

    Every ID found is added to `seen` (a set of ints shared across
    files).  An ID that is already in `seen` is reported on stderr,
    using `name` as the file label.

    Returns a tuple (biggest, dups): the largest ID found in `text`
    (0 when there is none) and the number of duplicates reported.
    """
    biggest = 0
    dups = 0
    for m in tagged.finditer(text):
        i = int(m.group(1), 16)
        if i in seen:
            # sys.stderr.write works on both Python 2 and Python 3,
            # unlike the "print >>" statement.  The ID is printed in
            # decimal, as the script has always done.
            sys.stderr.write('%s: duplication of ID %s\n' % (name, i))
            dups += 1
        seen.add(i)
        if i > biggest:
            biggest = i
    return biggest, dups


def add_ids(text, last_id):
    """Give every bare <para> tag in `text` a fresh hexadecimal ID.

    IDs are handed out sequentially, starting at last_id + 1.

    Returns a tuple (new_text, last_id): the rewritten text and the
    last ID that was handed out (unchanged when nothing was tagged).
    """
    # One-element list: lets the nested function update the counter
    # without needing a module-level global.
    counter = [last_id]

    def retag(match):
        counter[0] += 1
        return '<para id="x_%x">' % counter[0]

    return untagged.sub(retag, text), counter[0]


def main():
    """Tag the ch*.xml and app*.xml files of the current directory.

    Returns the number of duplicated IDs that were detected.
    """
    names = glob.glob('ch*.xml') + glob.glob('app*.xml')

    # First pass: find the highest-numbered paragraph ID.

    biggest_id = 0
    seen = set()
    errs = 0

    for name in names:
        file_biggest, dups = scan_ids(name, read_file(name), seen)
        errs += dups
        if file_biggest > biggest_id:
            biggest_id = file_biggest

    # Second pass: add IDs to paragraphs that currently lack them.

    for name in names:
        old = read_file(name)
        new, biggest_id = add_ids(old, biggest_id)
        if new != old:
            # Write to a temporary file first, then rename it over the
            # original, so the original is never left half-written.
            tmpname = name + '.tmp'
            with open(tmpname, 'w') as fp:
                fp.write(new)
            os.rename(tmpname, name)

    return errs


if __name__ == '__main__':
    # Exit statuses are truncated to 8 bits on POSIX, so cap the count:
    # otherwise exactly 256 duplicates would be reported as success.
    sys.exit(min(main(), 255))