hgbook
diff tools/po4a/lib/Locale/Po4a/Po.pm @ 642:a4b71115602d
Typo fix
author | Dongsheng Song <dongsheng.song@gmail.com> |
---|---|
date | Tue Mar 17 16:09:57 2009 +0800 (2009-03-17) |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/po4a/lib/Locale/Po4a/Po.pm Tue Mar 17 16:09:57 2009 +0800 1.3 @@ -0,0 +1,1580 @@ 1.4 +# Locale::Po4a::Po -- manipulation of po files 1.5 +# $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $ 1.6 +# 1.7 +# This program is free software; you may redistribute it and/or modify it 1.8 +# under the terms of GPL (see COPYING). 1.9 + 1.10 +############################################################################ 1.11 +# Modules and declarations 1.12 +############################################################################ 1.13 + 1.14 +=head1 NAME 1.15 + 1.16 +Locale::Po4a::Po - po file manipulation module 1.17 + 1.18 +=head1 SYNOPSIS 1.19 + 1.20 + use Locale::Po4a::Po; 1.21 + my $pofile=Locale::Po4a::Po->new(); 1.22 + 1.23 + # Read po file 1.24 + $pofile->read('file.po'); 1.25 + 1.26 + # Add an entry 1.27 + $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour', 1.28 + 'flags' => "wrap", 'reference'=>'file.c:46'); 1.29 + 1.30 + # Extract a translation 1.31 + $pofile->gettext("Hello"); # returns 'bonjour' 1.32 + 1.33 + # Write back to a file 1.34 + $pofile->write('otherfile.po'); 1.35 + 1.36 +=head1 DESCRIPTION 1.37 + 1.38 +Locale::Po4a::Po is a module that allows you to manipulate message 1.39 +catalogs. You can load and write from/to a file (which extension is often 1.40 +I<po>), you can build new entries on the fly or request for the translation 1.41 +of a string. 1.42 + 1.43 +For a more complete description of message catalogs in the po format and 1.44 +their use, please refer to the documentation of the gettext program. 1.45 + 1.46 +This module is part of the PO4A project, which objective is to use po files 1.47 +(designed at origin to ease the translation of program messages) to 1.48 +translate everything, including documentation (man page, info manual), 1.49 +package description, debconf templates, and everything which may benefit 1.50 +from this. 1.51 + 1.52 +=head1 OPTIONS ACCEPTED BY THIS MODULE 1.53 + 1.54 +=over 4 1.55 + 1.56 +=item porefs 1.57 + 1.58 +This specifies the reference format. It can be one of 'none' to not produce 1.59 +any reference, 'noline' to not specify the line number, and 'full' to 1.60 +include complete references. 1.61 + 1.62 +=back 1.63 + 1.64 +=cut 1.65 + 1.66 +use IO::File; 1.67 + 1.68 + 1.69 +require Exporter; 1.70 + 1.71 +package Locale::Po4a::Po; 1.72 +use DynaLoader; 1.73 + 1.74 +use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext); 1.75 + 1.76 +use subs qw(makespace); 1.77 +use vars qw(@ISA @EXPORT_OK); 1.78 +@ISA = qw(Exporter DynaLoader); 1.79 +@EXPORT = qw(%debug); 1.80 +@EXPORT_OK = qw(&move_po_if_needed); 1.81 + 1.82 +use Locale::Po4a::TransTractor; 1.83 +# Try to use a C extension if present. 1.84 +eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION"); 1.85 + 1.86 +use 5.006; 1.87 +use strict; 1.88 +use warnings; 1.89 + 1.90 +use Carp qw(croak); 1.91 +use File::Path; # mkdir before write 1.92 +use File::Copy; # move 1.93 +use POSIX qw(strftime floor); 1.94 +use Time::Local; 1.95 + 1.96 +use Encode; 1.97 + 1.98 +my @known_flags=qw(wrap no-wrap c-format fuzzy); 1.99 + 1.100 +our %debug=('canonize' => 0, 1.101 + 'quote' => 0, 1.102 + 'escape' => 0, 1.103 + 'encoding' => 0, 1.104 + 'filter' => 0); 1.105 + 1.106 +=head1 Functions about whole message catalogs 1.107 + 1.108 +=over 4 1.109 + 1.110 +=item new() 1.111 + 1.112 +Creates a new message catalog. If an argument is provided, it's the name of 1.113 +a po file we should load. 1.114 + 1.115 +=cut 1.116 + 1.117 +sub new { 1.118 + my ($this, $options) = (shift, shift); 1.119 + my $class = ref($this) || $this; 1.120 + my $self = {}; 1.121 + bless $self, $class; 1.122 + $self->initialize($options); 1.123 + 1.124 + my $filename = shift; 1.125 + $self->read($filename) if defined($filename) && length($filename); 1.126 + return $self; 1.127 +} 1.128 + 1.129 +# Return the numerical timezone (e.g. +0200) 1.130 +# Neither the %z nor the %s formats of strftime are portable: 1.131 +# '%s' is not supported on Solaris and '%z' indicates 1.132 +# "2006-10-25 19:36E. Europe Standard Time" on MS Windows. 1.133 +sub timezone { 1.134 + my @g = gmtime(); 1.135 + my @l = localtime(); 1.136 + 1.137 + my $diff; 1.138 + $diff = floor(timelocal(@l)/60 +0.5); 1.139 + $diff -= floor(timelocal(@g)/60 +0.5); 1.140 + 1.141 + my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently 1.142 + # in a daylight saving time zone 1.143 + my $m = $diff%60; 1.144 + 1.145 + return sprintf "%+03d%02d\n", $h, $m; 1.146 +} 1.147 + 1.148 +sub initialize { 1.149 + my ($self, $options) = (shift, shift); 1.150 + my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone(); 1.151 + chomp $date; 1.152 +# $options = ref($options) || $options; 1.153 + 1.154 + $self->{options}{'porefs'}= 'full'; 1.155 + $self->{options}{'msgid-bugs-address'}= undef; 1.156 + $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc."; 1.157 + $self->{options}{'package-name'}= "PACKAGE"; 1.158 + $self->{options}{'package-version'}= "VERSION"; 1.159 + foreach my $opt (keys %$options) { 1.160 + if ($options->{$opt}) { 1.161 + die wrap_mod("po4a::po", 1.162 + dgettext ("po4a", "Unknown option: %s"), $opt) 1.163 + unless exists $self->{options}{$opt}; 1.164 + $self->{options}{$opt} = $options->{$opt}; 1.165 + } 1.166 + } 1.167 + $self->{options}{'porefs'} =~ /^(full|noline|none)$/ || 1.168 + die wrap_mod("po4a::po", 1.169 + dgettext ("po4a", 1.170 + "Invalid value for option 'porefs' ('%s' is ". 1.171 + "not one of 'full', 'noline' or 'none')"), 1.172 + $self->{options}{'porefs'}); 1.173 + 1.174 + $self->{po}=(); 1.175 + $self->{count}=0; # number of msgids in the PO 1.176 + # count_doc: number of strings in the document 1.177 + # (duplicate strings counted multiple times) 1.178 + $self->{count_doc}=0; 1.179 + $self->{header_comment}= 1.180 + " SOME DESCRIPTIVE TITLE\n" 1.181 + ." Copyright (C) YEAR ". 1.182 + $self->{options}{'copyright-holder'}."\n" 1.183 + ." This file is distributed under the same license ". 1.184 + "as the ".$self->{options}{'package-name'}." package.\n" 1.185 + ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n" 1.186 + ."\n" 1.187 + .", fuzzy"; 1.188 +# $self->header_tag="fuzzy"; 1.189 + $self->{header}=escape_text("Project-Id-Version: ". 1.190 + $self->{options}{'package-name'}." ". 1.191 + $self->{options}{'package-version'}."\n". 1.192 + ((defined $self->{options}{'msgid-bugs-address'})? 1.193 + "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n": 1.194 + ""). 1.195 + "POT-Creation-Date: $date\n". 1.196 + "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n". 1.197 + "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n". 1.198 + "Language-Team: LANGUAGE <LL\@li.org>\n". 1.199 + "MIME-Version: 1.0\n". 1.200 + "Content-Type: text/plain; charset=CHARSET\n". 1.201 + "Content-Transfer-Encoding: ENCODING"); 1.202 + 1.203 + $self->{encoder}=find_encoding("ascii"); 1.204 + 1.205 + # To make stats about gettext hits 1.206 + $self->stats_clear(); 1.207 +} 1.208 + 1.209 +=item read($) 1.210 + 1.211 +Reads a po file (which name is given as argument). Previously existing 1.212 +entries in self are not removed, the new ones are added to the end of the 1.213 +catalog. 1.214 + 1.215 +=cut 1.216 + 1.217 +sub read { 1.218 + my $self=shift; 1.219 + my $filename=shift 1.220 + or croak wrap_mod("po4a::po", 1.221 + dgettext("po4a", 1.222 + "Please provide a non-null filename")); 1.223 + 1.224 + my $fh; 1.225 + if ($filename eq '-') { 1.226 + $fh=*STDIN; 1.227 + } else { 1.228 + open $fh,"<$filename" 1.229 + or croak wrap_mod("po4a::po", 1.230 + dgettext("po4a", "Can't read from %s: %s"), 1.231 + $filename, $!); 1.232 + } 1.233 + 1.234 + ## Read paragraphs line-by-line 1.235 + my $pofile=""; 1.236 + my $textline; 1.237 + while (defined ($textline = <$fh>)) { 1.238 + $pofile .= $textline; 1.239 + } 1.240 +# close INPUT 1.241 +# or croak (sprintf(dgettext("po4a", 1.242 +# "Can't close %s after reading: %s"), 1.243 +# $filename,$!)."\n"); 1.244 + 1.245 + my $linenum=0; 1.246 + 1.247 + foreach my $msg (split (/\n\n/,$pofile)) { 1.248 + my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer); 1.249 + my ($msgid_plural, $msgstr_plural); 1.250 + foreach my $line (split (/\n/,$msg)) { 1.251 + $linenum++; 1.252 + if ($line =~ /^#\. ?(.*)$/) { # Automatic comment 1.253 + $automatic .= (defined($automatic) ? "\n" : "").$1; 1.254 + 1.255 + } elsif ($line =~ /^#: ?(.*)$/) { # reference 1.256 + $reference .= (defined($reference) ? "\n" : "").$1; 1.257 + 1.258 + } elsif ($line =~ /^#, ?(.*)$/) { # flags 1.259 + $flags .= (defined($flags) ? "\n" : "").$1; 1.260 + 1.261 + } elsif ($line =~ /^#(.*)$/) { # Translator comments 1.262 + $comment .= (defined($comment) ? "\n" : "").($1||""); 1.263 + 1.264 + } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid 1.265 + $buffer = $1; 1.266 + 1.267 + } elsif ($line =~ /^msgid_plural (".*")$/) { 1.268 + # begin of msgid_plural, end of msgid 1.269 + 1.270 + $msgid = $buffer; 1.271 + $buffer = $1; 1.272 + 1.273 + } elsif ($line =~ /^msgstr (".*")$/) { 1.274 + # begin of msgstr, end of msgid 1.275 + 1.276 + $msgid = $buffer; 1.277 + $buffer = "$1"; 1.278 + 1.279 + } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) { 1.280 + # begin of msgstr[x], end of msgid_plural or msgstr[x-1] 1.281 + 1.282 + # Note: po4a cannot uses plural forms 1.283 + # (no integer to use the plural form) 1.284 + # * drop the msgstr[x] where x >= 2 1.285 + # * use msgstr[0] as the translation of msgid 1.286 + # * use msgstr[1] as the translation of msgid_plural 1.287 + 1.288 + if ($1 eq "0") { 1.289 + $msgid_plural = $buffer; 1.290 + $buffer = "$2"; 1.291 + } elsif ($1 eq "1") { 1.292 + $msgstr = $buffer; 1.293 + $buffer = "$2"; 1.294 + } elsif ($1 eq "2") { 1.295 + $msgstr_plural = $buffer; 1.296 + warn wrap_ref_mod("$filename:$linenum", 1.297 + "po4a::po", 1.298 + dgettext("po4a", "Messages with more than 2 plural forms are not supported.")); 1.299 + } 1.300 + } elsif ($line =~ /^(".*")$/) { 1.301 + # continuation of a line 1.302 + $buffer .= "\n$1"; 1.303 + 1.304 + } else { 1.305 + warn wrap_ref_mod("$filename:$linenum", 1.306 + "po4a::po", 1.307 + dgettext("po4a", "Strange line: -->%s<--"), 1.308 + $line); 1.309 + } 1.310 + } 1.311 + $linenum++; 1.312 + if (defined $msgid_plural) { 1.313 + $msgstr_plural=$buffer; 1.314 + 1.315 + $msgid = unquote_text($msgid) if (defined($msgid)); 1.316 + $msgstr = unquote_text($msgstr) if (defined($msgstr)); 1.317 + 1.318 + $self->push_raw ('msgid' => $msgid, 1.319 + 'msgstr' => $msgstr, 1.320 + 'reference' => $reference, 1.321 + 'flags' => $flags, 1.322 + 'comment' => $comment, 1.323 + 'automatic' => $automatic, 1.324 + 'plural' => 0); 1.325 + 1.326 + $msgid_plural = unquote_text($msgid_plural) 1.327 + if (defined($msgid_plural)); 1.328 + $msgstr_plural = unquote_text($msgstr_plural) 1.329 + if (defined($msgstr_plural)); 1.330 + 1.331 + $self->push_raw ('msgid' => $msgid_plural, 1.332 + 'msgstr' => $msgstr_plural, 1.333 + 'reference' => $reference, 1.334 + 'flags' => $flags, 1.335 + 'comment' => $comment, 1.336 + 'automatic' => $automatic, 1.337 + 'plural' => 1); 1.338 + } else { 1.339 + $msgstr=$buffer; 1.340 + 1.341 + $msgid = unquote_text($msgid) if (defined($msgid)); 1.342 + $msgstr = unquote_text($msgstr) if (defined($msgstr)); 1.343 + 1.344 + $self->push_raw ('msgid' => $msgid, 1.345 + 'msgstr' => $msgstr, 1.346 + 'reference' => $reference, 1.347 + 'flags' => $flags, 1.348 + 'comment' => $comment, 1.349 + 'automatic' => $automatic); 1.350 + } 1.351 + } 1.352 +} 1.353 + 1.354 +=item write($) 1.355 + 1.356 +Writes the current catalog to the given file. 1.357 + 1.358 +=cut 1.359 + 1.360 +sub write{ 1.361 + my $self=shift; 1.362 + my $filename=shift 1.363 + or croak dgettext("po4a","Can't write to a file without filename")."\n"; 1.364 + 1.365 + my $fh; 1.366 + if ($filename eq '-') { 1.367 + $fh=\*STDOUT; 1.368 + } else { 1.369 + # make sure the directory in which we should write the localized 1.370 + # file exists 1.371 + my $dir = $filename; 1.372 + if ($dir =~ m|/|) { 1.373 + $dir =~ s|/[^/]*$||; 1.374 + 1.375 + File::Path::mkpath($dir, 0, 0755) # Croaks on error 1.376 + if (length ($dir) && ! -e $dir); 1.377 + } 1.378 + open $fh,">$filename" 1.379 + or croak wrap_mod("po4a::po", 1.380 + dgettext("po4a", "Can't write to %s: %s"), 1.381 + $filename, $!); 1.382 + } 1.383 + 1.384 + print $fh "".format_comment($self->{header_comment},"") 1.385 + if defined($self->{header_comment}) && length($self->{header_comment}); 1.386 + 1.387 + print $fh "msgid \"\"\n"; 1.388 + print $fh "msgstr ".quote_text($self->{header})."\n\n"; 1.389 + 1.390 + 1.391 + my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms 1.392 + my $first=1; 1.393 + foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=> 1.394 + ($self->{po}{"$b"}{'pos'}) 1.395 + } keys %{$self->{po}}) { 1.396 + my $output=""; 1.397 + 1.398 + if ($first) { 1.399 + $first=0; 1.400 + } else { 1.401 + $output .= "\n"; 1.402 + } 1.403 + 1.404 + $output .= format_comment($self->{po}{$msgid}{'comment'},"") 1.405 + if defined($self->{po}{$msgid}{'comment'}) 1.406 + && length ($self->{po}{$msgid}{'comment'}); 1.407 + if ( defined($self->{po}{$msgid}{'automatic'}) 1.408 + && length ($self->{po}{$msgid}{'automatic'})) { 1.409 + foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'})) 1.410 + { 1.411 + $output .= format_comment($comment, ". ") 1.412 + } 1.413 + } 1.414 + $output .= format_comment($self->{po}{$msgid}{'type'},". type: ") 1.415 + if defined($self->{po}{$msgid}{'type'}) 1.416 + && length ($self->{po}{$msgid}{'type'}); 1.417 + $output .= format_comment($self->{po}{$msgid}{'reference'},": ") 1.418 + if defined($self->{po}{$msgid}{'reference'}) 1.419 + && length ($self->{po}{$msgid}{'reference'}); 1.420 + $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n" 1.421 + if defined($self->{po}{$msgid}{'flags'}) 1.422 + && length ($self->{po}{$msgid}{'flags'}); 1.423 + 1.424 + if (exists $self->{po}{$msgid}{'plural'}) { 1.425 + if ($self->{po}{$msgid}{'plural'} == 0) { 1.426 + if ($self->get_charset =~ /^utf-8$/i) { 1.427 + my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); 1.428 + $msgid = Encode::decode_utf8($msgid); 1.429 + $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n"); 1.430 + $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n"); 1.431 + } else { 1.432 + $output = "msgid ".quote_text($msgid)."\n"; 1.433 + $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; 1.434 + } 1.435 + } elsif ($self->{po}{$msgid}{'plural'} == 1) { 1.436 +# TODO: there may be only one plural form 1.437 + if ($self->get_charset =~ /^utf-8$/i) { 1.438 + my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); 1.439 + $msgid = Encode::decode_utf8($msgid); 1.440 + $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n"); 1.441 + $output .= $buf_msgstr_plural; 1.442 + $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n"); 1.443 + $buf_msgstr_plural = ""; 1.444 + } else { 1.445 + $output = "msgid_plural ".quote_text($msgid)."\n"; 1.446 + $output .= $buf_msgstr_plural; 1.447 + $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; 1.448 + } 1.449 + } else { 1.450 + die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms.")); 1.451 + } 1.452 + } else { 1.453 + if ($self->get_charset =~ /^utf-8$/i) { 1.454 + my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); 1.455 + $msgid = Encode::decode_utf8($msgid); 1.456 + $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n"); 1.457 + $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n"); 1.458 + } else { 1.459 + $output .= "msgid ".quote_text($msgid)."\n"; 1.460 + $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; 1.461 + } 1.462 + } 1.463 + 1.464 + print $fh $output; 1.465 + } 1.466 +# print STDERR "$fh"; 1.467 +# if ($filename ne '-') { 1.468 +# close $fh 1.469 +# or croak (sprintf(dgettext("po4a", 1.470 +# "Can't close %s after writing: %s\n"), 1.471 +# $filename,$!)); 1.472 +# } 1.473 +} 1.474 + 1.475 +=item write_if_needed($$) 1.476 + 1.477 +Like write, but if the PO or POT file already exists, the object will be 1.478 +written in a temporary file which will be compared with the existing file 1.479 +to check that the update is needed (this avoids to change a POT just to 1.480 +update a line reference or the POT-Creation-Date field). 1.481 + 1.482 +=cut 1.483 + 1.484 +sub move_po_if_needed { 1.485 + my ($new_po, $old_po, $backup) = (shift, shift, shift); 1.486 + my $diff; 1.487 + 1.488 + if (-e $old_po) { 1.489 + my $diff_ignore = "-I'^#:' " 1.490 + ."-I'^\"POT-Creation-Date:' " 1.491 + ."-I'^\"PO-Revision-Date:'"; 1.492 + $diff = qx(diff -q $diff_ignore $old_po $new_po); 1.493 + if ( $diff eq "" ) { 1.494 + unlink $new_po 1.495 + or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."), 1.496 + $new_po, $!); 1.497 + # touch the old PO 1.498 + my ($atime, $mtime) = (time,time); 1.499 + utime $atime, $mtime, $old_po; 1.500 + } else { 1.501 + if ($backup) { 1.502 + copy $old_po, $old_po."~" 1.503 + or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."), 1.504 + $old_po, $old_po."~", $!); 1.505 + } else { 1.506 + } 1.507 + move $new_po, $old_po 1.508 + or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."), 1.509 + $new_po, $old_po, $!); 1.510 + } 1.511 + } else { 1.512 + move $new_po, $old_po 1.513 + or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."), 1.514 + $new_po, $old_po, $!); 1.515 + } 1.516 +} 1.517 + 1.518 +sub write_if_needed { 1.519 + my $self=shift; 1.520 + my $filename=shift 1.521 + or croak dgettext("po4a","Can't write to a file without filename")."\n"; 1.522 + 1.523 + if (-e $filename) { 1.524 + my ($tmp_filename); 1.525 + (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX", 1.526 + DIR => "/tmp", 1.527 + OPEN => 0, 1.528 + UNLINK => 0); 1.529 + $self->write($tmp_filename); 1.530 + move_po_if_needed($tmp_filename, $filename); 1.531 + } else { 1.532 + $self->write($filename); 1.533 + } 1.534 +} 1.535 + 1.536 +=item gettextize($$) 1.537 + 1.538 +This function produces one translated message catalog from two catalogs, an 1.539 +original and a translation. This process is described in L<po4a(7)|po4a.7>, 1.540 +section I<Gettextization: how does it work?>. 1.541 + 1.542 +=cut 1.543 + 1.544 +sub gettextize { 1.545 + my $this = shift; 1.546 + my $class = ref($this) || $this; 1.547 + my ($poorig,$potrans)=(shift,shift); 1.548 + 1.549 + my $pores=Locale::Po4a::Po->new(); 1.550 + 1.551 + my $please_fail = 0; 1.552 + my $toobad = dgettext("po4a", 1.553 + "\nThe gettextization failed (once again). Don't give up, ". 1.554 + "gettextizing is a subtle art, but this is only needed once ". 1.555 + "to convert a project to the gorgeous luxus offered by po4a ". 1.556 + "to translators.". 1.557 + "\nPlease refer to the po4a(7) documentation, the section ". 1.558 + "\"HOWTO convert a pre-existing translation to po4a?\" ". 1.559 + "contains several hints to help you in your task"); 1.560 + 1.561 + # Don't fail right now when the entry count does not match. Instead, give 1.562 + # it a try so that the user can see where we fail (which is probably where 1.563 + # the problem is). 1.564 + if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) { 1.565 + warn wrap_mod("po4a gettextize", dgettext("po4a", 1.566 + "Original has more strings than the translation (%d>%d). ". 1.567 + "Please fix it by editing the translated version to add ". 1.568 + "some dummy entry."), 1.569 + $poorig->count_entries_doc(), 1.570 + $potrans->count_entries_doc()); 1.571 + $please_fail = 1; 1.572 + } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) { 1.573 + warn wrap_mod("po4a gettextize", dgettext("po4a", 1.574 + "Original has less strings than the translation (%d<%d). ". 1.575 + "Please fix it by removing the extra entry from the ". 1.576 + "translated file. You may need an addendum (cf po4a(7)) ". 1.577 + "to reput the chunk in place after gettextization. A ". 1.578 + "possible cause is that a text duplicated in the original ". 1.579 + "is not translated the same way each time. Remove one of ". 1.580 + "the translations, and you're fine."), 1.581 + $poorig->count_entries_doc(), 1.582 + $potrans->count_entries_doc()); 1.583 + $please_fail = 1; 1.584 + } 1.585 + 1.586 + if ( $poorig->get_charset =~ /^utf-8$/i ) { 1.587 + $potrans->to_utf8; 1.588 + $pores->set_charset("utf-8"); 1.589 + } else { 1.590 + if ($potrans->get_charset eq "CHARSET") { 1.591 + $pores->set_charset("ascii"); 1.592 + } else { 1.593 + $pores->set_charset($potrans->get_charset); 1.594 + } 1.595 + } 1.596 + print "Po character sets:\n". 1.597 + " original=".$poorig->get_charset."\n". 1.598 + " translated=".$potrans->get_charset."\n". 1.599 + " result=".$pores->get_charset."\n" 1.600 + if $debug{'encoding'}; 1.601 + 1.602 + for (my ($o,$t)=(0,0) ; 1.603 + $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc(); 1.604 + $o++,$t++) { 1.605 + # 1.606 + # Extract some informations 1.607 + 1.608 + my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t)); 1.609 +# print STDERR "Matches [[$orig]]<<$trans>>\n"; 1.610 + 1.611 + my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'}, 1.612 + $potrans->{po}{$trans}{'reference'}); 1.613 + my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'}, 1.614 + $potrans->{po}{$trans}{'type'}); 1.615 + 1.616 + # 1.617 + # Make sure the type of both string exist 1.618 + # 1.619 + die wrap_mod("po4a gettextize", 1.620 + "Internal error: type of original string number %s ". 1.621 + "isn't provided", $o) 1.622 + if ($typeorig eq ''); 1.623 + 1.624 + die wrap_mod("po4a gettextize", 1.625 + "Internal error: type of translated string number %s ". 1.626 + "isn't provided", $o) 1.627 + if ($typetrans eq ''); 1.628 + 1.629 + # 1.630 + # Make sure both type are the same 1.631 + # 1.632 + if ($typeorig ne $typetrans){ 1.633 + $pores->write("gettextization.failed.po"); 1.634 + die wrap_msg(dgettext("po4a", 1.635 + "po4a gettextization: Structure disparity between ". 1.636 + "original and translated files:\n". 1.637 + "msgid (at %s) is of type '%s' while\n". 1.638 + "msgstr (at %s) is of type '%s'.\n". 1.639 + "Original text: %s\n". 1.640 + "Translated text: %s\n". 1.641 + "(result so far dumped to gettextization.failed.po)"). 1.642 + "%s", 1.643 + $reforig, $typeorig, 1.644 + $reftrans, $typetrans, 1.645 + $orig, 1.646 + $trans, 1.647 + $toobad); 1.648 + } 1.649 + 1.650 + # 1.651 + # Push the entry 1.652 + # 1.653 + my $flags; 1.654 + if (defined $poorig->{po}{$orig}{'flags'}) { 1.655 + $flags = $poorig->{po}{$orig}{'flags'}." fuzzy"; 1.656 + } else { 1.657 + $flags = "fuzzy"; 1.658 + } 1.659 + $pores->push_raw('msgid' => $orig, 1.660 + 'msgstr' => $trans, 1.661 + 'flags' => $flags, 1.662 + 'type' => $typeorig, 1.663 + 'reference' => $reforig, 1.664 + 'conflict' => 1, 1.665 + 'transref' => $potrans->{po}{$trans}{'reference'}) 1.666 + unless (defined($pores->{po}{$orig}) 1.667 + and ($pores->{po}{$orig}{'msgstr'} eq $trans)) 1.668 + # FIXME: maybe we should be smarter about what reference should be 1.669 + # sent to push_raw. 1.670 + } 1.671 + 1.672 + # make sure we return a useful error message when entry count differ 1.673 + die "$toobad\n" if $please_fail; 1.674 + 1.675 + return $pores; 1.676 +} 1.677 + 1.678 +=item filter($) 1.679 + 1.680 +This function extracts a catalog from an existing one. Only the entries having 1.681 +a reference in the given file will be placed in the resulting catalog. 1.682 + 1.683 +This function parses its argument, converts it to a perl function definition, 1.684 +eval this definition and filter the fields for which this function returns 1.685 +true. 1.686 + 1.687 +I love perl sometimes ;) 1.688 + 1.689 +=cut 1.690 + 1.691 +sub filter { 1.692 + my $self=shift; 1.693 + our $filter=shift; 1.694 + 1.695 + my $res; 1.696 + $res = Locale::Po4a::Po->new(); 1.697 + 1.698 + # Parse the filter 1.699 + our $code="sub apply { return "; 1.700 + our $pos=0; 1.701 + our $length = length $filter; 1.702 + 1.703 + # explode chars to parts. How to subscript a string in Perl? 1.704 + our @filter = split(//,$filter); 1.705 + 1.706 + sub gloups { 1.707 + my $fmt=shift; 1.708 + my $space = ""; 1.709 + for (1..$pos){ 1.710 + $space .= ' '; 1.711 + } 1.712 + die wrap_msg("$fmt\n$filter\n$space^ HERE"); 1.713 + } 1.714 + sub showmethecode { 1.715 + return unless $debug{'filter'}; 1.716 + my $fmt=shift; 1.717 + my $space=""; 1.718 + for (1..$pos){ 1.719 + $space .= ' '; 1.720 + } 1.721 + print STDERR "$filter\n$space^ $fmt\n";#"$code\n"; 1.722 + } 1.723 + 1.724 + # I dream of a lex in perl :-/ 1.725 + sub parse_expression { 1.726 + showmethecode("Begin expression") 1.727 + if $debug{'filter'}; 1.728 + 1.729 + gloups("Begin of expression expected, got '%s'",$filter[$pos]) 1.730 + unless ($filter[$pos] eq '('); 1.731 + $pos ++; # pass the '(' 1.732 + if ($filter[$pos] eq '&') { 1.733 + # AND 1.734 + $pos++; 1.735 + showmethecode("Begin of AND") 1.736 + if $debug{'filter'}; 1.737 + $code .= "("; 1.738 + while (1) { 1.739 + gloups ("Unfinished AND statement.") 1.740 + if ($pos == $length); 1.741 + parse_expression(); 1.742 + if ($filter[$pos] eq '(') { 1.743 + $code .= " && "; 1.744 + } elsif ($filter[$pos] eq ')') { 1.745 + last; # do not eat that char 1.746 + } else { 1.747 + gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]); 1.748 + } 1.749 + } 1.750 + $code .= ")"; 1.751 + } elsif ($filter[$pos] eq '|') { 1.752 + # OR 1.753 + $pos++; 1.754 + $code .= "("; 1.755 + while (1) { 1.756 + gloups("Unfinished OR statement.") 1.757 + if ($pos == $length); 1.758 + parse_expression(); 1.759 + if ($filter[$pos] eq '(') { 1.760 + $code .= " || "; 1.761 + } elsif ($filter[$pos] eq ')') { 1.762 + last; # do not eat that char 1.763 + } else { 1.764 + gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]); 1.765 + } 1.766 + } 1.767 + $code .= ")"; 1.768 + } elsif ($filter[$pos] eq '!') { 1.769 + # NOT 1.770 + $pos++; 1.771 + $code .= "(!"; 1.772 + gloups("Missing sub-expression in NOT statement.") 1.773 + if ($pos == $length); 1.774 + parse_expression(); 1.775 + $code .= ")"; 1.776 + } else { 1.777 + # must be an equal. Let's get field and argument 1.778 + my ($field,$arg,$done); 1.779 + $field = substr($filter,$pos); 1.780 + gloups("EQ statement contains no '=' or invalid field name") 1.781 + unless ($field =~ /([a-z]*)=/i); 1.782 + $field = lc($1); 1.783 + $pos += (length $field) + 1; 1.784 + 1.785 + # check that we've got a valid field name, 1.786 + # and the number it referes to 1.787 + # DO NOT CHANGE THE ORDER 1.788 + my @names=qw(msgid msgstr reference flags comment automatic); 1.789 + my $fieldpos; 1.790 + for ($fieldpos = 0; 1.791 + $fieldpos < scalar @names && $field ne $names[$fieldpos]; 1.792 + $fieldpos++) {} 1.793 + gloups("Invalid field name: %s",$field) 1.794 + if $fieldpos == scalar @names; # not found 1.795 + 1.796 + # Now, get the argument value. It has to be between quotes, 1.797 + # which can be escaped 1.798 + # We point right on the first char of the argument 1.799 + # (first quote already eaten) 1.800 + my $escaped = 0; 1.801 + my $quoted = 0; 1.802 + if ($filter[$pos] eq '"') { 1.803 + $pos++; 1.804 + $quoted = 1; 1.805 + } 1.806 + showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'") 1.807 + if $debug{'filter'}; 1.808 + 1.809 + while (!$done) { 1.810 + gloups("Unfinished EQ argument.") 1.811 + if ($pos == $length); 1.812 + 1.813 + if ($quoted) { 1.814 + if ($filter[$pos] eq '\\') { 1.815 + if ($escaped) { 1.816 + $arg .= '\\'; 1.817 + $escaped = 0; 1.818 + } else { 1.819 + $escaped = 1; 1.820 + } 1.821 + } elsif ($escaped) { 1.822 + if ($filter[$pos] eq '"') { 1.823 + $arg .= '"'; 1.824 + $escaped = 0; 1.825 + } else { 1.826 + gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]); 1.827 + } 1.828 + } else { 1.829 + if ($filter[$pos] eq '"') { 1.830 + $done = 1; 1.831 + } else { 1.832 + $arg .= $filter[$pos]; 1.833 + } 1.834 + } 1.835 + } else { 1.836 + if ($filter[$pos] eq ')') { 1.837 + # counter the next ++ since we don't want to eat 1.838 + # this char 1.839 + $pos--; 1.840 + $done = 1; 1.841 + } else { 1.842 + $arg .= $filter[$pos]; 1.843 + } 1.844 + } 1.845 + $pos++; 1.846 + } 1.847 + # and now, add the code to check this equality 1.848 + $code .= "(\$_[$fieldpos] =~ m/$arg/)"; 1.849 + 1.850 + } 1.851 + showmethecode("End of expression") 1.852 + if $debug{'filter'}; 1.853 + gloups("Unfinished statement.") 1.854 + if ($pos == $length); 1.855 + gloups("End of expression expected, got '%s'",$filter[$pos]) 1.856 + unless ($filter[$pos] eq ')'); 1.857 + $pos++; 1.858 + } 1.859 + # And now, launch the beast, finish the function and use eval 1.860 + # to construct this function. 1.861 + # Ok, the lack of lexer is a fair price for the eval ;) 1.862 + parse_expression(); 1.863 + gloups("Garbage at the end of the expression") 1.864 + if ($pos != $length); 1.865 + $code .= "; }"; 1.866 + print STDERR "CODE = $code\n" 1.867 + if $debug{'filter'}; 1.868 + eval $code; 1.869 + die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@) 1.870 + if $@; 1.871 + 1.872 + for (my $cpt=(0) ; 1.873 + $cpt<$self->count_entries(); 1.874 + $cpt++) { 1.875 + 1.876 + my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic); 1.877 + 1.878 + $msgid = $self->msgid($cpt); 1.879 + $ref=$self->{po}{$msgid}{'reference'}; 1.880 + 1.881 + $msgstr= $self->{po}{$msgid}{'msgstr'}; 1.882 + $flags = $self->{po}{$msgid}{'flags'}; 1.883 + $type = $self->{po}{$msgid}{'type'}; 1.884 + $comment = $self->{po}{$msgid}{'comment'}; 1.885 + $automatic = $self->{po}{$msgid}{'automatic'}; 1.886 + 1.887 + # DO NOT CHANGE THE ORDER 1.888 + $res->push_raw('msgid' => $msgid, 1.889 + 'msgstr' => $msgstr, 1.890 + 'flags' => $flags, 1.891 + 'type' => $type, 1.892 + 'reference' => $ref, 1.893 + 'comment' => $comment, 1.894 + 'automatic' => $automatic) 1.895 + if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic)); 1.896 + } 1.897 + # delete the apply subroutine 1.898 + # otherwise it will be redefined. 1.899 + undef &apply; 1.900 + return $res; 1.901 +} 1.902 + 1.903 +=item to_utf8() 1.904 + 1.905 +Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not 1.906 +specified in the po file ("CHARSET" value), or if it's already utf-8 or 1.907 +ascii. 1.908 + 1.909 +=cut 1.910 + 1.911 +sub to_utf8 { 1.912 + my $this = shift; 1.913 + my $charset = $this->get_charset(); 1.914 + 1.915 + unless ($charset eq "CHARSET" or 1.916 + $charset =~ /^ascii$/i or 1.917 + $charset =~ /^utf-8$/i) { 1.918 + foreach my $msgid ( keys %{$this->{po}} ) { 1.919 + Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8"); 1.920 + } 1.921 + $this->set_charset("utf-8"); 1.922 + } 1.923 +} 1.924 + 1.925 +=back 1.926 + 1.927 +=head1 Functions to use a message catalog for translations 1.928 + 1.929 +=over 4 1.930 + 1.931 +=item gettext($%) 1.932 + 1.933 +Request the translation of the string given as argument in the current catalog. 1.934 +The function returns the original (untranslated) string if the string was not 1.935 +found. 1.936 + 1.937 +After the string to translate, you can pass a hash of extra 1.938 +arguments. Here are the valid entries: 1.939 + 1.940 +=over 1.941 + 1.942 +=item wrap 1.943 + 1.944 +boolean indicating whether we can consider that whitespaces in string are 1.945 +not important. If yes, the function canonizes the string before looking for 1.946 +a translation, and wraps the result. 1.947 + 1.948 +=item wrapcol 1.949 + 1.950 +The column at which we should wrap (default: 76). 1.951 + 1.952 +=back 1.953 + 1.954 +=cut 1.955 + 1.956 +sub gettext { 1.957 + my $self=shift; 1.958 + my $text=shift; 1.959 + my (%opt)=@_; 1.960 + my $res; 1.961 + 1.962 + return "" unless defined($text) && length($text); # Avoid returning the header. 1.963 + my $validoption="reference wrap wrapcol"; 1.964 + my %validoption; 1.965 + 1.966 + map { $validoption{$_}=1 } (split(/ /,$validoption)); 1.967 + foreach (keys %opt) { 1.968 + Carp::confess "internal error: unknown arg $_.\n". 1.969 + "Here are the valid options: $validoption.\n" 1.970 + unless $validoption{$_}; 1.971 + } 1.972 + 1.973 + $text=canonize($text) 1.974 + if ($opt{'wrap'}); 1.975 + 1.976 + my $esc_text=escape_text($text); 1.977 + 1.978 + $self->{gettextqueries}++; 1.979 + 1.980 + if ( defined $self->{po}{$esc_text} 1.981 + and defined $self->{po}{$esc_text}{'msgstr'} 1.982 + and length $self->{po}{$esc_text}{'msgstr'} 1.983 + and ( not defined $self->{po}{$esc_text}{'flags'} 1.984 + or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) { 1.985 + 1.986 + $self->{gettexthits}++; 1.987 + $res = unescape_text($self->{po}{$esc_text}{'msgstr'}); 1.988 + if (defined $self->{po}{$esc_text}{'plural'}) { 1.989 + if ($self->{po}{$esc_text}{'plural'} eq "0") { 1.990 + warn wrap_mod("po4a gettextize", dgettext("po4a", 1.991 + "'%s' is the singular form of a message, ". 1.992 + "po4a will use the msgstr[0] translation (%s)."), 1.993 + $esc_text, $res); 1.994 + } else { 1.995 + warn wrap_mod("po4a gettextize", dgettext("po4a", 1.996 + "'%s' is the plural form of a message, ". 1.997 + "po4a will use the msgstr[1] translation (%s)."), 1.998 + $esc_text, $res); 1.999 + } 1.1000 + } 1.1001 + } else { 1.1002 + $res = $text; 1.1003 + } 1.1004 + 1.1005 + if ($opt{'wrap'}) { 1.1006 + if ($self->get_charset =~ /^utf-8$/i) { 1.1007 + $res=Encode::decode_utf8($res); 1.1008 + $res=wrap ($res, $opt{'wrapcol'} || 76); 1.1009 + $res=Encode::encode_utf8($res); 1.1010 + } else { 1.1011 + $res=wrap ($res, $opt{'wrapcol'} || 76); 1.1012 + } 1.1013 + } 1.1014 +# print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n"; 1.1015 + return $res; 1.1016 +} 1.1017 + 1.1018 +=item stats_get() 1.1019 + 1.1020 +Returns statistics about the hit ratio of gettext since the last time that 1.1021 +stats_clear() was called. Please note that it's not the same 1.1022 +statistics than the one printed by msgfmt --statistic. Here, it's statistics 1.1023 +about recent usage of the po file, while msgfmt reports the status of the 1.1024 +file. Example of use: 1.1025 + 1.1026 + [some use of the po file to translate stuff] 1.1027 + 1.1028 + ($percent,$hit,$queries) = $pofile->stats_get(); 1.1029 + print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n"; 1.1030 + 1.1031 +=cut 1.1032 + 1.1033 +sub stats_get() { 1.1034 + my $self=shift; 1.1035 + my ($h,$q)=($self->{gettexthits},$self->{gettextqueries}); 1.1036 + my $p = ($q == 0 ? 100 : int($h/$q*10000)/100); 1.1037 + 1.1038 +# $p =~ s/\.00//; 1.1039 +# $p =~ s/(\..)0/$1/; 1.1040 + 1.1041 + return ( $p,$h,$q ); 1.1042 +} 1.1043 + 1.1044 +=item stats_clear() 1.1045 + 1.1046 +Clears the statistics about gettext hits. 1.1047 + 1.1048 +=cut 1.1049 + 1.1050 +sub stats_clear { 1.1051 + my $self = shift; 1.1052 + $self->{gettextqueries} = 0; 1.1053 + $self->{gettexthits} = 0; 1.1054 +} 1.1055 + 1.1056 +=back 1.1057 + 1.1058 +=head1 Functions to build a message catalog 1.1059 + 1.1060 +=over 4 1.1061 + 1.1062 +=item push(%) 1.1063 + 1.1064 +Push a new entry at the end of the current catalog. The arguments should 1.1065 +form a hash table. The valid keys are: 1.1066 + 1.1067 +=over 4 1.1068 + 1.1069 +=item msgid 1.1070 + 1.1071 +the string in original language. 1.1072 + 1.1073 +=item msgstr 1.1074 + 1.1075 +the translation. 1.1076 + 1.1077 +=item reference 1.1078 + 1.1079 +an indication of where this string was found. Example: file.c:46 (meaning 1.1080 +in 'file.c' at line 46). It can be a space-separated list in case of 1.1081 +multiple occurrences. 1.1082 + 1.1083 +=item comment 1.1084 + 1.1085 +a comment added here manually (by the translators). The format here is free. 1.1086 + 1.1087 +=item automatic 1.1088 + 1.1089 +a comment which was automatically added by the string extraction 1.1090 +program. See the I<--add-comments> option of the B<xgettext> program for 1.1091 +more information. 1.1092 + 1.1093 +=item flags 1.1094 + 1.1095 +space-separated list of all defined flags for this entry. 1.1096 + 1.1097 +Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text, 1.1098 +smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text, 1.1099 +tcl-text, wrap, no-wrap and fuzzy. 1.1100 + 1.1101 +See the gettext documentation for their meaning. 1.1102 + 1.1103 +=item type 1.1104 + 1.1105 +This is mostly an internal argument: it is used while gettextizing 1.1106 +documents. The idea here is to parse both the original and the translation 1.1107 +into a po object, and merge them, using one's msgid as msgid and the 1.1108 +other's msgid as msgstr. To make sure that things get ok, each msgid in po 1.1109 +objects are given a type, based on their structure (like "chapt", "sect1", 1.1110 +"p" and so on in docbook). If the types of strings are not the same, that 1.1111 +means that both files do not share the same structure, and the process 1.1112 +reports an error. 1.1113 + 1.1114 +This information is written as automatic comment in the po file since this 1.1115 +gives to translators some context about the strings to translate. 1.1116 + 1.1117 +=item wrap 1.1118 + 1.1119 +boolean indicating whether whitespaces can be mangled in cosmetic 1.1120 +reformattings. If true, the string is canonized before use. 1.1121 + 1.1122 +This information is written to the po file using the 'wrap' or 'no-wrap' flag. 1.1123 + 1.1124 +=item wrapcol 1.1125 + 1.1126 +The column at which we should wrap (default: 76). 1.1127 + 1.1128 +This information is not written to the po file. 1.1129 + 1.1130 +=back 1.1131 + 1.1132 +=cut 1.1133 + 1.1134 +sub push { 1.1135 + my $self=shift; 1.1136 + my %entry=@_; 1.1137 + 1.1138 + my $validoption="wrap wrapcol type msgid msgstr automatic flags reference"; 1.1139 + my %validoption; 1.1140 + 1.1141 + map { $validoption{$_}=1 } (split(/ /,$validoption)); 1.1142 + foreach (keys %entry) { 1.1143 + Carp::confess "internal error: unknown arg $_.\n". 1.1144 + "Here are the valid options: $validoption.\n" 1.1145 + unless $validoption{$_}; 1.1146 + } 1.1147 + 1.1148 + unless ($entry{'wrap'}) { 1.1149 + $entry{'flags'} .= " no-wrap"; 1.1150 + } 1.1151 + if (defined ($entry{'msgid'})) { 1.1152 + $entry{'msgid'} = canonize($entry{'msgid'}) 1.1153 + if ($entry{'wrap'}); 1.1154 + 1.1155 + $entry{'msgid'} = escape_text($entry{'msgid'}); 1.1156 + } 1.1157 + if (defined ($entry{'msgstr'})) { 1.1158 + $entry{'msgstr'} = canonize($entry{'msgstr'}) 1.1159 + if ($entry{'wrap'}); 1.1160 + 1.1161 + $entry{'msgstr'} = escape_text($entry{'msgstr'}); 1.1162 + } 1.1163 + 1.1164 + $self->push_raw(%entry); 1.1165 +} 1.1166 + 1.1167 +# The same as push(), but assuming that msgid and msgstr are already escaped 1.1168 +sub push_raw { 1.1169 + my $self=shift; 1.1170 + my %entry=@_; 1.1171 + my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)= 1.1172 + ($entry{'msgid'},$entry{'msgstr'}, 1.1173 + $entry{'reference'},$entry{'comment'},$entry{'automatic'}, 1.1174 + $entry{'flags'},$entry{'type'},$entry{'transref'}); 1.1175 + my $keep_conflict = $entry{'conflict'}; 1.1176 + 1.1177 +# print STDERR "Push_raw\n"; 1.1178 +# print STDERR " msgid=>>>$msgid<<<\n" if $msgid; 1.1179 +# print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr; 1.1180 +# Carp::cluck " flags=$flags\n" if $flags; 1.1181 + 1.1182 + return unless defined($entry{'msgid'}); 1.1183 + 1.1184 + #no msgid => header definition 1.1185 + unless (length($entry{'msgid'})) { 1.1186 +# if (defined($self->{header}) && $self->{header} =~ /\S/) { 1.1187 +# warn dgettext("po4a","Redefinition of the header. ". 1.1188 +# "The old one will be discarded\n"); 1.1189 +# } FIXME: do that iff the header isn't the default one. 1.1190 + $self->{header}=$msgstr; 1.1191 + $self->{header_comment}=$comment; 1.1192 + my $charset = $self->get_charset; 1.1193 + if ($charset ne "CHARSET") { 1.1194 + $self->{encoder}=find_encoding($charset); 1.1195 + } else { 1.1196 + $self->{encoder}=find_encoding("ascii"); 1.1197 + } 1.1198 + return; 1.1199 + } 1.1200 + 1.1201 + if ($self->{options}{'porefs'} eq "none") { 1.1202 + $reference = ""; 1.1203 + } elsif ($self->{options}{'porefs'} eq "noline") { 1.1204 + $reference =~ s/:[0-9]*/:1/g; 1.1205 + } 1.1206 + 1.1207 + if (defined($self->{po}{$msgid})) { 1.1208 + warn wrap_mod("po4a::po", 1.1209 + dgettext("po4a","msgid defined twice: %s"), 1.1210 + $msgid) 1.1211 + if (0); # FIXME: put a verbose stuff 1.1212 + if ( defined $msgstr 1.1213 + and defined $self->{po}{$msgid}{'msgstr'} 1.1214 + and $self->{po}{$msgid}{'msgstr'} ne $msgstr) { 1.1215 + my $txt=quote_text($msgid); 1.1216 + my ($first,$second)= 1.1217 + (format_comment(". ",$self->{po}{$msgid}{'reference'}). 1.1218 + quote_text($self->{po}{$msgid}{'msgstr'}), 1.1219 + 1.1220 + format_comment(". ",$reference). 1.1221 + quote_text($msgstr)); 1.1222 + 1.1223 + if ($keep_conflict) { 1.1224 + if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) { 1.1225 + $msgstr = $self->{po}{$msgid}{'msgstr'}. 1.1226 + "\\n#-#-#-#-# $transref #-#-#-#-#\\n". 1.1227 + $msgstr; 1.1228 + } else { 1.1229 + $msgstr = "#-#-#-#-# ". 1.1230 + $self->{po}{$msgid}{'transref'}. 1.1231 + " #-#-#-#-#\\n". 1.1232 + $self->{po}{$msgid}{'msgstr'}."\\n". 1.1233 + "#-#-#-#-# $transref #-#-#-#-#\\n". 1.1234 + $msgstr; 1.1235 + } 1.1236 + # Every msgid will have the same list of references. 1.1237 + # Only keep the last list. 1.1238 + $self->{po}{$msgid}{'reference'} = ""; 1.1239 + } else { 1.1240 + warn wrap_msg(dgettext("po4a", 1.1241 + "Translations don't match for:\n". 1.1242 + "%s\n". 1.1243 + "-->First translation:\n". 1.1244 + "%s\n". 1.1245 + " Second translation:\n". 1.1246 + "%s\n". 1.1247 + " Old translation discarded."), 1.1248 + $txt,$first,$second); 1.1249 + } 1.1250 + } 1.1251 + } 1.1252 + if (defined $transref) { 1.1253 + $self->{po}{$msgid}{'transref'} = $transref; 1.1254 + } 1.1255 + if (defined $reference) { 1.1256 + if (defined $self->{po}{$msgid}{'reference'}) { 1.1257 + $self->{po}{$msgid}{'reference'} .= " ".$reference; 1.1258 + } else { 1.1259 + $self->{po}{$msgid}{'reference'} = $reference; 1.1260 + } 1.1261 + } 1.1262 + $self->{po}{$msgid}{'msgstr'} = $msgstr; 1.1263 + $self->{po}{$msgid}{'comment'} = $comment; 1.1264 + $self->{po}{$msgid}{'automatic'} = $automatic; 1.1265 + if (defined($self->{po}{$msgid}{'pos_doc'})) { 1.1266 + $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++; 1.1267 + } else { 1.1268 + $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++; 1.1269 + } 1.1270 + unless (defined($self->{po}{$msgid}{'pos'})) { 1.1271 + $self->{po}{$msgid}{'pos'} = $self->{count}++; 1.1272 + } 1.1273 + $self->{po}{$msgid}{'type'} = $type; 1.1274 + $self->{po}{$msgid}{'plural'} = $entry{'plural'} 1.1275 + if defined $entry{'plural'}; 1.1276 + 1.1277 + if (defined($flags)) { 1.1278 + $flags = " $flags "; 1.1279 + $flags =~ s/,/ /g; 1.1280 + foreach my $flag (@known_flags) { 1.1281 + if ($flags =~ /\s$flag\s/) { # if flag to be set 1.1282 + unless ( defined($self->{po}{$msgid}{'flags'}) 1.1283 + && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) { 1.1284 + # flag not already set 1.1285 + if (defined $self->{po}{$msgid}{'flags'}) { 1.1286 + $self->{po}{$msgid}{'flags'} .= " ".$flag; 1.1287 + } else { 1.1288 + $self->{po}{$msgid}{'flags'} = $flag; 1.1289 + } 1.1290 + } 1.1291 + } 1.1292 + } 1.1293 + } 1.1294 +# print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n"; 1.1295 + 1.1296 +} 1.1297 + 1.1298 +=back 1.1299 + 1.1300 +=head1 Miscellaneous functions 1.1301 + 1.1302 +=over 4 1.1303 + 1.1304 +=item count_entries() 1.1305 + 1.1306 +Returns the number of entries in the catalog (without the header). 1.1307 + 1.1308 +=cut 1.1309 + 1.1310 +sub count_entries($) { 1.1311 + my $self=shift; 1.1312 + return $self->{count}; 1.1313 +} 1.1314 + 1.1315 +=item count_entries_doc() 1.1316 + 1.1317 +Returns the number of entries in document. If a string appears multiple times 1.1318 +in the document, it will be counted multiple times 1.1319 + 1.1320 +=cut 1.1321 + 1.1322 +sub count_entries_doc($) { 1.1323 + my $self=shift; 1.1324 + return $self->{count_doc}; 1.1325 +} 1.1326 + 1.1327 +=item msgid($) 1.1328 + 1.1329 +Returns the msgid of the given number. 1.1330 + 1.1331 +=cut 1.1332 + 1.1333 +sub msgid($$) { 1.1334 + my $self=shift; 1.1335 + my $num=shift; 1.1336 + 1.1337 + foreach my $msgid ( keys %{$self->{po}} ) { 1.1338 + return $msgid if ($self->{po}{$msgid}{'pos'} eq $num); 1.1339 + } 1.1340 + return undef; 1.1341 +} 1.1342 + 1.1343 +=item msgid_doc($) 1.1344 + 1.1345 +Returns the msgid with the given position in the document. 1.1346 + 1.1347 +=cut 1.1348 + 1.1349 +sub msgid_doc($$) { 1.1350 + my $self=shift; 1.1351 + my $num=shift; 1.1352 + 1.1353 + foreach my $msgid ( keys %{$self->{po}} ) { 1.1354 + foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) { 1.1355 + return $msgid if ($pos eq $num); 1.1356 + } 1.1357 + } 1.1358 + return undef; 1.1359 +} 1.1360 + 1.1361 +=item get_charset() 1.1362 + 1.1363 +Returns the character set specified in the po header. If it hasn't been 1.1364 +set, it will return "CHARSET". 1.1365 + 1.1366 +=cut 1.1367 + 1.1368 +sub get_charset() { 1.1369 + my $self=shift; 1.1370 + 1.1371 + $self->{header} =~ /charset=(.*?)[\s\\]/; 1.1372 + 1.1373 + if (defined $1) { 1.1374 + return $1; 1.1375 + } else { 1.1376 + return "CHARSET"; 1.1377 + } 1.1378 +} 1.1379 + 1.1380 +=item set_charset($) 1.1381 + 1.1382 +This sets the character set of the po header to the value specified in its 1.1383 +first argument. If you never call this function (and no file with a specified 1.1384 +character set is read), the default value is left to "CHARSET". This value 1.1385 +doesn't change the behavior of this module, it's just used to fill that field 1.1386 +in the header, and to return it in get_charset(). 1.1387 + 1.1388 +=cut 1.1389 + 1.1390 +sub set_charset() { 1.1391 + my $self=shift; 1.1392 + 1.1393 + my ($newchar,$oldchar); 1.1394 + $newchar = shift; 1.1395 + $oldchar = $self->get_charset(); 1.1396 + 1.1397 + $self->{header} =~ s/$oldchar/$newchar/; 1.1398 + $self->{encoder}=find_encoding($newchar); 1.1399 +} 1.1400 + 1.1401 +#----[ helper functions ]--------------------------------------------------- 1.1402 + 1.1403 +# transforme the string from its po file representation to the form which 1.1404 +# should be used to print it 1.1405 +sub unescape_text { 1.1406 + my $text = shift; 1.1407 + 1.1408 + print STDERR "\nunescape [$text]====" if $debug{'escape'}; 1.1409 + $text = join("",split(/\n/,$text)); 1.1410 + $text =~ s/\\"/"/g; 1.1411 + # unescape newlines 1.1412 + # NOTE on \G: 1.1413 + # The following regular expression introduce newlines. 1.1414 + # Thus, ^ doesn't match all beginnings of lines. 1.1415 + # \G is a zero-width assertion that matches the position 1.1416 + # of the previous substitution with s///g. As every 1.1417 + # substitution ends by a newline, it always matches a 1.1418 + # position just after a newline. 1.1419 + $text =~ s/( # $1: 1.1420 + (\G|[^\\]) # beginning of the line or any char 1.1421 + # different from '\' 1.1422 + (\\\\)* # followed by any even number of '\' 1.1423 + )\\n # and followed by an escaped newline 1.1424 + /$1\n/sgx; # single string, match globally, allow comments 1.1425 + # unescape tabulations 1.1426 + $text =~ s/( # $1: 1.1427 + (\G|[^\\])# beginning of the line or any char 1.1428 + # different from '\' 1.1429 + (\\\\)* # followed by any even number of '\' 1.1430 + )\\t # and followed by an escaped tabulation 1.1431 + /$1\t/mgx; # multilines string, match globally, allow comments 1.1432 + # and unescape the escape character 1.1433 + $text =~ s/\\\\/\\/g; 1.1434 + print STDERR ">$text<\n" if $debug{'escape'}; 1.1435 + 1.1436 + return $text; 1.1437 +} 1.1438 + 1.1439 +# transform the string to its representation as it should be written in po 1.1440 +# files 1.1441 +sub escape_text { 1.1442 + my $text = shift; 1.1443 + 1.1444 + print STDERR "\nescape [$text]====" if $debug{'escape'}; 1.1445 + $text =~ s/\\/\\\\/g; 1.1446 + $text =~ s/"/\\"/g; 1.1447 + $text =~ s/\n/\\n/g; 1.1448 + $text =~ s/\t/\\t/g; 1.1449 + print STDERR ">$text<\n" if $debug{'escape'}; 1.1450 + 1.1451 + return $text; 1.1452 +} 1.1453 + 1.1454 +# put quotes around the string on each lines (without escaping it) 1.1455 +# It does also normalize the text (ie, make sure its representation is wraped 1.1456 +# on the 80th char, but without changing the meaning of the string) 1.1457 +sub quote_text { 1.1458 + my $string = shift; 1.1459 + 1.1460 + return '""' unless defined($string) && length($string); 1.1461 + 1.1462 + print STDERR "\nquote [$string]====" if $debug{'quote'}; 1.1463 + # break lines on newlines, if any 1.1464 + # see unescape_text for an explanation on \G 1.1465 + $string =~ s/( # $1: 1.1466 + (\G|[^\\]) # beginning of the line or any char 1.1467 + # different from '\' 1.1468 + (\\\\)* # followed by any even number of '\' 1.1469 + \\n) # and followed by an escaped newline 1.1470 + /$1\n/sgx; # single string, match globally, allow comments 1.1471 + $string = wrap($string); 1.1472 + my @string = split(/\n/,$string); 1.1473 + $string = join ("\"\n\"",@string); 1.1474 + $string = "\"$string\""; 1.1475 + if (scalar @string > 1 && $string[0] ne '') { 1.1476 + $string = "\"\"\n".$string; 1.1477 + } 1.1478 + 1.1479 + print STDERR ">$string<\n" if $debug{'quote'}; 1.1480 + return $string; 1.1481 +} 1.1482 + 1.1483 +# undo the work of the quote_text function 1.1484 +sub unquote_text { 1.1485 + my $string = shift; 1.1486 + print STDERR "\nunquote [$string]====" if $debug{'quote'}; 1.1487 + $string =~ s/^""\\n//s; 1.1488 + $string =~ s/^"(.*)"$/$1/s; 1.1489 + $string =~ s/"\n"//gm; 1.1490 + # Note: an even number of '\' could precede \\n, but I could not build a 1.1491 + # document to test this 1.1492 + $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm; 1.1493 + $string =~ s|!!DUMMYPOPM!!|\\n|gm; 1.1494 + print STDERR ">$string<\n" if $debug{'quote'}; 1.1495 + return $string; 1.1496 +} 1.1497 + 1.1498 +# canonize the string: write it on only one line, changing consecutive 1.1499 +# whitespace to only one space. 1.1500 +# Warning, it changes the string and should only be called if the string is 1.1501 +# plain text 1.1502 +sub canonize { 1.1503 + my $text=shift; 1.1504 + print STDERR "\ncanonize [$text]====" if $debug{'canonize'}; 1.1505 + $text =~ s/^ *//s; 1.1506 + $text =~ s/^[ \t]+/ /gm; 1.1507 + # if ($text eq "\n"), it messed up the first string (header) 1.1508 + $text =~ s/\n/ /gm if ($text ne "\n"); 1.1509 + $text =~ s/([.)]) +/$1 /gm; 1.1510 + $text =~ s/([^.)]) */$1 /gm; 1.1511 + $text =~ s/ *$//s; 1.1512 + print STDERR ">$text<\n" if $debug{'canonize'}; 1.1513 + return $text; 1.1514 +} 1.1515 + 1.1516 +# wraps the string. We don't use Text::Wrap since it mangles whitespace at 1.1517 +# the end of splited line 1.1518 +sub wrap { 1.1519 + my $text=shift; 1.1520 + return "0" if ($text eq '0'); 1.1521 + my $col=shift || 76; 1.1522 + my @lines=split(/\n/,"$text"); 1.1523 + my $res=""; 1.1524 + my $first=1; 1.1525 + while (defined(my $line=shift @lines)) { 1.1526 + if ($first && length($line) > $col - 10) { 1.1527 + unshift @lines,$line; 1.1528 + $first=0; 1.1529 + next; 1.1530 + } 1.1531 + if (length($line) > $col) { 1.1532 + my $pos=rindex($line," ",$col); 1.1533 + while (substr($line,$pos-1,1) eq '.' && $pos != -1) { 1.1534 + $pos=rindex($line," ",$pos-1); 1.1535 + } 1.1536 + if ($pos == -1) { 1.1537 + # There are no spaces in the first $col chars, pick-up the 1.1538 + # first space 1.1539 + $pos = index($line," "); 1.1540 + } 1.1541 + if ($pos != -1) { 1.1542 + my $end=substr($line,$pos+1); 1.1543 + $line=substr($line,0,$pos+1); 1.1544 + if ($end =~ s/^( +)//) { 1.1545 + $line .= $1; 1.1546 + } 1.1547 + unshift @lines,$end; 1.1548 + } 1.1549 + } 1.1550 + $first=0; 1.1551 + $res.="$line\n"; 1.1552 + } 1.1553 + # Restore the original trailing spaces 1.1554 + $res =~ s/\s+$//s; 1.1555 + if ($text =~ m/(\s+)$/s) { 1.1556 + $res .= $1; 1.1557 + } 1.1558 + return $res; 1.1559 +} 1.1560 + 1.1561 +# outputs properly a '# ... ' line to be put in the po file 1.1562 +sub format_comment { 1.1563 + my $comment=shift; 1.1564 + my $char=shift; 1.1565 + my $result = "#". $char . $comment; 1.1566 + $result =~ s/\n/\n#$char/gs; 1.1567 + $result =~ s/^#$char$/#/gm; 1.1568 + $result .= "\n"; 1.1569 + return $result; 1.1570 +} 1.1571 + 1.1572 + 1.1573 +1; 1.1574 +__END__ 1.1575 + 1.1576 +=back 1.1577 + 1.1578 +=head1 AUTHORS 1.1579 + 1.1580 + Denis Barbier <barbier@linuxfr.org> 1.1581 + Martin Quinson (mquinson#debian.org) 1.1582 + 1.1583 +=cut