sjero.net Git - linphone/blob - linphone/intltool-merge.in

   1 #!@INTLTOOL_PERL@ -w
   2
   3 #
   4 #  The Intltool Message Merger
   5 #
   6 #  Copyright (C) 2000, 2002 Free Software Foundation.
   7 #  Copyright (C) 2000, 2001 Eazel, Inc
   8 #
   9 #  Intltool is free software; you can redistribute it and/or
  10 #  modify it under the terms of the GNU General Public License
  11 #  version 2 published by the Free Software Foundation.
  12 #
  13 #  Intltool is distributed in the hope that it will be useful,
  14 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 #  General Public License for more details.
  17 #
  18 #  You should have received a copy of the GNU General Public License
  19 #  along with this program; if not, write to the Free Software
  20 #  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21 #
  22 #  As a special exception to the GNU General Public License, if you
  23 #  distribute this file as part of a program that contains a
  24 #  configuration script generated by Autoconf, you may include it under
  25 #  the same distribution terms that you use for the rest of that program.
  26 #
  27 #  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
  28 #            Kenneth Christiansen <kenneth@gnu.org>
  29 #            Darin Adler <darin@bentspoon.com>
  30 #
  31 #  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
  32 #
  33
  34 ## Release information
  35 my $PROGRAM = "intltool-merge";
  36 my $PACKAGE = "intltool";
  37 my $VERSION = "0.22";
  38
  39 ## Loaded modules
  40 use strict;
  41 use Getopt::Long;
  42
  43 ## Scalars used by the option stuff
  44 my $HELP_ARG = 0;
  45 my $VERSION_ARG = 0;
  46 my $BA_STYLE_ARG = 0;
  47 my $XML_STYLE_ARG = 0;
  48 my $KEYS_STYLE_ARG = 0;
  49 my $DESKTOP_STYLE_ARG = 0;
  50 my $SCHEMAS_STYLE_ARG = 0;
  51 my $QUIET_ARG = 0;
  52 my $PASS_THROUGH_ARG = 0;
  53 my $UTF8_ARG = 0;
  54 my $cache_file;
  55
  56 ## Handle options
  57 GetOptions
  58 (
  59  "help" => \$HELP_ARG,
  60  "version" => \$VERSION_ARG,
  61  "quiet|q" => \$QUIET_ARG,
  62  "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
  63  "ba-style|b" => \$BA_STYLE_ARG,
  64  "xml-style|x" => \$XML_STYLE_ARG,
  65  "keys-style|k" => \$KEYS_STYLE_ARG,
  66  "desktop-style|d" => \$DESKTOP_STYLE_ARG,
  67  "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
  68  "pass-through|p" => \$PASS_THROUGH_ARG,
  69  "utf8|u" => \$UTF8_ARG,
  70  "cache|c=s" => \$cache_file
  71  ) or &error;
  72
  73 my $PO_DIR;
  74 my $FILE;
  75 my $OUTFILE;
  76
  77 my %po_files_by_lang = ();
  78 my %translations = ();
  79
  80 # Use this instead of \w for XML files to handle more possible characters.
  81 my $w = "[-A-Za-z0-9._:]";
  82
  83 # XML quoted string contents
  84 my $q = "[^\\\"]*";
  85
  86 ## Check for options.
  87
  88 if ($VERSION_ARG) {
  89         &print_version;
  90 } elsif ($HELP_ARG) {
  91         &print_help;
  92 } elsif ($BA_STYLE_ARG && @ARGV > 2) {
  93         &preparation;
  94         &print_message;
  95         &ba_merge_translations;
  96         &finalize;
  97 } elsif ($XML_STYLE_ARG && @ARGV > 2) {
  98         &utf8_sanity_check;
  99         &preparation;
 100         &print_message;
 101         &xml_merge_translations;
 102         &finalize;
 103 } elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
 104         &utf8_sanity_check;
 105         &preparation;
 106         &print_message;
 107         &keys_merge_translations;
 108         &finalize;
 109 } elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
 110         &preparation;
 111         &print_message;
 112         &desktop_merge_translations;
 113         &finalize;
 114 } elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
 115         &preparation;
 116         &print_message;
 117         &schemas_merge_translations;
 118         &finalize;
 119 } else {
 120         &print_help;
 121 }
 122
 123 exit;
 124
 125 ## Sub for printing release information
 126 sub print_version
 127 {
 128     print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
 129     print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
 130     print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
 131     print "Copyright (C) 2000-2001 Eazel, Inc.\n";
 132     print "This is free software; see the source for copying conditions.  There is NO\n";
 133     print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
 134     exit;
 135 }
 136
 137 ## Sub for printing usage information
 138 sub print_help
 139 {
 140     print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
 141     print "Generates an output file that includes translated versions of some attributes,\n";
 142     print "from an untranslated source and a po directory that includes translations.\n\n";
 143     print "  -b, --ba-style         includes translations in the bonobo-activation style\n";
 144     print "  -d, --desktop-style    includes translations in the desktop style\n";
 145     print "  -k, --keys-style       includes translations in the keys style\n";
 146     print "  -s, --schemas-style    includes translations in the schemas style\n";
 147     print "  -x, --xml-style        includes translations in the standard xml style\n";
 148     print "  -u, --utf8             convert all strings to UTF-8 before merging\n";
 149     print "  -p, --pass-through     use strings as found in .po files, without\n";
 150     print "                         conversion (STRONGLY unrecommended with -x)\n";
 151     print "  -q, --quiet            suppress most messages\n";
 152     print "      --help             display this help and exit\n";
 153     print "      --version          output version information and exit\n";
 154     print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
 155     print "<xml-i18n-tools-list\@gnome.org>.\n";
 156     exit;
 157 }
 158
 159
 160 ## Sub for printing error messages
 161 sub print_error
 162 {
 163     print "Try `${PROGRAM} --help' for more information.\n";
 164     exit;
 165 }
 166
 167
 168 sub print_message
 169 {
 170     print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
 171 }
 172
 173
 174 sub preparation
 175 {
 176     $PO_DIR = $ARGV[0];
 177     $FILE = $ARGV[1];
 178     $OUTFILE = $ARGV[2];
 179
 180     &gather_po_files;
 181     &get_translation_database;
 182 }
 183
 184 # General-purpose code for looking up translations in .po files
 185
 186 sub po_file2lang
 187 {
 188     my ($tmp) = @_;
 189     $tmp =~ s/^.*\/(.*)\.po$/$1/;
 190     return $tmp;
 191 }
 192
 193 sub gather_po_files
 194 {
 195     for my $po_file (glob "$PO_DIR/*.po") {
 196         $po_files_by_lang{po_file2lang($po_file)} = $po_file;
 197     }
 198 }
 199
 200 sub get_po_encoding
 201 {
 202     my ($in_po_file) = @_;
 203     my $encoding = "";
 204
 205     open IN_PO_FILE, $in_po_file or die;
 206     while (<IN_PO_FILE>) {
 207         ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
 208         if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
 209             $encoding = $1;
 210             last;
 211         }
 212     }
 213     close IN_PO_FILE;
 214
 215     if (!$encoding) {
 216         print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
 217         $encoding = "ISO-8859-1";
 218     }
 219     return $encoding
 220 }
 221
 222 sub utf8_sanity_check
 223 {
 224     if (!$UTF8_ARG) {
 225         if (!$PASS_THROUGH_ARG) {
 226             $PASS_THROUGH_ARG="1";
 227         }
 228     }
 229 }
 230
 231 sub get_translation_database
 232 {
 233     if ($cache_file) {
 234         &get_cached_translation_database;
 235     } else {
 236         &create_translation_database;
 237     }
 238 }
 239
 240 sub get_newest_po_age
 241 {
 242     my $newest_age;
 243
 244     foreach my $file (values %po_files_by_lang) {
 245         my $file_age = -M $file;
 246         $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
 247     }
 248
 249     return $newest_age;
 250 }
 251
 252 sub create_cache
 253 {
 254     print "Generating and caching the translation database\n" unless $QUIET_ARG;
 255
 256     &create_translation_database;
 257
 258     open CACHE, ">$cache_file" || die;
 259     print CACHE join "\x01", %translations;
 260     close CACHE;
 261 }
 262
 263 sub load_cache
 264 {
 265     print "Found cached translation database\n" unless $QUIET_ARG;
 266
 267     my $contents;
 268     open CACHE, "<$cache_file" || die;
 269     {
 270         local $/;
 271         $contents = <CACHE>;
 272     }
 273     close CACHE;
 274     %translations = split "\x01", $contents;
 275 }
 276
 277 sub get_cached_translation_database
 278 {
 279     my $cache_file_age = -M $cache_file;
 280     if (defined $cache_file_age) {
 281         if ($cache_file_age <= &get_newest_po_age) {
 282             &load_cache;
 283             return;
 284         }
 285         print "Found too-old cached translation database\n" unless $QUIET_ARG;
 286     }
 287
 288     &create_cache;
 289 }
 290
 291 sub create_translation_database
 292 {
 293     for my $lang (keys %po_files_by_lang) {
 294         my $po_file = $po_files_by_lang{$lang};
 295
 296         if ($UTF8_ARG) {
 297             my $encoding = get_po_encoding ($po_file);
 298             if (lc $encoding eq "utf-8") {
 299                 open PO_FILE, "<$po_file";
 300             } else {
 301                 my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
 302                 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
 303             }
 304         } else {
 305             open PO_FILE, "<$po_file";
 306         }
 307
 308         my $nextfuzzy = 0;
 309         my $inmsgid = 0;
 310         my $inmsgstr = 0;
 311         my $msgid = "";
 312         my $msgstr = "";
 313         while (<PO_FILE>) {
 314             $nextfuzzy = 1 if /^#, fuzzy/;
 315             if (/^msgid "((\\.|[^\\])*)"/ ) {
 316                 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 317                 $msgid = "";
 318                 $msgstr = "";
 319
 320                 if ($nextfuzzy) {
 321                     $inmsgid = 0;
 322                 } else {
 323                     $msgid = unescape_po_string($1);
 324                     $inmsgid = 1;
 325                 }
 326                 $inmsgstr = 0;
 327                 $nextfuzzy = 0;
 328             }
 329             if (/^msgstr "((\\.|[^\\])*)"/) {
 330                 $msgstr = unescape_po_string($1);
 331                 $inmsgstr = 1;
 332                 $inmsgid = 0;
 333             }
 334             if (/^"((\\.|[^\\])*)"/) {
 335                 $msgid .= unescape_po_string($1) if $inmsgid;
 336                 $msgstr .= unescape_po_string($1) if $inmsgstr;
 337             }
 338         }
 339         $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 340     }
 341 }
 342
 343 sub finalize
 344 {
 345 }
 346
 347 sub unescape_one_sequence
 348 {
 349     my ($sequence) = @_;
 350
 351     return "\\" if $sequence eq "\\\\";
 352     return "\"" if $sequence eq "\\\"";
 353
 354     # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
 355     # \xXX (hex) and has a comment saying they want to handle \u and \U.
 356
 357     return $sequence;
 358 }
 359
 360 sub unescape_po_string
 361 {
 362     my ($string) = @_;
 363
 364     $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
 365
 366     return $string;
 367 }
 368
 369 sub entity_decode
 370 {
 371     local ($_) = @_;
 372
 373     s/&apos;/'/g; # '
 374     s/&quot;/"/g; # "
 375     s/&amp;/&/g;
 376
 377     return $_;
 378 }
 379
 380 sub entity_encode
 381 {
 382     my ($pre_encoded) = @_;
 383
 384     my @list_of_chars = unpack ('C*', $pre_encoded);
 385
 386     if ($PASS_THROUGH_ARG) {
 387         return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
 388     } else {
 389         return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
 390     }
 391 }
 392
 393 sub entity_encode_int_minimalist
 394 {
 395     return "&quot;" if $_ == 34;
 396     return "&amp;" if $_ == 38;
 397     return "&apos;" if $_ == 39;
 398     return chr $_;
 399 }
 400
 401 sub entity_encode_int_even_high_bit
 402 {
 403     if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
 404         # the ($_ > 127) should probably be removed
 405         return "&#" . $_ . ";";
 406     } else {
 407         return chr $_;
 408     }
 409 }
 410
 411 sub entity_encoded_translation
 412 {
 413     my ($lang, $string) = @_;
 414
 415     my $translation = $translations{$lang, $string};
 416     return $string if !$translation;
 417     return entity_encode ($translation);
 418 }
 419
 420 ## XML (bonobo-activation specific) merge code
 421
 422 sub ba_merge_translations
 423 {
 424     my $source;
 425
 426     {
 427        local $/; # slurp mode
 428        open INPUT, "<$FILE" or die "can't open $FILE: $!";
 429        $source = <INPUT>;
 430        close INPUT;
 431     }
 432
 433     open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
 434
 435     while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
 436         print OUTPUT $1;
 437
 438         my $node = $2 . "\n";
 439
 440         my @strings = ();
 441         $_ = $node;
 442         while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
 443              push @strings, entity_decode($3);
 444         }
 445         print OUTPUT;
 446
 447         my %langs;
 448         for my $string (@strings) {
 449             for my $lang (keys %po_files_by_lang) {
 450                 $langs{$lang} = 1 if $translations{$lang, $string};
 451             }
 452         }
 453
 454         for my $lang (sort keys %langs) {
 455             $_ = $node;
 456             s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
 457             s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
 458             print OUTPUT;
 459         }
 460     }
 461
 462     print OUTPUT $source;
 463
 464     close OUTPUT;
 465 }
 466
 467
 468 ## XML (non-bonobo-activation) merge code
 469
 470 sub xml_merge_translations
 471 {
 472     my $source;
 473
 474     {
 475        local $/; # slurp mode
 476        open INPUT, "<$FILE" or die "can't open $FILE: $!";
 477        $source = <INPUT>;
 478        close INPUT;
 479     }
 480
 481     open OUTPUT, ">$OUTFILE" or die;
 482
 483     # FIXME: support attribute translations
 484
 485     # Empty nodes never need translation, so unmark all of them.
 486     # For example, <_foo/> is just replaced by <foo/>.
 487     $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
 488
 489     # Support for <_foo>blah</_foo> style translations.
 490     while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
 491         print OUTPUT $1;
 492
 493         my $spaces = $2;
 494         my $tag = $3;
 495         my $string = $4;
 496
 497         print OUTPUT "$spaces<$tag>$string</$tag>\n";
 498
 499         $string =~ s/\s+/ /g;
 500         $string =~ s/^ //;
 501         $string =~ s/ $//;
 502         $string = entity_decode($string);
 503
 504         for my $lang (sort keys %po_files_by_lang) {
 505             my $translation = $translations{$lang, $string};
 506             next if !$translation;
 507             $translation = entity_encode($translation);
 508             print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
 509         }
 510     }
 511
 512     print OUTPUT $source;
 513
 514     close OUTPUT;
 515 }
 516
 517 sub keys_merge_translations
 518 {
 519     open INPUT, "<${FILE}" or die;
 520     open OUTPUT, ">${OUTFILE}" or die;
 521
 522     while (<INPUT>) {
 523         if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
 524             my $string = $3;
 525
 526             print OUTPUT;
 527
 528             my $non_translated_line = $_;
 529
 530             for my $lang (sort keys %po_files_by_lang) {
 531                 my $translation = $translations{$lang, $string};
 532                 next if !$translation;
 533
 534                 $_ = $non_translated_line;
 535                 s/(\w+)=.*/[$lang]$1=$translation/;
 536                 print OUTPUT;
 537             }
 538         } else {
 539             print OUTPUT;
 540         }
 541     }
 542
 543     close OUTPUT;
 544     close INPUT;
 545 }
 546
 547 sub desktop_merge_translations
 548 {
 549     open INPUT, "<${FILE}" or die;
 550     open OUTPUT, ">${OUTFILE}" or die;
 551
 552     while (<INPUT>) {
 553         if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
 554             my $string = $3;
 555
 556             print OUTPUT;
 557
 558             my $non_translated_line = $_;
 559
 560             for my $lang (sort keys %po_files_by_lang) {
 561                 my $translation = $translations{$lang, $string};
 562                 next if !$translation;
 563
 564                 $_ = $non_translated_line;
 565                 s/(\w+)=.*/${1}[$lang]=$translation/;
 566                 print OUTPUT;
 567             }
 568         } else {
 569             print OUTPUT;
 570         }
 571     }
 572
 573     close OUTPUT;
 574     close INPUT;
 575 }
 576
 577 sub schemas_merge_translations
 578 {
 579     my $source;
 580
 581     {
 582        local $/; # slurp mode
 583        open INPUT, "<$FILE" or die "can't open $FILE: $!";
 584        $source = <INPUT>;
 585        close INPUT;
 586     }
 587
 588     open OUTPUT, ">$OUTFILE" or die;
 589
 590     # FIXME: support attribute translations
 591
 592     # Empty nodes never need translation, so unmark all of them.
 593     # For example, <_foo/> is just replaced by <foo/>.
 594     $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
 595
 596     # Support for <_foo>blah</_foo> style translations.
 597
 598     my $regex_start = "^(.*?)([ \t]*)<locale name=\"C\">";
 599     my $regex_short = "([ \t\n]*)<short>(.*?)</short>";
 600     my $regex_long  = "([ \t\n]*)<long>(.*?)</long>";
 601     my $regex_end   = "([ \t\n]*)</locale>";
 602
 603     while ($source =~ s|$regex_start$regex_short$regex_long$regex_end||s) {
 604         print OUTPUT $1;
 605
 606         my $locale_start_spaces = $2;
 607         my $locale_end_spaces = $7;
 608         my $short_spaces = $3;
 609         my $short_string = $4;
 610         my $long_spaces = $5;
 611         my $long_string = $6;
 612
 613         # English first
 614
 615         print OUTPUT "$locale_start_spaces<locale name=\"C\">";
 616         print OUTPUT "$short_spaces<short>$short_string</short>";
 617         print OUTPUT "$long_spaces<long>$long_string</long>";
 618         print OUTPUT "$locale_end_spaces</locale>";
 619
 620         $short_string =~ s/\s+/ /g;
 621         $short_string =~ s/^ //;
 622         $short_string =~ s/ $//;
 623         $short_string = entity_decode($short_string);
 624
 625         $long_string =~ s/\s+/ /g;
 626         $long_string =~ s/^ //;
 627         $long_string =~ s/ $//;
 628         $long_string = entity_decode($long_string);
 629
 630         for my $lang (sort keys %po_files_by_lang) {
 631             my $short_translation = $translations{$lang, $short_string};
 632             my $long_translation  = $translations{$lang, $long_string};
 633
 634             next if (!$short_translation && !$long_translation);
 635
 636             print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
 637
 638             if ($short_translation)
 639             {
 640                 $short_translation = entity_encode($short_translation);
 641                 print OUTPUT "$short_spaces<short>$short_translation</short>";
 642             }
 643
 644             if ($long_translation)
 645             {
 646                 $long_translation = entity_encode($long_translation);
 647                 print OUTPUT "$long_spaces<long>$long_translation</long>";
 648             }
 649
 650             print OUTPUT "$locale_end_spaces</locale>";
 651         }
 652     }
 653
 654     print OUTPUT $source;
 655
 656     close OUTPUT;
 657 }