/usr/bin/intltool-extract

#!/usr/bin/perl -w 
# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-

#
#  The Intltool Message Extractor
#
#  Copyright (C) 2000-2001, 2003 Free Software Foundation.
#
#  Intltool is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of the
#  License, or (at your option) any later version.
#
#  Intltool is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  As a special exception to the GNU General Public License, if you
#  distribute this file as part of a program that contains a
#  configuration script generated by Autoconf, you may include it under
#  the same distribution terms that you use for the rest of that program.
#
#  Authors: Kenneth Christiansen <kenneth@gnu.org>
#           Darin Adler <darin@bentspoon.com>
#

## Release information
my $PROGRAM      = "intltool-extract";
my $PACKAGE      = "intltool";
my $VERSION      = "0.50.2";

## Loaded modules
use strict; 
use File::Basename;
use Getopt::Long;

## Scalars used by the option stuff
my $TYPE_ARG	= "0";
my $LOCAL_ARG	= "0";
my $HELP_ARG 	= "0";
my $VERSION_ARG = "0";
my $UPDATE_ARG  = "0";
my $QUIET_ARG   = "0";
my $SRCDIR_ARG	= ".";
my $NOMSGCTXT_ARG = "0";

my $FILE;
my $OUTFILE;

my $gettext_type = "";
my $input;
my %messages = ();
my @messages_sorted = ();
my %loc = ();
my %count = ();
my %comments = ();
my $strcount = 0;

my $XMLCOMMENT = "";

## Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";

## Always print first
$| = 1;

## Handle options
GetOptions (
	    "type=s"     => \$TYPE_ARG,
            "local|l"    => \$LOCAL_ARG,
            "help|h"     => \$HELP_ARG,
            "version|v"  => \$VERSION_ARG,
            "update"     => \$UPDATE_ARG,
	    "quiet|q"    => \$QUIET_ARG,
	    "srcdir=s"	 => \$SRCDIR_ARG,
	    "nomsgctxt"  => \$NOMSGCTXT_ARG,
            ) or &error;

&split_on_argument;


## Check for options. 
## This section will check for the different options.

sub split_on_argument {

    if ($VERSION_ARG) {
        &version;

    } elsif ($HELP_ARG) {
	&help;
        
    } elsif ($LOCAL_ARG) {
        &place_local;
        &extract;

    } elsif ($UPDATE_ARG) {
	&place_normal;
	&extract;

    } elsif (@ARGV > 0) {
	&place_normal;
	&message;
	&extract;

    } else {
	&help;

    }  
}    

sub place_normal {
    $FILE	 = $ARGV[0];
    $OUTFILE     = "$FILE.h";

    my $dirname = dirname ($OUTFILE);
    if (! -d "$dirname" && $dirname ne "") {
        system ("mkdir -p $dirname");
    }
}   

sub place_local {
    $FILE	 = $ARGV[0];
    $OUTFILE     = fileparse($FILE, ());
    if (!-e "tmp/") { 
        system("mkdir tmp/"); 
    }
    $OUTFILE     = "./tmp/$OUTFILE.h"
}

sub determine_type {
   if ($TYPE_ARG =~ /^gettext\/(.*)/) {
	$gettext_type=$1
   }
}

## Sub for printing release information
sub version{
    print <<_EOF_;
${PROGRAM} (${PACKAGE}) $VERSION
Copyright (C) 2000, 2003 Free Software Foundation, Inc.
Written by Kenneth Christiansen, 2000.

This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
_EOF_
    exit;
}

## Sub for printing usage information
sub help {
    print <<_EOF_;
Usage: ${PROGRAM} [OPTION]... [FILENAME]
Generates a header file from an XML source file.

It grabs all strings between <_translatable_node> and its end tag in
XML files. Read manpage (man ${PROGRAM}) for more info.

      --type=TYPE   Specify the file type of FILENAME. Currently supports:
                    "gettext/glade", "gettext/ini", "gettext/keys"
                    "gettext/rfc822deb", "gettext/schemas",
                    "gettext/gsettings", "gettext/xml", "gettext/quoted",
                    "gettext/quotedxml", "gettext/tlk"
  -l, --local       Writes output into current working directory
                    (conflicts with --update)
      --update      Writes output into the same directory the source file
                    reside (conflicts with --local)
      --srcdir      Root of the source tree
  -v, --version     Output version information and exit
  -h, --help        Display this help and exit
  -q, --quiet       Quiet mode

Report bugs to http://bugs.launchpad.net/intltool
_EOF_
    exit;
}

## Sub for printing error messages
sub error{
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit;
}

sub message {
    print "Generating C format header file for translation.\n" unless $QUIET_ARG;
}

sub extract {
    &determine_type;

    &convert;

    open OUT, ">$OUTFILE";
    binmode (OUT) if $^O eq 'MSWin32';
    &msg_write;
    close OUT;

    print "Wrote $OUTFILE\n" unless $QUIET_ARG;
}

sub convert {

    ## Reading the file
    {
	local (*IN);
	local $/; #slurp mode
	open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
        binmode (IN);
        $input = <IN>;
        close IN;
    }

    &type_ini if $gettext_type eq "ini";
    &type_keys if $gettext_type eq "keys";
    &type_xml if $gettext_type eq "xml";
    &type_glade if $gettext_type eq "glade";
    &type_gsettings  if $gettext_type eq "gsettings";
    &type_schemas  if $gettext_type eq "schemas";
    &type_rfc822deb  if $gettext_type eq "rfc822deb";
    &type_quoted if $gettext_type eq "quoted";
    &type_quotedxml if $gettext_type eq "quotedxml";
    &type_tlk if $gettext_type eq "tlk";
}

sub entity_decode_minimal
{
    local ($_) = @_;

    s/&apos;/'/g; # '
    s/&quot;/"/g; # "
    s/&amp;/&/g;

    return $_;
}

sub entity_decode
{
    local ($_) = @_;

    s/&apos;/'/g; # '
    s/&quot;/"/g; # "
    s/&lt;/</g;
    s/&gt;/>/g;
    s/&amp;/&/g;

    return $_;
}

sub escape_char
{
    return '\"' if $_ eq '"';
    return '\n' if $_ eq "\n";
    return '\\\\' if $_ eq '\\';

    return $_;
}

sub escape
{
    my ($string) = @_;
    return join "", map &escape_char, split //, $string;
}

sub add_message
{
    my ($string) = @_;
    push @messages_sorted, $string if !defined $messages{$string};
    $messages{$string} = [];
}

sub type_ini {
    ### For generic translatable desktop files ###
    while ($input =~ /^(#(.+)\n)?^_.*=(.*)$/mg) {
        if (defined($2))  {
            $comments{$3} = $2;
        }
        add_message($3);
    }
}

sub type_keys {
    ### For generic translatable mime/keys files ###
    while ($input =~ /^\s*_\w+=(.*)$/mg) {
        add_message($1);
    }
}

sub type_xml {
    ### For generic translatable XML files ###
    my $tree = readXml($input);
    parseTree(0, $tree);
}

sub print_var {
    my $var = shift;
    my $vartype = ref $var;
    
    if ($vartype =~ /ARRAY/) {
        my @arr = @{$var};
        print "[ ";
        foreach my $el (@arr) {
            print_var($el);
            print ", ";
        }
        print "] ";
    } elsif ($vartype =~ /HASH/) {
        my %hash = %{$var};
        print "{ ";
        foreach my $key (keys %hash) {
            print "$key => ";
            print_var($hash{$key});
            print ", ";
        }
        print "} ";
    } else {
        print $var;
    }
}

# Same syntax as getAttributeString in intltool-merge.in.in, similar logic (look for ## differences comment)
sub getAttributeString
{
    my $sub = shift;
    my $do_translate = shift || 1;
    my $language = shift || "";
    my $translate = shift;
    my $result = "";
    foreach my $e (reverse(sort(keys %{ $sub }))) {
	my $key    = $e;
	my $string = $sub->{$e};
	my $quote = '"';
	
	$string =~ s/^[\s]+//;
	$string =~ s/[\s]+$//;
	
	if ($string =~ /^'.*'$/)
	{
	    $quote = "'";
	}
	$string =~ s/^['"]//g;
	$string =~ s/['"]$//g;

        ## differences from intltool-merge.in.in
	if ($key =~ /^_/) {
            $comments{entity_decode($string)} = $XMLCOMMENT if $XMLCOMMENT;
            add_message(entity_decode($string));
            $$translate = 2;
	}
        ## differences end here from intltool-merge.in.in
	$result .= " $key=$quote$string$quote";
    }
    return $result;
}

# Verbatim copy from intltool-merge.in.in
sub getXMLstring
{
    my $ref = shift;
    my $spacepreserve = shift || 0;
    my @list = @{ $ref };
    my $result = "";

    my $count = scalar(@list);
    my $attrs = $list[0];
    my $index = 1;

    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
    $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));

    while ($index < $count) {
	my $type = $list[$index];
	my $content = $list[$index+1];
        if (! $type ) {
	    # We've got CDATA
	    if ($content) {
		# lets strip the whitespace here, and *ONLY* here
                $content =~ s/\s+/ /gs if (!$spacepreserve);
		$result .= $content;
	    }
	} elsif ( "$type" ne "1" ) {
	    # We've got another element
	    $result .= "<$type";
	    $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
	    if ($content) {
		my $subresult = getXMLstring($content, $spacepreserve);
		if ($subresult) {
		    $result .= ">".$subresult . "</$type>";
		} else {
		    $result .= "/>";
		}
	    } else {
		$result .= "/>";
	    }
	}
	$index += 2;
    }
    return $result;
}

# Verbatim copy from intltool-merge.in.in, except for MULTIPLE_OUTPUT handling removed
# Translate list of nodes if necessary
sub translate_subnodes
{
    my $fh = shift;
    my $content = shift;
    my $language = shift || "";
    my $singlelang = shift || 0;
    my $spacepreserve = shift || 0;

    my @nodes = @{ $content };

    my $count = scalar(@nodes);
    my $index = 0;
    while ($index < $count) {
        my $type = $nodes[$index];
        my $rest = $nodes[$index+1];
        traverse($fh, $type, $rest, $language, $spacepreserve);
        $index += 2;
    }
}

# Based on traverse() in intltool-merge.in.in
sub traverse
{
    my $fh = shift; # unused, to allow us to sync code between -merge and -extract
    my $nodename = shift;
    my $content = shift;
    my $language = shift || "";
    my $spacepreserve = shift || 0;

    if ($nodename && "$nodename" eq "1") {
        $XMLCOMMENT = $content;
    } elsif ($nodename) {
	# element
	my @all = @{ $content };
	my $attrs = shift @all;
	my $translate = 0;
	my $outattr = getAttributeString($attrs, 1, $language, \$translate);

	if ($nodename =~ /^_/) {
	    $translate = 1;
	    $nodename =~ s/^_//;
	}
	my $lookup = '';

        $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
        $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

	if ($translate) {
	    $lookup = getXMLstring($content, $spacepreserve);
            if (!$spacepreserve) {
                $lookup =~ s/^\s+//s;
                $lookup =~ s/\s+$//s;
            }
            if (exists $attrs->{"msgctxt"}) {
                my $context = entity_decode ($attrs->{"msgctxt"});
                $context =~ s/^["'](.*)["']/$1/;
                $lookup = "$context\004$lookup";
            }

	    if ($lookup && $translate != 2) {
                $comments{$lookup} = $XMLCOMMENT if $XMLCOMMENT;
                add_message($lookup);
            } elsif ($translate == 2) {
                translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
	    }
	} else {
            $XMLCOMMENT = "";
	    my $count = scalar(@all);
	    if ($count > 0) {
                my $index = 0;
                while ($index < $count) {
                    my $type = $all[$index];
                    my $rest = $all[$index+1];
                    traverse($fh, $type, $rest, $language, $spacepreserve);
                    $index += 2;
                }
	    }
	}
        $XMLCOMMENT = "";
    }
}


# Verbatim copy from intltool-merge.in.in, $fh for compatibility
sub parseTree
{
    my $fh        = shift;
    my $ref       = shift;
    my $language  = shift || "";

    my $name = shift @{ $ref };
    my $cont = shift @{ $ref };

    while (!$name || "$name" eq "1") {
        $name = shift @{ $ref };
        $cont = shift @{ $ref };
    }

    my $spacepreserve = 0;
    my $attrs = @{$cont}[0];
    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

    traverse($fh, $name, $cont, $language, $spacepreserve);
}

# Verbatim copy from intltool-merge.in.in
sub intltool_tree_comment
{
    my $expat = shift;
    my $data  = $expat->original_string();
    my $clist = $expat->{Curlist};
    my $pos   = $#$clist;

    $data =~ s/^<!--//s;
    $data =~ s/-->$//s;
    push @$clist, 1 => $data;
}

# Verbatim copy from intltool-merge.in.in
sub intltool_tree_cdatastart
{
    my $expat    = shift;
    my $clist = $expat->{Curlist};
    my $pos   = $#$clist;

    push @$clist, 0 => $expat->original_string();
}

# Verbatim copy from intltool-merge.in.in
sub intltool_tree_cdataend
{
    my $expat    = shift;
    my $clist = $expat->{Curlist};
    my $pos   = $#$clist;

    $clist->[$pos] .= $expat->original_string();
}

# Verbatim copy from intltool-merge.in.in
sub intltool_tree_char
{
    my $expat = shift;
    my $text  = shift;
    my $clist = $expat->{Curlist};
    my $pos   = $#$clist;

    # Use original_string so that we retain escaped entities
    # in CDATA sections.
    #
    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
        $clist->[$pos] .= $expat->original_string();
    } else {
        push @$clist, 0 => $expat->original_string();
    }
}

# Verbatim copy from intltool-merge.in.in
sub intltool_tree_start
{
    my $expat    = shift;
    my $tag      = shift;
    my @origlist = ();

    # Use original_string so that we retain escaped entities
    # in attribute values.  We must convert the string to an
    # @origlist array to conform to the structure of the Tree
    # Style.
    #
    my @original_array = split /\x/, $expat->original_string();
    my $source         = $expat->original_string();

    # Remove leading tag.
    #
    $source =~ s|^\s*<\s*(\S+)||s;

    # Grab attribute key/value pairs and push onto @origlist array.
    #
    while ($source)
    {
       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
       {
           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
           push @origlist, $1;
           push @origlist, '"' . $2 . '"';
       }
       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
       {
           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
           push @origlist, $1;
           push @origlist, "'" . $2 . "'";
       }
       else
       {
           last;
       }
    }

    my $ol = [ { @origlist } ];

    push @{ $expat->{Lists} }, $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $ol;
    $expat->{Curlist} = $ol;
}

# Copied from intltool-merge.in.in and added comment handler.
sub readXml
{
    my $xmldoc = shift || return;
    my $ret = eval 'require XML::Parser';
    if(!$ret) {
        die "You must have XML::Parser installed to run $0\n\n";
    }
    my $xp = new XML::Parser(Style => 'Tree');
    $xp->setHandlers(Char => \&intltool_tree_char);
    $xp->setHandlers(Start => \&intltool_tree_start);
    $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
    $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);

    ## differences from intltool-merge.in.in
    $xp->setHandlers(Comment => \&intltool_tree_comment);
    ## differences end here from intltool-merge.in.in

    my $tree = $xp->parse($xmldoc);

# <foo><!-- comment --><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
# would be:
# [foo, [{}, 1, "comment", head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, 
# [{}, 0, "Howdy",  ref, [{}]], 0, "do" ] ]

    return $tree;
}

sub type_schemas {
    ### For schemas XML files ###

    # FIXME: We should handle escaped < (less than)
    while ($input =~ /
                      <locale\ name="C">\s*
                          (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)?
                          (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)?
                          (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)?
                      <\/locale>
                     /sgx) {
        my @totranslate = ($3,$6,$9);
        my @eachcomment = ($2,$5,$8);
        foreach (@totranslate) {
            my $currentcomment = shift @eachcomment;
            next if !$_;
            s/\s+/ /g;
            add_message(entity_decode_minimal($_));
            $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment));
        }
    }
}

# Parse the tree as returned by readXml() for gschema.xml files.
sub traverse_gsettings {
    sub cleanup {
        s/^\s+//;
        s/\s+$//;
        s/\s+/ /g;
        return $_;
    }

    my $nodename = shift;
    my $content = shift;
    my $comment = shift || 0;
    my @list = @{ $content };
    my $attrs_ref = shift @list;
    my %attrs = %{ $attrs_ref };
    if (($nodename eq 'default' and $attrs{'l10n'}) or
        ($nodename eq 'summary') or ($nodename eq 'description')) {
        # preserve whitespace.  deal with it ourselves, below.
        my $message = getXMLstring($content, 1);

        if ($nodename eq 'default') {
            # for <default> we strip leading and trailing whitespace but
            # preserve (possibly quoted) whitespace within
            $message =~ s/^\s+//;
            $message =~ s/\s+$//;
        } else {
            # for <summary> and <description>, we normalise all
            # whitespace while preserving paragraph boundaries
            $message = join "\n\n", map &cleanup, split/\n\s*\n+/, $message;
        }

        my $context = $attrs{'context'};
        $context =~ s/^["'](.*)["']/$1/ if $context;
        $message = $context . "\004" . $message if $context;
        add_message($message);
        $comments{$message} = $comment if $comment;
    } else {
        my $index = 0;
        my $comment;
        while (scalar(@list) > 1) {
            my $type = shift @list;
            my $content = shift @list;
            if (!$type || "$type" eq "1") {
                if ($type == 1) {
                    $comment = $content;
                }
                next;
            } else {
                traverse_gsettings($type, $content, $comment);
                $comment = 0;
            }
        }
    }
}

sub type_gsettings {
    my $tree = readXml($input);
    my @tree_nodes = @{ $tree };
    my $node = shift @tree_nodes;
    while (!$node || "$node" eq "1") {
        shift @tree_nodes;
        $node = shift @tree_nodes;
    }
    my $content = shift @tree_nodes;
    traverse_gsettings($node, $content);
}

sub type_rfc822deb {
    ### For rfc822-style Debian configuration files ###

    my $lineno = 1;
    my $type = '';
    while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
    {
        my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
        while ($pre =~ m/\n/g)
        {
            $lineno ++;
        }
        $lineno += length($newline);
        my @str_list = rfc822deb_split(length($underscore), $text);
        for my $str (@str_list)
        {
            $strcount++;
            add_message($str);
            $loc{$str} = $lineno;
            $count{$str} = $strcount;
            my $usercomment = '';
            while($pre =~ s/(^|\n)#([^\n]*)$//s)
            {
                $usercomment = "\n" . $2 . $usercomment;
            }
            $comments{$str} = $tag . $usercomment;
        }
        $lineno += ($text =~ s/\n//g);
    }
}

sub rfc822deb_split {
    # Debian defines a special way to deal with rfc822-style files:
    # when a value contain newlines, it consists of
    #   1.  a short form (first line)
    #   2.  a long description, all lines begin with a space,
    #       and paragraphs are separated by a single dot on a line
    # This routine returns an array of all paragraphs, and reformat
    # them.
    # When first argument is 2, the string is a comma separated list of
    # values.
    my $type = shift;
    my $text = shift;
    $text =~ s/^[ \t]//mg;
    return (split(/, */, $text, 0)) if $type ne 1;
    return ($text) if $text !~ /\n/;

    $text =~ s/([^\n]*)\n//;
    my @list = ($1);
    my $str = '';
    for my $line (split (/\n/, $text))
    {
        chomp $line;
        if ($line =~ /^\.\s*$/)
        {
            #  New paragraph
            $str =~ s/\s*$//;
            push(@list, $str);
            $str = '';
        }
        elsif ($line =~ /^\s/)
        {
            #  Line which must not be reformatted
            $str .= "\n" if length ($str) && $str !~ /\n$/;
            $line =~ s/\s+$//;
            $str .= $line."\n";
        }
        else
        {
            #  Continuation line, remove newline
            $str .= " " if length ($str) && $str !~ /\n$/;
            $str .= $line;
        }
    }
    $str =~ s/\s*$//;
    push(@list, $str) if length ($str);
    return @list;
}

sub type_quoted {
    while ($input =~ /\"(([^\"]|\\\")*[^\\\"])\"/g) {
        my $message = $1;
        my $before = $`;
        $message =~ s/\\\"/\"/g;
        $before =~ s/[^\n]//g;
        add_message($message);
        $loc{$message} = length ($before) + 2;
    }
}

sub type_quotedxml {
    while ($input =~ /\"(([^\"]|\\\")*[^\\\"])\"/g) {
        my $message = $1;
        my $before = $`;
        $message =~ s/\\\"/\"/g;
        $message = entity_decode($message);
        $before =~ s/[^\n]//g;
        add_message($message);
        $loc{$message} = length ($before) + 2;
    }
}

sub type_glade {
    ### For translatable Glade XML files ###

    my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";

    while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
	# Glade sometimes uses tags that normally mark translatable things for
        # little bits of non-translatable content. We work around this by not
        # translating strings that only includes something like label4 or window1.
	add_message(entity_decode($2)) unless $2 =~ /^(window|label|dialog)[0-9]+$/;
    }

    while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
	for my $item (split (/\n/, $1)) {
            add_message(entity_decode($item));
	}
    }

    ## handle new glade files
    while ($input =~ /<(\w+)\s+[^>]*translatable\s*=\s*"yes"(?:\s+[^>]*context\s*=\s*"([^"]*)")?(?:\s+[^>]*comments\s*=\s*"([^"]*)")?[^>]*>([^<]+)<\/\1>/sg) {
        if (!($4 =~ /^(window|label)[0-9]+$/)) {
            my $message = entity_decode($4);
            if (defined($2)) {
                $message = entity_decode($2) . "\004" . $message;
            }
            add_message($message);
            if (defined($3)) {
                $comments{$message} = entity_decode($3) ;
            }
        }
    }
    while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
        add_message(entity_decode_minimal($2));
    }
}

sub type_tlk {
    my ($ftype, $fvers, $langid, $strcount, $stroff);
    my $count = 0;
    my $pos = 0;
    my @inputa = split (//, $input, 21);
    my $foo;
    my $strdata;

    $ftype = substr ($input, 0, 3);
    $fvers = substr ($input, 4, 7);
    $langid = unpack ("L", $inputa[8] . $inputa[9] .
                      $inputa[10] . $inputa[11]);
    $strcount = unpack ("L", $inputa[12] . $inputa[13] .
                        $inputa[14] . $inputa[15]);
    $stroff = unpack ("L", $inputa[16] . $inputa[17] .
                      $inputa[18] . $inputa[19]);

    use bytes;
    $strdata = bytes::substr ($input, $stroff);

    my $sinpos = 20;

    $foo = $inputa[$sinpos];
    $sinpos = 40 * 2000;
    @inputa = split (//, $foo, $sinpos + 1);

    $pos = 0;
    while ($count < $strcount) {
        my ($flags, $soundref, $volvar, $pitch, $offset, $strsize, $sndlen) = 0;

        if ($count > 0 && $count % 2000 == 0) {
            $foo = $inputa[$sinpos];
            my $numleft = ($strcount - $count);
            if ($numleft > 2000) {
                $sinpos = 40 * 2000;
            } else {
                $sinpos = 40 * $numleft;
            }
            @inputa = split (//, $foo, $sinpos + 1);
            my $numbytes = @inputa;
            $pos = 0;
        }


        $flags = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
                         $inputa[$pos + 2] . $inputa[$pos + 3]);
        $pos += 4;
        if ($flags & 0x0002) {
            $soundref = join ('', @inputa[$pos..$pos + 15]);
            $soundref =~ s/\0//g;
        }
        $pos += 16;
# According to the Bioware Aurora Talk Table Format documentation
# the VolumeVariance and PitchVariance DWORDs are not used
# We increment the pos counter, but do not read the data, here
#        $volvar = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
#                          $inputa[$pos + 2] . $inputa[$pos + 3]);
        $pos += 4;
#        $pitch = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
#                         $inputa[$pos + 2] . $inputa[$pos + 3]);
        $pos += 4;
        $offset = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
                          $inputa[$pos + 2] . $inputa[$pos + 3])
            if ($flags & 0x0001);
        $pos += 4;
        $strsize = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
                           $inputa[$pos + 2] . $inputa[$pos + 3])
            if ($flags & 0x0001);
        $pos += 4;
        $sndlen = unpack ("d", $inputa[$pos] . $inputa[$pos + 1] .
                          $inputa[$pos + 2] . $inputa[$pos + 3])
            if ($flags & 0x0004);
        $pos += 4;

        if (defined $strsize && $strsize > 0) {
            my $message = substr ($strdata, $offset, $strsize);
            if (defined $message) {
                use Encode;
                Encode::from_to ($message, "iso-8859-1", "UTF-8");
                add_message($message);
                if ($message =~ /^Bad Strref$/ ) {
                    $comments{$message} = "DO NOT Translate this Entry.";
                    $comments{$message} .= "\nTLK:position=$count";
               } else {
                    $comments{$message} = "TLK:position=$count";
                    $comments{$message} .= "; TLK:sndresref=$soundref"
                        if (defined $soundref && $soundref ne "");
                    $comments{$message} .= "; TLK:sndlen=$sndlen"
                        if (defined $sndlen && $sndlen != 0);
                }
            } else {
                print STDERR "Missing message? ID: $count\n";
            }
        }
        $count++;
    }
}

sub msg_write {
    my @msgids;
    if (%count)
    {
        @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
    }
    else
    {
        @msgids = @messages_sorted;
    }
    for my $message (@msgids)
    {
	my $offsetlines = 1;
	my $context = undef;
	$offsetlines++ if $message =~ /%/;
	if (defined ($comments{$message}))
	{
		while ($comments{$message} =~ m/\n/g)
		{
		    $offsetlines++;
		}
	}
	print OUT "# ".($loc{$message} - $offsetlines).  " \"$FILE\"\n"
	        if defined $loc{$message};
   	print OUT "/* ".$comments{$message}." */\n"
                if defined $comments{$message};
   	print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;

    	if ($message =~ /(.*)\004(.*)/s) {
    	    $context = $1;
    	    $message = $2;
    	}
    	my @lines = split (/\n/, $message, -1);
    	for (my $n = 0; $n < @lines; $n++)
	{
            if ($n == 0)
            {
                if (defined $context)
                {
                     if ($NOMSGCTXT_ARG)
                     {
                          print OUT "char *s = N_(\"", $context, "|"; 
                     }
                     else
                     {
 		          print OUT "char *s = C_(\"", $context, "\", \""; 
                     }
                }
                else
                {
 		     print OUT "char *s = N_(\""; 
                }
            }
            else
            {  
                print OUT "             \""; 
            }

            print OUT escape($lines[$n]);

            if ($n < @lines - 1)
            {
                print OUT "\\n\"\n"; 
            }
            else
            {
                print OUT "\");\n";  
	    }
        }
    }
}
intltool 0.50.2-2 / usr / bin / intltool-extract