# /usr/share/barnowl/lib/BarnOwl/Parse.pm

use warnings;
use strict;

package BarnOwl::Parse;

use base qw(Exporter);
our @EXPORT_OK = qw(tokenize tokenize_with_point);

# TODO: have the main function return whether or not it was a valid parse, with
# possible error messages or something.  (Still give a parse of some sort on
# invalid parses, just let us know it's invalid if we care.) This is to
# implement command-line-ish things in Perl.

=for doc

Ideally, this should use the same codepath we use to /actually/
tokenize commands, but for now, make sure this is kept in sync with
owl_parseline in util.c

Unlike owl_parseline, we always return a result, even in the presence
of parse errors, since we may be called on incomplete command-lines.

The owl_parseline rules are:

* Tokenize on ' ' and '\t'
* ' and " are quote characters
* \ has no effect

=cut

# Character classes used by the tokenizer.  Anything that is neither a quote
# character nor a separator is "boring" and is copied into the current word
# verbatim (this includes backslashes and newlines).
my $boring = qr{[^'" \t]};
my $quote  = qr{['"]};
my $space  = qr{[ \t]};

# tokenize_with_point($line, $point)
#
# Split $line into words and work out where the character offset $point
# falls relative to those words.
#
# Words are separated by runs of spaces and tabs.  Text enclosed in a pair of
# matching ' or " characters is taken literally (separators included) and the
# quote characters themselves are dropped.  Quoted and unquoted pieces that
# touch each other are joined into a single word.  An unterminated quote
# extends to the end of the line.  A result is always returned, even for
# such incomplete input.
#
# Returns a five-element list:
#
#   \@words       the words, with quote characters removed
#   $cword        the number of words that end before $point, i.e. the index
#                 in @words of the first word ending at or after $point
#                 (equal to the word count if there is no such word)
#   $word_point   offset of $point within that word's unquoted text
#   $cword_start  offset in $line at which that word starts
#   $cword_end    offset in $line just past the end of that word
#
# If no word ends at or after $point, $word_point is 0 and both
# $cword_start and $cword_end are set to $point.
#
# NOTE(review): if $point lies in the whitespace preceding the selected word,
# $word_point comes out negative -- confirm that callers expect this.
#
# Dies with "Internal error, leftover=..." if a partial word is still pending
# after the whole line has been consumed; that is not expected to happen.
sub tokenize_with_point {
    my $line = shift;
    my $point = shift;

    my @words = ();
    my $cword = 0;
    my $cword_start;
    my $cword_end;
    my $word_point;

    # State for the word currently being assembled.
    my $word = '';          # unquoted text collected so far
    my $wstart = 0;         # offset in $line where this word began
    my $skipped = 0;        # quote characters at or after $point in this word
    my $have_word = 0;      # true once any piece (even an empty '') was seen

    pos($line) = 0;
    while(pos($line) < length($line)) {
        if($line =~ m{\G ($boring+) }gcx) {
            $word .= $1;
            $have_word = 1;
        } elsif ($line =~ m{\G ($quote)}gcx) {
            my $chr = $1;
            # Quote characters are part of the raw line but not of the
            # unquoted word; remember the ones at or after $point so the
            # offset computed below can be corrected for them.
            $skipped++ if pos($line) > $point;
            if($line =~ m{\G ([^$chr]*) $chr}gcx) {
                $word .= $1;
                $skipped++ if pos($line) > $point;
            } else {
                # No closing quote: the rest of the line belongs to the word.
                $word .= substr($line, pos($line));
                pos($line) = length($line);
            }
            $have_word = 1;
        }

        # A word ends at a run of separators or at the very end of the line.
        # \z is used rather than $ because $ also matches just before a
        # trailing newline, which would close the word too early and turn
        # the newline into a token of its own.
        if ($line =~ m{\G ($space+|\z)}gcx) {
            my $wend = pos($line) - length($1);
            if ($have_word) {
                push @words, $word;
                $cword++ unless $wend >= $point;
                if(($wend >= $point) && !defined($word_point)) {
                    # ($wend - $point) is the raw distance from $point to the
                    # end of the word; $skipped of those characters are quotes.
                    $word_point = length($word) - ($wend - $point) + $skipped;
                    $cword_start = $wstart;
                    $cword_end   = $wend;
                }
            }
            # Always reset, so we get $wstart right
            $word = '';
            $wstart = pos($line);
            $skipped = 0;
            $have_word = 0;
        }
    }

    if(length($word)) { die("Internal error, leftover=$word"); }

    # $point is beyond every word: report an empty word located at $point.
    unless(defined($word_point)) {
        $word_point = 0;
        $cword_start = $cword_end = $point;
    }

    return (\@words, $cword, $word_point, $cword_start, $cword_end);
}

# tokenize($line)
#
# Convenience wrapper around tokenize_with_point: tokenizes $line (with the
# point fixed at offset 0) and returns only the array reference holding the
# words; the point-related values are discarded.
sub tokenize {
    my ($line) = @_;

    # List assignment keeps just the first element of the returned list.
    my ($tokens) = tokenize_with_point($line, 0);

    return $tokens;
}
# barnowl 1.9-1 / usr / share / barnowl / lib / BarnOwl / Parse.pm