This file is indexed.

/usr/share/perl5/Catmandu/Importer/Wikidata.pm is in libcatmandu-wikidata-perl 0.06-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
package Catmandu::Importer::Wikidata;
#ABSTRACT: Import from Wikidata
our $VERSION = '0.06'; #VERSION
use Catmandu::Sane;
use Moo;
use URI::Template;

extends 'Catmandu::Importer::getJSON';

# Base URL of the Wikidata API endpoint; the request URL template is derived
# from it. Read-only after construction.
has api => (
    is      => 'ro',
    default => sub { return 'http://www.wikidata.org/w/api.php' },
);

# Override of the inherited C<url> attribute: lazily built as a URI template
# on top of C<api>, with optional C<ids>, C<sites> and C<titles> query
# parameters.
has '+url' => (
    is      => 'ro',
    lazy    => 1,
    builder => sub {
        my ($self) = @_;
        my $template = $self->api
            . '?action=wbgetentities&format=json{&ids}{&sites}{&titles}';
        return URI::Template->new($template);
    },
);

# Override of the inherited C<from> attribute: computed lazily by
# _build_from from the ids/site/title options.
has '+from' => (
    is      => 'ro',
    lazy    => 1,
    builder => \&_build_from,
);

# List of Wikidata ids to fetch, stored as an array reference.
#
# Accepts either an array reference (copied as given) or a string of ids
# separated by comma, vertical bar, or space. Runs of separators are treated
# as one and empty fields are dropped, so "Q1, P2" yields two ids instead of
# an empty one in between. The ids themselves are validated in _build_from.
has ids => (
    is     => 'ro',
    coerce => sub {
        my ($ids) = @_;
        # Splitting an array reference would only split its stringification.
        return [ @{$ids} ] if ref $ids eq 'ARRAY';
        return [ grep { length } split /[,| ]+/, $ids // '' ];
    },
);

# Wiki site key used to resolve titles; defaults to the English Wikipedia.
#
# Validation and normalisation happen in a coercion rather than a trigger:
# Moo ignores the return value of a trigger, so normalising there never
# changed the stored value. Hyphens are accepted in place of underscores and
# stored as underscores. Every separator may be followed by one or more
# letters, so keys such as "zh_min_nanwiki" are valid.
#
# Dies with "invalid site ..." on a malformed key.
has site => (
    is      => 'ro',
    default => sub { 'enwiki' },
    coerce  => sub {
        my $site = $_[0] // '';
        die "invalid site $site" if $site !~ /^[a-z]+(?:[_-][a-z]+)*$/;
        $site =~ s/-/_/g;
        return $site;
    },
);

# Page title identifying the entity to fetch; it may carry a site key as
# prefix ("site:title"), which _build_from splits off.
has title => ( is => 'ro' );

# Builder for the C<from> attribute.
#
# Returns the request URL (the C<url> template filled in) for the entities
# selected by the C<ids> option or, failing that, by C<title> and C<site>.
# Returns undef if neither ids nor a title were given.
#
# Dies if an id is not of the form Q<digits>/P<digits> or if the site key is
# malformed.
sub _build_from {
    my ($self) = @_;

    my $vars;

    if ($self->ids) {
        my @ids;
        for my $id (@{ $self->ids }) {
            die "invalid wikidata id $id\n" if $id !~ /^[QP][0-9]+$/i;
            push @ids, uc $id;
        }
        $vars = { ids => join('|', @ids) };
    } elsif (defined $self->title) {
        my ($site, $title) = ($self->site, $self->title);

        # A title may carry its own site key ("dewiki:Wahnsinn"). Each
        # separator in a site key may be followed by several letters
        # ("zh_min_nanwiki"), hence [a-z]+ inside the repeated group.
        if ($title =~ /^([a-z]+(?:[_-][a-z]+)*):(.+)$/) {
            ($site, $title) = ($1, $2);
        }

        die "invalid site $site" if $site !~ /^[a-z]+(?:[_-][a-z]+)*$/;
        $site =~ s/-/_/g;    # hyphens are sent as underscores
        $vars = { sites => $site, titles => $title };
    }

    return ($vars ? $self->url->process($vars) : undef);
}

# Turns one input value into the variables for the request URL template.
#
#   Q42 / P19          -> { ids => 'Q42' }            (upper-cased)
#   dewiki:Wahnsinn    -> { sites => 'dewiki', titles => 'Wahnsinn' }
#   anything else      -> title looked up in the configured default site
#
# Site keys may use hyphens instead of underscores; they are always sent
# with underscores, for the default site as well as for a prefixed one.
sub request_hook {
    my ($self, $line) = @_;

    return { ids => uc $line } if $line =~ /^[PQ][0-9]+$/i;

    my ($site, $title);

    # Each separator in a site key may be followed by several letters
    # ("zh_min_nanwiki"), hence [a-z]+ inside the repeated group.
    if ($line =~ /^([a-z]+(?:[_-][a-z]+)*):(.+)$/) {
        ($site, $title) = ($1, $2);
    } else {
        ($site, $title) = ($self->site, $line);
    }

    $site =~ s/-/_/g;
    return { sites => $site, titles => $title };
}

# Extracts the entity records from a decoded API response.
#
# Returns an array reference with the hash-reference values of
# C<< $data->{entities} >>, in no particular order. If an entity has a
# C<missing> key, its value is set to 1. Returns nothing if $data is not a
# hash reference or has no C<entities> hash; this includes other reference
# types such as a top-level array, which must not be dereferenced as a hash.
sub response_hook {
    my ($self, $data) = @_;

    return unless ref $data eq 'HASH' and ref $data->{entities} eq 'HASH';

    my @entities = grep { ref $_ eq 'HASH' } values %{ $data->{entities} };
    for my $entity (@entities) {
        $entity->{missing} = 1 if exists $entity->{missing};
    }

    return \@entities;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Catmandu::Importer::Wikidata - Import from Wikidata

=head1 VERSION

version 0.06

=head1 SYNOPSIS

    catmandu convert Wikidata --ids Q1,P227
    catmandu convert Wikidata --site dewiki --title Wahnsinn

    echo Q1 | catmandu convert Wikidata
    echo Wahnsinn | catmandu convert Wikidata --site dewiki
    echo dewiki:Wahnsinn | catmandu convert Wikidata

    echo Q1 | catmandu convert Wikidata --fix 'retain_field("labels")'

=head1 DESCRIPTION

This L<Catmandu::Importer> queries Wikidata for entities, given by their
Wikidata identifier (C<Q...>, C<P...>) or by a title in some known Wikidata
site, such as the English Wikipedia (C<enwiki>). The entities are either
specified as options (C<ids>, C<site>, and/or C<title>) or as line-separated
input values. By default, the raw JSON structure of each Wikidata entity is
returned one by one. Entities not found are returned with the C<missing>
property set to C<1> like this:

    { "id": "Q7", "missing": "1" }

To further process the JSON structure L<Catmandu::Wikidata> contains several
Catmandu fixes, e.g. to only retain a selected language.

=head1 CONFIGURATION

This importer extends L<Catmandu::Importer::getJSON>, so it can be configured
with options C<agent>, C<timeout>, C<headers>, C<proxy>, and C<dry>. Additional
options include:

=over

=item api

Wikidata API base URL. Default is C<http://www.wikidata.org/w/api.php>.

=item ids

A list of Wikidata entity/property ids, such as C<Q42> and C<P19>. Use
comma, vertical bar, or space as separator. Read from input stream if
neither ids nor titles are specified.

=item site

Wiki site key for referring to Wikidata entities by title. Default is
C<enwiki> for English Wikipedia. A list of supported site keys can be
queried as part of
L<https://www.wikidata.org/w/api.php?action=paraminfo&modules=wbgetentities>
(unless L<https://bugzilla.wikimedia.org/show_bug.cgi?id=58200> is fixed).

=item title

Title of a page for referring to Wikidata entities. A title is only unique
within a selected C<site>. One can also prepend the site key to a title
separated by colon, e.g. C<enwiki:anarchy> for the entity that is titled
"anarchy" in the English Wikipedia. Read from input stream if neither titles
nor ids are specified.

=back

=head1 AUTHOR

Jakob Voß

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2014 by Jakob Voß.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut