This file is indexed.

/usr/lib/python2.7/dist-packages/swap/webAccess.py is in python-swap 1.2.1-7.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
"""Web Access

This module implements some basic bits of the web architecture:
dereferencing a URI to get a document, with content negotiation,
and deciding on the basis of the Internet Content Type what to do with it.

$Id: webAccess.py,v 1.34 2007/08/06 16:13:56 syosi Exp $


Web access functionality building on urllib2

"""

import sys, os

#import urllib
import urllib2, urllib  # Python standard

from why import newTopLevelFormula

import uripath # http://www.w3.org/2000/10/swap/uripath.py
import diag
from diag import progress
import notation3   # Parser    @@@ Registery of parsers vs content types woudl be better.

from OrderedSequence import indentString

# Header name under which load() looks up the media type of a fetched
# document in the response headers.
HTTP_Content_Type = 'content-type' #@@ belongs elsewhere?

# Snapshot of diag.print_all_file_names taken when this module is imported;
# when true, webget() appends each address it opens to diag.file_list.
print_all_file_names = diag.print_all_file_names   # for listing test files

class SecurityError(IOError):
    """Raised by webget() when sandboxing forbids the requested access."""

# A little code to represent a value that can be set
# and read; a singleton. In essence, this is a little
# prettier than a one element list
def setting(self, val=None):
    """Read, and optionally first replace, the value kept in a one-slot box.

    self: a mutable sequence whose element 0 holds the stored value.
    val:  when not None, stored into self[0] before reading.

    Returns the (possibly just updated) self[0].  None cannot be stored
    through this function because it is what means "just read".
    """
    if val is None:
        return self[0]
    self[0] = val
    return self[0]

# Module-wide sandbox switch: binding setting() to a one-element list gives a
# callable where sandBoxed() reads the flag and sandBoxed(True) sets it.
sandBoxed = setting.__get__([False])

def cacheHack(addr):
    """Map a www.w3.org address onto a local mirror when one exists.

    Meant for working offline ("if on a plane"): an address under
    http://www.w3.org/ is looked for under /devel/WWW/, first as is and
    then with ".rdf" and ".n3" appended.

    Returns a "file://" URI for the first candidate that can be stat'ed,
    otherwise addr unchanged.
    """
    remotePrefix = "http://www.w3.org/"
    localRoot = "/devel/WWW/"
    if not addr.startswith(remotePrefix):
        return addr

    localBase = localRoot + addr[len(remotePrefix):]
    for extension in ("", ".rdf", ".n3"):
        candidate = localBase + extension
        try:
            os.stat(candidate)
            progress("Offline: Using local copy %s" % candidate)
            return "file://" + candidate
        except OSError:
            # No such local file; try the next suffix.
            pass
    return addr
                
def urlopenForRDF(addr, referer=None):
    """Fetch addr through webget(), advertising a preference for RDF.

    The Accept list offers N3 and RDF/XML; referer is passed on unchanged.
    Returns whatever webget() returns.
    """
    # 'application/x-turtle' is not offered -- open question from the
    # original author: why not ask for turtle?
    rdfTypes = ['text/rdf+n3', 'application/rdf+xml']
    return webget(addr, referer=referer, types=rdfTypes)


def webget(addr, referer=None, types=[]):
    """Open a URI for reading; return a file-like object with .headers
    cf http://www.w3.org/TR/2004/REC-webarch-20041215/#dereference-uri

    addr:    URI to dereference.
    referer: if true, sent as the HTTP Referer header.
    types:   media types to offer in the Accept header; no Accept header
             is added when empty.  (The default list is never mutated.)

    Returns: the stream from urllib2.urlopen, or from urllib.urlopen for
             data: URIs.
    Raises:  SecurityError for file: URIs while sandboxed; otherwise
             whatever the urllib / urllib2 call raises.
    """

    if diag.chatty_flag > 7: progress("Accessing: " + addr)

    # Classify the address the same way urllib/urllib2 will when opening it:
    # they strip whitespace, an enclosing <...> and a "URL:" prefix, and
    # lower-case the scheme.  A literal addr[:5] comparison would let
    # "FILE:...", " file:..." or "<URL:file:...>" slip past the sandbox.
    scheme = urllib.splittype(urllib.unwrap(addr))[0]

    if sandBoxed():
        if scheme == 'file':
            raise SecurityError('local file access prohibited')

#    addr = cacheHack(addr)

    # work around python stdlib bugs with data: URIs
    # buggy in 2.4.2 with CStringIO
    if scheme == 'data':
        # return open_data(addr)
        return urllib.urlopen(addr)

    req = urllib2.Request(addr)

    if types:
        req.add_header('Accept', ','.join(types))

    if referer: #consistently misspelt
        req.add_header('Referer', referer)

    stream =  urllib2.urlopen(req)

    # Only addresses opened through urllib2 are recorded; data: URIs
    # returned above are not.
    if print_all_file_names:
        diag.file_list.append(addr)

    return stream


def load(store, uri=None, openFormula=None, asIfFrom=None, contentType=None,
                flags="", referer=None, why=None, topLevel=False):
    """Get and parse document.  Guesses format if necessary.

    store:       supplies a new formula (store.newFormula()) when
                 openFormula is not given, and is handed to the parser.
    uri:         document to load, resolved against uripath.base();
                 if None, load from standard input.
    openFormula: formula to parse into; when given, the final close()
                 at the end of this function is skipped.
    asIfFrom:    URI the parser is told the document comes from;
                 defaults to the absolute address actually read.
    contentType: media type hint, consulted only when the received
                 Content-Type header does not settle the format.
    flags:       passed to the RDF/XML or N3 parser; the exact value
                 'rdflib' instead selects the rdflib RDF/XML parser.
    referer:     passed to urlopenForRDF() for the Referer header.
    why:         passed through to the parser / SPARQL convertor.
    topLevel:    if true, newTopLevelFormula() is called on the formula.

    The format is decided, in order, from the received Content-Type, the
    contentType hint, and finally the first bytes of the document.  The
    environment variables CWM_RDFLIB, CWM_RDF_PARSER and CWM_N3_PARSER
    influence which parser implementation is used.

    Returns:  top-level formula of the parsed document.
    Raises:   DocumentAccessError (an IOError) if fetching or reading the
              document fails; whatever the selected parser raises on bad
              input.

    This is an independent function, as it is fairly independent
    of the store. However, it is natural to call it as a method on the store.
    And a proliferation of APIs confuses.
    """
#    if referer is None:
#        raise RuntimeError("We are trying to force things to include a referer header")

    # Bound before the try so the except clause below can always report
    # something, even if uripath.base() itself fails.
    addr = uri
    try:
        baseURI = uripath.base()
        if uri is not None:
            addr = uripath.join(baseURI, uri) # Make abs from relative
            if diag.chatty_flag > 40: progress("Taking input from " + addr)
            netStream = urlopenForRDF(addr, referer)
            if diag.chatty_flag > 60:
                progress("   Headers for %s: %s\n" %(addr, netStream.headers.items()))
            receivedContentType = netStream.headers.get(HTTP_Content_Type, None)
        else:
            if diag.chatty_flag > 40: progress("Taking input from standard input")
            addr = uripath.join(baseURI, "STDIN") # Make abs from relative
            netStream = sys.stdin
            receivedContentType = None

    #    if diag.chatty_flag > 19: progress("HTTP Headers:" +`netStream.headers`)
    #    @@How to get at all headers??
    #    @@ Get sensible net errors and produce dignostics

        guess = None
        if receivedContentType:
            if diag.chatty_flag > 9:
                progress("Recieved Content-type: " + repr(receivedContentType) + " for "+addr)
            if 'xml' in receivedContentType or (
                     'rdf' in receivedContentType
                     and 'n3' not in receivedContentType):
                guess = "application/rdf+xml"
            elif 'n3' in receivedContentType:
                guess = "text/rdf+n3"
        if guess is None and contentType:
            if diag.chatty_flag > 9:
                progress("Given Content-type: " + repr(contentType) + " for "+addr)
            if 'xml' in contentType or (
                    'rdf' in contentType and 'n3' not in contentType):
                guess = "application/rdf+xml"
            elif 'n3' in contentType:
                guess = "text/rdf+n3"
            # Was "... or contentType.find('rq')", which is true for nearly
            # every string (find() returns -1 when absent) and false only
            # when 'rq' is at index 0.
            elif 'sparql' in contentType or 'rq' in contentType:
                guess = "x-application/sparql"

        try:
            data = netStream.read()
        finally:
            # Release the connection promptly, but never close our own stdin.
            if netStream is not sys.stdin:
                netStream.close()

        if guess is None:

            # can't be XML if it starts with these...
            if data.startswith("#") or data.startswith("@prefix"):
                guess = 'text/rdf+n3'
            elif data.startswith('PREFIX') or data.startswith('BASE'):
                guess = "x-application/sparql"
            elif 'xmlns="' in data or 'xmlns:' in data:
                guess = 'application/rdf+xml'
            else:
                guess = 'text/rdf+n3'
            if diag.chatty_flag > 9: progress("Guessed ContentType:" + guess)
    except (IOError, OSError):
        raise DocumentAccessError(addr, sys.exc_info() )

    if asIfFrom == None:
        asIfFrom = addr
    # NOTE(review): the "!= None" / "not openFormula" tests on the formula
    # are kept exactly as they were, in case the formula class customises
    # comparison or truth value.
    if openFormula != None:
        F = openFormula
    else:
        F = store.newFormula()
    if topLevel:
        newTopLevelFormula(F)

    if guess == "x-application/sparql":
        if diag.chatty_flag > 49: progress("Parsing as SPARQL")
        from sparql import sparql_parser
        import sparql2cwm
        convertor = sparql2cwm.FromSparql(store, F, why=why)
        import StringIO
        p = sparql_parser.N3Parser(StringIO.StringIO(data), sparql_parser.branches, convertor)
        F = p.parse(sparql_parser.start).close()
    elif guess == 'application/rdf+xml':
        if diag.chatty_flag > 49: progress("Parsing as RDF")
#       import sax2rdf, xml.sax._exceptions
#       p = sax2rdf.RDFXMLParser(store, F,  thisDoc=asIfFrom, flags=flags)
        if flags == 'rdflib' or int(os.environ.get("CWM_RDFLIB", 0)):
            parser = 'rdflib'
            flags = ''
        else:
            parser = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
        import rdfxml
        p = rdfxml.rdfxmlparser(store, F,  thisDoc=asIfFrom, flags=flags,
                parser=parser, why=why)

        p.feed(data)
        F = p.close()
    else:
        assert guess == 'text/rdf+n3'
        if diag.chatty_flag > 49: progress("Parsing as N3")
        if os.environ.get("CWM_N3_PARSER", 0) == 'n3p':
            import n3p_tm
            import triple_maker
            tm = triple_maker.TripleMaker(formula=F, store=store)
            p = n3p_tm.n3p_tm(asIfFrom, tm)
        else:
            p = notation3.SinkParser(store, F,  thisDoc=asIfFrom,flags=flags, why=why)

        try:
            p.startDoc()
            p.feed(data)
            p.endDoc()
        except:
            # Say which document failed, then let the original error
            # propagate.  The parentheses matter: without them the
            # "or" applied to the already formatted message.
            progress("Failed to parse %s" % (uri or data))
            raise

    if not openFormula:
        F = F.close()
    return F




def loadMany(store, uris, openFormula=None):
    """Get, parse and merge several documents into one formula.

    store:       object providing newFormula() and load().
    uris:        the URIs to load; must be exactly a list.
    openFormula: formula to merge into; a new one is made when omitted.

    Each document is loaded through store.load(), which is relied on to
    guess the format if necessary.
    Returns the result of closing the formula, i.e. the top-level formula
    which is the parse result.
    Raises IOError, SyntaxError
    """
    assert type(uris) is list
    F = openFormula
    if F == None:
        F = store.newFormula()
    f = F.uriref()  # value unused; the call itself is kept as it was
    for docURI in uris:
        F.reopen()  # should not be necessary
        store.load(docURI, openFormula=F, remember=0)
    return F.close()
    
    
    
# @@@@@@@@@@@@@ Ripped from python2.4/lib/urllib which is buggy


#  File "/devel/WWW/2000/10/swap/webAccess.py", line 104, in load
#    netStream = urlopenForRDF(addr, referer)
#  File "/devel/WWW/2000/10/swap/webAccess.py", line 72, in urlopenForRDF
#    return urllib.urlopen(addr)
#  File "/sw/lib/python2.4/urllib.py", line 77, in urlopen
#    return opener.open(url)
#  File "/sw/lib/python2.4/urllib.py", line 185, in open
#    return getattr(self, name)(url)
#  File "/sw/lib/python2.4/urllib.py", line 559, in open_data
#    f.fileno = None     # needed for addinfourl
#AttributeError: 'cStringIO.StringI' object has no attribute 'fileno'
# $ cwm 'data:text/rdf+n3;charset=utf-8;base64,QHByZWZpeCBsb2c6IDxodHRwOi8vd3d3LnczLm9yZy8yMDAwLzEwL3N3YXAvbG9nIz4gLgp7fSA9PiB7OmEgOmIgOmN9IC4g'

# Found the bug in the Python bug tracker.
# http://sourceforge.net/tracker/index.php?func=detail&aid=1365984&group_id=5470&atid=105470
# "Fixed in revision 41548 and 41549 (2.4). by birkenfeld"
# It is in effect fixed in python 2.4.4 

def open_data(url, data=None):
    """Use "data" URL: build a file-like response from the URL itself.

    url:  the data URL, with or without its leading "data:" scheme.
    data: ignored (POSTed data has no meaning for a data URL).

    Returns: a urllib.addinfourl whose headers carry Date, Content-type
             and Content-length and whose body is the decoded payload.
    Raises:  IOError('data error', 'bad data URL') when the URL has no
             comma separating the media type from the data.
    """
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value

    # The stdlib routine this was taken from is handed the URL with the
    # scheme already removed; callers in this module hold the whole
    # address, so drop the scheme here when it is present.
    if url[:5].lower() == 'data:':
        rest = url[5:]
    else:
        rest = url
    try:
        mediatype, payload = rest.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'

    # A trailing ";token" without "=" is the transfer encoding (base64),
    # not a media type parameter.
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]
    else:
        encoding = ''

    if encoding == 'base64':
        import base64
        payload = base64.decodestring(payload)
    else:
        # Was a bare unquote(), a name this module never imports.
        payload = urllib.unquote(payload)

    # Imported only once the URL has parsed, just before they are needed.
    import mimetools, time
    from StringIO import StringIO

    # %H:%M:%S rather than %T, which not every platform's strftime accepts.
    msg = '\n'.join([
        'Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                   time.gmtime(time.time())),
        'Content-type: %s' % mediatype,
        'Content-length: %d' % len(payload),
        '',
        payload,
    ])
    f = StringIO(msg)
    # Parsing the headers leaves f positioned at the start of the body.
    headers = mimetools.Message(f, 0)
    f.fileno = None     # needed for addinfourl
    return urllib.addinfourl(f, headers, url)


    
    
    
    
#@@@@@@@@@@  Junk - just to keep track of the interface to Sandro's stuff and rdflib
    
def getParser(format, inputURI, workingContext, flags):
    """Return something which can load from a URI in the given format, while
    writing to the given store.

    format:         "rdf", "n3", or any other language name, which is
                    handed to LX.language.getParser().
    inputURI:       URI of the document the parser will read.
    workingContext: formula the RDF/XML or N3 parser writes into.
    flags:          indexed by format name for "rdf"; passed whole to LX
                    for other formats; unused for "n3".

    For "rdf" the parser class comes from rdflib2rdf when "l" is among the
    rdf flags or CWM_RDF_PARSER is "rdflib2rdf", from sax2rdf when
    CWM_RDF_PARSER is unset or "sax2rdf"; any other value raises
    RuntimeError.

    NOTE(review): this function uses names (BecauseOfCommandLine, touch,
    _store, need, lxkb, LX) that are neither defined nor imported in the
    part of the module visible here -- confirm they exist before calling.
    """
    reason = BecauseOfCommandLine(sys.argv[0]) # @@ add user, host, pid, date time? Privacy!

    if format == "rdf":
        touch(_store)
        wantRdflib = "l" in flags["rdf"]
        if not wantRdflib:
            # No explicit flag: fall back to the environment's choice.
            parserName = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
            if parserName == "rdflib2rdf":
                wantRdflib = True
            elif parserName != "sax2rdf":
                raise RuntimeError("Unknown RDF parser: " + parserName)
        if wantRdflib:
            from rdflib2rdf import RDFXMLParser
        else:
            from sax2rdf import RDFXMLParser
        return RDFXMLParser(_store, workingContext, inputURI,
                                        flags=flags[format], why=reason)

    if format == "n3":
        touch(_store)
        return notation3.SinkParser(_store, openFormula=workingContext,
                    thisDoc=inputURI,  why=reason)

    # Anything else is delegated to the LX language registry.
    need(lxkb)
    touch(lxkb)
    return LX.language.getParser(language=format,
                                 sink=lxkb,
                                 flags=flags)

class DocumentAccessError(IOError):
    """Raised by load() when a document cannot be fetched or read.

    Carries the address that was being accessed and information about the
    underlying failure (load() passes sys.exc_info()); str() renders both.
    """

    def __init__(self, uri, info):
        # IOError.__init__ is not called; only these two attributes are set.
        self._info = info
        self._uri = uri

    def __str__(self):
        """Return a message naming the document and the indented cause."""
        # See C:\Python16\Doc\ref\try.html or URI to that effect
        # Element 1 of the info sequence is what gets reported as the cause.
        cause = self._info[1].__str__()
        return "Unable to access document <%s>, because:\n%s" % (
            self._uri, indentString(cause))