This file is indexed.

/usr/lib/rhythmbox/plugins/lyrics/TerraParser.py is in rhythmbox-plugins 2.96-0ubuntu4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
# -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
#
# Copyright (C) 2009 Hardy Beltran Monasterios
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# The Rhythmbox authors hereby grant permission for non-GPL compatible
# GStreamer plugins to be used and distributed together with GStreamer
# and Rhythmbox. This permission is above and beyond the permissions granted
# by the GPL license by which Rhythmbox is covered. If you modify this code
# you may extend this exception to your version of the code, but you are not
# obligated to do so. If you do not wish to do so, delete this exception
# statement from your version.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA.


import urllib
import rb
import re
import sys

# Deal with HTML entities and UTF-8.
# Code taken from django/utils/text.py

from htmlentitydefs import name2codepoint

# Matches "&name;", "&#123;" and "&#x7B;" style references; group 1 is the
# text between "&" and ";".  Raw string so that "\w" reaches the regex engine
# without relying on unknown-escape passthrough in the string literal.
pattern = re.compile(r"&(#?\w+?);")

def _replace_entity(match):
	"""Return the character for one matched entity reference.

	match is a match object produced by the module-level pattern.  Numeric
	references ("#233", "#xE9", "#XE9") are converted from decimal or
	hexadecimal; any other name is looked up in name2codepoint.

	The matched text is returned unchanged when the reference cannot be
	resolved: unknown name, malformed number, or a code point that unichr()
	rejects.
	"""
	text = match.group(1)
	try:
		if text[0] == u'#':
			# The pattern guarantees at least one character after "#".
			digits = text[1:]
			if digits[0] in u'xX':
				codepoint = int(digits[1:], 16)
			else:
				codepoint = int(digits)
		else:
			codepoint = name2codepoint[text]
		return unichr(codepoint)
	except (ValueError, OverflowError, KeyError):
		# ValueError: bad digits or code point out of range.
		# OverflowError: number too large for unichr() to even accept.
		# KeyError: entity name not present in name2codepoint.
		return match.group(0)

def unescape_entities(text):
	"""Return text with every resolvable HTML entity reference replaced.

	References that cannot be resolved are left exactly as written.
	"""
	return pattern.sub(_replace_entity, text)

class TerraParser (object):
	"""Look up song lyrics on letras.terra.com.br.

	The artist and title given to the constructor are used to build the
	query URL; the downloaded page is then reduced to plain text.
	"""

	def __init__(self, artist, title):
		"""Remember the artist and title to search for."""
		self.artist = artist
		self.title = title

	def search(self, callback, *data):
		"""Start a lyrics lookup for the stored artist and title.

		The query URL is handed to rb.Loader.get_url() with got_lyrics as
		the handler, so callback ends up being invoked as
		callback(lyrics, *data), where lyrics is None when nothing usable
		was found.
		"""
		path = 'http://letras.terra.com.br/'

		artist = urllib.quote(self.artist)
		title = urllib.quote(self.title)
		join = urllib.quote(' - ')

		wurl = 'winamp.php?t=%s%s%s' % (artist, join, title)
		# The single-argument parenthesised form prints the same text
		# whether print is a statement or a function.
		print("search URL: " + wurl)

		loader = rb.Loader()
		loader.get_url (path + wurl, self.got_lyrics, callback, *data)

	def got_lyrics(self, result, callback, *data):
		"""Inspect the downloaded page and report the outcome to callback.

		result is the page body, or None.  callback is called exactly once
		as callback(lyrics, *data); lyrics is None when result is None, when
		the page carries the site's "not found" message, or when no lyrics
		block can be extracted.
		"""
		if result is None:
			callback (None, *data)
			return

		# The page is read as ISO-8859-1 and re-encoded as UTF-8 so that it
		# can be compared with the UTF-8 literals in this file.
		result = result.decode('iso-8859-1').encode('UTF-8')
		if re.search('Música não encontrada', result):
			print("not found")
			callback (None, *data)
		elif re.search('<div id="letra">', result):
			callback(self.parse_lyrics(result), *data)
		else:
			callback (None, *data)


	def parse_lyrics(self, source):
		"""Extract the heading and lyrics text from a lyrics page.

		The text between <div id="letra"> and the first <p> (tags removed)
		is used as the artist/title heading; the contents of that first
		paragraph, with <br> tags removed, are the lyrics.  A credit line
		for the site is appended.

		Byte-string input is returned as UTF-8 bytes, text input as text.
		Returns None when the expected markup is missing.
		"""
		# Entity replacement yields unicode text.  Mixing that with
		# non-ASCII byte strings forces an implicit ASCII decode, so byte
		# input is decoded up front and re-encoded on the way out.
		is_bytes = isinstance(source, bytes)
		if is_bytes:
			source = source.decode('utf-8', 'replace')

		parts = re.split('<div id="letra">', source)
		if len(parts) < 2:
			return None
		parts = re.split('<p>', parts[1])
		if len(parts) < 2:
			return None

		# Parse artist and title
		artistitle = re.sub('<.*?>', '', parts[0])
		rx = re.compile('^[\t\n]+', re.M)
		artistitle = rx.sub('', artistitle)
		# Parse lyrics
		lyrics = re.split('</p>', parts[1])[0]
		# Accept <br>, <br/> and <br /> in either case.
		lyrics = re.sub(r'<[Bb][Rr]\s*/?>', '', lyrics)

		lyrics = unescape_entities(artistitle) + "\n" + unescape_entities(lyrics)
		lyrics += "\n\nEsta letra foi disponibilizada pelo site\nhttp://letras.terra.com.br"

		if is_bytes:
			lyrics = lyrics.encode('utf-8')
		return lyrics