This file is indexed.

/usr/lib/rhythmbox/plugins/lyrics/TerraParser.py is in rhythmbox-plugins 3.0.2-0ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
#
# Copyright (C) 2009 Hardy Beltran Monasterios
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# The Rhythmbox authors hereby grant permission for non-GPL compatible
# GStreamer plugins to be used and distributed together with GStreamer
# and Rhythmbox. This permission is above and beyond the permissions granted
# by the GPL license by which Rhythmbox is covered. If you modify this code
# you may extend this exception to your version of the code, but you are not
# obligated to do so. If you do not wish to do so, delete this exception
# statement from your version.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA.


import urllib.parse
import rb
import re
import sys

# Deal with html entities and utf-8
# code taken from django/utils/text.py

from html.entities import name2codepoint

# Matches one HTML entity: named (&amp;), decimal (&#38;) or hexadecimal
# (&#x26;). Group 1 is the text between '&' and ';'.
# Raw string so that "\w" reaches the regex engine instead of being an
# invalid string escape.
pattern = re.compile(r"&(#?\w+?);")

def _replace_entity(match):
	"""Return the character for a single entity match.

	match is a match object produced by the module-level `pattern`;
	group 1 holds the entity body without the surrounding '&' and ';'.

	Numeric references ("#123", "#x7B", "#X7B") are converted with chr(),
	named references are looked up in html.entities.name2codepoint.
	Anything that cannot be converted (unknown name, malformed number,
	code point out of range) is returned unchanged, i.e. the full
	original "&...;" text.
	"""
	text = match.group(1)
	if text[0] == '#':
		text = text[1:]
		try:
			if text[0] in 'xX':
				c = int(text[1:], 16)
			else:
				c = int(text)
			return chr(c)
		except (ValueError, OverflowError):
			# ValueError: not a number, or outside the Unicode range.
			# OverflowError: chr() raises it for huge values such as
			# "&#99999999999999999999;"; leave that text untouched too.
			return match.group(0)
	else:
		try:
			return chr(name2codepoint[text])
		except (ValueError, KeyError):
			return match.group(0)

def unescape_entities(text):
	"""Return text with every recognised HTML entity replaced by its character.

	Entities that cannot be resolved are left in the output as they were.
	"""
	return pattern.sub(_replace_entity, text)

class TerraParser (object):
	"""Lyrics lookup against http://letras.mus.br/ for one artist/title pair.

	search() starts the download through rb.Loader; got_lyrics() receives
	the raw response and reports the outcome to the caller-supplied
	callback as callback(lyrics_or_None, *data).
	"""

	def __init__(self, artist, title):
		"""Remember the artist and title that search() will look up."""
		self.artist = artist
		self.title = title

	def search(self, callback, *data):
		"""Request the lyrics page for self.artist / self.title.

		callback -- called later by got_lyrics() as callback(lyrics, *data),
		            where lyrics is a string or None.
		data     -- extra positional arguments passed through to callback.
		"""
		path = 'http://letras.mus.br/'

		# The query value is "<artist> - <title>", each piece URL-quoted.
		artist = urllib.parse.quote(self.artist)
		title = urllib.parse.quote(self.title)
		join = urllib.parse.quote(' - ')

		wurl = 'winamp.php?t=%s%s%s' % (artist, join, title)
		print("search URL: " + wurl)

		loader = rb.Loader()
		loader.get_url (path + wurl, self.got_lyrics, callback, *data)

	def got_lyrics(self, result, callback, *data):
		"""Handle the downloaded page and invoke callback exactly once.

		result   -- response body as bytes, or None when nothing was fetched.
		callback -- called as callback(lyrics, *data); lyrics is None when
		            the download failed, the site reports the song as not
		            found, or the page does not have the expected markup.
		"""
		if result is None:
			callback (None, *data)
			return

		# Undecodable bytes are replaced rather than raising, so that a
		# mis-encoded page still ends in a callback instead of an exception.
		result = result.decode('utf-8', errors='replace')
		if 'Música não encontrada' in result:
			print("not found")
			callback (None, *data)
		elif '<div id="letra">' in result:
			callback(self.parse_lyrics(result), *data)
		else:
			callback (None, *data)


	def parse_lyrics(self, source):
		"""Extract "title - artist" plus the lyrics text from the page HTML.

		source -- decoded page text containing a '<div id="letra">' marker.

		Returns the formatted lyrics followed by a credit line for the
		site, or None when the page does not contain the expected sections.
		"""
		def unspace(x):
			# Collapse every run of whitespace into a single space.
			return " ".join(x.split())
		def untag(x):
			# Drop anything that looks like an HTML tag.
			return re.sub(r'<.*?>', '', x)

		# Keep only the text that follows the first lyrics marker.
		parts = source.split('<div id="letra">')
		if len(parts) < 2:
			return None

		sections = re.split(r'</?div.*?>', parts[1])
		# sections[1] = artist+title
		# sections[2] = lyrics
		if len(sections) < 3:
			# Truncated or restructured page: report "no lyrics" instead
			# of raising IndexError.
			return None

		header = "".join(sections[1].splitlines())
		# <h1><a>title</a></h1> <h2><a>artist</a></h2>
		bits = re.findall(r'<h.>(.*?)</h.>', header)
		artistitle = unspace(untag(" - ".join(bits)))

		lyrics = unescape_entities(artistitle) + "\n" + unescape_entities(untag(sections[2]))
		lyrics += "\n\nEsta letra foi disponibilizada pelo site\nhttp://letras.mus.br"
		return lyrics