/usr/bin/homescrape is in mpdcron 0.3+git20110303-4.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/env ruby
# vim: set sw=2 sts=2 tw=100 et nowrap fenc=utf-8 :
# Copyright 2010 Ali Polatel <alip@exherbo.org>
# Distributed under the terms of the GNU General Public License v2
%w{getoptlong net/http time uri rubygems nokogiri}.each {|m| require m }
# Try to load the optional 'chronic' gem; has_chronic records whether it is
# available so that --since can fall back to Time.parse when it is not.
has_chronic = begin
  require 'chronic'
  true
rescue LoadError
  false
end
# Program name used in help, version and diagnostic output: the name of the
# running script with any ".rb" suffix removed.
MYNAME = File.basename($0, '.rb')
# Version string printed by --version.
MYVERSION = '0.3'
# Raised by Scraper#fetch when the downloaded page contains the text
# "User not found".
UserNotFound = Class.new(StandardError)
# Downloads the "tracks" pages of a last.fm user and yields every listed
# track (artist, title, loved flag) to a caller-supplied block.
#
# An HTTP proxy is taken from the http_proxy environment variable if set.
class Scraper
  LASTFM_URL = 'http://www.last.fm/user/%s/tracks'
  LASTFM_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
  # Table rows that carry a subject cell, a loved cell and a date cell.
  TRACK_ROW_XPATH = '//tr[' \
                    'td[@class="subjectCell"] ' \
                    'and td[@class="lovedCell"] ' \
                    'and td[@class="dateCell last"]' \
                    ']'

  attr_accessor :username, :url

  # username - last.fm user name; it is substituted into LASTFM_URL as-is.
  def initialize(username)
    @username = username
    @url = format(LASTFM_URL, username)

    # Set up proxy
    @proxy_url = URI.parse(ENV['http_proxy']) if ENV['http_proxy']
    return unless @proxy_url

    @proxy_host = @proxy_url.host
    @proxy_port = @proxy_url.port
    # Split on the first colon only, so a password may itself contain ':'.
    @proxy_user, @proxy_pass = @proxy_url.userinfo.split(':', 2) if @proxy_url.userinfo
  end

  # Walk the track listing starting at +page+ and call the block with
  # (artist, title, love) for every track row found.
  #
  # since - Date; scraping stops at the first row whose date is older.
  # page  - Integer number of the first page to request (default 1).
  # block - receives artist (String), title (String), love (true/false).
  #
  # Returns nil.
  # Raises UserNotFound if a downloaded page contains "User not found".
  def fetch(since, page = 1, &block)
    # Iterative instead of one recursion level per page.
    loop do
      doc = Nokogiri::HTML(download(page))
      # The page count is read from the first page requested.
      @lastpage = last_page_in(doc) if page == 1 || @lastpage.nil?

      doc.xpath(TRACK_ROW_XPATH).each do |row|
        # Cell positions among the row's child nodes, as the page layout
        # is assumed to be -- TODO confirm against the current markup.
        subject_cell = row.children[2]
        loved_cell = row.children[4]
        date_cell = row.children[8]

        artist = subject_cell.children[1].content
        title = subject_cell.children[3].content
        love = loved_cell.children[1] ? true : false
        # ".//" keeps the lookup inside this row's date cell; a leading "//"
        # would always match the first <abbr> of the whole document.
        stamp = date_cell.at('.//abbr/@title').to_s
        date = Date.strptime(stamp, LASTFM_DATE_FORMAT)

        # Leaves fetch entirely: everything from here on is older.
        return if since > date

        block.call(artist, title, love)
      end

      # Stop after the last page; do not request page lastpage + 1.
      break if page >= @lastpage

      page += 1
    end
  end

  private

  # Download one listing page and return its body as a String.
  def download(page)
    uri = URI.parse("#{@url}?page=#{page}")
    request = Net::HTTP::Get.new(uri.request_uri)
    http_class = Net::HTTP::Proxy(@proxy_host, @proxy_port, @proxy_user, @proxy_pass)
    response = http_class.start(uri.host, uri.port) { |http| http.request(request) }
    body = response.body
    raise UserNotFound if body =~ /User not found/

    body
  end

  # Number of the last page according to the "a.lastpage" link, or 1 when
  # the document has no such link.
  def last_page_in(doc)
    link = doc.css('a.lastpage').first
    link ? link.content.to_i : 1
  end
end
# Write the command-line help text to +out+ and terminate the process.
#
# out  - object responding to puts (the script passes $stdout or $stderr)
# code - Integer exit status handed to exit
#
# Does not return: always calls exit.
def usage(out, code)
  help_text = <<HELP
#{MYNAME} -- import last.fm data
Usage: #{MYNAME} [OPTIONS] USERNAME
Options:
--help, -h Display help and exit
--version, -V Display version and exit
--since, -s Import data since the given date
HELP
  out.puts(help_text)
  exit(code)
end
# Escape +src+ so it can be embedded in a '...' literal that itself sits
# inside a double-quoted argument on a shell command line (this is how the
# script builds its eugene commands).
#
# * Single quotes are doubled (presumably the expression language's way of
#   escaping a quote inside a '...' literal -- verify against eugene).
# * Every character that is special inside shell double quotes -- backslash,
#   double quote, dollar sign and backtick -- is prefixed with a backslash,
#   so scraped text cannot terminate the argument or trigger shell expansion.
#
# src - String to escape.
#
# Returns a new String; +src+ is not modified.
def quote(src)
  # Block form: the replacement is built literally, with no backreference
  # interpretation of the backslash.
  src.gsub("'", "''").gsub(/[\\"$`]/) { |ch| "\\#{ch}" }
end
# --- Command-line entry point ---------------------------------------------
# Parses the options, scrapes the track list of the user named by the first
# remaining argument and runs the external "eugene" command for every track.
opts = GetoptLong.new(
  ['--help', '-h', GetoptLong::NO_ARGUMENT],
  ['--version', '-V', GetoptLong::NO_ARGUMENT],
  ['--since', '-s', GetoptLong::REQUIRED_ARGUMENT]
)

# Default cut-off when --since is not given: the Unix epoch.
$since = Date.parse(Time.at(0).to_s)

begin
  opts.each do |opt, arg|
    case opt
    when '--help'
      usage($stdout, 0)
    when '--version'
      puts "#{MYNAME}-#{MYVERSION}"
      exit 0
    when '--since'
      # Chronic.parse yields nil for text it cannot interpret; report that
      # as an invalid date instead of failing later on an empty string.
      parsed = has_chronic ? Chronic.parse(arg) : Time.parse(arg)
      raise ArgumentError, "invalid date: #{arg}" if parsed.nil?

      $since = Date.parse(parsed.to_s)
    end
  end
rescue GetoptLong::Error
  # Unknown option or missing argument: show the help text and fail.
  usage($stderr, 1)
rescue ArgumentError => e
  # Unparseable --since value: short diagnostic rather than a backtrace.
  $stderr.puts "#{MYNAME}: #{e.message}"
  exit 1
end

usage($stderr, 1) if ARGV.empty?

# Echo a command line, run it through the shell and warn if it did not
# succeed (system returns false or nil in that case).
run = lambda do |cmd|
  puts "* " + cmd
  $stderr.puts "#{MYNAME}: command failed: #{cmd}" unless system(cmd)
end

importer = Scraper.new(ARGV[0])
begin
  importer.fetch($since) do |artist, title, love|
    # Commands are built by interpolation, never by sprintf on a template
    # that already contains scraped text: a '%' in an artist or title would
    # otherwise be taken as a format directive.
    # NOTE(review): these strings are executed by the shell, so their safety
    # rests on quote() escaping every shell-special character.
    track_expr = "\"artist='#{quote(artist)}' and title='#{quote(title)}'\""
    run.call("eugene count 1 #{track_expr}")
    run.call("eugene love #{track_expr}") if love
    run.call("eugene count --artist 1 \"name='#{quote(artist)}'\"")
  end
rescue UserNotFound
  $stderr.puts "#{MYNAME}: user not found: #{ARGV[0]}"
  exit 1
end
|