/usr/lib/R/site-library/XML/examples/itunes.R is in r-cran-xml 3.98-1.10-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Convert the iTunes library XML with
# xsltproc itunes.xsl ~/Music/iTunes/iTunes\ Music\ Library.xml > itunes.xml
# or with Sxslt
#
# user system elapsed
# 7.514 0.090 7.981
system.time({
  # The package is attached inside the timed expression, so its load time is
  # included in the measurement.
  library(Sxslt)

  # Apply the XSL style sheet to the XML file.
  doc <- xsltApplyStyleSheet("~/itunes.xml", "~/Projects/org/omegahat/XML/RS/examples/itunes.xsl")

  # Root node of the document held in the `doc` component of the result.
  top <- xmlRoot(doc$doc)

  # One entry per child of the root; each entry holds the xmlValue() of every
  # child node of that child.
  songs.xsl <- xmlApply(top, function(song) {
    xmlSApply(song, xmlValue)
  })
})
#####################
# As tempting as it is to take the xmlRoot() in this next command,
# that will allow the XML document to be freed and then a crash will ensue.
# Parse the XML file into an internal tree, keeping the document object in
# `doc` while its nodes are being used.
doc <- xmlInternalTreeParse("~/Projects/org/omegahat/XML/RS/examples/itunes.xml")
# fields = unique(unlist(xmlApply(top, names)))
# One entry per child of the root node; each entry holds the xmlValue() of
# every child node of that child.
songs <- xmlApply(xmlRoot(doc), function(song) {
  xmlSApply(song, xmlValue)
})
########################
# Working from the original format of /plist/dict/dict/dict/
# Parse the file into an internal tree.
doc <- xmlInternalTreeParse("~/itunes.xml")
# Subsetting the document with an XPath expression selects the matching
# nodes: here every <dict> nested three levels below <plist>.
dicts <- doc["/plist/dict/dict/dict"]
# Convert one <dict> node into a named vector.
#
# The values of the node's children are read as alternating key/value
# entries: the 1st, 3rd, 5th, ... become the names (with every space replaced
# by an underscore) and the 2nd, 4th, 6th, ... become the elements.
#
# dict: the node whose children are to be paired up.
# Returns the value entries, named by their preceding key entries.
#
# Note: defining `transform` at top level masks base::transform() for the
# rest of the session.
transform <- function(dict) {
  entries <- xmlSApply(dict, xmlValue)
  # Positions of the keys: 1, 3, 5, ... (one per key/value pair).
  key.pos <- seq(1, by = 2, length.out = length(entries) / 2)
  fields <- entries[key.pos + 1]
  names(fields) <- gsub(" ", "_", entries[key.pos])
  fields
}
# Convert every selected <dict> node with transform(); one element per node.
songs <- lapply(dicts, transform)
# For reading, xpath and lapply()
# user system elapsed
# 6.784 0.073 7.153
##########################################
# Distribution of bit rates for sampling of the sound.
# The field is pulled with "[" rather than "[[": on a named character vector
# "[[" stops with "subscript out of bounds" when a song has no such entry,
# whereas "[" gives NA, which table() leaves out by default.
table(as.numeric(sapply(songs, "[", "Bit_Rate")))
# How often each song was played.
# Songs with no "Play_Count" entry yield NA, which hist() drops, instead of
# aborting the whole call.
hist(as.numeric(sapply(songs, "[", "Play_Count")))
# Number of songs on each album: tabulate the "Album" field over all songs,
# then plot the distribution of those per-album counts.
hist(table(sapply(songs, "[", "Album")))
# Year song was recorded (?): histogram of the numeric "Year" field.
hist(as.numeric(sapply(songs, "[", "Year")))
# Song time: histogram of the numeric "Total_Time" field (this is not the
# "Size" field).
hist(as.numeric(sapply(songs, "[", "Total_Time")))
# Album time: group the songs by their "Album" field and, for each group,
# add up the numeric "Total_Time" values after dividing each by 1000
# (presumably milliseconds to seconds -- confirm against the data).
album.time <- tapply(
  songs,
  sapply(songs, "[", "Album"),
  function(tracks) {
    durations <- as.numeric(sapply(tracks, "[", "Total_Time"))
    sum(durations / 1000)
  }
)
# Time stamp at which each song was added, as POSIXct.
# The format stops at the seconds; strptime() ignores whatever follows in the
# string. The stamps are parsed as UTC rather than in the session's local
# time zone: plist dates are presumably written in UTC with a trailing "Z"
# -- confirm against the library file.
dateAdded <- as.POSIXct(strptime(sapply(songs, "[", "Date_Added"), "%Y-%m-%dT%H:%M:%S", tz = "UTC"))
#XXX
# Histogram of the time stamps as seconds since the epoch.
hist(as.numeric(dateAdded))
# Artists with most songs: counts per "Artist" value, largest first, limited
# to the first 40. head() is used instead of [1:40] so that a library with
# fewer than 40 artists is not padded with NA entries.
head(sort(table(sapply(songs, "[", "Artist")), decreasing = TRUE), 40)
# How many songs on single and double "albums": number of songs per
# "Disc_Number" value.
table(sapply(songs, "[", "Disc_Number"))
# Number of songs per "Kind" value.
table(sapply(songs, "[", "Kind"))
# Number of songs per "Genre" value.
table(sapply(songs, "[", "Genre"))
# Check the sampling rate for points off the line.
# Scatter plot of the numeric "Total_Time" field (x axis) against the numeric
# "Size" field (y axis), one point per song.
plot(as.numeric(sapply(songs, "[", "Total_Time")), as.numeric(sapply(songs, "[", "Size")))
|