/usr/lib/R/site-library/graph/perf/graphperf.Rnw is in r-bioc-graph 1.56.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 | \documentclass{article}
\usepackage{hyperref}
\textwidth=6.2in
\textheight=8.5in
\oddsidemargin=.1in
\evensidemargin=.1in
\headheight=-.3in
\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\newcommand{\Rmethod}[1]{{\texttt{#1}}}
\newcommand{\Rcode}[1]{{\texttt{#1}}}
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rpackage}[1]{{\textit{#1}}}
\newcommand{\Rclass}[1]{{\textit{#1}}}
\newcommand{\classdef}[1]{%
{\em #1}
}
\newcommand{\myincfig}[3]{\begin{figure}[htbp]
\begin{center}
\includegraphics[width=#2]{#1}
\caption{\label{#1}#3}
\end{center} \end{figure}}
\begin{document}
\title{Graph Package Performance Report}
\author{Seth Falcon}
\maketitle
\SweaveOpts{keep.source=TRUE}
<<funcs, echo=FALSE, results=hide>>=
st <- system.time
perfRow <- function(g, t, op="") {
data.frame(class = class(g),
op = op,
nodes = numNodes(g),
edges = numEdges(g),
directed = ifelse(isDirected(g), "T", "F"),
user = t[1],
sys = t[2],
clock = t[3],
stringsAsFactors = FALSE)
}
timeRow <- function(expr, op) {
t <- system.time(g <- expr)
list(result=g, row=perfRow(g, t, op))
}
randNonDiagIndex <- function(n, count)
{
diagIdx <- 1L + 0L:(n - 1L) * (n + 1L)
idx <- seq_len(n^2)[-diagIdx]
sample(idx, count)
}
randAdjMat <- function(nodeCount, edgeCount)
{
nodes <- paste("graph_node_", seq_len(nodeCount), sep="")
am <- matrix(0L, nrow = nodeCount, ncol = nodeCount,
dimnames = list(nodes, nodes))
am[randNonDiagIndex(nodeCount, edgeCount)] <- 1L
am
}
randSymAdjMat <- function(nodeCount, edgeCount)
{
nodes <- paste("graph_node_", seq_len(nodeCount), sep="")
am <- matrix(0L, nrow = nodeCount, ncol = nodeCount,
dimnames = list(nodes, nodes))
upt <- as.vector(upper.tri(am))
idx1 <- sample(seq_len(nodeCount^2)[upt], edgeCount)
am[idx1] <- 1L
am <- am + t(am)
am
}
@
\section{Introduction}
<<setup, echo=FALSE, results=hide>>=
library("graph")
set.seed(0xab34eL)
medEdgeCount <- 5000L
bigEdgeCount <- 260000L
nodeCount <- 2000L
bigSymMat <- randSymAdjMat(nodeCount, bigEdgeCount)
medSymMat <- randSymAdjMat(nodeCount, medEdgeCount)
bigMat <- randAdjMat(nodeCount, bigEdgeCount)
medMat <- randAdjMat(nodeCount, medEdgeCount)
dim(bigSymMat)
dim(medSymMat)
@
This document surveys the runtime performance of graph operations
believed to be heavily used for common bioinformatic analyses. We
use two \Sexpr{nodeCount} node graphs, one with \Sexpr{medEdgeCount}
edges and the other with \Sexpr{bigEdgeCount} edges. Both graphs are
generated randomly.
<<matrixDetails, echo=FALSE, results=hide>>=
matDim <- dim(bigSymMat)
cntZero <- sum(bigSymMat == 0)
cntNonZero <- sum(bigSymMat != 0)
pctNonZero <- sum(bigSymMat != 0) / nrow(bigSymMat)^2
@
\section{Timing of Operations}
Here we look at the time to construct \Rclass{graphAM} and
\Rclass{graphNEL} instances using two adjacency matrices with the same
node count, but different number of edges.
\subsection{Creating new graph objects}
First, we look at the \Rclass{graphAM} representation. We construct
directed and undirected graphs with different numbers of edges for the
same node set.
<<graphAMCreation, echo=FALSE>>=
## undirected
ans <- timeRow(graphAM(adjMat = bigSymMat, edgemode="undirected"), "new")
df <- ans$row
ans <- timeRow(graphAM(adjMat = medSymMat, edgemode="undirected"), "new")
df <- rbind(df, ans$row)
## directed
ans <- timeRow(graphAM(adjMat = bigMat, edgemode="directed"), "new")
df <- rbind(df, ans$row)
g1 <- ans$result
ans <- timeRow(graphAM(adjMat = medMat, edgemode="directed"), "new")
df <- rbind(df, ans$row)
g2 <- ans$result
## using a from/to matrix
ftmat1 <- t(edgeMatrix(g1))
ans <- timeRow(graphAM(adjMat = ftM2adjM(ftmat1),
edgemode = "directed"), "new f/t")
df <- rbind(df, ans$row)
ftmat2 <- t(edgeMatrix(g2))
ans <- timeRow(graphAM(adjMat = ftM2adjM(ftmat2),
edgemode = "directed"), "new f/t")
df <- rbind(df, ans$row)
rownames(df) <- NULL
df.AM.new <- df
df.AM.new
@
For \Rclass{graphNEL} we can use the \Rcode{as(matrix, "graphNEL")}
coerce method for the undirected case. For a directed graph, we
convert the adjacency matrix into a from-to matrix using
\Rcode{t(edgeMatrix(g1))} and then use \Rfunction{ftM2graphNEL}.
<<graphNELCreation, echo=FALSE>>=
ans <- timeRow(as(bigSymMat, "graphNEL"), "as(m, NEL)")
df.NEL.new <- ans$row
ans <- timeRow(as(medSymMat, "graphNEL"), "as(m, NEL)")
df.NEL.new <- rbind(df.NEL.new, ans$row)
ftMat1 <- t(edgeMatrix(g1))
ftMat2 <- t(edgeMatrix(g2))
## there's a bit of an inconsistency here
## since we aren't using the node labels
ans <- timeRow(ftM2graphNEL(ftMat1), "ft2NEL")
df.NEL.new <- rbind(df.NEL.new, ans$row)
ans <- timeRow(ftM2graphNEL(ftMat2), "ft2NEL")
df.NEL.new <- rbind(df.NEL.new, ans$row)
rownames(df.NEL.new) <- NULL
df.NEL.new
@
Here are some notes on the \textit{experimental} \Rclass{graphAM2}
class that uses a bit array (backed by a raw vector) to store an
adjacency matrix.
First, we make a more standard from/to matrix:
<<fromToSetup>>=
ft1 <- matrix("", nrow = nrow(ftmat1), ncol = ncol(ftmat2))
ft1[, 1L] <- nodes(g1)[ftmat1[, 1L]]
ft1[, 2L] <- nodes(g1)[ftmat1[, 2L]]
ft2 <- matrix("", nrow = nrow(ftmat2), ncol = ncol(ftmat2))
ft2[, 1L] <- nodes(g1)[ftmat2[, 1L]]
ft2[, 2L] <- nodes(g1)[ftmat2[, 2L]]
@
<<graphAM2Creation, echo=FALSE>>=
ans <- timeRow(GraphAM2(from = ft1[, 1L], to = ft1[, 2L],
nodes = nodes(g1),
edgemode = "directed"),
"GraphAM2")
df.AM2.new <- ans$row
gbit1 <- ans$result
ans <- timeRow(GraphAM2(from = ft1[, 1L], to = ft1[, 2L],
nodes = nodes(g1),
edgemode = "undirected"),
"GraphAM2")
df.AM2.new <- rbind(df.AM2.new, ans$row)
ans <- timeRow(GraphAM2(from = ft2[, 1L], to = ft2[, 2L],
nodes = nodes(g2),
edgemode = "directed"),
"GraphAM2")
df.AM2.new <- rbind(df.AM2.new, ans$row)
gbit2 <- ans$result
ans <- timeRow(GraphAM2(from = ft2[, 1L], to = ft2[, 2L],
nodes = nodes(g2),
edgemode = "undirected"),
"GraphAM2")
df.AM2.new <- rbind(df.AM2.new, ans$row)
rownames(df.AM2.new) <- NULL
df.AM2.new
@
\subsection{\Rclass{graphAM} to \Rclass{graphNEL} conversion}
Convert from \Rclass{graphAM} to \Rclass{graphAM}.
<<coerceToNEL, echo=FALSE>>=
ans <- timeRow(as(g1, "graphNEL"), "as(AM, NEL)")
df.AM.to.NEL <- ans$row
gnel1 <- ans$result
ans <- timeRow(as(g2, "graphNEL"), "as(AM, NEL)")
df.AM.to.NEL <- rbind(df.AM.to.NEL, ans$row)
gnel2 <- ans$result
rownames(df.AM.to.NEL) <- NULL
df.AM.to.NEL
@
\subsection{Size comparison}
<<sizeComp, echo=FALSE>>=
objType <- sapply(list(g1, gnel1, gbit1), class)
Size <- sapply(list(g1, gnel1, gbit1), object.size)
data.frame(class=objType, size=Size)
@
\subsection{intersection and union}
Currently, intersection and union are implemented for the
\Rclass{graph} super class using the \Rmethod{nodes}, \Rmethod{edges}
methods and constructing a new \Rclass{graphNEL} for the result. This
is suboptimal for \Rclass{graphAM} objects.
%% TODO: evaluate intersection/union of two random graphs with same
%% edge counts as well as the mixed size case
<<intersectionAndUnion, echo=FALSE>>=
## interesting to note that intersection and union
## are returning graphNEL, even when input is AM
ans <- timeRow(intersection(g1, g2), "intersection AM")
df.setOps <- ans$row
ans <- timeRow(intersection(gnel1, gnel2), "intersection NEL")
df.setOps <- rbind(df.setOps, ans$row)
## intersection2 is giving me
## INTEGER() can only be applied to a 'integer', not a 'double'
## ans <- timeRow(intersection2(g1, g2), "intersection2 AM")
## df.setOps <- ans$row
## ans <- timeRow(intersection2(gnel1, gnel2), "intersection2 NEL")
## df.setOps <- rbind(df.setOps, ans$row)
ans <- timeRow(union(g1, g2), "union AM")
df.setOps <- rbind(df.setOps, ans$row)
ans <- timeRow(union(gnel1, gnel2), "union NEL")
df.setOps <- rbind(df.setOps, ans$row)
ans <- timeRow(graph:::edge_set_intersect(gbit1, gbit2),
"AM2 I")
df.setOps <- rbind(df.setOps, ans$row)
ans <- timeRow(graph:::edge_set_union(gbit1, gbit2),
"AM2 U")
df.setOps <- rbind(df.setOps, ans$row)
rownames(df.setOps) <- NULL
df.setOps
@
Next we explore some different approaches to thresholding a graph's
edges based on edge weight. Below we add random edge weights to a new
graph object \Robject{gw} with the same structure as \Robject{g1}.
<<edgeWeights>>=
## creating with edge weights
gw <- g1
edgeDataDefaults(gw, "weight") <- 1L
W <- abs(rnorm(numEdges(gw)))
## there should be an easier way to do this.
## sadly, it might be easier to extract the matrix
## and set weights on the matrix and create a new graphAM
st(emat <- edgeMatrix(gw))
eFrom <- nodes(gw)[emat[1L, ]]
eTo <- nodes(gw)[emat[2L, ]]
## setting edge weights for all edges
st(edgeData(gw, from = eFrom, to = eTo,
attr = "weight") <- W)
## extracting all edge weights
st(ew <- edgeData(gw, attr="weight"))
@
First approach is to pull out the raw adjacency matrix with values
representing edge weights, perform the thresholding via matrix
assignment and convert back to a \Rclass{graphAM}.
<<edgeWeightThresholding1>>=
## edge thresholding.
st({
c1 <- function() {
M <- as(gw, "matrix")
idx = (M > 0) & (M < 0.3)
M[idx] <- 0
graphAM(adjMat = M, values = list(weight = 1L),
edgemode = "directed")
}
ewt1 <- c1()
})
ewt1
@
Another approach is to extract the edge attributes, identify the edges
to be removed (unfortunately, this requires string parsing) and then
calling \Rmethod{removeEdge}.
<<edgeWeightThresholding2>>=
st({
c1 <- function() {
ew <- unlist(edgeData(gw, attr = "weight"))
toRemove <- names(ew[ew < 0.3])
fromTo <- do.call(rbind, strsplit(toRemove, "|", fixed = TRUE))
removeEdge(fromTo[, 1], fromTo[, 2], gw)
}
ewt2 <- c1()
})
ewt2
@
\subsection{Node permutation}
Here's one way to permute the node labels of a \Rclass{graphAM}
object.
<<nodePermFunc>>=
permNodesAM <- function(g) {
m <- g@adjMat
colnames(m) <- sample(colnames(g@adjMat))
graphAM(adjMat = m, edgemode = edgemode(g))
}
@
<<nodePermTiming, echo=FALSE>>=
ans <- timeRow(permNodesAM(g1), "permNodesAM")
df.node.perm <- ans$row
ans <- timeRow(permNodesAM(g2), "permNodesAM")
df.node.perm <- rbind(df.node.perm, ans$row)
rownames(df.node.perm) <- NULL
df.node.perm
@
\end{document}
|