/usr/lib/R/site-library/GenomicRanges/unitTests/test_findOverlaps-methods.R is in r-bioc-genomicranges 1.30.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 | make_subject <- function() {
new("GRanges",
seqnames = Rle(factor(c("chr1", "chr2", "chr1", "chr3")), c(1, 3, 2, 4)),
ranges = IRanges(1:10, width = 10:1),
strand = Rle(strand(c("-", "+", "+", "-", "-", "-")), c(1, 2, 1, 1, 3, 2)),
seqinfo = Seqinfo(seqnames = paste("chr", 1:3, sep="")),
elementMetadata = DataFrame(score = 1:10, GC = seq(1, 0, length=10)))
}
make_query <- function() {
GRangesList(nomatch = GRanges(seqnames = "chr1",
ranges = IRanges(start=5, end=10),
strand = "+"),
onematch = GRanges(seqnames = "chr3",
ranges = IRanges(start=2, end=7),
strand = "-"),
twomatch = GRanges(seqnames = "chr1",
ranges = IRanges(start=1, end=5),
strand = "-"))
}
.checkHits <- function(q_hits, s_hits, q_len, s_len, current, select)
{
target <- Hits(q_hits, s_hits, q_len, s_len, sort.by.query=TRUE)
checkIdentical(t(selectHits(target, select=select)), t(unname(current)))
}
test_findOverlaps_no_overlaps_returns_empty_matches <- function()
{
query <- make_query()
subject <- make_subject()
ranges(subject) <- shift(ranges(subject), 1000L)
## select = "all"
for (type in c("any", "start", "end")) {
current <- findOverlaps(query, subject, type = type, select = "all")
.checkHits(integer(0), integer(0), 3, 10, current, select="all")
ans <- countOverlaps(query, subject, type = type)
checkIdentical(structure(c(0L, 0L, 0L),
names=c("nomatch", "onematch", "twomatch")),
ans)
ans <- subsetByOverlaps(query, subject, type = type)
checkIdentical(query[integer(0)], ans)
}
## select = "first"
expect <- rep(NA_integer_, length(query))
for (type in c("any", "start", "end")) {
ans <- findOverlaps(query, subject, type = type, select = "first")
checkIdentical(expect, ans)
}
}
test_findOverlaps_empty_query <- function()
{
query <- new("GRangesList")
subject <- make_subject()
## select = "all"
for (type in c("any", "start", "end")) {
current <- findOverlaps(query, subject, type = type, select = "all")
.checkHits(integer(0), integer(0), 0, 10, current, select="all")
ans <- countOverlaps(query, subject, type = type)
checkIdentical(integer(0), ans)
ans <- subsetByOverlaps(query, subject, type = type)
checkIdentical(query, ans)
}
## select = "first"
expect <- integer()
for (type in c("any", "start", "end")) {
ans <- findOverlaps(query, subject, type = type, select = "first")
checkIdentical(expect, ans)
}
}
test_findOverlaps_empty_subject <- function()
{
query <- make_query()
subject <- new("GRanges")
## select = "all"
for (type in c("any", "start", "end")) {
current <- findOverlaps(query, subject, type = type, select = "all")
.checkHits(integer(0), integer(0), 3, 0, current, select="all")
ans <- countOverlaps(query, subject, type = type)
checkIdentical(structure(c(0L, 0L, 0L),
names=c("nomatch", "onematch", "twomatch")),
ans)
ans <- subsetByOverlaps(query, subject, type = type)
checkIdentical(query[integer(0)], ans)
}
## select = "first"
expect <- rep(NA_integer_, length(query))
for (type in c("any", "start", "end")) {
ans <- findOverlaps(query, subject, type = type, select = "first")
checkIdentical(expect, ans)
}
}
test_findOverlaps_zero_one_two_matches <- function()
{
query <- make_query()
subject <- make_subject()
## select = "all"
ansAny <- findOverlaps(query, subject, type="any", select="all")
ansStart <- findOverlaps(query, subject, type="start", select="all")
ansEnd <- findOverlaps(query, subject, type="end", select="all")
.checkHits(c(2, 3, 3), c(7, 1, 5), 3, 10, ansAny, select="all")
.checkHits(3, 1, 3, 10, ansStart, select="all")
.checkHits(integer(0), integer(0), 3, 10, ansEnd, select="all")
countsAny <- countOverlaps(query, subject, type="any")
countsStart <- countOverlaps(query, subject, type="start")
countsEnd <- countOverlaps(query, subject, type="end")
.checkHits(c(2, 3, 3), c(7, 1, 5), 3, 10, countsAny, select="count")
.checkHits(3, 1, 3, 10, countsStart, select="count")
.checkHits(integer(0), integer(0), 3, 10, countsEnd, select="count")
subsetAny <- subsetByOverlaps(query, subject, type="any")
subsetStart <- subsetByOverlaps(query, subject, type="start")
subsetEnd <- subsetByOverlaps(query, subject, type="end")
checkIdentical(query[countsAny > 0], subsetAny)
checkIdentical(query[countsStart > 0], subsetStart)
checkIdentical(query[countsEnd > 0], subsetEnd)
## select = "first"
ansAny <- findOverlaps(query, subject, type="any", select="first")
ansStart <- findOverlaps(query, subject, type="start", select="first")
ansEnd <- findOverlaps(query, subject, type="end", select="first")
.checkHits(c(2, 3, 3), c(7, 1, 5), 3, 10, ansAny, select="first")
.checkHits(3, 1, 3, 10, ansStart, select="first")
.checkHits(integer(0), integer(0), 3, 10, ansEnd, select="first")
}
test_findOverlaps_multimatch_within_one_query <- function()
{
query <- make_query()
query[[3L]] <- c(query[[3L]], query[[3L]])
subject <- make_subject()
## select = "all"
ansAny <- findOverlaps(query, subject, type="any", select="all")
ansStart <- findOverlaps(query, subject, type="start", select="all")
ansEnd <- findOverlaps(query, subject, type="end", select="all")
.checkHits(c(2, 3, 3), c(7, 1, 5), 3, 10, ansAny, select="all")
.checkHits(3, 1, 3, 10, ansStart, select="all")
.checkHits(integer(0), integer(0), 3, 10, ansEnd, select="all")
countsAny <- countOverlaps(query, subject, type="any")
countsStart <- countOverlaps(query, subject, type="start")
countsEnd <- countOverlaps(query, subject, type="end")
.checkHits(c(2, 3, 3), c(7, 1, 5), 3, 10, countsAny, select="count")
.checkHits(3, 1, 3, 10, countsStart, select="count")
.checkHits(integer(0), integer(0), 3, 10, countsEnd, select="count")
subsetAny <- subsetByOverlaps(query, subject, type="any")
subsetStart <- subsetByOverlaps(query, subject, type="start")
subsetEnd <- subsetByOverlaps(query, subject, type="end")
checkIdentical(query[countsAny > 0], subsetAny)
checkIdentical(query[countsStart > 0], subsetStart)
checkIdentical(query[countsEnd > 0], subsetEnd)
## select = "first"
ansAny <- findOverlaps(query, subject, type="any", select="first")
ansStart <- findOverlaps(query, subject, type="start", select="first")
ansEnd <- findOverlaps(query, subject, type="end", select="first")
.checkHits(c(2, 3, 3), c(7, 1, 5), 3, 10, ansAny, select="first")
.checkHits(3, 1, 3, 10, ansStart, select="first")
.checkHits(integer(0), integer(0), 3, 10, ansEnd, select="first")
}
test_findOverlaps_either_strand <- function()
{
query <- make_query()
subject <- make_subject()
query@unlistData@strand <- Rle(strand(c("*", "*", "-")))
## select = "all"
ansAny <- findOverlaps(query, subject, type="any", select="all")
ansStart <- findOverlaps(query, subject, type="start", select="all")
ansEnd <- findOverlaps(query, subject, type="end", select="all")
.checkHits(c(1, 1, 1, 2, 3, 3), c(1, 5, 6, 7, 1, 5), 3, 10,
ansAny, select="all")
.checkHits(c(1, 3), c(5, 1), 3, 10, ansStart, select="all")
.checkHits(c(1, 1, 1), c(1, 5, 6), 3, 10, ansEnd, select="all")
countsAny <- countOverlaps(query, subject, type = "any")
countsStart <- countOverlaps(query, subject, type = "start")
countsEnd <- countOverlaps(query, subject, type = "end")
.checkHits(c(1, 1, 1, 2, 3, 3), c(1, 5, 6, 7, 1, 5), 3, 10,
countsAny, select="count")
.checkHits(c(1, 3), c(5, 1), 3, 10, countsStart, select="count")
.checkHits(c(1, 1, 1), c(1, 5, 6), 3, 10, countsEnd, select="count")
subsetAny <- subsetByOverlaps(query, subject, type = "any")
subsetStart <- subsetByOverlaps(query, subject, type = "start")
subsetEnd <- subsetByOverlaps(query, subject, type = "end")
checkIdentical(query[countsAny > 0], subsetAny)
checkIdentical(query[countsStart > 0], subsetStart)
checkIdentical(query[countsEnd > 0], subsetEnd)
# select = "first"
ansAny <- findOverlaps(query, subject, type="any", select="first")
ansStart <- findOverlaps(query, subject, type="start", select="first")
ansEnd <- findOverlaps(query, subject, type="end", select="first")
.checkHits(c(1, 1, 1, 2, 3, 3), c(1, 5, 6, 7, 1, 5), 3, 10,
ansAny, select="first")
.checkHits(c(1, 3), c(5, 1), 3, 10, ansStart, select="first")
.checkHits(c(1, 1, 1), c(1, 5, 6), 3, 10, ansEnd, select="first")
}
test_findOverlaps_minoverlap_GRanges_GRangesList <- function()
{
query <- make_subject()
subject <- make_query()
current <- findOverlaps(query, subject, minoverlap = 5)
.checkHits(1, 3, 10, 3, current, select="all")
current <- findOverlaps(query, subject, minoverlap = 6)
.checkHits(integer(0), integer(0), 10, 3, current, select="all")
}
test_findOverlaps_minoverlap_GRangesList_GRanges <- function()
{
subject <- make_subject()
query <- make_query()
current <- findOverlaps(query, subject, minoverlap = 5)
.checkHits(3, 1, 3, 10, current, select="all")
current <- findOverlaps(query, subject, minoverlap = 6)
.checkHits(integer(0), integer(0), 3, 10, current, select="all")
}
test_findOverlaps_minoverlap_GRangesList_GRangesList <- function()
{
query <- make_query()
subject <- GRangesList("g1" = make_subject())
current <- findOverlaps(query, subject, minoverlap = 1)
.checkHits(c(2, 3), c(1, 1), 3, 1, current, select="all")
query <- make_query()
subject <- GRangesList("g1" = make_subject())
current <- findOverlaps(query, subject, minoverlap = 6)
.checkHits(3, 1, 3, 1, current, select="all")
query <- make_query()
subject <- GRangesList("g1" = make_subject())
current <- findOverlaps(query, subject, minoverlap = 7)
.checkHits(integer(0), integer(0), 3, 1, current, select="all")
current <- findOverlaps(subject, query, minoverlap = 6)
.checkHits(1, 3, 1, 3, current, select="all")
}
test_findOverlaps_with_circular_sequences <- function()
{
gr <- GRanges(seqnames=rep.int("A", 4),
ranges=IRanges(start=c(2, 4, 6, 8), width=3))
## With A of length 9 --> no overlap between last and first ranges.
gr@seqinfo <- Seqinfo(seqnames="A", seqlengths=9, isCircular=TRUE)
current0 <- findOverlaps(gr, gr)
target0_q_hits <- c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L)
target0_s_hits <- c(1L, 2L, 1L, 2L, 3L, 2L, 3L, 4L, 3L, 4L)
.checkHits(target0_q_hits, target0_s_hits, 4, 4, current0, select="all")
## With A of length 8 --> last and first ranges do overlap.
gr@seqinfo <- Seqinfo(seqnames="A", seqlengths=8, isCircular=TRUE)
current1 <- findOverlaps(gr, gr)
.checkHits(c(1, target0_q_hits, 4), c(4, target0_s_hits, 1), 4, 4,
current1, select="all")
## With A of length 8 and minoverlap=2 --> no overlap between last
## and first ranges.
current2 <- findOverlaps(gr, gr, minoverlap=2)
.checkHits(1:4, 1:4, 4, 4, current2, select="all")
## With A of length 7 and minoverlap=2 --> last and first ranges
## do overlap.
gr@seqinfo <- Seqinfo(seqnames="A", seqlengths=7, isCircular=TRUE)
current3 <- findOverlaps(gr, gr, minoverlap=2)
.checkHits(c(1, 1:4, 4), c(4, 1:4, 1), 4, 4, current3, select="all")
## type = "within"
q0 <- GRanges("A", IRanges(c(11, 5, 4, 11, 11, 4),
c(30, 30, 30, 50, 51, 51)))
s0 <- GRanges("A", IRanges(5, width=46))
s0@seqinfo <- Seqinfo(seqnames="A", seqlengths=100, isCircular=TRUE)
## sanity check with linear shift
fo0 <- findOverlaps(q0, s0, type="within")
expected <- c(1L, 2L, 4L)
checkIdentical(queryHits(fo0), expected)
A=90
q1 <- shift(q0, A)
s1 <- shift(s0, A)
fo1 <- findOverlaps(q1, s1, type="within")
checkIdentical(queryHits(fo1), expected)
## circular shift
n1=-1; n2=0
q2 <- shift(q0, A + 100 * n1)
s2 <- shift(s0, A + 100 * n2)
fo1 <- findOverlaps(q1, s1, type="within")
checkIdentical(queryHits(fo1), expected)
## With A of length 8 --> range 3 is within range 2
gr <- GRanges(seqnames=rep.int("A", 4),
ranges=IRanges(start=c(2, 4, 6, 8), width=c(3, 3, 3, 5)))
gr@seqinfo <- Seqinfo(seqnames="A", seqlengths=8, isCircular=TRUE)
current4 <- findOverlaps(gr, gr, type="within")
.checkHits(c(1, 1:4), c(1, 4, 2, 3, 4), 4, 4, current4, select="all")
## With A of length 9 --> range 3 is not within range 2
gr@seqinfo <- Seqinfo(seqnames="A", seqlengths=9, isCircular=TRUE)
current5 <- findOverlaps(gr, gr, type="within")
.checkHits(1:4, 1:4, 4, 4, current5, select="all")
}
|