/usr/lib/R/site-library/Biostrings/UnitTests/runit-matchPDict.R is in r-bioc-biostrings 2.32.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# --- to be included in an upcoming biocDatasets package
randomDNASequences <- function(n, w)
{
alphabet <- DNA_BASES
w <- rep(w, length=n)
sequences <- sapply(seq(1, n, length=n),
function(x) {
s <- sample(alphabet, w[x], replace=TRUE)
s <- paste(s, collapse="")
return(s)
})
return(Biostrings::DNAStringSet(sequences))
}
msubseq <- function(x, ir)
{
## differs from subseq in the sense that several subsequences
## from the same sequence are extracted
## x: XString
## ir: IRanges
res <- vector("character", length = length(ir))
for (i in seq(along=res)) {
res[i] <- as.character(subseq(x, start=ir@start[i], width=width(ir)[i]))
## forced cast: chain of tools for DNAString seems interupted for
## some use cases (or I missed something)
}
res <- DNAStringSet(res)
return(res)
}
# ---
test_pdictConstantWidth <- function()
{
set.seed(1)
l <- 150
target <- randomDNASequences(1, l)[[1]]
W <- 20
L <- 6
ir <- successiveIRanges(rep(W, L), gapwidth = 1)
short_sequences <- msubseq(target, ir)
# shuffle the sequences (they are not in consecutive order)
o <- sample(seq(along=short_sequences))
dna_short <- DNAStringSet(short_sequences[o])
pdict <- PDict(dna_short)
checkEquals(L, length(pdict))
checkEquals(rep(W, L), width(pdict))
checkEquals(NULL, head(pdict))
checkEquals(W, tb.width(pdict))
checkEquals(NULL, tail(pdict))
}
test_pdictVariableWidth <- function()
{
set.seed(1)
l <- 150
target <- randomDNASequences(1, l)[[1]]
W <- 20
L <- 6
n_cut <- sample(0:5, L, replace=TRUE)
ir <- successiveIRanges(rep(W, L) - n_cut, gapwidth = 1 + n_cut[-length(n_cut)])
short_sequences <- msubseq(target, ir)
# shuffle the sequences (they are not in consecutive order)
o <- sample(seq(along=short_sequences))
dna_var_short <- DNAStringSet(short_sequences[o])
pdict <- PDict(dna_var_short,
tb.start=1, # can't this be
tb.width=min(width(short_sequences)) # the default for
# variable width ?
)
checkEquals(L, length(pdict))
checkEquals( (rep(W, L) - n_cut)[o], width(pdict))
checkEquals(NULL, head(pdict))
shortest_seq_width <- min(width(dna_var_short))
checkEquals(shortest_seq_width,
tb.width(pdict)) # mostly a sanity check
checkEquals(substring(short_sequences, shortest_seq_width+1)[o],
as.character(tail(pdict)))
}
test_matchConstantWidth <- function()
{
set.seed(1)
l <- 150
dna_target <- randomDNASequences(1, l)[[1]]
W <- 20
L <- 6
ir <- successiveIRanges(rep(W, L), gapwidth = 1)
short_sequences <- msubseq(dna_target, ir)
# shuffle the sequences (so they are not in consecutive order)
o <- sample(seq(along=short_sequences))
dna_short <- DNAStringSet(short_sequences[o])
pdict <- PDict(dna_short)
res <- matchPDict(pdict, dna_target)
# mostly a sanity check
checkEquals(L, length(res))
for (i in seq(along=res)) {
m_start <- ir[o][i]@start
checkEquals(m_start, start(res[[i]]))
checkEquals(W, width(res[[i]]))
checkEquals(m_start + W - 1, end(res[[i]])) # mostly a sanity check
}
}
test_matchVariableWidth <- function()
{
set.seed(1)
l <- 150
dna_target <- randomDNASequences(1, l)[[1]]
W <- 20
L <- 6
n_cut <- sample(0:5, L, replace=TRUE)
ir <- successiveIRanges(rep(W, L) - n_cut, gapwidth = 1 + n_cut[-length(n_cut)])
short_sequences <- msubseq(dna_target, ir)
# shuffle the sequences (they are not in consecutive order)
o <- sample(seq(along=short_sequences))
dna_var_short <- DNAStringSet(short_sequences[o])
pdict <- PDict(dna_var_short,
tb.start=1, # can't this be
tb.width=min(width(dna_var_short)) # the default for
# variable width ?
)
res <- matchPDict(pdict, dna_target)
# mostly a sanity check
checkEquals(L, length(res))
iro <- ir[o]
for (i in seq(along=res)) {
checkEquals(start(iro)[i], start(res[[i]]))
checkEquals(width(iro)[i], width(res[[i]]))
checkEquals(end(iro)[i], end(res[[i]])) # mostly a sanity check
}
}
|