This file is indexed.

/usr/lib/R/site-library/Biostrings/UnitTests/runit-matchPDict.R is in r-bioc-biostrings 2.32.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# --- to be included in an upcoming biocDatasets package
randomDNASequences <- function(n, w)
{
  ## Generate random DNA sequences.
  ## n: number of sequences to generate
  ## w: width(s) of the sequences, recycled (or truncated) to length n
  ## Returns a DNAStringSet built from n strings, each sampled with
  ## replacement from DNA_BASES.
  alphabet <- DNA_BASES
  ## spell out 'length.out' rather than relying on partial argument matching
  w <- rep(w, length.out = n)
  ## vapply() pins the result to a character vector even when n == 0
  ## (sapply() would return an empty list there); seq_len(n) visits the
  ## same indices 1..n in the same order, so the sample() calls consume
  ## the random number stream exactly as before.
  sequences <- vapply(seq_len(n),
                      function(i) {
                        bases <- sample(alphabet, w[i], replace = TRUE)
                        paste(bases, collapse = "")
                      },
                      character(1))
  Biostrings::DNAStringSet(sequences)
}

msubseq <- function(x, ir)
{
  ## differs from subseq in the sense that several subsequences
  ## from the same sequence are extracted
  ## x:  XString
  ## ir: IRanges
  ## Returns a DNAStringSet with one element per range in 'ir'.

  ## Go through the start()/width() accessors instead of reaching into the
  ## 'start' slot directly, and fetch both vectors once rather than on
  ## every iteration.
  starts <- start(ir)
  widths <- width(ir)
  ## forced cast to character: chain of tools for DNAString seems
  ##              interrupted for some use cases (or I missed something)
  res <- vapply(seq_len(length(ir)),
                function(i) {
                  as.character(subseq(x, start = starts[i], width = widths[i]))
                },
                character(1))
  DNAStringSet(res)
}


# ---

test_pdictConstantWidth <- function()
{
  ## Builds a PDict from L shuffled fragments of width W cut out of a random
  ## target, then checks its length, widths, trusted-band width, and that
  ## head and tail are both NULL.
  set.seed(1)
  target_len <- 150
  target <- randomDNASequences(1, target_len)[[1]]
  W <- 20
  L <- 6
  frag_ranges <- successiveIRanges(rep(W, L), gapwidth = 1)
  fragments <- msubseq(target, frag_ranges)
  # shuffle the fragments so they are not in consecutive order
  perm <- sample(seq_len(length(fragments)))

  pdict <- PDict(DNAStringSet(fragments[perm]))

  checkEquals(L, length(pdict))
  checkEquals(rep(W, L), width(pdict))
  checkEquals(NULL, head(pdict))
  checkEquals(W, tb.width(pdict))
  checkEquals(NULL, tail(pdict))
}

test_pdictVariableWidth <- function()
{
  ## Builds a PDict from L shuffled fragments of varying width (W minus a
  ## random cut of 0..5) and checks its length, widths, trusted-band width,
  ## and that the tail holds whatever lies beyond the trusted band.
  set.seed(1)
  target_len <- 150
  target <- randomDNASequences(1, target_len)[[1]]
  W <- 20
  L <- 6
  n_cut <- sample(0:5, L, replace = TRUE)
  frag_widths <- rep(W, L) - n_cut
  frag_gaps <- 1 + n_cut[-length(n_cut)]
  frag_ranges <- successiveIRanges(frag_widths, gapwidth = frag_gaps)
  fragments <- msubseq(target, frag_ranges)
  # shuffle the fragments so they are not in consecutive order
  perm <- sample(seq_len(length(fragments)))

  dna_var_short <- DNAStringSet(fragments[perm])

  # can't tb.start/tb.width below be the default for variable width ?
  pdict <- PDict(dna_var_short,
                 tb.start = 1,
                 tb.width = min(width(fragments)))

  checkEquals(L, length(pdict))
  checkEquals(frag_widths[perm], width(pdict))
  checkEquals(NULL, head(pdict))

  shortest_seq_width <- min(width(dna_var_short))
  # mostly a sanity check
  checkEquals(shortest_seq_width, tb.width(pdict))

  expected_tail <- substring(fragments, shortest_seq_width + 1)[perm]
  checkEquals(expected_tail, as.character(tail(pdict)))
}


test_matchConstantWidth <- function()
{
  ## Matches a constant-width PDict, built from shuffled fragments of a
  ## random target, back against that target and checks that each pattern
  ## is reported at the range it was cut from.
  set.seed(1)
  l <- 150
  dna_target <- randomDNASequences(1, l)[[1]]
  W <- 20
  L <- 6
  ir <- successiveIRanges(rep(W, L), gapwidth = 1)
  short_sequences <- msubseq(dna_target, ir)
  # shuffle the sequences (so they are not in consecutive order)
  o <- sample(seq_len(length(short_sequences)))

  dna_short <- DNAStringSet(short_sequences[o])
  pdict <- PDict(dna_short)

  res <- matchPDict(pdict, dna_target)

  # mostly a sanity check
  checkEquals(L, length(res))

  # Reorder the ranges once, outside the loop, and read the starts through
  # the start() accessor rather than the raw slot.
  iro <- ir[o]
  for (i in seq_len(length(res))) {
    m_start <- start(iro)[i]
    checkEquals(m_start, start(res[[i]]))
    checkEquals(W, width(res[[i]]))
    checkEquals(m_start + W - 1, end(res[[i]]))  # mostly a sanity check
  }
}

test_matchVariableWidth <- function()
{
  ## Matches a variable-width PDict, built from shuffled fragments of a
  ## random target, back against that target and checks that each pattern
  ## is reported with the start, width and end of the range it was cut from.
  set.seed(1)
  target_len <- 150
  dna_target <- randomDNASequences(1, target_len)[[1]]
  W <- 20
  L <- 6
  n_cut <- sample(0:5, L, replace = TRUE)
  frag_widths <- rep(W, L) - n_cut
  frag_gaps <- 1 + n_cut[-length(n_cut)]
  frag_ranges <- successiveIRanges(frag_widths, gapwidth = frag_gaps)
  fragments <- msubseq(dna_target, frag_ranges)
  # shuffle the fragments so they are not in consecutive order
  perm <- sample(seq_len(length(fragments)))

  dna_var_short <- DNAStringSet(fragments[perm])

  # can't tb.start/tb.width below be the default for variable width ?
  pdict <- PDict(dna_var_short,
                 tb.start = 1,
                 tb.width = min(width(dna_var_short)))

  res <- matchPDict(pdict, dna_target)

  # mostly a sanity check
  checkEquals(L, length(res))

  # expected coordinates, in the same shuffled order as the patterns
  expected <- frag_ranges[perm]
  expected_start <- start(expected)
  expected_width <- width(expected)
  expected_end <- end(expected)
  for (k in seq_len(length(res))) {
    hits <- res[[k]]
    checkEquals(expected_start[k], start(hits))
    checkEquals(expected_width[k], width(hits))
    checkEquals(expected_end[k], end(hits))  # mostly a sanity check
  }
}