/usr/share/systemtap/runtime/linux/uprobes-common.c is in systemtap-common 2.6-0.2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/* -*- linux-c -*-
* uprobe Functions
* Copyright (C) 2014 Red Hat Inc.
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
* Public License (GPL); either version 2, or (at your option) any
* later version.
*/
#ifndef _UPROBE_COMMON_C_
#define _UPROBE_COMMON_C_
/* NB: Because these utrace callbacks only occur before / after
userspace instructions run, there is no concurrency control issue
between active uprobe callbacks and these registration /
unregistration pieces.
We protect the stap_uprobe->spec_index value (which also serves as a
free/busy flag) with the outer protective stap_uprobes_lock
mutex, to guard it against concurrent registration /
unregistration.
*/
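/* Registration path, run by the task finder when a mapping of this target
appears: claim a free stap_uprobes[] slot for each matching spec, compute
any SDT semaphore address, and register the u(ret)probe at
relocation + sups->address. */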
static int stap_uprobe_change_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf, unsigned long offset, unsigned long vm_flags) {
int tfi = (stf - stap_uprobe_finders);
int spec_index;
/* iterate over stap_uprobe_spec[] that use this same stap_uprobe_tf */
for (spec_index=0; spec_index<sizeof(stap_uprobe_specs)/sizeof(stap_uprobe_specs[0]); spec_index++) {
int handled_p = 0;
int slotted_p = 0;
const struct stap_uprobe_spec *sups = &stap_uprobe_specs [spec_index];
struct stap_uprobe *sup;
pid_t sdt_sem_pid;
int rc = 0;
int i;
int pci;
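/* Skip specs that belong to a different task-finder target. */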
if (likely(sups->tfi != tfi)) continue;
/* skip probes with an address beyond this map event; should not
happen unless a shlib/exec got mmapped in weirdly piecemeal */
if (likely((vm_flags & VM_EXEC) && sups->address >= length)) continue;
/* Found a uprobe_spec for this stap_uprobe_tf. Need to lock the
stap_uprobes[] array to allocate a free spot, but then we can
unlock and do the register_*probe subsequently. */
mutex_lock (& stap_uprobes_lock);
for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
sup = & stap_uprobes[i];
/* register new uprobe
We make two passes for semaphores;
see stap_uprobe_change_semaphore_plus */
if (sup->spec_index < 0 || (sups->sdt_sem_offset && vm_flags & VM_WRITE && sup->spec_index == spec_index)) {
#if (UPROBES_API_VERSION < 2)
/* See PR6829 comment. */
if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;
else if (sup->spec_index == -2 && sup->urp.u.kdata != NULL) continue;
#endif
sup->spec_index = spec_index;
slotted_p = 1;
break;
}
}
mutex_unlock (& stap_uprobes_lock);
#ifdef DEBUG_UPROBES
_stp_dbug(__FUNCTION__,__LINE__, "+uprobe spec %d idx %d process %s[%d] addr %p pp %s\n", spec_index, (slotted_p ? i : -1), tsk->comm, tsk->tgid, (void*)(relocation+sups->address), sups->probe->pp);
#endif
/* NB: check for user-module build-id only if we have a pathname
at all; for a process(PID#).* probe, we may not. If at some
point we map process(PID#) to process("/proc/PID#/exe"), we'll
get a pathname. */
if (stf->pathname)
if ((rc = _stp_usermodule_check(tsk, stf->pathname, relocation)))
return rc;
/* Here, slotted_p implies that `i' points to the single
stap_uprobes[] element that has been slotted in for registration
or unregistration processing. !slotted_p implies that the table
was full (registration; MAXUPROBES) or that no matching entry was
found (unregistration; should not happen). */
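/* If this spec carries an SDT semaphore and its address has not yet been
resolved for this task, compute it from the mapping just seen. */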
sdt_sem_pid = (sups->return_p ? sup->urp.u.pid : sup->up.pid);
if (sups->sdt_sem_offset && (sdt_sem_pid != tsk->tgid || sup->sdt_sem_address == 0)) {
/* If the probe is in an ET_EXEC binary, then the sdt_sem_offset already
* is a real address. But stap_uprobe_process_found calls us in this
* case with relocation=offset=0, so we don't have to worry about it. */
sup->sdt_sem_address = (relocation - offset) + sups->sdt_sem_offset;
} /* sdt_sem_offset */
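/* Set up reads of each perf counter referenced by this probe for this task. */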
for (pci=0; pci < sups->perf_counters_dim; pci++) {
if ((sups->perf_counters)[pci] > -1)
_stp_perf_read_init ((sups->perf_counters)[pci], tsk);
}
if (slotted_p) {
struct stap_uprobe *sup = & stap_uprobes[i];
if (sups->return_p) {
sup->urp.u.pid = tsk->tgid;
sup->urp.u.vaddr = relocation + sups->address;
sup->urp.handler = &enter_uretprobe_probe;
rc = register_uretprobe (& sup->urp);
} else {
sup->up.pid = tsk->tgid;
sup->up.vaddr = relocation + sups->address;
sup->up.handler = &enter_uprobe_probe;
rc = register_uprobe (& sup->up);
}
/* The u*probe failed to register. However, if we got EEXIST,
* that means that the u*probe is already there, so just ignore
* the error. This could happen if CLONE_THREAD or CLONE_VM was
* used. */
if (rc != 0 && rc != -EEXIST) {
_stp_warn ("u*probe failed %s[%d] '%s' addr %p rc %d\n", tsk->comm, tsk->tgid, sups->probe->pp, (void*)(relocation + sups->address), rc);
/* NB: we need to release this slot,
so we need to borrow the mutex temporarily. */
mutex_lock (& stap_uprobes_lock);
sup->spec_index = -1;
sup->sdt_sem_address = 0;
mutex_unlock (& stap_uprobes_lock);
} else {
handled_p = 1;
}
}
/* NB: handled_p implies slotted_p */
if (unlikely (! handled_p)) {
#ifdef STP_TIMING
atomic_inc (skipped_count_uprobe_reg());
#endif
/* NB: duplicates common_entryfn_epilogue,
but then this is not a probe entry fn epilogue. */
#ifndef STAP_SUPPRESS_HANDLER_ERRORS
if (unlikely (atomic_inc_return (skipped_count()) > MAXSKIPPED)) {
if (unlikely (pseudo_atomic_cmpxchg(session_state(), STAP_SESSION_RUNNING, STAP_SESSION_ERROR) == STAP_SESSION_RUNNING))
_stp_error ("Skipped too many probes, check MAXSKIPPED or try again with stap -t for more details.");
}
#endif
}
} /* close iteration over stap_uprobe_spec[] */
return 0; /* XXX: or rc? */
}
static int stap_uprobe_change_semaphore_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
int tfi = (stf - stap_uprobe_finders);
int spec_index;
int rc = 0;
struct stap_uprobe *sup;
int i;
/* We make two passes for semaphores.
The first pass, stap_uprobe_change_plus, calculates the address of the
semaphore. If the probe is in a .so, we calculate the
address when the initial mmap maps the entire solib, e.g.
7f089885a000-7f089885b000 rw-p libtcl.so
A subsequent mmap maps in the writable segment where the
semaphore control variable lives, e.g.
7f089850d000-7f0898647000 r-xp libtcl.so
7f0898647000-7f0898846000 ---p libtcl.so
7f0898846000-7f089885b000 rw-p libtcl.so
The second pass, stap_uprobe_change_semaphore_plus, sets the semaphore.
If the probe is in a .so this will be when the writable segment of the .so
is mapped in. If the task changes, then recalculate the address.
*/
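/* Walk all active slots and increment any semaphore that falls inside the
newly mapped region. */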
for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
sup = & stap_uprobes[i];
if (sup->spec_index == -1) continue;
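/* Skip entries whose resolved semaphore lies outside this mapping or
belongs to a different task. */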
if (sup->sdt_sem_address != 0 && !(sup->up.pid == tsk->tgid && sup->sdt_sem_address >= relocation && sup->sdt_sem_address < relocation+length)) continue;
if (sup->sdt_sem_address) {
unsigned short sdt_semaphore = 0; /* NB: fixed size */
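/* Read the semaphore from the target's user memory, bump it, write it back. */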
if ((rc = get_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address)) == 0) {
sdt_semaphore ++;
#ifdef DEBUG_UPROBES
{
const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];
_stp_dbug(__FUNCTION__,__LINE__, "+semaphore %#x @ %#lx spec %d idx %d task %d\n", sdt_semaphore, sup->sdt_sem_address, sup->spec_index, i, tsk->tgid);
}
#endif
rc = put_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address);
/* XXX: need to analyze possibility of race condition */
}
}
}
return rc;
}
/* Removing/unmapping a uprobe is simpler than adding one (in the
_plus function above). We need not care about stap_uprobe_finders
or anything, we just scan through stap_uprobes[] for a live probe
within the given address range, and kill it. */
static int stap_uprobe_change_minus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
int i;
/* NB: it's not an error for us not to find a live uprobe within the
given range. We might have received a callback for a part of a
shlib that was unmapped and unprobed. */
for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
struct stap_uprobe *sup = & stap_uprobes[i];
struct stap_uprobe_spec *sups;
if (sup->spec_index < 0) continue; /* skip free uprobes slot */
sups = (struct stap_uprobe_spec*) & stap_uprobe_specs[sup->spec_index];
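/* spec_index doubles as the slot's free/busy flag, so hold
stap_uprobes_lock while we inspect and possibly free this slot. */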
mutex_lock (& stap_uprobes_lock);
/* PR6829, PR9940:
Here we're unregistering for one of two reasons:
1. the process image is going away (or gone) due to exit or exec; or
2. the vma containing the probepoint has been unmapped.
In case 1, it's sort of a nop, because uprobes will notice the event
and dispose of the probes eventually, if it hasn't already. But by
calling unmap_u[ret]probe() ourselves, we free up sup right away.
In both cases, we must use unmap_u[ret]probe instead of
unregister_u[ret]probe, so uprobes knows not to try to restore the
original opcode.
*/
/* URETPROBE */
if (sups->return_p && sup->urp.u.pid == tsk->tgid && sup->urp.u.vaddr >= relocation && sup->urp.u.vaddr < relocation+length) { /* in range */
#ifdef DEBUG_UPROBES
_stp_dbug (__FUNCTION__,__LINE__, "-uretprobe spec %d idx %d process %s[%d] addr %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->urp.u.vaddr, sups->probe->pp);
#endif
#if (UPROBES_API_VERSION >= 2)
unmap_uretprobe (& sup->urp);
sup->spec_index = -1;
sup->sdt_sem_address = 0;
#else
/* Uprobes lacks unmap_uretprobe. Before reusing sup, we must wait
until uprobes turns loose of the uretprobe on its own, as indicated
by uretprobe.kdata = NULL. */
sup->spec_index = -2;
#endif
/* UPROBE */
} else if (!sups->return_p && sup->up.pid == tsk->tgid && sup->up.vaddr >= relocation && sup->up.vaddr < relocation+length) { /* in range */
#ifdef DEBUG_UPROBES
_stp_dbug (__FUNCTION__,__LINE__, "-uprobe spec %d idx %d process %s[%d] reloc %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->up.vaddr, sups->probe->pp);
#endif
#if (UPROBES_API_VERSION >= 2)
unmap_uprobe (& sup->up);
sup->spec_index = -1;
sup->sdt_sem_address = 0;
#else
/* Uprobes lacks unmap_uprobe. Before reusing sup, we must wait
until uprobes turns loose of the uprobe on its own, as indicated
by uprobe.kdata = NULL. */
sup->spec_index = -1;
sup->sdt_sem_address = 0;
#endif
/* PR10655: we don't need to fidget with the ENABLED semaphore either,
as the process is gone, buh-bye, toodaloo, au revoir, see ya later! */
}
mutex_unlock (& stap_uprobes_lock);
} /* close iteration over stap_uprobes[] */
return 0; /* XXX: or !handled_p */
}
/* The task_finder_callback we use for ET_EXEC targets.
We used to perform uprobe insertion/removal here, but not any more.
(PR10524) */
static int stap_uprobe_process_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
if (! process_p) return 0; /* ignore threads */
dbug_task_vma(1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
/* ET_EXEC events are like shlib events, but with 0 relocation bases */
if (register_p) {
int rc = stap_uprobe_change_plus (tsk, 0, TASK_SIZE, stf, 0, 0);
stap_uprobe_change_semaphore_plus (tsk, 0, TASK_SIZE, stf);
return rc;
} else
return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
}
/* The task_finder_mmap_callback */
static int
stap_uprobe_mmap_found (struct stap_task_finder_target *tgt,
struct task_struct *tsk, char *path,
struct dentry *dentry, unsigned long addr,
unsigned long length, unsigned long offset,
unsigned long vm_flags)
{
int rc = 0;
const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
/* 1 - shared libraries' executable segments load from offset 0
* - ld.so convention offset != 0 is now allowed
* so stap_uprobe_change_plus can set a semaphore,
* i.e. a static extern, in a shared object
* 2 - the shared library we're interested in
* 3 - mapping should be executable or writable (for semaphore in .so)
* NB: or both, on kernels that lack noexec mapping
*/
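/* Ignore mappings of files other than the one this finder targets. */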
if (path == NULL || strcmp (path, stf->pathname))
return 0;
/* Check non-writable, executable sections for probes. */
if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE)) {
dbug_task_vma (1,
"+mmap X pid %d path %s addr %p length %u offset %p stf %p %p path %s\n",
tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset,
tgt, stf, stf->pathname);
rc = stap_uprobe_change_plus (tsk, addr, length, stf, offset, vm_flags);
}
/* Check writable sections for semaphores.
* NB: They may have also been executable for the check above, if we're
* running a kernel that lacks noexec mappings. So long as there's
* no error (rc == 0), we need to look for semaphores too.
*/
if ((rc == 0) && (vm_flags & VM_WRITE)) {
dbug_task_vma (1,
"+mmap W pid %d path %s addr %p length %u offset %p stf %p %p path %s\n",
tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset,
tgt, stf, stf->pathname);
rc = stap_uprobe_change_semaphore_plus (tsk, addr, length, stf);
}
return rc;
}
/* The task_finder_munmap_callback */
static int stap_uprobe_munmap_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, unsigned long addr, unsigned long length) {
const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
dbug_task_vma (1, "-mmap pid %d addr %p length %lu stf %p %p path %s\n", tsk->tgid, (void *) addr, length, tgt, stf, stf->pathname);
return stap_uprobe_change_minus (tsk, addr, length, stf);
}
/* The task_finder_callback we use for ET_DYN targets.
This just forces an unmap of everything as the process exits.
(PR11151) */
static int stap_uprobe_process_munmap (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
if (! process_p) return 0; /* ignore threads */
dbug_task_vma (1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
/* Covering 0->TASK_SIZE means "unmap everything" */
if (!register_p)
return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
return 0;
}
#endif /* _UPROBE_COMMON_C_ */