This file is indexed.

/usr/include/arc/data/FileCache.h is in nordugrid-arc-dev 4.2.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
// -*- indent-tabs-mode: nil -*-

#ifndef FILECACHE_H_
#define FILECACHE_H_

#include <sstream>
#include <vector>
#include <map>
#include <set>
#include <arc/DateTime.h>
#include <arc/Logger.h>

#include "FileCacheHash.h"

namespace Arc {

  /// Contains data on the parameters of a cache.
  /**
   * \ingroup data
   * \headerfile FileCache.h arc/data/FileCache.h
   */
  struct CacheParameters {
    /// Path to the cache directory.
    /// (Presumably the "cache_dir" part of the "cache_dir[ link_path]"
    /// strings accepted by the FileCache constructors - confirm in _init().)
    std::string cache_path;
    /// Optional path under which the cache directory is visible during
    /// actual usage. Per the FileCache documentation, this path is used
    /// instead of cache_path when linking from the session directory, and
    /// the value "." makes Link() copy files rather than link them.
    /// (Presumably the "link_path" part of the constructor strings and
    /// empty when none was given - confirm in _init().)
    std::string cache_link_path;
  };

#ifndef WIN32

  /// FileCache provides an interface to all cache operations.
  /**
   * When it is decided a file should be downloaded to the cache, Start()
   * should be called, so that the cache file can be prepared and locked if
   * necessary. If the file is already available it is not locked and Link()
   * can be called immediately to create a hard link to a per-job directory in
   * the cache and then soft link, or copy the file directly to the session
   * directory so it can be accessed from the user's job. If the file is not
   * available, Start() will lock it, then after downloading Link() can be
   * called. Stop() must then be called to release the lock. If the transfer
   * failed, StopAndDelete() can be called to clean up the cache file. After
   * a job has finished, Release() should be called to remove the hard links
   * created for that job.
   *
   * Cache files are locked for writing using the FileLock class, which
   * creates a lock file with the '.lock' suffix next to the cache file.
   * If Start() is called and the cache file is not already available, it
   * creates this lock and Stop() must be called to release it. All processes
   * calling Start() must wait until they successfully obtain the lock before
   * downloading can begin.
   *
   * The cache directory(ies) and the optional directory to link to when the
   * soft-links are made are set in the constructor. The names of cache files
   * are formed from an SHA-1 hash of the URL to cache. To ease the load on
   * the file system, the cache files are split into subdirectories based on
   * the first two characters in the hash. For example the file with hash
   * 76f11edda169848038efbd9fa3df5693 is stored in
   * 76/f11edda169848038efbd9fa3df5693. A cache filename can be found by
   * passing the URL to File().  For more information on the structure of the
   * cache, see the ARC Computing Element System Administrator Guide
   * (NORDUGRID-MANUAL-20).
   * \ingroup data
   * \headerfile FileCache.h arc/data/FileCache.h
   */
  class FileCache {
   private:
    /// Map of urls and the cache they are mapped to/exist in
    std::map <std::string, struct CacheParameters> _cache_map;
    /// Vector of caches. Each entry defines a cache and specifies
    /// a cache directory and optional link path.
    std::vector<struct CacheParameters> _caches;
    /// Vector of remote caches. Each entry defines a cache and specifies
    /// a cache directory, per-job directory and link/copy information.
    /// NOTE(review): CacheParameters only carries cache_path and
    /// cache_link_path, so the "per-job directory and link/copy
    /// information" wording looks out of date - confirm.
    std::vector<struct CacheParameters> _remote_caches;
    /// Vector of caches to be drained.
    std::vector<struct CacheParameters> _draining_caches;
    /// A set of URLs that have already been unlocked in Link(). URLs in
    /// this set will not be unlocked in Stop().
    std::set<std::string> _urls_unlocked;
    /// Identifier used to claim files, ie the job id
    std::string _id;
    /// uid corresponding to the user running the job.
    /// The directory with hard links to cached files will be searchable only by this user
    uid_t _uid;
    /// gid corresponding to the user running the job.
    gid_t _gid;

    // The values of the constants below are defined outside this header.

    /// The sub-dir of the cache for data
    static const std::string CACHE_DATA_DIR;
    /// The sub-dir of the cache for per-job links
    static const std::string CACHE_JOB_DIR;
    /// The length of each cache subdirectory
    static const int CACHE_DIR_LENGTH;
    /// The number of levels of cache subdirectories
    static const int CACHE_DIR_LEVELS;
    /// The suffix to use for meta files
    static const std::string CACHE_META_SUFFIX;
    /// Default validity time of cached DNs
    static const int CACHE_DEFAULT_AUTH_VALIDITY;
    /// Timeout on cache lock. The lock file is continually updated during the
    /// transfer so 15 mins with no transfer update should mean stale lock.
    /// NOTE(review): units and value are not visible here - confirm that the
    /// definition matches the 15 minutes mentioned above.
    static const int CACHE_LOCK_TIMEOUT;
    /// Timeout on lock on meta file
    static const int CACHE_META_LOCK_TIMEOUT;

    /// Common code for constructors
    /**
     * Takes the same arguments as the most general public constructor:
     * the cache, remote cache and draining cache descriptions, the job id
     * and the uid/gid of the job owner.
     * NOTE(review): the meaning of the bool return value is not documented
     * in this header - presumably true on success; confirm in the
     * implementation.
     */
    bool _init(const std::vector<std::string>& caches,
               const std::vector<std::string>& remote_caches,
               const std::vector<std::string>& draining_caches,
               const std::string& id,
               uid_t job_uid,
               gid_t job_gid);
    /// Check the meta file corresponding to cache file filename is valid,
    /// and create one if it doesn't exist. Returns false if creation fails,
    /// and if it was due to being locked, is_locked is set to true.
    bool _checkMetaFile(const std::string& filename, const std::string& url, bool& is_locked);
    /// Create the meta file with the given content. Returns false and sets
    /// is_locked to true if the file is already locked.
    bool _createMetaFile(const std::string& meta_file, const std::string& content, bool& is_locked);
    /// Return the filename of the meta file associated to the given url
    std::string _getMetaFileName(const std::string& url);
    /// Get the hashed path corresponding to the given url
    std::string _getHash(const std::string& url) const;
    /// Choose a cache directory to use for this url, based on the free
    /// size of the cache directories. Returns the cache to use.
    struct CacheParameters _chooseCache(const std::string& url) const;
    /// Return the free space in GB at the given path
    float _getCacheInfo(const std::string& path) const;
    /// For cleaning up after a cache file was locked during Link()
    /// NOTE(review): judging by the name this removes hard_link_file and
    /// always returns false; how "locked" is used is not visible here -
    /// confirm in the implementation.
    bool _cleanFilesAndReturnFalse(const std::string& hard_link_file, bool& locked);

    /// Logger for messages
    static Logger logger;

   public:
    /// Create a new FileCache instance with one cache directory.
    /**
     * @param cache_path The format is "cache_dir[ link_path]".
     * cache_dir is the path to the cache directory and the optional
     * link_path is used to create a link in case the
     * cache directory is visible under a different name during actual
     * usage. When linking from the session dir this path is used
     * instead of cache_dir.
     * @param id the job id. This is used to create the per-job dir
     * which the job's cache files will be hard linked from
     * @param job_uid owner of job. The per-job dir will only be
     * readable by this user
     * @param job_gid owner group of job
     */
    FileCache(const std::string& cache_path,
              const std::string& id,
              uid_t job_uid,
              gid_t job_gid);

    /// Create a new FileCache instance with multiple cache dirs.
    /**
     * @param caches a vector of strings describing caches. The format
     * of each string is "cache_dir[ link_path]".
     * @param id the job id. This is used to create the per-job dir
     * which the job's cache files will be hard linked from
     * @param job_uid owner of job. The per-job dir will only be
     * readable by this user
     * @param job_gid owner group of job
     */
    FileCache(const std::vector<std::string>& caches,
              const std::string& id,
              uid_t job_uid,
              gid_t job_gid);

    /// Create a new FileCache instance with multiple cache dirs, remote caches and draining cache directories.
    /**
     * @param caches a vector of strings describing caches. The format
     * of each string is "cache_dir[ link_path]".
     * @param remote_caches Same format as caches. These are the
     * paths to caches which are under the control of other Grid
     * Managers and are read-only for this process.
     * @param draining_caches Same format as caches. These are the
     * paths to caches which are to be drained.
     * @param id the job id. This is used to create the per-job dir
     * which the job's cache files will be hard linked from
     * @param job_uid owner of job. The per-job dir will only be
     * readable by this user
     * @param job_gid owner group of job
     */
    FileCache(const std::vector<std::string>& caches,
              const std::vector<std::string>& remote_caches,
              const std::vector<std::string>& draining_caches,
              const std::string& id,
              uid_t job_uid,
              gid_t job_gid);

    /// Default constructor. Invalid cache.
    /**
     * No cache directories are configured, so operator bool() returns
     * false for the resulting object. The uid and gid are set to 0.
     */
    FileCache(): _uid(0),_gid(0) {
      // _caches is already empty after default construction; the clear()
      // only makes the "no caches, hence invalid" state explicit.
      _caches.clear();
    }

    /// Start preparing to cache the file specified by url.
    /**
     * Start() returns true if the file was successfully prepared. The
     * available parameter is set to true if the file already exists and in
     * this case Link() can be called immediately. If available is false the
     * caller should write the file and then call Link() followed by Stop().
     * Start() returns false if it was unable to prepare the cache file for any
     * reason. In this case the is_locked parameter should be checked and if
     * it is true the file is locked by another process and the caller should
     * try again later.
     *
     * @param url url that is being downloaded
     * @param available true on exit if the file is already in cache
     * @param is_locked true on exit if the file is already locked, ie
     * cannot be used by this process
     * @param use_remote Whether to look to see if the file exists in a
     * remote cache. Can be set to false if for example a forced download
     * to cache is desired.
     * @param delete_first If true then any existing cache file is deleted.
     * @return true if file is available or ready to be downloaded, false if
     * the file is already locked or preparing the cache failed.
     */
    bool Start(const std::string& url,
               bool& available,
               bool& is_locked,
               bool use_remote = true,
               bool delete_first = false);

    /// Stop the cache after a file was downloaded.
    /**
     * This method (or StopAndDelete()) must be called after the file was
     * downloaded or the download failed, to release the lock on the
     * cache file. Stop() does not delete the cache file. It returns
     * false if the lock file does not exist, or another pid was found
     * inside the lock file (this means another process took over the
     * lock so this process must go back to Start()), or if it fails
     * to delete the lock file. It must only be called if the caller
     * actually downloaded the file. It must not be called if the file was
     * already available.
     * @param url the url of the file that was downloaded
     * @return true if the lock was successfully released.
     */
    bool Stop(const std::string& url);

    /// Stop the cache after a file was downloaded and delete the cache file.
    /**
     * Release the cache file and delete it, because for example a
     * failed download left an incomplete copy. This method also deletes
     * the meta file which contains the url corresponding to the cache file.
     * The logic of the return value is the same as Stop(). It must only be
     * called if the caller downloaded the file.
     * @param url the url corresponding to the cache file that has
     * to be released and deleted
     * @return true if the cache file and lock were successfully removed.
     */
    bool StopAndDelete(const std::string& url);

    /// Get the cache filename for the given URL.
    /**
     * @param url the URL to look for in the cache
     * @return the full pathname of the file in the cache which corresponds to
     * the given url.
     */
    std::string File(const std::string& url);

    /// Link a cache file to the place it will be used.
    /**
     * Create a hard-link to the per-job dir from the cache dir, and then a
     * soft-link from here to the session directory. This is effectively
     * 'claiming' the file for the job, so even if the original cache file is
     * deleted, eg by some external process, the hard link still exists until
     * it is explicitly released by calling Release().
     *
     * If cache_link_path is set to "." or copy or executable is true then
     * files will be copied directly to the session directory rather than
     * linked.
     *
     * After linking or copying, the cache file is checked for the presence of
     * a write lock, and whether the modification time has changed since
     * linking started (in case the file was locked, modified then released
     * during linking). If either of these are true the links created during
     * Link() are deleted, try_again is set to true and Link() returns false.
     * The caller should then go back to Start(). If the caller has obtained a
     * write lock from Start() and then downloaded the file, it should set
     * holding_lock to true, in which case none of the above checks are
     * performed.
     *
     * The session directory is accessed under the uid and gid passed in
     * the constructor.
     *
     * @param link_path path to the session dir for soft-link or new file
     * @param url url of file to link to or copy
     * @param copy If true the file is copied rather than soft-linked
     * to the session dir
     * @param executable If true then file is copied and given execute
     * permissions in the session dir
     * @param holding_lock Should be set to true if the caller already holds
     * the lock
     * @param try_again If after linking the cache file was found to be locked,
     * deleted or modified, then try_again is set to true
     * @return true if linking succeeded, false if an error occurred or the
     * file was locked or modified by another process during linking
     */
    bool Link(const std::string& link_path,
              const std::string& url,
              bool copy,
              bool executable,
              bool holding_lock,
              bool& try_again);

    /// Release cache files used in this cache.
    /**
     * Release claims on input files for the job specified by id.
     * For each cache directory the per-job directory with the
     * hard-links will be deleted.
     * @return false if any directory fails to be deleted
     */
    bool Release() const;

    /// Store a DN in the permissions cache for the given url.
    /**
     * Add the given DN to the list of cached DNs with the given expiry time.
     * @param url the url corresponding to the cache file to which we
     * want to add a cached DN
     * @param DN the DN of the user
     * @param expiry_time the expiry time of this DN in the DN cache
     * @return true if the DN was successfully added
     */
    bool AddDN(const std::string& url, const std::string& DN, const Time& expiry_time);

    /// Check if a DN exists in the permission cache and is still valid for the given url.
    /**
     * Check if the given DN is cached for authorisation and it is still valid.
     * @param url the url corresponding to the cache file for which we
     * want to check the cached DN
     * @param DN the DN of the user
     * @return true if the DN exists and is still valid
     */
    bool CheckDN(const std::string& url, const std::string& DN);

    /// Check if it is possible to obtain the creation time of a cache file.
    /**
     * @param url the url corresponding to the cache file for which we
     * want to know if the creation date exists
     * @return true if the file exists in the cache, since the creation time
     * is the creation time of the cache file.
     */
    bool CheckCreated(const std::string& url);

    /// Get the creation time of a cached file.
    /**
     * @param url the url corresponding to the cache file for which we
     * want to know the creation date
     * @return creation time of the file or 0 if the cache file does not exist
     */
    Time GetCreated(const std::string& url);

    /// Check if there is an expiry time of the given url in the cache.
    /**
     * @param url the url corresponding to the cache file for which we
     * want to know if the expiration time exists
     * @return true if an expiry time exists
     */
    bool CheckValid(const std::string& url);

    /// Get expiry time of a cached file.
    /**
     * @param url the url corresponding to the cache file for which we
     * want to know the expiry time
     * @return the expiry time or 0 if none is available
     */
    Time GetValid(const std::string& url);

    /// Set expiry time of a cache file.
    /**
     * @param url the url corresponding to the cache file for which we
     * want to set the expiry time
     * @param val expiry time
     * @return true if the expiry time was successfully set
     */
    bool SetValid(const std::string& url, const Time& val);

    /// Returns true if object is usable.
    /**
     * The object counts as usable when at least one cache is present in
     * _caches; a default-constructed FileCache therefore converts to false.
     * NOTE(review): the operator is neither const nor explicit, so it
     * cannot be applied to a const FileCache and it allows implicit
     * conversion to bool.
     */
    operator bool() {
      return (!_caches.empty());
    };

    /// Returns true if all attributes are equal
    /**
     * NOTE(review): not const-qualified, so the left-hand operand must be
     * a non-const FileCache. Which attributes are compared is decided by
     * the implementation, which is not visible in this header.
     */
    bool operator==(const FileCache& a);

  };

#else

  /// Stub FileCache used when WIN32 is defined.
  /**
   * Mirrors the public interface of the real FileCache but holds no state.
   * Every operation reports failure (false, or a default-constructed Time),
   * File() hands the url back unchanged, and the object always converts to
   * false.
   */
  class FileCache {
  public:
    /// Default constructor. The stub has nothing to initialise.
    FileCache() {}

    /// Copy constructor. Nothing is copied because the stub has no members.
    FileCache(const FileCache& cache) {}

    /// Single cache directory constructor. All arguments are ignored.
    FileCache(const std::string& cache_path,
              const std::string& id,
              int job_uid,
              int job_gid) {}

    /// Multiple cache directories constructor. All arguments are ignored.
    FileCache(const std::vector<std::string>& caches,
              const std::string& id,
              int job_uid,
              int job_gid) {}

    /// Caches, remote caches and draining caches constructor.
    /**
     * All arguments are ignored, including the trailing cache_max and
     * cache_min, which both default to 100.
     */
    FileCache(const std::vector<std::string>& caches,
              const std::vector<std::string>& remote_caches,
              const std::vector<std::string>& draining_caches,
              const std::string& id,
              int job_uid,
              int job_gid,
              int cache_max = 100,
              int cache_min = 100) {}

    /// Always fails; available and is_locked are left untouched.
    bool Start(const std::string& url,
               bool& available,
               bool& is_locked,
               bool use_remote = true,
               bool delete_first = false) {
      return false;
    }

    /// Always fails.
    bool Stop(const std::string& url) {
      return false;
    }

    /// Always fails.
    bool StopAndDelete(const std::string& url) {
      return false;
    }

    /// Returns the url itself, as there is no cache file to map it to.
    std::string File(const std::string& url) {
      return url;
    }

    /// Always fails; try_again is left untouched.
    bool Link(const std::string& link_path,
              const std::string& url,
              bool copy,
              bool executable,
              bool holding_lock,
              bool& try_again) {
      return false;
    }

    /// Always fails.
    bool Release() const {
      return false;
    }

    /// Always fails.
    bool AddDN(const std::string& url,
               const std::string& DN,
               const Time& expiry_time) {
      return false;
    }

    /// Always reports that the DN is not cached.
    bool CheckDN(const std::string& url, const std::string& DN) {
      return false;
    }

    /// Always reports that no creation time is available.
    bool CheckCreated(const std::string& url) {
      return false;
    }

    /// Returns a default-constructed Time.
    Time GetCreated(const std::string& url) {
      return Time();
    }

    /// Always reports that no expiry time is available.
    bool CheckValid(const std::string& url) {
      return false;
    }

    /// Returns a default-constructed Time.
    Time GetValid(const std::string& url) {
      return Time();
    }

    /// Always fails.
    bool SetValid(const std::string& url, const Time& val) {
      return false;
    }

    /// The stub is never usable.
    operator bool() {
      return false;
    }

    /// Stub objects never compare equal.
    bool operator==(const FileCache& a) {
      return false;
    }
  };
#endif /*WIN32*/


} // namespace Arc

#endif /*FILECACHE_H_*/