/usr/share/pyshared/dicom/examples/anonymize.py is in python-dicom 0.9.6-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# anonymize.py
"""Read a dicom file (or directory of files), partially "anonymize" it (them),
by replacing Person names, patient id, optionally remove curves
and private tags, and write result to a new file (directory)
This is an example only; use only as a starting point.
"""
# Copyright (c) 2008, 2011 Darcy Mason
# This file is part of pydicom, released under an MIT license.
# See the file license.txt included with this distribution, also
# available at http://pydicom.googlecode.com
# Use at your own risk!!
# Many more items need to be addressed for proper de-identifying DICOM data.
# In particular, note that pixel data could have confidential data "burned in"
# Annex E of PS3.15-2011 DICOM standard document details what must be done to
# fully de-identify DICOM data
# Help text printed by the script entry point when it is not given exactly
# two command-line arguments (input path and output path).
usage = """
Usage:
python anonymize.py dicomfile.dcm outputfile.dcm
OR
python anonymize.py originals_directory anonymized_directory
Note: Use at your own risk. Does not fully de-identify the DICOM data as per
the DICOM standard, e.g in Annex E of PS3.15-2011.
"""
import os, os.path
import dicom
def anonymize(filename, output_filename, new_person_name="anonymous",
              new_patient_id="id", remove_curves=True, remove_private_tags=True):
    """Replace data element values to partly anonymize a DICOM file.

    Note: completely anonymizing a DICOM file is very complicated; there
    are many things this example code does not address. USE AT YOUR OWN RISK.

    Arguments:
    filename -- path of the DICOM file to read
    output_filename -- path the modified dataset is saved to
    new_person_name -- value written into every data element whose VR is "PN"
    new_patient_id -- value assigned to the PatientID element
    remove_curves -- if true, delete every data element whose tag group
                     is in the range 0x5000-0x50FF
    remove_private_tags -- if true, call dataset.remove_private_tags()

    Returns None; the result is written to output_filename.
    """
    # Define call-back functions for the dataset.walk() function
    def PN_callback(ds, data_element):
        """Called from the dataset "walk" recursive function for all data elements."""
        if data_element.VR == "PN":
            data_element.value = new_person_name

    def curves_callback(ds, data_element):
        """Called from the dataset "walk" recursive function for all data elements."""
        # Mask off the low byte of the group so 0x5000..0x50FF all match.
        if data_element.tag.group & 0xFF00 == 0x5000:
            del ds[data_element.tag]

    # Load the current dicom file to 'anonymize'
    dataset = dicom.read_file(filename)

    # Remove patient name and any other person names
    dataset.walk(PN_callback)

    # Change ID
    dataset.PatientID = new_patient_id

    # Remove data elements (should only do so if DICOM type 3 optional)
    # Use general loop so easy to add more later
    # Could also have done: del dataset.OtherPatientIDs, etc.
    for name in ['OtherPatientIDs']:
        if name in dataset:
            # The dataset loaded above is the object to modify; `ds` is only
            # bound inside the walk callbacks and is not defined here.
            delattr(dataset, name)

    # Same as above but for blanking data elements that are type 2.
    for name in ['PatientsBirthDate']:
        if name in dataset:
            dataset.data_element(name).value = ''

    # Remove private tags if function argument says to do so. Same for curves
    if remove_private_tags:
        dataset.remove_private_tags()
    if remove_curves:
        dataset.walk(curves_callback)

    # write the 'anonymized' DICOM out under the new filename
    dataset.save_as(output_filename)
# Can run as a script:
#   python anonymize.py <input file> <output file>
#   python anonymize.py <input directory> <output directory>
# The syntax below is valid on both Python 2 and Python 3.
if __name__ == "__main__":
    import sys

    # Exactly two arguments are required; otherwise show the help text.
    if len(sys.argv) != 3:
        print(usage)
        sys.exit()
    arg1, arg2 = sys.argv[1:]

    if os.path.isdir(arg1):
        # Directory mode: anonymize every non-directory entry of in_dir
        # (no recursion into sub-directories) into out_dir.
        in_dir = arg1
        out_dir = arg2
        if os.path.exists(out_dir):
            if not os.path.isdir(out_dir):
                raise IOError("Input is directory; output name exists but is not a directory")
        else:  # out_dir does not exist; create it.
            os.makedirs(out_dir)

        for filename in os.listdir(in_dir):
            in_path = os.path.join(in_dir, filename)
            if os.path.isdir(in_path):
                continue
            # Flush so the file name is visible while the file is processed;
            # the trailing "\r" lets the next name overwrite this line.
            sys.stdout.write(filename + "... ")
            sys.stdout.flush()
            anonymize(in_path, os.path.join(out_dir, filename))
            sys.stdout.write("done\r")
            sys.stdout.flush()
    else:  # first arg not a directory, assume two files given
        in_filename = arg1
        out_filename = arg2
        anonymize(in_filename, out_filename)

    # Finish with a newline so the shell prompt starts on a fresh line.
    sys.stdout.write("\n")
|