/usr/share/psychtoolbox-3/PsychDemos/GPGPUDemos/GPUFFTDemo1.m is in psychtoolbox-3-common 3.0.11.20131230.dfsg1-1build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | function GPUFFTDemo1(fwidth)
% GPUFFTDemo1([fwidth=11]) - Demonstrate use of GPGPU computing for 2D-FFT.
%
% This demo makes use of the FOSS GPUmat toolbox to perform a GPU
% accelerated 2D FFT + filtering in frequency space + 2D inverse FFT on our
% favourite bunnies. GPUmat allows to use NVidia's CUDA gpu computing
% framework on supported NVidia gpu's (GeForce-8000 series and later, aka
% Direct3D-10 or OpenGL-3 capable).
%
% It shows how a Psychtoolbox floating point texture (with our bunnies
% inside) can be efficiently passed to GPUmat as a matrix of GPUsingle data
% type, which is stored and processed on the GPU. Then it uses GPUmat's fft
% routines for forward/inverse fft's and matrix manipulation. Then it
% returns the final image to Psychtoolbox as a new floating point texture
% for display. The same steps are carried out with Matlab/Octave's regular
% fft routines on the cpu for reference.
%
% Requires the freely downloadable NVidia CUDA-5.0 SDK/Runtime and the free
% and open-source GPUmat toolbox as well as a compute capable NVidia
% graphics card.
%
% Parameters:
%
% 'fwidth' = Width of low-pass filter kernel in frequency space units.
%
% History:
% 5.02.2013 mk Written.
% Default filter width is 11 units:
if nargin < 1 || isempty(fwidth)
fwidth = 11;
end
% Running under PTB-3 hopefully?
AssertOpenGL;
% Skip timing tests and calibrations for this demo:
oldskip = Screen('Preference','SkipSyncTests', 2);
% Open onscreen window with black background on (external) screen,
% enable GPGPU computing support:
screenid = max(Screen('Screens'));
PsychImaging('PrepareConfiguration');
% We explicitely request the GPUmat based api:
PsychImaging('AddTask', 'General', 'UseGPGPUCompute', 'GPUmat');
w = PsychImaging('OpenWindow', screenid, 0);
try
Screen('TextSize', w, 28);
DrawFormattedText(w, 'Please be patient throughout the demo.\nSome benchmark steps are time intense...', 'center', 'center', 255);
Screen('Flip', w);
% Read our beloved bunny image from filesystem:
bunnyimg = imread([PsychtoolboxRoot 'PsychDemos/konijntjes1024x768.jpg']);
% Add a fourth neutral alpha channel, so we can create a 4-channel
% RGBA texture. Why? Some GPU compute api's, e.g., NVidia's CUDA,
% don't like RGB textures, so we need to do this to avoid errors:
bunnyimg(:,:,4) = 255;
% Turn it into a double matrix for conversion into a floating point
% texture, and normalize/convert its color range from 0-255 to 0-1:
dbunny = double(bunnyimg)/255;
% Maketexture a texture out of it in float precision with upright
% texture orientation, so further processing in teamwork between
% GPUmat and Psychtoolbox is simplified. Note: This conversion
% would turn a luminance texture into a troublesome RGB texture,
% which would only work on Linux and maybe Windows, but not on OSX.
% We avoid this by never passing in a luminance texture if it is
% intended to be used with GPUmat aka NVidia's CUDA framework.
bunnytex = Screen('MakeTexture', w, dbunny, [], [], 2, 1);
%% Do the FFT -> manipulate -> IFFT cycle once in Matlab/Octave as a
% reference:
% Extract 2nd layer, the green color channel as an approximation of
% luminance:
dbunny = dbunny(:,:,2);
% Show it pre-transform:
close all;
imshow(dbunny);
title('Pre-FFT Bunny.');
% Perform 2D-FFT in Matlab/Octave on CPU:
f = fft2(dbunny);
% Shift DC component to center of spectrum "image":
fs = fftshift(f);
% Define a low-pass filter in frequency space, ie., an amplitude mask
% to point-wise multiply with the FFT spectrum of FFT transformed
% image:
% fwidth controls frequency - lower values = lower cutoff frequency:
hh = size(fs, 1)/2;
hw = size(fs, 2)/2;
[x,y] = meshgrid(-hw:hw,-hh:hh);
mask = exp(-((x/fwidth).^2)-((y/fwidth).^2));
% Quick and dirty trick: Cut off a bit from mask, so size of mask and
% frequency spectrum matches -- Don't do this at home (or for real research scripts)!
mask = mask(2:end, 2:end);
figure;
imshow(mask);
title('Gaussian low pass filter mask for FFT space filtering:');
tcpu = GetSecs;
for trials = 1:10
% Low pass filter by point-wise multiply of fs with filter 'mask' in
% frequency space:
fs = fs .* mask;
% Invert shift of filtered fs:
f = ifftshift(fs);
% Perform inverse 2D-FFT in Matlab/Octave on CPU:
b = ifft2(f);
end
tcpu = (GetSecs - tcpu) / trials;
fprintf('Measured per trial runtime of CPU FFT: %f msecs [n=%i trials].\n', tcpu * 1000, trials);
% Extract real component for display:
r = real(b);
figure;
imshow(r);
title('Post-FFT->Filter->IFFT Bunny.');
%% Perform FFT->Process->IFFT on GPU via GPUmat / CUDA:
% First Show input image:
Screen('DrawTexture', w, bunnytex);
Screen('TextSize', w, 28);
DrawFormattedText(w, 'Original pre-FFT Bunny:\nPress key to continue.', 'center', 40, [255 255 0]);
Screen('Flip', w);
KbStrokeWait(-1);
% Create GPUsingle matrix with input image from RGBA float bunnytex:
A = GPUTypeFromToGL(0, bunnytex, [], [], 0);
% Extract 2nd layer, the green channel from it for further use:
% Note: The 1st dimension of a GPUsingle matrix created from a
% Psychtoolbox texture always contains the "color channel":
A = A(2, :, :);
% Squeeze it to remove the singleton 1st dimension of A, because
% fft2 and ifft2 can only work on 2D input matrices, not 3D
% matrices, not even ones with a singleton dimension, ie., a 2D
% matrix in disguise:
A = squeeze(A);
% Perform forward 2D FFT on GPU:
F = fft2(A);
% Process it on GPU:
% First shift the spectrums origin (DC component aka 0 Hz frequency) to
% the center of the FFT image:
FS = fftshift(F);
% Generate the amplitude spectrum, scaled to 1/100th via abs(FS)/100.0,
% then converte the image into a texture 'fftmag' and display it:
fftmag = GPUTypeFromToGL(1, abs(FS)/100.0, [], [], 0);
Screen('DrawTexture', w, fftmag);
DrawFormattedText(w, 'Amplitude spectrum post-FFT Bunny:\nPress key to continue.', 'center', 40, [0 255 0]);
Screen('Flip', w);
KbStrokeWait(-1);
% Turn our filter 'mask' from cpu version above into a matrix on GPU:
% We must also transpose the mask, because 'mask' was generated to
% match the shape of the fs matrix on the cpu in Matlab/Octave. As
% Matlab and Octave use column-major storage, whereas Psychtoolbox uses
% row-major storage, all the GPU variables we got from Psychtoolbox
% textures are transposed with respect to the Matlab version. A simple
% transpose fixes this for our mask to match GPU format:
FM = transpose(GPUsingle(mask));
% Measure execution time on GPU. The cudaThreadSynchronize() command
% makes sure we are actually measuring GPU timing with the GetSecs():
cudaThreadSynchronize;
tgpu = GetSecs;
for trials = 1:10
% Filter the amplitude spectrum by point-wise multiply with filter FM:
FS = FM .* FS;
% Shift back DC component to original position to prepare inverse FFT:
F = ifftshift(FS);
% Process inverse 2D FFT on GPU:
B = ifft2(F);
end
cudaThreadSynchronize;
tgpu = (GetSecs - tgpu) / trials;
fprintf('Measured per trial runtime of GPU FFT: %f msecs [n=%i trials].\n', tgpu * 1000, trials);
fprintf('Speedup GPU vs. CPU: %f\n', tcpu / tgpu);
% Extract real component for display:
R = real(B);
% Convert R back into a floating point luminance texture 'tex' for
% processing/display by Screen. Here it is fine to pass a
% single-layer matrix for getting a luminance texture, because we
% don't use Screen('MakeTexture') or similar and don't perform the
% normalization step which would convert into a troublesome RGB
% texture.
tex = GPUTypeFromToGL(1, R, [], [], 0);
% Show it:
Screen('DrawTexture', w, tex);
DrawFormattedText(w, 'GPU-Processed FFT->Filter->IFFT Bunny:\nPress key to continue.', 'center', 40, [0 255 0]);
Screen('Flip', w);
KbStrokeWait(-1);
% Reset interop cache before closing the windows and textures:
GPUTypeFromToGL(4);
% Close window, which will also release all textures:
sca;
% Restore sync test setting:
Screen('Preference','SkipSyncTests', oldskip);
catch %#ok<CTCH>
% Error handling.
% Reset interop cache before closing the windows and textures:
GPUTypeFromToGL(4);
% Close window.
sca;
% Restore sync test setting:
Screen('Preference','SkipSyncTests', oldskip);
psychrethrow(psychlasterror);
end
% Done.
end
% Documentation of another, not yet tested, more complex and probably
% higher performance, way of doing GPU accelerated FFT with GPUmat:
%
% fftType = cufftType;
% fftDir = cufftTransformDirections;
%
% % FFT plan
% plan = 0;
% [status, plan] = cufftPlan2d(plan, size(d_A, 1), size(d_A, 2), fftType.CUFFT_R2C);
% cufftCheckStatus(status, 'Error in cufftPlan2D');
%
% % Run GPU FFT
% [status] = cufftExecR2C(plan, getPtr(d_A), getPtr(d_B), fftDir.CUFFT_FORWARD);
% cufftCheckStatus(status, 'Error in cufftExecR2C');
%
% % Run GPU IFFT
% [status] = cufftExecC2R(plan, getPtr(d_B), getPtr(d_A), fftDir.CUFFT_INVERSE);
% cufftCheckStatus(status, 'Error in cufftExecC2C');
%
% % results should be scaled by 1/N if compared to CPU
% % h_B = 1/N*single(d_A);
%
% [status] = cufftDestroy(plan);
% cufftCheckStatus(status, 'Error in cuffDestroyPlan');
|