/usr/bin/ost-survey is in lustre-utils 1.8.5+dfsg-3ubuntu1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
# This script is to be run on a client machine and will test all the
# OSTs to determine which is the fastest and slowest
# The current test method is as follows:
# -Create a directory for each OST
# -Use 'lfs setstripe' to set the Lustre striping such that IO goes to
# only one OST
# -Use 'dd' to write and read a file of a specified size
# -Compute the average and standard deviation of the per-OST rates
# -Find the slowest OST for read and write
# -Find the Fastest OST for read and write
# GLOBALS
# Package variables shared by the subroutines and the main program below.
$MNT   = "/mnt/lustre";  # Lustre mount point to survey (default; may be
                         # replaced by the command-line path argument)
$pname = $0;             # program name, shown in usage and the report header
$BSIZE = 1 << 20;        # size of one i/o block in bytes (1024 * 1024)
$FSIZE = 30;             # number of i/o blocks per test file
$OSTS  = 0;              # number of OSTs we will loop over; set by ost_count
# usage: print the command-line synopsis to STDOUT, then terminate the
# program with exit status 1. Takes no arguments and never returns.
# Reads the globals $pname (program name) and $FSIZE (current default size).
sub usage () {
    my @help_lines = (
        "Usage: $pname [-s <size>] [-h] <Lustre_Path>\n",
        "[OPTIONS]\n",
        " -s: size of test file in MB (default $FSIZE MB)\n",
        " -h: To display this help\n",
        "example : $pname /mnt/lustre\n",
    );
    print @help_lines;
    exit 1;
}
# ost_count: read the client's LOV state from /proc and publish it in globals.
#   - Sets $OSTS to the value of activeobd (number of active OSTs); if numobd
#     (number of OSTs) differs, reports the difference and sets $OSTS to
#     numobd instead, so that $OSTS covers every OST index.
#   - Sets $ACTIVEOST_INX[i] = 1 for every line i (counted from 0) of
#     target_obd whose third whitespace-separated field is "ACTIVE".
# Takes no arguments. Dies if one of the /proc files cannot be found or opened.
sub ost_count () {
    my $lov_glob = "/proc/fs/lustre/lov/*-clilov-*";

    # Locate <lov dir>/<name>; list-context glob so that only the first
    # match is taken and no iterator state is kept between calls.
    my $find_file = sub {
        my ($name) = @_;
        my ($path) = glob("$lov_glob/$name");
        defined $path or die "Cannot find $lov_glob/$name\n";
        return $path;
    };

    # Return the first line of <lov dir>/<name> without its newline.
    # An empty file yields 0, which is what the numeric uses below expect.
    my $first_line = sub {
        my ($name) = @_;
        my $path = $find_file->($name);
        open(my $fh, '<', $path) or die "Cannot open $path: $!\n";
        my $line = <$fh>;
        close $fh;
        return 0 unless defined $line;
        chomp $line;
        return $line;
    };

    # numobd gives number of ost's and activeobd gives number of active ost's
    $OSTS = $first_line->("activeobd");
    print "Number of Active OST devices : $OSTS\n";

    my $numost = $first_line->("numobd");
    if ( $numost != $OSTS ) {
        printf "Number of non active ots(s): %d\n", ( $numost - $OSTS );
        $OSTS = $numost;
    }

    my $target_path = $find_file->("target_obd");
    open(my $target_fh, '<', $target_path)
        or die "Cannot open $target_path: $!\n";
    my $count = 0;
    while (my $line = <$target_fh>) {
        # chomp, not chop: a final line without a newline must keep its
        # last character or "ACTIVE" would never match.
        chomp $line;
        my ($ost_num, $ost_name, $ost_status) = split(/\s+/, $line);
        if ( defined $ost_status && $ost_status eq "ACTIVE" ) {
            $ACTIVEOST_INX[$count] = 1;
        }
        $count++;
    }
    close $target_fh;
    return;
}
# cache_off: remember the current max_cached_mb setting and then write 0 to
# it (the main program calls this to turn off the local cache).
#   - Sets $CACHEFILE to the first /proc/fs/lustre/llite/*/max_cached_mb path.
#   - Sets $CACHESZ to the numeric value of that file's first line, so that
#     cache_on can write it back later.
# Takes no arguments. Dies if the file cannot be found or opened; a failure
# to write the 0 is only reported as a warning.
sub cache_off () {
    my $pattern = "/proc/fs/lustre/llite/*/max_cached_mb";

    # List-context glob: take the first match, undef when nothing matches.
    ($CACHEFILE) = glob($pattern);
    defined $CACHEFILE or die "Cannot find $pattern\n";

    # The message names $CACHEFILE; the old code interpolated an unrelated,
    # undefined variable here and so never showed the path.
    open(my $fh, '<', $CACHEFILE) or die "Cannot open $CACHEFILE: $!\n";
    my $line = <$fh>;
    close $fh;
    $CACHESZ = 0 + (defined $line ? $line : 0);

    system("echo 0 >> $CACHEFILE") == 0
        or warn "Warning: could not write 0 to $CACHEFILE\n";
    return;
}
# cache_on: write the value held in $CACHESZ back to the file named by
# $CACHEFILE, using the shell. Both globals are set by cache_off.
# Takes no arguments; returns the status reported by system().
sub cache_on () {
    my $restore_cmd = "echo $CACHESZ >> $CACHEFILE";
    return system($restore_cmd);
}
# make_dummy: create the file that a read test will read back.
# Arguments (positional, read from @_):
#   1. number of blocks to write (each block is $BSIZE bytes)
#   2. path of the file to create
# The file is filled from /dev/zero with dd; dd's stderr is discarded.
# Returns the status reported by system().
# Note: the empty prototype means callers must use the &make_dummy(...) form.
sub make_dummy () {
    my ($blocks, $path) = @_;
    my $dd_cmd = "dd of=$path if=/dev/zero count=$blocks bs=$BSIZE 2> /dev/null";
    return system($dd_cmd);
}
# run_test: time one dd transfer to or from a test file and record the result.
# Arguments (positional):
#   $SIZE     - number of blocks to transfer (each block is $BSIZE bytes)
#   $INX      - index under which the results are stored
#   $ACTION   - "write" (dd from /dev/zero into the file) or
#               "read"  (dd from the file into /dev/null)
#   $tempfile - path of the test file
# Results are stored in globals:
#   write: $wTime[$INX] (seconds) and $wMBs[$INX] (MB/s)
#   read:  $rTime[$INX] (seconds) and $rMBs[$INX] (MB/s)
# For "read", the file is created with make_dummy first if it does not exist.
# Any other $ACTION prints an error and exits with status 1.
# The sub is declared without the former empty prototype because it takes
# arguments; existing &run_test(...) calls are unaffected.
sub run_test {
    my ($SIZE, $INX, $ACTION, $tempfile) = @_;

    # Reject a bad action before doing any i/o or timing.
    if ( !defined $ACTION || ($ACTION ne "write" && $ACTION ne "read") ) {
        print "Action is neither read nor write\n";
        exit 1;
    }

    if ( $ACTION eq "read" && !(-f $tempfile) ) {
        &make_dummy($SIZE, $tempfile);
    }

    my $dd_cmd = $ACTION eq "write"
        ? "dd of=$tempfile if=/dev/zero count=$SIZE bs=$BSIZE 2> /dev/null"
        : "dd if=$tempfile of=/dev/null count=$SIZE bs=$BSIZE 2> /dev/null";

    # The first sync runs before the clock starts; the second one runs
    # before the clock stops, so it is part of the measured interval.
    system("sync");
    my ($ts0, $tu0) = gettimeofday();
    my $dd_status = system($dd_cmd);
    system("sync");
    my ($ts1, $tu1) = gettimeofday();

    # A failed dd would otherwise be reported as a (very fast) transfer.
    if ( $dd_status != 0 ) {
        warn "Warning: dd $ACTION on $tempfile failed (status $dd_status)\n";
    }

    # Elapsed seconds; seconds and microseconds are differenced separately.
    my $tdelta = ($ts1 - $ts0) + ($tu1 - $tu0) / 1000000;

    # Rate in MB/s; 0 when no time elapsed, to avoid dividing by zero.
    my $delta = $tdelta > 0
        ? ($SIZE * $BSIZE / $tdelta) / (1024 * 1024)
        : 0;

    if ( $ACTION eq "write" ) {
        $wTime[$INX] = $tdelta;
        $wMBs[$INX] = $delta;
    } else {
        $rTime[$INX] = $tdelta;
        $rMBs[$INX] = $delta;
    }
    return;
}
# calculate: summarise the per-OST rates of one operation and print them.
# Arguments (positional):
#   $op   - label printed in the report, e.g. "Read" or "Write"
#   @MBs  - rates in MB/s, indexed by OST number
# Only indexes i < $OSTS with $ACTIVEOST_INX[i] set are considered.
# Prints three lines to STDOUT:
#   - the worst (lowest rate) OST index and its rate
#   - the best (highest rate) OST index and its rate
#   - the average rate +/- the (population) standard deviation
# The average and deviation are taken over the active OSTs that were summed;
# dividing by $OSTS would understate them whenever an OST is inactive.
# The sub is declared without the former empty prototype because it takes
# arguments; existing &calculate(...) calls are unaffected.
sub calculate {
    my ($op, @MBs) = @_;

    my $active    = 0;          # number of OSTs included in the statistics
    my $total     = 0;
    my $best_OST  = 0;
    my $worst_OST = 0;
    my $max_mb    = 0;
    my $min_mb    = 999999999;

    for my $idx (0 .. $OSTS - 1) {
        next unless $ACTIVEOST_INX[$idx];
        # A missing measurement counts as 0 MB/s.
        my $rate = defined $MBs[$idx] ? $MBs[$idx] : 0;
        $active++;
        $total += $rate;
        if ( $max_mb < $rate ) {
            $max_mb   = $rate;
            $best_OST = $idx;
        }
        if ( $min_mb > $rate ) {
            $min_mb    = $rate;
            $worst_OST = $idx;
        }
    }

    # With no active OST there is nothing to average; report 0 instead of
    # dividing by zero.
    my $avg = $active ? $total / $active : 0;

    my $sq_total = 0;
    for my $idx (0 .. $OSTS - 1) {
        next unless $ACTIVEOST_INX[$idx];
        my $rate = defined $MBs[$idx] ? $MBs[$idx] : 0;
        $sq_total += ($rate - $avg) * ($rate - $avg);
    }
    my $sd = $active ? sqrt($sq_total / $active) : 0;

    printf "Worst %s OST indx: %d speed: %f\n", $op, $worst_OST, $min_mb;
    printf "Best %s OST indx: %d speed: %f\n", $op, $best_OST, $max_mb;
    printf "%s Average: %f +/- %f MB/s\n", $op, $avg, $sd;
}
# output_all_data: print one table row per OST index below $OSTS.
# Active OSTs (those with $ACTIVEOST_INX[i] set) show the read and write
# rates from @rMBs / @wMBs and the read and write times from @rTime / @wTime;
# all other indexes are reported as inactive. Takes no arguments.
sub output_all_data () {
    print "Ost# Read(MB/s) Write(MB/s) Read-time Write-time\n";
    print "----------------------------------------------------\n";
    for (my $idx = 0; $idx < $OSTS; $idx++) {
        unless ( $ACTIVEOST_INX[$idx] ) {
            printf "%d Inactive ost\n", $idx;
            next;
        }
        my @row = ($rMBs[$idx], $wMBs[$idx], $rTime[$idx], $wTime[$idx]);
        printf "%d %.3f %.3f %.3f %.3f\n", $idx, @row;
    }
}
# ---------------------------------------------------------------------
# Main program: parse the command line, survey every active OST with one
# write test and one read test, print the summary, and clean up.
# ---------------------------------------------------------------------
use Getopt::Std;
use POSIX qw(strftime);
use File::Path;

# Per-OST results, indexed by OST number; filled in by run_test.
@rTime = ();
@wTime = ();
@rMBs = ();
@wMBs = ();
# $ACTIVEOST_INX[i] is set to 1 by ost_count for every active OST.
@ACTIVEOST_INX = ();

# Locals
my $filename = "";
my $dirpath = "";
my $flag = 0;    # number of tests (writes plus reads) that were run

# Command line parameter parsing
getopts('s:h') or usage();
usage() if $opt_h;
if (defined $opt_s) {
    # The size is handed to dd as a block count, so it has to be a
    # positive whole number.
    if ($opt_s !~ /^[1-9][0-9]*$/) {
        print "ERROR: invalid size $opt_s\n";
        usage();
    }
    $FSIZE = $opt_s;
}
if (@ARGV > 1) {
    print "ERROR: extra argument $ARGV[1]\n";
    usage();
}
$MNT = $ARGV[0] if @ARGV;

# Check for the Time::HiRes module and import gettimeofday at run time,
# so that a missing module produces a readable message.
eval { require Time::HiRes; Time::HiRes->import('gettimeofday'); 1 }
    or die "You need to install the perl-Time-HiRes package to use this script\n";

my $time_v = time();
my $hostname = `lctl list_nids | head -1` or die "You need to install lctl to use this script\n";
chomp($hostname);
print "$pname: ", strftime("%D", localtime($time_v));
print " OST speed survey on $MNT from $hostname\n";

# get OST count
ost_count ();

# turn off local cache
cache_off ();

$dirpath = "$MNT/ost_survey_tmp";
eval { mkpath($dirpath) };
if ($@) {
    print "Couldn't create $dirpath: $@";
    # The cache was already turned off above; put it back before leaving.
    cache_on ();
    exit 1;
}

# Write pass. The & call form is used for run_test and calculate so the
# calls work whether or not those subs carry an empty prototype.
for (my $ost = 0; $ost < $OSTS; $ost++) {
    next unless $ACTIVEOST_INX[$ost];
    $filename = "$dirpath/file$ost";
    # set stripe for OST number $ost
    system ("lfs setstripe $filename 0 $ost 1");
    # Perform write for OST number $ost
    &run_test($FSIZE, $ost, "write", $filename);
    $flag++;
}

# Read pass, over the files written above.
for (my $ost = 0; $ost < $OSTS; $ost++) {
    next unless $ACTIVEOST_INX[$ost];
    $filename = "$dirpath/file$ost";
    # Perform read for OST number $ost
    &run_test($FSIZE, $ost, "read", $filename);
    $flag++;
}

# if read or write performed on any OST then display information.
if ( $flag ) {
    if ( $flag > 1 ) {
        &calculate("Read", @rMBs);
        &calculate("Write", @wMBs);
    }
    output_all_data ();
} else {
    print "There is no active OST's found\n";
}

# Restore the cache setting saved by cache_off, then remove the test files.
cache_on ();
eval { rmtree($dirpath) };
if ($@) {
    print "Warning: Couldn't remove $dirpath: $@";
}
|