akaros/scripts/lock_test.R
<<
>>
Prefs
   1# brho: 2014-10-13
   2#
   3# this is partly fleshed out.  to use, i've just been sourcing the script in R,
   4# then overriding the tsc overhead and freq.  then just running various
   5# functions directly, like print_stats, plot_densities, plot_tput, etc.  don't
   6# expect any command line options to work.
   7
   8# library that includes the pwelch function
   9suppressPackageStartupMessages(library(oce))
  10# library for command line option parsing
  11suppressPackageStartupMessages(library(optparse))
  12
  13# file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead
  14
# fallback tsc overhead: acq_latency and hld_latency subtract this value
# instead of column V6 whenever the first V6 entry of the data is 0.  meant to
# be overridden by hand after sourcing the script.
g_tsc_overhead <- 0
# tsc frequency: plot_tput refuses to run while this is 0 and divides the tick
# counts by (g_tsc_frequency / 1e9).  presumably in Hz (ticks per second) --
# confirm.  meant to be overridden by hand after sourcing the script.
g_tsc_frequency <- 0
  17
  18######################################
  19### Functions
  20######################################
  21
  22# takes any outliers 2 * farther than the 99th quantile and rounds them down to
  23# that limit.  the limit is pretty arbitrary.  useful for not having
  24# ridiculously large graphs, but still is lousy for various datasets.
  25round_outlier <- function(vec)
  26{
  27        vec99 = quantile(vec, .99)
  28        lim = vec99 + 2 * (vec99 - median(vec))
  29        return(sapply(vec, function(x) min(x, lim)))
  30}
  31
  32# computes acquire latency, using global tsc freq if there isn't one in the
  33# data
  34acq_latency <- function(data)
  35{
  36        tsc_overhead = data$V6
  37        if (tsc_overhead[1] == 0)
  38                tsc_overhead = sapply(tsc_overhead, function(x) g_tsc_overhead)
  39        return (data$V4 - data$V3 - tsc_overhead)
  40}
  41
  42# computes hold latency, using global tsc freq if there isn't one in the data
  43hld_latency <- function(data)
  44{
  45        tsc_overhead = data$V6
  46        if (tsc_overhead[1] == 0)
  47                tsc_overhead = sapply(tsc_overhead, function(x) g_tsc_overhead)
  48        return (data$V5 - data$V4 - tsc_overhead)
  49}
  50
  51# histogram, bins based on percentiles, with limits of the graph based on the
  52# outermost bins.  somewhat works.  can get a 'need finite ylim' if the bins
  53# are too small.  maybe since there are no values in it.
  54#
  55# with density and percentiles for bins, keep in mind the area of a rectangle
  56# is the fraction of data points in the cell.  since all bins have the same
  57# amount of data points, taller cells show a denser concentration in a skinnier
  58# bin
  59#
  60# i don't actually like this much.  using a round_outlier with 20-bin hist or a
  61# density plot look nicer.
  62quant_hist <- function(vec)
  63{
  64        vec_quant = c(quantile(vec, probs=seq(0, 1, .01)))
  65        print(vec_quant)
  66        # keep the 100 in sync with the 0.01 above
  67        hist(vec, breaks=vec_quant, xlim=c(vec_quant[2], vec_quant[100]))
  68}
  69
  70plot_densities <- function(vecs, names=NULL, outfile="",
  71                           title="Lock Acquisition Latency",
  72                           xlab="TSC Ticks")
  73{
  74        nr_vecs = length(vecs)
  75        densities = list()
  76        max_y = 0
  77        min_x = Inf
  78        max_x = 0
  79
  80        for (i in 1:nr_vecs) {
  81                # [[ ]] chooses the actual element.  [] just subsets
  82                dense_i = density(vecs[[i]])
  83                densities = c(densities, list(dense_i))
  84                max_y = max(max_y, dense_i$y)
  85                max_x = max(max_x, dense_i$x)
  86                min_x = min(min_x, dense_i$x)
  87        }
  88
  89        # http://www.statmethods.net/graphs/line.html
  90        colors <- rainbow(nr_vecs) # not a huge fan.  color #2 is light blue.
  91        linetype <- c(1:nr_vecs)
  92        plotchar <- seq(18, 18 + nr_vecs, 1)
  93
  94        # http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
  95        # can manually move it if we don't want to waste space
  96        if (!is.null(names)) {
  97                plot(c(min_x,max_x), c(0, max_y), type="n", xaxt="n", yaxt="n")
  98                legend_sz = legend("topright", legend=names, lty=linetype,
  99                                   plot=FALSE)
 100                max_y = 1.04 * (max_y + legend_sz$rect$h)
 101                invisible(dev.off())
 102        }
 103
 104        if (outfile != "")
 105                pdf(outfile)
 106
 107        plot(c(min_x,max_x), c(0, max_y), type="n", xlab=xlab, main=title,
 108             ylab="Density")
 109
 110        for (i in 1:nr_vecs) {
 111                # too many points, so using "l" and no plotchar.
 112                #lines(densities[[i]], type="b", lty=linetype[i], col=colors[i],
 113                #      pch=plotchar[i], lwd=1.5)
 114                lines(densities[[i]], type="l", lty=linetype[i], lwd=1.5)
 115        }
 116
 117        #legend(x=min_x, y=max_y, legend=names, lty=linetype, col=colors)
 118        if (!is.null(names))
 119                legend("topright", legend=names, lty=linetype)
 120
 121        if (outfile != "")
 122                invisible(dev.off())
 123}
 124
 125
 126plot_density <- function(vec, outfile="",
 127                         title="Lock Acquisition Latency",
 128                         xlab="TSC Ticks")
 129{
 130        vecs = list(vec)
 131        plot_densities(vecs=vecs, outfile=outfile, title=title, xlab=xlab)
 132}
 133
 134
 135plot_acq_times <- function(data, outfile="")
 136{
 137        if (outfile != "")
 138                pdf(outfile)
 139
 140    # all acquire times, timestamps starting at 0
 141        time0 = min(data$V4)
 142        total_acq <- data$V4 - time0
 143
 144        threadid <- unique(data$V1)
 145
 146        acq_n <- list()
 147        names <- c()
 148        for (i in threadid) {
 149                thread_data <- subset(data, data$V1 == i) - time0
 150                acq_n <- c(acq_n, list(thread_data$V4))
 151                names <- c(names, paste("Thread ", i))
 152        }
 153        # can adjust ylim, default are from 1..nr_items
 154        stripchart(acq_n, group.names=names, pch='.', xlab="Time (TSC Ticks)",
 155                   main="Lock Acquisition Timestamps")
 156
 157        if (outfile != "")
 158                invisible(dev.off())
 159}
 160
 161print_vec <- function(vec)
 162{
 163        # this whole str, paste dance is nasty
 164        print("---------------")
 165        str = paste("Average: ", round(mean(vec), 4))
 166        print(str)
 167        str = paste("Stddev: ", round(sd(vec), 4))
 168        print(str)
 169        quants = round(quantile(vec, c(.5, .75, .9, .99, .999)))
 170        str = paste("50/75/90/99/99.9: ", quants[[1]], quants[[2]], quants[[3]],
 171                    quants[[4]], quants[[5]])
 172        print(str)
 173        str = paste("Min: ", min(vec), " Max: ", max(vec))
 174        print(str)
 175}
 176
 177# using something like the tables package to output latex booktab's would be
 178# much nicer
 179print_stats <- function(data)
 180{
 181        acq_lat = acq_latency(data)
 182        hld_lat = hld_latency(data)
 183
 184        print("Acquire Latency")
 185        print_vec(acq_lat)
 186        print("")
 187        print("Hold Latency")
 188        print_vec(hld_lat)
 189}
 190
 191# if you know how many msec there are, this is like doing:
 192#     hist(total_acq/1000000, breaks=50)
 193# except it gives you a line, with points being the top of the hist bars
 194plot_tput <- function(data, title="Lock Acquisition Throughput", outfile="")
 195{
 196        if (outfile != "")
 197                pdf(outfile)
 198
 199        total_acq = sort(data$V4 - min(data$V4))
 200
 201        if (g_tsc_frequency == 0)
 202                stop("WARNING: global TSC freq not set!")
 203        # convert to nsec? XXX
 204        total_acq = total_acq / (g_tsc_frequency / 1e9)
 205
 206        # rounds down all times to the nearest msec, will collect into a table,
 207        # which counts the freq of each bucket, as per:
 208        # http://stackoverflow.com/questions/5034513/how-to-graph-requests-per-second-from-web-log-file-using-r
 209        msec_times = trunc(total_acq/1e6)
 210
 211        # if we just table directly, we'll lose the absent values (msec where no
 212        # timestamp happened).  not sure if factor is the best way, the help
 213        # says it should be a small range.
 214        # http://stackoverflow.com/questions/1617061/including-absent-values-in-table-results-in-r
 215        msec_times = factor(msec_times, 0:max(msec_times))
 216
 217        # without the c(), it'll be a bunch of bars at each msec
 218        tab = c(table(msec_times))
 219        plot(tab, type="o", main=title, xlab="Time (msec)",
 220             ylab="Locks per msec")
 221
 222        if (outfile != "")
 223                invisible(dev.off())
 224}
 225
 226
 227# extract useful information from the raw data file
 228extract_data <- function(filename) {
 229        mydata = read.table(filename, comment.char="#")
 230
 231        work_amt = mydata$V2
 232
 233        # calculate time steps and mean time step (all in ns)
 234        times = as.numeric(as.character(mydata$V1))
 235        N_entries = length(times)
 236        time_steps_ns = times[2:N_entries] - times[1:(N_entries-1)]
 237        avg_time_step_ns = mean(time_steps_ns)
 238
 239        return(list(work_amt=work_amt, time_steps_ns=time_steps_ns,
 240                N_entries=N_entries, avg_time_step_ns=avg_time_step_ns))
 241}
 242
 243
 244######################################
 245### Main
 246######################################
 247
 248### collect command line arguments
 249# establish optional arguments
 250# "-h" and "--help" are automatically in the list
 251option_list <- list(
 252  make_option(c("-i", "--input"), type="character",
 253    default="welch_input.dat",
 254    help="Input data file"),
 255  make_option(c("-o", "--output"), type="character",
 256    default="welch_plot.pdf",
 257    help="Output file for plotting"),
 258  make_option("--xmin", type="double", default=0,
 259    help=paste("Minimum frequency (horizontal axis) ",
 260      "in output plot [default %default]",sep="")),
 261  make_option("--xmax", type="double", default=40,
 262    help=paste("Maximum frequency (horizontal axis) ",
 263      "in output plot [default %default]",sep="")),
 264  make_option("--ymin", type="double", default=-1,
 265    help=paste("Minimum spectrum (vertical axis) ",
 266      "in output plot [default adaptive]",sep="")),
 267  make_option("--ymax", type="double", default=-1,
 268    help=paste("Maximum spectrum (vertical axis) ",
 269      "in output plot [default adaptive]",sep=""))
 270)
 271
 272## read command line
 273#opt <- parse_args(OptionParser(option_list=option_list))
 274#  
 275##max_freq = as.numeric(as.character(args[3]))
 276#
 277#### read in data
 278#mydata = extract_data(opt$input)
 279
 280#round_outlier <- function(vec)
 281#acq_latency <- function(data)
 282#hld_latency <- function(data)
 283#plot_densities <- function(vecs, names=NULL, outfile="",
 284#plot_density <- function(vec, outfile="",
 285#plot_acq_times <- function(data, outfile="")
 286#print_vec <- function(vec)
 287#print_stats <- function(data)
 288#plot_tput <- function(data)
 289#mydata = read.table(filename, comment.char="#")
 290
 291