akaros/scripts/lock_test.R
#!/usr/bin/env Rscript
#
# brho: 2014-10-13, 2020-06-25
#
# to install the helper libraries, run R and install them manually:
#
#       install.packages('oce')
#       install.packages('optparse')
#       install.packages('data.table')
# didn't need to library()-load this, but data.table wanted it at one point
#       install.packages('bit64')
#
# To run it, you can do stuff like this:
#
#  for i in *.dat.gz; do
#          echo Processing $i...
#          $AKA_DIR/scripts/lock_test.R -c 1 $i
#  done
#
#  echo "Creating ${DIR%/}-tput.pdf"
#  $AKA_DIR/scripts/lock_test.R -c 2 *.dat.gz -o ${DIR%/}-tput.pdf
#
#  echo "Creating ${DIR%/}-kernel-tput.pdf"
#  $AKA_DIR/scripts/lock_test.R -c 2 *kernel*.dat.gz -o ${DIR%/}-kernel-tput.pdf
#
# TODO:
# - analyze when the lock jumps between locality regions (hyperthread,
# core, CCX, numa, whatever).  It's a bit of a pain with Linux, since core 1 is
# on a different numa node than core 0.  Whatever.
# - For lock_test, have each additional core/thread be allocated close to the
# existing cores (locality), and then look at the lock scalability.  (classic,
# X-axis is nr_cpus, Y-axis is time per lock (avg acq lat)).
# - Figure out if there was something special about disabling/enabling IRQs

# library that includes the pwelch function
suppressPackageStartupMessages(library(oce))
# library for command line option parsing
suppressPackageStartupMessages(library(optparse))
# read.table is brutally slow.  this brings in fread
suppressPackageStartupMessages(library(data.table))

# file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead
# 0 0 4748619790389387 4748619790429260 4748619790429323 0
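#
# fread loads these columns as V1..V6, so e.g. acquire latency is V4 - V3 and
# hold latency is V5 - V4, each minus the measured TSC overhead (V6).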

g_tsc_overhead <- 0
g_tsc_frequency <- 0

# For PNG output...
g_width <- 1920
g_height <- 1200

######################################
### Functions
######################################

# caps any outlier that is more than 2 * (p99 - median) above the 99th
# percentile, rounding it down to that limit.  the limit is pretty arbitrary.
# useful for not having ridiculously large graphs, but still is lousy for
# various datasets.
round_outlier <- function(vec)
{
        vec99 = quantile(vec, .99)
        lim = vec99 + 2 * (vec99 - median(vec))
        return(sapply(vec, function(x) min(x, lim)))
}
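
# Example (interactive): round_outlier(acq_latency(data)) before plotting a
# density keeps a handful of huge acquire times from stretching the x-axis.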

# computes acquire latency, using the global TSC overhead if there isn't one
# in the data
acq_latency <- function(data)
{
        tsc_overhead = data$V6
        if (tsc_overhead[1] == 0)
                tsc_overhead = sapply(tsc_overhead, function(x) g_tsc_overhead)
        return (data$V4 - data$V3 - tsc_overhead)
}

# computes hold latency, using the global TSC overhead if there isn't one in
# the data
hld_latency <- function(data)
{
        tsc_overhead = data$V6
        if (tsc_overhead[1] == 0)
                tsc_overhead = sapply(tsc_overhead, function(x) g_tsc_overhead)
        return (data$V5 - data$V4 - tsc_overhead)
}

# histogram, bins based on percentiles, with limits of the graph based on the
# outermost bins.  somewhat works.  can get a 'need finite ylim' error if a
# bin is too small, probably because there are no values in it.
#
# with density and percentiles for bins, keep in mind the area of a rectangle
# is the fraction of data points in the cell.  since all bins have the same
# amount of data points, taller cells show a denser concentration in a skinnier
# bin
#
# i don't actually like this much.  using round_outlier with a 20-bin hist or
# a density plot looks nicer.
quant_hist <- function(vec)
{
        vec_quant = c(quantile(vec, probs=seq(0, 1, .01)))
        print(vec_quant)
        # keep the 100 in sync with the 0.01 above
        hist(vec, breaks=vec_quant, xlim=c(vec_quant[2], vec_quant[100]))
}
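
# Example (interactive), assuming data was loaded with load_test_data():
#   quant_hist(acq_latency(data))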

# line_data is a list of N data sets that can be plotted as lines.  each set
# should just be an array of values; the index will be the X value.
#
# names is a c() of N strings corresponding to the line_data members
# Set the desired x and y min/max.
plot_lines <- function(line_data, names=NULL, outfile="", title="",
                       xlab="X", ylab="Y", min_x=0, max_x=0, min_y=0, max_y=0)
{
        nr_lines <- length(line_data)

        # not guaranteed to work, but might save some pain
        if (max_x == 0) {
                for (i in 1:nr_lines)
                        max_x <- max(max_x, length(line_data[[i]]))
        }
        if (max_y == 0) {
                for (i in 1:nr_lines)
                        max_y <- max(max_y, max(line_data[[i]]))
        }

        # http://www.statmethods.net/graphs/line.html
        colors <- rainbow(nr_lines) # not a huge fan.  color #2 is light blue.
        linetype <- c(1:nr_lines)
        plotchar <- seq(18, 18 + nr_lines, 1)

        # http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
        # can manually move it if we don't want to waste space
        if (!is.null(names)) {
                plot(c(min_x, max_x), c(min_y, max_y), type="n", xaxt="n", yaxt="n")
                legend_sz = legend("topright", legend=names, lty=linetype,
                                   plot=FALSE)
                max_y = 1.04 * (max_y + legend_sz$rect$h)
        }

        if (outfile != "")
                png(outfile, width=g_width, height=g_height)

        plot(c(min_x, max_x), c(min_y, max_y), type="n", main=title, xlab=xlab,
                ylab=ylab)

        for (i in 1:nr_lines) {
                # too many points, so using "l" and no plotchar.
                #lines(densities[[i]], type="b", lty=linetype[i], col=colors[i],
                #      pch=plotchar[i], lwd=1.5)
                lines(line_data[[i]], type="l", lty=linetype[i], col=colors[i],
                        lwd=1.5)
        }

        #legend(x=min_x, y=max_y, legend=names, lty=linetype, col=colors)
        if (!is.null(names))
                legend("topright", legend=names, lty=linetype, col=colors)

        if (outfile != "")
                invisible(dev.off())
}
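
# Example (interactive), with a and b standing in for any numeric vectors:
#   plot_lines(list(a, b), names=c("a", "b"), xlab="index", ylab="value")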

plot_densities <- function(vecs, names=NULL, outfile="")
{
        nr_vecs = length(vecs)
        densities = list()
        max_y = 0
        min_x = Inf
        max_x = 0

        for (i in 1:nr_vecs) {
                # [[ ]] chooses the actual element.  [] just subsets
                dense_i = density(vecs[[i]])
                densities = c(densities, list(dense_i))
                max_y = max(max_y, dense_i$y)
                max_x = max(max_x, dense_i$x)
                min_x = min(min_x, dense_i$x)
        }
        plot_lines(densities, names=names, outfile=outfile,
                   title=paste(outfile, "Lock Acquisition Latency"),
                   xlab="TSC Ticks", ylab="PDF",
                   min_x=min_x, max_x=max_x, max_y=max_y)
}

plot_density <- function(vec, outfile="")
{
        vecs = list(vec)
        plot_densities(vecs=vecs, outfile=outfile)
}
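
# e.g. plot_density(round_outlier(acq_latency(data)), outfile="foo-acq.png"),
# which is what single_analysis() below does for each data file.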

plot_acq_times <- function(data, outfile="")
{
        if (outfile != "")
                png(outfile, width=g_width, height=g_height)

        # all acquire times, timestamps starting at 0
        time0 = min(data$V4)
        total_acq <- data$V4 - time0

        threadid <- unique(data$V1)

        acq_n <- list()
        names <- c()
        for (i in threadid) {
                thread_data <- subset(data, data$V1 == i) - time0
                acq_n <- c(acq_n, list(thread_data$V4))
                names <- c(names, paste("Thread ", i))
        }
        # can adjust ylim, defaults are from 1..nr_items
        stripchart(acq_n, group.names=names, pch='.', xlab="Time (TSC Ticks)",
                   main="Lock Acquisition Timestamps")

        if (outfile != "")
                invisible(dev.off())
}

print_vec <- function(vec, name)
{
        # goddamn.  cat doesn't like integer64 or something, so you need to
        # pre-paste for the calculations.  print will spit out [1] for row
        # names, which sucks.
        cat(name, "\n")
        cat("---------------\n")
        cat(paste("Average: ", round(mean(vec), 4), "\n"))
        cat(paste("Stddev: ", round(sd(vec), 4), "\n"))
        quants = round(quantile(vec, c(.5, .75, .9, .99, .999)))
        cat(paste("50/75/90/99/99.9: ", quants[[1]], quants[[2]], quants[[3]],
              quants[[4]], quants[[5]], "\n"))
        cat(paste("Min: ", min(vec), " Max: ", max(vec), "\n"))
        cat("\n")
}

# using something like the tables package to output latex booktabs would be
# much nicer
print_stats <- function(data)
{
        print_vec(acq_latency(data), "Acquire Latency")
        print_vec(hld_latency(data), "Hold Latency")
}

# the 'queue' system includes waiting for the lock and holding the lock.
# Returns a data.frame, with X = TSC, Y = qlen
get_qlen <- function(data)
{
        if (g_tsc_frequency == 0)
                stop("WARNING: global TSC freq not set!")

        # timestamps for all threads, sorted.
        # a 'pre' is someone entering the q.  'unl' is leaving.
        # both are normalized to the TSC time for pres
        pres <- sort(data$V3 - min(data$V3))
        unls <- sort(data$V5 - min(data$V3))

        # not sure the best way to create the data to print.  easiest seems to
        # be two vectors, X = times, Y = qlens, which we'll pass to plot.
        # but you can't append to them, that's way too slow.  we do know the
        # overall length of both: one entry for each of pres and unls.
        # so we preallocate.
        samples <- length(pres) + length(unls)

        times <- rep(0, samples)
        qlens <- rep(0, samples)

        qlen <- 0
        # R uses 1 indexing
        p_i <- 1
        u_i <- 1

        for (i in 1:samples) {
                now <- 0

                # a bit painful.
                if (p_i <= length(pres))
                        pre <- pres[[p_i]]
                else
                        pre <- Inf
                if (u_i <= length(unls))
                        unl <- unls[[u_i]]
                else
                        unl <- Inf

                if (pre <= unl) {
                        if (pre == Inf) {
                                print("both pre and unl were Inf!")
                                break
                        }

                        qlen <- qlen + 1
                        now <- pre
                        p_i <- p_i + 1
                } else {
                        if (unl == Inf) {
                                print("both unl and pre were Inf!")
                                break
                        }

                        qlen <- qlen - 1
                        now <- unl
                        u_i <- u_i + 1
                }
                times[i] = now
                qlens[i] = qlen
        }
        # times is in units of TSC.  convert to msec, for easier comparison
        # with throughput.  though note that when plotting with e.g. tput, the
        # x-axis is set by tput, and whatever we print is scaled to fit in that
        # space.  so if you didn't do this, you wouldn't notice.  (I was using
        # nsec for a while and didn't notice).
        times <- times / (g_tsc_frequency / 1e3)

        return(data.frame(setNames(list(times, qlens),
                                   c("Time (msec)", "Queue Length"))))
}
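
# For instance, with pres = c(1, 2, 4) and unls = c(3, 5, 6) (illustrative
# tick values), the merge above yields qlens of 1, 2, 1, 2, 1, 0 at times
# 1, 2, 3, 4, 5, 6, before the msec conversion.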

plot_qlen <- function(data, outfile="")
{
        df <- get_qlen(data)
        if (outfile != "")
                png(outfile, width=g_width, height=g_height)

        # df isn't a 'line', so we need to plot it directly
        plot(df, type="p", main="Spinlock Queue Length")

        if (outfile != "")
                invisible(dev.off())
}

get_tput <- function(data)
{
        if (g_tsc_frequency == 0)
                stop("WARNING: global TSC freq not set!")

        # acq times, sorted, normalized to the lowest, still in ticks
        total_acq <- sort(data$V4 - min(data$V4))

        # convert to nsec
        total_acq <- total_acq / (g_tsc_frequency / 1e9)

        # rounds down all times to the nearest msec, will collect into a table,
        # which counts the freq of each bucket, as per:
        # http://stackoverflow.com/questions/5034513/how-to-graph-requests-per-second-from-web-log-file-using-r
        msec_times <- trunc(total_acq/1e6)

        # if we just table directly, we'll lose the absent values (msec where no
        # timestamp happened).  not sure if factor is the best way, the help
        # says it should be a small range.
        # http://stackoverflow.com/questions/1617061/including-absent-values-in-table-results-in-r
        msec_times <- factor(msec_times, 0:max(msec_times))

        # without the c(), it'll be a bunch of bars at each msec
        tab <- c(table(msec_times))
        return(tab)
}
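
# get_tput returns a named table of counts: entry "0" is the number of
# acquires in the first msec, "1" the next msec, and so on, with empty msec
# buckets included as zeros.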

# this is a little rough.  You need the data loaded, but calculating tput
# requires the right TSC freq set.  So don't use this with data from different
# machines (which can be a tough comparison, regardless).
plot_tputs <- function(data_l, names=NULL, outfile="")
{
        nr_lines <- length(data_l)
        tputs <- list()

        for (i in 1:nr_lines) {
                # [[ ]] chooses the actual element.  [] just subsets
                tput_i <- get_tput(data_l[[i]])
                tputs <- c(tputs, list(tput_i))
        }

        plot_lines(tputs, names, outfile=outfile,
                   title=paste(outfile, "Lock Throughput"),
                   xlab = "Time (msec)", ylab = "Locks per msec")
}


# helper, plots throughput (tp) and whatever else you give it.  e.g. qlen is a
# value at a timestamp (msec)
plot_tput_foo <- function(tp, foo, foo_str, outfile="")
{
        if (outfile != "")
                png(outfile, width=g_width, height=g_height)

        # decent looking margins for the right Y axis
        par(mar = c(5, 5, 3, 5))

        # Scaling ylim to make room for FOO, which tends to be up top
        plot(tp, type="l", main=paste(outfile, "Lock Throughput and", foo_str),
             ylim=c(0, max(tp)*1.2),
             xlab="Time (msec)", ylab="Throughput",
             col="blue", lty=1)

        par(new = TRUE)
        plot(foo, type="p", xaxt = "n", yaxt = "n", ylab = "", xlab = "",
             col="red", lty=2)
        axis(side = 4)
        mtext(foo_str, side = 4)

        legend("topleft", c("tput", foo_str), col = c("blue", "red"),
               lty = c(1, 2))

        if (outfile != "")
                invisible(dev.off())
}

# plot throughput and queue length on the same graph
plot_tput_qlen <- function(data, outfile="")
{
        tp <- get_tput(data)
        ql <- get_qlen(data)

        plot_tput_foo(tp, ql, "Qlen", outfile)
}

# if you know how many msec there are, this is like doing:
#     hist(total_acq/1000000, breaks=50)
# except it gives you a line, with points being the top of the hist bars
plot_tput <- function(data, outfile="")
{
        plot_tputs(list(data), outfile=outfile)
}

# the middle timestamp is the acquire-by-lockholder.  sorting on that, we can
# see the coreid.  it's a really busy thing to graph.
#
# You can see some gaps in non-MCS lock holders when a core is starved or not
# participating.  By comparing that gap to the qlen, you can see if it was
# starvation or just not participating.  Also, you can easily spot a preempted
# lockholder (if they are gone for enough msec).
get_core_acqs <- function(data)
{
        earliest <- min(data$V3)
        sorted <- data[order(data$V3)]

        times <- sorted$V3 - earliest
        cores <- sorted$V1

        # times is in units of TSC.  convert to msec, for easier comparison
        # with throughput
        times <- times / (g_tsc_frequency / 1e3)

        return(data.frame(setNames(list(times, cores),
                                   c("Time (msec)", "Holder ID"))))
}

plot_tput_core_acqs <- function(data, outfile="")
{
        tp <- get_tput(data)
        ca <- get_core_acqs(data)

        plot_tput_foo(tp, ca, "Holder ID", outfile)
}

get_tsc_freq <- function(filename) {
        # format looks like this:
        # tsc_frequency 2300002733
        header <- readLines(filename, n=50)
        line <- grep("tsc_frequency", header, value=TRUE)
        x <- strsplit(line, split=" ")
        return(as.numeric(x[[1]][3]))
}


# pretty ugly.  reads in the data, but also sets the TSC freq, which is
# particular to a given machine.  so if you use this, be sure you do any
# analyses before loading another.  Sorry.
load_test_data <- function(filename)
{
        g_tsc_frequency <- get_tsc_freq(filename)
        if (g_tsc_frequency == 0)
                stop("WARNING: could not set global TSC freq!")
        assign("g_tsc_frequency", g_tsc_frequency, envir = .GlobalEnv)

        # using grep for a hokey comment.char="#".  it's fast enough for now.

        # integer64="numeric", o/w all sorts of things fuck up.  like density()
        # and round_outlier().  Even with sprinkling as.numeric() around, it's
        # still a mess.  This actually worked, though presumably there's a
        # reason to use int64.

        if (grepl("\\.gz$", filename)) {
                return(fread(cmd=paste("gunzip -c", filename, "| grep -v '^#'"),
                             integer64="numeric"))
        } else {
                return(fread(cmd=paste("grep -v '^#'", filename),
                             integer64="numeric"))
        }
}
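
# Example (interactive), with "foo.dat.gz" standing in for any lock_test
# output file:
#   data <- load_test_data("foo.dat.gz")
#   print_stats(data)
#   plot_tput_qlen(data, outfile="foo-tput_qlen.png")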

######################################
### Main
######################################
# establish optional arguments
# "-h" and "--help" are automatically in the list
option_list <- list(
        make_option(c("-c", "--cmd"), type="integer", default=-1,
                    help="
        1: acq_lat & tput_qlen for one file
        2: tput comparison for multiple files
        "),
        make_option(c("-o", "--output"), type="character",
                    default="", help="Output file, if applicable")
)

# CMD 1
single_analysis <- function(args)
{
        data_file <- args$args

        if (file.access(data_file) == -1)
                stop("cannot access input file")

        data <- load_test_data(data_file)

        basename <- data_file
        basename <- gsub(".gz$", "", basename)
        basename <- gsub(".dat$", "", basename)

        sink(paste(basename, "-stats.txt", sep=""))
        cat("Filename:", data_file, "\n")
        cat("\n")
        print(grep("^#", readLines(data_file, n=50), value=TRUE))
        cat("\n")
        print_stats(data)
        sink()

        # For now, create all files.  Can make options later
        plot_density(round_outlier(acq_latency(data)),
                     outfile=paste(basename, "-acq.png", sep=""))
        plot_tput_qlen(data,
                       outfile=paste(basename, "-tput_qlen.png", sep=""))
        plot_tput_core_acqs(data,
                       outfile=paste(basename, "-tput_core_acqs.png", sep=""))
}

# CMD 2
tput_comparison <- function(args)
{
        outfile <- args$options$output

        if (outfile == "")
                stop("Need an outfile (-o)")

        tputs <- list()
        names <- list()

        for (i in args$args) {
                data <- load_test_data(i)
                # I wanted to do this:
                #tputs <- append(tputs, get_tput(data))
                # but it's a collection of lists of TPUT tables.  or something
                tputs <- c(tputs, list(get_tput(data)))
                names <- append(names, i)
                rm(data)
        }
        plot_lines(tputs, names, outfile=outfile,
                   title="Lock Throughput",
                   xlab = "Time (msec)",
                   ylab = "Locks per msec")
}

main <- function()
{
        parser <- OptionParser(usage="%prog [options] data.dat",
                               option_list=option_list)
        # ugly as hell, no error messages, etc.
        args <- parse_args(parser, positional_arguments = TRUE)
        opt <- args$options

        f <- switch(opt$cmd, single_analysis, tput_comparison)
        if (is.null(f))
                stop("Need a valid (-c) command (-h for options)")
        f(args)
}

# don't run when sourced
if (sys.nframe() == 0L) {
        main()
}

# Dirty Hacks Storage

# source("scripts/lock_test.R")

# this is slow and shitty:
# raw = read.table(filename, comment.char="#")

# to print graphics to a file, you can do the following.  pdf can be really
# slow for large files, so try png instead
#pdf("foo.pdf")
#command that creates a graphic, like hist or plot
#dev.off()

# hack, so i can re-source this and not have to reload the TSC freq
#g_tsc_frequency <- get_tsc_freq("raw1.dat")
 591