home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
unix
/
volume20
/
plum-benchmarks
< prev
next >
Wrap
Text File
|
1989-10-22
|
40KB
|
1,307 lines
Subject: v20i047: Plum-Hall benchmarks for timing common C operations
Newsgroups: comp.sources.unix
Sender: sources
Approved: rsalz@uunet.UU.NET
Submitted-by: cbmvax!snark.uu.net!eric@uunet.uu.net
Posting-number: Volume 20, Issue 47
Archive-name: plum-benchmarks
This is a simple set of benchmarks intended to give programmers timing
information about common C operations. See the Makefile for instructions;
basically, all you have to do is type `make', wait, and look at bench.out.
A table of results for some popular machines is included in bench.tbl.
For more information on the benchmark techniques and the philosophy
behind them, browse the file ARTICLE. Here's an excerpt from the article:
We are placing into the public domain some simple benchmarks with
several appealing properties:
They are short enough to type while browsing at trade shows.
They are protected against overly-aggressive compiler optimizations.
They reflect empirically-observed operator frequencies in C programs.
They give a C programmer information directly relevant to programming.
Enjoy!
eric@snark.uu.net
#!/bin/sh
: "This is a shell archive, meaning: "
: "1. Remove everything above the #! /bin/sh line. "
: "2. Save the resulting text in a file. "
: "3. Execute the file with /bin/sh (not csh) to create the files:"
: " READ.ME"
: " Makefile"
: " benchdbl.c"
: " benches.c"
: " benchfn.c"
: " benchlng.c"
: " benchmul.c"
: " benchreg.c"
: " benchsho.c"
: " clock.c"
: " f3.c"
: " fround.c"
: " run-all.c"
: " benches.out"
: " run-all.out"
: " run-all.bat"
: " time-cmd.bat"
: " time-dbl.bat"
: " run-all.sh"
: " cr-lf"
: " n-n"
: " bench.tbl"
: " ARTICLE"
echo file: READ.ME
sed 's/^X//' >READ.ME << 'END-of-READ.ME'
X The Plum-Hall Benchmarks
X
XThis is a simple set of benchmarks intended to give programmers timing
Xinformation about common C operations. See the Makefile for instructions;
Xbasically, all you have to do is type `make', wait, and look at bench.out.
X
XA table of results for some popular machines is included in bench.tbl.
XFor more information on the benchmark techniques and the philosophy
Xbehind them, browse the file ARTICLE.
X
X Enjoy!
X eric@snark.uu.net
END-of-READ.ME
echo file: Makefile
sed 's/^X//' >Makefile << 'END-of-Makefile'
X#
X# Makefile for the Plum-Hall benchmarks package
X# by Eric S. Raymond (eric@snark.uu.net)
X#
X# Your system type for the benchmark list (do *not* string-quote it)
XSYSTYPE = AT&T 6386/375
X
X# Comment this out if your headers already define CLOCKS_PER_SEC
XCFLAGS = -DCLOCKS_PER_SEC=60
X
X# Comment this out if you have ANSI clock(3)
XCLOCK = clock.o
X
X# Every source file that benches.c pulls in with #include; the objects
X# must be rebuilt when any of them changes.
XBENCHSRC = benches.c benchreg.c benchsho.c benchlng.c benchmul.c benchfn.c benchdbl.c
X
X# Run the drivers through ./ so the build does not need "." in PATH
Xbench.out: nbench obench
X	./nbench 1 "$(SYSTYPE) (no -O)" >bench.out
X	./obench 1 "$(SYSTYPE) (-O)" >>bench.out
X
X# optimizer off
Xnbench.o: $(BENCHSRC)
X	$(CC) $(CFLAGS) -c benches.c
X	mv benches.o nbench.o
Xnbench: nbench.o f3.o $(CLOCK)
X	$(CC) -o nbench nbench.o f3.o $(CLOCK) -lm
X
X# optimizer on
Xobench.o: $(BENCHSRC)
X	$(CC) -O $(CFLAGS) -c benches.c
X	mv benches.o obench.o
Xobench: obench.o f3.o $(CLOCK)
X	$(CC) -o obench obench.o f3.o $(CLOCK) -lm
X
Xclean:
X rm -f [no]bench *.o bench.shar
X
Xbench.shar:
X shar READ.ME Makefile *.c *.out *.bat *.sh cr-lf n-n bench.tbl ARTICLE >bench.shar
END-of-Makefile
echo file: benchdbl.c
sed 's/^X//' >benchdbl.c << 'END-of-benchdbl.c'
X/* benchdbl - benchmark for double
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an unsigned type
X * Let T be the execution time in milliseconds
X * Then average time per operator = T/major usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE double
X#include <stdio.h>
Xmain(ac, av)
X int ac;
X char *av[];
X {
X STOR_CL TYPE a, b, c;
X long d, major, atol();
X static TYPE m[10] = {0};
X
X major = atol(av[1]);
X printf("executing %ld iterations\n", major);
X a = b = (av[1][0] - '0');
X for (d = 1; d <= major; ++d)
X {
X /* inner loop executes 1000 selected operations */
X for (c = 1; c <= 40; ++c)
X {
X a = a + b + c;
X b = a * 2;
X a = b / 10;
X a = -a;
X b = -a - b - c;
X a = b == c;
X b = a + c;
X a = !b;
X b = a + c;
X a = b > c;
X }
X }
X printf("a=%d\n", a);
X }
END-of-benchdbl.c
echo file: benches.c
sed 's/^X//' >benches.c << 'END-of-benches.c'
X/* benches - driver for Plum Hall benchmarks */
X#include <stdio.h>
X#include <time.h>
X
Xint benchreg(), benchsho(), benchlng();
Xint benchmul(), benchfn(), benchdbl();
X
Xvoid tabulate();
Xchar *fround();
Xmain(argc, argv)
X int argc;
X char *argv[];
X {
X char result[6][10];
X int i;
X
X if (argv[1][0] != '1')
X printf("argv[1] must be 1 !\n");
X if (argc < 3)
X {
X fprintf(stderr, "usage: benches 1 'compiler-id'\n");
X exit(2);
X }
X tabulate(benchreg, result[0]);
X tabulate(benchsho, result[1]);
X tabulate(benchlng, result[2]);
X tabulate(benchmul, result[3]);
X tabulate(benchfn, result[4]);
X tabulate(benchdbl, result[5]);
X printf("\n\n");
X printf("%20.20s %9s %9s %9s %9s %9s %9s\n",
X "", "register", "auto", "auto", "int", "function", "auto");
X printf("%20.20s %9s %9s %9s %9s %9s %9s\n",
X "", "int", "short", "long", "multiply", "call+ret", "double");
X printf("%22.22s ",
X argv[2]);
X for (i = 0; i <= 5; ++i)
X printf("%9.9s ", result[i]);
X printf("\n");
X exit(0);
X }
Xvoid tabulate(fn, s)
X void (*fn)();
X char *s;
X {
X static char arg1[20];
X static char *arga[3] = { "x", &arg1[0], 0 };
X double before, after, microsec;
X long major, major_next;
X
X major_next = 1;
X do {
X major = major_next;
X sprintf(arg1, "%ld", major);
X before = (double)clock();
X (*fn)(2, arga);
X after = (double)clock();
X major_next *= 10;
X } while (after-before < 100);
X microsec = 1e3 * (after - before) / CLOCKS_PER_SEC / major;
X sprintf(s, "%9s ", fround(microsec, 5, 3));
X }
X
X/* fround - round double x to precision p, n significant digits
X * uses static string for result - not re-entrant
X * fround is an accommodation for K+R-level printf which lacks %.*e or %g
X * slow, fat version - uses sprintf
X *
X * x - value to format
X * p - largest number of digits printed after the decimal point; the
X *     text is blank-padded so that the point plus what follows it is
X *     always p+1 characters wide
X * n - number of significant digits to keep
X * returns a pointer to a static buffer that the next call overwrites
X */
X#include <stdio.h>
Xchar *fround(x, p, n)
X    double x;
X    short p;
X    short n;
X    {
X    double y;
X    double log10();
X    short digs;
X    short nlog;
X    static char s[40] = {0};
X    char fmt[20];
X
X    /* round to n significant digits: print in %e form and read it back */
X    sprintf(fmt, "%%.%de", n-1);
X    sprintf(s, fmt, x);
X    sscanf(s, "%lf", &y);
X    /* nlog is the decimal exponent of y; take it from the magnitude so
X     * that a negative value is never handed to log10 */
X    if (y == 0)
X        nlog = 0;
X    else
X        nlog = log10(y < 0 ? -y : y);
X    if (nlog < 0)
X        --nlog;
X    /* decimals still needed after the digits in front of the point */
X    digs = n - nlog - 1;
X    if (digs < 0)
X        digs = 0;
X    else if (digs > p)
X        digs = p;
X    sprintf(fmt, "%%.%df", digs);
X    sprintf(s, fmt, y);
X    /* %.0f prints no point, so add one to keep the columns aligned */
X    if (digs == 0)
X        strcat(s, ".");
X    while (digs++ < p)
X        strcat(s, " ");
X    return (s);
X    }
X
X
X
X
X#define main benchreg
X#include "benchreg.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchsho
X#include "benchsho.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchlng
X#include "benchlng.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchmul
X#include "benchmul.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchfn
X#include "benchfn.c"
X
X#undef main
X#undef STOR_CL
X#undef TYPE
X#define main benchdbl
X#include "benchdbl.c"
END-of-benches.c
echo file: benchfn.c
sed 's/^X//' >benchfn.c << 'END-of-benchfn.c'
X/* benchfn - benchmark for function calls
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * Let T be the execution time in milliseconds
X * Then average time per operator = T/major usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#include <stdio.h>
Xint dummy = 0;
X
Xf2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */
Xf1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */
Xf0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */
X
X/* main - time function call-and-return
X * av[1] is the number of major iterations; each one calls f0 once.
X * ac is not examined, so the caller must supply av[1].
X */
Xmain(ac, av)
X int ac;
X char *av[];
X {
X /* atol is declared here so that it returns long, not the default int */
X long d, major, atol();
X
X major = atol(av[1]);
X printf("executing %ld iterations\n", major);
X for (d = 1; d <= major; ++d)
X f0(); /* executes 1000 calls */
X /* the global dummy is never changed in this file; it is only reported */
X printf("dummy=%d\n", dummy);
X }
END-of-benchfn.c
echo file: benchlng.c
sed 's/^X//' >benchlng.c << 'END-of-benchlng.c'
X/* benchlng - benchmark for long integers
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an unsigned type
X * Let T be the execution time in milliseconds
X * Then average time per operator = T/major usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE long
X#include <stdio.h>
Xmain(ac, av)
X int ac;
X char *av[];
X {
X STOR_CL TYPE a, b, c;
X long d, major, atol();
X static TYPE m[10] = {0};
X
X major = atol(av[1]);
X printf("executing %ld iterations\n", major);
X a = b = (av[1][0] - '0');
X for (d = 1; d <= major; ++d)
X {
X /* inner loop executes 1000 selected operations */
X for (c = 1; c <= 40; ++c)
X {
X a = a + b + c;
X b = a >> 1;
X a = b % 10;
X m[a] = a;
X b = m[a] - b - c;
X a = b == c;
X b = a | c;
X a = !b;
X b = a + c;
X a = b > c;
X }
X }
X printf("a=%d\n", a);
X }
END-of-benchlng.c
echo file: benchmul.c
sed 's/^X//' >benchmul.c << 'END-of-benchmul.c'
X/* benchmul - benchmark for int multiply
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an unsigned type
X * Let T be the execution time in milliseconds
X * Then average time per operator = T/major usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE int
X#include <stdio.h>
X/* main - time integer multiplication
X * av[1] is the number of major iterations; ac is not examined, so the
X * caller must supply av[1].
X * Each of the 40 inner passes performs 25 multiplies on a.
X */
Xmain(ac, av)
X int ac;
X char *av[];
X {
X STOR_CL TYPE a, b, c;
X long d, major, atol();
X static TYPE m[10] = {0};
X
X major = atol(av[1]);
X printf("executing %ld iterations\n", major);
X /* seed from the command line instead of a constant */
X a = b = (av[1][0] - '0');
X for (d = 1; d <= major; ++d)
X {
X /* inner loop executes 1000 selected operations */
X for (c = 1; c <= 40; ++c)
X {
X /* the product can exceed the range of int; see the overflow note in the file header */
X a = 3 *a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a; /* 25 * */
X }
X }
X /* print the result so the multiplications have a use */
X printf("a=%d\n", a);
X }
END-of-benchmul.c
echo file: benchreg.c
sed 's/^X//' >benchreg.c << 'END-of-benchreg.c'
X/* benchreg - benchmark for register integers
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an unsigned type
X * Let T be the execution time in milliseconds
X * Then average time per operator = T/major usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL register
X#define TYPE int
X#include <stdio.h>
X/* main - time the "average operator" on register ints
X * av[1] is the number of major iterations; ac is not examined, so the
X * caller must supply av[1].
X */
Xmain(ac, av)
X int ac;
X char *av[];
X {
X STOR_CL TYPE a, b, c;
X /* atol is declared here so that it returns long, not the default int */
X long d, major, atol();
X static TYPE m[10] = {0};
X
X major = atol(av[1]);
X printf("executing %ld iterations\n", major);
X /* seed from the first character of av[1] instead of a constant */
X a = b = (av[1][0] - '0');
X for (d = 1; d <= major; ++d)
X {
X /* inner loop executes 1000 selected operations */
X for (c = 1; c <= 40; ++c)
X {
X /* every statement uses a value computed just before it */
X a = a + b + c;
X b = a >> 1;
X /* for a non-negative b this leaves a in 0..9, within m[] */
X a = b % 10;
X m[a] = a;
X b = m[a] - b - c;
X a = b == c;
X b = a | c;
X a = !b;
X b = a + c;
X a = b > c;
X }
X }
X /* print the result so the arithmetic has a use */
X printf("a=%d\n", a);
X }
END-of-benchreg.c
echo file: benchsho.c
sed 's/^X//' >benchsho.c << 'END-of-benchsho.c'
X/* benchsho - benchmark for short integers
X * Thomas Plum, Plum Hall Inc, 609-927-3770
X * If machine traps overflow, use an unsigned type
X * Let T be the execution time in milliseconds
X * Then average time per operator = T/major usec
X * (Because the inner loop has exactly 1000 operations)
X */
X#define STOR_CL auto
X#define TYPE short
X#include <stdio.h>
X/* main - time the "average operator" on auto shorts
X * av[1] is the number of major iterations; ac is not examined, so the
X * caller must supply av[1].
X */
Xmain(ac, av)
X int ac;
X char *av[];
X {
X STOR_CL TYPE a, b, c;
X /* atol is declared here so that it returns long, not the default int */
X long d, major, atol();
X static TYPE m[10] = {0};
X
X major = atol(av[1]);
X printf("executing %ld iterations\n", major);
X /* seed from the first character of av[1] instead of a constant */
X a = b = (av[1][0] - '0');
X for (d = 1; d <= major; ++d)
X {
X /* inner loop executes 1000 selected operations */
X for (c = 1; c <= 40; ++c)
X {
X /* every statement uses a value computed just before it */
X a = a + b + c;
X b = a >> 1;
X /* for a non-negative b this leaves a in 0..9, within m[] */
X a = b % 10;
X m[a] = a;
X b = m[a] - b - c;
X a = b == c;
X b = a | c;
X a = !b;
X b = a + c;
X a = b > c;
X }
X }
X /* print the result so the arithmetic has a use */
X printf("a=%d\n", a);
X }
END-of-benchsho.c
echo file: clock.c
sed 's/^X//' >clock.c << 'END-of-clock.c'
X/* clock - primitive version of ANSI 'clock' function for UNIX */
Xlong clock()
X {
X struct tbuff { long pu; long ps; long cu; long cs; } tbuff;
X
X times(&tbuff);
X return(tbuff.pu + tbuff.ps);
X }
END-of-clock.c
echo file: f3.c
sed 's/^X//' >f3.c << 'END-of-f3.c'
X/* f3 - lowest level function
X * Put this in separate source file if compiler detects and optimizes
X * useless code
X * The body is intentionally empty: benchfn.c calls f3 only for the cost
X * of the call and return.
X */
Xf3() { }
END-of-f3.c
echo file: fround.c
sed 's/^X//' >fround.c << 'END-of-fround.c'
X/* fround - round double x to precision p, n significant digits
X * uses static string for result - not re-entrant
X * fround is an accommodation for K+R-level printf which lacks %.*e or %g
X * slow, fat version - uses sprintf
X *
X * x - value to format
X * p - largest number of digits printed after the decimal point; the
X *     text is blank-padded so that the point plus what follows it is
X *     always p+1 characters wide
X * n - number of significant digits to keep
X * returns a pointer to a static buffer that the next call overwrites
X */
X#include <stdio.h>
Xchar *fround(x, p, n)
X    double x;
X    short p;
X    short n;
X    {
X    double y;
X    double log10();
X    short digs;
X    short nlog;
X    static char s[40] = {0};
X    char fmt[20];
X
X    /* round to n significant digits: print in %e form and read it back */
X    sprintf(fmt, "%%.%de", n-1);
X    sprintf(s, fmt, x);
X    sscanf(s, "%lf", &y);
X    /* nlog is the decimal exponent of y; take it from the magnitude so
X     * that a negative value is never handed to log10 */
X    if (y == 0)
X        nlog = 0;
X    else
X        nlog = log10(y < 0 ? -y : y);
X    if (nlog < 0)
X        --nlog;
X    /* decimals still needed after the digits in front of the point */
X    digs = n - nlog - 1;
X    if (digs < 0)
X        digs = 0;
X    else if (digs > p)
X        digs = p;
X    sprintf(fmt, "%%.%df", digs);
X    sprintf(s, fmt, y);
X    /* %.0f prints no point, so add one to keep the columns aligned */
X    if (digs == 0)
X        strcat(s, ".");
X    while (digs++ < p)
X        strcat(s, " ");
X    return (s);
X    }
X#ifdef TRYMAIN
X/* main - demonstration driver, built only when TRYMAIN is defined
X * Prints fround of two sample values for 1 to 5 significant digits.
X */
Xmain()
X    {
X    short sig;
X
X    sig = 1;
X    while (sig <= 5)
X        {
X        printf("fround(123.57, 2, %d) = %s;\n", sig, fround(123.57, 2, sig));
X        ++sig;
X        }
X    sig = 1;
X    while (sig <= 5)
X        {
X        printf("fround(.013579, 5, %d) = %s;\n", sig, fround(.013579, 5, sig));
X        ++sig;
X        }
X    }
X#endif
END-of-fround.c
echo file: run-all.c
sed 's/^X//' >run-all.c << 'END-of-run-all.c'
X/* do_allbench - run all the benchmark programs */
X#include <stdio.h>
X#define NBENCHES 6
X#define TIME_FMT "Current time is %lf:%lf:%lf"
X#define CPUTIME_MIN 10000.
Xstatic struct timing
X {
X double cputime; char *fname; char *title1; char *title2;
X } timings[NBENCHES] =
X {
X 0., "benchreg", "register", "int",
X 0., "benchsho", "auto", "short",
X 0., "benchlng", "auto", "long",
X 0., "benchmul", "integer", "multiply",
X 0., "benchfn", "function", "call",
X 0., "benchdbl", "auto", "double",
X };
Xstatic char cc_cmd[BUFSIZ] = {0};
Xstatic char command[BUFSIZ] = {0};
Xint compile(fname)
X char *fname;
X {
X sprintf(command, cc_cmd, fname);
X return (system(command));
X }
Xint mk_crlf()
X {
X FILE *crlf;
X
X crlf = fopen("cr-lf", "w");
X if (crlf == NULL)
X {
X fprintf(stderr, "unable to create file crlf\n");
X exit(2);
X }
X putc('\n', crlf);
X fclose(crlf);
X }
Xdouble rd_time(tmpname)
X char *tmpname;
X {
X FILE *fp;
X double hrs, mins, secs;
X
X fp = fopen(tmpname, "r");
X fgets(buf, sizeof(buf), fp);
X sscanf(buf, TIME_FMT, &hrs, &mins, &secs);
X fclose(fp);
X return (1000 * (secs + 60 * (mins + 60 * hrs));
X }
Xdouble time_it(fname, iterations)
X char *fname;
X long iterations;
X {
X double t0, t1;
X
X sprintf(command, "time <cr-lf >t0");
X system(command);
X t0 = rd_time("t0");
X sprintf(command, "%s %ld", fname, iterations);
X system(command);
X sprintf(command, "time <cr-lf >t1");
X system(command);
X t1 = rd_time("t1");
X return (t1 - t0);
X }
Xdouble run(fname, major)
X char *fname;
X long major;
X {
X double t_empty, t_major;
X
X t_empty = time_it(fname, 0L);
X t_major = time_it(fname, major);
X return (t_major - t_empty);
X }
Xdouble do_all(fname)
X char *fname;
X {
X double cputime;
X long major;
X
X compile(fname);
X major = MAJOR_MIN;
X do {
X cputime = run(fname, major);
X major *= 10;
X } while (cputime < CPUTIME_MIN);
X return (cputime / major);
X }
Xmain(ac, av)
X int ac;
X char *av[];
X {
X int i;
X
X strcpy(cc_cmd, av[1]);
X for (i = 0; i <= NBENCHES; ++i)
X timings[i].cputime = do_all(timings[i].fname);
X printf("\n\n\nRESULTS:\n\n");
X for (i = 0; i <= NBENCHES; ++i)
X printf("%10s ", timings[i].title1;
X printf(\n");
X for (i = 0; i <= NBENCHES; ++i)
X printf("%10s ", timings[i].title2;
X printf(\n");
X for (i = 0; i <= NBENCHES; ++i)
X printf("%10.4f ", timings[i].cputime);
X printf("\n\n(All times are in microseconds\n");
X }
END-of-run-all.c
echo file: benches.out
sed 's/^X//' >benches.out << 'END-of-benches.out'
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
Xexecuting 1000 iterations
Xa=0
Xexecuting 10000 iterations
Xa=0
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
Xexecuting 1000 iterations
Xa=0
Xexecuting 10000 iterations
Xa=0
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
Xexecuting 1000 iterations
Xa=0
Xexecuting 10000 iterations
Xa=0
Xexecuting 1 iterations
Xa=-407629151
Xexecuting 10 iterations
Xa=-483154367
Xexecuting 100 iterations
Xa=-1034506623
Xexecuting 1000 iterations
Xa=-1045589759
Xexecuting 1 iterations
Xdummy=0
Xexecuting 10 iterations
Xdummy=0
Xexecuting 100 iterations
Xdummy=0
Xexecuting 1000 iterations
Xdummy=0
Xexecuting 1 iterations
Xa=0
Xexecuting 10 iterations
Xa=0
Xexecuting 100 iterations
Xa=0
X
X
X register auto auto int function auto
X int short long multiply call+ret double
X xenix-386 0.24 0.46 0.43 1.87 3.00 90.5
END-of-benches.out
echo file: run-all.out
sed 's/^X//' >run-all.out << 'END-of-run-all.out'
X+ cc -o benchfn.x benchfn.c
Xbenchfn.c
X+ time benchfn.x 1000
Xexecuting 1000 iterations
Xdummy=0
X
Xreal 3.3
Xuser 3.1
Xsys 0.0
X+ cc -o benchmul.x benchmul.c
Xbenchmul.c
X+ time benchmul.x 10000
Xexecuting 10000 iterations
Xa=427469313
X
Xreal 19.0
Xuser 18.8
Xsys 0.1
X+ cc -o benchlng.x benchlng.c
Xbenchlng.c
X+ time benchlng.x 10000
Xexecuting 10000 iterations
Xa=0
X
Xreal 5.1
Xuser 5.0
Xsys 0.0
X+ cc -o benchsho.x benchsho.c
Xbenchsho.c
X+ time benchsho.x 10000
Xexecuting 10000 iterations
Xa=0
X
Xreal 5.2
Xuser 5.1
Xsys 0.1
X+ cc -o benchreg.x benchreg.c
Xbenchreg.c
X+ time benchreg.x 10000
Xexecuting 10000 iterations
Xa=0
X
Xreal 2.6
Xuser 2.5
Xsys 0.0
X+ cc -o benchdbl.x benchdbl.c
Xbenchdbl.c
X+ time benchdbl.x 10000
Xexecuting 10000 iterations
Xa=0
X
Xreal 15:04.5
Xuser 15:04.4
Xsys 0.1
END-of-run-all.out
echo file: run-all.bat
sed 's/^X//' >run-all.bat << 'END-of-run-all.bat'
Xcl benchreg.c
Xcommand /c time-cmd benchreg >benchreg.out
X
Xcl benchsho.c
Xcommand /c time-cmd benchsho >benchsho.out
X
Xcl benchlng.c
Xcommand /c time-cmd benchlng >benchlng.out
X
Xcl benchfn.c
Xcommand /c time-cmd benchfn >benchfn.out
X
Xcl benchmul.c
Xcommand /c time-cmd benchmul >benchmul.out
X
Xcl benchdbl.c
Xcommand /c time-cmd benchdbl >benchdbl.out
X
END-of-run-all.bat
echo file: time-cmd.bat
sed 's/^X//' >time-cmd.bat << 'END-of-time-cmd.bat'
Xtime <cr-lf
X%1 0
Xtime <cr-lf
X%1 10000
Xtime <cr-lf
END-of-time-cmd.bat
echo file: time-dbl.bat
sed 's/^X//' >time-dbl.bat << 'END-of-time-dbl.bat'
Xtime <cr-lf
Xbenchdbl 0
Xtime <cr-lf
Xbenchdbl 100
Xtime <cr-lf
END-of-time-dbl.bat
echo file: run-all.sh
sed 's/^X//' >run-all.sh << 'END-of-run-all.sh'
Xcc -o benchfn.x benchfn.c
Xtime benchfn.x 1000
Xcc -o benchmul.x benchmul.c
Xtime benchmul.x 10000
Xcc -o benchlng.x benchlng.c
Xtime benchlng.x 10000
Xcc -o benchsho.x benchsho.c
Xtime benchsho.x 10000
Xcc -o benchreg.x benchreg.c
Xtime benchreg.x 10000
Xcc -o benchdbl.x benchdbl.c
Xtime benchdbl.x 10000
END-of-run-all.sh
echo file: cr-lf
sed 's/^X//' >cr-lf << 'END-of-cr-lf'
X
END-of-cr-lf
echo file: n-n
sed 's/^X//' >n-n << 'END-of-n-n'
Xn
Xn
END-of-n-n
echo file: bench.tbl
sed 's/^X//' >bench.tbl << 'END-of-bench.tbl'
XMachine/compiler register auto auto int func auto
X int short long multiply call dbl
X
XAT&T 3B2/05 (-O) 1.36 3.87 2.62 15.4 7.7 22.5
XAT&T 3B2/05 (no -O) 1.78 4.66 2.75 16.2 9.3 22.5
XAT&T 3B2/400 (-O) 1.09 1.36 1.10 16.2 10.0(?) 91.4
XAT&T 3B2/400 (no -O) 1.14 2.61 2.36 17.3 11.3 91.1
XAT&T 6386/375 (no -O) 0.61 1.39 1.23 3.85 5.62 6.77
XAT&T 6386/375 (-O) 0.52 1.17 0.54 3.68 5.78 7.68
XApollo DN330 (-O) 1.36 .78 1.36 10.17 3.57
XApollo DN330 (no -O) 1.54 1.28 1.54 11.30 3.64
XApollo DN580 (-O) 1.03 .59 1.03 7.67 2.72
XApollo DN580 (no -O) 1.18 .97 1.18 8.48 2.77
XApollo DN660 (-O) 5.88 1.24 5.88 21.86 4.26
XApollo DN660 (no -O) 5.93 1.52 5.93 21.93 4.29
XMasscomp 5500 3.18 2.7 4.9 30.8 7.3
XMasscomp 5600 (-O) .45 .61 .46 2.83 1.04
XMasscomp 5600 (no -O) .46 .78 .64 2.99 1.76
XPC/8088 (InstantC) 25.8 25.8 82.0 74.2 152.
XPC/8088 (WSL 3.1 lg) 6.18 10.4 66.5 31.8 28.8
XPyramid 90X (-O) .85 1.04 .86 3.64 1.9 2.37
XPyramid 90X (no -O) .86 1.01 .86 3.65 1.8 2.34
XSequent (-O) 1.39 2.99 2.53 9.90 9.3
XSequent (no -O) 1.50 3.25 2.83 9.95 13.2
XSun 3/260HM (-O) .31 .48 .47 1.98 1.16
XSun 3/260HM (no -O) .36 .58 .57 1.99 1.62
XSun 3/75M (-O) .47 .77 .76 3.00 2.12
XSun 3/75M (no -O) .53 .95 .94 3.01 2.73
XSun 3/75M(4.2, -O) .50 .81 .83 2.85 1.5 20.7
XSun 3/75M(4.2, no -O) .54 1.00 1.01 2.97 2.7 21.1
XSun 3/75M(VM, -O) .46 .77 .75 2.96 2.1 20.8
XSun 3/75M(VM, no -O) .52 .96 .93 2.97 2.7 21.1
XVAX 11/730 (-O) 4.00 9.80 6.20 16.2 42.8 12.4
XVAX 11/730 (no -O) 4.73 10.2 7.45 16.57 51.5 17.0
XVAX 11/780 (-O) 1.21 2.43 1.67 2.76 15.04 2.95
XVAX 11/780 (BSD 4.2) 1.38 2.42 1.96 2.92 17.2
XVAX 11/780 (UNIX 5.2) 1.24 2.48 1.79 2.72 15.7 3.89
XVAX 11/780 (no -O) 1.29 2.51 1.85 2.70 16.7 3.89
XVAX 11/785 (-O) .93 1.85 1.32 5.00 13.9 47.5
XVAX 11/785 (no -O) 1.01 1.96 1.44 5.08 14.2 5.42
XVAX 8650(UNIX -O) .236 .484 .298 .589 2.63 .578
XVAX 8650(UNIX no -O) .258 .482 .316 .574 3.06 .791
XVAX 8650(Ultrix -O) .23 .40 .29 .53 2.4 .56
XVAX 8650(Ultrix no -O) .26 .41 .34 .56 2.8 .77
END-of-bench.tbl
echo file: ARTICLE
sed 's/^X//' >ARTICLE << 'END-of-ARTICLE'
X
X
X
X
X
X
X[The following article appeared in "C Users Journal" May 1988.
X It describes the purpose and use of the enclosed benchmarks. ]
X
X
XSIMPLE BENCHMARKS FOR C COMPILERS
X
Xby Thomas Plum
X
XDr.Plum is the author of several books on C, including Efficient C (co-
Xauthored with Jim Brodie). He is Vice-Chair of the ANSI X3J11 Committee,
Xand Chairman of Plum Hall Inc, which offers introductory and advanced sem-
Xinars on C.
X
XCopyright (c) 1988, Plum Hall Inc
X
X
XWe are placing into the public domain some simple benchmarks with several
Xappealing properties:
X
X They are short enough to type while browsing at trade shows.
X
X They are protected against overly-aggressive compiler optimizations.
X
X They reflect empirically-observed operator frequencies in C programs.
X
X They give a C programmer information directly relevant to programming.
X
XIn Efficient C, Jim Brodie and I described how useful it can be for a pro-
Xgrammer to have a general idea of how many microseconds it takes to execute
Xthe "average operator" on register int's, on auto short's, on auto
Xlong's, and on double data, as well as the time for an integer multiply,
Xand the time to call-and-return from a function. These six numbers allow a
Xprogrammer to make very good first-order estimates of the CPU time that a
Xparticular algorithm will take.
X
XThe following easily-typed benchmark programs determine these times
Xdirectly. The first one is benchreg.c ("benchmark for register opera-
Xtors"):
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X - 1 -
X
X
X
X
X
X - 2 -
X
X
X 1 /* benchreg - benchmark for register integers
X 2 * Thomas Plum, Plum Hall Inc, 609-927-3770
X 3 * If machine traps overflow, use an unsigned type
X 4 * Let T be the execution time in milliseconds
X 5 * Then average time per operator = T/major usec
X 6 * (Because the inner loop has exactly 1000 operations)
X 7 */
X 8 #define STOR_CL register
X 9 #define TYPE int
X 10 #include <stdio.h>
X 11 main(ac, av)
X 12 int ac;
X 13 char *av[];
X 14 {
X 15 STOR_CL TYPE a, b, c;
X 16 long d, major, atol();
X 17 static TYPE m[10] = {0};
X 18
X 19 major = atol(av[1]);
X 20 printf("executing %ld iterations\n", major);
X 21 a = b = (av[1][0] - '0');
X 22 for (d = 1; d <= major; ++d)
X 23 {
X 24 /* inner loop executes 1000 selected operations */
X 25 for (c = 1; c <= 40; ++c)
X 26 {
X 27 a = a + b + c;
X 28 b = a >> 1;
X 29 a = b % 10;
X 30 m[a] = a;
X 31 b = m[a] - b - c;
X 32 a = b == c;
X 33 b = a | c;
X 34 a = !b;
X 35 b = a + c;
X 36 a = b > c;
X 37 }
X 38 }
X 39 printf("a=%d\n", a);
X 40 }
X
XIf you enter this and compile it to produce an executable program, you can
Xinvoke it with one argument, the number of iterations for the major loop:
X
X benchreg 10000
X
XIf this execution takes 16 seconds, this means that the average register
Xoperation takes 1.6 microseconds (16,000 milliseconds divided by 10,000
Xiterations of the major loop).
X
XLet us examine the program in detail. Lines 8 and 9 define STOR_CL
X("storage class") and TYPE to be register and int . Thus, on line 15,
Xthree variables ( a , b , and c ) are declared to be of this storage class
Xand type. At line 16, the major loop control variables are long integers,
Xbut they are touched only one one-thousandth as often as the inner loop
X
X
X
X
X
X
X
X
X - 3 -
X
X
Xvariables, so they have little effect upon the timings. We are declaring
Xthe atol function to return a long integer; it would otherwise default
Xto an int return. (If we were using a compiler based upon draft ANSI C,
Xwe could #include <stdlib.h> to get the declaration of atol , but this
Xwould limit the applicability of the benchmarks. This simple declaration is
Xall that even an ANSI compiler would need.)
X
XAt line 19, we set the major loop variable to the number given on the com-
Xmand line, and at line 20, we confirm it to the output.
X
XLine 21 is crucial to preventing some overly aggressive optimizations. Ear-
Xlier versions of these benchmarks had simply initialized a and b to 1,
Xbut this allows a compiler to forward-propagate a known constant value. The
Xexpression av[1][0] gives the first digit-character of the command-line
Xargument; subtracting '0' produces a digit between 0 and 9. (Yes, the
Xlatest ANSI draft now guarantees that the digit characters are a contiguous
Xsequence in any environment.)
X
XLine 22 simply executes the major loop the number of times given by the
Xvariable major . Line 25 repeats the inner loop 40 times, and with 25
Xoperators in that loop, this produces 1000 operators. (Actually there are
X1003, because of the initialization and the extra increment and test at loop
Xcompletion. The discrepancy is well within acceptable tolerances.)
X
XWithin the inner loop, 40% of the operators are assignments, in keeping with
Xthe percentages reported in the original Dhrystone work. Of the other
Xoperators, the most frequent are plus and minus. The sequence of operations
Xis carefully chosen to ensure that a very aggressive optimizer cannot find
Xany useless code sections; each result depends functionally upon previous
Xresults.
X
XFinally, the printout at line 39 is also important to preventing over-
Xoptimization. If the compiler could notice that we did nothing with the
Xcomputed result, it could discard all the operations that produced that
Xresult.
X
XWe have completed our perusal of the first benchmark program, benchreg.c .
XThe second program ( benchsho.c , for short's) is derived from benchreg.c
Xby changing lines 8 and 9: STOR_CL becomes auto , and TYPE becomes
Xshort . The program is otherwise unchanged.
X
XThe third program ( benchlng.c , for long's) is obtained by leaving
XSTOR_CL as auto and changing TYPE to long .
X
XTo make the fourth program ( benchmul.c , for multiplies) we set TYPE to
Xint , and change lines 27 through 36 to one source line which does 25 multi-
Xplies:
X
X a = 3 *a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a; /* 25 * */
X
XThe fifth program ( benchfn.c , for functions) is a major rewrite. We
Xarrange a series of function definitions for f3 , f2 , f1 , and f0 such
Xthat each call to function f0 generates exactly 1000 function-call opera-
Xtions. In case the compiler has an aggressive optimizer, move the function
Xf3 to a separate source file, so that the compiler cannot see how useless
X
X
X
X
X
X
X
X
X - 4 -
X
X
Xit is. The global variable dummy will make the compiler think that f3
Xmight be up to something useful. Here, then, is the benchfn.c function:
X
X 1 /* benchfn - benchmark for function calls
X 2 * Thomas Plum, Plum Hall Inc, 609-927-3770
X 3 * Let T be the execution time in milliseconds
X 4 * Then average time per operator = T/major usec
X 5 * (Because the inner loop has exactly 1000 operations)
X 6 */
X 7 #include <stdio.h>
X 8 int dummy = 0;
X 9
X 10 /* f3 - lowest level function
X 11 * Put this in separate source file if compiler detects and
X 12 * optimizes useless code
X 13 */
X 14 f3() { }
X 15
X 16 f2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */
X 17 f1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */
X 18 f0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */
X 19
X 20 main(ac, av)
X 21 int ac;
X 22 char *av[];
X 23 {
X 24 long d, major, atol();
X 25
X 26 major = atol(av[1]);
X 27 printf("executing %ld iterations\n", major);
X 28 for (d = 1; d <= major; ++d)
X 29 f0(); /* executes 1000 calls */
X 30 printf("dummy=%d\n", dummy);
X 31 }
X
XThe sixth program ( benchdbl.c , for double's ) is derived from benchlng.c
Xby changing STOR_CL to auto , TYPE to double , and replacing the inner
Xloop body with this slightly different version:
X
X a = a + b + c;
X b = a * 2;
X a = b / 10;
X a = -a;
X b = -a - b - c;
X a = b == c;
X b = a + c;
X a = !b;
X b = a + c;
X a = b > c;
X
XThese changes are necessary because floating-point operands are not allowed
Xfor the shift, remainder, and bitwise operators, and because the subscript
Xoperator does not really exercise the floating-point instructions. This
Xrevised inner loop still gives us a representative mix of typical opera-
Xtions.
X
X
X
X
X
X
X
X
X - 5 -
X
X
XThis, then, completes our collection of six benchmark programs. After they
Xare compiled to produce executable programs, the next question is "How do I
Xtime the execution?"
X
XOn UNIX systems, the timing is easy -- just run the time command:
X
X $ time benchreg 10000
X
XThe sum of the "user" and "system" times will give the CPU time used by the
Xprogram.
X
XMore accurately, we could time the execution of zero iterations, and sub-
Xtract that time from the time for the measured number of iterations.
X
XOn MS-DOS systems, timings can be obtained, but with greater difficulty. If
Xwe create a file named CR-LF which contains just one newline (or
X"carriage-return-newline" in DOS parlance), we could time our program with a
X"batch" file such as this:
X
X time <cr-lf
X benchreg 0
X time <cr-lf
X benchreg 10000
X time <cr-lf
X
XWe must then take times that are expressed in minutes-and-seconds and pro-
Xduce differences expressed in seconds.
X
XWith whichever method, we eventually produce six numbers that are character-
Xistic of a particular environment (a specific compiler supporting a specific
Xmachine).
X
X[NOTE: Since this article appeared, I have added a driver program, benches.c.
XIn an ANSI environment with the clock function, it will run all the tests
Xand report the results, eliminating the need for manual computations.]
X
XHere are some examples of timing results that have been obtained on a
Xvariety of minicomputer and workstation environments:
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X
X - 6 -
X
X
XMachine/compiler register auto auto int func auto
X int short long multiply call dbl
X
XAT&T 3B2/05 (-O) 1.36 3.87 2.62 15.4 7.7 22.5
XAT&T 3B2/05 (no -O) 1.78 4.66 2.75 16.2 9.3 22.5
XAT&T 3B2/400 (-O) 1.09 1.36 1.10 16.2 10.0(?) 91.4
XAT&T 3B2/400 (no -O) 1.14 2.61 2.36 17.3 11.3 91.1
XApollo DN330 (-O) 1.36 .78 1.36 10.17 3.57
XApollo DN330 (no -O) 1.54 1.28 1.54 11.30 3.64
XApollo DN580 (-O) 1.03 .59 1.03 7.67 2.72
XApollo DN580 (no -O) 1.18 .97 1.18 8.48 2.77
XApollo DN660 (-O) 5.88 1.24 5.88 21.86 4.26
XApollo DN660 (no -O) 5.93 1.52 5.93 21.93 4.29
XCray X-MP (no vectors) .0567 .0656 .0822 .366 .821 .082
XMasscomp 5500 3.18 2.7 4.9 30.8 7.3
XMasscomp 5600 (-O) .45 .61 .46 2.83 1.04
XMasscomp 5600 (no -O) .46 .78 .64 2.99 1.76
XPyramid 90X (-O) .85 1.04 .86 3.64 1.9 2.37
XPyramid 90X (no -O) .86 1.01 .86 3.65 1.8 2.34
XSequent (-O) 1.39 2.99 2.53 9.90 9.3
XSequent (no -O) 1.50 3.25 2.83 9.95 13.2
XSun 3/260HM (-O) .31 .48 .47 1.98 1.16
XSun 3/260HM (no -O) .36 .58 .57 1.99 1.62
XSun 3/75M (-O) .47 .77 .76 3.00 2.12
XSun 3/75M (no -O) .53 .95 .94 3.01 2.73
XSun 3/75M(4.2, -O) .50 .81 .83 2.85 1.5 20.7
XSun 3/75M(4.2, no -O) .54 1.00 1.01 2.97 2.7 21.1
XSun 3/75M(VM, -O) .46 .77 .75 2.96 2.1 20.8
XSun 3/75M(VM, no -O) .52 .96 .93 2.97 2.7 21.1
XVAX 11/730 (-O) 4.00 9.80 6.20 16.2 42.8 12.4
XVAX 11/730 (no -O) 4.73 10.2 7.45 16.57 51.5 17.0
XVAX 11/780 (-O) 1.21 2.43 1.67 2.76 15.0 2.95
XVAX 11/780 (BSD 4.2) 1.38 2.42 1.96 2.92 17.2
XVAX 11/780 (UNIX 5.2) 1.24 2.48 1.79 2.72 15.7 3.89
XVAX 11/780 (no -O) 1.29 2.51 1.85 2.70 16.7 3.89
XVAX 11/785 (-O) .93 1.85 1.32 5.00 13.9 47.5
XVAX 11/785 (no -O) 1.01 1.96 1.44 5.08 14.2 5.42
XVAX 8650(UNIX -O) .236 .484 .298 .589 2.63 .578
XVAX 8650(UNIX no -O) .258 .482 .316 .574 3.06 .791
XVAX 8650(Ultrix -O) .23 .40 .29 .53 2.4 .56
XVAX 8650(Ultrix no -O) .26 .41 .34 .56 2.8 .77
X
XNotice that some of these timings were run before the benchdbl benchmark
Xhad been written. There are no examples of the popular PC environments in
Xthis table. If interested readers wish to run these benchmarks on their own
Xenvironments, I will endeavor to present these results in a future article.
X
XProcessor speeds are sometimes described in "MIPS" (millions of instructions
Xper second); using a value such as the number of register operators per
Xsecond in C might give rise to a "MOPS" measurement of more use to C pro-
Xgrammers. Those of us who have tried these benchmarks have appreciated the
Xintuitive grasp that they give of the speed of current machines and com-
Xpilers. I hope that you too will find them of interest.
X
X
X
X
X
X
X
END-of-ARTICLE
exit
--
Eric S. Raymond = eric@snark.uu.net (mad mastermind of TMN-Netnews)