home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Fresh Fish 5
/
FreshFish_July-August1994.bin
/
bbs
/
gnu
/
gawk-2.15.5-diffs.lha
/
src
/
diffs
/
gawk-2.15.5.diffs
Wrap
Text File
|
1994-07-12
|
502KB
|
12,599 lines
diff -rc --new-file /src/baseline/gawk-2.15.5/Makefile.in gawk-2.15.5/Makefile.in
*** /src/baseline/gawk-2.15.5/Makefile.in Wed Dec 29 11:25:34 1993
--- gawk-2.15.5/Makefile.in Sun Jun 12 21:43:01 1994
***************
*** 19,98 ****
# along with GAWK; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! # User tunable macros -- CHANGE THESE IN Makefile.in RATHER THAN IN
! # Makefile, OR configure WILL OVERWRITE YOUR CHANGES
! prefix = /usr/local
exec_prefix = $(prefix)
- binprefix =
- manprefix =
bindir = $(exec_prefix)/bin
libdir = $(exec_prefix)/lib
! mandir = $(prefix)/man/man1
! manext = .1
infodir = $(prefix)/info
! # The provided "configure" is used to turn a config file (samples in
! # the "config" directory into commands to edit config.in into
! # a suitable config.h and to edit Makefile.in into Makefile.
! # To port GAWK, create an appropriate config file using the ones in
! # the config directory as examples and using the comments in config.in
! # as a guide.
!
! CC= gcc -g
! ##MAKE_CC## CC = cc
!
! PROFILE= #-pg
! DEBUG= #-DMALLOCDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
! LINKSTATIC= #-Bstatic
! WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
!
! # Parser to use on grammar - any one of the following will work
! PARSER = bison -y
! #PARSER = yacc
! #PARSER = byacc
!
! # Set LIBS to any libraries that are machine specific
! LIBS =
!
! # Cray 2 running Unicos 5.0.7
! ##MAKE_LIBNET## LIBS = -lnet
!
!
! # Systems with alloca in /lib/libPW.a
! ##MAKE_ALLOCA_PW## LIBS = -lPW
!
! # ALLOCA - only needed if you use bison
! # Set equal to alloca.o if your system is S5 and you don't have
! # alloca. Uncomment one of the rules below to make alloca.o from
! # either alloca.s or alloca.c.
! # This should have already been done automatically by configure.
! #
! # Some systems have alloca in libPW.a, so LIBS=-lPW may work, too.
! ##MAKE_ALLOCA_C## ALLOCA= alloca.o
! ##MAKE_ALLOCA_S## ALLOCA= alloca.o
!
! VFLAGS=
!
! # VMS POSIX, VAXC V3.2
! ##MAKE_VMS-Posix## VFLAGS = -UVMS -D__STDC__=0
! # HP/Apollo running cc version 6.7 or earlier
! ##MAKE_Apollo## VFLAGS = -U__STDC__ -A run,sys5.3
! ##MAKE_Apollo## LIBS = -A sys,any
! # SGI IRIX 4.0.5 cc flags
! ##MAKE_SGI## VFLAGS = -cckr
!
! ##MAKE_NeXT## VFLAGS = -DGFMT_WORKAROUND
!
! CFLAGS = -O
! FLAGS = -DGAWK -DHAVE_CONFIG_H $(VFLAGS) $(DEBUG) $(PROFILE) $(WARN)
! LDFLAGS = $(LINKSTATIC) $(PROFILE)
.c.o:
! $(CC) $(CFLAGS) $(FLAGS) -c $<
# object files
AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
--- 19,66 ----
# along with GAWK; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
! #### Start of system configuration section. ####
! VPATH = @srcdir@
! srcdir = @srcdir@
!
! # Common prefix for machine-independent installed files.
! prefix = /gnu
! # Common prefix for machine-dependent installed files.
exec_prefix = $(prefix)
+ # Directory to install executables in.
bindir = $(exec_prefix)/bin
+ # Directory to install libraries in.
libdir = $(exec_prefix)/lib
! # Directory to install the Info files in.
infodir = $(prefix)/info
+ # Directory to install the man page in.
+ mandir = $(prefix)/man/man$(manext)
+ # Number to put on the man page filename.
+ manext = 1
+
+ # Program to install executables.
+ INSTALL_PROGRAM = @INSTALL_PROGRAM@
+ # Program to install data like man pages.
+ INSTALL_DATA = @INSTALL_DATA@
+ # Generic install program.
+ INSTALL = @INSTALL@
+
+ CC = @CC@
+ DEFS = @DEFS@
+ CFLAGS = @CFLAGS@
! LDFLAGS = @LDFLAGS@
! LIBS = @LIBS@
! YACC = @YACC@
! #### End of system configuration section. ####
+ # "-I." is needed to find config.h in the build directory.
.c.o:
! $(CC) -c -I. -I$(srcdir) $(DEFS) $(CFLAGS) $< $(OUTPUT_OPTION)
# object files
AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
***************
*** 137,142 ****
--- 105,112 ----
# Release of gawk. There can be no leading or trailing white space here!
REL=2.15
+ all: gawk
+
# rules to build gawk
gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
$(CC) -o gawk $(LDFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) -lm $(LIBS)
***************
*** 150,172 ****
main.o: patchlevel.h
awktab.c: awk.y
! $(PARSER) -v awk.y
! ##MAKE_VMS-Posix## mv ytab.c awktab.c
! ##MAKE_VMS-Posix## dummy.awk_tab.target:
! sed '/^extern char .malloc(), .realloc();$$/d' y.tab.c >awktab.c
! rm y.tab.c
awktab.o: awk.h
! config.h: config.in
! @echo You must provide a config.h!
! @echo Run \"./configure\" to build it for known systems
! @echo or copy config.in to config.h and edit it.; exit 1
install: gawk gawk.info
! cp gawk $(bindir) && chmod 755 $(bindir)/gawk
! cp gawk.1 $(mandir)/gawk$(manext) && chmod 644 $(mandir)/gawk$(manext)
! cp gawk.info* $(infodir) && chmod 644 $(infodir)/gawk.info*
uninstall:
rm -f $(bindir)/gawk $(mandir)/gawk$(manext) $(infodir)/gawk.info*
--- 120,141 ----
main.o: patchlevel.h
awktab.c: awk.y
! @echo "expect 40 shift/reduce conflicts"
! $(YACC) -v $(srcdir)/awk.y
! @sed '/extern char.*malloc/d' <y.tab.c >awktab.c
! @rm y.tab.c
awktab.o: awk.h
! # On AmigaDOS, there is no "native awk", so install gawk as both
! # /bin/gawk and /bin/awk.
install: gawk gawk.info
! $(INSTALL_PROGRAM) gawk $(bindir)/awk
! $(INSTALL_PROGRAM) gawk $(bindir)/gawk
! $(INSTALL_DATA) $(srcdir)/gawk.1 $(mandir)/gawk$(manext)
! cd $(srcdir); for f in gawk.info*; \
! do $(INSTALL_DATA) $$f $(infodir)/$$f; done
uninstall:
rm -f $(bindir)/gawk $(mandir)/gawk$(manext) $(infodir)/gawk.info*
***************
*** 181,193 ****
# One of these rules should have already been selected by running configure.
- ##MAKE_ALLOCA_S## alloca.o: alloca.s
- ##MAKE_ALLOCA_S## /lib/cpp < alloca.s | sed '/^#/d' > t.s
- ##MAKE_ALLOCA_S## as t.s -o alloca.o
- ##MAKE_ALLOCA_S## rm t.s
-
- ##MAKE_ALLOCA_C## alloca.o: alloca.c
-
# auxiliary rules for release maintenance
lint: $(ALLSRC)
lint -hcbax $(FLAGS) $(ALLSRC)
--- 150,155 ----
***************
*** 215,228 ****
rm -f $(ALLDOC) gawk.log config.h
gawk.dvi: gawk.texi
! cp support/texinfo.tex .
! tex gawk.texi; texindex gawk.??
! tex gawk.texi; texindex gawk.??
! tex gawk.texi
rm -f texinfo.tex
gawk.info: gawk.texi
! makeinfo gawk.texi
dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT) distclean
-rm -rf gawk-$(REL)*
--- 177,190 ----
rm -f $(ALLDOC) gawk.log config.h
gawk.dvi: gawk.texi
! cp $(srcdir)/support/texinfo.tex .
! tex $(srcdir)/gawk.texi; texindex gawk.??
! tex $(srcdir)/gawk.texi; texindex gawk.??
! tex $(srcdir)/gawk.texi
rm -f texinfo.tex
gawk.info: gawk.texi
! makeinfo $(srcdir)/gawk.texi
dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT) distclean
-rm -rf gawk-$(REL)*
***************
*** 264,266 ****
--- 226,242 ----
check: test
+ Makefile: config.status $(srcdir)/Makefile.in
+ $(SHELL) config.status
+
+ config.h: stamp-config ;
+
+ stamp-config: config.status $(srcdir)/config.h.in
+ $(SHELL) config.status
+ touch stamp-config
+
+ configure: configure.in
+ autoconf $(ACFLAGS)
+
+ config.h.in: configure.in
+ autoheader $(ACFLAGS)
diff -rc --new-file /src/baseline/gawk-2.15.5/Product-Info gawk-2.15.5/Product-Info
*** /src/baseline/gawk-2.15.5/Product-Info Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/Product-Info Tue Jul 12 17:12:01 1994
***************
*** 0 ****
--- 1,31 ----
+ .name
+ gawk
+ .fullname
+ GNU awk, pattern scanning and processing language
+ .type
+ Programmer Tool
+ .short
+ Pattern scanning & processing.
+ .description
+ Gawk is the GNU Project's implementation of the AWK programming
+ language. It conforms to the definition of the language in the POSIX
+ 1003.2 Command Language And Utilities Standard. This version in turn
+ is based on the description in "The AWK Programming Language", by Aho,
+ Kernighan, and Weinberger, with the additional features defined in the
+ System V Release 4 version of UNIX awk. Gawk also provides some
+ GNU-specific extensions.
+
+ The command line consists of options to gawk itself, the AWK program
+ text (if not supplied via the -f or --file options), and values to be
+ made available in the ARGC and ARGV pre-defined AWK variables.
+ .version
+ 2.15.5
+ .author
+ Paul Rubin
+ Jay Fenlason
+ .requirements
+ Amiga binary requires ixemul.library.
+ .distribution
+ GNU Public License
+ .described-by
+ Fred Fish (fnf@amigalib.com)
diff -rc --new-file /src/baseline/gawk-2.15.5/README.amiga gawk-2.15.5/README.amiga
*** /src/baseline/gawk-2.15.5/README.amiga Tue Jun 1 15:22:31 1993
--- gawk-2.15.5/README.amiga Sun Jun 12 21:43:03 1994
***************
*** 24,30 ****
Amiga under AmigaOS with gcc
DOPRNT_MISSING 1
ENVSEP ','
! DEFPATH ".,/usr/local/lib"
HAVE_UNDERSCORE_SETJMP 1
SRANDOM_PROTO 1
STDC_HEADERS 1
--- 24,30 ----
Amiga under AmigaOS with gcc
DOPRNT_MISSING 1
ENVSEP ','
! DEFPATH ".,/gnu/lib"
HAVE_UNDERSCORE_SETJMP 1
SRANDOM_PROTO 1
STDC_HEADERS 1
diff -rc --new-file /src/baseline/gawk-2.15.5/awk.h gawk-2.15.5/awk.h
*** /src/baseline/gawk-2.15.5/awk.h Wed May 11 22:28:46 1994
--- gawk-2.15.5/awk.h Sun Jun 12 21:43:07 1994
***************
*** 67,74 ****
#define const
#endif
! #ifndef SIGTYPE
! #define SIGTYPE void
#endif
#ifdef SIZE_T_MISSING
--- 67,74 ----
#define const
#endif
! #ifndef RETSIGTYPE
! #define RETSIGTYPE void
#endif
#ifdef SIZE_T_MISSING
***************
*** 191,197 ****
#endif
#ifndef DEFPATH
! #define DEFPATH ".:/usr/local/lib/awk:/usr/lib/awk"
#endif
#ifndef ENVSEP
--- 191,197 ----
#endif
#ifndef DEFPATH
! #define DEFPATH ".:/local/lib/awk:/gnu/lib/awk"
#endif
#ifndef ENVSEP
***************
*** 723,729 ****
extern Regexp *mk_re_parse P((char *s, int ignorecase));
extern void load_environ P((void));
extern char *arg_assign P((char *arg));
! extern SIGTYPE catchsig P((int sig, int code));
/* msg.c */
extern void err P((const char *s, const char *emsg, va_list argp));
#if _MSC_VER == 510
--- 723,729 ----
extern Regexp *mk_re_parse P((char *s, int ignorecase));
extern void load_environ P((void));
extern char *arg_assign P((char *arg));
! extern RETSIGTYPE catchsig P((int sig, int code));
/* msg.c */
extern void err P((const char *s, const char *emsg, va_list argp));
#if _MSC_VER == 510
diff -rc --new-file /src/baseline/gawk-2.15.5/config/amigados gawk-2.15.5/config/amigados
*** /src/baseline/gawk-2.15.5/config/amigados Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/config/amigados Sun Jun 12 21:43:23 1994
***************
*** 0 ****
--- 1,6 ----
+ Amiga under AmigaOS with gcc
+ ENVSEP ','
+ DEFPATH ".,/gnu/lib"
+ HAVE_UNDERSCORE_SETJMP 1
+ SRANDOM_PROTO 1
+ STDC_HEADERS 1
diff -rc --new-file /src/baseline/gawk-2.15.5/config.h.in gawk-2.15.5/config.h.in
*** /src/baseline/gawk-2.15.5/config.h.in Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/config.h.in Sun Jun 12 21:43:26 1994
***************
*** 0 ****
--- 1,290 ----
+ /*
+ * config.h -- configuration definitions for gawk.
+ *
+ * __SYSTEM__
+ */
+
+ /*
+ * Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Progamming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+ /*
+ * This file isolates configuration dependencies for gnu awk.
+ * You should know something about your system, perhaps by having
+ * a manual handy, when you edit this file. You should copy config.h-dist
+ * to config.h, and edit config.h. Do not modify config.h-dist, so that
+ * it will be easy to apply any patches that may be distributed.
+ *
+ * The general idea is that systems conforming to the various standards
+ * should need to do the least amount of changing. Definining the various
+ * items in ths file usually means that your system is missing that
+ * particular feature.
+ *
+ * The order of preference in standard conformance is ANSI C, POSIX,
+ * and the SVID.
+ *
+ * If you have no clue as to what's going on with your system, try
+ * compiling gawk without editing this file and see what shows up
+ * missing in the link stage. From there, you can probably figure out
+ * which defines to turn on.
+ */
+
+ /**************************/
+ /* Miscellanious features */
+ /**************************/
+
+ /*
+ * BLKSIZE_MISSING
+ *
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+ #undef BLKSIZE_MISSING
+
+ /*
+ * RETSIGTYPE
+ *
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+ #undef RETSIGTYPE
+
+ /*
+ * SIZE_T_MISSING
+ *
+ * If your system has no typedef for size_t, define this to get a default
+ */
+ #undef SIZE_T_MISSING
+
+ /*
+ * CHAR_UNSIGNED
+ *
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+ #undef CHAR_UNSIGNED
+
+ /*
+ * HAVE_UNDERSCORE_SETJMP
+ *
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+
+ #undef HAVE_UNDERSCORE_SETJMP
+
+ /***********************************************/
+ /* Missing library subroutines or system calls */
+ /***********************************************/
+
+ /*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ *
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+
+ #undef MEMCMP_MISSING
+ #undef MEMCPY_MISSING
+ #undef MEMSET_MISSING
+
+ /*
+ * RANDOM_MISSING
+ *
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+ #undef RANDOM_MISSING
+
+ /*
+ * STRCASE_MISSING
+ *
+ * Your system does not have the strcasemp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+ #undef STRCASE_MISSING
+
+ /*
+ * STRCHR_MISSING
+ *
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+ #undef STRCHR_MISSING
+
+ /*
+ * STRERROR_MISSING
+ *
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+ #undef STRERROR_MISSING
+
+ /*
+ * STRTOD_MISSING
+ *
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+ #undef STRTOD_MISSING
+
+ /*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+ #undef STRFTIME_MISSING
+
+ /*
+ * TZSET_MISSING
+ *
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+ #undef TZSET_MISSING
+
+ /*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+ #undef TZNAME_MISSING
+
+ /*
+ * STDC_HEADERS
+ *
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+ #undef STDC_HEADERS
+
+ /*
+ * NO_TOKEN_PASTING
+ *
+ * If your compiler define's __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+ #undef NO_TOKEN_PASTING
+
+ /*****************************************************************/
+ /* Stuff related to the Standard I/O Library. */
+ /*****************************************************************/
+ /* Much of this is (still, unfortunately) black magic in nature. */
+ /* You may have to use some or all of these together to get gawk */
+ /* to work correctly. */
+ /*****************************************************************/
+
+ /*
+ * NON_STD_SPRINTF
+ *
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+ #undef NON_STD_SPRINTF
+
+ /*
+ * VPRINTF_MISSING
+ *
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it. This will trigger an attempt to use _doprnt().
+ * If you don't have that, this attempt will fail and you are on your own.
+ */
+ #undef VPRINTF_MISSING
+
+ /*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types are a different representation.
+ */
+ #undef SZTC
+ #undef INTC
+
+ /*
+ * SYSTEM_MISSING
+ *
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+ #undef SYSTEM_MISSING
+
+ /*
+ * FMOD_MISSING
+ *
+ * Define this if your system lacks the fmod() function and modf() will
+ * be used instead.
+ */
+ #undef FMOD_MISSING
+
+
+ /*******************************/
+ /* Gawk configuration options. */
+ /*******************************/
+
+ /*
+ * DEFPATH
+ *
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined. The default
+ * definition is provided here. Most likely you should not change
+ * this.
+ */
+
+ #define DEFPATH ".,/local/lib/awk,/gnu/lib/awk" /* HACK - fnf */
+ #define ENVSEP ',' /* HACK - fnf */
+
+ /*
+ * alloca already has a prototype defined - don't redefine it
+ */
+ #undef ALLOCA_PROTO
+
+ /*
+ * srandom already has a prototype defined - don't redefine it
+ */
+ #undef SRANDOM_PROTO
+
+ /*
+ * getpgrp() in sysvr4 and POSIX takes no argument
+ */
+ #undef GETPGRP_NOARG
+
+ /*
+ * define const to nothing if not __STDC__
+ */
+ #ifndef __STDC__
+ #define const
+ #endif
+
+ /* If svr4 and not gcc */
+ #undef SVR4
+ #ifdef SVR4
+ #define __svr4__ 1
+ #endif
diff -rc --new-file /src/baseline/gawk-2.15.5/config.in gawk-2.15.5/config.in
*** /src/baseline/gawk-2.15.5/config.in Sun May 1 18:20:10 1994
--- gawk-2.15.5/config.in Sun Jun 12 21:43:29 1994
***************
*** 272,278 ****
* this.
*/
! /* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
/* #define ENVSEP ':' */
/*
--- 272,278 ----
* this.
*/
! /* #define DEFPATH ".:/local/lib/awk:/gnu/lib/awk" */
/* #define ENVSEP ':' */
/*
diff -rc --new-file /src/baseline/gawk-2.15.5/configure gawk-2.15.5/configure
*** /src/baseline/gawk-2.15.5/configure Tue May 18 12:34:04 1993
--- gawk-2.15.5/configure Sun Jun 12 21:53:47 1994
***************
*** 1,36 ****
! #! /bin/sh
! #
! # configure -- produce a config.h from a known configuration
! case "$#" in
! 1) ;;
! *) echo "Usage: $0 system_type" >&2
! echo "Known systems: `cd config; echo ;ls -C`" >&2
! exit 2
! ;;
! esac
!
! if [ -f config/$1 ]; then
! sh ./mungeconf config/$1 config.in >config.h
!
! # echo #echo lines to stdout
! sed -n '/^#echo /s///p' config/$1
!
! case "$1" in
! bsd44) ln -s Makefile.bsd44 Makefile ; exit 0 ;;
! esac
!
! sed -n '/^MAKE_.*/s//s,^##&## ,,/p' config/$1 >sedscr
! if [ -s sedscr ]
! then
! sed -f sedscr Makefile.in >Makefile
! else
! cp Makefile.in Makefile
fi
! rm -f sedscr
else
! echo "\`$1' is not a known configuration."
! echo "Either construct one based on the examples in the config directory,"
! echo "or copy config.in to config.h and edit it."
! exit 1
fi
--- 1,899 ----
! #!/bin/sh
! # Guess values for system-dependent variables and create Makefiles.
! # Generated automatically using autoconf version 1.11
! # Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
!
! # This configure script is free software; you can redistribute it and/or
! # modify it under the terms of the GNU General Public License as published
! # by the Free Software Foundation; either version 2, or (at your option)
! # any later version.
!
! # This script is distributed in the hope that it will be useful, but
! # WITHOUT ANY WARRANTY; without even the implied warranty of
! # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
! # Public License for more details.
!
! # You should have received a copy of the GNU General Public License
! # along with this program; if not, write to the Free Software
! # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
!
! # Save the original args to write them into config.status later.
! configure_args="$*"
!
! # Only options that might do something get documented.
! ac_usage="Usage: configure [options] [host]
! Options: [defaults in brackets after descriptions]
! --build=BUILD configure for building on BUILD [BUILD=HOST]
! --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
! --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
! --exec-prefix=PREFIX install host dependent files in PREFIX [/usr/local]
! --help print this message
! --host=HOST configure for HOST [guessed]
! --prefix=PREFIX install host independent files in PREFIX [/usr/local]
! --quiet, --silent do not print \`checking for...' messages
! --srcdir=DIR find the sources in DIR [configure dir or ..]
! --target=TARGET configure for TARGET [TARGET=HOST]
! --verbose print results of checks
! --version print the version of autoconf that created configure
! --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
! --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
! --x-includes=DIR X include files are in DIR
! --x-libraries=DIR X library files are in DIR"
!
! # Initialize some variables set by options.
! # The variables have the same names as the options, with
! # dashes changed to underlines.
! build=NONE
! exec_prefix=
! host=NONE
! no_create=
! nonopt=NONE
! norecursion=
! prefix=
! program_prefix=
! program_suffix=
! program_transform_name=
! silent=
! srcdir=
! target=NONE
! verbose=
! x_includes=
! x_libraries=
!
! ac_prev=
! for ac_option
! do
!
! # If the previous option needs an argument, assign it.
! if test -n "$ac_prev"; then
! eval "$ac_prev=\$ac_option"
! ac_prev=
! continue
! fi
!
! # Accept (but ignore some of) the important Cygnus configure
! # options, so we can diagnose typos.
!
! case "$ac_option" in
! -*=*) ac_optarg=`/bin/echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
! *) ac_optarg= ;;
! esac
!
! case "$ac_option" in
!
! -build | --build | --buil | --bui | --bu | --b)
! ac_prev=build ;;
! -build=* | --build=* | --buil=* | --bui=* | --bu=* | --b=*)
! build="$ac_optarg" ;;
!
! -disable-* | --disable-*)
! ac_feature=`/bin/echo $ac_option|sed -e 's/-*disable-//'`
! # Reject names that aren't valid shell variable names.
! if test -n "`/bin/echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then
! /bin/echo "configure: $ac_feature: invalid feature name" >&2; exit 1
! fi
! ac_feature=`/bin/echo $ac_feature| sed 's/-/_/g'`
! eval "enable_${ac_feature}=no" ;;
!
! -enable-* | --enable-*)
! ac_feature=`/bin/echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'`
! # Reject names that aren't valid shell variable names.
! if test -n "`/bin/echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then
! /bin/echo "configure: $ac_feature: invalid feature name" >&2; exit 1
! fi
! ac_feature=`/bin/echo $ac_feature| sed 's/-/_/g'`
! case "$ac_option" in
! *=*) ;;
! *) ac_optarg=yes ;;
! esac
! eval "enable_${ac_feature}='$ac_optarg'" ;;
!
! # For backward compatibility, recognize -exec-prefix and --exec_prefix.
! -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
! | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
! | --exec | --exe | --ex)
! ac_prev=exec_prefix ;;
! -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
! | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
! | --exec=* | --exe=* | --ex=*)
! exec_prefix="$ac_optarg" ;;
!
! -gas | --gas | --ga | --g)
! with_gas=yes ;; # Obsolete; use --with-gas.
!
! -help | --help | --hel | --he)
! cat << EOF
! $ac_usage
! EOF
! exit 0 ;;
!
! -host | --host | --hos | --ho)
! ac_prev=host ;;
! -host=* | --host=* | --hos=* | --ho=*)
! host="$ac_optarg" ;;
!
! -nfp | --nfp | --nf)
! with_fp=no ;; # Obsolete; use --without-fp.
!
! -no-create | --no-create | --no-creat | --no-crea | --no-cre \
! | --no-cr | --no-c)
! no_create=yes ;;
!
! -norecursion | --norecursion | --norecursio | --norecursi \
! | --norecurs | --norecur | --norecu | --norec | --nore | --nor)
! norecursion=yes ;;
!
! -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
! ac_prev=prefix ;;
! -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
! prefix="$ac_optarg" ;;
!
! -program-prefix | --program-prefix | --program-prefi | --program-pref \
! | --program-pre | --program-pr | --program-p)
! ac_prev=program_prefix ;;
! -program-prefix=* | --program-prefix=* | --program-prefi=* \
! | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
! program_prefix="$ac_optarg" ;;
!
! -program-suffix | --program-suffix | --program-suffi | --program-suff \
! | --program-suf | --program-su | --program-s)
! ac_prev=program_suffix ;;
! -program-suffix=* | --program-suffix=* | --program-suffi=* \
! | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
! program_suffix="$ac_optarg" ;;
!
! -program-transform-name | --program-transform-name \
! | --program-transform-nam | --program-transform-na \
! | --program-transform-n | --program-transform- \
! | --program-transform | --program-transfor \
! | --program-transfo | --program-transf \
! | --program-trans | --program-tran \
! | --progr-tra | --program-tr | --program-t)
! ac_prev=program_transform_name ;;
! -program-transform-name=* | --program-transform-name=* \
! | --program-transform-nam=* | --program-transform-na=* \
! | --program-transform-n=* | --program-transform-=* \
! | --program-transform=* | --program-transfor=* \
! | --program-transfo=* | --program-transf=* \
! | --program-trans=* | --program-tran=* \
! | --progr-tra=* | --program-tr=* | --program-t=*)
! program_transform_name="$ac_optarg" ;;
!
! -q | -quiet | --quiet | --quie | --qui | --qu | --q \
! | -silent | --silent | --silen | --sile | --sil)
! silent=yes ;;
!
! -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
! ac_prev=srcdir ;;
! -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
! srcdir="$ac_optarg" ;;
!
! -target | --target | --targe | --targ | --tar | --ta | --t)
! ac_prev=target ;;
! -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
! target="$ac_optarg" ;;
!
! -v | -verbose | --verbose | --verbos | --verbo | --verb)
! verbose=yes ;;
!
! -version | --version | --versio | --versi | --vers)
! /bin/echo "configure generated by autoconf version 1.11"
! exit 0 ;;
!
! -with-* | --with-*)
! ac_package=`/bin/echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'`
! # Reject names that aren't valid shell variable names.
! if test -n "`/bin/echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then
! /bin/echo "configure: $ac_package: invalid package name" >&2; exit 1
! fi
! ac_package=`/bin/echo $ac_package| sed 's/-/_/g'`
! case "$ac_option" in
! *=*) ;;
! *) ac_optarg=yes ;;
! esac
! eval "with_${ac_package}='$ac_optarg'" ;;
!
! -without-* | --without-*)
! ac_package=`/bin/echo $ac_option|sed -e 's/-*without-//'`
! # Reject names that aren't valid shell variable names.
! if test -n "`/bin/echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then
! /bin/echo "configure: $ac_package: invalid package name" >&2; exit 1
! fi
! ac_package=`/bin/echo $ac_package| sed 's/-/_/g'`
! eval "with_${ac_package}=no" ;;
!
! --x) with_x=yes ;; # Obsolete; use --with-x.
!
! -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
! | --x-incl | --x-inc | --x-in | --x-i)
! ac_prev=x_includes ;;
! -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
! | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
! x_includes="$ac_optarg" ;;
!
! -x-libraries | --x-libraries | --x-librarie | --x-librari \
! | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
! ac_prev=x_libraries ;;
! -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
! | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
! x_libraries="$ac_optarg" ;;
!
! -*) /bin/echo "configure: $ac_option: invalid option; use --help to show usage" >&2; exit 1
! ;;
!
! *)
! if test -n "`/bin/echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then
! /bin/echo "configure: warning: $ac_option: invalid host type" >&2
! fi
! if test "x$nonopt" != xNONE; then
! /bin/echo "configure: can only configure for one host and one target at a time" >&2; exit 1
! fi
! nonopt="$ac_option"
! ;;
!
! esac
! done
!
! if test -n "$ac_prev"; then
! /bin/echo "configure: missing argument to --`/bin/echo $ac_prev | sed 's/_/-/g'`" >&2; exit 1
! fi
!
! trap 'rm -fr conftest* confdefs* core $ac_clean_files; exit 1' 1 2 15
! trap 'rm -fr confdefs* $ac_clean_files' 0
!
! # Save the original args if we used an alternate arg parser.
! ac_configure_temp="${configure_args-$*}"
! # Strip out --no-create and --norecursion so they don't pile up.
! configure_args=
! for ac_arg in $ac_configure_temp; do
! case "$ac_arg" in
! -no-create | --no-create | --no-creat | --no-crea | --no-cre \
! | --no-cr | --no-c) ;;
! -norecursion | --norecursion | --norecursio | --norecursi \
! | --norecurs | --norecur | --norecu | --norec | --nore | --nor) ;;
! *) configure_args="$configure_args $ac_arg" ;;
! esac
! done
!
! # NLS nuisances.
! # These must not be set unconditionally because not all systems understand
! # e.g. LANG=C (notably SCO).
! if test "${LC_ALL+set}" = 'set'; then LC_ALL=C; export LC_ALL; fi
! if test "${LANG+set}" = 'set'; then LANG=C; export LANG; fi
!
! # confdefs.h avoids OS command line length limits that DEFS can exceed.
! rm -rf conftest* confdefs.h
! # AIX cpp loses on an empty file, so make sure it contains at least a newline.
! /bin/echo > confdefs.h
!
! # A filename unique to this package, relative to the directory that
! # configure is in, which we can look for to find out if srcdir is correct.
! ac_unique_file=awk.y
!
! # Find the source files, if location was not specified.
! if test -z "$srcdir"; then
! ac_srcdir_defaulted=yes
! # Try the directory containing this script, then `..'.
! ac_prog=$0
! ac_confdir=`/bin/echo $ac_prog|sed 's%/[^/][^/]*$%%'`
! test "x$ac_confdir" = "x$ac_prog" && ac_confdir=.
! srcdir=$ac_confdir
! if test ! -r $srcdir/$ac_unique_file; then
! srcdir=..
! fi
! fi
! if test ! -r $srcdir/$ac_unique_file; then
! if test x$ac_srcdir_defaulted = xyes; then
! /bin/echo "configure: can not find sources in ${ac_confdir} or .." >&2; exit 1
! else
! /bin/echo "configure: can not find sources in ${srcdir}" >&2; exit 1
! fi
! fi
! ac_ext=c
! # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
! ac_cpp='${CPP}'
! ac_compile='${CC-cc} $CFLAGS $LDFLAGS conftest.${ac_ext} -o conftest $LIBS >/dev/null 2>&1'
!
!
!
!
! # We want these before the checks, so the checks can modify their values.
! test -z "$CFLAGS" && CFLAGS= auto_cflags=1
! test -z "$LDFLAGS" && LDFLAGS=
!
! if test -z "$CC"; then
! # Extract the first word of `gcc', so it can be a program name with args.
! set ac_dummy gcc; ac_word=$2
! test -n "$silent" || /bin/echo "checking for $ac_word"
! IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
! for ac_dir in $PATH; do
! test -z "$ac_dir" && ac_dir=.
! if test -f $ac_dir/$ac_word; then
! CC="gcc"
! break
! fi
! done
! IFS="$ac_save_ifs"
! fi
! test -z "$CC" && CC="cc"
! test -n "$CC" && test -n "$verbose" && /bin/echo " setting CC to $CC"
!
! # Find out if we are using GNU C, under whatever name.
! cat > conftest.c <<EOF
! #ifdef __GNUC__
! yes
! #endif
! EOF
! ${CC-cc} -E conftest.c > conftest.out 2>&1
! if egrep yes conftest.out >/dev/null 2>&1; then
! GCC=1 # For later tests.
! fi
! rm -f conftest*
!
!
! # If we're using gcc and the user hasn't specified CFLAGS, add -O to CFLAGS.
! test -n "$GCC" && test -n "$auto_cflags" && CFLAGS="$CFLAGS -O2"
!
!
! test -n "$silent" || /bin/echo "checking how to run the C preprocessor"
! if test -z "$CPP"; then
! # This must be in double quotes, not single quotes, because CPP may get
! # substituted into the Makefile and ``${CC-cc}'' will simply confuse
! # make. It must be expanded now.
! CPP="${CC-cc} -E"
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! #include <stdio.h>
! Syntax Error
! EOF
! # Some shells (Coherent) do redirections in the wrong order, so need
! # the parens.
! ac_err=`eval "($ac_cpp conftest.${ac_ext} >/dev/null) 2>&1"`
! if test -z "$ac_err"; then
! :
! else
! rm -rf conftest*
! CPP="${CC-cc} -E -traditional-cpp"
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! #include <stdio.h>
! Syntax Error
! EOF
! # Some shells (Coherent) do redirections in the wrong order, so need
! # the parens.
! ac_err=`eval "($ac_cpp conftest.${ac_ext} >/dev/null) 2>&1"`
! if test -z "$ac_err"; then
! :
! else
! rm -rf conftest*
! CPP=/lib/cpp
! fi
! rm -f conftest*
! fi
! rm -f conftest*
! fi
! test -n "$verbose" && /bin/echo " setting CPP to $CPP"
!
! if test -n "$GCC"; then
! test -n "$silent" || /bin/echo "checking whether -traditional is needed"
! ac_pattern="Autoconf.*'x'"
! ac_prog='#include <sgtty.h>
! Autoconf TIOCGETP'
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! $ac_prog
! EOF
! eval "$ac_cpp conftest.${ac_ext} > conftest.out 2>&1"
! if egrep "$ac_pattern" conftest.out >/dev/null 2>&1; then
! rm -rf conftest*
! ac_need_trad=1
!
! fi
! rm -f conftest*
!
! if test -z "$ac_need_trad"; then
! ac_prog='#include <termio.h>
! Autoconf TCGETA'
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! $ac_prog
! EOF
! eval "$ac_cpp conftest.${ac_ext} > conftest.out 2>&1"
! if egrep "$ac_pattern" conftest.out >/dev/null 2>&1; then
! rm -rf conftest*
! ac_need_trad=1
!
! fi
! rm -f conftest*
!
! fi
! test -n "$ac_need_trad" && CC="$CC -traditional"
! fi
!
! # Make sure to not get the incompatible SysV /etc/install and
! # /usr/sbin/install, which might be in PATH before a BSD-like install,
! # or the SunOS /usr/etc/install directory, or the AIX /bin/install,
! # or the AFS install, which mishandles nonexistent args, or
! # /usr/ucb/install on SVR4, which tries to use the nonexistent group
! # `staff', or /sbin/install on IRIX which has incompatible command-line
! # syntax. Sigh.
! #
! # On most BSDish systems install is in /usr/bin, not /usr/ucb
! # anyway.
! # This turns out not to be true, so the mere pathname isn't an indication
! # of whether the program works. What we really need is a set of tests for
! # the install program to see if it actually works in all the required ways.
! #
! # Avoid using ./install, which might have been erroneously created
! # by make from ./install.sh.
! if test -z "${INSTALL}"; then
! test -n "$silent" || /bin/echo "checking for a BSD compatible install"
! IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
! for ac_dir in $PATH; do
! case "$ac_dir" in
! ''|.|/gnu/etc|/sbin|/usr/sbin|/usr/etc|/usr/afsws/bin|/usr/ucb) ;;
! *)
! # OSF1 and SCO ODT 3.0 have their own names for install.
! for ac_prog in installbsd scoinst install; do
! if test -f $ac_dir/$ac_prog; then
! if test $ac_prog = install &&
! grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
! # AIX install. It has an incompatible calling convention.
! # OSF/1 installbsd also uses dspmsg, but is usable.
! :
! else
! INSTALL="$ac_dir/$ac_prog -c"
! break 2
! fi
fi
! done
! ;;
! esac
! done
! IFS="$ac_save_ifs"
! fi
!
! if test -z "$INSTALL"; then
! # As a last resort, use the slow shell script.
! for ac_dir in ${srcdir} ${srcdir}/.. ${srcdir}/../..; do
! if test -f $ac_dir/install.sh; then
! INSTALL="$ac_dir/install.sh -c"; break
! fi
! done
! fi
! if test -z "$INSTALL"; then
! /bin/echo "configure: can not find install.sh in ${srcdir} or ${srcdir}/.. or ${srcdir}/../.." >&2; exit 1
! fi
! test -n "$verbose" && /bin/echo " setting INSTALL to $INSTALL"
!
! # Use test -z because SunOS4 sh mishandles ${INSTALL_PROGRAM-'${INSTALL}'}.
! # It thinks the first close brace ends the variable substitution.
! test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
! test -n "$verbose" && /bin/echo " setting INSTALL_PROGRAM to $INSTALL_PROGRAM"
!
! test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
! test -n "$verbose" && /bin/echo " setting INSTALL_DATA to $INSTALL_DATA"
!
! for ac_prog in 'bison -y' byacc
! do
! if test -z "$YACC"; then
! # Extract the first word of `$ac_prog', so it can be a program name with args.
! set ac_dummy $ac_prog; ac_word=$2
! test -n "$silent" || /bin/echo "checking for $ac_word"
! IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
! for ac_dir in $PATH; do
! test -z "$ac_dir" && ac_dir=.
! if test -f $ac_dir/$ac_word; then
! YACC="$ac_prog"
! break
! fi
! done
! IFS="$ac_save_ifs"
! fi
!
! test -n "$YACC" && test -n "$verbose" && /bin/echo " setting YACC to $YACC"
!
! test -n "$YACC" && break
! done
! test -n "$YACC" || YACC="yacc"
!
! test -n "$silent" || /bin/echo "checking for return type of signal handlers"
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! #include <sys/types.h>
! #include <signal.h>
! #ifdef signal
! #undef signal
! #endif
! extern void (*signal ()) ();
! int main() { return 0; }
! int t() { int i;; return 0; }
! EOF
! if eval $ac_compile; then
! rm -rf conftest*
!
! {
! test -n "$verbose" && \
! /bin/echo " defining" RETSIGTYPE to be "void"
! /bin/echo "#define" RETSIGTYPE "void" >> confdefs.h
! DEFS="$DEFS -DRETSIGTYPE=void"
! ac_sed_defs="${ac_sed_defs}\${ac_dA}RETSIGTYPE\${ac_dB}RETSIGTYPE\${ac_dC}void\${ac_dD}
! \${ac_uA}RETSIGTYPE\${ac_uB}RETSIGTYPE\${ac_uC}void\${ac_uD}
! \${ac_eA}RETSIGTYPE\${ac_eB}RETSIGTYPE\${ac_eC}void\${ac_eD}
! "
! }
!
!
! else
! rm -rf conftest*
!
! {
! test -n "$verbose" && \
! /bin/echo " defining" RETSIGTYPE to be "int"
! /bin/echo "#define" RETSIGTYPE "int" >> confdefs.h
! DEFS="$DEFS -DRETSIGTYPE=int"
! ac_sed_defs="${ac_sed_defs}\${ac_dA}RETSIGTYPE\${ac_dB}RETSIGTYPE\${ac_dC}int\${ac_dD}
! \${ac_uA}RETSIGTYPE\${ac_uB}RETSIGTYPE\${ac_uC}int\${ac_uD}
! \${ac_eA}RETSIGTYPE\${ac_eB}RETSIGTYPE\${ac_eC}int\${ac_eD}
! "
! }
!
! fi
! rm -f conftest*
!
!
! test -n "$silent" || /bin/echo "checking for ANSI C header files"
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! #include <stdlib.h>
! #include <stdarg.h>
! #include <string.h>
! #include <float.h>
! EOF
! # Some shells (Coherent) do redirections in the wrong order, so need
! # the parens.
! ac_err=`eval "($ac_cpp conftest.${ac_ext} >/dev/null) 2>&1"`
! if test -z "$ac_err"; then
! rm -rf conftest*
! # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
! /bin/echo '#include "confdefs.h"
! #include <string.h>' > conftest.${ac_ext}
! eval "$ac_cpp conftest.${ac_ext} > conftest.out 2>&1"
! if egrep "memchr" conftest.out >/dev/null 2>&1; then
! rm -rf conftest*
! # SGI's /bin/cc from Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! #include <ctype.h>
! #define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
! #define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
! #define XOR(e,f) (((e) && !(f)) || (!(e) && (f)))
! int main () { int i; for (i = 0; i < 256; i++)
! if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2);
! exit (0); }
!
! EOF
! eval $ac_compile
! if test -s conftest && (./conftest; exit) 2>/dev/null; then
! # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
! /bin/echo '#include "confdefs.h"
! #include <stdlib.h>' > conftest.${ac_ext}
! eval "$ac_cpp conftest.${ac_ext} > conftest.out 2>&1"
! if egrep "free" conftest.out >/dev/null 2>&1; then
! rm -rf conftest*
!
! {
! test -n "$verbose" && \
! /bin/echo " defining STDC_HEADERS"
! /bin/echo "#define" STDC_HEADERS "1" >> confdefs.h
! DEFS="$DEFS -DSTDC_HEADERS=1"
! ac_sed_defs="${ac_sed_defs}\${ac_dA}STDC_HEADERS\${ac_dB}STDC_HEADERS\${ac_dC}1\${ac_dD}
! \${ac_uA}STDC_HEADERS\${ac_uB}STDC_HEADERS\${ac_uC}1\${ac_uD}
! \${ac_eA}STDC_HEADERS\${ac_eB}STDC_HEADERS\${ac_eC}1\${ac_eD}
! "
! }
!
!
! fi
! rm -f conftest*
!
!
! fi
! rm -fr conftest*
!
! fi
! rm -f conftest*
!
!
! fi
! rm -f conftest*
!
!
! test -n "$silent" || /bin/echo "checking for _setjmp declaration in <setjmp.h>"
! cat > conftest.${ac_ext} <<EOF
! #include "confdefs.h"
! #include <setjmp.h>
! int main() { return 0; }
! int t() { jmp_buf buf; _setjmp (buf); return 0; }
! EOF
! if eval $ac_compile; then
! rm -rf conftest*
!
! {
! test -n "$verbose" && \
! /bin/echo " defining HAVE_UNDERSCORE_SETJMP"
! /bin/echo "#define" HAVE_UNDERSCORE_SETJMP "1" >> confdefs.h
! DEFS="$DEFS -DHAVE_UNDERSCORE_SETJMP=1"
! ac_sed_defs="${ac_sed_defs}\${ac_dA}HAVE_UNDERSCORE_SETJMP\${ac_dB}HAVE_UNDERSCORE_SETJMP\${ac_dC}1\${ac_dD}
! \${ac_uA}HAVE_UNDERSCORE_SETJMP\${ac_uB}HAVE_UNDERSCORE_SETJMP\${ac_uC}1\${ac_uD}
! \${ac_eA}HAVE_UNDERSCORE_SETJMP\${ac_eB}HAVE_UNDERSCORE_SETJMP\${ac_eC}1\${ac_eD}
! "
! }
!
!
! fi
! rm -f conftest*
!
!
!
! # The preferred way to propogate these variables is regular @ substitutions.
! if test -n "$prefix"; then
! ac_prsub="s%^prefix\\([ ]*\\)=\\([ ]*\\).*$%prefix\\1=\\2$prefix%"
! else
! prefix=/usr/local
! fi
! if test -n "$exec_prefix"; then
! ac_prsub="$ac_prsub
! s%^exec_prefix\\([ ]*\\)=\\([ ]*\\).*$%exec_prefix\\1=\\2$exec_prefix%"
else
! exec_prefix='${prefix}' # Let make expand it.
! fi
!
! # Any assignment to VPATH causes Sun make to only execute
! # the first set of double-colon rules, so remove it if not needed.
! # If there is a colon in the path, we need to keep it.
! if test "x$srcdir" = x.; then
! ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d'
fi
+
+ # Quote sed substitution magic chars in DEFS.
+ cat >conftest.def <<EOF
+ $DEFS
+ EOF
+ ac_escape_ampersand_and_backslash='s%[&\\]%\\&%g'
+ DEFS=`sed "$ac_escape_ampersand_and_backslash" <conftest.def`
+ rm -f conftest.def
+ # Substitute for predefined variables.
+
+ trap 'rm -f config.status; exit 1' 1 2 15
+ /bin/echo creating config.status
+ # Some systems, like AmigaDOS, won't allow you to remove a script that is
+ # being executed, so just move it out of the way instead.
+ if test -f config.status; then mv config.status config.status.old; else true; fi
+ cat > config.status <<EOF
+ #!/bin/sh
+ # Generated automatically by configure.
+ # Run this file to recreate the current configuration.
+ # This directory was configured as follows,
+ # on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+ #
+ # $0 $configure_args
+
+ ac_cs_usage="Usage: config.status [--recheck] [--version] [--help]"
+ for ac_option
+ do
+ case "\$ac_option" in
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ /bin/echo running \${CONFIG_SHELL-/bin/sh} $0 $configure_args --no-create
+ exec \${CONFIG_SHELL-/bin/sh} $0 $configure_args --no-create ;;
+ -version | --version | --versio | --versi | --vers | --ver | --ve | --v)
+ /bin/echo "config.status generated by autoconf version 1.11"
+ exit 0 ;;
+ -help | --help | --hel | --he | --h)
+ /bin/echo "\$ac_cs_usage"; exit 0 ;;
+ *) /bin/echo "\$ac_cs_usage"; exit 1 ;;
+ esac
+ done
+
+ trap 'rm -fr Makefile config.h conftest*; exit 1' 1 2 15
+ CC='$CC'
+ CFLAGS='$CFLAGS'
+ LDFLAGS='$LDFLAGS'
+ CPP='$CPP'
+ INSTALL='$INSTALL'
+ INSTALL_PROGRAM='$INSTALL_PROGRAM'
+ INSTALL_DATA='$INSTALL_DATA'
+ YACC='$YACC'
+ LIBS='$LIBS'
+ srcdir='$srcdir'
+ top_srcdir='$top_srcdir'
+ prefix='$prefix'
+ exec_prefix='$exec_prefix'
+ ac_prsub='$ac_prsub'
+ ac_vpsub='$ac_vpsub'
+ extrasub='$extrasub'
+ EOF
+ cat >> config.status <<\EOF
+
+ ac_given_srcdir=$srcdir
+
+ CONFIG_FILES=${CONFIG_FILES-"Makefile"}
+ for ac_file in .. ${CONFIG_FILES}; do if test "x$ac_file" != x..; then
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dir=`/bin/echo $ac_file|sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
+ # The file is in a subdirectory.
+ test ! -d "$ac_dir" && mkdir "$ac_dir"
+ ac_dir_suffix="/$ac_dir"
+ else
+ ac_dir_suffix=
+ fi
+
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_dots=`/bin/echo $ac_dir_suffix|sed 's%/[^/]*%../%g'`
+ case "$ac_given_srcdir" in
+ .) srcdir=.
+ if test -z "$ac_dir_suffix"; then top_srcdir=.
+ else top_srcdir=`/bin/echo $ac_dots|sed 's%/$%%'`; fi ;;
+ /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;;
+ *) # Relative path.
+ srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix"
+ top_srcdir="$ac_dots$ac_given_srcdir" ;;
+ esac
+
+ /bin/echo creating "$ac_file"
+ rm -f "$ac_file"
+ comment_str="Generated automatically from `/bin/echo $ac_file|sed 's|.*/||'`.in by configure."
+ case "$ac_file" in
+ *.c | *.h | *.C | *.cc | *.m ) /bin/echo "/* $comment_str */" > "$ac_file" ;;
+ * ) /bin/echo "# $comment_str" > "$ac_file" ;;
+ esac
+ sed -e "
+ $ac_prsub
+ $ac_vpsub
+ $extrasub
+ s%@CC@%$CC%g
+ s%@CFLAGS@%$CFLAGS%g
+ s%@LDFLAGS@%$LDFLAGS%g
+ s%@CPP@%$CPP%g
+ s%@INSTALL@%$INSTALL%g
+ s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g
+ s%@INSTALL_DATA@%$INSTALL_DATA%g
+ s%@YACC@%$YACC%g
+ s%@LIBS@%$LIBS%g
+ s%@srcdir@%$srcdir%g
+ s%@top_srcdir@%$top_srcdir%g
+ s%@prefix@%$prefix%g
+ s%@exec_prefix@%$exec_prefix%g
+ s%@DEFS@%-DHAVE_CONFIG_H%" $ac_given_srcdir/${ac_file}.in >> $ac_file
+ fi; done
+
+ # These sed commands are put into ac_sed_defs when defining a macro.
+ # They are broken into pieces to make the sed script easier to manage.
+ # They are passed to sed as "A NAME B NAME C VALUE D", where NAME
+ # is the cpp macro being defined and VALUE is the value it is being given.
+ # Each defining turns into a single global substitution command.
+ # Hopefully no one uses "!" as a variable value.
+ # Other candidates for the sed separators, like , and @, do get used.
+ #
+ # ac_d sets the value in "#define NAME VALUE" lines.
+ ac_dA='s!^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ ac_dB='\([ ][ ]*\)[^ ]*!\1#\2'
+ ac_dC='\3'
+ ac_dD='!g'
+ # ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE".
+ ac_uA='s!^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ ac_uB='\([ ]\)!\1#\2define\3'
+ ac_uC=' '
+ ac_uD='\4!g'
+ # ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ ac_eA='s!^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ ac_eB='$!\1#\2define\3'
+ ac_eC=' '
+ ac_eD='!g'
+ rm -f conftest.sed
+ EOF
+ # Turn off quoting long enough to insert the sed commands.
+ rm -f conftest.sh
+ cat > conftest.sh <<EOF
+ $ac_sed_defs
+ EOF
+
+ # Break up $ac_sed_defs (now in conftest.sh) because some shells have a limit
+ # on the size of here documents.
+
+ # Maximum number of lines to put in a single here document.
+ ac_max_sh_lines=9
+
+ while :
+ do
+ # wc gives bogus results for an empty file on some AIX systems.
+ ac_lines=`grep -c . conftest.sh`
+ if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi
+ rm -f conftest.s1 conftest.s2
+ sed ${ac_max_sh_lines}q conftest.sh > conftest.s1 # Like head -9.
+ sed 1,${ac_max_sh_lines}d conftest.sh > conftest.s2 # Like tail +10.
+ # Write a limited-size here document to append to conftest.sed.
+ /bin/echo 'cat >> conftest.sed <<CONFEOF' >> config.status
+ cat conftest.s1 >> config.status
+ /bin/echo 'CONFEOF' >> config.status
+ rm -f conftest.s1 conftest.sh
+ mv conftest.s2 conftest.sh
+ done
+ rm -f conftest.sh
+
+ # Now back to your regularly scheduled config.status.
+ cat >> config.status <<\EOF
+ # This sed command replaces #undef's with comments. This is necessary, for
+ # example, in the case of _POSIX_SOURCE, which is predefined and required
+ # on some systems where configure will not decide to define it in
+ # config.h.
+ cat >> conftest.sed <<\CONFEOF
+ s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
+ CONFEOF
+ rm -f conftest.h
+ # Break up the sed commands because old seds have small limits.
+ ac_max_sed_lines=20
+
+ CONFIG_HEADERS=${CONFIG_HEADERS-"config.h"}
+ for ac_file in .. ${CONFIG_HEADERS}; do if test "x$ac_file" != x..; then
+ /bin/echo creating $ac_file
+
+ cp $ac_given_srcdir/$ac_file.in conftest.h1
+ cp conftest.sed conftest.stm
+ while :
+ do
+ ac_lines=`grep -c . conftest.stm`
+ if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi
+ rm -f conftest.s1 conftest.s2 conftest.h2
+ sed ${ac_max_sed_lines}q conftest.stm > conftest.s1 # Like head -20.
+ sed 1,${ac_max_sed_lines}d conftest.stm > conftest.s2 # Like tail +21.
+ sed -f conftest.s1 < conftest.h1 > conftest.h2
+ rm -f conftest.s1 conftest.h1 conftest.stm
+ mv conftest.h2 conftest.h1
+ mv conftest.s2 conftest.stm
+ done
+ rm -f conftest.stm conftest.h
+ /bin/echo "/* $ac_file. Generated automatically by configure. */" > conftest.h
+ cat conftest.h1 >> conftest.h
+ rm -f conftest.h1
+ if cmp -s $ac_file conftest.h 2>/dev/null; then
+ # The file exists and we would not be changing it.
+ /bin/echo "$ac_file is unchanged"
+ rm -f conftest.h
+ else
+ rm -f $ac_file
+ mv conftest.h $ac_file
+ fi
+ fi; done
+ rm -f conftest.sed
+
+
+
+ exit 0
+ EOF
+ chmod +x config.status
+ # Some shells look in PATH for config.status without the "./".
+ test -n "$no_create" || ${CONFIG_SHELL-/bin/sh} ./config.status
+
diff -rc --new-file /src/baseline/gawk-2.15.5/configure.in gawk-2.15.5/configure.in
*** /src/baseline/gawk-2.15.5/configure.in Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/configure.in Sun Jun 12 21:43:35 1994
***************
*** 0 ****
--- 1,28 ----
+ dnl Process this file with autoconf to produce a configure script.
+ AC_INIT(awk.y)
+ AC_CONFIG_HEADER(config.h)
+
+ # We want these before the checks, so the checks can modify their values.
+ test -z "$CFLAGS" && CFLAGS= auto_cflags=1
+ test -z "$LDFLAGS" && LDFLAGS=
+
+ AC_PROG_CC
+
+ # If we're using gcc and the user hasn't specified CFLAGS, add -O to CFLAGS.
+ test -n "$GCC" && test -n "$auto_cflags" && CFLAGS="$CFLAGS -O2"
+
+ AC_SUBST(CFLAGS)dnl
+ AC_SUBST(LDFLAGS)dnl
+
+ AC_PROG_CPP
+ AC_GCC_TRADITIONAL
+ AC_PROG_INSTALL
+ AC_PROG_YACC
+ AC_RETSIGTYPE
+ AC_STDC_HEADERS
+
+ AC_COMPILE_CHECK(_setjmp declaration in <setjmp.h>,
+ [#include <setjmp.h>], [jmp_buf buf; _setjmp (buf)],
+ AC_DEFINE(HAVE_UNDERSCORE_SETJMP))
+
+ AC_OUTPUT(Makefile)
diff -rc --new-file /src/baseline/gawk-2.15.5/dfa.h gawk-2.15.5/dfa.h
*** /src/baseline/gawk-2.15.5/dfa.h Tue Jan 4 17:18:17 1994
--- gawk-2.15.5/dfa.h Sun Jun 12 21:43:38 1994
***************
*** 23,35 ****
--- 23,39 ----
name space. */
/* Number of bits in an unsigned char. */
+ #ifndef CHARBITS
#define CHARBITS 8
+ #endif
/* First integer value that is greater than any character code. */
#define NOTCHAR (1 << CHARBITS)
/* INTBITS need not be exact, just a lower bound. */
+ #ifndef INTBITS
#define INTBITS (CHARBITS * sizeof (int))
+ #endif
/* Number of ints required to hold a bit for every character. */
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.1 gawk-2.15.5/gawk.1
*** /src/baseline/gawk-2.15.5/gawk.1 Mon Apr 18 18:04:03 1994
--- gawk-2.15.5/gawk.1 Sun Jun 12 21:43:43 1994
***************
*** 329,335 ****
the
.B \-f
option. If this variable does not exist, the default path is
! \fB".:/usr/lib/awk:/usr/local/lib/awk"\fR.
If a file name given to the
.B \-f
option contains a ``/'' character, no path search is performed.
--- 329,335 ----
the
.B \-f
option. If this variable does not exist, the default path is
! \fB".:/local/lib/awk:/gnu/lib/awk"\fR.
If a file name given to the
.B \-f
option contains a ``/'' character, no path search is performed.
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info gawk-2.15.5/gawk.info
*** /src/baseline/gawk-2.15.5/gawk.info Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info Sun Jun 12 22:28:57 1994
***************
*** 0 ****
--- 1,204 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ Indirect:
+ gawk.info-1: 1079
+ gawk.info-2: 48275
+ gawk.info-3: 98104
+ gawk.info-4: 146775
+ gawk.info-5: 195898
+ gawk.info-6: 243621
+ gawk.info-7: 291484
+ gawk.info-8: 340862
+ gawk.info-9: 380695
+
+ Tag Table:
+ (Indirect)
+ Node: Top1079
+ Node: Preface3990
+ Node: History5583
+ Node: Copying7926
+ Node: This Manual27078
+ Node: Sample Data Files28922
+ Node: Getting Started31714
+ Node: Very Simple33425
+ Node: Two Rules35393
+ Node: More Complex37477
+ Node: Running gawk40463
+ Node: One-shot41412
+ Node: Read Terminal42548
+ Node: Long43607
+ Node: Executable Scripts44949
+ Node: Comments47172
+ Node: Statements/Lines48275
+ Node: When51166
+ Node: Reading Files53081
+ Node: Records54808
+ Node: Fields58042
+ Node: Non-Constant Fields60655
+ Node: Changing Fields62479
+ Node: Field Separators65852
+ Node: Constant Size75488
+ Node: Multiple Line79036
+ Node: Getline81432
+ Node: Close Input91176
+ Node: Printing92632
+ Node: Print93631
+ Node: Print Examples95765
+ Node: Output Separators98104
+ Node: OFMT99842
+ Node: Printf100986
+ Node: Basic Printf101890
+ Node: Control Letters103315
+ Node: Format Modifiers105129
+ Node: Printf Examples107634
+ Node: Redirection110316
+ Node: File/Pipe Redirection111052
+ Node: Close Output114525
+ Node: Special Files117029
+ Node: One-liners121963
+ Node: Patterns124685
+ Node: Kinds of Patterns125663
+ Node: Regexp126668
+ Node: Regexp Usage127612
+ Node: Regexp Operators129641
+ Node: Case-sensitivity135119
+ Node: Comparison Patterns137429
+ Node: Boolean Patterns139590
+ Node: Expression Patterns141100
+ Node: Ranges142540
+ Node: BEGIN/END143950
+ Node: Empty146504
+ Node: Actions146775
+ Node: Expressions149155
+ Node: Constants151176
+ Node: Variables156741
+ Node: Assignment Options158350
+ Node: Arithmetic Ops160034
+ Node: Concatenation161700
+ Node: Comparison Ops163048
+ Node: Boolean Ops167473
+ Node: Assignment Ops169936
+ Node: Increment Ops174010
+ Node: Conversion176506
+ Node: Values179581
+ Node: Conditional Exp182164
+ Node: Function Calls183492
+ Node: Precedence186256
+ Node: Statements189639
+ Node: If Statement191322
+ Node: While Statement192850
+ Node: Do Statement194813
+ Node: For Statement195898
+ Node: Break Statement199090
+ Node: Continue Statement200699
+ Node: Next Statement203322
+ Node: Next File Statement205266
+ Node: Exit Statement208312
+ Node: Arrays209927
+ Node: Array Intro211129
+ Node: Reference to Elements214632
+ Node: Assigning Elements216575
+ Node: Array Example217077
+ Node: Scanning an Array218808
+ Node: Delete221108
+ Node: Numeric Array Subscripts222088
+ Node: Multi-dimensional223968
+ Node: Multi-scanning227196
+ Node: Built-in228818
+ Node: Calling Built-in229775
+ Node: Numeric Functions231039
+ Node: String Functions234386
+ Node: I/O Functions243621
+ Node: Time Functions246245
+ Node: User-defined254329
+ Node: Definition Syntax255047
+ Node: Function Example259039
+ Node: Function Caveats260114
+ Node: Return Statement263157
+ Node: Built-in Variables265540
+ Node: User-modified266576
+ Node: Auto-set270853
+ Node: Command Line276592
+ Node: Options277517
+ Node: Other Arguments284591
+ Node: AWKPATH Variable286957
+ Node: Obsolete289095
+ Node: Undocumented290355
+ Node: Language History290543
+ Node: V7/S5R3.1291484
+ Node: S5R4294188
+ Node: POSIX295725
+ Node: POSIX/GNU296443
+ Node: Installation298074
+ Node: Gawk Distribution298940
+ Node: Extracting299393
+ Node: Distribution contents300988
+ Node: Unix Installation303792
+ Node: Quick Installation304483
+ Node: Configuration Philosophy305652
+ Node: New Configurations307978
+ Node: VMS Installation310080
+ Node: VMS Compilation310639
+ Node: VMS Installation Details312254
+ Node: VMS Running313891
+ Node: VMS POSIX315477
+ Node: MS-DOS Installation316865
+ Node: Atari Installation317793
+ Node: Gawk Summary323555
+ Node: Command Line Summary324316
+ Node: Language Summary326749
+ Node: Variables/Fields328834
+ Node: Fields Summary329560
+ Node: Built-in Summary331027
+ Node: Arrays Summary334232
+ Node: Data Type Summary335507
+ Node: Rules Summary336980
+ Node: Pattern Summary338629
+ Node: Regexp Summary340862
+ Node: Actions Summary342082
+ Node: Operator Summary342997
+ Node: Control Flow Summary344358
+ Node: I/O Summary344964
+ Node: Printf Summary346843
+ Node: Special File Summary348669
+ Node: Numeric Functions Summary350727
+ Node: String Functions Summary351563
+ Node: Time Functions Summary353395
+ Node: String Constants Summary354159
+ Node: Functions Summary355542
+ Node: Historical Features356601
+ Node: Sample Program357458
+ Node: Bugs361712
+ Node: Notes364560
+ Node: Compatibility Mode365089
+ Node: Future Extensions365922
+ Node: Improvements368219
+ Node: Glossary370254
+ Node: Index380695
+
+ End Tag Table
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-1 gawk-2.15.5/gawk.info-1
*** /src/baseline/gawk-2.15.5/gawk.info-1 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-1 Sun Jun 12 22:28:48 1994
***************
*** 0 ****
--- 1,1069 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: Top, Next: Preface, Prev: (dir), Up: (dir)
+
+ General Introduction
+ ********************
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ * Menu:
+
+ * Preface:: What you can do with `awk'; brief history
+ and acknowledgements.
+ * Copying:: Your right to copy and distribute `gawk'.
+ * This Manual:: Using this manual.
+ Includes sample input files that you can use.
+ * Getting Started:: A basic introduction to using `awk'.
+ How to run an `awk' program.
+ Command line syntax.
+ * Reading Files:: How to read files and manipulate fields.
+ * Printing:: How to print using `awk'. Describes the
+ `print' and `printf' statements.
+ Also describes redirection of output.
+ * One-liners:: Short, sample `awk' programs.
+ * Patterns:: The various types of patterns
+ explained in detail.
+ * Actions:: The various types of actions are
+ introduced here. Describes
+ expressions and the various operators in
+ detail. Also describes comparison expressions.
+ * Expressions:: Expressions are the basic building
+ blocks of statements.
+ * Statements:: The various control statements are
+ described in detail.
+ * Arrays:: The description and use of arrays.
+ Also includes array-oriented control
+ statements.
+ * Built-in:: The built-in functions are summarized here.
+ * User-defined:: User-defined functions are described in detail.
+ * Built-in Variables:: Built-in Variables
+ * Command Line:: How to run `gawk'.
+ * Language History:: The evolution of the `awk' language.
+ * Installation:: Installing `gawk' under
+ various operating systems.
+ * Gawk Summary:: `gawk' Options and Language Summary.
+ * Sample Program:: A sample `awk' program with a
+ complete explanation.
+ * Bugs:: Reporting Problems and Bugs.
+ * Notes:: Something about the
+ implementation of `gawk'.
+ * Glossary:: An explanation of some unfamiliar terms.
+ * Index::
+
+
+ File: gawk.info, Node: Preface, Next: Copying, Prev: Top, Up: Top
+
+ Preface
+ *******
+
+ If you are like many computer users, you would frequently like to
+ make changes in various text files wherever certain patterns appear, or
+ extract data from parts of certain lines while discarding the rest. To
+ write a program to do this in a language such as C or Pascal is a
+ time-consuming inconvenience that may take many lines of code. The job
+ may be easier with `awk'.
+
+ The `awk' utility interprets a special-purpose programming language
+ that makes it possible to handle simple data-reformatting jobs easily
+ with just a few lines of code.
+
+ The GNU implementation of `awk' is called `gawk'; it is fully upward
+ compatible with the System V Release 4 version of `awk'. `gawk' is
+ also upward compatible with the POSIX (draft) specification of the
+ `awk' language. This means that all properly written `awk' programs
+ should work with `gawk'. Thus, we usually don't distinguish between
+ `gawk' and other `awk' implementations in this manual.
+
+ This manual teaches you what `awk' does and how you can use `awk'
+ effectively. You should already be familiar with basic system commands
+ such as `ls'. Using `awk' you can:
+
+ * manage small, personal databases
+
+ * generate reports
+
+ * validate data
+
+ * produce indexes, and perform other document preparation tasks
+
+ * even experiment with algorithms that can be adapted later to other
+ computer languages
+
+ * Menu:
+
+ * History:: The history of `gawk' and
+ `awk'. Acknowledgements.
+
+
+ File: gawk.info, Node: History, Prev: Preface, Up: Preface
+
+ History of `awk' and `gawk'
+ ===========================
+
+ The name `awk' comes from the initials of its designers: Alfred V.
+ Aho, Peter J. Weinberger, and Brian W. Kernighan. The original version
+ of `awk' was written in 1977. In 1985 a new version made the
+ programming language more powerful, introducing user-defined functions,
+ multiple input streams, and computed regular expressions. This new
+ version became generally available with System V Release 3.1. The
+ version in System V Release 4 added some new features and also cleaned
+ up the behavior in some of the "dark corners" of the language. The
+ specification for `awk' in the POSIX Command Language and Utilities
+ standard further clarified the language based on feedback from both the
+ `gawk' designers, and the original `awk' designers.
+
+ The GNU implementation, `gawk', was written in 1986 by Paul Rubin
+ and Jay Fenlason, with advice from Richard Stallman. John Woods
+ contributed parts of the code as well. In 1988 and 1989, David
+ Trueman, with help from Arnold Robbins, thoroughly reworked `gawk' for
+ compatibility with the newer `awk'. Current development (1992) focuses
+ on bug fixes, performance improvements, and standards compliance.
+
+ We need to thank many people for their assistance in producing this
+ manual. Jay Fenlason contributed many ideas and sample programs.
+ Richard Mlynarik and Robert J. Chassell gave helpful comments on early
+ drafts of this manual. The paper `A Supplemental Document for `awk''
+ by John W. Pierce of the Chemistry Department at UC San Diego,
+ pinpointed several issues relevant both to `awk' implementation and to
+ this manual, that would otherwise have escaped us. David Trueman, Pat
+ Rankin, and Michal Jaegermann also contributed sections of the manual.
+
+ The following people provided many helpful comments on this edition
+ of the manual: Rick Adams, Michael Brennan, Rich Burridge, Diane Close,
+ Christopher ("Topher") Eliot, Michael Lijewski, Pat Rankin, Miriam
+ Robbins, and Michal Jaegermann. Robert J. Chassell provided much
+ valuable advice on the use of Texinfo.
+
+ Finally, we would like to thank Brian Kernighan of Bell Labs for
+ invaluable assistance during the testing and debugging of `gawk', and
+ for help in clarifying numerous points about the language.
+
+
+ File: gawk.info, Node: Copying, Next: This Manual, Prev: Preface, Up: Top
+
+ GNU GENERAL PUBLIC LICENSE
+ **************************
+
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+ ========
+
+ The licenses for most software are designed to take away your
+ freedom to share and change it. By contrast, the GNU General Public
+ License is intended to guarantee your freedom to share and change free
+ software--to make sure the software is free for all its users. This
+ General Public License applies to most of the Free Software
+ Foundation's software and to any other program whose authors commit to
+ using it. (Some other Free Software Foundation software is covered by
+ the GNU Library General Public License instead.) You can apply it to
+ your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+ price. Our General Public Licenses are designed to make sure that you
+ have the freedom to distribute copies of free software (and charge for
+ this service if you wish), that you receive source code or can get it
+ if you want it, that you can change the software or use pieces of it in
+ new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+ anyone to deny you these rights or to ask you to surrender the rights.
+ These restrictions translate to certain responsibilities for you if you
+ distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+ gratis or for a fee, you must give the recipients all the rights that
+ you have. You must make sure that they, too, receive or can get the
+ source code. And you must show them these terms so they know their
+ rights.
+
+ We protect your rights with two steps: (1) copyright the software,
+ and (2) offer you this license which gives you legal permission to copy,
+ distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+ that everyone understands that there is no warranty for this free
+ software. If the software is modified by someone else and passed on, we
+ want its recipients to know that what they have is not the original, so
+ that any problems introduced by others will not reflect on the original
+ authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+ patents. We wish to avoid the danger that redistributors of a free
+ program will individually obtain patent licenses, in effect making the
+ program proprietary. To prevent this, we have made it clear that any
+ patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+ modification follow.
+
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 1. This License applies to any program or other work which contains a
+ notice placed by the copyright holder saying it may be distributed
+ under the terms of this General Public License. The "Program",
+ below, refers to any such program or work, and a "work based on
+ the Program" means either the Program or any derivative work under
+ copyright law: that is to say, a work containing the Program or a
+ portion of it, either verbatim or with modifications and/or
+ translated into another language. (Hereinafter, translation is
+ included without limitation in the term "modification".) Each
+ licensee is addressed as "you".
+
+ Activities other than copying, distribution and modification are
+ not covered by this License; they are outside its scope. The act
+ of running the Program is not restricted, and the output from the
+ Program is covered only if its contents constitute a work based on
+ the Program (independent of having been made by running the
+ Program). Whether that is true depends on what the Program does.
+
+ 2. You may copy and distribute verbatim copies of the Program's
+ source code as you receive it, in any medium, provided that you
+ conspicuously and appropriately publish on each copy an appropriate
+ copyright notice and disclaimer of warranty; keep intact all the
+ notices that refer to this License and to the absence of any
+ warranty; and give any other recipients of the Program a copy of
+ this License along with the Program.
+
+ You may charge a fee for the physical act of transferring a copy,
+ and you may at your option offer warranty protection in exchange
+ for a fee.
+
+ 3. You may modify your copy or copies of the Program or any portion
+ of it, thus forming a work based on the Program, and copy and
+ distribute such modifications or work under the terms of Section 1
+ above, provided that you also meet all of these conditions:
+
+ a. You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b. You must cause any work that you distribute or publish, that
+ in whole or in part contains or is derived from the Program
+ or any part thereof, to be licensed as a whole at no charge
+ to all third parties under the terms of this License.
+
+ c. If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display
+ an announcement including an appropriate copyright notice and
+ a notice that there is no warranty (or else, saying that you
+ provide a warranty) and that users may redistribute the
+ program under these conditions, and telling the user how to
+ view a copy of this License. (Exception: if the Program
+ itself is interactive but does not normally print such an
+ announcement, your work based on the Program is not required
+ to print an announcement.)
+
+ These requirements apply to the modified work as a whole. If
+ identifiable sections of that work are not derived from the
+ Program, and can be reasonably considered independent and separate
+ works in themselves, then this License, and its terms, do not
+ apply to those sections when you distribute them as separate
+ works. But when you distribute the same sections as part of a
+ whole which is a work based on the Program, the distribution of
+ the whole must be on the terms of this License, whose permissions
+ for other licensees extend to the entire whole, and thus to each
+ and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or
+ contest your rights to work written entirely by you; rather, the
+ intent is to exercise the right to control the distribution of
+ derivative or collective works based on the Program.
+
+ In addition, mere aggregation of another work not based on the
+ Program with the Program (or with a work based on the Program) on
+ a volume of a storage or distribution medium does not bring the
+ other work under the scope of this License.
+
+ 4. You may copy and distribute the Program (or a work based on it,
+ under Section 2) in object code or executable form under the terms
+ of Sections 1 and 2 above provided that you also do one of the
+ following:
+
+ a. Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Sections 1 and 2 above on a medium customarily used for
+ software interchange; or,
+
+ b. Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a
+ medium customarily used for software interchange; or,
+
+ c. Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with
+ such an offer, in accord with Subsection b above.)
+
+ The source code for a work means the preferred form of the work for
+ making modifications to it. For an executable work, complete
+ source code means all the source code for all modules it contains,
+ plus any associated interface definition files, plus the scripts
+ used to control compilation and installation of the executable.
+ However, as a special exception, the source code distributed need
+ not include anything that is normally distributed (in either
+ source or binary form) with the major components (compiler,
+ kernel, and so on) of the operating system on which the executable
+ runs, unless that component itself accompanies the executable.
+
+ If distribution of executable or object code is made by offering
+ access to copy from a designated place, then offering equivalent
+ access to copy the source code from the same place counts as
+ distribution of the source code, even though third parties are not
+ compelled to copy the source along with the object code.
+
+ 5. You may not copy, modify, sublicense, or distribute the Program
+ except as expressly provided under this License. Any attempt
+ otherwise to copy, modify, sublicense or distribute the Program is
+ void, and will automatically terminate your rights under this
+ License. However, parties who have received copies, or rights,
+ from you under this License will not have their licenses
+ terminated so long as such parties remain in full compliance.
+
+ 6. You are not required to accept this License, since you have not
+ signed it. However, nothing else grants you permission to modify
+ or distribute the Program or its derivative works. These actions
+ are prohibited by law if you do not accept this License.
+ Therefore, by modifying or distributing the Program (or any work
+ based on the Program), you indicate your acceptance of this
+ License to do so, and all its terms and conditions for copying,
+ distributing or modifying the Program or works based on it.
+
+ 7. Each time you redistribute the Program (or any work based on the
+ Program), the recipient automatically receives a license from the
+ original licensor to copy, distribute or modify the Program
+ subject to these terms and conditions. You may not impose any
+ further restrictions on the recipients' exercise of the rights
+ granted herein. You are not responsible for enforcing compliance
+ by third parties to this License.
+
+ 8. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent
+ issues), conditions are imposed on you (whether by court order,
+ agreement or otherwise) that contradict the conditions of this
+ License, they do not excuse you from the conditions of this
+ License. If you cannot distribute so as to satisfy simultaneously
+ your obligations under this License and any other pertinent
+ obligations, then as a consequence you may not distribute the
+ Program at all. For example, if a patent license would not permit
+ royalty-free redistribution of the Program by all those who
+ receive copies directly or indirectly through you, then the only
+ way you could satisfy both it and this License would be to refrain
+ entirely from distribution of the Program.
+
+ If any portion of this section is held invalid or unenforceable
+ under any particular circumstance, the balance of the section is
+ intended to apply and the section as a whole is intended to apply
+ in other circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of
+ any such claims; this section has the sole purpose of protecting
+ the integrity of the free software distribution system, which is
+ implemented by public license practices. Many people have made
+ generous contributions to the wide range of software distributed
+ through that system in reliance on consistent application of that
+ system; it is up to the author/donor to decide if he or she is
+ willing to distribute software through any other system and a
+ licensee cannot impose that choice.
+
+ This section is intended to make thoroughly clear what is believed
+ to be a consequence of the rest of this License.
+
+ 9. If the distribution and/or use of the Program is restricted in
+ certain countries either by patents or by copyrighted interfaces,
+ the original copyright holder who places the Program under this
+ License may add an explicit geographical distribution limitation
+ excluding those countries, so that distribution is permitted only
+ in or among countries not thus excluded. In such case, this
+ License incorporates the limitation as if written in the body of
+ this License.
+
+ 10. The Free Software Foundation may publish revised and/or new
+ versions of the General Public License from time to time. Such
+ new versions will be similar in spirit to the present version, but
+ may differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+ Program specifies a version number of this License which applies
+ to it and "any later version", you have the option of following
+ the terms and conditions either of that version or of any later
+ version published by the Free Software Foundation. If the Program
+ does not specify a version number of this License, you may choose
+ any version ever published by the Free Software Foundation.
+
+ 11. If you wish to incorporate parts of the Program into other free
+ programs whose distribution conditions are different, write to the
+ author to ask for permission. For software which is copyrighted
+ by the Free Software Foundation, write to the Free Software
+ Foundation; we sometimes make exceptions for this. Our decision
+ will be guided by the two goals of preserving the free status of
+ all derivatives of our free software and of promoting the sharing
+ and reuse of software generally.
+
+ NO WARRANTY
+
+ 12. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
+ WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
+ LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
+ WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
+ NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
+ QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
+ SERVICING, REPAIR OR CORRECTION.
+
+ 13. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+ WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
+ MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
+ LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+ INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
+ INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
+ OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
+ OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+ =============================================
+
+ If you develop a new program, and you want it to be of the greatest
+ possible use to the public, the best way to achieve this is to make it
+ free software which everyone can redistribute and change under these
+ terms.
+
+ To do so, attach the following notices to the program. It is safest
+ to attach them to the start of each source file to most effectively
+ convey the exclusion of warranty; and each file should have at least
+ the "copyright" line and a pointer to where the full notice is found.
+
+ ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
+ Copyright (C) 19YY NAME OF AUTHOR
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Also add information on how to contact you by electronic and paper
+ mail.
+
+ If the program is interactive, make it output a short notice like
+ this when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
+ type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+ The hypothetical commands `show w' and `show c' should show the
+ appropriate parts of the General Public License. Of course, the
+ commands you use may be called something other than `show w' and `show
+ c'; they could even be mouse-clicks or menu items--whatever suits your
+ program.
+
+ You should also get your employer (if you work as a programmer) or
+ your school, if any, to sign a "copyright disclaimer" for the program,
+ if necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ SIGNATURE OF TY COON, 1 April 1989
+ Ty Coon, President of Vice
+
+ This General Public License does not permit incorporating your
+ program into proprietary programs. If your program is a subroutine
+ library, you may consider it more useful to permit linking proprietary
+ applications with the library. If this is what you want to do, use the
+ GNU Library General Public License instead of this License.
+
+
+ File: gawk.info, Node: This Manual, Next: Getting Started, Prev: Copying, Up: Top
+
+ Using this Manual
+ *****************
+
+ The term `awk' refers to a particular program, and to the language
+ you use to tell this program what to do. When we need to be careful,
+ we call the program "the `awk' utility" and the language "the `awk'
+ language." The term `gawk' refers to a version of `awk' developed as
+ part the GNU project. The purpose of this manual is to explain both the
+ `awk' language and how to run the `awk' utility.
+
+ While concentrating on the features of `gawk', the manual will also
+ attempt to describe important differences between `gawk' and other
+ `awk' implementations. In particular, any features that are not in the
+ POSIX standard for `awk' will be noted.
+
+ The term "`awk' program" refers to a program written by you in the
+ `awk' programming language.
+
+ *Note Getting Started with `awk': Getting Started, for the bare
+ essentials you need to know to start using `awk'.
+
+ Some useful "one-liners" are included to give you a feel for the
+ `awk' language (*note Useful "One-liners": One-liners.).
+
+ A sample `awk' program has been provided for you (*note Sample
+ Program::.).
+
+ If you find terms that you aren't familiar with, try looking them up
+ in the glossary (*note Glossary::.).
+
+ The entire `awk' language is summarized for quick reference in *Note
+ `gawk' Summary: Gawk Summary. Look there if you just need to refresh
+ your memory about a particular feature.
+
+ Most of the time complete `awk' programs are used as examples, but in
+ some of the more advanced sections, only the part of the `awk' program
+ that illustrates the concept being described is shown.
+
+ * Menu:
+
+ * Sample Data Files:: Sample data files for use in the `awk'
+ programs illustrated in this manual.
+
+
+ File: gawk.info, Node: Sample Data Files, Prev: This Manual, Up: This Manual
+
+ Data Files for the Examples
+ ===========================
+
+ Many of the examples in this manual take their input from two sample
+ data files. The first, called `BBS-list', represents a list of
+ computer bulletin board systems together with information about those
+ systems. The second data file, called `inventory-shipped', contains
+ information about shipments on a monthly basis. Each line of these
+ files is one "record".
+
+ In the file `BBS-list', each record contains the name of a computer
+ bulletin board, its phone number, the board's baud rate, and a code for
+ the number of hours it is operational. An `A' in the last column means
+ the board operates 24 hours a day. A `B' in the last column means the
+ board operates evening and weekend hours, only. A `C' means the board
+ operates only on weekends.
+
+ aardvark 555-5553 1200/300 B
+ alpo-net 555-3412 2400/1200/300 A
+ barfly 555-7685 1200/300 A
+ bites 555-1675 2400/1200/300 A
+ camelot 555-0542 300 C
+ core 555-2912 1200/300 C
+ fooey 555-1234 2400/1200/300 B
+ foot 555-6699 1200/300 B
+ macfoo 555-6480 1200/300 A
+ sdace 555-3430 2400/1200/300 A
+ sabafoo 555-2127 1200/300 C
+
+ The second data file, called `inventory-shipped', represents
+ information about shipments during the year. Each record contains the
+ month of the year, the number of green crates shipped, the number of
+ red boxes shipped, the number of orange bags shipped, and the number of
+ blue packages shipped, respectively. There are 16 entries, covering
+ the 12 months of one year and 4 months of the next year.
+
+ Jan 13 25 15 115
+ Feb 15 32 24 226
+ Mar 15 24 34 228
+ Apr 31 52 63 420
+ May 16 34 29 208
+ Jun 31 42 75 492
+ Jul 24 34 67 436
+ Aug 15 34 47 316
+ Sep 13 55 37 277
+ Oct 29 54 68 525
+ Nov 20 87 82 577
+ Dec 17 35 61 401
+
+ Jan 21 36 64 620
+ Feb 26 58 80 652
+ Mar 24 75 70 495
+ Apr 21 70 74 514
+
+ If you are reading this in GNU Emacs using Info, you can copy the
+ regions of text showing these sample files into your own test files.
+ This way you can try out the examples shown in the remainder of this
+ document. You do this by using the command `M-x write-region' to copy
+ text from the Info file into a file for use with `awk' (*Note Misc File
+ Ops: (emacs)Misc File Ops, for more information). Using this
+ information, create your own `BBS-list' and `inventory-shipped' files,
+ and practice what you learn in this manual.
+
+
+ File: gawk.info, Node: Getting Started, Next: Reading Files, Prev: This Manual, Up: Top
+
+ Getting Started with `awk'
+ **************************
+
+ The basic function of `awk' is to search files for lines (or other
+ units of text) that contain certain patterns. When a line matches one
+ of the patterns, `awk' performs specified actions on that line. `awk'
+ keeps processing input lines in this way until the end of the input
+ file is reached.
+
+ When you run `awk', you specify an `awk' "program" which tells `awk'
+ what to do. The program consists of a series of "rules". (It may also
+ contain "function definitions", but that is an advanced feature, so we
+ will ignore it for now. *Note User-defined Functions: User-defined.)
+ Each rule specifies one pattern to search for, and one action to
+ perform when that pattern is found.
+
+ Syntactically, a rule consists of a pattern followed by an action.
+ The action is enclosed in curly braces to separate it from the pattern.
+ Rules are usually separated by newlines. Therefore, an `awk' program
+ looks like this:
+
+ PATTERN { ACTION }
+ PATTERN { ACTION }
+ ...
+
+ * Menu:
+
+ * Very Simple:: A very simple example.
+ * Two Rules:: A less simple one-line example with two rules.
+ * More Complex:: A more complex example.
+ * Running gawk:: How to run `gawk' programs;
+ includes command line syntax.
+ * Comments:: Adding documentation to `gawk' programs.
+ * Statements/Lines:: Subdividing or combining statements into lines.
+ * When:: When to use `gawk' and
+ when to use other things.
+
+
+ File: gawk.info, Node: Very Simple, Next: Two Rules, Prev: Getting Started, Up: Getting Started
+
+ A Very Simple Example
+ =====================
+
+ The following command runs a simple `awk' program that searches the
+ input file `BBS-list' for the string of characters: `foo'. (A string
+ of characters is usually called, a "string". The term "string" is
+ perhaps based on similar usage in English, such as "a string of
+ pearls," or, "a string of cars in a train.")
+
+ awk '/foo/ { print $0 }' BBS-list
+
+ When lines containing `foo' are found, they are printed, because
+ `print $0' means print the current line. (Just `print' by itself means
+ the same thing, so we could have written that instead.)
+
+ You will notice that slashes, `/', surround the string `foo' in the
+ actual `awk' program. The slashes indicate that `foo' is a pattern to
+ search for. This type of pattern is called a "regular expression", and
+ is covered in more detail later (*note Regular Expressions as Patterns:
+ Regexp.). There are single-quotes around the `awk' program so that the
+ shell won't interpret any of it as special shell characters.
+
+ Here is what this program prints:
+
+ fooey 555-1234 2400/1200/300 B
+ foot 555-6699 1200/300 B
+ macfoo 555-6480 1200/300 A
+ sabafoo 555-2127 1200/300 C
+
+ In an `awk' rule, either the pattern or the action can be omitted,
+ but not both. If the pattern is omitted, then the action is performed
+ for *every* input line. If the action is omitted, the default action
+ is to print all lines that match the pattern.
+
+ Thus, we could leave out the action (the `print' statement and the
+ curly braces) in the above example, and the result would be the same:
+ all lines matching the pattern `foo' would be printed. By comparison,
+ omitting the `print' statement but retaining the curly braces makes an
+ empty action that does nothing; then no lines would be printed.
+
+
+ File: gawk.info, Node: Two Rules, Next: More Complex, Prev: Very Simple, Up: Getting Started
+
+ An Example with Two Rules
+ =========================
+
+ The `awk' utility reads the input files one line at a time. For
+ each line, `awk' tries the patterns of each of the rules. If several
+ patterns match then several actions are run, in the order in which they
+ appear in the `awk' program. If no patterns match, then no actions are
+ run.
+
+ After processing all the rules (perhaps none) that match the line,
+ `awk' reads the next line (however, *note The `next' Statement: Next
+ Statement.). This continues until the end of the file is reached.
+
+ For example, the `awk' program:
+
+ /12/ { print $0 }
+ /21/ { print $0 }
+
+ contains two rules. The first rule has the string `12' as the pattern
+ and `print $0' as the action. The second rule has the string `21' as
+ the pattern and also has `print $0' as the action. Each rule's action
+ is enclosed in its own pair of braces.
+
+ This `awk' program prints every line that contains the string `12'
+ *or* the string `21'. If a line contains both strings, it is printed
+ twice, once by each rule.
+
+ If we run this program on our two sample data files, `BBS-list' and
+ `inventory-shipped', as shown here:
+
+ awk '/12/ { print $0 }
+ /21/ { print $0 }' BBS-list inventory-shipped
+
+ we get the following output:
+
+ aardvark 555-5553 1200/300 B
+ alpo-net 555-3412 2400/1200/300 A
+ barfly 555-7685 1200/300 A
+ bites 555-1675 2400/1200/300 A
+ core 555-2912 1200/300 C
+ fooey 555-1234 2400/1200/300 B
+ foot 555-6699 1200/300 B
+ macfoo 555-6480 1200/300 A
+ sdace 555-3430 2400/1200/300 A
+ sabafoo 555-2127 1200/300 C
+ sabafoo 555-2127 1200/300 C
+ Jan 21 36 64 620
+ Apr 21 70 74 514
+
+ Note how the line in `BBS-list' beginning with `sabafoo' was printed
+ twice, once for each rule.
+
+
+ File: gawk.info, Node: More Complex, Next: Running gawk, Prev: Two Rules, Up: Getting Started
+
+ A More Complex Example
+ ======================
+
+ Here is an example to give you an idea of what typical `awk'
+ programs do. This example shows how `awk' can be used to summarize,
+ select, and rearrange the output of another utility. It uses features
+ that haven't been covered yet, so don't worry if you don't understand
+ all the details.
+
+ ls -l | awk '$5 == "Nov" { sum += $4 }
+ END { print sum }'
+
+ This command prints the total number of bytes in all the files in the
+ current directory that were last modified in November (of any year).
+ (In the C shell you would need to type a semicolon and then a backslash
+ at the end of the first line; in a POSIX-compliant shell, such as the
+ Bourne shell or the Bourne-Again shell, you can type the example as
+ shown.)
+
+ The `ls -l' part of this example is a command that gives you a
+ listing of the files in a directory, including file size and date. Its
+ output looks like this:
+
+ -rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile
+ -rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h
+ -rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h
+ -rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y
+ -rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c
+ -rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c
+ -rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c
+ -rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c
+
+ The first field contains read-write permissions, the second field
+ contains the number of links to the file, and the third field
+ identifies the owner of the file. The fourth field contains the size
+ of the file in bytes. The fifth, sixth, and seventh fields contain the
+ month, day, and time, respectively, that the file was last modified.
+ Finally, the eighth field contains the name of the file.
+
+ The `$5 == "Nov"' in our `awk' program is an expression that tests
+ whether the fifth field of the output from `ls -l' matches the string
+ `Nov'. Each time a line has the string `Nov' in its fifth field, the
+ action `{ sum += $4 }' is performed. This adds the fourth field (the
+ file size) to the variable `sum'. As a result, when `awk' has finished
+ reading all the input lines, `sum' is the sum of the sizes of files
+ whose lines matched the pattern. (This works because `awk' variables
+ are automatically initialized to zero.)
+
+ After the last line of output from `ls' has been processed, the
+ `END' rule is executed, and the value of `sum' is printed. In this
+ example, the value of `sum' would be 80600.
+
+ These more advanced `awk' techniques are covered in later sections
+ (*note Overview of Actions: Actions.). Before you can move on to more
+ advanced `awk' programming, you have to know how `awk' interprets your
+ input and displays your output. By manipulating fields and using
+ `print' statements, you can produce some very useful and spectacular
+ looking reports.
+
+
+ File: gawk.info, Node: Running gawk, Next: Comments, Prev: More Complex, Up: Getting Started
+
+ How to Run `awk' Programs
+ =========================
+
+ There are several ways to run an `awk' program. If the program is
+ short, it is easiest to include it in the command that runs `awk', like
+ this:
+
+ awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ...
+
+ where PROGRAM consists of a series of patterns and actions, as
+ described earlier.
+
+ When the program is long, it is usually more convenient to put it in
+ a file and run it with a command like this:
+
+ awk -f PROGRAM-FILE INPUT-FILE1 INPUT-FILE2 ...
+
+ * Menu:
+
+ * One-shot:: Running a short throw-away `awk' program.
+ * Read Terminal:: Using no input files (input from
+ terminal instead).
+ * Long:: Putting permanent `awk' programs in files.
+ * Executable Scripts:: Making self-contained `awk' programs.
+
+
+ File: gawk.info, Node: One-shot, Next: Read Terminal, Prev: Running gawk, Up: Running gawk
+
+ One-shot Throw-away `awk' Programs
+ ----------------------------------
+
+ Once you are familiar with `awk', you will often type simple
+ programs at the moment you want to use them. Then you can write the
+ program as the first argument of the `awk' command, like this:
+
+ awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ...
+
+ where PROGRAM consists of a series of PATTERNS and ACTIONS, as
+ described earlier.
+
+ This command format instructs the shell to start `awk' and use the
+ PROGRAM to process records in the input file(s). There are single
+ quotes around PROGRAM so that the shell doesn't interpret any `awk'
+ characters as special shell characters. They also cause the shell to
+ treat all of PROGRAM as a single argument for `awk' and allow PROGRAM
+ to be more than one line long.
+
+ This format is also useful for running short or medium-sized `awk'
+ programs from shell scripts, because it avoids the need for a separate
+ file for the `awk' program. A self-contained shell script is more
+ reliable since there are no other files to misplace.
+
+
+ File: gawk.info, Node: Read Terminal, Next: Long, Prev: One-shot, Up: Running gawk
+
+ Running `awk' without Input Files
+ ---------------------------------
+
+ You can also run `awk' without any input files. If you type the
+ command line:
+
+ awk 'PROGRAM'
+
+ then `awk' applies the PROGRAM to the "standard input", which usually
+ means whatever you type on the terminal. This continues until you
+ indicate end-of-file by typing `Control-d'.
+
+ For example, if you execute this command:
+
+ awk '/th/'
+
+ whatever you type next is taken as data for that `awk' program. If you
+ go on to type the following data:
+
+ Kathy
+ Ben
+ Tom
+ Beth
+ Seth
+ Karen
+ Thomas
+ `Control-d'
+
+ then `awk' prints this output:
+
+ Kathy
+ Beth
+ Seth
+
+ as matching the pattern `th'. Notice that it did not recognize
+ `Thomas' as matching the pattern. The `awk' language is "case
+ sensitive", and matches patterns exactly. (However, you can override
+ this with the variable `IGNORECASE'. *Note Case-sensitivity in
+ Matching: Case-sensitivity.)
+
+
+ File: gawk.info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk
+
+ Running Long Programs
+ ---------------------
+
+ Sometimes your `awk' programs can be very long. In this case it is
+ more convenient to put the program into a separate file. To tell `awk'
+ to use that file for its program, you type:
+
+ awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ...
+
+ The `-f' instructs the `awk' utility to get the `awk' program from
+ the file SOURCE-FILE. Any file name can be used for SOURCE-FILE. For
+ example, you could put the program:
+
+ /th/
+
+ into the file `th-prog'. Then this command:
+
+ awk -f th-prog
+
+ does the same thing as this one:
+
+ awk '/th/'
+
+ which was explained earlier (*note Running `awk' without Input Files:
+ Read Terminal.). Note that you don't usually need single quotes around
+ the file name that you specify with `-f', because most file names don't
+ contain any of the shell's special characters. Notice that in
+ `th-prog', the `awk' program did not have single quotes around it. The
+ quotes are only needed for programs that are provided on the `awk'
+ command line.
+
+ If you want to identify your `awk' program files clearly as such,
+ you can add the extension `.awk' to the file name. This doesn't affect
+ the execution of the `awk' program, but it does make "housekeeping"
+ easier.
+
+
+ File: gawk.info, Node: Executable Scripts, Prev: Long, Up: Running gawk
+
+ Executable `awk' Programs
+ -------------------------
+
+ Once you have learned `awk', you may want to write self-contained
+ `awk' scripts, using the `#!' script mechanism. You can do this on
+ many Unix systems (1) (and someday on GNU).
+
+ For example, you could create a text file named `hello', containing
+ the following (where `BEGIN' is a feature we have not yet discussed):
+
+ #! /bin/awk -f
+
+ # a sample awk program
+ BEGIN { print "hello, world" }
+
+ After making this file executable (with the `chmod' command), you can
+ simply type:
+
+ hello
+
+ at the shell, and the system will arrange to run `awk' (2) as if you
+ had typed:
+
+ awk -f hello
+
+ Self-contained `awk' scripts are useful when you want to write a
+ program which users can invoke without knowing that the program is
+ written in `awk'.
+
+ If your system does not support the `#!' mechanism, you can get a
+ similar effect using a regular shell script. It would look something
+ like this:
+
+ : The colon makes sure this script is executed by the Bourne shell.
+ awk 'PROGRAM' "$@"
+
+ Using this technique, it is *vital* to enclose the PROGRAM in single
+ quotes to protect it from interpretation by the shell. If you omit the
+ quotes, only a shell wizard can predict the results.
+
+ The `"$@"' causes the shell to forward all the command line
+ arguments to the `awk' program, without interpretation. The first
+ line, which starts with a colon, is used so that this shell script will
+ work even if invoked by a user who uses the C shell.
+
+ ---------- Footnotes ----------
+
+ (1) The `#!' mechanism works on Unix systems derived from Berkeley
+ Unix, System V Release 4, and some System V Release 3 systems.
+
+ (2) The line beginning with `#!' lists the full pathname of an
+ interpreter to be run, and an optional initial command line argument to
+ pass to that interpreter. The operating system then runs the
+ interpreter with the given argument and the full argument list of the
+ executed program. The first argument in the list is the full pathname
+ of the `awk' program. The rest of the argument list will either be
+ options to `awk', or data files, or both.
+
+
+ File: gawk.info, Node: Comments, Next: Statements/Lines, Prev: Running gawk, Up: Getting Started
+
+ Comments in `awk' Programs
+ ==========================
+
+ A "comment" is some text that is included in a program for the sake
+ of human readers, and that is not really part of the program. Comments
+ can explain what the program does, and how it works. Nearly all
+ programming languages have provisions for comments, because programs are
+ typically hard to understand without their extra help.
+
+ In the `awk' language, a comment starts with the sharp sign
+ character, `#', and continues to the end of the line. The `awk'
+ language ignores the rest of a line following a sharp sign. For
+ example, we could have put the following into `th-prog':
+
+ # This program finds records containing the pattern `th'. This is how
+ # you continue comments on additional lines.
+ /th/
+
+ You can put comment lines into keyboard-composed throw-away `awk'
+ programs also, but this usually isn't very useful; the purpose of a
+ comment is to help you or another person understand the program at a
+ later time.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-2 gawk-2.15.5/gawk.info-2
*** /src/baseline/gawk-2.15.5/gawk.info-2 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-2 Sun Jun 12 22:28:49 1994
***************
*** 0 ****
--- 1,1236 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: Statements/Lines, Next: When, Prev: Comments, Up: Getting Started
+
+ `awk' Statements versus Lines
+ =============================
+
+ Most often, each line in an `awk' program is a separate statement or
+ separate rule, like this:
+
+ awk '/12/ { print $0 }
+ /21/ { print $0 }' BBS-list inventory-shipped
+
+ But sometimes statements can be more than one line, and lines can
+ contain several statements. You can split a statement into multiple
+ lines by inserting a newline after any of the following:
+
+ , { ? : || && do else
+
+ A newline at any other point is considered the end of the statement.
+ (Splitting lines after `?' and `:' is a minor `gawk' extension. The
+ `?' and `:' referred to here is the three operand conditional
+ expression described in *Note Conditional Expressions: Conditional Exp.)
+
+ If you would like to split a single statement into two lines at a
+ point where a newline would terminate it, you can "continue" it by
+ ending the first line with a backslash character, `\'. This is allowed
+ absolutely anywhere in the statement, even in the middle of a string or
+ regular expression. For example:
+
+ awk '/This program is too long, so continue it\
+ on the next line/ { print $1 }'
+
+ We have generally not used backslash continuation in the sample
+ programs in this manual. Since in `gawk' there is no limit on the
+ length of a line, it is never strictly necessary; it just makes
+ programs prettier. We have preferred to make them even more pretty by
+ keeping the statements short. Backslash continuation is most useful
+ when your `awk' program is in a separate source file, instead of typed
+ in on the command line. You should also note that many `awk'
+ implementations are more picky about where you may use backslash
+ continuation. For maximal portability of your `awk' programs, it is
+ best not to split your lines in the middle of a regular expression or a
+ string.
+
+ *Warning: backslash continuation does not work as described above
+ with the C shell.* Continuation with backslash works for `awk'
+ programs in files, and also for one-shot programs *provided* you are
+ using a POSIX-compliant shell, such as the Bourne shell or the
+ Bourne-again shell. But the C shell used on Berkeley Unix behaves
+ differently! There, you must use two backslashes in a row, followed by
+ a newline.
+
+ When `awk' statements within one rule are short, you might want to
+ put more than one of them on a line. You do this by separating the
+ statements with a semicolon, `;'. This also applies to the rules
+ themselves. Thus, the previous program could have been written:
+
+ /12/ { print $0 } ; /21/ { print $0 }
+
+ *Note:* the requirement that rules on the same line must be separated
+ with a semicolon is a recent change in the `awk' language; it was done
+ for consistency with the treatment of statements within an action.
+
+
+ File: gawk.info, Node: When, Prev: Statements/Lines, Up: Getting Started
+
+ When to Use `awk'
+ =================
+
+ You might wonder how `awk' might be useful for you. Using additional
+ utility programs, more advanced patterns, field separators, arithmetic
+ statements, and other selection criteria, you can produce much more
+ complex output. The `awk' language is very useful for producing
+ reports from large amounts of raw data, such as summarizing information
+ from the output of other utility programs like `ls'. (*Note A More
+ Complex Example: More Complex.)
+
+ Programs written with `awk' are usually much smaller than they would
+ be in other languages. This makes `awk' programs easy to compose and
+ use. Often `awk' programs can be quickly composed at your terminal,
+ used once, and thrown away. Since `awk' programs are interpreted, you
+ can avoid the usually lengthy edit-compile-test-debug cycle of software
+ development.
+
+ Complex programs have been written in `awk', including a complete
+ retargetable assembler for 8-bit microprocessors (*note Glossary::., for
+ more information) and a microcode assembler for a special purpose Prolog
+ computer. However, `awk''s capabilities are strained by tasks of such
+ complexity.
+
+ If you find yourself writing `awk' scripts of more than, say, a few
+ hundred lines, you might consider using a different programming
+ language. Emacs Lisp is a good choice if you need sophisticated string
+ or pattern matching capabilities. The shell is also good at string and
+ pattern matching; in addition, it allows powerful use of the system
+ utilities. More conventional languages, such as C, C++, and Lisp, offer
+ better facilities for system programming and for managing the complexity
+ of large programs. Programs in these languages may require more lines
+ of source code than the equivalent `awk' programs, but they are easier
+ to maintain and usually run more efficiently.
+
+
+ File: gawk.info, Node: Reading Files, Next: Printing, Prev: Getting Started, Up: Top
+
+ Reading Input Files
+ *******************
+
+ In the typical `awk' program, all input is read either from the
+ standard input (by default the keyboard, but often a pipe from another
+ command) or from files whose names you specify on the `awk' command
+ line. If you specify input files, `awk' reads them in order, reading
+ all the data from one before going on to the next. The name of the
+ current input file can be found in the built-in variable `FILENAME'
+ (*note Built-in Variables::.).
+
+ The input is read in units called records, and processed by the
+ rules one record at a time. By default, each record is one line. Each
+ record is split automatically into fields, to make it more convenient
+ for a rule to work on its parts.
+
+ On rare occasions you will need to use the `getline' command, which
+ can do explicit input from any number of files (*note Explicit Input
+ with `getline': Getline.).
+
+ * Menu:
+
+ * Records:: Controlling how data is split into records.
+ * Fields:: An introduction to fields.
+ * Non-Constant Fields:: Non-constant Field Numbers.
+ * Changing Fields:: Changing the Contents of a Field.
+ * Field Separators:: The field separator and how to change it.
+ * Constant Size:: Reading constant width data.
+ * Multiple Line:: Reading multi-line records.
+ * Getline:: Reading files under explicit program control
+ using the `getline' function.
+ * Close Input:: Closing an input file (so you can read from
+ the beginning once more).
+
+
+ File: gawk.info, Node: Records, Next: Fields, Prev: Reading Files, Up: Reading Files
+
+ How Input is Split into Records
+ ===============================
+
+ The `awk' language divides its input into records and fields.
+ Records are separated by a character called the "record separator". By
+ default, the record separator is the newline character, defining a
+ record to be a single line of text.
+
+ Sometimes you may want to use a different character to separate your
+ records. You can use a different character by changing the built-in
+ variable `RS'. The value of `RS' is a string that says how to separate
+ records; the default value is `"\n"', the string containing just a
+ newline character. This is why records are, by default, single lines.
+
+ `RS' can have any string as its value, but only the first character
+ of the string is used as the record separator. The other characters are
+ ignored. `RS' is exceptional in this regard; `awk' uses the full value
+ of all its other built-in variables.
+
+ You can change the value of `RS' in the `awk' program with the
+ assignment operator, `=' (*note Assignment Expressions: Assignment
+ Ops.). The new record-separator character should be enclosed in
+ quotation marks to make a string constant. Often the right time to do
+ this is at the beginning of execution, before any input has been
+ processed, so that the very first record will be read with the proper
+ separator. To do this, use the special `BEGIN' pattern (*note `BEGIN'
+ and `END' Special Patterns: BEGIN/END.). For example:
+
+ awk 'BEGIN { RS = "/" } ; { print $0 }' BBS-list
+
+ changes the value of `RS' to `"/"', before reading any input. This is
+ a string whose first character is a slash; as a result, records are
+ separated by slashes. Then the input file is read, and the second rule
+ in the `awk' program (the action with no pattern) prints each record.
+ Since each `print' statement adds a newline at the end of its output,
+ the effect of this `awk' program is to copy the input with each slash
+ changed to a newline.
+
+ Another way to change the record separator is on the command line,
+ using the variable-assignment feature (*note Invoking `awk': Command
+ Line.).
+
+ awk '{ print $0 }' RS="/" BBS-list
+
+ This sets `RS' to `/' before processing `BBS-list'.
+
+ Reaching the end of an input file terminates the current input
+ record, even if the last character in the file is not the character in
+ `RS'.
+
+ The empty string, `""' (a string of no characters), has a special
+ meaning as the value of `RS': it means that records are separated only
+ by blank lines. *Note Multiple-Line Records: Multiple Line, for more
+ details.
+
+ The `awk' utility keeps track of the number of records that have
+ been read so far from the current input file. This value is stored in a
+ built-in variable called `FNR'. It is reset to zero when a new file is
+ started. Another built-in variable, `NR', is the total number of input
+ records read so far from all files. It starts at zero but is never
+ automatically reset to zero.
+
+ If you change the value of `RS' in the middle of an `awk' run, the
+ new value is used to delimit subsequent records, but the record
+ currently being processed (and records already processed) are not
+ affected.
+
+
+ File: gawk.info, Node: Fields, Next: Non-Constant Fields, Prev: Records, Up: Reading Files
+
+ Examining Fields
+ ================
+
+ When `awk' reads an input record, the record is automatically
+ separated or "parsed" by the interpreter into chunks called "fields".
+ By default, fields are separated by whitespace, like words in a line.
+ Whitespace in `awk' means any string of one or more spaces and/or tabs;
+ other characters such as newline, formfeed, and so on, that are
+ considered whitespace by other languages are *not* considered
+ whitespace by `awk'.
+
+ The purpose of fields is to make it more convenient for you to refer
+ to these pieces of the record. You don't have to use them--you can
+ operate on the whole record if you wish--but fields are what make
+ simple `awk' programs so powerful.
+
+ To refer to a field in an `awk' program, you use a dollar-sign, `$',
+ followed by the number of the field you want. Thus, `$1' refers to the
+ first field, `$2' to the second, and so on. For example, suppose the
+ following is a line of input:
+
+ This seems like a pretty nice example.
+
+ Here the first field, or `$1', is `This'; the second field, or `$2', is
+ `seems'; and so on. Note that the last field, `$7', is `example.'.
+ Because there is no space between the `e' and the `.', the period is
+ considered part of the seventh field.
+
+ No matter how many fields there are, the last field in a record can
+ be represented by `$NF'. So, in the example above, `$NF' would be the
+ same as `$7', which is `example.'. Why this works is explained below
+ (*note Non-constant Field Numbers: Non-Constant Fields.). If you try
+ to refer to a field beyond the last one, such as `$8' when the record
+ has only 7 fields, you get the empty string.
+
+ Plain `NF', with no `$', is a built-in variable whose value is the
+ number of fields in the current record.
+
+ `$0', which looks like an attempt to refer to the zeroth field, is a
+ special case: it represents the whole input record. This is what you
+ would use if you weren't interested in fields.
+
+ Here are some more examples:
+
+ awk '$1 ~ /foo/ { print $0 }' BBS-list
+
+ This example prints each record in the file `BBS-list' whose first
+ field contains the string `foo'. The operator `~' is called a
+ "matching operator" (*note Comparison Expressions: Comparison Ops.); it
+ tests whether a string (here, the field `$1') matches a given regular
+ expression.
+
+ By contrast, the following example:
+
+ awk '/foo/ { print $1, $NF }' BBS-list
+
+ looks for `foo' in *the entire record* and prints the first field and
+ the last field for each input record containing a match.
+
+
+ File: gawk.info, Node: Non-Constant Fields, Next: Changing Fields, Prev: Fields, Up: Reading Files
+
+ Non-constant Field Numbers
+ ==========================
+
+ The number of a field does not need to be a constant. Any
+ expression in the `awk' language can be used after a `$' to refer to a
+ field. The value of the expression specifies the field number. If the
+ value is a string, rather than a number, it is converted to a number.
+ Consider this example:
+
+ awk '{ print $NR }'
+
+ Recall that `NR' is the number of records read so far: 1 in the first
+ record, 2 in the second, etc. So this example prints the first field
+ of the first record, the second field of the second record, and so on.
+ For the twentieth record, field number 20 is printed; most likely, the
+ record has fewer than 20 fields, so this prints a blank line.
+
+ Here is another example of using expressions as field numbers:
+
+ awk '{ print $(2*2) }' BBS-list
+
+ The `awk' language must evaluate the expression `(2*2)' and use its
+ value as the number of the field to print. The `*' sign represents
+ multiplication, so the expression `2*2' evaluates to 4. The
+ parentheses are used so that the multiplication is done before the `$'
+ operation; they are necessary whenever there is a binary operator in
+ the field-number expression. This example, then, prints the hours of
+ operation (the fourth field) for every line of the file `BBS-list'.
+
+ If the field number you compute is zero, you get the entire record.
+ Thus, `$(2-2)' has the same value as `$0'. Negative field numbers are
+ not allowed.
+
+ The number of fields in the current record is stored in the built-in
+ variable `NF' (*note Built-in Variables::.). The expression `$NF' is
+ not a special feature: it is the direct consequence of evaluating `NF'
+ and using its value as a field number.
+
+
+ File: gawk.info, Node: Changing Fields, Next: Field Separators, Prev: Non-Constant Fields, Up: Reading Files
+
+ Changing the Contents of a Field
+ ================================
+
+ You can change the contents of a field as seen by `awk' within an
+ `awk' program; this changes what `awk' perceives as the current input
+ record. (The actual input is untouched: `awk' never modifies the input
+ file.)
+
+ Consider this example:
+
+ awk '{ $3 = $2 - 10; print $2, $3 }' inventory-shipped
+
+ The `-' sign represents subtraction, so this program reassigns field
+ three, `$3', to be the value of field two minus ten, `$2 - 10'. (*Note
+ Arithmetic Operators: Arithmetic Ops.) Then field two, and the new
+ value for field three, are printed.
+
+ In order for this to work, the text in field `$2' must make sense as
+ a number; the string of characters must be converted to a number in
+ order for the computer to do arithmetic on it. The number resulting
+ from the subtraction is converted back to a string of characters which
+ then becomes field three. *Note Conversion of Strings and Numbers:
+ Conversion.
+
+ When you change the value of a field (as perceived by `awk'), the
+ text of the input record is recalculated to contain the new field where
+ the old one was. Therefore, `$0' changes to reflect the altered field.
+ Thus,
+
+ awk '{ $2 = $2 - 10; print $0 }' inventory-shipped
+
+ prints a copy of the input file, with 10 subtracted from the second
+ field of each line.
+
+ You can also assign contents to fields that are out of range. For
+ example:
+
+ awk '{ $6 = ($5 + $4 + $3 + $2) ; print $6 }' inventory-shipped
+
+ We've just created `$6', whose value is the sum of fields `$2', `$3',
+ `$4', and `$5'. The `+' sign represents addition. For the file
+ `inventory-shipped', `$6' represents the total number of parcels
+ shipped for a particular month.
+
+ Creating a new field changes the internal `awk' copy of the current
+ input record--the value of `$0'. Thus, if you do `print $0' after
+ adding a field, the record printed includes the new field, with the
+ appropriate number of field separators between it and the previously
+ existing fields.
+
+ This recomputation affects and is affected by several features not
+ yet discussed, in particular, the "output field separator", `OFS',
+ which is used to separate the fields (*note Output Separators::.), and
+ `NF' (the number of fields; *note Examining Fields: Fields.). For
+ example, the value of `NF' is set to the number of the highest field
+ you create.
+
+ Note, however, that merely *referencing* an out-of-range field does
+ *not* change the value of either `$0' or `NF'. Referencing an
+ out-of-range field merely produces a null string. For example:
+
+ if ($(NF+1) != "")
+ print "can't happen"
+ else
+ print "everything is normal"
+
+ should print `everything is normal', because `NF+1' is certain to be
+ out of range. (*Note The `if' Statement: If Statement, for more
+ information about `awk''s `if-else' statements.)
+
+ It is important to note that assigning to a field will change the
+ value of `$0', but will not change the value of `NF', even when you
+ assign the null string to a field. For example:
+
+ echo a b c d | awk '{ OFS = ":"; $2 = "" ; print ; print NF }'
+
+ prints
+
+ a::c:d
+ 4
+
+ The field is still there, it just has an empty value. You can tell
+ because there are two colons in a row.
+
+
+ File: gawk.info, Node: Field Separators, Next: Constant Size, Prev: Changing Fields, Up: Reading Files
+
+ Specifying how Fields are Separated
+ ===================================
+
+ (This section is rather long; it describes one of the most
+ fundamental operations in `awk'. If you are a novice with `awk', we
+ recommend that you re-read this section after you have studied the
+ section on regular expressions, *Note Regular Expressions as Patterns:
+ Regexp.)
+
+ The way `awk' splits an input record into fields is controlled by
+ the "field separator", which is a single character or a regular
+ expression. `awk' scans the input record for matches for the
+ separator; the fields themselves are the text between the matches. For
+ example, if the field separator is `oo', then the following line:
+
+ moo goo gai pan
+
+ would be split into three fields: `m', ` g' and ` gai pan'.
+
+ The field separator is represented by the built-in variable `FS'.
+ Shell programmers take note! `awk' does not use the name `IFS' which
+ is used by the shell.
+
+ You can change the value of `FS' in the `awk' program with the
+ assignment operator, `=' (*note Assignment Expressions: Assignment
+ Ops.). Often the right time to do this is at the beginning of
+ execution, before any input has been processed, so that the very first
+ record will be read with the proper separator. To do this, use the
+ special `BEGIN' pattern (*note `BEGIN' and `END' Special Patterns:
+ BEGIN/END.). For example, here we set the value of `FS' to the string
+ `","':
+
+ awk 'BEGIN { FS = "," } ; { print $2 }'
+
+ Given the input line,
+
+ John Q. Smith, 29 Oak St., Walamazoo, MI 42139
+
+ this `awk' program extracts the string ` 29 Oak St.'.
+
+ Sometimes your input data will contain separator characters that
+ don't separate fields the way you thought they would. For instance, the
+ person's name in the example we've been using might have a title or
+ suffix attached, such as `John Q. Smith, LXIX'. From input containing
+ such a name:
+
+ John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139
+
+ the previous sample program would extract ` LXIX', instead of ` 29 Oak
+ St.'. If you were expecting the program to print the address, you
+ would be surprised. So choose your data layout and separator
+ characters carefully to prevent such problems.
+
+ As you know, by default, fields are separated by whitespace sequences
+ (spaces and tabs), not by single spaces: two spaces in a row do not
+ delimit an empty field. The default value of the field separator is a
+ string `" "' containing a single space. If this value were interpreted
+ in the usual way, each space character would separate fields, so two
+ spaces in a row would make an empty field between them. The reason
+ this does not happen is that a single space as the value of `FS' is a
+ special case: it is taken to specify the default manner of delimiting
+ fields.
+
+ If `FS' is any other single character, such as `","', then each
+ occurrence of that character separates two fields. Two consecutive
+ occurrences delimit an empty field. If the character occurs at the
+ beginning or the end of the line, that too delimits an empty field. The
+ space character is the only single character which does not follow these
+ rules.
+
+ More generally, the value of `FS' may be a string containing any
+ regular expression. Then each match in the record for the regular
+ expression separates fields. For example, the assignment:
+
+ FS = ", \t"
+
+ makes every area of an input line that consists of a comma followed by a
+ space and a tab, into a field separator. (`\t' stands for a tab.)
+
+ For a less trivial example of a regular expression, suppose you want
+ single spaces to separate fields the way single commas were used above.
+ You can set `FS' to `"[ ]"'. This regular expression matches a single
+ space and nothing else.
+
+ `FS' can be set on the command line. You use the `-F' argument to
+ do so. For example:
+
+ awk -F, 'PROGRAM' INPUT-FILES
+
+ sets `FS' to be the `,' character. Notice that the argument uses a
+ capital `F'. Contrast this with `-f', which specifies a file
+ containing an `awk' program. Case is significant in command options:
+ the `-F' and `-f' options have nothing to do with each other. You can
+ use both options at the same time to set the `FS' argument *and* get an
+ `awk' program from a file.
+
+ The value used for the argument to `-F' is processed in exactly the
+ same way as assignments to the built-in variable `FS'. This means that
+ if the field separator contains special characters, they must be escaped
+ appropriately. For example, to use a `\' as the field separator, you
+ would have to type:
+
+ # same as FS = "\\"
+ awk -F\\\\ '...' files ...
+
+ Since `\' is used for quoting in the shell, `awk' will see `-F\\'.
+ Then `awk' processes the `\\' for escape characters (*note Constant
+ Expressions: Constants.), finally yielding a single `\' to be used for
+ the field separator.
+
+ As a special case, in compatibility mode (*note Invoking `awk':
+ Command Line.), if the argument to `-F' is `t', then `FS' is set to the
+ tab character. (This is because if you type `-F\t', without the quotes,
+ at the shell, the `\' gets deleted, so `awk' figures that you really
+ want your fields to be separated with tabs, and not `t's. Use `-v
+ FS="t"' on the command line if you really do want to separate your
+ fields with `t's.)
+
+ For example, let's use an `awk' program file called `baud.awk' that
+ contains the pattern `/300/', and the action `print $1'. Here is the
+ program:
+
+ /300/ { print $1 }
+
+ Let's also set `FS' to be the `-' character, and run the program on
+ the file `BBS-list'. The following command prints a list of the names
+ of the bulletin boards that operate at 300 baud and the first three
+ digits of their phone numbers:
+
+ awk -F- -f baud.awk BBS-list
+
+ It produces this output:
+
+ aardvark 555
+ alpo
+ barfly 555
+ bites 555
+ camelot 555
+ core 555
+ fooey 555
+ foot 555
+ macfoo 555
+ sdace 555
+ sabafoo 555
+
+ Note the second line of output. If you check the original file, you
+ will see that the second line looked like this:
+
+ alpo-net 555-3412 2400/1200/300 A
+
+ The `-' as part of the system's name was used as the field
+ separator, instead of the `-' in the phone number that was originally
+ intended. This demonstrates why you have to be careful in choosing
+ your field and record separators.
+
+ The following program searches the system password file, and prints
+ the entries for users who have no password:
+
+ awk -F: '$2 == ""' /etc/passwd
+
+ Here we use the `-F' option on the command line to set the field
+ separator. Note that fields in `/etc/passwd' are separated by colons.
+ The second field represents a user's encrypted password, but if the
+ field is empty, that user has no password.
+
+ According to the POSIX standard, `awk' is supposed to behave as if
+ each record is split into fields at the time that it is read. In
+ particular, this means that you can change the value of `FS' after a
+ record is read, but before any of the fields are referenced. The value
+ of the fields (i.e. how they were split) should reflect the old value
+ of `FS', not the new one.
+
+ However, many implementations of `awk' do not do this. Instead,
+ they defer splitting the fields until a field reference actually
+ happens, using the *current* value of `FS'! This behavior can be
+ difficult to diagnose. The following example illustrates the results of
+ the two methods. (The `sed' command prints just the first line of
+ `/etc/passwd'.)
+
+ sed 1q /etc/passwd | awk '{ FS = ":" ; print $1 }'
+
+ will usually print
+
+ root
+
+ on an incorrect implementation of `awk', while `gawk' will print
+ something like
+
+ root:nSijPlPhZZwgE:0:0:Root:/:
+
+ There is an important difference between the two cases of `FS = " "'
+ (a single blank) and `FS = "[ \t]+"' (which is a regular expression
+ matching one or more blanks or tabs). For both values of `FS', fields
+ are separated by runs of blanks and/or tabs. However, when the value of
+ `FS' is `" "', `awk' will strip leading and trailing whitespace from
+ the record, and then decide where the fields are.
+
+ For example, the following expression prints `b':
+
+ echo ' a b c d ' | awk '{ print $2 }'
+
+ However, the following prints `a':
+
+ echo ' a b c d ' | awk 'BEGIN { FS = "[ \t]+" } ; { print $2 }'
+
+ In this case, the first field is null.
+
+ The stripping of leading and trailing whitespace also comes into
+ play whenever `$0' is recomputed. For instance, this pipeline
+
+ echo ' a b c d' | awk '{ print; $2 = $2; print }'
+
+ produces this output:
+
+ a b c d
+ a b c d
+
+ The first `print' statement prints the record as it was read, with
+ leading whitespace intact. The assignment to `$2' rebuilds `$0' by
+ concatenating `$1' through `$NF' together, separated by the value of
+ `OFS'. Since the leading whitespace was ignored when finding `$1', it
+ is not part of the new `$0'. Finally, the last `print' statement
+ prints the new `$0'.
+
+ The following table summarizes how fields are split, based on the
+ value of `FS'.
+
+ `FS == " "'
+ Fields are separated by runs of whitespace. Leading and trailing
+ whitespace are ignored. This is the default.
+
+ `FS == ANY SINGLE CHARACTER'
+ Fields are separated by each occurrence of the character. Multiple
+ successive occurrences delimit empty fields, as do leading and
+ trailing occurrences.
+
+ `FS == REGEXP'
+ Fields are separated by occurrences of characters that match
+ REGEXP. Leading and trailing matches of REGEXP delimit empty
+ fields.
+
+
+ File: gawk.info, Node: Constant Size, Next: Multiple Line, Prev: Field Separators, Up: Reading Files
+
+ Reading Fixed-width Data
+ ========================
+
+ (This section discusses an advanced, experimental feature. If you
+ are a novice `awk' user, you may wish to skip it on the first reading.)
+
+ `gawk' 2.13 introduced a new facility for dealing with fixed-width
+ fields with no distinctive field separator. Data of this nature arises
+ typically in one of at least two ways: the input for old FORTRAN
+ programs where numbers are run together, and the output of programs
+ that did not anticipate the use of their output as input for other
+ programs.
+
+ An example of the latter is a table where all the columns are lined
+ up by the use of a variable number of spaces and *empty fields are just
+ spaces*. Clearly, `awk''s normal field splitting based on `FS' will
+ not work well in this case. (Although a portable `awk' program can use
+ a series of `substr' calls on `$0', this is awkward and inefficient for
+ a large number of fields.)
+
+ The splitting of an input record into fixed-width fields is
+ specified by assigning a string containing space-separated numbers to
+ the built-in variable `FIELDWIDTHS'. Each number specifies the width
+ of the field *including* columns between fields. If you want to ignore
+ the columns between fields, you can specify the width as a separate
+ field that is subsequently ignored.
+
+ The following data is the output of the `w' utility. It is useful
+ to illustrate the use of `FIELDWIDTHS'.
+
+ 10:06pm up 21 days, 14:04, 23 users
+ User tty login idle JCPU PCPU what
+ hzuo ttyV0 8:58pm 9 5 vi p24.tex
+ hzang ttyV3 6:37pm 50 -csh
+ eklye ttyV5 9:53pm 7 1 em thes.tex
+ dportein ttyV6 8:17pm 1:47 -csh
+ gierd ttyD3 10:00pm 1 elm
+ dave ttyD4 9:47pm 4 4 w
+ brent ttyp0 26Jun91 4:46 26:46 4:41 bash
+ dave ttyq4 26Jun9115days 46 46 wnewmail
+
+ The following program takes the above input, converts the idle time
+ to number of seconds and prints out the first two fields and the
+ calculated idle time. (This program uses a number of `awk' features
+ that haven't been introduced yet.)
+
+ BEGIN { FIELDWIDTHS = "9 6 10 6 7 7 35" }
+ NR > 2 {
+ idle = $4
+ sub(/^ */, "", idle) # strip leading spaces
+ if (idle == "") idle = 0
+ if (idle ~ /:/) { split(idle, t, ":"); idle = t[1] * 60 + t[2] }
+ if (idle ~ /days/) { idle *= 24 * 60 * 60 }
+
+ print $1, $2, idle
+ }
+
+ Here is the result of running the program on the data:
+
+ hzuo ttyV0 0
+ hzang ttyV3 50
+ eklye ttyV5 0
+ dportein ttyV6 107
+ gierd ttyD3 1
+ dave ttyD4 0
+ brent ttyp0 286
+ dave ttyq4 1296000
+
+ Another (possibly more practical) example of fixed-width input data
+ would be the input from a deck of balloting cards. In some parts of
+ the United States, voters make their choices by punching holes in
+ computer cards. These cards are then processed to count the votes for
+ any particular candidate or on any particular issue. Since a voter may
+ choose not to vote on some issue, any column on the card may be empty.
+ An `awk' program for processing such data could use the `FIELDWIDTHS'
+ feature to simplify reading the data.
+
+ This feature is still experimental, and will likely evolve over time.
+
+
+ File: gawk.info, Node: Multiple Line, Next: Getline, Prev: Constant Size, Up: Reading Files
+
+ Multiple-Line Records
+ =====================
+
+ In some data bases, a single line cannot conveniently hold all the
+ information in one entry. In such cases, you can use multi-line
+ records.
+
+ The first step in doing this is to choose your data format: when
+ records are not defined as single lines, how do you want to define them?
+ What should separate records?
+
+ One technique is to use an unusual character or string to separate
+ records. For example, you could use the formfeed character (written
+ `\f' in `awk', as in C) to separate them, making each record a page of
+ the file. To do this, just set the variable `RS' to `"\f"' (a string
+ containing the formfeed character). Any other character could equally
+ well be used, as long as it won't be part of the data in a record.
+
+ Another technique is to have blank lines separate records. By a
+ special dispensation, a null string as the value of `RS' indicates that
+ records are separated by one or more blank lines. If you set `RS' to
+ the null string, a record always ends at the first blank line
+ encountered. And the next record doesn't start until the first nonblank
+ line that follows--no matter how many blank lines appear in a row, they
+ are considered one record-separator. (End of file is also considered a
+ record separator.)
+
+ The second step is to separate the fields in the record. One way to
+ do this is to put each field on a separate line: to do this, just set
+ the variable `FS' to the string `"\n"'. (This simple regular
+ expression matches a single newline.)
+
+ Another way to separate fields is to divide each of the lines into
+ fields in the normal manner. This happens by default as a result of a
+ special feature: when `RS' is set to the null string, the newline
+ character *always* acts as a field separator. This is in addition to
+ whatever field separations result from `FS'.
+
+ The original motivation for this special exception was probably so
+ that you get useful behavior in the default case (i.e., `FS == " "').
+ This feature can be a problem if you really don't want the newline
+ character to separate fields, since there is no way to prevent it.
+ However, you can work around this by using the `split' function to
+ break up the record manually (*note Built-in Functions for String
+ Manipulation: String Functions.).
+
+
+ File: gawk.info, Node: Getline, Next: Close Input, Prev: Multiple Line, Up: Reading Files
+
+ Explicit Input with `getline'
+ =============================
+
+ So far we have been getting our input files from `awk''s main input
+ stream--either the standard input (usually your terminal) or the files
+ specified on the command line. The `awk' language has a special
+ built-in command called `getline' that can be used to read input under
+ your explicit control.
+
+ This command is quite complex and should *not* be used by beginners.
+ It is covered here because this is the chapter on input. The examples
+ that follow the explanation of the `getline' command include material
+ that has not been covered yet. Therefore, come back and study the
+ `getline' command *after* you have reviewed the rest of this manual and
+ have a good knowledge of how `awk' works.
+
+ `getline' returns 1 if it finds a record, and 0 if the end of the
+ file is encountered. If there is some error in getting a record, such
+ as a file that cannot be opened, then `getline' returns -1. In this
+ case, `gawk' sets the variable `ERRNO' to a string describing the error
+ that occurred.
+
+ In the following examples, COMMAND stands for a string value that
+ represents a shell command.
+
+ `getline'
+ The `getline' command can be used without arguments to read input
+ from the current input file. All it does in this case is read the
+ next input record and split it up into fields. This is useful if
+ you've finished processing the current record, but you want to do
+ some special processing *right now* on the next record. Here's an
+ example:
+
+ awk '{
+ if (t = index($0, "/*")) {
+ if (t > 1)
+ tmp = substr($0, 1, t - 1)
+ else
+ tmp = ""
+ u = index(substr($0, t + 2), "*/")
+ while (u == 0) {
+ getline
+ t = -1
+ u = index($0, "*/")
+ }
+ if (u <= length($0) - 2)
+ $0 = tmp substr($0, t + u + 3)
+ else
+ $0 = tmp
+ }
+ print $0
+ }'
+
+ This `awk' program deletes all C-style comments, `/* ... */',
+ from the input. By replacing the `print $0' with other
+ statements, you could perform more complicated processing on the
+ decommented input, like searching for matches of a regular
+ expression. (This program has a subtle problem--can you spot it?)
+
+ This form of the `getline' command sets `NF' (the number of
+ fields; *note Examining Fields: Fields.), `NR' (the number of
+ records read so far; *note How Input is Split into Records:
+ Records.), `FNR' (the number of records read from this input
+ file), and the value of `$0'.
+
+ *Note:* the new value of `$0' is used in testing the patterns of
+ any subsequent rules. The original value of `$0' that triggered
+ the rule which executed `getline' is lost. By contrast, the
+ `next' statement reads a new record but immediately begins
+ processing it normally, starting with the first rule in the
+ program. *Note The `next' Statement: Next Statement.
+
+ `getline VAR'
+ This form of `getline' reads a record into the variable VAR. This
+ is useful when you want your program to read the next record from
+ the current input file, but you don't want to subject the record
+ to the normal input processing.
+
+ For example, suppose the next line is a comment, or a special
+ string, and you want to read it, but you must make certain that it
+ won't trigger any rules. This version of `getline' allows you to
+ read that line and store it in a variable so that the main
+ read-a-line-and-check-each-rule loop of `awk' never sees it.
+
+ The following example swaps every two lines of input. For
+ example, given:
+
+ wan
+ tew
+ free
+ phore
+
+ it outputs:
+
+ tew
+ wan
+ phore
+ free
+
+ Here's the program:
+
+ awk '{
+ if ((getline tmp) > 0) {
+ print tmp
+ print $0
+ } else
+ print $0
+ }'
+
+ The `getline' function used in this way sets only the variables
+ `NR' and `FNR' (and of course, VAR). The record is not split into
+ fields, so the values of the fields (including `$0') and the value
+ of `NF' do not change.
+
+ `getline < FILE'
+ This form of the `getline' function takes its input from the file
+ FILE. Here FILE is a string-valued expression that specifies the
+ file name. `< FILE' is called a "redirection" since it directs
+ input to come from a different place.
+
+ This form is useful if you want to read your input from a
+ particular file, instead of from the main input stream. For
+ example, the following program reads its input record from the
+ file `foo.input' when it encounters a first field with a value
+ equal to 10 in the current input file.
+
+ awk '{
+ if ($1 == 10) {
+ getline < "foo.input"
+ print
+ } else
+ print
+ }'
+
+ Since the main input stream is not used, the values of `NR' and
+ `FNR' are not changed. But the record read is split into fields in
+ the normal manner, so the values of `$0' and other fields are
+ changed. So is the value of `NF'.
+
+ This does not cause the record to be tested against all the
+ patterns in the `awk' program, in the way that would happen if the
+ record were read normally by the main processing loop of `awk'.
+ However the new record is tested against any subsequent rules,
+ just as when `getline' is used without a redirection.
+
+ `getline VAR < FILE'
+ This form of the `getline' function takes its input from the file
+ FILE and puts it in the variable VAR. As above, FILE is a
+ string-valued expression that specifies the file from which to
+ read.
+
+ In this version of `getline', none of the built-in variables are
+ changed, and the record is not split into fields. The only
+ variable changed is VAR.
+
+ For example, the following program copies all the input files to
+ the output, except for records that say `@include FILENAME'. Such
+ a record is replaced by the contents of the file FILENAME.
+
+ awk '{
+ if (NF == 2 && $1 == "@include") {
+ while ((getline line < $2) > 0)
+ print line
+ close($2)
+ } else
+ print
+ }'
+
+ Note here how the name of the extra input file is not built into
+ the program; it is taken from the data, from the second field on
+ the `@include' line.
+
+ The `close' function is called to ensure that if two identical
+ `@include' lines appear in the input, the entire specified file is
+ included twice. *Note Closing Input Files and Pipes: Close Input.
+
+ One deficiency of this program is that it does not process nested
+ `@include' statements the way a true macro preprocessor would.
+
+ `COMMAND | getline'
+ You can "pipe" the output of a command into `getline'. A pipe is
+ simply a way to link the output of one program to the input of
+ another. In this case, the string COMMAND is run as a shell
+ command and its output is piped into `awk' to be used as input.
+ This form of `getline' reads one record from the pipe.
+
+ For example, the following program copies input to output, except
+ for lines that begin with `@execute', which are replaced by the
+ output produced by running the rest of the line as a shell command:
+
+ awk '{
+ if ($1 == "@execute") {
+ tmp = substr($0, 10)
+ while ((tmp | getline) > 0)
+ print
+ close(tmp)
+ } else
+ print
+ }'
+
+ The `close' function is called to ensure that if two identical
+ `@execute' lines appear in the input, the command is run for each
+ one. *Note Closing Input Files and Pipes: Close Input.
+
+ Given the input:
+
+ foo
+ bar
+ baz
+ @execute who
+ bletch
+
+ the program might produce:
+
+ foo
+ bar
+ baz
+ hack ttyv0 Jul 13 14:22
+ hack ttyp0 Jul 13 14:23 (gnu:0)
+ hack ttyp1 Jul 13 14:23 (gnu:0)
+ hack ttyp2 Jul 13 14:23 (gnu:0)
+ hack ttyp3 Jul 13 14:23 (gnu:0)
+ bletch
+
+ Notice that this program ran the command `who' and printed the
+ result. (If you try this program yourself, you will get different
+ results, showing you who is logged in on your system.)
+
+ This variation of `getline' splits the record into fields, sets the
+ value of `NF' and recomputes the value of `$0'. The values of
+ `NR' and `FNR' are not changed.
+
+ `COMMAND | getline VAR'
+ The output of the command COMMAND is sent through a pipe to
+ `getline' and into the variable VAR. For example, the following
+ program reads the current date and time into the variable
+ `current_time', using the `date' utility, and then prints it.
+
+ awk 'BEGIN {
+ "date" | getline current_time
+ close("date")
+ print "Report printed on " current_time
+ }'
+
+ In this version of `getline', none of the built-in variables are
+ changed, and the record is not split into fields.
+
+
+ File: gawk.info, Node: Close Input, Prev: Getline, Up: Reading Files
+
+ Closing Input Files and Pipes
+ =============================
+
+ If the same file name or the same shell command is used with
+ `getline' more than once during the execution of an `awk' program, the
+ file is opened (or the command is executed) only the first time. At
+ that time, the first record of input is read from that file or command.
+ The next time the same file or command is used in `getline', another
+ record is read from it, and so on.
+
+ This implies that if you want to start reading the same file again
+ from the beginning, or if you want to rerun a shell command (rather than
+ reading more output from the command), you must take special steps.
+ What you must do is use the `close' function, as follows:
+
+ close(FILENAME)
+
+ or
+
+ close(COMMAND)
+
+ The argument FILENAME or COMMAND can be any expression. Its value
+ must exactly equal the string that was used to open the file or start
+ the command--for example, if you open a pipe with this:
+
+ "sort -r names" | getline foo
+
+ then you must close it with this:
+
+ close("sort -r names")
+
+ Once this function call is executed, the next `getline' from that
+ file or command will reopen the file or rerun the command.
+
+ `close' returns a value of zero if the close succeeded. Otherwise,
+ the value will be non-zero. In this case, `gawk' sets the variable
+ `ERRNO' to a string describing the error that occurred.
+
+
+ File: gawk.info, Node: Printing, Next: One-liners, Prev: Reading Files, Up: Top
+
+ Printing Output
+ ***************
+
+ One of the most common things that actions do is to output or "print"
+ some or all of the input. For simple output, use the `print'
+ statement. For fancier formatting use the `printf' statement. Both
+ are described in this chapter.
+
+ * Menu:
+
+ * Print:: The `print' statement.
+ * Print Examples:: Simple examples of `print' statements.
+ * Output Separators:: The output separators and how to change them.
+ * OFMT:: Controlling Numeric Output With `print'.
+ * Printf:: The `printf' statement.
+ * Redirection:: How to redirect output to multiple
+ files and pipes.
+ * Special Files:: File name interpretation in `gawk'.
+ `gawk' allows access to
+ inherited file descriptors.
+
+
+ File: gawk.info, Node: Print, Next: Print Examples, Prev: Printing, Up: Printing
+
+ The `print' Statement
+ =====================
+
+ The `print' statement does output with simple, standardized
+ formatting. You specify only the strings or numbers to be printed, in a
+ list separated by commas. They are output, separated by single spaces,
+ followed by a newline. The statement looks like this:
+
+ print ITEM1, ITEM2, ...
+
+ The entire list of items may optionally be enclosed in parentheses. The
+ parentheses are necessary if any of the item expressions uses a
+ relational operator; otherwise it could be confused with a redirection
+ (*note Redirecting Output of `print' and `printf': Redirection.). The
+ relational operators are `==', `!=', `<', `>', `>=', `<=', `~' and `!~'
+ (*note Comparison Expressions: Comparison Ops.).
+
+ The items printed can be constant strings or numbers, fields of the
+ current record (such as `$1'), variables, or any `awk' expressions.
+ The `print' statement is completely general for computing *what* values
+ to print. With two exceptions, you cannot specify *how* to print
+ them--how many columns, whether to use exponential notation or not, and
+ so on. (*Note Output Separators::, and *Note Controlling Numeric
+ Output with `print': OFMT.) For that, you need the `printf' statement
+ (*note Using `printf' Statements for Fancier Printing: Printf.).
+
+ The simple statement `print' with no items is equivalent to `print
+ $0': it prints the entire current record. To print a blank line, use
+ `print ""', where `""' is the null, or empty, string.
+
+ To print a fixed piece of text, use a string constant such as
+ `"Hello there"' as one item. If you forget to use the double-quote
+ characters, your text will be taken as an `awk' expression, and you
+ will probably get an error. Keep in mind that a space is printed
+ between any two items.
+
+ Most often, each `print' statement makes one line of output. But it
+ isn't limited to one line. If an item value is a string that contains a
+ newline, the newline is output along with the rest of the string. A
+ single `print' can make any number of lines this way.
+
+
+ File: gawk.info, Node: Print Examples, Next: Output Separators, Prev: Print, Up: Printing
+
+ Examples of `print' Statements
+ ==============================
+
+ Here is an example of printing a string that contains embedded
+ newlines:
+
+ awk 'BEGIN { print "line one\nline two\nline three" }'
+
+ produces output like this:
+
+ line one
+ line two
+ line three
+
+ Here is an example that prints the first two fields of each input
+ record, with a space between them:
+
+ awk '{ print $1, $2 }' inventory-shipped
+
+ Its output looks like this:
+
+ Jan 13
+ Feb 15
+ Mar 15
+ ...
+
+ A common mistake in using the `print' statement is to omit the comma
+ between two items. This often has the effect of making the items run
+ together in the output, with no space. The reason for this is that
+ juxtaposing two string expressions in `awk' means to concatenate them.
+ For example, without the comma:
+
+ awk '{ print $1 $2 }' inventory-shipped
+
+ prints:
+
+ Jan13
+ Feb15
+ Mar15
+ ...
+
+ Neither example's output makes much sense to someone unfamiliar with
+ the file `inventory-shipped'. A heading line at the beginning would
+ make it clearer. Let's add some headings to our table of months (`$1')
+ and green crates shipped (`$2'). We do this using the `BEGIN' pattern
+ (*note `BEGIN' and `END' Special Patterns: BEGIN/END.) to force the
+ headings to be printed only once:
+
+ awk 'BEGIN { print "Month Crates"
+ print "----- ------" }
+ { print $1, $2 }' inventory-shipped
+
+ Did you already guess what happens? This program prints the following:
+
+ Month Crates
+ ----- ------
+ Jan 13
+ Feb 15
+ Mar 15
+ ...
+
+ The headings and the table data don't line up! We can fix this by
+ printing some spaces between the two fields:
+
+ awk 'BEGIN { print "Month Crates"
+ print "----- ------" }
+ { print $1, " ", $2 }' inventory-shipped
+
+ You can imagine that this way of lining up columns can get pretty
+ complicated when you have many columns to fix. Counting spaces for two
+ or three columns can be simple, but more than this and you can get
+ "lost" quite easily. This is why the `printf' statement was created
+ (*note Using `printf' Statements for Fancier Printing: Printf.); one of
+ its specialties is lining up columns of data.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-3 gawk-2.15.5/gawk.info-3
*** /src/baseline/gawk-2.15.5/gawk.info-3 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-3 Sun Jun 12 22:28:50 1994
***************
*** 0 ****
--- 1,1288 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: Output Separators, Next: OFMT, Prev: Print Examples, Up: Printing
+
+ Output Separators
+ =================
+
+ As mentioned previously, a `print' statement contains a list of
+ items, separated by commas. In the output, the items are normally
+ separated by single spaces. But they do not have to be spaces; a
+ single space is only the default. You can specify any string of
+ characters to use as the "output field separator" by setting the
+ built-in variable `OFS'. The initial value of this variable is the
+ string `" "', that is, just a single space.
+
+ The output from an entire `print' statement is called an "output
+ record". Each `print' statement outputs one output record and then
+ outputs a string called the "output record separator". The built-in
+ variable `ORS' specifies this string. The initial value of the
+ variable is the string `"\n"' containing a newline character; thus,
+ normally each `print' statement makes a separate line.
+
+ You can change how output fields and records are separated by
+ assigning new values to the variables `OFS' and/or `ORS'. The usual
+ place to do this is in the `BEGIN' rule (*note `BEGIN' and `END'
+ Special Patterns: BEGIN/END.), so that it happens before any input is
+ processed. You may also do this with assignments on the command line,
+ before the names of your input files.
+
+ The following example prints the first and second fields of each
+ input record separated by a semicolon, with a blank line added after
+ each line:
+
+ awk 'BEGIN { OFS = ";"; ORS = "\n\n" }
+ { print $1, $2 }' BBS-list
+
+ If the value of `ORS' does not contain a newline, all your output
+ will be run together on a single line, unless you output newlines some
+ other way.
+
+
+ File: gawk.info, Node: OFMT, Next: Printf, Prev: Output Separators, Up: Printing
+
+ Controlling Numeric Output with `print'
+ =======================================
+
+ When you use the `print' statement to print numeric values, `awk'
+ internally converts the number to a string of characters, and prints
+ that string. `awk' uses the `sprintf' function to do this conversion.
+ For now, it suffices to say that the `sprintf' function accepts a
+ "format specification" that tells it how to format numbers (or
+ strings), and that there are a number of different ways that numbers
+ can be formatted. The different format specifications are discussed
+ more fully in *Note Using `printf' Statements for Fancier Printing:
+ Printf.
+
+ The built-in variable `OFMT' contains the default format
+ specification that `print' uses with `sprintf' when it wants to convert
+ a number to a string for printing. By supplying different format
+ specifications as the value of `OFMT', you can change how `print' will
+ print your numbers. As a brief example:
+
+ awk 'BEGIN { OFMT = "%d" # print numbers as integers
+ print 17.23 }'
+
+ will print `17'.
+
+
+ File: gawk.info, Node: Printf, Next: Redirection, Prev: OFMT, Up: Printing
+
+ Using `printf' Statements for Fancier Printing
+ ==============================================
+
+ If you want more precise control over the output format than `print'
+ gives you, use `printf'. With `printf' you can specify the width to
+ use for each item, and you can specify various stylistic choices for
+ numbers (such as what radix to use, whether to print an exponent,
+ whether to print a sign, and how many digits to print after the decimal
+ point). You do this by specifying a string, called the "format
+ string", which controls how and where to print the other arguments.
+
+ * Menu:
+
+ * Basic Printf:: Syntax of the `printf' statement.
+ * Control Letters:: Format-control letters.
+ * Format Modifiers:: Format-specification modifiers.
+ * Printf Examples:: Several examples.
+
+
+ File: gawk.info, Node: Basic Printf, Next: Control Letters, Prev: Printf, Up: Printf
+
+ Introduction to the `printf' Statement
+ --------------------------------------
+
+ The `printf' statement looks like this:
+
+ printf FORMAT, ITEM1, ITEM2, ...
+
+ The entire list of arguments may optionally be enclosed in parentheses.
+ The parentheses are necessary if any of the item expressions uses a
+ relational operator; otherwise it could be confused with a redirection
+ (*note Redirecting Output of `print' and `printf': Redirection.). The
+ relational operators are `==', `!=', `<', `>', `>=', `<=', `~' and `!~'
+ (*note Comparison Expressions: Comparison Ops.).
+
+ The difference between `printf' and `print' is the argument FORMAT.
+ This is an expression whose value is taken as a string; it specifies
+ how to output each of the other arguments. It is called the "format
+ string".
+
+ The format string is the same as in the ANSI C library function
+ `printf'. Most of FORMAT is text to be output verbatim. Scattered
+ among this text are "format specifiers", one per item. Each format
+ specifier says to output the next item at that place in the format.
+
+ The `printf' statement does not automatically append a newline to its
+ output. It outputs only what the format specifies. So if you want a
+ newline, you must include one in the format. The output separator
+ variables `OFS' and `ORS' have no effect on `printf' statements.
+
+
+ File: gawk.info, Node: Control Letters, Next: Format Modifiers, Prev: Basic Printf, Up: Printf
+
+ Format-Control Letters
+ ----------------------
+
+ A format specifier starts with the character `%' and ends with a
+ "format-control letter"; it tells the `printf' statement how to output
+ one item. (If you actually want to output a `%', write `%%'.) The
+ format-control letter specifies what kind of value to print. The rest
+ of the format specifier is made up of optional "modifiers" which are
+ parameters such as the field width to use.
+
+ Here is a list of the format-control letters:
+
+ `c'
+ This prints a number as an ASCII character. Thus, `printf "%c",
+ 65' outputs the letter `A'. The output for a string value is the
+ first character of the string.
+
+ `d'
+ This prints a decimal integer.
+
+ `i'
+ This also prints a decimal integer.
+
+ `e'
+ This prints a number in scientific (exponential) notation. For
+ example,
+
+ printf "%4.3e", 1950
+
+ prints `1.950e+03', with a total of four significant figures of
+ which three follow the decimal point. The `4.3' are "modifiers",
+ discussed below.
+
+ `f'
+ This prints a number in floating point notation.
+
+ `g'
+ This prints a number in either scientific notation or floating
+ point notation, whichever uses fewer characters.
+
+ `o'
+ This prints an unsigned octal integer.
+
+ `s'
+ This prints a string.
+
+ `x'
+ This prints an unsigned hexadecimal integer.
+
+ `X'
+ This prints an unsigned hexadecimal integer. However, for the
+ values 10 through 15, it uses the letters `A' through `F' instead
+ of `a' through `f'.
+
+ `%'
+ This isn't really a format-control letter, but it does have a
+ meaning when used after a `%': the sequence `%%' outputs one `%'.
+ It does not consume an argument.
+
+
+ File: gawk.info, Node: Format Modifiers, Next: Printf Examples, Prev: Control Letters, Up: Printf
+
+ Modifiers for `printf' Formats
+ ------------------------------
+
+ A format specification can also include "modifiers" that can control
+ how much of the item's value is printed and how much space it gets. The
+ modifiers come between the `%' and the format-control letter. Here are
+ the possible modifiers, in the order in which they may appear:
+
+ `-'
+ The minus sign, used before the width modifier, says to
+ left-justify the argument within its specified width. Normally
+ the argument is printed right-justified in the specified width.
+ Thus,
+
+ printf "%-4s", "foo"
+
+ prints `foo '.
+
+ `WIDTH'
+ This is a number representing the desired width of a field.
+ Inserting any number between the `%' sign and the format control
+ character forces the field to be expanded to this width. The
+ default way to do this is to pad with spaces on the left. For
+ example,
+
+ printf "%4s", "foo"
+
+ prints ` foo'.
+
+ The value of WIDTH is a minimum width, not a maximum. If the item
+ value requires more than WIDTH characters, it can be as wide as
+ necessary. Thus,
+
+ printf "%4s", "foobar"
+
+ prints `foobar'.
+
+ Preceding the WIDTH with a minus sign causes the output to be
+ padded with spaces on the right, instead of on the left.
+
+ `.PREC'
+ This is a number that specifies the precision to use when printing.
+ This specifies the number of digits you want printed to the right
+ of the decimal point. For a string, it specifies the maximum
+ number of characters from the string that should be printed.
+
+ The C library `printf''s dynamic WIDTH and PREC capability (for
+ example, `"%*.*s"') is supported. Instead of supplying explicit WIDTH
+ and/or PREC values in the format string, you pass them in the argument
+ list. For example:
+
+ w = 5
+ p = 3
+ s = "abcdefg"
+ printf "<%*.*s>\n", w, p, s
+
+ is exactly equivalent to
+
+ s = "abcdefg"
+ printf "<%5.3s>\n", s
+
+ Both programs output `<**abc>'. (We have used the bullet symbol "*" to
+ represent a space, to clearly show you that there are two spaces in the
+ output.)
+
+ Earlier versions of `awk' did not support this capability. You may
+ simulate it by using concatenation to build up the format string, like
+ so:
+
+ w = 5
+ p = 3
+ s = "abcdefg"
+ printf "<%" w "." p "s>\n", s
+
+ This is not particularly easy to read, however.
+
+
+ File: gawk.info, Node: Printf Examples, Prev: Format Modifiers, Up: Printf
+
+ Examples of Using `printf'
+ --------------------------
+
+ Here is how to use `printf' to make an aligned table:
+
+ awk '{ printf "%-10s %s\n", $1, $2 }' BBS-list
+
+ prints the names of bulletin boards (`$1') of the file `BBS-list' as a
+ string of 10 characters, left justified. It also prints the phone
+ numbers (`$2') afterward on the line. This produces an aligned
+ two-column table of names and phone numbers:
+
+ aardvark 555-5553
+ alpo-net 555-3412
+ barfly 555-7685
+ bites 555-1675
+ camelot 555-0542
+ core 555-2912
+ fooey 555-1234
+ foot 555-6699
+ macfoo 555-6480
+ sdace 555-3430
+ sabafoo 555-2127
+
+ Did you notice that we did not specify that the phone numbers be
+ printed as numbers? They had to be printed as strings because the
+ numbers are separated by a dash. This dash would be interpreted as a
+ minus sign if we had tried to print the phone numbers as numbers. This
+ would have led to some pretty confusing results.
+
+ We did not specify a width for the phone numbers because they are the
+ last things on their lines. We don't need to put spaces after them.
+
+ We could make our table look even nicer by adding headings to the
+ tops of the columns. To do this, use the `BEGIN' pattern (*note
+ `BEGIN' and `END' Special Patterns: BEGIN/END.) to force the header to
+ be printed only once, at the beginning of the `awk' program:
+
+ awk 'BEGIN { print "Name Number"
+ print "---- ------" }
+ { printf "%-10s %s\n", $1, $2 }' BBS-list
+
+ Did you notice that we mixed `print' and `printf' statements in the
+ above example? We could have used just `printf' statements to get the
+ same results:
+
+ awk 'BEGIN { printf "%-10s %s\n", "Name", "Number"
+ printf "%-10s %s\n", "----", "------" }
+ { printf "%-10s %s\n", $1, $2 }' BBS-list
+
+ By outputting each column heading with the same format specification
+ used for the elements of the column, we have made sure that the headings
+ are aligned just like the columns.
+
+ The fact that the same format specification is used three times can
+ be emphasized by storing it in a variable, like this:
+
+ awk 'BEGIN { format = "%-10s %s\n"
+ printf format, "Name", "Number"
+ printf format, "----", "------" }
+ { printf format, $1, $2 }' BBS-list
+
+ See if you can use the `printf' statement to line up the headings and
+ table data for our `inventory-shipped' example covered earlier in the
+ section on the `print' statement (*note The `print' Statement: Print.).
+
+
+ File: gawk.info, Node: Redirection, Next: Special Files, Prev: Printf, Up: Printing
+
+ Redirecting Output of `print' and `printf'
+ ==========================================
+
+ So far we have been dealing only with output that prints to the
+ standard output, usually your terminal. Both `print' and `printf' can
+ also send their output to other places. This is called "redirection".
+
+ A redirection appears after the `print' or `printf' statement.
+ Redirections in `awk' are written just like redirections in shell
+ commands, except that they are written inside the `awk' program.
+
+ * Menu:
+
+ * File/Pipe Redirection:: Redirecting Output to Files and Pipes.
+ * Close Output:: How to close output files and pipes.
+
+
+ File: gawk.info, Node: File/Pipe Redirection, Next: Close Output, Prev: Redirection, Up: Redirection
+
+ Redirecting Output to Files and Pipes
+ -------------------------------------
+
+ Here are the three forms of output redirection. They are all shown
+ for the `print' statement, but they work identically for `printf' also.
+
+ `print ITEMS > OUTPUT-FILE'
+ This type of redirection prints the items onto the output file
+ OUTPUT-FILE. The file name OUTPUT-FILE can be any expression.
+ Its value is changed to a string and then used as a file name
+ (*note Expressions as Action Statements: Expressions.).
+
+ When this type of redirection is used, the OUTPUT-FILE is erased
+ before the first output is written to it. Subsequent writes do not
+ erase OUTPUT-FILE, but append to it. If OUTPUT-FILE does not
+ exist, then it is created.
+
+ For example, here is how one `awk' program can write a list of BBS
+ names to a file `name-list' and a list of phone numbers to a file
+ `phone-list'. Each output file contains one name or number per
+ line.
+
+ awk '{ print $2 > "phone-list"
+ print $1 > "name-list" }' BBS-list
+
+ `print ITEMS >> OUTPUT-FILE'
+ This type of redirection prints the items onto the output file
+ OUTPUT-FILE. The difference between this and the single-`>'
+ redirection is that the old contents (if any) of OUTPUT-FILE are
+ not erased. Instead, the `awk' output is appended to the file.
+
+ `print ITEMS | COMMAND'
+ It is also possible to send output through a "pipe" instead of
+ into a file. This type of redirection opens a pipe to COMMAND
+ and writes the values of ITEMS through this pipe, to another
+ process created to execute COMMAND.
+
+ The redirection argument COMMAND is actually an `awk' expression.
+ Its value is converted to a string, whose contents give the shell
+ command to be run.
+
+ For example, this produces two files, one unsorted list of BBS
+ names and one list sorted in reverse alphabetical order:
+
+ awk '{ print $1 > "names.unsorted"
+ print $1 | "sort -r > names.sorted" }' BBS-list
+
+ Here the unsorted list is written with an ordinary redirection
+ while the sorted list is written by piping through the `sort'
+ utility.
+
+ Here is an example that uses redirection to mail a message to a
+ mailing list `bug-system'. This might be useful when trouble is
+ encountered in an `awk' script run periodically for system
+ maintenance.
+
+ report = "mail bug-system"
+ print "Awk script failed:", $0 | report
+ print "at record number", FNR, "of", FILENAME | report
+ close(report)
+
+ We call the `close' function here because it's a good idea to close
+ the pipe as soon as all the intended output has been sent to it.
+ *Note Closing Output Files and Pipes: Close Output, for more
+ information on this. This example also illustrates the use of a
+ variable to represent a FILE or COMMAND: it is not necessary to
+ always use a string constant. Using a variable is generally a
+ good idea, since `awk' requires you to spell the string value
+ identically every time.
+
+ Redirecting output using `>', `>>', or `|' asks the system to open a
+ file or pipe only if the particular FILE or COMMAND you've specified
+ has not already been written to by your program, or if it has been
+ closed since it was last written to.
+
+
+ File: gawk.info, Node: Close Output, Prev: File/Pipe Redirection, Up: Redirection
+
+ Closing Output Files and Pipes
+ ------------------------------
+
+ When a file or pipe is opened, the file name or command associated
+ with it is remembered by `awk' and subsequent writes to the same file or
+ command are appended to the previous writes. The file or pipe stays
+ open until `awk' exits. This is usually convenient.
+
+ Sometimes there is a reason to close an output file or pipe earlier
+ than that. To do this, use the `close' function, as follows:
+
+ close(FILENAME)
+
+ or
+
+ close(COMMAND)
+
+ The argument FILENAME or COMMAND can be any expression. Its value
+ must exactly equal the string used to open the file or pipe to begin
+ with--for example, if you open a pipe with this:
+
+ print $1 | "sort -r > names.sorted"
+
+ then you must close it with this:
+
+ close("sort -r > names.sorted")
+
+ Here are some reasons why you might need to close an output file:
+
+ * To write a file and read it back later on in the same `awk'
+ program. Close the file when you are finished writing it; then
+ you can start reading it with `getline' (*note Explicit Input with
+ `getline': Getline.).
+
+ * To write numerous files, successively, in the same `awk' program.
+ If you don't close the files, eventually you may exceed a system
+ limit on the number of open files in one process. So close each
+ one when you are finished writing it.
+
+ * To make a command finish. When you redirect output through a pipe,
+ the command reading the pipe normally continues to try to read
+ input as long as the pipe is open. Often this means the command
+ cannot really do its work until the pipe is closed. For example,
+ if you redirect output to the `mail' program, the message is not
+ actually sent until the pipe is closed.
+
+ * To run the same program a second time, with the same arguments.
+ This is not the same thing as giving more input to the first run!
+
+ For example, suppose you pipe output to the `mail' program. If you
+ output several lines redirected to this pipe without closing it,
+ they make a single message of several lines. By contrast, if you
+ close the pipe after each line of output, then each line makes a
+ separate message.
+
+ `close' returns a value of zero if the close succeeded. Otherwise,
+ the value will be non-zero. In this case, `gawk' sets the variable
+ `ERRNO' to a string describing the error that occurred.
+
+
+ File: gawk.info, Node: Special Files, Prev: Redirection, Up: Printing
+
+ Standard I/O Streams
+ ====================
+
+ Running programs conventionally have three input and output streams
+ already available to them for reading and writing. These are known as
+ the "standard input", "standard output", and "standard error output".
+ These streams are, by default, terminal input and output, but they are
+ often redirected with the shell, via the `<', `<<', `>', `>>', `>&' and
+ `|' operators. Standard error is used only for writing error messages;
+ the reason we have two separate streams, standard output and standard
+ error, is so that they can be redirected separately.
+
+ In other implementations of `awk', the only way to write an error
+ message to standard error in an `awk' program is as follows:
+
+ print "Serious error detected!\n" | "cat 1>&2"
+
+ This works by opening a pipeline to a shell command which can access the
+ standard error stream which it inherits from the `awk' process. This
+ is far from elegant, and is also inefficient, since it requires a
+ separate process. So people writing `awk' programs have often
+ neglected to do this. Instead, they have sent the error messages to the
+ terminal, like this:
+
+ NF != 4 {
+ printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/tty"
+ }
+
+ This has the same effect most of the time, but not always: although the
+ standard error stream is usually the terminal, it can be redirected, and
+ when that happens, writing to the terminal is not correct. In fact, if
+ `awk' is run from a background job, it may not have a terminal at all.
+ Then opening `/dev/tty' will fail.
+
+ `gawk' provides special file names for accessing the three standard
+ streams. When you redirect input or output in `gawk', if the file name
+ matches one of these special names, then `gawk' directly uses the
+ stream it stands for.
+
+ `/dev/stdin'
+ The standard input (file descriptor 0).
+
+ `/dev/stdout'
+ The standard output (file descriptor 1).
+
+ `/dev/stderr'
+ The standard error output (file descriptor 2).
+
+ `/dev/fd/N'
+ The file associated with file descriptor N. Such a file must have
+ been opened by the program initiating the `awk' execution
+ (typically the shell). Unless you take special pains, only
+ descriptors 0, 1 and 2 are available.
+
+ The file names `/dev/stdin', `/dev/stdout', and `/dev/stderr' are
+ aliases for `/dev/fd/0', `/dev/fd/1', and `/dev/fd/2', respectively,
+ but they are more self-explanatory.
+
+ The proper way to write an error message in a `gawk' program is to
+ use `/dev/stderr', like this:
+
+ NF != 4 {
+ printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/stderr"
+ }
+
+ `gawk' also provides special file names that give access to
+ information about the running `gawk' process. Each of these "files"
+ provides a single record of information. To read them more than once,
+ you must first close them with the `close' function (*note Closing
+ Input Files and Pipes: Close Input.). The filenames are:
+
+ `/dev/pid'
+ Reading this file returns the process ID of the current process,
+ in decimal, terminated with a newline.
+
+ `/dev/ppid'
+ Reading this file returns the parent process ID of the current
+ process, in decimal, terminated with a newline.
+
+ `/dev/pgrpid'
+ Reading this file returns the process group ID of the current
+ process, in decimal, terminated with a newline.
+
+ `/dev/user'
+ Reading this file returns a single record terminated with a
+ newline. The fields are separated with blanks. The fields
+ represent the following information:
+
+ `$1'
+ The value of the `getuid' system call.
+
+ `$2'
+ The value of the `geteuid' system call.
+
+ `$3'
+ The value of the `getgid' system call.
+
+ `$4'
+ The value of the `getegid' system call.
+
+ If there are any additional fields, they are the group IDs
+ returned by `getgroups' system call. (Multiple groups may not be
+ supported on all systems.)
+
+ These special file names may be used on the command line as data
+ files, as well as for I/O redirections within an `awk' program. They
+ may not be used as source files with the `-f' option.
+
+ Recognition of these special file names is disabled if `gawk' is in
+ compatibility mode (*note Invoking `awk': Command Line.).
+
+ *Caution*: Unless your system actually has a `/dev/fd' directory
+ (or any of the other above listed special files), the
+ interpretation of these file names is done by `gawk' itself. For
+ example, using `/dev/fd/4' for output will actually write on file
+ descriptor 4, and not on a new file descriptor that was `dup''ed
+ from file descriptor 4. Most of the time this does not matter;
+ however, it is important to *not* close any of the files related
+ to file descriptors 0, 1, and 2. If you do close one of these
+ files, unpredictable behavior will result.
+
+
+ File: gawk.info, Node: One-liners, Next: Patterns, Prev: Printing, Up: Top
+
+ Useful "One-liners"
+ *******************
+
+ Useful `awk' programs are often short, just a line or two. Here is a
+ collection of useful, short programs to get you started. Some of these
+ programs contain constructs that haven't been covered yet. The
+ description of the program will give you a good idea of what is going
+ on, but please read the rest of the manual to become an `awk' expert!
+
+ Since you are reading this in Info, each line of the example code is
+ enclosed in quotes, to represent text that you would type literally.
+ The examples themselves represent shell commands that use single quotes
+ to keep the shell from interpreting the contents of the program. When
+ reading the examples, focus on the text between the open and close
+ quotes.
+
+ `awk '{ if (NF > max) max = NF }'
+ ` END { print max }''
+ This program prints the maximum number of fields on any input line.
+
+ `awk 'length($0) > 80''
+ This program prints every line longer than 80 characters. The sole
+ rule has a relational expression as its pattern, and has no action
+ (so the default action, printing the record, is used).
+
+ `awk 'NF > 0''
+ This program prints every line that has at least one field. This
+ is an easy way to delete blank lines from a file (or rather, to
+ create a new file similar to the old file but from which the blank
+ lines have been deleted).
+
+ `awk '{ if (NF > 0) print }''
+ This program also prints every line that has at least one field.
+ Here we allow the rule to match every line, then decide in the
+ action whether to print.
+
+ `awk 'BEGIN { for (i = 1; i <= 7; i++)'
+ ` print int(101 * rand()) }''
+ This program prints 7 random numbers from 0 to 100, inclusive.
+
+ `ls -l FILES | awk '{ x += $4 } ; END { print "total bytes: " x }''
+ This program prints the total number of bytes used by FILES.
+
+ `expand FILE | awk '{ if (x < length()) x = length() }'
+ ` END { print "maximum line length is " x }''
+ This program prints the maximum line length of FILE. The input is
+ piped through the `expand' program to change tabs into spaces, so
+ the widths compared are actually the right-margin columns.
+
+ `awk 'BEGIN { FS = ":" }'
+ ` { print $1 | "sort" }' /etc/passwd'
+ This program prints a sorted list of the login names of all users.
+
+ `awk '{ nlines++ }'
+ ` END { print nlines }''
+ This programs counts lines in a file.
+
+ `awk 'END { print NR }''
+ This program also counts lines in a file, but lets `awk' do the
+ work.
+
+ `awk '{ print NR, $0 }''
+ This program adds line numbers to all its input files, similar to
+ `cat -n'.
+
+
+ File: gawk.info, Node: Patterns, Next: Actions, Prev: One-liners, Up: Top
+
+ Patterns
+ ********
+
+ Patterns in `awk' control the execution of rules: a rule is executed
+ when its pattern matches the current input record. This chapter tells
+ all about how to write patterns.
+
+ * Menu:
+
+ * Kinds of Patterns:: A list of all kinds of patterns.
+ The following subsections describe
+ them in detail.
+ * Regexp:: Regular expressions such as `/foo/'.
+ * Comparison Patterns:: Comparison expressions such as `$1 > 10'.
+ * Boolean Patterns:: Combining comparison expressions.
+ * Expression Patterns:: Any expression can be used as a pattern.
+ * Ranges:: Pairs of patterns specify record ranges.
+ * BEGIN/END:: Specifying initialization and cleanup rules.
+ * Empty:: The empty pattern, which matches every record.
+
+
+ File: gawk.info, Node: Kinds of Patterns, Next: Regexp, Prev: Patterns, Up: Patterns
+
+ Kinds of Patterns
+ =================
+
+ Here is a summary of the types of patterns supported in `awk'.
+
+ `/REGULAR EXPRESSION/'
+ A regular expression as a pattern. It matches when the text of the
+ input record fits the regular expression. (*Note Regular
+ Expressions as Patterns: Regexp.)
+
+ `EXPRESSION'
+ A single expression. It matches when its value, converted to a
+ number, is nonzero (if a number) or nonnull (if a string). (*Note
+ Expressions as Patterns: Expression Patterns.)
+
+ `PAT1, PAT2'
+ A pair of patterns separated by a comma, specifying a range of
+ records. (*Note Specifying Record Ranges with Patterns: Ranges.)
+
+ `BEGIN'
+ `END'
+ Special patterns to supply start-up or clean-up information to
+ `awk'. (*Note `BEGIN' and `END' Special Patterns: BEGIN/END.)
+
+ `NULL'
+ The empty pattern matches every input record. (*Note The Empty
+ Pattern: Empty.)
+
+
+ File: gawk.info, Node: Regexp, Next: Comparison Patterns, Prev: Kinds of Patterns, Up: Patterns
+
+ Regular Expressions as Patterns
+ ===============================
+
+ A "regular expression", or "regexp", is a way of describing a class
+ of strings. A regular expression enclosed in slashes (`/') is an `awk'
+ pattern that matches every input record whose text belongs to that
+ class.
+
+ The simplest regular expression is a sequence of letters, numbers, or
+ both. Such a regexp matches any string that contains that sequence.
+ Thus, the regexp `foo' matches any string containing `foo'. Therefore,
+ the pattern `/foo/' matches any input record containing `foo'. Other
+ kinds of regexps let you specify more complicated classes of strings.
+
+ * Menu:
+
+ * Regexp Usage:: How to Use Regular Expressions
+ * Regexp Operators:: Regular Expression Operators
+ * Case-sensitivity:: How to do case-insensitive matching.
+
+
+ File: gawk.info, Node: Regexp Usage, Next: Regexp Operators, Prev: Regexp, Up: Regexp
+
+ How to Use Regular Expressions
+ ------------------------------
+
+ A regular expression can be used as a pattern by enclosing it in
+ slashes. Then the regular expression is matched against the entire
+ text of each record. (Normally, it only needs to match some part of
+ the text in order to succeed.) For example, this prints the second
+ field of each record that contains `foo' anywhere:
+
+ awk '/foo/ { print $2 }' BBS-list
+
+ Regular expressions can also be used in comparison expressions. Then
+ you can specify the string to match against; it need not be the entire
+ current input record. These comparison expressions can be used as
+ patterns or in `if', `while', `for', and `do' statements.
+
+ `EXP ~ /REGEXP/'
+ This is true if the expression EXP (taken as a character string)
+ is matched by REGEXP. The following example matches, or selects,
+ all input records with the upper-case letter `J' somewhere in the
+ first field:
+
+ awk '$1 ~ /J/' inventory-shipped
+
+ So does this:
+
+ awk '{ if ($1 ~ /J/) print }' inventory-shipped
+
+ `EXP !~ /REGEXP/'
+ This is true if the expression EXP (taken as a character string)
+ is *not* matched by REGEXP. The following example matches, or
+ selects, all input records whose first field *does not* contain
+ the upper-case letter `J':
+
+ awk '$1 !~ /J/' inventory-shipped
+
+ The right hand side of a `~' or `!~' operator need not be a constant
+ regexp (i.e., a string of characters between slashes). It may be any
+ expression. The expression is evaluated, and converted if necessary to
+ a string; the contents of the string are used as the regexp. A regexp
+ that is computed in this way is called a "dynamic regexp". For example:
+
+ identifier_regexp = "[A-Za-z_][A-Za-z_0-9]+"
+ $0 ~ identifier_regexp
+
+ sets `identifier_regexp' to a regexp that describes `awk' variable
+ names, and tests if the input record matches this regexp.
+
+
+ File: gawk.info, Node: Regexp Operators, Next: Case-sensitivity, Prev: Regexp Usage, Up: Regexp
+
+ Regular Expression Operators
+ ----------------------------
+
+ You can combine regular expressions with the following characters,
+ called "regular expression operators", or "metacharacters", to increase
+ the power and versatility of regular expressions.
+
+ Here is a table of metacharacters. All characters not listed in the
+ table stand for themselves.
+
+ `^'
+ This matches the beginning of the string or the beginning of a line
+ within the string. For example:
+
+ ^@chapter
+
+ matches the `@chapter' at the beginning of a string, and can be
+ used to identify chapter beginnings in Texinfo source files.
+
+ `$'
+ This is similar to `^', but it matches only at the end of a string
+ or the end of a line within the string. For example:
+
+ p$
+
+ matches a record that ends with a `p'.
+
+ `.'
+ This matches any single character except a newline. For example:
+
+ .P
+
+ matches any single character followed by a `P' in a string. Using
+ concatenation we can make regular expressions like `U.A', which
+ matches any three-character sequence that begins with `U' and ends
+ with `A'.
+
+ `[...]'
+ This is called a "character set". It matches any one of the
+ characters that are enclosed in the square brackets. For example:
+
+ [MVX]
+
+ matches any one of the characters `M', `V', or `X' in a string.
+
+ Ranges of characters are indicated by using a hyphen between the
+ beginning and ending characters, and enclosing the whole thing in
+ brackets. For example:
+
+ [0-9]
+
+ matches any digit.
+
+ To include the character `\', `]', `-' or `^' in a character set,
+ put a `\' in front of it. For example:
+
+ [d\]]
+
+ matches either `d', or `]'.
+
+ This treatment of `\' is compatible with other `awk'
+ implementations, and is also mandated by the POSIX Command Language
+ and Utilities standard. The regular expressions in `awk' are a
+ superset of the POSIX specification for Extended Regular
+ Expressions (EREs). POSIX EREs are based on the regular
+ expressions accepted by the traditional `egrep' utility.
+
+ In `egrep' syntax, backslash is not syntactically special within
+ square brackets. This means that special tricks have to be used to
+ represent the characters `]', `-' and `^' as members of a
+ character set.
+
+ In `egrep' syntax, to match `-', write it as `---', which is a
+ range containing only `-'. You may also give `-' as the first or
+ last character in the set. To match `^', put it anywhere except
+ as the first character of a set. To match a `]', make it the
+ first character in the set. For example:
+
+ []d^]
+
+ matches either `]', `d' or `^'.
+
+ `[^ ...]'
+ This is a "complemented character set". The first character after
+ the `[' *must* be a `^'. It matches any characters *except* those
+ in the square brackets (or newline). For example:
+
+ [^0-9]
+
+ matches any character that is not a digit.
+
+ `|'
+ This is the "alternation operator" and it is used to specify
+ alternatives. For example:
+
+ ^P|[0-9]
+
+ matches any string that matches either `^P' or `[0-9]'. This
+ means it matches any string that contains a digit or starts with
+ `P'.
+
+ The alternation applies to the largest possible regexps on either
+ side.
+
+ `(...)'
+ Parentheses are used for grouping in regular expressions as in
+ arithmetic. They can be used to concatenate regular expressions
+ containing the alternation operator, `|'.
+
+ `*'
+ This symbol means that the preceding regular expression is to be
+ repeated as many times as possible to find a match. For example:
+
+ ph*
+
+ applies the `*' symbol to the preceding `h' and looks for matches
+ to one `p' followed by any number of `h's. This will also match
+ just `p' if no `h's are present.
+
+ The `*' repeats the *smallest* possible preceding expression.
+ (Use parentheses if you wish to repeat a larger expression.) It
+ finds as many repetitions as possible. For example:
+
+ awk '/\(c[ad][ad]*r x\)/ { print }' sample
+
+ prints every record in the input containing a string of the form
+ `(car x)', `(cdr x)', `(cadr x)', and so on.
+
+ `+'
+ This symbol is similar to `*', but the preceding expression must be
+ matched at least once. This means that:
+
+ wh+y
+
+ would match `why' and `whhy' but not `wy', whereas `wh*y' would
+ match all three of these strings. This is a simpler way of
+ writing the last `*' example:
+
+ awk '/\(c[ad]+r x\)/ { print }' sample
+
+ `?'
+ This symbol is similar to `*', but the preceding expression can be
+ matched once or not at all. For example:
+
+ fe?d
+
+ will match `fed' and `fd', but nothing else.
+
+ `\'
+ This is used to suppress the special meaning of a character when
+ matching. For example:
+
+ \$
+
+ matches the character `$'.
+
+ The escape sequences used for string constants (*note Constant
+ Expressions: Constants.) are valid in regular expressions as well;
+ they are also introduced by a `\'.
+
+ In regular expressions, the `*', `+', and `?' operators have the
+ highest precedence, followed by concatenation, and finally by `|'. As
+ in arithmetic, parentheses can change how operators are grouped.
+
+
+ File: gawk.info, Node: Case-sensitivity, Prev: Regexp Operators, Up: Regexp
+
+ Case-sensitivity in Matching
+ ----------------------------
+
+ Case is normally significant in regular expressions, both when
+ matching ordinary characters (i.e., not metacharacters), and inside
+ character sets. Thus a `w' in a regular expression matches only a
+ lower case `w' and not an upper case `W'.
+
+ The simplest way to do a case-independent match is to use a character
+ set: `[Ww]'. However, this can be cumbersome if you need to use it
+ often; and it can make the regular expressions harder for humans to
+ read. There are two other alternatives that you might prefer.
+
+ One way to do a case-insensitive match at a particular point in the
+ program is to convert the data to a single case, using the `tolower' or
+ `toupper' built-in string functions (which we haven't discussed yet;
+ *note Built-in Functions for String Manipulation: String Functions.).
+ For example:
+
+ tolower($1) ~ /foo/ { ... }
+
+ converts the first field to lower case before matching against it.
+
+ Another method is to set the variable `IGNORECASE' to a nonzero
+ value (*note Built-in Variables::.). When `IGNORECASE' is not zero,
+ *all* regexp operations ignore case. Changing the value of
+ `IGNORECASE' dynamically controls the case sensitivity of your program
+ as it runs. Case is significant by default because `IGNORECASE' (like
+ most variables) is initialized to zero.
+
+ x = "aB"
+ if (x ~ /ab/) ... # this test will fail
+
+ IGNORECASE = 1
+ if (x ~ /ab/) ... # now it will succeed
+
+ In general, you cannot use `IGNORECASE' to make certain rules
+ case-insensitive and other rules case-sensitive, because there is no way
+ to set `IGNORECASE' just for the pattern of a particular rule. To do
+ this, you must use character sets or `tolower'. However, one thing you
+ can do only with `IGNORECASE' is turn case-sensitivity on or off
+ dynamically for all the rules at once.
+
+ `IGNORECASE' can be set on the command line, or in a `BEGIN' rule.
+ Setting `IGNORECASE' from the command line is a way to make a program
+ case-insensitive without having to edit it.
+
+ The value of `IGNORECASE' has no effect if `gawk' is in
+ compatibility mode (*note Invoking `awk': Command Line.). Case is
+ always significant in compatibility mode.
+
+
+ File: gawk.info, Node: Comparison Patterns, Next: Boolean Patterns, Prev: Regexp, Up: Patterns
+
+ Comparison Expressions as Patterns
+ ==================================
+
+ "Comparison patterns" test relationships such as equality between
+ two strings or numbers. They are a special case of expression patterns
+ (*note Expressions as Patterns: Expression Patterns.). They are written
+ with "relational operators", which are a superset of those in C. Here
+ is a table of them:
+
+ `X < Y'
+ True if X is less than Y.
+
+ `X <= Y'
+ True if X is less than or equal to Y.
+
+ `X > Y'
+ True if X is greater than Y.
+
+ `X >= Y'
+ True if X is greater than or equal to Y.
+
+ `X == Y'
+ True if X is equal to Y.
+
+ `X != Y'
+ True if X is not equal to Y.
+
+ `X ~ Y'
+ True if X matches the regular expression described by Y.
+
+ `X !~ Y'
+ True if X does not match the regular expression described by Y.
+
+ The operands of a relational operator are compared as numbers if they
+ are both numbers. Otherwise they are converted to, and compared as,
+ strings (*note Conversion of Strings and Numbers: Conversion., for the
+ detailed rules). Strings are compared by comparing the first character
+ of each, then the second character of each, and so on, until there is a
+ difference. If the two strings are equal until the shorter one runs
+ out, the shorter one is considered to be less than the longer one.
+ Thus, `"10"' is less than `"9"', and `"abc"' is less than `"abcd"'.
+
+ The left operand of the `~' and `!~' operators is a string. The
+ right operand is either a constant regular expression enclosed in
+ slashes (`/REGEXP/'), or any expression, whose string value is used as
+ a dynamic regular expression (*note How to Use Regular Expressions:
+ Regexp Usage.).
+
+ The following example prints the second field of each input record
+ whose first field is precisely `foo'.
+
+ awk '$1 == "foo" { print $2 }' BBS-list
+
+ Contrast this with the following regular expression match, which would
+ accept any record with a first field that contains `foo':
+
+ awk '$1 ~ "foo" { print $2 }' BBS-list
+
+ or, equivalently, this one:
+
+ awk '$1 ~ /foo/ { print $2 }' BBS-list
+
+
+ File: gawk.info, Node: Boolean Patterns, Next: Expression Patterns, Prev: Comparison Patterns, Up: Patterns
+
+ Boolean Operators and Patterns
+ ==============================
+
+ A "boolean pattern" is an expression which combines other patterns
+ using the "boolean operators" "or" (`||'), "and" (`&&'), and "not"
+ (`!'). Whether the boolean pattern matches an input record depends on
+ whether its subpatterns match.
+
+ For example, the following command prints all records in the input
+ file `BBS-list' that contain both `2400' and `foo'.
+
+ awk '/2400/ && /foo/' BBS-list
+
+ The following command prints all records in the input file
+ `BBS-list' that contain *either* `2400' or `foo', or both.
+
+ awk '/2400/ || /foo/' BBS-list
+
+ The following command prints all records in the input file
+ `BBS-list' that do *not* contain the string `foo'.
+
+ awk '! /foo/' BBS-list
+
+ Note that boolean patterns are a special case of expression patterns
+ (*note Expressions as Patterns: Expression Patterns.); they are
+ expressions that use the boolean operators. *Note Boolean Expressions:
+ Boolean Ops, for complete information on the boolean operators.
+
+ The subpatterns of a boolean pattern can be constant regular
+ expressions, comparisons, or any other `awk' expressions. Range
+ patterns are not expressions, so they cannot appear inside boolean
+ patterns. Likewise, the special patterns `BEGIN' and `END', which
+ never match any input record, are not expressions and cannot appear
+ inside boolean patterns.
+
+
+ File: gawk.info, Node: Expression Patterns, Next: Ranges, Prev: Boolean Patterns, Up: Patterns
+
+ Expressions as Patterns
+ =======================
+
+ Any `awk' expression is also valid as an `awk' pattern. Then the
+ pattern "matches" if the expression's value is nonzero (if a number) or
+ nonnull (if a string).
+
+ The expression is reevaluated each time the rule is tested against a
+ new input record. If the expression uses fields such as `$1', the
+ value depends directly on the new input record's text; otherwise, it
+ depends only on what has happened so far in the execution of the `awk'
+ program, but that may still be useful.
+
+ Comparison patterns are actually a special case of this. For
+ example, the expression `$5 == "foo"' has the value 1 when the value of
+ `$5' equals `"foo"', and 0 otherwise; therefore, this expression as a
+ pattern matches when the two values are equal.
+
+ Boolean patterns are also special cases of expression patterns.
+
+ A constant regexp as a pattern is also a special case of an
+ expression pattern. `/foo/' as an expression has the value 1 if `foo'
+ appears in the current input record; thus, as a pattern, `/foo/'
+ matches any record containing `foo'.
+
+ Other implementations of `awk' that are not yet POSIX compliant are
+ less general than `gawk': they allow comparison expressions, and
+ boolean combinations thereof (optionally with parentheses), but not
+ necessarily other kinds of expressions.
+
+
+ File: gawk.info, Node: Ranges, Next: BEGIN/END, Prev: Expression Patterns, Up: Patterns
+
+ Specifying Record Ranges with Patterns
+ ======================================
+
+ A "range pattern" is made of two patterns separated by a comma, of
+ the form `BEGPAT, ENDPAT'. It matches ranges of consecutive input
+ records. The first pattern BEGPAT controls where the range begins, and
+ the second one ENDPAT controls where it ends. For example,
+
+ awk '$1 == "on", $1 == "off"'
+
+ prints every record between `on'/`off' pairs, inclusive.
+
+ A range pattern starts out by matching BEGPAT against every input
+ record; when a record matches BEGPAT, the range pattern becomes "turned
+ on". The range pattern matches this record. As long as it stays
+ turned on, it automatically matches every input record read. It also
+ matches ENDPAT against every input record; when that succeeds, the
+ range pattern is turned off again for the following record. Now it
+ goes back to checking BEGPAT against each record.
+
+ The record that turns on the range pattern and the one that turns it
+ off both match the range pattern. If you don't want to operate on
+ these records, you can write `if' statements in the rule's action to
+ distinguish them.
+
+ It is possible for a pattern to be turned both on and off by the same
+ record, if both conditions are satisfied by that record. Then the
+ action is executed for just that record.
+
+
+ File: gawk.info, Node: BEGIN/END, Next: Empty, Prev: Ranges, Up: Patterns
+
+ `BEGIN' and `END' Special Patterns
+ ==================================
+
+ `BEGIN' and `END' are special patterns. They are not used to match
+ input records. Rather, they are used for supplying start-up or
+ clean-up information to your `awk' script. A `BEGIN' rule is executed,
+ once, before the first input record has been read. An `END' rule is
+ executed, once, after all the input has been read. For example:
+
+ awk 'BEGIN { print "Analysis of `foo'" }
+ /foo/ { ++foobar }
+ END { print "`foo' appears " foobar " times." }' BBS-list
+
+ This program finds the number of records in the input file `BBS-list'
+ that contain the string `foo'. The `BEGIN' rule prints a title for the
+ report. There is no need to use the `BEGIN' rule to initialize the
+ counter `foobar' to zero, as `awk' does this for us automatically
+ (*note Variables::.).
+
+ The second rule increments the variable `foobar' every time a record
+ containing the pattern `foo' is read. The `END' rule prints the value
+ of `foobar' at the end of the run.
+
+ The special patterns `BEGIN' and `END' cannot be used in ranges or
+ with boolean operators (indeed, they cannot be used with any operators).
+
+ An `awk' program may have multiple `BEGIN' and/or `END' rules. They
+ are executed in the order they appear, all the `BEGIN' rules at
+ start-up and all the `END' rules at termination.
+
+ Multiple `BEGIN' and `END' sections are useful for writing library
+ functions, since each library can have its own `BEGIN' or `END' rule to
+ do its own initialization and/or cleanup. Note that the order in which
+ library functions are named on the command line controls the order in
+ which their `BEGIN' and `END' rules are executed. Therefore you have
+ to be careful to write such rules in library files so that the order in
+ which they are executed doesn't matter. *Note Invoking `awk': Command
+ Line, for more information on using library functions.
+
+ If an `awk' program only has a `BEGIN' rule, and no other rules,
+ then the program exits after the `BEGIN' rule has been run. (Older
+ versions of `awk' used to keep reading and ignoring input until end of
+ file was seen.) However, if an `END' rule exists as well, then the
+ input will be read, even if there are no other rules in the program.
+ This is necessary in case the `END' rule checks the `NR' variable.
+
+ `BEGIN' and `END' rules must have actions; there is no default
+ action for these rules since there is no current record when they run.
+
+
+ File: gawk.info, Node: Empty, Prev: BEGIN/END, Up: Patterns
+
+ The Empty Pattern
+ =================
+
+ An empty pattern is considered to match *every* input record. For
+ example, the program:
+
+ awk '{ print $1 }' BBS-list
+
+ prints the first field of every record.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-4 gawk-2.15.5/gawk.info-4
*** /src/baseline/gawk-2.15.5/gawk.info-4 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-4 Sun Jun 12 22:28:51 1994
***************
*** 0 ****
--- 1,1305 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: Actions, Next: Expressions, Prev: Patterns, Up: Top
+
+ Overview of Actions
+ *******************
+
+ An `awk' program or script consists of a series of rules and
+ function definitions, interspersed. (Functions are described later.
+ *Note User-defined Functions: User-defined.)
+
+ A rule contains a pattern and an action, either of which may be
+ omitted. The purpose of the "action" is to tell `awk' what to do once
+ a match for the pattern is found. Thus, the entire program looks
+ somewhat like this:
+
+ [PATTERN] [{ ACTION }]
+ [PATTERN] [{ ACTION }]
+ ...
+ function NAME (ARGS) { ... }
+ ...
+
+ An action consists of one or more `awk' "statements", enclosed in
+ curly braces (`{' and `}'). Each statement specifies one thing to be
+ done. The statements are separated by newlines or semicolons.
+
+ The curly braces around an action must be used even if the action
+ contains only one statement, or even if it contains no statements at
+ all. However, if you omit the action entirely, omit the curly braces as
+ well. (An omitted action is equivalent to `{ print $0 }'.)
+
+ Here are the kinds of statements supported in `awk':
+
+ * Expressions, which can call functions or assign values to variables
+ (*note Expressions as Action Statements: Expressions.). Executing
+ this kind of statement simply computes the value of the expression
+ and then ignores it. This is useful when the expression has side
+ effects (*note Assignment Expressions: Assignment Ops.).
+
+ * Control statements, which specify the control flow of `awk'
+ programs. The `awk' language gives you C-like constructs (`if',
+ `for', `while', and so on) as well as a few special ones (*note
+ Control Statements in Actions: Statements.).
+
+ * Compound statements, which consist of one or more statements
+ enclosed in curly braces. A compound statement is used in order
+ to put several statements together in the body of an `if',
+ `while', `do' or `for' statement.
+
+ * Input control, using the `getline' command (*note Explicit Input
+ with `getline': Getline.), and the `next' statement (*note The
+ `next' Statement: Next Statement.).
+
+ * Output statements, `print' and `printf'. *Note Printing Output:
+ Printing.
+
+ * Deletion statements, for deleting array elements. *Note The
+ `delete' Statement: Delete.
+
+
+ File: gawk.info, Node: Expressions, Next: Statements, Prev: Actions, Up: Top
+
+ Expressions as Action Statements
+ ********************************
+
+ Expressions are the basic building block of `awk' actions. An
+ expression evaluates to a value, which you can print, test, store in a
+ variable or pass to a function. But beyond that, an expression can
+ assign a new value to a variable or a field, with an assignment
+ operator.
+
+ An expression can serve as a statement on its own. Most other kinds
+ of statements contain one or more expressions which specify data to be
+ operated on. As in other languages, expressions in `awk' include
+ variables, array references, constants, and function calls, as well as
+ combinations of these with various operators.
+
+ * Menu:
+
+ * Constants:: String, numeric, and regexp constants.
+ * Variables:: Variables give names to values for later use.
+ * Arithmetic Ops:: Arithmetic operations (`+', `-', etc.)
+ * Concatenation:: Concatenating strings.
+ * Comparison Ops:: Comparison of numbers and strings
+ with `<', etc.
+ * Boolean Ops:: Combining comparison expressions
+ using boolean operators
+ `||' ("or"), `&&' ("and") and `!' ("not").
+
+ * Assignment Ops:: Changing the value of a variable or a field.
+ * Increment Ops:: Incrementing the numeric value of a variable.
+
+ * Conversion:: The conversion of strings to numbers
+ and vice versa.
+ * Values:: The whole truth about numbers and strings.
+ * Conditional Exp:: Conditional expressions select
+ between two subexpressions under control
+ of a third subexpression.
+ * Function Calls:: A function call is an expression.
+ * Precedence:: How various operators nest.
+
+
+ File: gawk.info, Node: Constants, Next: Variables, Prev: Expressions, Up: Expressions
+
+ Constant Expressions
+ ====================
+
+ The simplest type of expression is the "constant", which always has
+ the same value. There are three types of constants: numeric constants,
+ string constants, and regular expression constants.
+
+ A "numeric constant" stands for a number. This number can be an
+ integer, a decimal fraction, or a number in scientific (exponential)
+ notation. Note that all numeric values are represented within `awk' in
+ double-precision floating point. Here are some examples of numeric
+ constants, which all have the same value:
+
+ 105
+ 1.05e+2
+ 1050e-1
+
+ A string constant consists of a sequence of characters enclosed in
+ double-quote marks. For example:
+
+ "parrot"
+
+ represents the string whose contents are `parrot'. Strings in `gawk'
+ can be of any length and they can contain all the possible 8-bit ASCII
+ characters including ASCII NUL. Other `awk' implementations may have
+ difficulty with some character codes.
+
+ Some characters cannot be included literally in a string constant.
+ You represent them instead with "escape sequences", which are character
+ sequences beginning with a backslash (`\').
+
+ One use of an escape sequence is to include a double-quote character
+ in a string constant. Since a plain double-quote would end the string,
+ you must use `\"' to represent a single double-quote character as a
+ part of the string. The backslash character itself is another
+ character that cannot be included normally; you write `\\' to put one
+ backslash in the string. Thus, the string whose contents are the two
+ characters `"\' must be written `"\"\\"'.
+
+ Another use of backslash is to represent unprintable characters such
+ as newline. While there is nothing to stop you from writing most of
+ these characters directly in a string constant, they may look ugly.
+
+ Here is a table of all the escape sequences used in `awk':
+
+ `\\'
+ Represents a literal backslash, `\'.
+
+ `\a'
+ Represents the "alert" character, control-g, ASCII code 7.
+
+ `\b'
+ Represents a backspace, control-h, ASCII code 8.
+
+ `\f'
+ Represents a formfeed, control-l, ASCII code 12.
+
+ `\n'
+ Represents a newline, control-j, ASCII code 10.
+
+ `\r'
+ Represents a carriage return, control-m, ASCII code 13.
+
+ `\t'
+ Represents a horizontal tab, control-i, ASCII code 9.
+
+ `\v'
+ Represents a vertical tab, control-k, ASCII code 11.
+
+ `\NNN'
+ Represents the octal value NNN, where NNN are one to three digits
+ between 0 and 7. For example, the code for the ASCII ESC (escape)
+ character is `\033'.
+
+ `\xHH...'
+ Represents the hexadecimal value HH, where HH are hexadecimal
+ digits (`0' through `9' and either `A' through `F' or `a' through
+ `f'). Like the same construct in ANSI C, the escape sequence
+ continues until the first non-hexadecimal digit is seen. However,
+ using more than two hexadecimal digits produces undefined results.
+ (The `\x' escape sequence is not allowed in POSIX `awk'.)
+
+ A "constant regexp" is a regular expression description enclosed in
+ slashes, such as `/^beginning and end$/'. Most regexps used in `awk'
+ programs are constant, but the `~' and `!~' operators can also match
+ computed or "dynamic" regexps (*note How to Use Regular Expressions:
+ Regexp Usage.).
+
+ Constant regexps may be used like simple expressions. When a
+ constant regexp is not on the right hand side of the `~' or `!~'
+ operators, it has the same meaning as if it appeared in a pattern, i.e.
+ `($0 ~ /foo/)' (*note Expressions as Patterns: Expression Patterns.).
+ This means that the two code segments,
+
+ if ($0 ~ /barfly/ || $0 ~ /camelot/)
+ print "found"
+
+ and
+
+ if (/barfly/ || /camelot/)
+ print "found"
+
+ are exactly equivalent. One rather bizarre consequence of this rule is
+ that the following boolean expression is legal, but does not do what
+ the user intended:
+
+ if (/foo/ ~ $1) print "found foo"
+
+ This code is "obviously" testing `$1' for a match against the regexp
+ `/foo/'. But in fact, the expression `(/foo/ ~ $1)' actually means
+ `(($0 ~ /foo/) ~ $1)'. In other words, first match the input record
+ against the regexp `/foo/'. The result will be either a 0 or a 1,
+ depending upon the success or failure of the match. Then match that
+ result against the first field in the record.
+
+ Since it is unlikely that you would ever really wish to make this
+ kind of test, `gawk' will issue a warning when it sees this construct in
+ a program.
+
+ Another consequence of this rule is that the assignment statement
+
+ matches = /foo/
+
+ will assign either 0 or 1 to the variable `matches', depending upon the
+ contents of the current input record.
+
+ Constant regular expressions are also used as the first argument for
+ the `sub' and `gsub' functions (*note Built-in Functions for String
+ Manipulation: String Functions.).
+
+ This feature of the language was never well documented until the
+ POSIX specification.
+
+ You may be wondering, when is
+
+ $1 ~ /foo/ { ... }
+
+ preferable to
+
+ $1 ~ "foo" { ... }
+
+ Since the right-hand sides of both `~' operators are constants, it
+ is more efficient to use the `/foo/' form: `awk' can note that you have
+ supplied a regexp and store it internally in a form that makes pattern
+ matching more efficient. In the second form, `awk' must first convert
+ the string into this internal form, and then perform the pattern
+ matching. The first form is also better style; it shows clearly that
+ you intend a regexp match.
+
+
+ File: gawk.info, Node: Variables, Next: Arithmetic Ops, Prev: Constants, Up: Expressions
+
+ Variables
+ =========
+
+ Variables let you give names to values and refer to them later. You
+ have already seen variables in many of the examples. The name of a
+ variable must be a sequence of letters, digits and underscores, but it
+ may not begin with a digit. Case is significant in variable names; `a'
+ and `A' are distinct variables.
+
+ A variable name is a valid expression by itself; it represents the
+ variable's current value. Variables are given new values with
+ "assignment operators" and "increment operators". *Note Assignment
+ Expressions: Assignment Ops.
+
+ A few variables have special built-in meanings, such as `FS', the
+ field separator, and `NF', the number of fields in the current input
+ record. *Note Built-in Variables::, for a list of them. These
+ built-in variables can be used and assigned just like all other
+ variables, but their values are also used or changed automatically by
+ `awk'. Each built-in variable's name is made entirely of upper case
+ letters.
+
+ Variables in `awk' can be assigned either numeric or string values.
+ By default, variables are initialized to the null string, which is
+ effectively zero if converted to a number. There is no need to
+ "initialize" each variable explicitly in `awk', the way you would in C
+ or most other traditional languages.
+
+ * Menu:
+
+ * Assignment Options:: Setting variables on the command line
+ and a summary of command line syntax.
+ This is an advanced method of input.
+
+
+ File: gawk.info, Node: Assignment Options, Prev: Variables, Up: Variables
+
+ Assigning Variables on the Command Line
+ ---------------------------------------
+
+ You can set any `awk' variable by including a "variable assignment"
+ among the arguments on the command line when you invoke `awk' (*note
+ Invoking `awk': Command Line.). Such an assignment has this form:
+
+ VARIABLE=TEXT
+
+ With it, you can set a variable either at the beginning of the `awk'
+ run or in between input files.
+
+ If you precede the assignment with the `-v' option, like this:
+
+ -v VARIABLE=TEXT
+
+ then the variable is set at the very beginning, before even the `BEGIN'
+ rules are run. The `-v' option and its assignment must precede all the
+ file name arguments, as well as the program text.
+
+ Otherwise, the variable assignment is performed at a time determined
+ by its position among the input file arguments: after the processing of
+ the preceding input file argument. For example:
+
+ awk '{ print $n }' n=4 inventory-shipped n=2 BBS-list
+
+ prints the value of field number `n' for all input records. Before the
+ first file is read, the command line sets the variable `n' equal to 4.
+ This causes the fourth field to be printed in lines from the file
+ `inventory-shipped'. After the first file has finished, but before the
+ second file is started, `n' is set to 2, so that the second field is
+ printed in lines from `BBS-list'.
+
+ Command line arguments are made available for explicit examination by
+ the `awk' program in an array named `ARGV' (*note Built-in
+ Variables::.).
+
+ `awk' processes the values of command line assignments for escape
+ sequences (*note Constant Expressions: Constants.).
+
+
+ File: gawk.info, Node: Arithmetic Ops, Next: Concatenation, Prev: Variables, Up: Expressions
+
+ Arithmetic Operators
+ ====================
+
+ The `awk' language uses the common arithmetic operators when
+ evaluating expressions. All of these arithmetic operators follow normal
+ precedence rules, and work as you would expect them to. This example
+ divides field three by field four, adds field two, stores the result
+ into field one, and prints the resulting altered input record:
+
+ awk '{ $1 = $2 + $3 / $4; print }' inventory-shipped
+
+ The arithmetic operators in `awk' are:
+
+ `X + Y'
+ Addition.
+
+ `X - Y'
+ Subtraction.
+
+ `- X'
+ Negation.
+
+ `+ X'
+ Unary plus. No real effect on the expression.
+
+ `X * Y'
+ Multiplication.
+
+ `X / Y'
+ Division. Since all numbers in `awk' are double-precision
+ floating point, the result is not rounded to an integer: `3 / 4'
+ has the value 0.75.
+
+ `X % Y'
+ Remainder. The quotient is rounded toward zero to an integer,
+ multiplied by Y and this result is subtracted from X. This
+ operation is sometimes known as "trunc-mod." The following
+ relation always holds:
+
+ b * int(a / b) + (a % b) == a
+
+ One possibly undesirable effect of this definition of remainder is
+ that `X % Y' is negative if X is negative. Thus,
+
+ -17 % 8 = -1
+
+ In other `awk' implementations, the signedness of the remainder
+ may be machine dependent.
+
+ `X ^ Y'
+ `X ** Y'
+ Exponentiation: X raised to the Y power. `2 ^ 3' has the value 8.
+ The character sequence `**' is equivalent to `^'. (The POSIX
+ standard only specifies the use of `^' for exponentiation.)
+
+
+ File: gawk.info, Node: Concatenation, Next: Comparison Ops, Prev: Arithmetic Ops, Up: Expressions
+
+ String Concatenation
+ ====================
+
+ There is only one string operation: concatenation. It does not have
+ a specific operator to represent it. Instead, concatenation is
+ performed by writing expressions next to one another, with no operator.
+ For example:
+
+ awk '{ print "Field number one: " $1 }' BBS-list
+
+ produces, for the first record in `BBS-list':
+
+ Field number one: aardvark
+
+ Without the space in the string constant after the `:', the line
+ would run together. For example:
+
+ awk '{ print "Field number one:" $1 }' BBS-list
+
+ produces, for the first record in `BBS-list':
+
+ Field number one:aardvark
+
+ Since string concatenation does not have an explicit operator, it is
+ often necessary to insure that it happens where you want it to by
+ enclosing the items to be concatenated in parentheses. For example, the
+ following code fragment does not concatenate `file' and `name' as you
+ might expect:
+
+ file = "file"
+ name = "name"
+ print "something meaningful" > file name
+
+ It is necessary to use the following:
+
+ print "something meaningful" > (file name)
+
+ We recommend you use parentheses around concatenation in all but the
+ most common contexts (such as in the right-hand operand of `=').
+
+
+ File: gawk.info, Node: Comparison Ops, Next: Boolean Ops, Prev: Concatenation, Up: Expressions
+
+ Comparison Expressions
+ ======================
+
+ "Comparison expressions" compare strings or numbers for
+ relationships such as equality. They are written using "relational
+ operators", which are a superset of those in C. Here is a table of
+ them:
+
+ `X < Y'
+ True if X is less than Y.
+
+ `X <= Y'
+ True if X is less than or equal to Y.
+
+ `X > Y'
+ True if X is greater than Y.
+
+ `X >= Y'
+ True if X is greater than or equal to Y.
+
+ `X == Y'
+ True if X is equal to Y.
+
+ `X != Y'
+ True if X is not equal to Y.
+
+ `X ~ Y'
+ True if the string X matches the regexp denoted by Y.
+
+ `X !~ Y'
+ True if the string X does not match the regexp denoted by Y.
+
+ `SUBSCRIPT in ARRAY'
+ True if array ARRAY has an element with the subscript SUBSCRIPT.
+
+ Comparison expressions have the value 1 if true and 0 if false.
+
+ The rules `gawk' uses for performing comparisons are based on those
+ in draft 11.2 of the POSIX standard. The POSIX standard introduced the
+ concept of a "numeric string", which is simply a string that looks like
+ a number, for example, `" +2"'.
+
+ When performing a relational operation, `gawk' considers the type of
+ an operand to be the type it received on its last *assignment*, rather
+ than the type of its last *use* (*note Numeric and String Values:
+ Values.). This type is *unknown* when the operand is from an
+ "external" source: field variables, command line arguments, array
+ elements resulting from a `split' operation, and the value of an
+ `ENVIRON' element. In this case only, if the operand is a numeric
+ string, then it is considered to be of both string type and numeric
+ type. If at least one operand of a comparison is of string type only,
+ then a string comparison is performed. Any numeric operand will be
+ converted to a string using the value of `CONVFMT' (*note Conversion of
+ Strings and Numbers: Conversion.). If one operand of a comparison is
+ numeric, and the other operand is either numeric or both numeric and
+ string, then `gawk' does a numeric comparison. If both operands have
+ both types, then the comparison is numeric. Strings are compared by
+ comparing the first character of each, then the second character of
+ each, and so on. Thus `"10"' is less than `"9"'. If there are two
+ strings where one is a prefix of the other, the shorter string is less
+ than the longer one. Thus `"abc"' is less than `"abcd"'.
+
+ Here are some sample expressions, how `gawk' compares them, and what
+ the result of the comparison is.
+
+ `1.5 <= 2.0'
+ numeric comparison (true)
+
+ `"abc" >= "xyz"'
+ string comparison (false)
+
+ `1.5 != " +2"'
+ string comparison (true)
+
+ `"1e2" < "3"'
+ string comparison (true)
+
+ `a = 2; b = "2"'
+ `a == b'
+ string comparison (true)
+
+ echo 1e2 3 | awk '{ print ($1 < $2) ? "true" : "false" }'
+
+ prints `false' since both `$1' and `$2' are numeric strings and thus
+ have both string and numeric types, thus dictating a numeric comparison.
+
+ The purpose of the comparison rules and the use of numeric strings is
+ to attempt to produce the behavior that is "least surprising," while
+ still "doing the right thing."
+
+ String comparisons and regular expression comparisons are very
+ different. For example,
+
+ $1 == "foo"
+
+ has the value of 1, or is true, if the first field of the current input
+ record is precisely `foo'. By contrast,
+
+ $1 ~ /foo/
+
+ has the value 1 if the first field contains `foo', such as `foobar'.
+
+ The right hand operand of the `~' and `!~' operators may be either a
+ constant regexp (`/.../'), or it may be an ordinary expression, in
+ which case the value of the expression as a string is a dynamic regexp
+ (*note How to Use Regular Expressions: Regexp Usage.).
+
+ In very recent implementations of `awk', a constant regular
+ expression in slashes by itself is also an expression. The regexp
+ `/REGEXP/' is an abbreviation for this comparison expression:
+
+ $0 ~ /REGEXP/
+
+ In some contexts it may be necessary to write parentheses around the
+ regexp to avoid confusing the `gawk' parser. For example, `(/x/ - /y/)
+ > threshold' is not allowed, but `((/x/) - (/y/)) > threshold' parses
+ properly.
+
+ One special place where `/foo/' is *not* an abbreviation for `$0 ~
+ /foo/' is when it is the right-hand operand of `~' or `!~'! *Note
+ Constant Expressions: Constants, where this is discussed in more detail.
+
+
+ File: gawk.info, Node: Boolean Ops, Next: Assignment Ops, Prev: Comparison Ops, Up: Expressions
+
+ Boolean Expressions
+ ===================
+
+ A "boolean expression" is a combination of comparison expressions or
+ matching expressions, using the boolean operators "or" (`||'), "and"
+ (`&&'), and "not" (`!'), along with parentheses to control nesting.
+ The truth of the boolean expression is computed by combining the truth
+ values of the component expressions.
+
+ Boolean expressions can be used wherever comparison and matching
+ expressions can be used. They can be used in `if', `while' `do' and
+ `for' statements. They have numeric values (1 if true, 0 if false),
+ which come into play if the result of the boolean expression is stored
+ in a variable, or used in arithmetic.
+
+ In addition, every boolean expression is also a valid boolean
+ pattern, so you can use it as a pattern to control the execution of
+ rules.
+
+ Here are descriptions of the three boolean operators, with an
+ example of each. It may be instructive to compare these examples with
+ the analogous examples of boolean patterns (*note Boolean Operators and
+ Patterns: Boolean Patterns.), which use the same boolean operators in
+ patterns instead of expressions.
+
+ `BOOLEAN1 && BOOLEAN2'
+ True if both BOOLEAN1 and BOOLEAN2 are true. For example, the
+ following statement prints the current input record if it contains
+ both `2400' and `foo'.
+
+ if ($0 ~ /2400/ && $0 ~ /foo/) print
+
+ The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is true.
+ This can make a difference when BOOLEAN2 contains expressions that
+ have side effects: in the case of `$0 ~ /foo/ && ($2 == bar++)',
+ the variable `bar' is not incremented if there is no `foo' in the
+ record.
+
+ `BOOLEAN1 || BOOLEAN2'
+ True if at least one of BOOLEAN1 or BOOLEAN2 is true. For
+ example, the following command prints all records in the input
+ file `BBS-list' that contain *either* `2400' or `foo', or both.
+
+ awk '{ if ($0 ~ /2400/ || $0 ~ /foo/) print }' BBS-list
+
+ The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is false.
+ This can make a difference when BOOLEAN2 contains expressions
+ that have side effects.
+
+ `!BOOLEAN'
+ True if BOOLEAN is false. For example, the following program
+ prints all records in the input file `BBS-list' that do *not*
+ contain the string `foo'.
+
+ awk '{ if (! ($0 ~ /foo/)) print }' BBS-list
+
+
+ File: gawk.info, Node: Assignment Ops, Next: Increment Ops, Prev: Boolean Ops, Up: Expressions
+
+ Assignment Expressions
+ ======================
+
+ An "assignment" is an expression that stores a new value into a
+ variable. For example, let's assign the value 1 to the variable `z':
+
+ z = 1
+
+ After this expression is executed, the variable `z' has the value 1.
+ Whatever old value `z' had before the assignment is forgotten.
+
+ Assignments can store string values also. For example, this would
+ store the value `"this food is good"' in the variable `message':
+
+ thing = "food"
+ predicate = "good"
+ message = "this " thing " is " predicate
+
+ (This also illustrates concatenation of strings.)
+
+ The `=' sign is called an "assignment operator". It is the simplest
+ assignment operator because the value of the right-hand operand is
+ stored unchanged.
+
+ Most operators (addition, concatenation, and so on) have no effect
+ except to compute a value. If you ignore the value, you might as well
+ not use the operator. An assignment operator is different; it does
+ produce a value, but even if you ignore the value, the assignment still
+ makes itself felt through the alteration of the variable. We call this
+ a "side effect".
+
+ The left-hand operand of an assignment need not be a variable (*note
+ Variables::.); it can also be a field (*note Changing the Contents of a
+ Field: Changing Fields.) or an array element (*note Arrays in `awk':
+ Arrays.). These are all called "lvalues", which means they can appear
+ on the left-hand side of an assignment operator. The right-hand
+ operand may be any expression; it produces the new value which the
+ assignment stores in the specified variable, field or array element.
+
+ It is important to note that variables do *not* have permanent types.
+ The type of a variable is simply the type of whatever value it happens
+ to hold at the moment. In the following program fragment, the variable
+ `foo' has a numeric value at first, and a string value later on:
+
+ foo = 1
+ print foo
+ foo = "bar"
+ print foo
+
+ When the second assignment gives `foo' a string value, the fact that it
+ previously had a numeric value is forgotten.
+
+ An assignment is an expression, so it has a value: the same value
+ that is assigned. Thus, `z = 1' as an expression has the value 1. One
+ consequence of this is that you can write multiple assignments together:
+
+ x = y = z = 0
+
+ stores the value 0 in all three variables. It does this because the
+ value of `z = 0', which is 0, is stored into `y', and then the value of
+ `y = z = 0', which is 0, is stored into `x'.
+
+ You can use an assignment anywhere an expression is called for. For
+ example, it is valid to write `x != (y = 1)' to set `y' to 1 and then
+ test whether `x' equals 1. But this style tends to make programs hard
+ to read; except in a one-shot program, you should rewrite it to get rid
+ of such nesting of assignments. This is never very hard.
+
+ Aside from `=', there are several other assignment operators that do
+ arithmetic with the old value of the variable. For example, the
+ operator `+=' computes a new value by adding the right-hand value to
+ the old value of the variable. Thus, the following assignment adds 5
+ to the value of `foo':
+
+ foo += 5
+
+ This is precisely equivalent to the following:
+
+ foo = foo + 5
+
+ Use whichever one makes the meaning of your program clearer.
+
+ Here is a table of the arithmetic assignment operators. In each
+ case, the right-hand operand is an expression whose value is converted
+ to a number.
+
+ `LVALUE += INCREMENT'
+ Adds INCREMENT to the value of LVALUE to make the new value of
+ LVALUE.
+
+ `LVALUE -= DECREMENT'
+ Subtracts DECREMENT from the value of LVALUE.
+
+ `LVALUE *= COEFFICIENT'
+ Multiplies the value of LVALUE by COEFFICIENT.
+
+ `LVALUE /= QUOTIENT'
+ Divides the value of LVALUE by QUOTIENT.
+
+ `LVALUE %= MODULUS'
+ Sets LVALUE to its remainder by MODULUS.
+
+ `LVALUE ^= POWER'
+ `LVALUE **= POWER'
+ Raises LVALUE to the power POWER. (Only the `^=' operator is
+ specified by POSIX.)
+
+
+ File: gawk.info, Node: Increment Ops, Next: Conversion, Prev: Assignment Ops, Up: Expressions
+
+ Increment Operators
+ ===================
+
+ "Increment operators" increase or decrease the value of a variable
+ by 1. You could do the same thing with an assignment operator, so the
+ increment operators add no power to the `awk' language; but they are
+ convenient abbreviations for something very common.
+
+ The operator to add 1 is written `++'. It can be used to increment
+ a variable either before or after taking its value.
+
+ To pre-increment a variable V, write `++V'. This adds 1 to the
+ value of V and that new value is also the value of this expression.
+ The assignment expression `V += 1' is completely equivalent.
+
+ Writing the `++' after the variable specifies post-increment. This
+ increments the variable value just the same; the difference is that the
+ value of the increment expression itself is the variable's *old* value.
+ Thus, if `foo' has the value 4, then the expression `foo++' has the
+ value 4, but it changes the value of `foo' to 5.
+
+ The post-increment `foo++' is nearly equivalent to writing `(foo +=
+ 1) - 1'. It is not perfectly equivalent because all numbers in `awk'
+ are floating point: in floating point, `foo + 1 - 1' does not
+ necessarily equal `foo'. But the difference is minute as long as you
+ stick to numbers that are fairly small (less than a trillion).
+
+ Any lvalue can be incremented. Fields and array elements are
+ incremented just like variables. (Use `$(i++)' when you wish to do a
+ field reference and a variable increment at the same time. The
+ parentheses are necessary because of the precedence of the field
+ reference operator, `$'.)
+
+ The decrement operator `--' works just like `++' except that it
+ subtracts 1 instead of adding. Like `++', it can be used before the
+ lvalue to pre-decrement or after it to post-decrement.
+
+ Here is a summary of increment and decrement expressions.
+
+ `++LVALUE'
+ This expression increments LVALUE and the new value becomes the
+ value of this expression.
+
+ `LVALUE++'
+ This expression causes the contents of LVALUE to be incremented.
+ The value of the expression is the *old* value of LVALUE.
+
+ `--LVALUE'
+ Like `++LVALUE', but instead of adding, it subtracts. It
+ decrements LVALUE and delivers the value that results.
+
+ `LVALUE--'
+ Like `LVALUE++', but instead of adding, it subtracts. It
+ decrements LVALUE. The value of the expression is the *old* value
+ of LVALUE.
+
+
+ File: gawk.info, Node: Conversion, Next: Values, Prev: Increment Ops, Up: Expressions
+
+ Conversion of Strings and Numbers
+ =================================
+
+ Strings are converted to numbers, and numbers to strings, if the
+ context of the `awk' program demands it. For example, if the value of
+ either `foo' or `bar' in the expression `foo + bar' happens to be a
+ string, it is converted to a number before the addition is performed.
+ If numeric values appear in string concatenation, they are converted to
+ strings. Consider this:
+
+ two = 2; three = 3
+ print (two three) + 4
+
+ This eventually prints the (numeric) value 27. The numeric values of
+ the variables `two' and `three' are converted to strings and
+ concatenated together, and the resulting string is converted back to the
+ number 23, to which 4 is then added.
+
+ If, for some reason, you need to force a number to be converted to a
+ string, concatenate the null string with that number. To force a string
+ to be converted to a number, add zero to that string.
+
+ A string is converted to a number by interpreting a numeric prefix
+ of the string as numerals: `"2.5"' converts to 2.5, `"1e3"' converts to
+ 1000, and `"25fix"' has a numeric value of 25. Strings that can't be
+ interpreted as valid numbers are converted to zero.
+
+ The exact manner in which numbers are converted into strings is
+ controlled by the `awk' built-in variable `CONVFMT' (*note Built-in
+ Variables::.). Numbers are converted using a special version of the
+ `sprintf' function (*note Built-in Functions: Built-in.) with `CONVFMT'
+ as the format specifier.
+
+ `CONVFMT''s default value is `"%.6g"', which prints a value with at
+ least six significant digits. For some applications you will want to
+ change it to specify more precision. Double precision on most modern
+ machines gives you 16 or 17 decimal digits of precision.
+
+ Strange results can happen if you set `CONVFMT' to a string that
+ doesn't tell `sprintf' how to format floating point numbers in a useful
+ way. For example, if you forget the `%' in the format, all numbers
+ will be converted to the same constant string.
+
+ As a special case, if a number is an integer, then the result of
+ converting it to a string is *always* an integer, no matter what the
+ value of `CONVFMT' may be. Given the following code fragment:
+
+ CONVFMT = "%2.2f"
+ a = 12
+ b = a ""
+
+ `b' has the value `"12"', not `"12.00"'.
+
+ Prior to the POSIX standard, `awk' specified that the value of
+ `OFMT' was used for converting numbers to strings. `OFMT' specifies
+ the output format to use when printing numbers with `print'. `CONVFMT'
+ was introduced in order to separate the semantics of conversions from
+ the semantics of printing. Both `CONVFMT' and `OFMT' have the same
+ default value: `"%.6g"'. In the vast majority of cases, old `awk'
+ programs will not change their behavior. However, this use of `OFMT'
+ is something to keep in mind if you must port your program to other
+ implementations of `awk'; we recommend that instead of changing your
+ programs, you just port `gawk' itself!
+
+
+ File: gawk.info, Node: Values, Next: Conditional Exp, Prev: Conversion, Up: Expressions
+
+ Numeric and String Values
+ =========================
+
+ Through most of this manual, we present `awk' values (such as
+ constants, fields, or variables) as *either* numbers *or* strings.
+ This is a convenient way to think about them, since typically they are
+ used in only one way, or the other.
+
+ In truth though, `awk' values can be *both* string and numeric, at
+ the same time. Internally, `awk' represents values with a string, a
+ (floating point) number, and an indication that one, the other, or both
+ representations of the value are valid.
+
+ Keeping track of both kinds of values is important for execution
+ efficiency: a variable can acquire a string value the first time it is
+ used as a string, and then that string value can be used until the
+ variable is assigned a new value. Thus, if a variable with only a
+ numeric value is used in several concatenations in a row, it only has
+ to be given a string representation once. The numeric value remains
+ valid, so that no conversion back to a number is necessary if the
+ variable is later used in an arithmetic expression.
+
+ Tracking both kinds of values is also important for precise numerical
+ calculations. Consider the following:
+
+ a = 123.321
+ CONVFMT = "%3.1f"
+ b = a " is a number"
+ c = a + 1.654
+
+ The variable `a' receives a string value in the concatenation and
+ assignment to `b'. The string value of `a' is `"123.3"'. If the
+ numeric value was lost when it was converted to a string, then the
+ numeric use of `a' in the last statement would lose information. `c'
+ would be assigned the value 124.954 instead of 124.975. Such errors
+ accumulate rapidly, and very adversely affect numeric computations.
+
+ Once a numeric value acquires a corresponding string value, it stays
+ valid until a new assignment is made. If `CONVFMT' (*note Conversion
+ of Strings and Numbers: Conversion.) changes in the meantime, the old
+ string value will still be used. For example:
+
+ BEGIN {
+ CONVFMT = "%2.2f"
+ a = 123.456
+ b = a "" # force `a' to have string value too
+ printf "a = %s\n", a
+ CONVFMT = "%.6g"
+ printf "a = %s\n", a
+ a += 0 # make `a' numeric only again
+ printf "a = %s\n", a # use `a' as string
+ }
+
+ This program prints `a = 123.46' twice, and then prints `a = 123.456'.
+
+ *Note Conversion of Strings and Numbers: Conversion, for the rules
+ that specify how string values are made from numeric values.
+
+
+ File: gawk.info, Node: Conditional Exp, Next: Function Calls, Prev: Values, Up: Expressions
+
+ Conditional Expressions
+ =======================
+
+ A "conditional expression" is a special kind of expression with
+ three operands. It allows you to use one expression's value to select
+ one of two other expressions.
+
+ The conditional expression looks the same as in the C language:
+
+ SELECTOR ? IF-TRUE-EXP : IF-FALSE-EXP
+
+ There are three subexpressions. The first, SELECTOR, is always
+ computed first. If it is "true" (not zero and not null) then
+ IF-TRUE-EXP is computed next and its value becomes the value of the
+ whole expression. Otherwise, IF-FALSE-EXP is computed next and its
+ value becomes the value of the whole expression.
+
+ For example, this expression produces the absolute value of `x':
+
+ x > 0 ? x : -x
+
+ Each time the conditional expression is computed, exactly one of
+ IF-TRUE-EXP and IF-FALSE-EXP is computed; the other is ignored. This
+ is important when the expressions contain side effects. For example,
+ this conditional expression examines element `i' of either array `a' or
+ array `b', and increments `i'.
+
+ x == y ? a[i++] : b[i++]
+
+ This is guaranteed to increment `i' exactly once, because each time one
+ or the other of the two increment expressions is executed, and the
+ other is not.
+
+
+ File: gawk.info, Node: Function Calls, Next: Precedence, Prev: Conditional Exp, Up: Expressions
+
+ Function Calls
+ ==============
+
+ A "function" is a name for a particular calculation. Because it has
+ a name, you can ask for it by name at any point in the program. For
+ example, the function `sqrt' computes the square root of a number.
+
+ A fixed set of functions are "built-in", which means they are
+ available in every `awk' program. The `sqrt' function is one of these.
+ *Note Built-in Functions: Built-in, for a list of built-in functions
+ and their descriptions. In addition, you can define your own functions
+ in the program for use elsewhere in the same program. *Note
+ User-defined Functions: User-defined, for how to do this.
+
+ The way to use a function is with a "function call" expression,
+ which consists of the function name followed by a list of "arguments"
+ in parentheses. The arguments are expressions which give the raw
+ materials for the calculation that the function will do. When there is
+ more than one argument, they are separated by commas. If there are no
+ arguments, write just `()' after the function name. Here are some
+ examples:
+
+ sqrt(x^2 + y^2) # One argument
+ atan2(y, x) # Two arguments
+ rand() # No arguments
+
+ *Do not put any space between the function name and the
+ open-parenthesis!* A user-defined function name looks just like the
+ name of a variable, and space would make the expression look like
+ concatenation of a variable with an expression inside parentheses.
+ Space before the parenthesis is harmless with built-in functions, but
+ it is best not to get into the habit of using space to avoid mistakes
+ with user-defined functions.
+
+ Each function expects a particular number of arguments. For
+ example, the `sqrt' function must be called with a single argument, the
+ number to take the square root of:
+
+ sqrt(ARGUMENT)
+
+ Some of the built-in functions allow you to omit the final argument.
+ If you do so, they use a reasonable default. *Note Built-in Functions:
+ Built-in, for full details. If arguments are omitted in calls to
+ user-defined functions, then those arguments are treated as local
+ variables, initialized to the null string (*note User-defined
+ Functions: User-defined.).
+
+ Like every other expression, the function call has a value, which is
+ computed by the function based on the arguments you give it. In this
+ example, the value of `sqrt(ARGUMENT)' is the square root of the
+ argument. A function can also have side effects, such as assigning the
+ values of certain variables or doing I/O.
+
+ Here is a command to read numbers, one number per line, and print the
+ square root of each one:
+
+ awk '{ print "The square root of", $1, "is", sqrt($1) }'
+
+
+ File: gawk.info, Node: Precedence, Prev: Function Calls, Up: Expressions
+
+ Operator Precedence (How Operators Nest)
+ ========================================
+
+ "Operator precedence" determines how operators are grouped, when
+ different operators appear close by in one expression. For example,
+ `*' has higher precedence than `+'; thus, `a + b * c' means to multiply
+ `b' and `c', and then add `a' to the product (i.e., `a + (b * c)').
+
+ You can overrule the precedence of the operators by using
+ parentheses. You can think of the precedence rules as saying where the
+ parentheses are assumed if you do not write parentheses yourself. In
+ fact, it is wise to always use parentheses whenever you have an unusual
+ combination of operators, because other people who read the program may
+ not remember what the precedence is in this case. You might forget,
+ too; then you could make a mistake. Explicit parentheses will help
+ prevent any such mistake.
+
+ When operators of equal precedence are used together, the leftmost
+ operator groups first, except for the assignment, conditional and
+ exponentiation operators, which group in the opposite order. Thus, `a
+ - b + c' groups as `(a - b) + c'; `a = b = c' groups as `a = (b = c)'.
+
+ The precedence of prefix unary operators does not matter as long as
+ only unary operators are involved, because there is only one way to
+ parse them--innermost first. Thus, `$++i' means `$(++i)' and `++$x'
+ means `++($x)'. However, when another operator follows the operand,
+ then the precedence of the unary operators can matter. Thus, `$x^2'
+ means `($x)^2', but `-x^2' means `-(x^2)', because `-' has lower
+ precedence than `^' while `$' has higher precedence.
+
+ Here is a table of the operators of `awk', in order of increasing
+ precedence:
+
+ assignment
+ `=', `+=', `-=', `*=', `/=', `%=', `^=', `**='. These operators
+ group right-to-left. (The `**=' operator is not specified by
+ POSIX.)
+
+ conditional
+ `?:'. This operator groups right-to-left.
+
+ logical "or".
+ `||'.
+
+ logical "and".
+ `&&'.
+
+ array membership
+ `in'.
+
+ matching
+ `~', `!~'.
+
+ relational, and redirection
+ The relational operators and the redirections have the same
+ precedence level. Characters such as `>' serve both as
+ relationals and as redirections; the context distinguishes between
+ the two meanings.
+
+ The relational operators are `<', `<=', `==', `!=', `>=' and `>'.
+
+ The I/O redirection operators are `<', `>', `>>' and `|'.
+
+ Note that I/O redirection operators in `print' and `printf'
+ statements belong to the statement level, not to expressions. The
+ redirection does not produce an expression which could be the
+ operand of another operator. As a result, it does not make sense
+ to use a redirection operator near another operator of lower
+ precedence, without parentheses. Such combinations, for example
+ `print foo > a ? b : c', result in syntax errors.
+
+ concatenation
+ No special token is used to indicate concatenation. The operands
+ are simply written side by side.
+
+ add, subtract
+ `+', `-'.
+
+ multiply, divide, mod
+ `*', `/', `%'.
+
+ unary plus, minus, "not"
+ `+', `-', `!'.
+
+ exponentiation
+ `^', `**'. These operators group right-to-left. (The `**'
+ operator is not specified by POSIX.)
+
+ increment, decrement
+ `++', `--'.
+
+ field
+ `$'.
+
+
+ File: gawk.info, Node: Statements, Next: Arrays, Prev: Expressions, Up: Top
+
+ Control Statements in Actions
+ *****************************
+
+ "Control statements" such as `if', `while', and so on control the
+ flow of execution in `awk' programs. Most of the control statements in
+ `awk' are patterned on similar statements in C.
+
+ All the control statements start with special keywords such as `if'
+ and `while', to distinguish them from simple expressions.
+
+ Many control statements contain other statements; for example, the
+ `if' statement contains another statement which may or may not be
+ executed. The contained statement is called the "body". If you want
+ to include more than one statement in the body, group them into a
+ single compound statement with curly braces, separating them with
+ newlines or semicolons.
+
+ * Menu:
+
+ * If Statement:: Conditionally execute
+ some `awk' statements.
+ * While Statement:: Loop until some condition is satisfied.
+ * Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+ * For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+ * Break Statement:: Immediately exit the innermost enclosing loop.
+ * Continue Statement:: Skip to the end of the innermost
+ enclosing loop.
+ * Next Statement:: Stop processing the current input record.
+ * Next File Statement:: Stop processing the current file.
+ * Exit Statement:: Stop execution of `awk'.
+
+
+ File: gawk.info, Node: If Statement, Next: While Statement, Prev: Statements, Up: Statements
+
+ The `if' Statement
+ ==================
+
+ The `if'-`else' statement is `awk''s decision-making statement. It
+ looks like this:
+
+ if (CONDITION) THEN-BODY [else ELSE-BODY]
+
+ CONDITION is an expression that controls what the rest of the statement
+ will do. If CONDITION is true, THEN-BODY is executed; otherwise,
+ ELSE-BODY is executed (assuming that the `else' clause is present).
+ The `else' part of the statement is optional. The condition is
+ considered false if its value is zero or the null string, and true
+ otherwise.
+
+ Here is an example:
+
+ if (x % 2 == 0)
+ print "x is even"
+ else
+ print "x is odd"
+
+ In this example, if the expression `x % 2 == 0' is true (that is,
+ the value of `x' is divisible by 2), then the first `print' statement
+ is executed, otherwise the second `print' statement is performed.
+
+ If the `else' appears on the same line as THEN-BODY, and THEN-BODY
+ is not a compound statement (i.e., not surrounded by curly braces),
+ then a semicolon must separate THEN-BODY from `else'. To illustrate
+ this, let's rewrite the previous example:
+
+ awk '{ if (x % 2 == 0) print "x is even"; else
+ print "x is odd" }'
+
+ If you forget the `;', `awk' won't be able to parse the statement, and
+ you will get a syntax error.
+
+ We would not actually write this example this way, because a human
+ reader might fail to see the `else' if it were not the first thing on
+ its line.
+
+
+ File: gawk.info, Node: While Statement, Next: Do Statement, Prev: If Statement, Up: Statements
+
+ The `while' Statement
+ =====================
+
+ In programming, a "loop" means a part of a program that is (or at
+ least can be) executed two or more times in succession.
+
+ The `while' statement is the simplest looping statement in `awk'.
+ It repeatedly executes a statement as long as a condition is true. It
+ looks like this:
+
+ while (CONDITION)
+ BODY
+
+ Here BODY is a statement that we call the "body" of the loop, and
+ CONDITION is an expression that controls how long the loop keeps
+ running.
+
+ The first thing the `while' statement does is test CONDITION. If
+ CONDITION is true, it executes the statement BODY. (CONDITION is true
+ when the value is not zero and not a null string.) After BODY has been
+ executed, CONDITION is tested again, and if it is still true, BODY is
+ executed again. This process repeats until CONDITION is no longer
+ true. If CONDITION is initially false, the body of the loop is never
+ executed.
+
+ This example prints the first three fields of each record, one per
+ line.
+
+ awk '{ i = 1
+ while (i <= 3) {
+ print $i
+ i++
+ }
+ }'
+
+ Here the body of the loop is a compound statement enclosed in braces,
+ containing two statements.
+
+ The loop works like this: first, the value of `i' is set to 1.
+ Then, the `while' tests whether `i' is less than or equal to three.
+ This is the case when `i' equals one, so the `i'-th field is printed.
+ Then the `i++' increments the value of `i' and the loop repeats. The
+ loop terminates when `i' reaches 4.
+
+ As you can see, a newline is not required between the condition and
+ the body; but using one makes the program clearer unless the body is a
+ compound statement or is very simple. The newline after the open-brace
+ that begins the compound statement is not required either, but the
+ program would be hard to read without it.
+
+
+ File: gawk.info, Node: Do Statement, Next: For Statement, Prev: While Statement, Up: Statements
+
+ The `do'-`while' Statement
+ ==========================
+
+ The `do' loop is a variation of the `while' looping statement. The
+ `do' loop executes the BODY once, then repeats BODY as long as
+ CONDITION is true. It looks like this:
+
+ do
+ BODY
+ while (CONDITION)
+
+ Even if CONDITION is false at the start, BODY is executed at least
+ once (and only once, unless executing BODY makes CONDITION true).
+ Contrast this with the corresponding `while' statement:
+
+ while (CONDITION)
+ BODY
+
+ This statement does not execute BODY even once if CONDITION is false to
+ begin with.
+
+ Here is an example of a `do' statement:
+
+ awk '{ i = 1
+ do {
+ print $0
+ i++
+ } while (i <= 10)
+ }'
+
+ prints each input record ten times. It isn't a very realistic example,
+ since in this case an ordinary `while' would do just as well. But this
+ reflects actual experience; there is only occasionally a real use for a
+ `do' statement.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-5 gawk-2.15.5/gawk.info-5
*** /src/baseline/gawk-2.15.5/gawk.info-5 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-5 Sun Jun 12 22:28:52 1994
***************
*** 0 ****
--- 1,1256 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: For Statement, Next: Break Statement, Prev: Do Statement, Up: Statements
+
+ The `for' Statement
+ ===================
+
+ The `for' statement makes it more convenient to count iterations of a
+ loop. The general form of the `for' statement looks like this:
+
+ for (INITIALIZATION; CONDITION; INCREMENT)
+ BODY
+
+ This statement starts by executing INITIALIZATION. Then, as long as
+ CONDITION is true, it repeatedly executes BODY and then INCREMENT.
+ Typically INITIALIZATION sets a variable to either zero or one,
+ INCREMENT adds 1 to it, and CONDITION compares it against the desired
+ number of iterations.
+
+ Here is an example of a `for' statement:
+
+ awk '{ for (i = 1; i <= 3; i++)
+ print $i
+ }'
+
+ This prints the first three fields of each input record, one field per
+ line.
+
+ In the `for' statement, BODY stands for any statement, but
+ INITIALIZATION, CONDITION and INCREMENT are just expressions. You
+ cannot set more than one variable in the INITIALIZATION part unless you
+ use a multiple assignment statement such as `x = y = 0', which is
+ possible only if all the initial values are equal. (But you can
+ initialize additional variables by writing their assignments as
+ separate statements preceding the `for' loop.)
+
+ The same is true of the INCREMENT part; to increment additional
+ variables, you must write separate statements at the end of the loop.
+ The C compound expression, using C's comma operator, would be useful in
+ this context, but it is not supported in `awk'.
+
+ Most often, INCREMENT is an increment expression, as in the example
+ above. But this is not required; it can be any expression whatever.
+ For example, this statement prints all the powers of 2 between 1 and
+ 100:
+
+ for (i = 1; i <= 100; i *= 2)
+ print i
+
+ Any of the three expressions in the parentheses following the `for'
+ may be omitted if there is nothing to be done there. Thus,
+ `for (;x > 0;)' is equivalent to `while (x > 0)'. If the CONDITION is
+ omitted, it is treated as TRUE, effectively yielding an "infinite loop"
+ (i.e., a loop that will never terminate).
+
+ In most cases, a `for' loop is an abbreviation for a `while' loop,
+ as shown here:
+
+ INITIALIZATION
+ while (CONDITION) {
+ BODY
+ INCREMENT
+ }
+
+ The only exception is when the `continue' statement (*note The
+ `continue' Statement: Continue Statement.) is used inside the loop;
+ changing a `for' statement to a `while' statement in this way can
+ change the effect of the `continue' statement inside the loop.
+
+ There is an alternate version of the `for' loop, for iterating over
+ all the indices of an array:
+
+ for (i in array)
+ DO SOMETHING WITH array[i]
+
+ *Note Arrays in `awk': Arrays, for more information on this version of
+ the `for' loop.
+
+ The `awk' language has a `for' statement in addition to a `while'
+ statement because often a `for' loop is both less work to type and more
+ natural to think of. Counting the number of iterations is very common
+ in loops. It can be easier to think of this counting as part of
+ looping rather than as something to do inside the loop.
+
+ The next section has more complicated examples of `for' loops.
+
+
+ File: gawk.info, Node: Break Statement, Next: Continue Statement, Prev: For Statement, Up: Statements
+
+ The `break' Statement
+ =====================
+
+ The `break' statement jumps out of the innermost `for', `while', or
+ `do'-`while' loop that encloses it. The following example finds the
+ smallest divisor of any integer, and also identifies prime numbers:
+
+ awk '# find smallest divisor of num
+ { num = $1
+ for (div = 2; div*div <= num; div++)
+ if (num % div == 0)
+ break
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num }'
+
+ When the remainder is zero in the first `if' statement, `awk'
+ immediately "breaks out" of the containing `for' loop. This means that
+ `awk' proceeds immediately to the statement following the loop and
+ continues processing. (This is very different from the `exit'
+ statement which stops the entire `awk' program. *Note The `exit'
+ Statement: Exit Statement.)
+
+ Here is another program equivalent to the previous one. It
+ illustrates how the CONDITION of a `for' or `while' could just as well
+ be replaced with a `break' inside an `if':
+
+ awk '# find smallest divisor of num
+ { num = $1
+ for (div = 2; ; div++) {
+ if (num % div == 0) {
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ }
+ if (div*div > num) {
+ printf "%d is prime\n", num
+ break
+ }
+ }
+ }'
+
+
+ File: gawk.info, Node: Continue Statement, Next: Next Statement, Prev: Break Statement, Up: Statements
+
+ The `continue' Statement
+ ========================
+
+ The `continue' statement, like `break', is used only inside `for',
+ `while', and `do'-`while' loops. It skips over the rest of the loop
+ body, causing the next cycle around the loop to begin immediately.
+ Contrast this with `break', which jumps out of the loop altogether.
+ Here is an example:
+
+ # print names that don't contain the string "ignore"
+
+ # first, save the text of each line
+ { names[NR] = $0 }
+
+ # print what we're interested in
+ END {
+ for (x in names) {
+ if (names[x] ~ /ignore/)
+ continue
+ print names[x]
+ }
+ }
+
+ If one of the input records contains the string `ignore', this
+ example skips the print statement for that record, and continues back to
+ the first statement in the loop.
+
+ This is not a practical example of `continue', since it would be
+ just as easy to write the loop like this:
+
+ for (x in names)
+ if (names[x] !~ /ignore/)
+ print names[x]
+
+ The `continue' statement in a `for' loop directs `awk' to skip the
+ rest of the body of the loop, and resume execution with the
+ increment-expression of the `for' statement. The following program
+ illustrates this fact:
+
+ awk 'BEGIN {
+ for (x = 0; x <= 20; x++) {
+ if (x == 5)
+ continue
+ printf ("%d ", x)
+ }
+ print ""
+ }'
+
+ This program prints all the numbers from 0 to 20, except for 5, for
+ which the `printf' is skipped. Since the increment `x++' is not
+ skipped, `x' does not remain stuck at 5. Contrast the `for' loop above
+ with the `while' loop:
+
+ awk 'BEGIN {
+ x = 0
+ while (x <= 20) {
+ if (x == 5)
+ continue
+ printf ("%d ", x)
+ x++
+ }
+ print ""
+ }'
+
+ This program loops forever once `x' gets to 5.
+
+ As described above, the `continue' statement has no meaning when
+ used outside the body of a loop. However, although it was never
+ documented, historical implementations of `awk' have treated the
+ `continue' statement outside of a loop as if it were a `next' statement
+ (*note The `next' Statement: Next Statement.). By default, `gawk'
+ silently supports this usage. However, if `-W posix' has been
+ specified on the command line (*note Invoking `awk': Command Line.), it
+ will be treated as an error, since the POSIX standard specifies that
+ `continue' should only be used inside the body of a loop.
+
+
+ File: gawk.info, Node: Next Statement, Next: Next File Statement, Prev: Continue Statement, Up: Statements
+
+ The `next' Statement
+ ====================
+
+ The `next' statement forces `awk' to immediately stop processing the
+ current record and go on to the next record. This means that no
+ further rules are executed for the current record. The rest of the
+ current rule's action is not executed either.
+
+ Contrast this with the effect of the `getline' function (*note
+ Explicit Input with `getline': Getline.). That too causes `awk' to
+ read the next record immediately, but it does not alter the flow of
+ control in any way. So the rest of the current action executes with a
+ new input record.
+
+ At the highest level, `awk' program execution is a loop that reads
+ an input record and then tests each rule's pattern against it. If you
+ think of this loop as a `for' statement whose body contains the rules,
+ then the `next' statement is analogous to a `continue' statement: it
+ skips to the end of the body of this implicit loop, and executes the
+ increment (which reads another record).
+
+ For example, if your `awk' program works only on records with four
+ fields, and you don't want it to fail when given bad input, you might
+ use this rule near the beginning of the program:
+
+ NF != 4 {
+ printf("line %d skipped: doesn't have 4 fields", FNR) > "/dev/stderr"
+ next
+ }
+
+ so that the following rules will not see the bad record. The error
+ message is redirected to the standard error output stream, as error
+ messages should be. *Note Standard I/O Streams: Special Files.
+
+ According to the POSIX standard, the behavior is undefined if the
+ `next' statement is used in a `BEGIN' or `END' rule. `gawk' will treat
+ it as a syntax error.
+
+ If the `next' statement causes the end of the input to be reached,
+ then the code in the `END' rules, if any, will be executed. *Note
+ `BEGIN' and `END' Special Patterns: BEGIN/END.
+
+
+ File: gawk.info, Node: Next File Statement, Next: Exit Statement, Prev: Next Statement, Up: Statements
+
+ The `next file' Statement
+ =========================
+
+ The `next file' statement is similar to the `next' statement.
+ However, instead of abandoning processing of the current record, the
+ `next file' statement instructs `awk' to stop processing the current
+ data file.
+
+ Upon execution of the `next file' statement, `FILENAME' is updated
+ to the name of the next data file listed on the command line, `FNR' is
+ reset to 1, and processing starts over with the first rule in the
+ progam. *Note Built-in Variables::.
+
+ If the `next file' statement causes the end of the input to be
+ reached, then the code in the `END' rules, if any, will be executed.
+ *Note `BEGIN' and `END' Special Patterns: BEGIN/END.
+
+ The `next file' statement is a `gawk' extension; it is not
+ (currently) available in any other `awk' implementation. You can
+ simulate its behavior by creating a library file named `nextfile.awk',
+ with the following contents. (This sample program uses user-defined
+ functions, a feature that has not been presented yet. *Note
+ User-defined Functions: User-defined, for more information.)
+
+ # nextfile --- function to skip remaining records in current file
+
+ # this should be read in before the "main" awk program
+
+ function nextfile() { _abandon_ = FILENAME; next }
+
+ _abandon_ == FILENAME && FNR > 1 { next }
+ _abandon_ == FILENAME && FNR == 1 { _abandon_ = "" }
+
+ The `nextfile' function simply sets a "private" variable(1) to the
+ name of the current data file, and then retrieves the next record.
+ Since this file is read before the main `awk' program, the rules that
+ follows the function definition will be executed before the rules in
+ the main program. The first rule continues to skip records as long as
+ the name of the input file has not changed, and this is not the first
+ record in the file. This rule is sufficient most of the time. But
+ what if the *same* data file is named twice in a row on the command
+ line? This rule would not process the data file the second time. The
+ second rule catches this case: If the data file name is what was being
+ skipped, but `FNR' is 1, then this is the second time the file is being
+ processed, and it should not be skipped.
+
+ The `next file' statement would be useful if you have many data
+ files to process, and due to the nature of the data, you expect that you
+ would not want to process every record in the file. In order to move
+ on to the next data file, you would have to continue scanning the
+ unwanted records (as described above). The `next file' statement
+ accomplishes this much more efficiently.
+
+ ---------- Footnotes ----------
+
+ (1) Since all variables in `awk' are global, this program uses the
+ common practice of prefixing the variable name with an underscore. In
+ fact, it also suffixes the variable name with an underscore, as extra
+ insurance against using a variable name that might be used in some
+ other library file.
+
+
+ File: gawk.info, Node: Exit Statement, Prev: Next File Statement, Up: Statements
+
+ The `exit' Statement
+ ====================
+
+ The `exit' statement causes `awk' to immediately stop executing the
+ current rule and to stop processing input; any remaining input is
+ ignored.
+
+ If an `exit' statement is executed from a `BEGIN' rule the program
+ stops processing everything immediately. No input records are read.
+ However, if an `END' rule is present, it is executed (*note `BEGIN' and
+ `END' Special Patterns: BEGIN/END.).
+
+ If `exit' is used as part of an `END' rule, it causes the program to
+ stop immediately.
+
+ An `exit' statement that is part of an ordinary rule (that is, not
+ part of a `BEGIN' or `END' rule) stops the execution of any further
+ automatic rules, but the `END' rule is executed if there is one. If
+ you do not want the `END' rule to do its job in this case, you can set
+ a variable to nonzero before the `exit' statement, and check that
+ variable in the `END' rule.
+
+ If an argument is supplied to `exit', its value is used as the exit
+ status code for the `awk' process. If no argument is supplied, `exit'
+ returns status zero (success).
+
+ For example, let's say you've discovered an error condition you
+ really don't know how to handle. Conventionally, programs report this
+ by exiting with a nonzero status. Your `awk' program can do this using
+ an `exit' statement with a nonzero argument. Here's an example of this:
+
+ BEGIN {
+ if (("date" | getline date_now) < 0) {
+ print "Can't get system date" > "/dev/stderr"
+ exit 4
+ }
+ }
+
+
+ File: gawk.info, Node: Arrays, Next: Built-in, Prev: Statements, Up: Top
+
+ Arrays in `awk'
+ ***************
+
+ An "array" is a table of values, called "elements". The elements of
+ an array are distinguished by their indices. "Indices" may be either
+ numbers or strings. Each array has a name, which looks like a variable
+ name, but must not be in use as a variable name in the same `awk'
+ program.
+
+ * Menu:
+
+ * Array Intro:: Introduction to Arrays
+ * Reference to Elements:: How to examine one element of an array.
+ * Assigning Elements:: How to change an element of an array.
+ * Array Example:: Basic Example of an Array
+ * Scanning an Array:: A variation of the `for' statement.
+ It loops through the indices of
+ an array's existing elements.
+ * Delete:: The `delete' statement removes
+ an element from an array.
+ * Numeric Array Subscripts:: How to use numbers as subscripts in `awk'.
+ * Multi-dimensional:: Emulating multi-dimensional arrays in `awk'.
+ * Multi-scanning:: Scanning multi-dimensional arrays.
+
+
+ File: gawk.info, Node: Array Intro, Next: Reference to Elements, Prev: Arrays, Up: Arrays
+
+ Introduction to Arrays
+ ======================
+
+ The `awk' language has one-dimensional "arrays" for storing groups
+ of related strings or numbers.
+
+ Every `awk' array must have a name. Array names have the same
+ syntax as variable names; any valid variable name would also be a valid
+ array name. But you cannot use one name in both ways (as an array and
+ as a variable) in one `awk' program.
+
+ Arrays in `awk' superficially resemble arrays in other programming
+ languages; but there are fundamental differences. In `awk', you don't
+ need to specify the size of an array before you start to use it.
+ Additionally, any number or string in `awk' may be used as an array
+ index.
+
+ In most other languages, you have to "declare" an array and specify
+ how many elements or components it contains. In such languages, the
+ declaration causes a contiguous block of memory to be allocated for that
+ many elements. An index in the array must be a positive integer; for
+ example, the index 0 specifies the first element in the array, which is
+ actually stored at the beginning of the block of memory. Index 1
+ specifies the second element, which is stored in memory right after the
+ first element, and so on. It is impossible to add more elements to the
+ array, because it has room for only as many elements as you declared.
+
+ A contiguous array of four elements might look like this,
+ conceptually, if the element values are `8', `"foo"', `""' and `30':
+
+ +---------+---------+--------+---------+
+ | 8 | "foo" | "" | 30 | value
+ +---------+---------+--------+---------+
+ 0 1 2 3 index
+
+ Only the values are stored; the indices are implicit from the order of
+ the values. `8' is the value at index 0, because `8' appears in the
+ position with 0 elements before it.
+
+ Arrays in `awk' are different: they are "associative". This means
+ that each array is a collection of pairs: an index, and its
+ corresponding array element value:
+
+ Element 4 Value 30
+ Element 2 Value "foo"
+ Element 1 Value 8
+ Element 3 Value ""
+
+ We have shown the pairs in jumbled order because their order is
+ irrelevant.
+
+ One advantage of an associative array is that new pairs can be added
+ at any time. For example, suppose we add to the above array a tenth
+ element whose value is `"number ten"'. The result is this:
+
+ Element 10 Value "number ten"
+ Element 4 Value 30
+ Element 2 Value "foo"
+ Element 1 Value 8
+ Element 3 Value ""
+
+ Now the array is "sparse" (i.e., some indices are missing): it has
+ elements 1-4 and 10, but doesn't have elements 5, 6, 7, 8, or 9.
+
+ Another consequence of associative arrays is that the indices don't
+ have to be positive integers. Any number, or even a string, can be an
+ index. For example, here is an array which translates words from
+ English into French:
+
+ Element "dog" Value "chien"
+ Element "cat" Value "chat"
+ Element "one" Value "un"
+ Element 1 Value "un"
+
+ Here we decided to translate the number 1 in both spelled-out and
+ numeric form--thus illustrating that a single array can have both
+ numbers and strings as indices.
+
+ When `awk' creates an array for you, e.g., with the `split' built-in
+ function, that array's indices are consecutive integers starting at 1.
+ (*Note Built-in Functions for String Manipulation: String Functions.)
+
+
+ File: gawk.info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Arrays
+
+ Referring to an Array Element
+ =============================
+
+ The principal way of using an array is to refer to one of its
+ elements. An array reference is an expression which looks like this:
+
+ ARRAY[INDEX]
+
+ Here, ARRAY is the name of an array. The expression INDEX is the index
+ of the element of the array that you want.
+
+ The value of the array reference is the current value of that array
+ element. For example, `foo[4.3]' is an expression for the element of
+ array `foo' at index 4.3.
+
+ If you refer to an array element that has no recorded value, the
+ value of the reference is `""', the null string. This includes elements
+ to which you have not assigned any value, and elements that have been
+ deleted (*note The `delete' Statement: Delete.). Such a reference
+ automatically creates that array element, with the null string as its
+ value. (In some cases, this is unfortunate, because it might waste
+ memory inside `awk').
+
+ You can find out if an element exists in an array at a certain index
+ with the expression:
+
+ INDEX in ARRAY
+
+ This expression tests whether or not the particular index exists,
+ without the side effect of creating that element if it is not present.
+ The expression has the value 1 (true) if `ARRAY[INDEX]' exists, and 0
+ (false) if it does not exist.
+
+ For example, to test whether the array `frequencies' contains the
+ index `"2"', you could write this statement:
+
+ if ("2" in frequencies) print "Subscript \"2\" is present."
+
+ Note that this is *not* a test of whether or not the array
+ `frequencies' contains an element whose *value* is `"2"'. (There is no
+ way to do that except to scan all the elements.) Also, this *does not*
+ create `frequencies["2"]', while the following (incorrect) alternative
+ would do so:
+
+ if (frequencies["2"] != "") print "Subscript \"2\" is present."
+
+
+ File: gawk.info, Node: Assigning Elements, Next: Array Example, Prev: Reference to Elements, Up: Arrays
+
+ Assigning Array Elements
+ ========================
+
+ Array elements are lvalues: they can be assigned values just like
+ `awk' variables:
+
+ ARRAY[SUBSCRIPT] = VALUE
+
+ Here ARRAY is the name of your array. The expression SUBSCRIPT is the
+ index of the element of the array that you want to assign a value. The
+ expression VALUE is the value you are assigning to that element of the
+ array.
+
+
+ File: gawk.info, Node: Array Example, Next: Scanning an Array, Prev: Assigning Elements, Up: Arrays
+
+ Basic Example of an Array
+ =========================
+
+ The following program takes a list of lines, each beginning with a
+ line number, and prints them out in order of line number. The line
+ numbers are not in order, however, when they are first read: they are
+ scrambled. This program sorts the lines by making an array using the
+ line numbers as subscripts. It then prints out the lines in sorted
+ order of their numbers. It is a very simple program, and gets confused
+ if it encounters repeated numbers, gaps, or lines that don't begin with
+ a number.
+
+ {
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
+ }
+
+ END {
+ for (x = 1; x <= max; x++)
+ print arr[x]
+ }
+
+ The first rule keeps track of the largest line number seen so far;
+ it also stores each line into the array `arr', at an index that is the
+ line's number.
+
+ The second rule runs after all the input has been read, to print out
+ all the lines.
+
+ When this program is run with the following input:
+
+ 5 I am the Five man
+ 2 Who are you? The new number two!
+ 4 . . . And four on the floor
+ 1 Who is number one?
+ 3 I three you.
+
+ its output is this:
+
+ 1 Who is number one?
+ 2 Who are you? The new number two!
+ 3 I three you.
+ 4 . . . And four on the floor
+ 5 I am the Five man
+
+ If a line number is repeated, the last line with a given number
+ overrides the others.
+
+ Gaps in the line numbers can be handled with an easy improvement to
+ the program's `END' rule:
+
+ END {
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
+ }
+
+
+ File: gawk.info, Node: Scanning an Array, Next: Delete, Prev: Array Example, Up: Arrays
+
+ Scanning all Elements of an Array
+ =================================
+
+ In programs that use arrays, often you need a loop that executes
+ once for each element of an array. In other languages, where arrays are
+ contiguous and indices are limited to positive integers, this is easy:
+ the largest index is one less than the length of the array, and you can
+ find all the valid indices by counting from zero up to that value. This
+ technique won't do the job in `awk', since any number or string may be
+ an array index. So `awk' has a special kind of `for' statement for
+ scanning an array:
+
+ for (VAR in ARRAY)
+ BODY
+
+ This loop executes BODY once for each different value that your program
+ has previously used as an index in ARRAY, with the variable VAR set to
+ that index.
+
+ Here is a program that uses this form of the `for' statement. The
+ first rule scans the input records and notes which words appear (at
+ least once) in the input, by storing a 1 into the array `used' with the
+ word as index. The second rule scans the elements of `used' to find
+ all the distinct words that appear in the input. It prints each word
+ that is more than 10 characters long, and also prints the number of
+ such words. *Note Built-in Functions: Built-in, for more information
+ on the built-in function `length'.
+
+ # Record a 1 for each word that is used at least once.
+ {
+ for (i = 1; i <= NF; i++)
+ used[$i] = 1
+ }
+
+ # Find number of distinct words more than 10 characters long.
+ END {
+ for (x in used)
+ if (length(x) > 10) {
+ ++num_long_words
+ print x
+ }
+ print num_long_words, "words longer than 10 characters"
+ }
+
+ *Note Sample Program::, for a more detailed example of this type.
+
+ The order in which elements of the array are accessed by this
+ statement is determined by the internal arrangement of the array
+ elements within `awk' and cannot be controlled or changed. This can
+ lead to problems if new elements are added to ARRAY by statements in
+ BODY; you cannot predict whether or not the `for' loop will reach them.
+ Similarly, changing VAR inside the loop can produce strange results.
+ It is best to avoid such things.
+
+
+ File: gawk.info, Node: Delete, Next: Numeric Array Subscripts, Prev: Scanning an Array, Up: Arrays
+
+ The `delete' Statement
+ ======================
+
+ You can remove an individual element of an array using the `delete'
+ statement:
+
+ delete ARRAY[INDEX]
+
+ You can not refer to an array element after it has been deleted; it
+ is as if you had never referred to it and had never given it any value.
+ You can no longer obtain any value the element once had.
+
+ Here is an example of deleting elements in an array:
+
+ for (i in frequencies)
+ delete frequencies[i]
+
+ This example removes all the elements from the array `frequencies'.
+
+ If you delete an element, a subsequent `for' statement to scan the
+ array will not report that element, and the `in' operator to check for
+ the presence of that element will return 0:
+
+ delete foo[4]
+ if (4 in foo)
+ print "This will never be printed"
+
+ It is not an error to delete an element which does not exist.
+
+
+ File: gawk.info, Node: Numeric Array Subscripts, Next: Multi-dimensional, Prev: Delete, Up: Arrays
+
+ Using Numbers to Subscript Arrays
+ =================================
+
+ An important aspect of arrays to remember is that array subscripts
+ are *always* strings. If you use a numeric value as a subscript, it
+ will be converted to a string value before it is used for subscripting
+ (*note Conversion of Strings and Numbers: Conversion.).
+
+ This means that the value of the `CONVFMT' can potentially affect
+ how your program accesses elements of an array. For example:
+
+ a = b = 12.153
+ data[a] = 1
+ CONVFMT = "%2.2f"
+ if (b in data)
+ printf "%s is in data", b
+ else
+ printf "%s is not in data", b
+
+ should print `12.15 is not in data'. The first statement gives both
+ `a' and `b' the same numeric value. Assigning to `data[a]' first gives
+ `a' the string value `"12.153"' (using the default conversion value of
+ `CONVFMT', `"%.6g"'), and then assigns 1 to `data["12.153"]'. The
+ program then changes the value of `CONVFMT'. The test `(b in data)'
+ forces `b' to be converted to a string, this time `"12.15"', since the
+ value of `CONVFMT' only allows two significant digits. This test fails,
+ since `"12.15"' is a different string from `"12.153"'.
+
+ According to the rules for conversions (*note Conversion of Strings
+ and Numbers: Conversion.), integer values are always converted to
+ strings as integers, no matter what the value of `CONVFMT' may happen
+ to be. So the usual case of
+
+ for (i = 1; i <= maxsub; i++)
+ do something with array[i]
+
+ will work, no matter what the value of `CONVFMT'.
+
+ Like many things in `awk', the majority of the time things work as
+ you would expect them to work. But it is useful to have a precise
+ knowledge of the actual rules, since sometimes they can have a subtle
+ effect on your programs.
+
+
+ File: gawk.info, Node: Multi-dimensional, Next: Multi-scanning, Prev: Numeric Array Subscripts, Up: Arrays
+
+ Multi-dimensional Arrays
+ ========================
+
+ A multi-dimensional array is an array in which an element is
+ identified by a sequence of indices, not a single index. For example, a
+ two-dimensional array requires two indices. The usual way (in most
+ languages, including `awk') to refer to an element of a two-dimensional
+ array named `grid' is with `grid[X,Y]'.
+
+ Multi-dimensional arrays are supported in `awk' through
+ concatenation of indices into one string. What happens is that `awk'
+ converts the indices into strings (*note Conversion of Strings and
+ Numbers: Conversion.) and concatenates them together, with a separator
+ between them. This creates a single string that describes the values
+ of the separate indices. The combined string is used as a single index
+ into an ordinary, one-dimensional array. The separator used is the
+ value of the built-in variable `SUBSEP'.
+
+ For example, suppose we evaluate the expression `foo[5,12]="value"'
+ when the value of `SUBSEP' is `"@"'. The numbers 5 and 12 are
+ converted to strings and concatenated with an `@' between them,
+ yielding `"5@12"'; thus, the array element `foo["5@12"]' is set to
+ `"value"'.
+
+ Once the element's value is stored, `awk' has no record of whether
+ it was stored with a single index or a sequence of indices. The two
+ expressions `foo[5,12]' and `foo[5 SUBSEP 12]' always have the same
+ value.
+
+ The default value of `SUBSEP' is the string `"\034"', which contains
+ a nonprinting character that is unlikely to appear in an `awk' program
+ or in the input data.
+
+ The usefulness of choosing an unlikely character comes from the fact
+ that index values that contain a string matching `SUBSEP' lead to
+ combined strings that are ambiguous. Suppose that `SUBSEP' were `"@"';
+ then `foo["a@b", "c"]' and `foo["a", "b@c"]' would be indistinguishable
+ because both would actually be stored as `foo["a@b@c"]'. Because
+ `SUBSEP' is `"\034"', such confusion can arise only when an index
+ contains the character with ASCII code 034, which is a rare event.
+
+ You can test whether a particular index-sequence exists in a
+ "multi-dimensional" array with the same operator `in' used for single
+ dimensional arrays. Instead of a single index as the left-hand operand,
+ write the whole sequence of indices, separated by commas, in
+ parentheses:
+
+ (SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY
+
+ The following example treats its input as a two-dimensional array of
+ fields; it rotates this array 90 degrees clockwise and prints the
+ result. It assumes that all lines have the same number of elements.
+
+ awk '{
+ if (max_nf < NF)
+ max_nf = NF
+ max_nr = NR
+ for (x = 1; x <= NF; x++)
+ vector[x, NR] = $x
+ }
+
+ END {
+ for (x = 1; x <= max_nf; x++) {
+ for (y = max_nr; y >= 1; --y)
+ printf("%s ", vector[x, y])
+ printf("\n")
+ }
+ }'
+
+ When given the input:
+
+ 1 2 3 4 5 6
+ 2 3 4 5 6 1
+ 3 4 5 6 1 2
+ 4 5 6 1 2 3
+
+ it produces:
+
+ 4 3 2 1
+ 5 4 3 2
+ 6 5 4 3
+ 1 6 5 4
+ 2 1 6 5
+ 3 2 1 6
+
+
+ File: gawk.info, Node: Multi-scanning, Prev: Multi-dimensional, Up: Arrays
+
+ Scanning Multi-dimensional Arrays
+ =================================
+
+ There is no special `for' statement for scanning a
+ "multi-dimensional" array; there cannot be one, because in truth there
+ are no multi-dimensional arrays or elements; there is only a
+ multi-dimensional *way of accessing* an array.
+
+ However, if your program has an array that is always accessed as
+ multi-dimensional, you can get the effect of scanning it by combining
+ the scanning `for' statement (*note Scanning all Elements of an Array:
+ Scanning an Array.) with the `split' built-in function (*note Built-in
+ Functions for String Manipulation: String Functions.). It works like
+ this:
+
+ for (combined in ARRAY) {
+ split(combined, separate, SUBSEP)
+ ...
+ }
+
+ This finds each concatenated, combined index in the array, and splits it
+ into the individual indices by breaking it apart where the value of
+ `SUBSEP' appears. The split-out indices become the elements of the
+ array `separate'.
+
+ Thus, suppose you have previously stored in `ARRAY[1, "foo"]'; then
+ an element with index `"1\034foo"' exists in ARRAY. (Recall that the
+ default value of `SUBSEP' contains the character with code 034.)
+ Sooner or later the `for' statement will find that index and do an
+ iteration with `combined' set to `"1\034foo"'. Then the `split'
+ function is called as follows:
+
+ split("1\034foo", separate, "\034")
+
+ The result of this is to set `separate[1]' to 1 and `separate[2]' to
+ `"foo"'. Presto, the original sequence of separate indices has been
+ recovered.
+
+
+ File: gawk.info, Node: Built-in, Next: User-defined, Prev: Arrays, Up: Top
+
+ Built-in Functions
+ ******************
+
+ "Built-in" functions are functions that are always available for
+ your `awk' program to call. This chapter defines all the built-in
+ functions in `awk'; some of them are mentioned in other sections, but
+ they are summarized here for your convenience. (You can also define
+ new functions yourself. *Note User-defined Functions: User-defined.)
+
+ * Menu:
+
+ * Calling Built-in:: How to call built-in functions.
+ * Numeric Functions:: Functions that work with numbers,
+ including `int', `sin' and `rand'.
+ * String Functions:: Functions for string manipulation,
+ such as `split', `match', and `sprintf'.
+ * I/O Functions:: Functions for files and shell commands.
+ * Time Functions:: Functions for dealing with time stamps.
+
+
+ File: gawk.info, Node: Calling Built-in, Next: Numeric Functions, Prev: Built-in, Up: Built-in
+
+ Calling Built-in Functions
+ ==========================
+
+ To call a built-in function, write the name of the function followed
+ by arguments in parentheses. For example, `atan2(y + z, 1)' is a call
+ to the function `atan2', with two arguments.
+
+ Whitespace is ignored between the built-in function name and the
+ open-parenthesis, but we recommend that you avoid using whitespace
+ there. User-defined functions do not permit whitespace in this way, and
+ you will find it easier to avoid mistakes by following a simple
+ convention which always works: no whitespace after a function name.
+
+ Each built-in function accepts a certain number of arguments. In
+ most cases, any extra arguments given to built-in functions are
+ ignored. The defaults for omitted arguments vary from function to
+ function and are described under the individual functions.
+
+ When a function is called, expressions that create the function's
+ actual parameters are evaluated completely before the function call is
+ performed. For example, in the code fragment:
+
+ i = 4
+ j = sqrt(i++)
+
+ the variable `i' is set to 5 before `sqrt' is called with a value of 4
+ for its actual parameter.
+
+
+ File: gawk.info, Node: Numeric Functions, Next: String Functions, Prev: Calling Built-in, Up: Built-in
+
+ Numeric Built-in Functions
+ ==========================
+
+ Here is a full list of built-in functions that work with numbers:
+
+ `int(X)'
+ This gives you the integer part of X, truncated toward 0. This
+ produces the nearest integer to X, located between X and 0.
+
+ For example, `int(3)' is 3, `int(3.9)' is 3, `int(-3.9)' is -3,
+ and `int(-3)' is -3 as well.
+
+ `sqrt(X)'
+ This gives you the positive square root of X. It reports an error
+ if X is negative. Thus, `sqrt(4)' is 2.
+
+ `exp(X)'
+ This gives you the exponential of X, or reports an error if X is
+ out of range. The range of values X can have depends on your
+ machine's floating point representation.
+
+ `log(X)'
+ This gives you the natural logarithm of X, if X is positive;
+ otherwise, it reports an error.
+
+ `sin(X)'
+ This gives you the sine of X, with X in radians.
+
+ `cos(X)'
+ This gives you the cosine of X, with X in radians.
+
+ `atan2(Y, X)'
+ This gives you the arctangent of `Y / X' in radians.
+
+ `rand()'
+ This gives you a random number. The values of `rand' are
+ uniformly-distributed between 0 and 1. The value is never 0 and
+ never 1.
+
+ Often you want random integers instead. Here is a user-defined
+ function you can use to obtain a random nonnegative integer less
+ than N:
+
+ function randint(n) {
+ return int(n * rand())
+ }
+
+ The multiplication produces a random real number greater than 0
+ and less than N. We then make it an integer (using `int') between
+ 0 and `N - 1'.
+
+ Here is an example where a similar function is used to produce
+ random integers between 1 and N. Note that this program will
+ print a new random number for each input record.
+
+ awk '
+ # Function to roll a simulated die.
+ function roll(n) { return 1 + int(rand() * n) }
+
+ # Roll 3 six-sided dice and print total number of points.
+ {
+ printf("%d points\n", roll(6)+roll(6)+roll(6))
+ }'
+
+ *Note:* `rand' starts generating numbers from the same point, or
+ "seed", each time you run `awk'. This means that a program will
+ produce the same results each time you run it. The numbers are
+ random within one `awk' run, but predictable from run to run.
+ This is convenient for debugging, but if you want a program to do
+ different things each time it is used, you must change the seed to
+ a value that will be different in each run. To do this, use
+ `srand'.
+
+ `srand(X)'
+ The function `srand' sets the starting point, or "seed", for
+ generating random numbers to the value X.
+
+ Each seed value leads to a particular sequence of "random" numbers.
+ Thus, if you set the seed to the same value a second time, you
+ will get the same sequence of "random" numbers again.
+
+ If you omit the argument X, as in `srand()', then the current date
+ and time of day are used for a seed. This is the way to get random
+ numbers that are truly unpredictable.
+
+ The return value of `srand' is the previous seed. This makes it
+ easy to keep track of the seeds for use in consistently reproducing
+ sequences of random numbers.
+
+
+ File: gawk.info, Node: String Functions, Next: I/O Functions, Prev: Numeric Functions, Up: Built-in
+
+ Built-in Functions for String Manipulation
+ ==========================================
+
+ The functions in this section look at or change the text of one or
+ more strings.
+
+ `index(IN, FIND)'
+ This searches the string IN for the first occurrence of the string
+ FIND, and returns the position in characters where that occurrence
+ begins in the string IN. For example:
+
+ awk 'BEGIN { print index("peanut", "an") }'
+
+ prints `3'. If FIND is not found, `index' returns 0. (Remember
+ that string indices in `awk' start at 1.)
+
+ `length(STRING)'
+ This gives you the number of characters in STRING. If STRING is a
+ number, the length of the digit string representing that number is
+ returned. For example, `length("abcde")' is 5. By contrast,
+ `length(15 * 35)' works out to 3. How? Well, 15 * 35 = 525, and
+ 525 is then converted to the string `"525"', which has three
+ characters.
+
+ If no argument is supplied, `length' returns the length of `$0'.
+
+ In older versions of `awk', you could call the `length' function
+ without any parentheses. Doing so is marked as "deprecated" in the
+ POSIX standard. This means that while you can do this in your
+ programs, it is a feature that can eventually be removed from a
+ future version of the standard. Therefore, for maximal
+ portability of your `awk' programs you should always supply the
+ parentheses.
+
+ `match(STRING, REGEXP)'
+ The `match' function searches the string, STRING, for the longest,
+ leftmost substring matched by the regular expression, REGEXP. It
+ returns the character position, or "index", of where that
+ substring begins (1, if it starts at the beginning of STRING). If
+ no match if found, it returns 0.
+
+ The `match' function sets the built-in variable `RSTART' to the
+ index. It also sets the built-in variable `RLENGTH' to the length
+ in characters of the matched substring. If no match is found,
+ `RSTART' is set to 0, and `RLENGTH' to -1.
+
+ For example:
+
+ awk '{
+ if ($1 == "FIND")
+ regex = $2
+ else {
+ where = match($0, regex)
+ if (where)
+ print "Match of", regex, "found at", where, "in", $0
+ }
+ }'
+
+ This program looks for lines that match the regular expression
+ stored in the variable `regex'. This regular expression can be
+ changed. If the first word on a line is `FIND', `regex' is
+ changed to be the second word on that line. Therefore, given:
+
+ FIND fo*bar
+ My program was a foobar
+ But none of it would doobar
+ FIND Melvin
+ JF+KM
+ This line is property of The Reality Engineering Co.
+ This file created by Melvin.
+
+ `awk' prints:
+
+ Match of fo*bar found at 18 in My program was a foobar
+ Match of Melvin found at 26 in This file created by Melvin.
+
+ `split(STRING, ARRAY, FIELDSEP)'
+ This divides STRING into pieces separated by FIELDSEP, and stores
+ the pieces in ARRAY. The first piece is stored in `ARRAY[1]', the
+ second piece in `ARRAY[2]', and so forth. The string value of the
+ third argument, FIELDSEP, is a regexp describing where to split
+ STRING (much as `FS' can be a regexp describing where to split
+ input records). If the FIELDSEP is omitted, the value of `FS' is
+ used. `split' returns the number of elements created.
+
+ The `split' function, then, splits strings into pieces in a manner
+ similar to the way input lines are split into fields. For example:
+
+ split("auto-da-fe", a, "-")
+
+ splits the string `auto-da-fe' into three fields using `-' as the
+ separator. It sets the contents of the array `a' as follows:
+
+ a[1] = "auto"
+ a[2] = "da"
+ a[3] = "fe"
+
+ The value returned by this call to `split' is 3.
+
+ As with input field-splitting, when the value of FIELDSEP is `"
+ "', leading and trailing whitespace is ignored, and the elements
+ are separated by runs of whitespace.
+
+ `sprintf(FORMAT, EXPRESSION1,...)'
+ This returns (without printing) the string that `printf' would
+ have printed out with the same arguments (*note Using `printf'
+ Statements for Fancier Printing: Printf.). For example:
+
+ sprintf("pi = %.2f (approx.)", 22/7)
+
+ returns the string `"pi = 3.14 (approx.)"'.
+
+ `sub(REGEXP, REPLACEMENT, TARGET)'
+ The `sub' function alters the value of TARGET. It searches this
+ value, which should be a string, for the leftmost substring
+ matched by the regular expression, REGEXP, extending this match as
+ far as possible. Then the entire string is changed by replacing
+ the matched text with REPLACEMENT. The modified string becomes
+ the new value of TARGET.
+
+ This function is peculiar because TARGET is not simply used to
+ compute a value, and not just any expression will do: it must be a
+ variable, field or array reference, so that `sub' can store a
+ modified value there. If this argument is omitted, then the
+ default is to use and alter `$0'.
+
+ For example:
+
+ str = "water, water, everywhere"
+ sub(/at/, "ith", str)
+
+ sets `str' to `"wither, water, everywhere"', by replacing the
+ leftmost, longest occurrence of `at' with `ith'.
+
+ The `sub' function returns the number of substitutions made (either
+ one or zero).
+
+ If the special character `&' appears in REPLACEMENT, it stands for
+ the precise substring that was matched by REGEXP. (If the regexp
+ can match more than one string, then this precise substring may
+ vary.) For example:
+
+ awk '{ sub(/candidate/, "& and his wife"); print }'
+
+ changes the first occurrence of `candidate' to `candidate and his
+ wife' on each input line.
+
+ Here is another example:
+
+ awk 'BEGIN {
+ str = "daabaaa"
+ sub(/a*/, "c&c", str)
+ print str
+ }'
+
+ prints `dcaacbaaa'. This show how `&' can represent a non-constant
+ string, and also illustrates the "leftmost, longest" rule.
+
+ The effect of this special character (`&') can be turned off by
+ putting a backslash before it in the string. As usual, to insert
+ one backslash in the string, you must write two backslashes.
+ Therefore, write `\\&' in a string constant to include a literal
+ `&' in the replacement. For example, here is how to replace the
+ first `|' on each line with an `&':
+
+ awk '{ sub(/\|/, "\\&"); print }'
+
+ *Note:* as mentioned above, the third argument to `sub' must be an
+ lvalue. Some versions of `awk' allow the third argument to be an
+ expression which is not an lvalue. In such a case, `sub' would
+ still search for the pattern and return 0 or 1, but the result of
+ the substitution (if any) would be thrown away because there is no
+ place to put it. Such versions of `awk' accept expressions like
+ this:
+
+ sub(/USA/, "United States", "the USA and Canada")
+
+ But that is considered erroneous in `gawk'.
+
+ `gsub(REGEXP, REPLACEMENT, TARGET)'
+ This is similar to the `sub' function, except `gsub' replaces
+ *all* of the longest, leftmost, *nonoverlapping* matching
+ substrings it can find. The `g' in `gsub' stands for "global,"
+ which means replace everywhere. For example:
+
+ awk '{ gsub(/Britain/, "United Kingdom"); print }'
+
+ replaces all occurrences of the string `Britain' with `United
+ Kingdom' for all input records.
+
+ The `gsub' function returns the number of substitutions made. If
+ the variable to be searched and altered, TARGET, is omitted, then
+ the entire input record, `$0', is used.
+
+ As in `sub', the characters `&' and `\' are special, and the third
+ argument must be an lvalue.
+
+ `substr(STRING, START, LENGTH)'
+ This returns a LENGTH-character-long substring of STRING, starting
+ at character number START. The first character of a string is
+ character number one. For example, `substr("washington", 5, 3)'
+ returns `"ing"'.
+
+ If LENGTH is not present, this function returns the whole suffix of
+ STRING that begins at character number START. For example,
+ `substr("washington", 5)' returns `"ington"'. This is also the
+ case if LENGTH is greater than the number of characters remaining
+ in the string, counting from character number START.
+
+ `tolower(STRING)'
+ This returns a copy of STRING, with each upper-case character in
+ the string replaced with its corresponding lower-case character.
+ Nonalphabetic characters are left unchanged. For example,
+ `tolower("MiXeD cAsE 123")' returns `"mixed case 123"'.
+
+ `toupper(STRING)'
+ This returns a copy of STRING, with each lower-case character in
+ the string replaced with its corresponding upper-case character.
+ Nonalphabetic characters are left unchanged. For example,
+ `toupper("MiXeD cAsE 123")' returns `"MIXED CASE 123"'.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-6 gawk-2.15.5/gawk.info-6
*** /src/baseline/gawk-2.15.5/gawk.info-6 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-6 Sun Jun 12 22:28:53 1994
***************
*** 0 ****
--- 1,1234 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: I/O Functions, Next: Time Functions, Prev: String Functions, Up: Built-in
+
+ Built-in Functions for Input/Output
+ ===================================
+
+ `close(FILENAME)'
+ Close the file FILENAME, for input or output. The argument may
+ alternatively be a shell command that was used for redirecting to
+ or from a pipe; then the pipe is closed.
+
+ *Note Closing Input Files and Pipes: Close Input, regarding closing
+ input files and pipes. *Note Closing Output Files and Pipes:
+ Close Output, regarding closing output files and pipes.
+
+ `system(COMMAND)'
+ The system function allows the user to execute operating system
+ commands and then return to the `awk' program. The `system'
+ function executes the command given by the string COMMAND. It
+ returns, as its value, the status returned by the command that was
+ executed.
+
+ For example, if the following fragment of code is put in your `awk'
+ program:
+
+ END {
+ system("mail -s 'awk run done' operator < /dev/null")
+ }
+
+ the system operator will be sent mail when the `awk' program
+ finishes processing input and begins its end-of-input processing.
+
+ Note that much the same result can be obtained by redirecting
+ `print' or `printf' into a pipe. However, if your `awk' program
+ is interactive, `system' is useful for cranking up large
+ self-contained programs, such as a shell or an editor.
+
+ Some operating systems cannot implement the `system' function.
+ `system' causes a fatal error if it is not supported.
+
+ Controlling Output Buffering with `system'
+ ------------------------------------------
+
+ Many utility programs will "buffer" their output; they save
+ information to be written to a disk file or terminal in memory, until
+ there is enough to be written in one operation. This is often more
+ efficient than writing every little bit of information as soon as it is
+ ready. However, sometimes it is necessary to force a program to
+ "flush" its buffers; that is, write the information to its destination,
+ even if a buffer is not full. You can do this from your `awk' program
+ by calling `system' with a null string as its argument:
+
+ system("") # flush output
+
+ `gawk' treats this use of the `system' function as a special case, and
+ is smart enough not to run a shell (or other command interpreter) with
+ the empty command. Therefore, with `gawk', this idiom is not only
+ useful, it is efficient. While this idiom should work with other `awk'
+ implementations, it will not necessarily avoid starting an unnecessary
+ shell.
+
+
+ File: gawk.info, Node: Time Functions, Prev: I/O Functions, Up: Built-in
+
+ Functions for Dealing with Time Stamps
+ ======================================
+
+ A common use for `awk' programs is the processing of log files. Log
+ files often contain time stamp information, indicating when a
+ particular log record was written. Many programs log their time stamp
+ in the form returned by the `time' system call, which is the number of
+ seconds since a particular epoch. On POSIX systems, it is the number
+ of seconds since Midnight, January 1, 1970, UTC.
+
+ In order to make it easier to process such log files, and to easily
+ produce useful reports, `gawk' provides two functions for working with
+ time stamps. Both of these are `gawk' extensions; they are not
+ specified in the POSIX standard, nor are they in any other known version
+ of `awk'.
+
+ `systime()'
+ This function returns the current time as the number of seconds
+ since the system epoch. On POSIX systems, this is the number of
+ seconds since Midnight, January 1, 1970, UTC. It may be a
+ different number on other systems.
+
+ `strftime(FORMAT, TIMESTAMP)'
+ This function returns a string. It is similar to the function of
+ the same name in the ANSI C standard library. The time specified
+ by TIMESTAMP is used to produce a string, based on the contents of
+ the FORMAT string.
+
+ The `systime' function allows you to compare a time stamp from a log
+ file with the current time of day. In particular, it is easy to
+ determine how long ago a particular record was logged. It also allows
+ you to produce log records using the "seconds since the epoch" format.
+
+ The `strftime' function allows you to easily turn a time stamp into
+ human-readable information. It is similar in nature to the `sprintf'
+ function, copying non-format specification characters verbatim to the
+ returned string, and substituting date and time values for format
+ specifications in the FORMAT string. If no TIMESTAMP argument is
+ supplied, `gawk' will use the current time of day as the time stamp.
+
+ `strftime' is guaranteed by the ANSI C standard to support the
+ following date format specifications:
+
+ `%a'
+ The locale's abbreviated weekday name.
+
+ `%A'
+ The locale's full weekday name.
+
+ `%b'
+ The locale's abbreviated month name.
+
+ `%B'
+ The locale's full month name.
+
+ `%c'
+ The locale's "appropriate" date and time representation.
+
+ `%d'
+ The day of the month as a decimal number (01-31).
+
+ `%H'
+ The hour (24-hour clock) as a decimal number (00-23).
+
+ `%I'
+ The hour (12-hour clock) as a decimal number (01-12).
+
+ `%j'
+ The day of the year as a decimal number (001-366).
+
+ `%m'
+ The month as a decimal number (01-12).
+
+ `%M'
+ The minute as a decimal number (00-59).
+
+ `%p'
+ The locale's equivalent of the AM/PM designations associated with
+ a 12-hour clock.
+
+ `%S'
+ The second as a decimal number (00-61). (Occasionally there are
+ minutes in a year with one or two leap seconds, which is why the
+ seconds can go from 0 all the way to 61.)
+
+ `%U'
+ The week number of the year (the first Sunday as the first day of
+ week 1) as a decimal number (00-53).
+
+ `%w'
+ The weekday as a decimal number (0-6). Sunday is day 0.
+
+ `%W'
+ The week number of the year (the first Monday as the first day of
+ week 1) as a decimal number (00-53).
+
+ `%x'
+ The locale's "appropriate" date representation.
+
+ `%X'
+ The locale's "appropriate" time representation.
+
+ `%y'
+ The year without century as a decimal number (00-99).
+
+ `%Y'
+ The year with century as a decimal number.
+
+ `%Z'
+ The time zone name or abbreviation, or no characters if no time
+ zone is determinable.
+
+ `%%'
+ A literal `%'.
+
+ If a conversion specifier is not one of the above, the behavior is
+ undefined. (This is because the ANSI standard for C leaves the
+ behavior of the C version of `strftime' undefined, and `gawk' will use
+ the system's version of `strftime' if it's there. Typically, the
+ conversion specifier will either not appear in the returned string, or
+ it will appear literally.)
+
+ Informally, a "locale" is the geographic place in which a program is
+ meant to run. For example, a common way to abbreviate the date
+ September 4, 1991 in the United States would be "9/4/91". In many
+ countries in Europe, however, it would be abbreviated "4.9.91". Thus,
+ the `%x' specification in a `"US"' locale might produce `9/4/91', while
+ in a `"EUROPE"' locale, it might produce `4.9.91'. The ANSI C standard
+ defines a default `"C"' locale, which is an environment that is typical
+ of what most C programmers are used to.
+
+ A public-domain C version of `strftime' is shipped with `gawk' for
+ systems that are not yet fully ANSI-compliant. If that version is used
+ to compile `gawk' (*note Installing `gawk': Installation.), then the
+ following additional format specifications are available:
+
+ `%D'
+ Equivalent to specifying `%m/%d/%y'.
+
+ `%e'
+ The day of the month, padded with a blank if it is only one digit.
+
+ `%h'
+ Equivalent to `%b', above.
+
+ `%n'
+ A newline character (ASCII LF).
+
+ `%r'
+ Equivalent to specifying `%I:%M:%S %p'.
+
+ `%R'
+ Equivalent to specifying `%H:%M'.
+
+ `%T'
+ Equivalent to specifying `%H:%M:%S'.
+
+ `%t'
+ A TAB character.
+
+ `%k'
+ is replaced by the hour (24-hour clock) as a decimal number (0-23).
+ Single digit numbers are padded with a blank.
+
+ `%l'
+ is replaced by the hour (12-hour clock) as a decimal number (1-12).
+ Single digit numbers are padded with a blank.
+
+ `%C'
+ The century, as a number between 00 and 99.
+
+ `%u'
+ is replaced by the weekday as a decimal number [1 (Monday)-7].
+
+ `%V'
+ is replaced by the week number of the year (the first Monday as
+ the first day of week 1) as a decimal number (01-53). The method
+ for determining the week number is as specified by ISO 8601 (to
+ wit: if the week containing January 1 has four or more days in the
+ new year, then it is week 1, otherwise it is week 53 of the
+ previous year and the next week is week 1).
+
+ `%Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI'
+ `%Om %OM %OS %Ou %OU %OV %Ow %OW %Oy'
+ These are "alternate representations" for the specifications that
+ use only the second letter (`%c', `%C', and so on). They are
+ recognized, but their normal representations are used. (These
+ facilitate compliance with the POSIX `date' utility.)
+
+ `%v'
+ The date in VMS format (e.g. 20-JUN-1991).
+
+ Here are two examples that use `strftime'. The first is an `awk'
+ version of the C `ctime' function. (This is a user defined function,
+ which we have not discussed yet. *Note User-defined Functions:
+ User-defined, for more information.)
+
+ # ctime.awk
+ #
+ # awk version of C ctime(3) function
+
+ function ctime(ts, format)
+ {
+ format = "%a %b %e %H:%M:%S %Z %Y"
+ if (ts == 0)
+ ts = systime() # use current time as default
+ return strftime(format, ts)
+ }
+
+ This next example is an `awk' implementation of the POSIX `date'
+ utility. Normally, the `date' utility prints the current date and time
+ of day in a well known format. However, if you provide an argument to
+ it that begins with a `+', `date' will copy non-format specifier
+ characters to the standard output, and will interpret the current time
+ according to the format specifiers in the string. For example:
+
+ date '+Today is %A, %B %d, %Y.'
+
+ might print
+
+ Today is Thursday, July 11, 1991.
+
+ Here is the `awk' version of the `date' utility.
+
+ #! /bin/gawk -f
+ #
+ # date --- implement the P1003.2 Draft 11 'date' command
+ #
+ # Bug: does not recognize the -u argument.
+
+ BEGIN \
+ {
+ format = "%a %b %e %H:%M:%S %Z %Y"
+ exitval = 0
+
+ if (ARGC > 2)
+ exitval = 1
+ else if (ARGC == 2) {
+ format = ARGV[1]
+ if (format ~ /^\+/)
+ format = substr(format, 2) # remove leading +
+ }
+ print strftime(format)
+ exit exitval
+ }
+
+
+ File: gawk.info, Node: User-defined, Next: Built-in Variables, Prev: Built-in, Up: Top
+
+ User-defined Functions
+ **********************
+
+ Complicated `awk' programs can often be simplified by defining your
+ own functions. User-defined functions can be called just like built-in
+ ones (*note Function Calls::.), but it is up to you to define them--to
+ tell `awk' what they should do.
+
+ * Menu:
+
+ * Definition Syntax:: How to write definitions and what they mean.
+ * Function Example:: An example function definition and
+ what it does.
+ * Function Caveats:: Things to watch out for.
+ * Return Statement:: Specifying the value a function returns.
+
+
+ File: gawk.info, Node: Definition Syntax, Next: Function Example, Prev: User-defined, Up: User-defined
+
+ Syntax of Function Definitions
+ ==============================
+
+ Definitions of functions can appear anywhere between the rules of the
+ `awk' program. Thus, the general form of an `awk' program is extended
+ to include sequences of rules *and* user-defined function definitions.
+
+ The definition of a function named NAME looks like this:
+
+ function NAME (PARAMETER-LIST) {
+ BODY-OF-FUNCTION
+ }
+
+ NAME is the name of the function to be defined. A valid function name
+ is like a valid variable name: a sequence of letters, digits and
+ underscores, not starting with a digit. Functions share the same pool
+ of names as variables and arrays.
+
+ PARAMETER-LIST is a list of the function's arguments and local
+ variable names, separated by commas. When the function is called, the
+ argument names are used to hold the argument values given in the call.
+ The local variables are initialized to the null string.
+
+ The BODY-OF-FUNCTION consists of `awk' statements. It is the most
+ important part of the definition, because it says what the function
+ should actually *do*. The argument names exist to give the body a way
+ to talk about the arguments; local variables, to give the body places
+ to keep temporary values.
+
+ Argument names are not distinguished syntactically from local
+ variable names; instead, the number of arguments supplied when the
+ function is called determines how many argument variables there are.
+ Thus, if three argument values are given, the first three names in
+ PARAMETER-LIST are arguments, and the rest are local variables.
+
+ It follows that if the number of arguments is not the same in all
+ calls to the function, some of the names in PARAMETER-LIST may be
+ arguments on some occasions and local variables on others. Another way
+ to think of this is that omitted arguments default to the null string.
+
+ Usually when you write a function you know how many names you intend
+ to use for arguments and how many you intend to use as locals. By
+ convention, you should write an extra space between the arguments and
+ the locals, so other people can follow how your function is supposed to
+ be used.
+
+ During execution of the function body, the arguments and local
+ variable values hide or "shadow" any variables of the same names used
+ in the rest of the program. The shadowed variables are not accessible
+ in the function definition, because there is no way to name them while
+ their names have been taken away for the local variables. All other
+ variables used in the `awk' program can be referenced or set normally
+ in the function definition.
+
+ The arguments and local variables last only as long as the function
+ body is executing. Once the body finishes, the shadowed variables come
+ back.
+
+ The function body can contain expressions which call functions. They
+ can even call this function, either directly or by way of another
+ function. When this happens, we say the function is "recursive".
+
+ There is no need in `awk' to put the definition of a function before
+ all uses of the function. This is because `awk' reads the entire
+ program before starting to execute any of it.
+
+ In many `awk' implementations, the keyword `function' may be
+ abbreviated `func'. However, POSIX only specifies the use of the
+ keyword `function'. This actually has some practical implications. If
+ `gawk' is in POSIX-compatibility mode (*note Invoking `awk': Command
+ Line.), then the following statement will *not* define a function:
+
+ func foo() { a = sqrt($1) ; print a }
+
+ Instead it defines a rule that, for each record, concatenates the value
+ of the variable `func' with the return value of the function `foo', and
+ based on the truth value of the result, executes the corresponding
+ action. This is probably not what was desired. (`awk' accepts this
+ input as syntactically valid, since functions may be used before they
+ are defined in `awk' programs.)
+
+
+ File: gawk.info, Node: Function Example, Next: Function Caveats, Prev: Definition Syntax, Up: User-defined
+
+ Function Definition Example
+ ===========================
+
+ Here is an example of a user-defined function, called `myprint', that
+ takes a number and prints it in a specific format.
+
+ function myprint(num)
+ {
+ printf "%6.3g\n", num
+ }
+
+ To illustrate, here is an `awk' rule which uses our `myprint' function:
+
+ $3 > 0 { myprint($3) }
+
+ This program prints, in our special format, all the third fields that
+ contain a positive number in our input. Therefore, when given:
+
+ 1.2 3.4 5.6 7.8
+ 9.10 11.12 -13.14 15.16
+ 17.18 19.20 21.22 23.24
+
+ this program, using our function to format the results, prints:
+
+ 5.6
+ 21.2
+
+ Here is a rather contrived example of a recursive function. It
+ prints a string backwards:
+
+ function rev (str, len) {
+ if (len == 0) {
+ printf "\n"
+ return
+ }
+ printf "%c", substr(str, len, 1)
+ rev(str, len - 1)
+ }
+
+
+ File: gawk.info, Node: Function Caveats, Next: Return Statement, Prev: Function Example, Up: User-defined
+
+ Calling User-defined Functions
+ ==============================
+
+ "Calling a function" means causing the function to run and do its
+ job. A function call is an expression, and its value is the value
+ returned by the function.
+
+ A function call consists of the function name followed by the
+ arguments in parentheses. What you write in the call for the arguments
+ are `awk' expressions; each time the call is executed, these
+ expressions are evaluated, and the values are the actual arguments. For
+ example, here is a call to `foo' with three arguments (the first being
+ a string concatenation):
+
+ foo(x y, "lose", 4 * z)
+
+ *Caution:* whitespace characters (spaces and tabs) are not allowed
+ between the function name and the open-parenthesis of the argument
+ list. If you write whitespace by mistake, `awk' might think that
+ you mean to concatenate a variable with an expression in
+ parentheses. However, it notices that you used a function name
+ and not a variable name, and reports an error.
+
+ When a function is called, it is given a *copy* of the values of its
+ arguments. This is called "call by value". The caller may use a
+ variable as the expression for the argument, but the called function
+ does not know this: it only knows what value the argument had. For
+ example, if you write this code:
+
+ foo = "bar"
+ z = myfunc(foo)
+
+ then you should not think of the argument to `myfunc' as being "the
+ variable `foo'." Instead, think of the argument as the string value,
+ `"bar"'.
+
+ If the function `myfunc' alters the values of its local variables,
+ this has no effect on any other variables. In particular, if `myfunc'
+ does this:
+
+ function myfunc (win) {
+ print win
+ win = "zzz"
+ print win
+ }
+
+ to change its first argument variable `win', this *does not* change the
+ value of `foo' in the caller. The role of `foo' in calling `myfunc'
+ ended when its value, `"bar"', was computed. If `win' also exists
+ outside of `myfunc', the function body cannot alter this outer value,
+ because it is shadowed during the execution of `myfunc' and cannot be
+ seen or changed from there.
+
+ However, when arrays are the parameters to functions, they are *not*
+ copied. Instead, the array itself is made available for direct
+ manipulation by the function. This is usually called "call by
+ reference". Changes made to an array parameter inside the body of a
+ function *are* visible outside that function. This can be *very*
+ dangerous if you do not watch what you are doing. For example:
+
+ function changeit (array, ind, nvalue) {
+ array[ind] = nvalue
+ }
+
+ BEGIN {
+ a[1] = 1 ; a[2] = 2 ; a[3] = 3
+ changeit(a, 2, "two")
+ printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3]
+ }
+
+ prints `a[1] = 1, a[2] = two, a[3] = 3', because calling `changeit'
+ stores `"two"' in the second element of `a'.
+
+
+ File: gawk.info, Node: Return Statement, Prev: Function Caveats, Up: User-defined
+
+ The `return' Statement
+ ======================
+
+ The body of a user-defined function can contain a `return' statement.
+ This statement returns control to the rest of the `awk' program. It
+ can also be used to return a value for use in the rest of the `awk'
+ program. It looks like this:
+
+ return EXPRESSION
+
+ The EXPRESSION part is optional. If it is omitted, then the returned
+ value is undefined and, therefore, unpredictable.
+
+ A `return' statement with no value expression is assumed at the end
+ of every function definition. So if control reaches the end of the
+ function body, then the function returns an unpredictable value. `awk'
+ will not warn you if you use the return value of such a function; you
+ will simply get unpredictable or unexpected results.
+
+ Here is an example of a user-defined function that returns a value
+ for the largest number among the elements of an array:
+
+ function maxelt (vec, i, ret) {
+ for (i in vec) {
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ }
+ return ret
+ }
+
+ You call `maxelt' with one argument, which is an array name. The local
+ variables `i' and `ret' are not intended to be arguments; while there
+ is nothing to stop you from passing two or three arguments to `maxelt',
+ the results would be strange. The extra space before `i' in the
+ function parameter list is to indicate that `i' and `ret' are not
+ supposed to be arguments. This is a convention which you should follow
+ when you define functions.
+
+ Here is a program that uses our `maxelt' function. It loads an
+ array, calls `maxelt', and then reports the maximum number in that
+ array:
+
+ awk '
+ function maxelt (vec, i, ret) {
+ for (i in vec) {
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ }
+ return ret
+ }
+
+ # Load all fields of each record into nums.
+ {
+ for(i = 1; i <= NF; i++)
+ nums[NR, i] = $i
+ }
+
+ END {
+ print maxelt(nums)
+ }'
+
+ Given the following input:
+
+ 1 5 23 8 16
+ 44 3 5 2 8 26
+ 256 291 1396 2962 100
+ -6 467 998 1101
+ 99385 11 0 225
+
+ our program tells us (predictably) that:
+
+ 99385
+
+ is the largest number in our array.
+
+
+ File: gawk.info, Node: Built-in Variables, Next: Command Line, Prev: User-defined, Up: Top
+
+ Built-in Variables
+ ******************
+
+ Most `awk' variables are available for you to use for your own
+ purposes; they never change except when your program assigns values to
+ them, and never affect anything except when your program examines them.
+
+ A few variables have special built-in meanings. Some of them `awk'
+ examines automatically, so that they enable you to tell `awk' how to do
+ certain things. Others are set automatically by `awk', so that they
+ carry information from the internal workings of `awk' to your program.
+
+ This chapter documents all the built-in variables of `gawk'. Most
+ of them are also documented in the chapters where their areas of
+ activity are described.
+
+ * Menu:
+
+ * User-modified:: Built-in variables that you change
+ to control `awk'.
+ * Auto-set:: Built-in variables where `awk'
+ gives you information.
+
+
+ File: gawk.info, Node: User-modified, Next: Auto-set, Prev: Built-in Variables, Up: Built-in Variables
+
+ Built-in Variables that Control `awk'
+ =====================================
+
+ This is a list of the variables which you can change to control how
+ `awk' does certain things.
+
+ `CONVFMT'
+ This string is used by `awk' to control conversion of numbers to
+ strings (*note Conversion of Strings and Numbers: Conversion.).
+ It works by being passed, in effect, as the first argument to the
+ `sprintf' function. Its default value is `"%.6g"'. `CONVFMT' was
+ introduced by the POSIX standard.
+
+ `FIELDWIDTHS'
+ This is a space separated list of columns that tells `gawk' how to
+ manage input with fixed, columnar boundaries. It is an
+ experimental feature that is still evolving. Assigning to
+ `FIELDWIDTHS' overrides the use of `FS' for field splitting.
+ *Note Reading Fixed-width Data: Constant Size, for more
+ information.
+
+ If `gawk' is in compatibility mode (*note Invoking `awk': Command
+ Line.), then `FIELDWIDTHS' has no special meaning, and field
+ splitting operations are done based exclusively on the value of
+ `FS'.
+
+ `FS'
+ `FS' is the input field separator (*note Specifying how Fields are
+ Separated: Field Separators.). The value is a single-character
+ string or a multi-character regular expression that matches the
+ separations between fields in an input record.
+
+ The default value is `" "', a string consisting of a single space.
+ As a special exception, this value actually means that any
+ sequence of spaces and tabs is a single separator. It also causes
+ spaces and tabs at the beginning or end of a line to be ignored.
+
+ You can set the value of `FS' on the command line using the `-F'
+ option:
+
+ awk -F, 'PROGRAM' INPUT-FILES
+
+ If `gawk' is using `FIELDWIDTHS' for field-splitting, assigning a
+ value to `FS' will cause `gawk' to return to the normal,
+ regexp-based, field splitting.
+
+ `IGNORECASE'
+ If `IGNORECASE' is nonzero, then *all* regular expression matching
+ is done in a case-independent fashion. In particular, regexp
+ matching with `~' and `!~', and the `gsub' `index', `match',
+ `split' and `sub' functions all ignore case when doing their
+ particular regexp operations. *Note:* since field splitting with
+ the value of the `FS' variable is also a regular expression
+ operation, that too is done with case ignored. *Note
+ Case-sensitivity in Matching: Case-sensitivity.
+
+ If `gawk' is in compatibility mode (*note Invoking `awk': Command
+ Line.), then `IGNORECASE' has no special meaning, and regexp
+ operations are always case-sensitive.
+
+ `OFMT'
+ This string is used by `awk' to control conversion of numbers to
+ strings (*note Conversion of Strings and Numbers: Conversion.) for
+ printing with the `print' statement. It works by being passed, in
+ effect, as the first argument to the `sprintf' function. Its
+ default value is `"%.6g"'. Earlier versions of `awk' also used
+ `OFMT' to specify the format for converting numbers to strings in
+ general expressions; this has been taken over by `CONVFMT'.
+
+ `OFS'
+ This is the output field separator (*note Output Separators::.).
+ It is output between the fields output by a `print' statement. Its
+ default value is `" "', a string consisting of a single space.
+
+ `ORS'
+ This is the output record separator. It is output at the end of
+ every `print' statement. Its default value is a string containing
+ a single newline character, which could be written as `"\n"'.
+ (*Note Output Separators::.)
+
+ `RS'
+ This is `awk''s input record separator. Its default value is a
+ string containing a single newline character, which means that an
+ input record consists of a single line of text. (*Note How Input
+ is Split into Records: Records.)
+
+ `SUBSEP'
+ `SUBSEP' is the subscript separator. It has the default value of
+ `"\034"', and is used to separate the parts of the name of a
+ multi-dimensional array. Thus, if you access `foo[12,3]', it
+ really accesses `foo["12\0343"]' (*note Multi-dimensional Arrays:
+ Multi-dimensional.).
+
+
+ File: gawk.info, Node: Auto-set, Prev: User-modified, Up: Built-in Variables
+
+ Built-in Variables that Convey Information
+ ==========================================
+
+ This is a list of the variables that are set automatically by `awk'
+ on certain occasions so as to provide information to your program.
+
+ `ARGC'
+ `ARGV'
+ The command-line arguments available to `awk' programs are stored
+ in an array called `ARGV'. `ARGC' is the number of command-line
+ arguments present. *Note Invoking `awk': Command Line. `ARGV' is
+ indexed from zero to `ARGC - 1'. For example:
+
+ awk 'BEGIN {
+ for (i = 0; i < ARGC; i++)
+ print ARGV[i]
+ }' inventory-shipped BBS-list
+
+ In this example, `ARGV[0]' contains `"awk"', `ARGV[1]' contains
+ `"inventory-shipped"', and `ARGV[2]' contains `"BBS-list"'. The
+ value of `ARGC' is 3, one more than the index of the last element
+ in `ARGV' since the elements are numbered from zero.
+
+ The names `ARGC' and `ARGV', as well the convention of indexing
+ the array from 0 to `ARGC - 1', are derived from the C language's
+ method of accessing command line arguments.
+
+ Notice that the `awk' program is not entered in `ARGV'. The other
+ special command line options, with their arguments, are also not
+ entered. But variable assignments on the command line *are*
+ treated as arguments, and do show up in the `ARGV' array.
+
+ Your program can alter `ARGC' and the elements of `ARGV'. Each
+ time `awk' reaches the end of an input file, it uses the next
+ element of `ARGV' as the name of the next input file. By storing a
+ different string there, your program can change which files are
+ read. You can use `"-"' to represent the standard input. By
+ storing additional elements and incrementing `ARGC' you can cause
+ additional files to be read.
+
+ If you decrease the value of `ARGC', that eliminates input files
+ from the end of the list. By recording the old value of `ARGC'
+ elsewhere, your program can treat the eliminated arguments as
+ something other than file names.
+
+ To eliminate a file from the middle of the list, store the null
+ string (`""') into `ARGV' in place of the file's name. As a
+ special feature, `awk' ignores file names that have been replaced
+ with the null string.
+
+ `ARGIND'
+ The index in `ARGV' of the current file being processed. Every
+ time `gawk' opens a new data file for processing, it sets `ARGIND'
+ to the index in `ARGV' of the file name. Thus, the condition
+ `FILENAME == ARGV[ARGIND]' is always true.
+
+ This variable is useful in file processing; it allows you to tell
+ how far along you are in the list of data files, and to
+ distinguish between multiple successive instances of the same
+ filename on the command line.
+
+ While you can change the value of `ARGIND' within your `awk'
+ program, `gawk' will automatically set it to a new value when the
+ next file is opened.
+
+ This variable is a `gawk' extension; in other `awk' implementations
+ it is not special.
+
+ `ENVIRON'
+ This is an array that contains the values of the environment. The
+ array indices are the environment variable names; the values are
+ the values of the particular environment variables. For example,
+ `ENVIRON["HOME"]' might be `/u/close'. Changing this array does
+ not affect the environment passed on to any programs that `awk'
+ may spawn via redirection or the `system' function. (In a future
+ version of `gawk', it may do so.)
+
+ Some operating systems may not have environment variables. On
+ such systems, the array `ENVIRON' is empty.
+
+ `ERRNO'
+ If a system error occurs either doing a redirection for `getline',
+ during a read for `getline', or during a `close' operation, then
+ `ERRNO' will contain a string describing the error.
+
+ This variable is a `gawk' extension; in other `awk' implementations
+ it is not special.
+
+ `FILENAME'
+ This is the name of the file that `awk' is currently reading. If
+ `awk' is reading from the standard input (in other words, there
+ are no files listed on the command line), `FILENAME' is set to
+ `"-"'. `FILENAME' is changed each time a new file is read (*note
+ Reading Input Files: Reading Files.).
+
+ `FNR'
+ `FNR' is the current record number in the current file. `FNR' is
+ incremented each time a new record is read (*note Explicit Input
+ with `getline': Getline.). It is reinitialized to 0 each time a
+ new input file is started.
+
+ `NF'
+ `NF' is the number of fields in the current input record. `NF' is
+ set each time a new record is read, when a new field is created,
+ or when `$0' changes (*note Examining Fields: Fields.).
+
+ `NR'
+ This is the number of input records `awk' has processed since the
+ beginning of the program's execution. (*note How Input is Split
+ into Records: Records.). `NR' is set each time a new record is
+ read.
+
+ `RLENGTH'
+ `RLENGTH' is the length of the substring matched by the `match'
+ function (*note Built-in Functions for String Manipulation: String
+ Functions.). `RLENGTH' is set by invoking the `match' function.
+ Its value is the length of the matched string, or -1 if no match
+ was found.
+
+ `RSTART'
+ `RSTART' is the start-index in characters of the substring matched
+ by the `match' function (*note Built-in Functions for String
+ Manipulation: String Functions.). `RSTART' is set by invoking the
+ `match' function. Its value is the position of the string where
+ the matched substring starts, or 0 if no match was found.
+
+
+ File: gawk.info, Node: Command Line, Next: Language History, Prev: Built-in Variables, Up: Top
+
+ Invoking `awk'
+ **************
+
+ There are two ways to run `awk': with an explicit program, or with
+ one or more program files. Here are templates for both of them; items
+ enclosed in `[...]' in these templates are optional.
+
+ Besides traditional one-letter POSIX-style options, `gawk' also
+ supports GNU long named options.
+
+ awk [POSIX OR GNU STYLE OPTIONS] -f progfile [`--'] FILE ...
+ awk [POSIX OR GNU STYLE OPTIONS] [`--'] 'PROGRAM' FILE ...
+
+ * Menu:
+
+ * Options:: Command line options and their meanings.
+ * Other Arguments:: Input file names and variable assignments.
+ * AWKPATH Variable:: Searching directories for `awk' programs.
+ * Obsolete:: Obsolete Options and/or features.
+ * Undocumented:: Undocumented Options and Features.
+
+
+ File: gawk.info, Node: Options, Next: Other Arguments, Prev: Command Line, Up: Command Line
+
+ Command Line Options
+ ====================
+
+ Options begin with a minus sign, and consist of a single character.
+ GNU style long named options consist of two minus signs and a keyword
+ that can be abbreviated if the abbreviation allows the option to be
+ uniquely identified. If the option takes an argument, then the keyword
+ is immediately followed by an equals sign (`=') and the argument's
+ value. For brevity, the discussion below only refers to the
+ traditional short options; however the long and short options are
+ interchangeable in all contexts.
+
+ Each long named option for `gawk' has a corresponding POSIX-style
+ option. The options and their meanings are as follows:
+
+ `-F FS'
+ `--field-separator=FS'
+ Sets the `FS' variable to FS (*note Specifying how Fields are
+ Separated: Field Separators.).
+
+ `-f SOURCE-FILE'
+ `--file=SOURCE-FILE'
+ Indicates that the `awk' program is to be found in SOURCE-FILE
+ instead of in the first non-option argument.
+
+ `-v VAR=VAL'
+ `--assign=VAR=VAL'
+ Sets the variable VAR to the value VAL *before* execution of the
+ program begins. Such variable values are available inside the
+ `BEGIN' rule (see below for a fuller explanation).
+
+ The `-v' option can only set one variable, but you can use it more
+ than once, setting another variable each time, like this:
+ `-v foo=1 -v bar=2'.
+
+ `-W GAWK-OPT'
+ Following the POSIX standard, options that are implementation
+ specific are supplied as arguments to the `-W' option. With
+ `gawk', these arguments may be separated by commas, or quoted and
+ separated by whitespace. Case is ignored when processing these
+ options. These options also have corresponding GNU style long
+ named options. The following `gawk'-specific options are
+ available:
+
+ `-W compat'
+ `--compat'
+ Specifies "compatibility mode", in which the GNU extensions in
+ `gawk' are disabled, so that `gawk' behaves just like Unix
+ `awk'. *Note Extensions in `gawk' not in POSIX `awk':
+ POSIX/GNU, which summarizes the extensions. Also see *Note
+ Downward Compatibility and Debugging: Compatibility Mode.
+
+ `-W copyleft'
+ `-W copyright'
+ `--copyleft'
+ `--copyright'
+ Print the short version of the General Public License. This
+ option may disappear in a future version of `gawk'.
+
+ `-W help'
+ `-W usage'
+ `--help'
+ `--usage'
+ Print a "usage" message summarizing the short and long style
+ options that `gawk' accepts, and then exit.
+
+ `-W lint'
+ `--lint'
+ Provide warnings about constructs that are dubious or
+ non-portable to other `awk' implementations. Some warnings
+ are issued when `gawk' first reads your program. Others are
+ issued at run-time, as your program executes.
+
+ `-W posix'
+ `--posix'
+ Operate in strict POSIX mode. This disables all `gawk'
+ extensions (just like `-W compat'), and adds the following
+ additional restrictions:
+
+ * `\x' escape sequences are not recognized (*note Constant
+ Expressions: Constants.).
+
+ * The synonym `func' for the keyword `function' is not
+ recognized (*note Syntax of Function Definitions:
+ Definition Syntax.).
+
+ * The operators `**' and `**=' cannot be used in place of
+ `^' and `^=' (*note Arithmetic Operators: Arithmetic
+ Ops., and also *note Assignment Expressions: Assignment
+ Ops.).
+
+ * Specifying `-Ft' on the command line does not set the
+ value of `FS' to be a single tab character (*note
+ Specifying how Fields are Separated: Field Separators.).
+
+ Although you can supply both `-W compat' and `-W posix' on the
+ command line, `-W posix' will take precedence.
+
+ `-W source=PROGRAM-TEXT'
+ `--source=PROGRAM-TEXT'
+ Program source code is taken from the PROGRAM-TEXT. This
+ option allows you to mix `awk' source code in files with
+ program source code that you would enter on the command line.
+ This is particularly useful when you have library functions
+ that you wish to use from your command line programs (*note
+ The `AWKPATH' Environment Variable: AWKPATH Variable.).
+
+ `-W version'
+ `--version'
+ Prints version information for this particular copy of `gawk'.
+ This is so you can determine if your copy of `gawk' is up to
+ date with respect to whatever the Free Software Foundation is
+ currently distributing. This option may disappear in a
+ future version of `gawk'.
+
+ `--'
+ Signals the end of the command line options. The following
+ arguments are not treated as options even if they begin with `-'.
+ This interpretation of `--' follows the POSIX argument parsing
+ conventions.
+
+ This is useful if you have file names that start with `-', or in
+ shell scripts, if you have file names that will be specified by
+ the user which could start with `-'.
+
+ Any other options are flagged as invalid with a warning message, but
+ are otherwise ignored.
+
+ In compatibility mode, as a special case, if the value of FS supplied
+ to the `-F' option is `t', then `FS' is set to the tab character
+ (`"\t"'). This is only true for `-W compat', and not for `-W posix'
+ (*note Specifying how Fields are Separated: Field Separators.).
+
+ If the `-f' option is *not* used, then the first non-option command
+ line argument is expected to be the program text.
+
+ The `-f' option may be used more than once on the command line. If
+ it is, `awk' reads its program source from all of the named files, as
+ if they had been concatenated together into one big file. This is
+ useful for creating libraries of `awk' functions. Useful functions can
+ be written once, and then retrieved from a standard place, instead of
+ having to be included into each individual program. You can still type
+ in a program at the terminal and use library functions, by specifying
+ `-f /dev/tty'. `awk' will read a file from the terminal to use as part
+ of the `awk' program. After typing your program, type `Control-d' (the
+ end-of-file character) to terminate it. (You may also use `-f -' to
+ read program source from the standard input, but then you will not be
+ able to also use the standard input as a source of data.)
+
+ Because it is clumsy using the standard `awk' mechanisms to mix
+ source file and command line `awk' programs, `gawk' provides the
+ `--source' option. This does not require you to pre-empt the standard
+ input for your source code, and allows you to easily mix command line
+ and library source code (*note The `AWKPATH' Environment Variable:
+ AWKPATH Variable.).
+
+ If no `-f' or `--source' option is specified, then `gawk' will use
+ the first non-option command line argument as the text of the program
+ source code.
+
+
+ File: gawk.info, Node: Other Arguments, Next: AWKPATH Variable, Prev: Options, Up: Command Line
+
+ Other Command Line Arguments
+ ============================
+
+ Any additional arguments on the command line are normally treated as
+ input files to be processed in the order specified. However, an
+ argument that has the form `VAR=VALUE', means to assign the value VALUE
+ to the variable VAR--it does not specify a file at all.
+
+ All these arguments are made available to your `awk' program in the
+ `ARGV' array (*note Built-in Variables::.). Command line options and
+ the program text (if present) are omitted from the `ARGV' array. All
+ other arguments, including variable assignments, are included.
+
+ The distinction between file name arguments and variable-assignment
+ arguments is made when `awk' is about to open the next input file. At
+ that point in execution, it checks the "file name" to see whether it is
+ really a variable assignment; if so, `awk' sets the variable instead of
+ reading a file.
+
+ Therefore, the variables actually receive the specified values after
+ all previously specified files have been read. In particular, the
+ values of variables assigned in this fashion are *not* available inside
+ a `BEGIN' rule (*note `BEGIN' and `END' Special Patterns: BEGIN/END.),
+ since such rules are run before `awk' begins scanning the argument list.
+ The values given on the command line are processed for escape sequences
+ (*note Constant Expressions: Constants.).
+
+ In some earlier implementations of `awk', when a variable assignment
+ occurred before any file names, the assignment would happen *before*
+ the `BEGIN' rule was executed. Some applications came to depend upon
+ this "feature." When `awk' was changed to be more consistent, the `-v'
+ option was added to accommodate applications that depended upon this
+ old behavior.
+
+ The variable assignment feature is most useful for assigning to
+ variables such as `RS', `OFS', and `ORS', which control input and
+ output formats, before scanning the data files. It is also useful for
+ controlling state if multiple passes are needed over a data file. For
+ example:
+
+ awk 'pass == 1 { PASS 1 STUFF }
+ pass == 2 { PASS 2 STUFF }' pass=1 datafile pass=2 datafile
+
+ Given the variable assignment feature, the `-F' option is not
+ strictly necessary. It remains for historical compatibility.
+
+
+ File: gawk.info, Node: AWKPATH Variable, Next: Obsolete, Prev: Other Arguments, Up: Command Line
+
+ The `AWKPATH' Environment Variable
+ ==================================
+
+ The previous section described how `awk' program files can be named
+ on the command line with the `-f' option. In some `awk'
+ implementations, you must supply a precise path name for each program
+ file, unless the file is in the current directory.
+
+ But in `gawk', if the file name supplied in the `-f' option does not
+ contain a `/', then `gawk' searches a list of directories (called the
+ "search path"), one by one, looking for a file with the specified name.
+
+ The search path is actually a string consisting of directory names
+ separated by colons. `gawk' gets its search path from the `AWKPATH'
+ environment variable. If that variable does not exist, `gawk' uses the
+ default path, which is `.:/local/lib/awk:/gnu/lib/awk'. (Programs
+ written by system administrators should use an `AWKPATH' variable that
+ does not include the current directory, `.'.)
+
+ The search path feature is particularly useful for building up
+ libraries of useful `awk' functions. The library files can be placed
+ in a standard directory that is in the default path, and then specified
+ on the command line with a short file name. Otherwise, the full file
+ name would have to be typed for each file.
+
+ By combining the `--source' and `-f' options, your command line
+ `awk' programs can use facilities in `awk' library files.
+
+ Path searching is not done if `gawk' is in compatibility mode. This
+ is true for both `-W compat' and `-W posix'. *Note Command Line
+ Options: Options.
+
+ *Note:* if you want files in the current directory to be found, you
+ must include the current directory in the path, either by writing `.'
+ as an entry in the path, or by writing a null entry in the path. (A
+ null entry is indicated by starting or ending the path with a colon, or
+ by placing two colons next to each other (`::').) If the current
+ directory is not included in the path, then files cannot be found in
+ the current directory. This path search mechanism is identical to the
+ shell's.
+
+
+ File: gawk.info, Node: Obsolete, Next: Undocumented, Prev: AWKPATH Variable, Up: Command Line
+
+ Obsolete Options and/or Features
+ ================================
+
+ This section describes features and/or command line options from the
+ previous release of `gawk' that are either not available in the current
+ version, or that are still supported but deprecated (meaning that they
+ will *not* be in the next release).
+
+ For version 2.15 of `gawk', the following command line options from
+ version 2.11.1 are no longer recognized.
+
+ `-c'
+ Use `-W compat' instead.
+
+ `-V'
+ Use `-W version' instead.
+
+ `-C'
+ Use `-W copyright' instead.
+
+ `-a'
+ `-e'
+ These options produce an "unrecognized option" error message but
+ have no effect on the execution of `gawk'. The POSIX standard now
+ specifies traditional `awk' regular expressions for the `awk'
+ utility.
+
+ The public-domain version of `strftime' that is distributed with
+ `gawk' changed for the 2.14 release. The `%V' conversion specifier
+ that used to generate the date in VMS format was changed to `%v'. This
+ is because the POSIX standard for the `date' utility now specifies a
+ `%V' conversion specifier. *Note Functions for Dealing with Time
+ Stamps: Time Functions, for details.
+
+
+ File: gawk.info, Node: Undocumented, Prev: Obsolete, Up: Command Line
+
+ Undocumented Options and Features
+ =================================
+
+ This section intentionally left blank.
+
+
+ File: gawk.info, Node: Language History, Next: Installation, Prev: Command Line, Up: Top
+
+ The Evolution of the `awk' Language
+ ***********************************
+
+ This manual describes the GNU implementation of `awk', which is
+ patterned after the POSIX specification. Many `awk' users are only
+ familiar with the original `awk' implementation in Version 7 Unix,
+ which is also the basis for the version in Berkeley Unix (through
+ 4.3-Reno). This chapter briefly describes the evolution of the `awk'
+ language.
+
+ * Menu:
+
+ * V7/S5R3.1:: The major changes between V7 and
+ System V Release 3.1.
+ * S5R4:: Minor changes between System V
+ Releases 3.1 and 4.
+ * POSIX:: New features from the POSIX standard.
+ * POSIX/GNU:: The extensions in `gawk'
+ not in POSIX `awk'.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-7 gawk-2.15.5/gawk.info-7
*** /src/baseline/gawk-2.15.5/gawk.info-7 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-7 Sun Jun 12 22:28:55 1994
***************
*** 0 ****
--- 1,1265 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: V7/S5R3.1, Next: S5R4, Prev: Language History, Up: Language History
+
+ Major Changes between V7 and S5R3.1
+ ===================================
+
+ The `awk' language evolved considerably between the release of
+ Version 7 Unix (1978) and the new version first made widely available in
+ System V Release 3.1 (1987). This section summarizes the changes, with
+ cross-references to further details.
+
+ * The requirement for `;' to separate rules on a line (*note `awk'
+ Statements versus Lines: Statements/Lines.).
+
+ * User-defined functions, and the `return' statement (*note
+ User-defined Functions: User-defined.).
+
+ * The `delete' statement (*note The `delete' Statement: Delete.).
+
+ * The `do'-`while' statement (*note The `do'-`while' Statement: Do
+ Statement.).
+
+ * The built-in functions `atan2', `cos', `sin', `rand' and `srand'
+ (*note Numeric Built-in Functions: Numeric Functions.).
+
+ * The built-in functions `gsub', `sub', and `match' (*note Built-in
+ Functions for String Manipulation: String Functions.).
+
+ * The built-in functions `close', which closes an open file, and
+ `system', which allows the user to execute operating system
+ commands (*note Built-in Functions for Input/Output: I/O
+ Functions.).
+
+ * The `ARGC', `ARGV', `FNR', `RLENGTH', `RSTART', and `SUBSEP'
+ built-in variables (*note Built-in Variables::.).
+
+ * The conditional expression using the operators `?' and `:' (*note
+ Conditional Expressions: Conditional Exp.).
+
+ * The exponentiation operator `^' (*note Arithmetic Operators:
+ Arithmetic Ops.) and its assignment operator form `^=' (*note
+ Assignment Expressions: Assignment Ops.).
+
+ * C-compatible operator precedence, which breaks some old `awk'
+ programs (*note Operator Precedence (How Operators Nest):
+ Precedence.).
+
+ * Regexps as the value of `FS' (*note Specifying how Fields are
+ Separated: Field Separators.), and as the third argument to the
+ `split' function (*note Built-in Functions for String
+ Manipulation: String Functions.).
+
+ * Dynamic regexps as operands of the `~' and `!~' operators (*note
+ How to Use Regular Expressions: Regexp Usage.).
+
+ * Escape sequences (*note Constant Expressions: Constants.) in
+ regexps.
+
+ * The escape sequences `\b', `\f', and `\r' (*note Constant
+ Expressions: Constants.).
+
+ * Redirection of input for the `getline' function (*note Explicit
+ Input with `getline': Getline.).
+
+ * Multiple `BEGIN' and `END' rules (*note `BEGIN' and `END' Special
+ Patterns: BEGIN/END.).
+
+ * Simulated multi-dimensional arrays (*note Multi-dimensional
+ Arrays: Multi-dimensional.).
+
+
+ File: gawk.info, Node: S5R4, Next: POSIX, Prev: V7/S5R3.1, Up: Language History
+
+ Changes between S5R3.1 and S5R4
+ ===============================
+
+ The System V Release 4 version of Unix `awk' added these features
+ (some of which originated in `gawk'):
+
+ * The `ENVIRON' variable (*note Built-in Variables::.).
+
+ * Multiple `-f' options on the command line (*note Invoking `awk':
+ Command Line.).
+
+ * The `-v' option for assigning variables before program execution
+ begins (*note Invoking `awk': Command Line.).
+
+ * The `--' option for terminating command line options.
+
+ * The `\a', `\v', and `\x' escape sequences (*note Constant
+ Expressions: Constants.).
+
+ * A defined return value for the `srand' built-in function (*note
+ Numeric Built-in Functions: Numeric Functions.).
+
+ * The `toupper' and `tolower' built-in string functions for case
+ translation (*note Built-in Functions for String Manipulation:
+ String Functions.).
+
+ * A cleaner specification for the `%c' format-control letter in the
+ `printf' function (*note Using `printf' Statements for Fancier
+ Printing: Printf.).
+
+ * The ability to dynamically pass the field width and precision
+ (`"%*.*d"') in the argument list of the `printf' function (*note
+ Using `printf' Statements for Fancier Printing: Printf.).
+
+ * The use of constant regexps such as `/foo/' as expressions, where
+ they are equivalent to use of the matching operator, as in `$0 ~
+ /foo/' (*note Constant Expressions: Constants.).
+
+
+ File: gawk.info, Node: POSIX, Next: POSIX/GNU, Prev: S5R4, Up: Language History
+
+ Changes between S5R4 and POSIX `awk'
+ ====================================
+
+ The POSIX Command Language and Utilities standard for `awk'
+ introduced the following changes into the language:
+
+ * The use of `-W' for implementation-specific options.
+
+ * The use of `CONVFMT' for controlling the conversion of numbers to
+ strings (*note Conversion of Strings and Numbers: Conversion.).
+
+ * The concept of a numeric string, and tighter comparison rules to go
+ with it (*note Comparison Expressions: Comparison Ops.).
+
+ * More complete documentation of many of the previously undocumented
+ features of the language.
+
+
+ File: gawk.info, Node: POSIX/GNU, Prev: POSIX, Up: Language History
+
+ Extensions in `gawk' not in POSIX `awk'
+ =======================================
+
+ The GNU implementation, `gawk', adds these features:
+
+ * The `AWKPATH' environment variable for specifying a path search for
+ the `-f' command line option (*note Invoking `awk': Command Line.).
+
+ * The various `gawk' specific features available via the `-W'
+ command line option (*note Invoking `awk': Command Line.).
+
+ * The `ARGIND' variable, that tracks the movement of `FILENAME'
+ through `ARGV'. (*note Built-in Variables::.).
+
+ * The `ERRNO' variable, that contains the system error message when
+ `getline' returns -1, or when `close' fails. (*note Built-in
+ Variables::.).
+
+ * The `IGNORECASE' variable and its effects (*note Case-sensitivity
+ in Matching: Case-sensitivity.).
+
+ * The `FIELDWIDTHS' variable and its effects (*note Reading
+ Fixed-width Data: Constant Size.).
+
+ * The `next file' statement for skipping to the next data file
+ (*note The `next file' Statement: Next File Statement.).
+
+ * The `systime' and `strftime' built-in functions for obtaining and
+ printing time stamps (*note Functions for Dealing with Time
+ Stamps: Time Functions.).
+
+ * The `/dev/stdin', `/dev/stdout', `/dev/stderr', and `/dev/fd/N'
+ file name interpretation (*note Standard I/O Streams: Special
+ Files.).
+
+ * The `-W compat' option to turn off these extensions (*note
+ Invoking `awk': Command Line.).
+
+ * The `-W posix' option for full POSIX compliance (*note Invoking
+ `awk': Command Line.).
+
+
+ File: gawk.info, Node: Installation, Next: Gawk Summary, Prev: Language History, Up: Top
+
+ Installing `gawk'
+ *****************
+
+ This chapter provides instructions for installing `gawk' on the
+ various platforms that are supported by the developers. The primary
+ developers support Unix (and one day, GNU), while the other ports were
+ contributed. The file `ACKNOWLEDGMENT' in the `gawk' distribution
+ lists the electronic mail addresses of the people who did the
+ respective ports.
+
+ * Menu:
+
+ * Gawk Distribution:: What is in the `gawk' distribution.
+ * Unix Installation:: Installing `gawk' under various versions
+ of Unix.
+ * VMS Installation:: Installing `gawk' on VMS.
+ * MS-DOS Installation:: Installing `gawk' on MS-DOS.
+ * Atari Installation:: Installing `gawk' on the Atari ST.
+
+
+ File: gawk.info, Node: Gawk Distribution, Next: Unix Installation, Prev: Installation, Up: Installation
+
+ The `gawk' Distribution
+ =======================
+
+ This section first describes how to get and extract the `gawk'
+ distribution, and then discusses what is in the various files and
+ subdirectories.
+
+ * Menu:
+
+ * Extracting:: How to get and extract the distribution.
+ * Distribution contents:: What is in the distribution.
+
+
+ File: gawk.info, Node: Extracting, Next: Distribution contents, Prev: Gawk Distribution, Up: Gawk Distribution
+
+ Getting the `gawk' Distribution
+ -------------------------------
+
+ `gawk' is distributed as a `tar' file compressed with the GNU Zip
+ program, `gzip'. You can get it via anonymous `ftp' to the Internet
+ host `prep.ai.mit.edu'. Like all GNU software, it will be archived at
+ other well known systems, from which it will be possible to use some
+ sort of anonymous `uucp' to obtain the distribution as well. You can
+ also order `gawk' on tape or CD-ROM directly from the Free Software
+ Foundation. (The address is on the copyright page.) Doing so directly
+ contributes to the support of the foundation and to the production of
+ more free software.
+
+ Once you have the distribution (for example, `gawk-2.15.0.tar.z'),
+ first use `gzip' to expand the file, and then use `tar' to extract it.
+ You can use the following pipeline to produce the `gawk' distribution:
+
+ # Under System V, add 'o' to the tar flags
+ gzip -d -c gawk-2.15.0.tar.z | tar -xvpf -
+
+ This will create a directory named `gawk-2.15' in the current directory.
+
+ The distribution file name is of the form `gawk-2.15.N.tar.Z'. The
+ N represents a "patchlevel", meaning that minor bugs have been fixed in
+ the major release. The current patchlevel is 0, but when retrieving
+ distributions, you should get the version with the highest patchlevel.
+
+ If you are not on a Unix system, you will need to make other
+ arrangements for getting and extracting the `gawk' distribution. You
+ should consult a local expert.
+
+
+ File: gawk.info, Node: Distribution contents, Prev: Extracting, Up: Gawk Distribution
+
+ Contents of the `gawk' Distribution
+ -----------------------------------
+
+ `gawk' has a number of C source files, documentation files,
+ subdirectories and files related to the configuration process (*note
+ Compiling and Installing `gawk' on Unix: Unix Installation.), and
+ several subdirectories related to different, non-Unix, operating
+ systems.
+
+ various `.c', `.y', and `.h' files
+ The C and YACC source files are the actual `gawk' source code.
+
+ `README'
+ `README.VMS'
+ `README.dos'
+ `README.rs6000'
+ `README.ultrix'
+ Descriptive files: `README' for `gawk' under Unix, and the rest
+ for the various hardware and software combinations.
+
+ `PORTS'
+ A list of systems to which `gawk' has been ported, and which have
+ successfully run the test suite.
+
+ `ACKNOWLEDGMENT'
+ A list of the people who contributed major parts of the code or
+ documentation.
+
+ `NEWS'
+ A list of changes to `gawk' since the last release or patch.
+
+ `COPYING'
+ The GNU General Public License.
+
+ `FUTURES'
+ A brief list of features and/or changes being contemplated for
+ future releases, with some indication of the time frame for the
+ feature, based on its difficulty.
+
+ `LIMITATIONS'
+ A list of those factors that limit `gawk''s performance. Most of
+ these depend on the hardware or operating system software, and are
+ not limits in `gawk' itself.
+
+ `PROBLEMS'
+ A file describing known problems with the current release.
+
+ `gawk.1'
+ The `troff' source for a manual page describing `gawk'.
+
+ `gawk.texinfo'
+ The `texinfo' source file for this Info file. It should be
+ processed with TeX to produce a printed manual, and with
+ `makeinfo' to produce the Info file.
+
+ `Makefile.in'
+ `config'
+ `config.in'
+ `configure'
+ `missing'
+ `mungeconf'
+ These files and subdirectories are used when configuring `gawk'
+ for various Unix systems. They are explained in detail in *Note
+ Compiling and Installing `gawk' on Unix: Unix Installation.
+
+ `atari'
+ Files needed for building `gawk' on an Atari ST. *Note Installing
+ `gawk' on the Atari ST: Atari Installation, for details.
+
+ `pc'
+ Files needed for building `gawk' under MS-DOS. *Note Installing
+ `gawk' on MS-DOS: MS-DOS Installation, for details.
+
+ `vms'
+ Files needed for building `gawk' under VMS. *Note Compiling
+ Installing and Running `gawk' on VMS: VMS Installation, for
+ details.
+
+ `test'
+ Many interesting `awk' programs, provided as a test suite for
+ `gawk'. You can use `make test' from the top level `gawk'
+ directory to run your version of `gawk' against the test suite.
+ If `gawk' successfully passes `make test' then you can be
+ confident of a successful port.
+
+
+ File: gawk.info, Node: Unix Installation, Next: VMS Installation, Prev: Gawk Distribution, Up: Installation
+
+ Compiling and Installing `gawk' on Unix
+ =======================================
+
+ Often, you can compile and install `gawk' by typing only two
+ commands. However, if you do not use a supported system, you may need
+ to configure `gawk' for your system yourself.
+
+ * Menu:
+
+ * Quick Installation:: Compiling `gawk' on a
+ supported Unix version.
+ * Configuration Philosophy:: How it's all supposed to work.
+ * New Configurations:: What to do if there is no supplied
+ configuration for your system.
+
+
+ File: gawk.info, Node: Quick Installation, Next: Configuration Philosophy, Prev: Unix Installation, Up: Unix Installation
+
+ Compiling `gawk' for a Supported Unix Version
+ ---------------------------------------------
+
+ After you have extracted the `gawk' distribution, `cd' to
+ `gawk-2.15'. Look in the `config' subdirectory for a file that matches
+ your hardware/software combination. In general, only the software is
+ relevant; for example `sunos41' is used for SunOS 4.1, on both Sun 3
+ and Sun 4 hardware.
+
+ If you find such a file, run the command:
+
+ # assume you have SunOS 4.1
+ ./configure sunos41
+
+ This produces a `Makefile' and `config.h' tailored to your system.
+ You may wish to edit the `Makefile' to use a different C compiler, such
+ as `gcc', the GNU C compiler, if you have it. You may also wish to
+ change the `CFLAGS' variable, which controls the command line options
+ that are passed to the C compiler (such as optimization levels, or
+ compiling for debugging).
+
+ After you have configured `Makefile' and `config.h', type:
+
+ make
+
+ and shortly thereafter, you should have an executable version of `gawk'.
+ That's all there is to it!
+
+
+ File: gawk.info, Node: Configuration Philosophy, Next: New Configurations, Prev: Quick Installation, Up: Unix Installation
+
+ The Configuration Process
+ -------------------------
+
+ (This section is of interest only if you know something about using
+ the C language and the Unix operating system.)
+
+ The source code for `gawk' generally attempts to adhere to industry
+ standards wherever possible. This means that `gawk' uses library
+ routines that are specified by the ANSI C standard and by the POSIX
+ operating system interface standard. When using an ANSI C compiler,
+ function prototypes are provided to help improve the compile-time
+ checking.
+
+ Many older Unix systems do not support all of either the ANSI or the
+ POSIX standards. The `missing' subdirectory in the `gawk' distribution
+ contains replacement versions of those subroutines that are most likely
+ to be missing.
+
+ The `config.h' file that is created by the `configure' program
+ contains definitions that describe features of the particular operating
+ system where you are attempting to compile `gawk'. For the most part,
+ it lists which standard subroutines are *not* available. For example,
+ if your system lacks the `getopt' routine, then `GETOPT_MISSING' would
+ be defined.
+
+ `config.h' also defines constants that describe facts about your
+ variant of Unix. For example, there may not be an `st_blksize' element
+ in the `stat' structure. In this case `BLKSIZE_MISSING' would be
+ defined.
+
+ Based on the list in `config.h' of standard subroutines that are
+ missing, `missing.c' will do a `#include' of the appropriate file(s)
+ from the `missing' subdirectory.
+
+ Conditionally compiled code in the other source files relies on the
+ other definitions in the `config.h' file.
+
+ Besides creating `config.h', `configure' produces a `Makefile' from
+ `Makefile.in'. There are a number of lines in `Makefile.in' that are
+ system or feature specific. For example, there is line that begins
+ with `##MAKE_ALLOCA_C##'. This is normally a comment line, since it
+ starts with `#'. If a configuration file has `MAKE_ALLOCA_C' in it,
+ then `configure' will delete the `##MAKE_ALLOCA_C##' from the beginning
+ of the line. This will enable the rules in the `Makefile' that use a C
+ version of `alloca'. There are several similar features that work in
+ this fashion.
+
+
+ File: gawk.info, Node: New Configurations, Prev: Configuration Philosophy, Up: Unix Installation
+
+ Configuring `gawk' for a New System
+ -----------------------------------
+
+ (This section is of interest only if you know something about using
+ the C language and the Unix operating system, and if you have to install
+ `gawk' on a system that is not supported by the `gawk' distribution.
+ If you are a C or Unix novice, get help from a local expert.)
+
+ If you need to configure `gawk' for a Unix system that is not
+ supported in the distribution, first see *Note The Configuration
+ Process: Configuration Philosophy. Then, copy `config.in' to
+ `config.h', and copy `Makefile.in' to `Makefile'.
+
+ Next, edit both files. Both files are liberally commented, and the
+ necessary changes should be straightforward.
+
+ While editing `config.h', you need to determine what library
+ routines you do or do not have by consulting your system documentation,
+ or by perusing your actual libraries using the `ar' or `nm' utilities.
+ In the worst case, simply do not define *any* of the macros for missing
+ subroutines. When you compile `gawk', the final link-editing step will
+ fail. The link editor will provide you with a list of unresolved
+ external references--these are the missing subroutines. Edit
+ `config.h' again and recompile, and you should be set.
+
+ Editing the `Makefile' should also be straightforward. Enable or
+ disable the lines that begin with `##MAKE_WHATEVER##', as appropriate.
+ Select the correct C compiler and `CFLAGS' for it. Then run `make'.
+
+ Getting a correct configuration is likely to be an iterative process.
+ Do not be discouraged if it takes you several tries. If you have no
+ luck whatsoever, please report your system type, and the steps you took.
+ Once you do have a working configuration, please send it to the
+ maintainers so that support for your system can be added to the
+ official release.
+
+ *Note Reporting Problems and Bugs: Bugs, for information on how to
+ report problems in configuring `gawk'. You may also use the same
+ mechanisms for sending in new configurations.
+
+
+ File: gawk.info, Node: VMS Installation, Next: MS-DOS Installation, Prev: Unix Installation, Up: Installation
+
+ Compiling, Installing, and Running `gawk' on VMS
+ ================================================
+
+ This section describes how to compile and install `gawk' under VMS.
+
+ * Menu:
+
+ * VMS Compilation:: How to compile `gawk' under VMS.
+ * VMS Installation Details:: How to install `gawk' under VMS.
+ * VMS Running:: How to run `gawk' under VMS.
+ * VMS POSIX:: Alternate instructions for VMS POSIX.
+
+
+ File: gawk.info, Node: VMS Compilation, Next: VMS Installation Details, Prev: VMS Installation, Up: VMS Installation
+
+ Compiling `gawk' under VMS
+ --------------------------
+
+ To compile `gawk' under VMS, there is a `DCL' command procedure that
+ will issue all the necessary `CC' and `LINK' commands, and there is
+ also a `Makefile' for use with the `MMS' utility. From the source
+ directory, use either
+
+ $ @[.VMS]VMSBUILD.COM
+
+ or
+
+ $ MMS/DESCRIPTION=[.VMS]DECSRIP.MMS GAWK
+
+ Depending upon which C compiler you are using, follow one of the sets
+ of instructions in this table:
+
+ VAX C V3.x
+ Use either `vmsbuild.com' or `descrip.mms' as is. These use
+ `CC/OPTIMIZE=NOLINE', which is essential for Version 3.0.
+
+ VAX C V2.x
+ You must have Version 2.3 or 2.4; older ones won't work. Edit
+ either `vmsbuild.com' or `descrip.mms' according to the comments
+ in them. For `vmsbuild.com', this just entails removing two `!'
+ delimiters. Also edit `config.h' (which is a copy of file
+ `[.config]vms-conf.h') and comment out or delete the two lines
+ `#define __STDC__ 0' and `#define VAXC_BUILTINS' near the end.
+
+ GNU C
+ Edit `vmsbuild.com' or `descrip.mms'; the changes are different
+ from those for VAX C V2.x, but equally straightforward. No
+ changes to `config.h' should be needed.
+
+ DEC C
+ Edit `vmsbuild.com' or `descrip.mms' according to their comments.
+ No changes to `config.h' should be needed.
+
+ `gawk' 2.15 has been tested under VAX/VMS 5.5-1 using VAX C V3.2,
+ GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6
+ and up.
+
+
+ File: gawk.info, Node: VMS Installation Details, Next: VMS Running, Prev: VMS Compilation, Up: VMS Installation
+
+ Installing `gawk' on VMS
+ ------------------------
+
+ To install `gawk', all you need is a "foreign" command, which is a
+ `DCL' symbol whose value begins with a dollar sign.
+
+ $ GAWK :== $device:[directory]GAWK
+
+ (Substitute the actual location of `gawk.exe' for
+ `device:[directory]'.) The symbol should be placed in the `login.com'
+ of any user who wishes to run `gawk', so that it will be defined every
+ time the user logs on. Alternatively, the symbol may be placed in the
+ system-wide `sylogin.com' procedure, which will allow all users to run
+ `gawk'.
+
+ Optionally, the help entry can be loaded into a VMS help library:
+
+ $ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
+
+ (You may want to substitute a site-specific help library rather than
+ the standard VMS library `HELPLIB'.) After loading the help text,
+
+ $ HELP GAWK
+
+ will provide information about both the `gawk' implementation and the
+ `awk' programming language.
+
+ The logical name `AWK_LIBRARY' can designate a default location for
+ `awk' program files. For the `-f' option, if the specified filename
+ has no device or directory path information in it, `gawk' will look in
+ the current directory first, then in the directory specified by the
+ translation of `AWK_LIBRARY' if the file was not found. If after
+ searching in both directories, the file still is not found, then `gawk'
+ appends the suffix `.awk' to the filename and the file search will be
+ re-tried. If `AWK_LIBRARY' is not defined, that portion of the file
+ search will fail benignly.
+
+
+ File: gawk.info, Node: VMS Running, Next: VMS POSIX, Prev: VMS Installation Details, Up: VMS Installation
+
+ Running `gawk' on VMS
+ ---------------------
+
+ Command line parsing and quoting conventions are significantly
+ different on VMS, so examples in this manual or from other sources
+ often need minor changes. They *are* minor though, and all `awk'
+ programs should run correctly.
+
+ Here are a couple of trivial tests:
+
+ $ gawk -- "BEGIN {print ""Hello, World!""}"
+ $ gawk -"W" version ! could also be -"W version" or "-W version"
+
+ Note that upper-case and mixed-case text must be quoted.
+
+ The VMS port of `gawk' includes a `DCL'-style interface in addition
+ to the original shell-style interface (see the help entry for details).
+ One side-effect of dual command line parsing is that if there is only a
+ single parameter (as in the quoted string program above), the command
+ becomes ambiguous. To work around this, the normally optional `--'
+ flag is required to force Unix style rather than `DCL' parsing. If any
+ other dash-type options (or multiple parameters such as data files to be
+ processed) are present, there is no ambiguity and `--' can be omitted.
+
+ The default search path when looking for `awk' program files
+ specified by the `-f' option is `"SYS$DISK:[],AWK_LIBRARY:"'. The
+ logical name `AWKPATH' can be used to override this default. The format
+ of `AWKPATH' is a comma-separated list of directory specifications.
+ When defining it, the value should be quoted so that it retains a single
+ translation, and not a multi-translation `RMS' searchlist.
+
+
+ File: gawk.info, Node: VMS POSIX, Prev: VMS Running, Up: VMS Installation
+
+ Building and using `gawk' under VMS POSIX
+ -----------------------------------------
+
+ Ignore the instructions above, although `vms/gawk.hlp' should still
+ be made available in a help library. Make sure that the two scripts,
+ `configure' and `mungeconf', are executable; use `chmod +x' on them if
+ necessary. Then execute the following commands:
+
+ $ POSIX
+ psx> configure vms-posix
+ psx> make awktab.c gawk
+
+ The first command will construct files `config.h' and `Makefile' out of
+ templates. The second command will compile and link `gawk'. Due to a
+ `make' bug in VMS POSIX V1.0 and V1.1, the file `awktab.c' must be
+ given as an explicit target or it will not be built and the final link
+ step will fail. Ignore the warning `"Could not find lib m in lib
+ list"'; it is harmless, caused by the explicit use of `-lm' as a linker
+ option which is not needed under VMS POSIX. Under V1.1 (but not V1.0)
+ a problem with the `yacc' skeleton `/etc/yyparse.c' will cause a
+ compiler warning for `awktab.c', followed by a linker warning about
+ compilation warnings in the resulting object module. These warnings
+ can be ignored.
+
+ Once built, `gawk' will work like any other shell utility. Unlike
+ the normal VMS port of `gawk', no special command line manipulation is
+ needed in the VMS POSIX environment.
+
+
+ File: gawk.info, Node: MS-DOS Installation, Next: Atari Installation, Prev: VMS Installation, Up: Installation
+
+ Installing `gawk' on MS-DOS
+ ===========================
+
+ The first step is to get all the files in the `gawk' distribution
+ onto your PC. Move all the files from the `pc' directory into the main
+ directory where the other files are. Edit the file `make.bat' so that
+ it will be an acceptable MS-DOS batch file. This means making sure
+ that all lines are terminated with the ASCII carriage return and line
+ feed characters. restrictions.
+
+ `gawk' has only been compiled with version 5.1 of the Microsoft C
+ compiler. The file `make.bat' from the `pc' directory assumes that you
+ have this compiler.
+
+ Copy the file `setargv.obj' from the library directory where it
+ resides to the `gawk' source code directory.
+
+ Run `make.bat'. This will compile `gawk' for you, and link it.
+ That's all there is to it!
+
+
+ File: gawk.info, Node: Atari Installation, Prev: MS-DOS Installation, Up: Installation
+
+ Installing `gawk' on the Atari ST
+ =================================
+
+ This section assumes that you are running TOS. It applies to other
+ Atari models (STe, TT) as well.
+
+ In order to use `gawk', you need to have a shell, either text or
+ graphics, that does not map all the characters of a command line to
+ upper case. Maintaining case distinction in option flags is very
+ important (*note Invoking `awk': Command Line.). Popular shells like
+ `gulam' or `gemini' will work, as will newer versions of `desktop'.
+ Support for I/O redirection is necessary to make it easy to import
+ `awk' programs from other environments. Pipes are nice to have, but
+ not vital.
+
+ If you have received an executable version of `gawk', place it, as
+ usual, anywhere in your `PATH' where your shell will find it.
+
+ While executing, `gawk' creates a number of temporary files. `gawk'
+ looks for either of the environment variables `TEMP' or `TMPDIR', in
+ that order. If either one is found, its value is assumed to be a
+ directory for temporary files. This directory must exist, and if you
+ can spare the memory, it is a good idea to put it on a RAM drive. If
+ neither `TEMP' nor `TMPDIR' are found, then `gawk' uses the current
+ directory for its temporary files.
+
+ The ST version of `gawk' searches for its program files as described
+ in *Note The `AWKPATH' Environment Variable: AWKPATH Variable. On the
+ ST, the default value for the `AWKPATH' variable is
+ `".,c:\lib\awk,c:\gnu\lib\awk"'. The search path can be modified by
+ explicitly setting `AWKPATH' to whatever you wish. Note that colons
+ cannot be used on the ST to separate elements in the `AWKPATH'
+ variable, since they have another, reserved, meaning. Instead, you
+ must use a comma to separate elements in the path. If you are
+ recompiling `gawk' on the ST, then you can choose a new default search
+ path, by setting the value of `DEFPATH' in the file `...\config\atari'.
+ You may choose a different separator character by setting the value of
+ `ENVSEP' in the same file. The new values will be used when creating
+ the header file `config.h'.
+
+ Although `awk' allows great flexibility in doing I/O redirections
+ from within a program, this facility should be used with care on the ST.
+ In some circumstances the OS routines for file handle pool processing
+ lose track of certain events, causing the computer to crash, and
+ requiring a reboot. Often a warm reboot is sufficient. Fortunately,
+ this happens infrequently, and in rather esoteric situations. In
+ particular, avoid having one part of an `awk' program using `print'
+ statements explicitly redirected to `"/dev/stdout"', while other
+ `print' statements use the default standard output, and a calling shell
+ has redirected standard output to a file.
+
+ When `gawk' is compiled with the ST version of `gcc' and its usual
+ libraries, it will accept both `/' and `\' as path separators. While
+ this is convenient, it should be remembered that this removes one,
+ technically legal, character (`/') from your file names, and that it
+ may create problems for external programs, called via the `system()'
+ function, which may not support this convention. Whenever it is
+ possible that a file created by `gawk' will be used by some other
+ program, use only backslashes. Also remember that in `awk',
+ backslashes in strings have to be doubled in order to get literal
+ backslashes.
+
+ The initial port of `gawk' to the ST was done with `gcc'. If you
+ wish to recompile `gawk' from scratch, you will need to use a compiler
+ that accepts ANSI standard C (such as `gcc', Turbo C, or Prospero C).
+ If `sizeof(int) != sizeof(int *)', the correctness of the generated
+ code depends heavily on the fact that all function calls have function
+ prototypes in the current scope. If your compiler does not accept
+ function prototypes, you will probably have to add a number of casts to
+ the code.
+
+ If you are using `gcc', make sure that you have up-to-date libraries.
+ Older versions have problems with some library functions (`atan2()',
+ `strftime()', the `%g' conversion in `sprintf()') which may affect the
+ operation of `gawk'.
+
+ In the `atari' subdirectory of the `gawk' distribution is a version
+ of the `system()' function that has been tested with `gulam' and `msh';
+ it should work with other shells as well. With `gulam', it passes the
+ string to be executed without spawning an extra copy of a shell. It is
+ possible to replace this version of `system()' with a similar function
+ from a library or from some other source if that version would be a
+ better choice for the shell you prefer.
+
+ The files needed to recompile `gawk' on the ST can be found in the
+ `atari' directory. The provided files and instructions below assume
+ that you have the GNU C compiler (`gcc'), the `gulam' shell, and an ST
+ version of `sed'. The `Makefile' is set up to use `byacc' as a `yacc'
+ replacement. With a different set of tools some adjustments and/or
+ editing will be needed.
+
+ `cd' to the `atari' directory. Copy `Makefile.st' to `makefile' in
+ the source (parent) directory. Possibly adjust `../config/atari' to
+ suit your system. Execute the script `mkconf.g' which will create the
+ header file `../config.h'. Go back to the source directory. If you
+ are not using `gcc', check the file `missing.c'. It may be necessary
+ to change forward slashes in the references to files from the `atari'
+ subdirectory into backslashes. Type `make' and enjoy.
+
+ Compilation with `gcc' of some of the bigger modules, like
+ `awk_tab.c', may require a full four megabytes of memory. On smaller
+ machines you would need to cut down on optimizations, or you would have
+ to switch to another, less memory hungry, compiler.
+
+
+ File: gawk.info, Node: Gawk Summary, Next: Sample Program, Prev: Installation, Up: Top
+
+ `gawk' Summary
+ **************
+
+ This appendix provides a brief summary of the `gawk' command line
+ and the `awk' language. It is designed to serve as "quick reference."
+ It is therefore terse, but complete.
+
+ * Menu:
+
+ * Command Line Summary:: Recapitulation of the command line.
+ * Language Summary:: A terse review of the language.
+ * Variables/Fields:: Variables, fields, and arrays.
+ * Rules Summary:: Patterns and Actions, and their
+ component parts.
+ * Functions Summary:: Defining and calling functions.
+ * Historical Features:: Some undocumented but supported "features".
+
+
+ File: gawk.info, Node: Command Line Summary, Next: Language Summary, Prev: Gawk Summary, Up: Gawk Summary
+
+ Command Line Options Summary
+ ============================
+
+ The command line consists of options to `gawk' itself, the `awk'
+ program text (if not supplied via the `-f' option), and values to be
+ made available in the `ARGC' and `ARGV' predefined `awk' variables:
+
+ awk [POSIX OR GNU STYLE OPTIONS] -f source-file [`--'] FILE ...
+ awk [POSIX OR GNU STYLE OPTIONS] [`--'] 'PROGRAM' FILE ...
+
+ The options that `gawk' accepts are:
+
+ `-F FS'
+ `--field-separator=FS'
+ Use FS for the input field separator (the value of the `FS'
+ predefined variable).
+
+ `-f PROGRAM-FILE'
+ `--file=PROGRAM-FILE'
+ Read the `awk' program source from the file PROGRAM-FILE, instead
+ of from the first command line argument.
+
+ `-v VAR=VAL'
+ `--assign=VAR=VAL'
+ Assign the variable VAR the value VAL before program execution
+ begins.
+
+ `-W compat'
+ `--compat'
+ Specifies compatibility mode, in which `gawk' extensions are turned
+ off.
+
+ `-W copyleft'
+ `-W copyright'
+ `--copyleft'
+ `--copyright'
+ Print the short version of the General Public License on the error
+ output. This option may disappear in a future version of `gawk'.
+
+ `-W help'
+ `-W usage'
+ `--help'
+ `--usage'
+ Print a relatively short summary of the available options on the
+ error output.
+
+ `-W lint'
+ `--lint'
+ Give warnings about dubious or non-portable `awk' constructs.
+
+ `-W posix'
+ `--posix'
+ Specifies POSIX compatibility mode, in which `gawk' extensions are
+ turned off and additional restrictions apply.
+
+ `-W source=PROGRAM-TEXT'
+ `--source=PROGRAM-TEXT'
+ Use PROGRAM-TEXT as `awk' program source code. This option allows
+ mixing command line source code with source code from files, and is
+ particularly useful for mixing command line programs with library
+ functions.
+
+ `-W version'
+ `--version'
+ Print version information for this particular copy of `gawk' on
+ the error output. This option may disappear in a future version
+ of `gawk'.
+
+ `--'
+ Signal the end of options. This is useful to allow further
+ arguments to the `awk' program itself to start with a `-'. This
+ is mainly for consistency with the argument parsing conventions of
+ POSIX.
+
+ Any other options are flagged as invalid, but are otherwise ignored.
+ *Note Invoking `awk': Command Line, for more details.
+
+
+ File: gawk.info, Node: Language Summary, Next: Variables/Fields, Prev: Command Line Summary, Up: Gawk Summary
+
+ Language Summary
+ ================
+
+ An `awk' program consists of a sequence of pattern-action statements
+ and optional function definitions.
+
+ PATTERN { ACTION STATEMENTS }
+
+ function NAME(PARAMETER LIST) { ACTION STATEMENTS }
+
+ `gawk' first reads the program source from the PROGRAM-FILE(s) if
+ specified, or from the first non-option argument on the command line.
+ The `-f' option may be used multiple times on the command line. `gawk'
+ reads the program text from all the PROGRAM-FILE files, effectively
+ concatenating them in the order they are specified. This is useful for
+ building libraries of `awk' functions, without having to include them
+ in each new `awk' program that uses them. To use a library function in
+ a file from a program typed in on the command line, specify `-f
+ /dev/tty'; then type your program, and end it with a `Control-d'.
+ *Note Invoking `awk': Command Line.
+
+ The environment variable `AWKPATH' specifies a search path to use
+ when finding source files named with the `-f' option. The default
+ path, which is `.:/local/lib/awk:/gnu/lib/awk' is used if `AWKPATH' is
+ not set. If a file name given to the `-f' option contains a `/'
+ character, no path search is performed. *Note The `AWKPATH'
+ Environment Variable: AWKPATH Variable, for a full description of the
+ `AWKPATH' environment variable.
+
+ `gawk' compiles the program into an internal form, and then proceeds
+ to read each file named in the `ARGV' array. If there are no files
+ named on the command line, `gawk' reads the standard input.
+
+ If a "file" named on the command line has the form `VAR=VAL', it is
+ treated as a variable assignment: the variable VAR is assigned the
+ value VAL. If any of the files have a value that is the null string,
+ that element in the list is skipped.
+
+ For each line in the input, `gawk' tests to see if it matches any
+ PATTERN in the `awk' program. For each pattern that the line matches,
+ the associated ACTION is executed.
+
+
+ File: gawk.info, Node: Variables/Fields, Next: Rules Summary, Prev: Language Summary, Up: Gawk Summary
+
+ Variables and Fields
+ ====================
+
+ `awk' variables are dynamic; they come into existence when they are
+ first used. Their values are either floating-point numbers or strings.
+ `awk' also has one-dimension arrays; multiple-dimensional arrays may be
+ simulated. There are several predefined variables that `awk' sets as a
+ program runs; these are summarized below.
+
+ * Menu:
+
+ * Fields Summary:: Input field splitting.
+ * Built-in Summary:: `awk''s built-in variables.
+ * Arrays Summary:: Using arrays.
+ * Data Type Summary:: Values in `awk' are numbers or strings.
+
+
+ File: gawk.info, Node: Fields Summary, Next: Built-in Summary, Prev: Variables/Fields, Up: Variables/Fields
+
+ Fields
+ ------
+
+ As each input line is read, `gawk' splits the line into FIELDS,
+ using the value of the `FS' variable as the field separator. If `FS'
+ is a single character, fields are separated by that character.
+ Otherwise, `FS' is expected to be a full regular expression. In the
+ special case that `FS' is a single blank, fields are separated by runs
+ of blanks and/or tabs. Note that the value of `IGNORECASE' (*note
+ Case-sensitivity in Matching: Case-sensitivity.) also affects how
+ fields are split when `FS' is a regular expression.
+
+ Each field in the input line may be referenced by its position, `$1',
+ `$2', and so on. `$0' is the whole line. The value of a field may be
+ assigned to as well. Field numbers need not be constants:
+
+ n = 5
+ print $n
+
+ prints the fifth field in the input line. The variable `NF' is set to
+ the total number of fields in the input line.
+
+ References to nonexistent fields (i.e., fields after `$NF') return
+ the null-string. However, assigning to a nonexistent field (e.g.,
+ `$(NF+2) = 5') increases the value of `NF', creates any intervening
+ fields with the null string as their value, and causes the value of
+ `$0' to be recomputed, with the fields being separated by the value of
+ `OFS'.
+
+ *Note Reading Input Files: Reading Files, for a full description of
+ the way `awk' defines and uses fields.
+
+
+ File: gawk.info, Node: Built-in Summary, Next: Arrays Summary, Prev: Fields Summary, Up: Variables/Fields
+
+ Built-in Variables
+ ------------------
+
+ `awk''s built-in variables are:
+
+ `ARGC'
+ The number of command line arguments (not including options or the
+ `awk' program itself).
+
+ `ARGIND'
+ The index in `ARGV' of the current file being processed. It is
+ always true that `FILENAME == ARGV[ARGIND]'.
+
+ `ARGV'
+ The array of command line arguments. The array is indexed from 0
+ to `ARGC' - 1. Dynamically changing the contents of `ARGV' can
+ control the files used for data.
+
+ `CONVFMT'
+ The conversion format to use when converting numbers to strings.
+
+ `FIELDWIDTHS'
+ A space separated list of numbers describing the fixed-width input
+ data.
+
+ `ENVIRON'
+ An array containing the values of the environment variables. The
+ array is indexed by variable name, each element being the value of
+ that variable. Thus, the environment variable `HOME' would be in
+ `ENVIRON["HOME"]'. Its value might be `/u/close'.
+
+ Changing this array does not affect the environment seen by
+ programs which `gawk' spawns via redirection or the `system'
+ function. (This may change in a future version of `gawk'.)
+
+ Some operating systems do not have environment variables. The
+ array `ENVIRON' is empty when running on these systems.
+
+ `ERRNO'
+ The system error message when an error occurs using `getline' or
+ `close'.
+
+ `FILENAME'
+ The name of the current input file. If no files are specified on
+ the command line, the value of `FILENAME' is `-'.
+
+ `FNR'
+ The input record number in the current input file.
+
+ `FS'
+ The input field separator, a blank by default.
+
+ `IGNORECASE'
+ The case-sensitivity flag for regular expression operations. If
+ `IGNORECASE' has a nonzero value, then pattern matching in rules,
+ field splitting with `FS', regular expression matching with `~'
+ and `!~', and the `gsub', `index', `match', `split' and `sub'
+ predefined functions all ignore case when doing regular expression
+ operations.
+
+ `NF'
+ The number of fields in the current input record.
+
+ `NR'
+ The total number of input records seen so far.
+
+ `OFMT'
+ The output format for numbers for the `print' statement, `"%.6g"'
+ by default.
+
+ `OFS'
+ The output field separator, a blank by default.
+
+ `ORS'
+ The output record separator, by default a newline.
+
+ `RS'
+ The input record separator, by default a newline. `RS' is
+ exceptional in that only the first character of its string value
+ is used for separating records. If `RS' is set to the null
+ string, then records are separated by blank lines. When `RS' is
+ set to the null string, then the newline character always acts as
+ a field separator, in addition to whatever value `FS' may have.
+
+ `RSTART'
+ The index of the first character matched by `match'; 0 if no match.
+
+ `RLENGTH'
+ The length of the string matched by `match'; -1 if no match.
+
+ `SUBSEP'
+ The string used to separate multiple subscripts in array elements,
+ by default `"\034"'.
+
+ *Note Built-in Variables::, for more information.
+
+
+ File: gawk.info, Node: Arrays Summary, Next: Data Type Summary, Prev: Built-in Summary, Up: Variables/Fields
+
+ Arrays
+ ------
+
+ Arrays are subscripted with an expression between square brackets
+ (`[' and `]'). Array subscripts are *always* strings; numbers are
+ converted to strings as necessary, following the standard conversion
+ rules (*note Conversion of Strings and Numbers: Conversion.).
+
+ If you use multiple expressions separated by commas inside the square
+ brackets, then the array subscript is a string consisting of the
+ concatenation of the individual subscript values, converted to strings,
+ separated by the subscript separator (the value of `SUBSEP').
+
+ The special operator `in' may be used in an `if' or `while'
+ statement to see if an array has an index consisting of a particular
+ value.
+
+ if (val in array)
+ print array[val]
+
+ If the array has multiple subscripts, use `(i, j, ...) in array' to
+ test for existence of an element.
+
+ The `in' construct may also be used in a `for' loop to iterate over
+ all the elements of an array. *Note Scanning all Elements of an Array:
+ Scanning an Array.
+
+ An element may be deleted from an array using the `delete' statement.
+
+ *Note Arrays in `awk': Arrays, for more detailed information.
+
+
+ File: gawk.info, Node: Data Type Summary, Prev: Arrays Summary, Up: Variables/Fields
+
+ Data Types
+ ----------
+
+ The value of an `awk' expression is always either a number or a
+ string.
+
+ Certain contexts (such as arithmetic operators) require numeric
+ values. They convert strings to numbers by interpreting the text of
+ the string as a numeral. If the string does not look like a numeral,
+ it converts to 0.
+
+ Certain contexts (such as concatenation) require string values.
+ They convert numbers to strings by effectively printing them with
+ `sprintf'. *Note Conversion of Strings and Numbers: Conversion, for
+ the details.
+
+ To force conversion of a string value to a number, simply add 0 to
+ it. If the value you start with is already a number, this does not
+ change it.
+
+ To force conversion of a numeric value to a string, concatenate it
+ with the null string.
+
+ The `awk' language defines comparisons as being done numerically if
+ both operands are numeric, or if one is numeric and the other is a
+ numeric string. Otherwise one or both operands are converted to
+ strings and a string comparison is performed.
+
+ Uninitialized variables have the string value `""' (the null, or
+ empty, string). In contexts where a number is required, this is
+ equivalent to 0.
+
+ *Note Variables::, for more information on variable naming and
+ initialization; *note Conversion of Strings and Numbers: Conversion.,
+ for more information on how variable values are interpreted.
+
+
+ File: gawk.info, Node: Rules Summary, Next: Functions Summary, Prev: Variables/Fields, Up: Gawk Summary
+
+ Patterns and Actions
+ ====================
+
+ * Menu:
+
+ * Pattern Summary:: Quick overview of patterns.
+ * Regexp Summary:: Quick overview of regular expressions.
+ * Actions Summary:: Quick overview of actions.
+
+ An `awk' program is mostly composed of rules, each consisting of a
+ pattern followed by an action. The action is enclosed in `{' and `}'.
+ Either the pattern may be missing, or the action may be missing, but,
+ of course, not both. If the pattern is missing, the action is executed
+ for every single line of input. A missing action is equivalent to this
+ action,
+
+ { print }
+
+ which prints the entire line.
+
+ Comments begin with the `#' character, and continue until the end of
+ the line. Blank lines may be used to separate statements. Normally, a
+ statement ends with a newline, however, this is not the case for lines
+ ending in a `,', `{', `?', `:', `&&', or `||'. Lines ending in `do' or
+ `else' also have their statements automatically continued on the
+ following line. In other cases, a line can be continued by ending it
+ with a `\', in which case the newline is ignored.
+
+ Multiple statements may be put on one line by separating them with a
+ `;'. This applies to both the statements within the action part of a
+ rule (the usual case), and to the rule statements.
+
+ *Note Comments in `awk' Programs: Comments, for information on
+ `awk''s commenting convention; *note `awk' Statements versus Lines:
+ Statements/Lines., for a description of the line continuation mechanism
+ in `awk'.
+
+
+ File: gawk.info, Node: Pattern Summary, Next: Regexp Summary, Prev: Rules Summary, Up: Rules Summary
+
+ Patterns
+ --------
+
+ `awk' patterns may be one of the following:
+
+ /REGULAR EXPRESSION/
+ RELATIONAL EXPRESSION
+ PATTERN && PATTERN
+ PATTERN || PATTERN
+ PATTERN ? PATTERN : PATTERN
+ (PATTERN)
+ ! PATTERN
+ PATTERN1, PATTERN2
+ BEGIN
+ END
+
+ `BEGIN' and `END' are two special kinds of patterns that are not
+ tested against the input. The action parts of all `BEGIN' rules are
+ merged as if all the statements had been written in a single `BEGIN'
+ rule. They are executed before any of the input is read. Similarly,
+ all the `END' rules are merged, and executed when all the input is
+ exhausted (or when an `exit' statement is executed). `BEGIN' and `END'
+ patterns cannot be combined with other patterns in pattern expressions.
+ `BEGIN' and `END' rules cannot have missing action parts.
+
+ For `/REGULAR-EXPRESSION/' patterns, the associated statement is
+ executed for each input line that matches the regular expression.
+ Regular expressions are extensions of those in `egrep', and are
+ summarized below.
+
+ A RELATIONAL EXPRESSION may use any of the operators defined below in
+ the section on actions. These generally test whether certain fields
+ match certain regular expressions.
+
+ The `&&', `||', and `!' operators are logical "and," logical "or,"
+ and logical "not," respectively, as in C. They do short-circuit
+ evaluation, also as in C, and are used for combining more primitive
+ pattern expressions. As in most languages, parentheses may be used to
+ change the order of evaluation.
+
+ The `?:' operator is like the same operator in C. If the first
+ pattern matches, then the second pattern is matched against the input
+ record; otherwise, the third is matched. Only one of the second and
+ third patterns is matched.
+
+ The `PATTERN1, PATTERN2' form of a pattern is called a range
+ pattern. It matches all input lines starting with a line that matches
+ PATTERN1, and continuing until a line that matches PATTERN2, inclusive.
+ A range pattern cannot be used as an operand to any of the pattern
+ operators.
+
+ *Note Patterns::, for a full description of the pattern part of `awk'
+ rules.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-8 gawk-2.15.5/gawk.info-8
*** /src/baseline/gawk-2.15.5/gawk.info-8 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-8 Sun Jun 12 22:28:55 1994
***************
*** 0 ****
--- 1,1173 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: Regexp Summary, Next: Actions Summary, Prev: Pattern Summary, Up: Rules Summary
+
+ Regular Expressions
+ -------------------
+
+ Regular expressions are the extended kind found in `egrep'. They
+ are composed of characters as follows:
+
+ `C'
+ matches the character C (assuming C is a character with no special
+ meaning in regexps).
+
+ `\C'
+ matches the literal character C.
+
+ `.'
+ matches any character except newline.
+
+ `^'
+ matches the beginning of a line or a string.
+
+ `$'
+ matches the end of a line or a string.
+
+ `[ABC...]'
+ matches any of the characters ABC... (character class).
+
+ `[^ABC...]'
+ matches any character except ABC... and newline (negated character
+ class).
+
+ `R1|R2'
+ matches either R1 or R2 (alternation).
+
+ `R1R2'
+ matches R1, and then R2 (concatenation).
+
+ `R+'
+ matches one or more R's.
+
+ `R*'
+ matches zero or more R's.
+
+ `R?'
+ matches zero or one R's.
+
+ `(R)'
+ matches R (grouping).
+
+ *Note Regular Expressions as Patterns: Regexp, for a more detailed
+ explanation of regular expressions.
+
+ The escape sequences allowed in string constants are also valid in
+ regular expressions (*note Constant Expressions: Constants.).
+
+
+ File: gawk.info, Node: Actions Summary, Prev: Regexp Summary, Up: Rules Summary
+
+ Actions
+ -------
+
+ Action statements are enclosed in braces, `{' and `}'. Action
+ statements consist of the usual assignment, conditional, and looping
+ statements found in most languages. The operators, control statements,
+ and input/output statements available are patterned after those in C.
+
+ * Menu:
+
+ * Operator Summary:: `awk' operators.
+ * Control Flow Summary:: The control statements.
+ * I/O Summary:: The I/O statements.
+ * Printf Summary:: A summary of `printf'.
+ * Special File Summary:: Special file names interpreted internally.
+ * Numeric Functions Summary:: Built-in numeric functions.
+ * String Functions Summary:: Built-in string functions.
+ * Time Functions Summary:: Built-in time functions.
+ * String Constants Summary:: Escape sequences in strings.
+
+
+ File: gawk.info, Node: Operator Summary, Next: Control Flow Summary, Prev: Actions Summary, Up: Actions Summary
+
+ Operators
+ .........
+
+ The operators in `awk', in order of increasing precedence, are:
+
+ `= += -= *= /= %= ^='
+ Assignment. Both absolute assignment (`VAR=VALUE') and operator
+ assignment (the other forms) are supported.
+
+ `?:'
+ A conditional expression, as in C. This has the form `EXPR1 ?
+ eXPR2 : EXPR3'. If EXPR1 is true, the value of the expression is
+ EXPR2; otherwise it is EXPR3. Only one of EXPR2 and EXPR3 is
+ evaluated.
+
+ `||'
+ Logical "or".
+
+ `&&'
+ Logical "and".
+
+ `~ !~'
+ Regular expression match, negated match.
+
+ `< <= > >= != =='
+ The usual relational operators.
+
+ `BLANK'
+ String concatenation.
+
+ `+ -'
+ Addition and subtraction.
+
+ `* / %'
+ Multiplication, division, and modulus.
+
+ `+ - !'
+ Unary plus, unary minus, and logical negation.
+
+ `^'
+ Exponentiation (`**' may also be used, and `**=' for the assignment
+ operator, but they are not specified in the POSIX standard).
+
+ `++ --'
+ Increment and decrement, both prefix and postfix.
+
+ `$'
+ Field reference.
+
+ *Note Expressions as Action Statements: Expressions, for a full
+ description of all the operators listed above. *Note Examining Fields:
+ Fields, for a description of the field reference operator.
+
+
+ File: gawk.info, Node: Control Flow Summary, Next: I/O Summary, Prev: Operator Summary, Up: Actions Summary
+
+ Control Statements
+ ..................
+
+ The control statements are as follows:
+
+ if (CONDITION) STATEMENT [ else STATEMENT ]
+ while (CONDITION) STATEMENT
+ do STATEMENT while (CONDITION)
+ for (EXPR1; EXPR2; EXPR3) STATEMENT
+ for (VAR in ARRAY) STATEMENT
+ break
+ continue
+ delete ARRAY[INDEX]
+ exit [ EXPRESSION ]
+ { STATEMENTS }
+
+ *Note Control Statements in Actions: Statements, for a full
+ description of all the control statements listed above.
+
+
+ File: gawk.info, Node: I/O Summary, Next: Printf Summary, Prev: Control Flow Summary, Up: Actions Summary
+
+ I/O Statements
+ ..............
+
+ The input/output statements are as follows:
+
+ `getline'
+ Set `$0' from next input record; set `NF', `NR', `FNR'.
+
+ `getline <FILE'
+ Set `$0' from next record of FILE; set `NF'.
+
+ `getline VAR'
+ Set VAR from next input record; set `NF', `FNR'.
+
+ `getline VAR <FILE'
+ Set VAR from next record of FILE.
+
+ `next'
+ Stop processing the current input record. The next input record
+ is read and processing starts over with the first pattern in the
+ `awk' program. If the end of the input data is reached, the `END'
+ rule(s), if any, are executed.
+
+ `next file'
+ Stop processing the current input file. The next input record
+ read comes from the next input file. `FILENAME' is updated, `FNR'
+ is set to 1, and processing starts over with the first pattern in
+ the `awk' program. If the end of the input data is reached, the
+ `END' rule(s), if any, are executed.
+
+ `print'
+ Prints the current record.
+
+ `print EXPR-LIST'
+ Prints expressions.
+
+ `print EXPR-LIST > FILE'
+ Prints expressions on FILE.
+
+ `printf FMT, EXPR-LIST'
+ Format and print.
+
+ `printf FMT, EXPR-LIST > file'
+ Format and print on FILE.
+
+ Other input/output redirections are also allowed. For `print' and
+ `printf', `>> FILE' appends output to the FILE, and `| COMMAND' writes
+ on a pipe. In a similar fashion, `COMMAND | getline' pipes input into
+ `getline'. `getline' returns 0 on end of file, and -1 on an error.
+
+ *Note Explicit Input with `getline': Getline, for a full description
+ of the `getline' statement. *Note Printing Output: Printing, for a
+ full description of `print' and `printf'. Finally, *note The `next'
+ Statement: Next Statement., for a description of how the `next'
+ statement works.
+
+
+ File: gawk.info, Node: Printf Summary, Next: Special File Summary, Prev: I/O Summary, Up: Actions Summary
+
+ `printf' Summary
+ ................
+
+ The `awk' `printf' statement and `sprintf' function accept the
+ following conversion specification formats:
+
+ `%c'
+ An ASCII character. If the argument used for `%c' is numeric, it
+ is treated as a character and printed. Otherwise, the argument is
+ assumed to be a string, and the only first character of that
+ string is printed.
+
+ `%d'
+ `%i'
+ A decimal number (the integer part).
+
+ `%e'
+ A floating point number of the form `[-]d.ddddddE[+-]dd'.
+
+ `%f'
+ A floating point number of the form [`-']`ddd.dddddd'.
+
+ `%g'
+ Use `%e' or `%f' conversion, whichever produces a shorter string,
+ with nonsignificant zeros suppressed.
+
+ `%o'
+ An unsigned octal number (again, an integer).
+
+ `%s'
+ A character string.
+
+ `%x'
+ An unsigned hexadecimal number (an integer).
+
+ `%X'
+ Like `%x', except use `A' through `F' instead of `a' through `f'
+ for decimal 10 through 15.
+
+ `%%'
+ A single `%' character; no argument is converted.
+
+ There are optional, additional parameters that may lie between the
+ `%' and the control letter:
+
+ `-'
+ The expression should be left-justified within its field.
+
+ `WIDTH'
+ The field should be padded to this width. If WIDTH has a leading
+ zero, then the field is padded with zeros. Otherwise it is padded
+ with blanks.
+
+ `.PREC'
+ A number indicating the maximum width of strings or digits to the
+ right of the decimal point.
+
+ Either or both of the WIDTH and PREC values may be specified as `*'.
+ In that case, the particular value is taken from the argument list.
+
+ *Note Using `printf' Statements for Fancier Printing: Printf, for
+ examples and for a more detailed description.
+
+
+ File: gawk.info, Node: Special File Summary, Next: Numeric Functions Summary, Prev: Printf Summary, Up: Actions Summary
+
+ Special File Names
+ ..................
+
+ When doing I/O redirection from either `print' or `printf' into a
+ file, or via `getline' from a file, `gawk' recognizes certain special
+ file names internally. These file names allow access to open file
+ descriptors inherited from `gawk''s parent process (usually the shell).
+ The file names are:
+
+ `/dev/stdin'
+ The standard input.
+
+ `/dev/stdout'
+ The standard output.
+
+ `/dev/stderr'
+ The standard error output.
+
+ `/dev/fd/N'
+ The file denoted by the open file descriptor N.
+
+ In addition the following files provide process related information
+ about the running `gawk' program.
+
+ `/dev/pid'
+ Reading this file returns the process ID of the current process,
+ in decimal, terminated with a newline.
+
+ `/dev/ppid'
+ Reading this file returns the parent process ID of the current
+ process, in decimal, terminated with a newline.
+
+ `/dev/pgrpid'
+ Reading this file returns the process group ID of the current
+ process, in decimal, terminated with a newline.
+
+ `/dev/user'
+ Reading this file returns a single record terminated with a
+ newline. The fields are separated with blanks. The fields
+ represent the following information:
+
+ `$1'
+ The value of the `getuid' system call.
+
+ `$2'
+ The value of the `geteuid' system call.
+
+ `$3'
+ The value of the `getgid' system call.
+
+ `$4'
+ The value of the `getegid' system call.
+
+ If there are any additional fields, they are the group IDs
+ returned by `getgroups' system call. (Multiple groups may not be
+ supported on all systems.)
+
+ These file names may also be used on the command line to name data
+ files. These file names are only recognized internally if you do not
+ actually have files by these names on your system.
+
+ *Note Standard I/O Streams: Special Files, for a longer description
+ that provides the motivation for this feature.
+
+
+ File: gawk.info, Node: Numeric Functions Summary, Next: String Functions Summary, Prev: Special File Summary, Up: Actions Summary
+
+ Numeric Functions
+ .................
+
+ `awk' has the following predefined arithmetic functions:
+
+ `atan2(Y, X)'
+ returns the arctangent of Y/X in radians.
+
+ `cos(EXPR)'
+ returns the cosine in radians.
+
+ `exp(EXPR)'
+ the exponential function.
+
+ `int(EXPR)'
+ truncates to integer.
+
+ `log(EXPR)'
+ the natural logarithm function.
+
+ `rand()'
+ returns a random number between 0 and 1.
+
+ `sin(EXPR)'
+ returns the sine in radians.
+
+ `sqrt(EXPR)'
+ the square root function.
+
+ `srand(EXPR)'
+ use EXPR as a new seed for the random number generator. If no EXPR
+ is provided, the time of day is used. The return value is the
+ previous seed for the random number generator.
+
+
+ File: gawk.info, Node: String Functions Summary, Next: Time Functions Summary, Prev: Numeric Functions Summary, Up: Actions Summary
+
+ String Functions
+ ................
+
+ `awk' has the following predefined string functions:
+
+ `gsub(R, S, T)'
+ for each substring matching the regular expression R in the string
+ T, substitute the string S, and return the number of substitutions.
+ If T is not supplied, use `$0'.
+
+ `index(S, T)'
+ returns the index of the string T in the string S, or 0 if T is
+ not present.
+
+ `length(S)'
+ returns the length of the string S. The length of `$0' is
+ returned if no argument is supplied.
+
+ `match(S, R)'
+ returns the position in S where the regular expression R occurs,
+ or 0 if R is not present, and sets the values of `RSTART' and
+ `RLENGTH'.
+
+ `split(S, A, R)'
+ splits the string S into the array A on the regular expression R,
+ and returns the number of fields. If R is omitted, `FS' is used
+ instead.
+
+ `sprintf(FMT, EXPR-LIST)'
+ prints EXPR-LIST according to FMT, and returns the resulting
+ string.
+
+ `sub(R, S, T)'
+ this is just like `gsub', but only the first matching substring is
+ replaced.
+
+ `substr(S, I, N)'
+ returns the N-character substring of S starting at I. If N is
+ omitted, the rest of S is used.
+
+ `tolower(STR)'
+ returns a copy of the string STR, with all the upper-case
+ characters in STR translated to their corresponding lower-case
+ counterparts. Nonalphabetic characters are left unchanged.
+
+ `toupper(STR)'
+ returns a copy of the string STR, with all the lower-case
+ characters in STR translated to their corresponding upper-case
+ counterparts. Nonalphabetic characters are left unchanged.
+
+ `system(CMD-LINE)'
+ Execute the command CMD-LINE, and return the exit status.
+
+
+ File: gawk.info, Node: Time Functions Summary, Next: String Constants Summary, Prev: String Functions Summary, Up: Actions Summary
+
+ Built-in time functions
+ .......................
+
+ The following two functions are available for getting the current
+ time of day, and for formatting time stamps.
+
+ `systime()'
+ returns the current time of day as the number of seconds since a
+ particular epoch (Midnight, January 1, 1970 UTC, on POSIX systems).
+
+ `strftime(FORMAT, TIMESTAMP)'
+ formats TIMESTAMP according to the specification in FORMAT. The
+ current time of day is used if no TIMESTAMP is supplied. *Note
+ Functions for Dealing with Time Stamps: Time Functions, for the
+ details on the conversion specifiers that `strftime' accepts.
+
+
+ File: gawk.info, Node: String Constants Summary, Prev: Time Functions Summary, Up: Actions Summary
+
+ String Constants
+ ................
+
+ String constants in `awk' are sequences of characters enclosed
+ between double quotes (`"'). Within strings, certain "escape sequences"
+ are recognized, as in C. These are:
+
+ `\\'
+ A literal backslash.
+
+ `\a'
+ The "alert" character; usually the ASCII BEL character.
+
+ `\b'
+ Backspace.
+
+ `\f'
+ Formfeed.
+
+ `\n'
+ Newline.
+
+ `\r'
+ Carriage return.
+
+ `\t'
+ Horizontal tab.
+
+ `\v'
+ Vertical tab.
+
+ `\xHEX DIGITS'
+ The character represented by the string of hexadecimal digits
+ following the `\x'. As in ANSI C, all following hexadecimal
+ digits are considered part of the escape sequence. (This feature
+ should tell us something about language design by committee.)
+ E.g., `"\x1B"' is a string containing the ASCII ESC (escape)
+ character. (The `\x' escape sequence is not in POSIX `awk'.)
+
+ `\DDD'
+ The character represented by the 1-, 2-, or 3-digit sequence of
+ octal digits. Thus, `"\033"' is also a string containing the
+ ASCII ESC (escape) character.
+
+ `\C'
+ The literal character C.
+
+ The escape sequences may also be used inside constant regular
+ expressions (e.g., the regexp `/[ \t\f\n\r\v]/' matches whitespace
+ characters).
+
+ *Note Constant Expressions: Constants.
+
+
+ File: gawk.info, Node: Functions Summary, Next: Historical Features, Prev: Rules Summary, Up: Gawk Summary
+
+ Functions
+ =========
+
+ Functions in `awk' are defined as follows:
+
+ function NAME(PARAMETER LIST) { STATEMENTS }
+
+ Actual parameters supplied in the function call are used to
+ instantiate the formal parameters declared in the function. Arrays are
+ passed by reference, other variables are passed by value.
+
+ If there are fewer arguments passed than there are names in
+ PARAMETER-LIST, the extra names are given the null string as value.
+ Extra names have the effect of local variables.
+
+ The open-parenthesis in a function call of a user-defined function
+ must immediately follow the function name, without any intervening
+ white space. This is to avoid a syntactic ambiguity with the
+ concatenation operator.
+
+ The word `func' may be used in place of `function' (but not in POSIX
+ `awk').
+
+ Use the `return' statement to return a value from a function.
+
+ *Note User-defined Functions: User-defined, for a more complete
+ description.
+
+
+ File: gawk.info, Node: Historical Features, Prev: Functions Summary, Up: Gawk Summary
+
+ Historical Features
+ ===================
+
+ There are two features of historical `awk' implementations that
+ `gawk' supports. First, it is possible to call the `length' built-in
+ function not only with no arguments, but even without parentheses!
+
+ a = length
+
+ is the same as either of
+
+ a = length()
+ a = length($0)
+
+ This feature is marked as "deprecated" in the POSIX standard, and
+ `gawk' will issue a warning about its use if `-W lint' is specified on
+ the command line.
+
+ The other feature is the use of the `continue' statement outside the
+ body of a `while', `for', or `do' loop. Traditional `awk'
+ implementations have treated such usage as equivalent to the `next'
+ statement. `gawk' will support this usage if `-W posix' has not been
+ specified.
+
+
+ File: gawk.info, Node: Sample Program, Next: Bugs, Prev: Gawk Summary, Up: Top
+
+ Sample Program
+ **************
+
+ The following example is a complete `awk' program, which prints the
+ number of occurrences of each word in its input. It illustrates the
+ associative nature of `awk' arrays by using strings as subscripts. It
+ also demonstrates the `for X in ARRAY' construction. Finally, it shows
+ how `awk' can be used in conjunction with other utility programs to do
+ a useful task of some complexity with a minimum of effort. Some
+ explanations follow the program listing.
+
+ awk '
+ # Print list of word frequencies
+ {
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+ }
+
+ END {
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+ }'
+
+ The first thing to notice about this program is that it has two
+ rules. The first rule, because it has an empty pattern, is executed on
+ every line of the input. It uses `awk''s field-accessing mechanism
+ (*note Examining Fields: Fields.) to pick out the individual words from
+ the line, and the built-in variable `NF' (*note Built-in Variables::.)
+ to know how many fields are available.
+
+ For each input word, an element of the array `freq' is incremented to
+ reflect that the word has been seen an additional time.
+
+ The second rule, because it has the pattern `END', is not executed
+ until the input has been exhausted. It prints out the contents of the
+ `freq' table that has been built up inside the first action.
+
+ Note that this program has several problems that would prevent it
+ from being useful by itself on real text files:
+
+ * Words are detected using the `awk' convention that fields are
+ separated by whitespace and that other characters in the input
+ (except newlines) don't have any special meaning to `awk'. This
+ means that punctuation characters count as part of words.
+
+ * The `awk' language considers upper and lower case characters to be
+ distinct. Therefore, `foo' and `Foo' are not treated by this
+ program as the same word. This is undesirable since in normal
+ text, words are capitalized if they begin sentences, and a
+ frequency analyzer should not be sensitive to that.
+
+ * The output does not come out in any useful order. You're more
+ likely to be interested in which words occur most frequently, or
+ having an alphabetized table of how frequently each word occurs.
+
+ The way to solve these problems is to use some of the more advanced
+ features of the `awk' language. First, we use `tolower' to remove case
+ distinctions. Next, we use `gsub' to remove punctuation characters.
+ Finally, we use the system `sort' utility to process the output of the
+ `awk' script. First, here is the new version of the program:
+
+ awk '
+ # Print list of word frequencies
+ {
+ $0 = tolower($0) # remove case distinctions
+ gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+ }
+
+ END {
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+ }'
+
+ Assuming we have saved this program in a file named `frequency.awk',
+ and that the data is in `file1', the following pipeline
+
+ awk -f frequency.awk file1 | sort +1 -nr
+
+ produces a table of the words appearing in `file1' in order of
+ decreasing frequency.
+
+ The `awk' program suitably massages the data and produces a word
+ frequency table, which is not ordered.
+
+ The `awk' script's output is then sorted by the `sort' command and
+ printed on the terminal. The options given to `sort' in this example
+ specify to sort using the second field of each input line (skipping one
+ field), that the sort keys should be treated as numeric quantities
+ (otherwise `15' would come before `5'), and that the sorting should be
+ done in descending (reverse) order.
+
+ We could have even done the `sort' from within the program, by
+ changing the `END' action to:
+
+ END {
+ sort = "sort +1 -nr"
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word] | sort
+ close(sort)
+ }'
+
+ See the general operating system documentation for more information
+ on how to use the `sort' command.
+
+
+ File: gawk.info, Node: Bugs, Next: Notes, Prev: Sample Program, Up: Top
+
+ Reporting Problems and Bugs
+ ***************************
+
+ If you have problems with `gawk' or think that you have found a bug,
+ please report it to the developers; we cannot promise to do anything
+ but we might well want to fix it.
+
+ Before reporting a bug, make sure you have actually found a real bug.
+ Carefully reread the documentation and see if it really says you can do
+ what you're trying to do. If it's not clear whether you should be able
+ to do something or not, report that too; it's a bug in the
+ documentation!
+
+ Before reporting a bug or trying to fix it yourself, try to isolate
+ it to the smallest possible `awk' program and input data file that
+ reproduces the problem. Then send us the program and data file, some
+ idea of what kind of Unix system you're using, and the exact results
+ `gawk' gave you. Also say what you expected to occur; this will help
+ us decide whether the problem was really in the documentation.
+
+ Once you have a precise problem, send e-mail to (Internet)
+ `bug-gnu-utils@prep.ai.mit.edu' or (UUCP)
+ `mit-eddie!prep.ai.mit.edu!bug-gnu-utils'. Please include the version
+ number of `gawk' you are using. You can get this information with the
+ command `gawk -W version '{}' /dev/null'. You should send carbon
+ copies of your mail to David Trueman at `david@cs.dal.ca', and to
+ Arnold Robbins, who can be reached at `arnold@skeeve.atl.ga.us'. David
+ is most likely to fix code problems, while Arnold is most likely to fix
+ documentation problems.
+
+ Non-bug suggestions are always welcome as well. If you have
+ questions about things that are unclear in the documentation or are
+ just obscure features, ask Arnold Robbins; he will try to help you out,
+ although he may not have the time to fix the problem. You can send him
+ electronic mail at the Internet address above.
+
+ If you find bugs in one of the non-Unix ports of `gawk', please send
+ an electronic mail message to the person who maintains that port. They
+ are listed below, and also in the `README' file in the `gawk'
+ distribution. Information in the `README' file should be considered
+ authoritative if it conflicts with this manual.
+
+ The people maintaining the non-Unix ports of `gawk' are:
+
+ MS-DOS
+ The port to MS-DOS is maintained by Scott Deifik. His electronic
+ mail address is `scottd@amgen.com'.
+
+ VMS
+ The port to VAX VMS is maintained by Pat Rankin. His electronic
+ mail address is `rankin@eql.caltech.edu'.
+
+ Atari ST
+ The port to the Atari ST is maintained by Michal Jaegermann. His
+ electronic mail address is `ntomczak@vm.ucs.ualberta.ca'.
+
+ If your bug is also reproducible under Unix, please send copies of
+ your report to the general GNU bug list, as well as to Arnold Robbins
+ and David Trueman, at the addresses listed above.
+
+
+ File: gawk.info, Node: Notes, Next: Glossary, Prev: Bugs, Up: Top
+
+ Implementation Notes
+ ********************
+
+ This appendix contains information mainly of interest to
+ implementors and maintainers of `gawk'. Everything in it applies
+ specifically to `gawk', and not to other implementations.
+
+ * Menu:
+
+ * Compatibility Mode:: How to disable certain `gawk' extensions.
+ * Future Extensions:: New features we may implement soon.
+ * Improvements:: Suggestions for improvements by volunteers.
+
+
+ File: gawk.info, Node: Compatibility Mode, Next: Future Extensions, Prev: Notes, Up: Notes
+
+ Downward Compatibility and Debugging
+ ====================================
+
+ *Note Extensions in `gawk' not in POSIX `awk': POSIX/GNU, for a
+ summary of the GNU extensions to the `awk' language and program. All
+ of these features can be turned off by invoking `gawk' with the `-W
+ compat' option, or with the `-W posix' option.
+
+ If `gawk' is compiled for debugging with `-DDEBUG', then there is
+ one more option available on the command line:
+
+ `-W parsedebug'
+ Print out the parse stack information as the program is being
+ parsed.
+
+ This option is intended only for serious `gawk' developers, and not
+ for the casual user. It probably has not even been compiled into your
+ version of `gawk', since it slows down execution.
+
+
+ File: gawk.info, Node: Future Extensions, Next: Improvements, Prev: Compatibility Mode, Up: Notes
+
+ Probable Future Extensions
+ ==========================
+
+ This section briefly lists extensions that indicate the directions
+ we are currently considering for `gawk'. The file `FUTURES' in the
+ `gawk' distributions lists these extensions, as well as several others.
+
+ `RS' as a regexp
+ The meaning of `RS' may be generalized along the lines of `FS'.
+
+ Control of subprocess environment
+ Changes made in `gawk' to the array `ENVIRON' may be propagated to
+ subprocesses run by `gawk'.
+
+ Databases
+ It may be possible to map a GDBM/NDBM/SDBM file into an `awk'
+ array.
+
+ Single-character fields
+ The null string, `""', as a field separator, will cause field
+ splitting and the `split' function to separate individual
+ characters. Thus, `split(a, "abcd", "")' would yield `a[1] ==
+ "a"', `a[2] == "b"', and so on.
+
+ More `lint' warnings
+ There are more things that could be checked for portability.
+
+ `RECLEN' variable for fixed length records
+ Along with `FIELDWIDTHS', this would speed up the processing of
+ fixed-length records.
+
+ `RT' variable to hold the record terminator
+ It is occasionally useful to have access to the actual string of
+ characters that matched the `RS' variable. The `RT' variable
+ would hold these characters.
+
+ A `restart' keyword
+ After modifying `$0', `restart' would restart the pattern matching
+ loop, without reading a new record from the input.
+
+ A `|&' redirection
+ The `|&' redirection, in place of `|', would open a two-way
+ pipeline for communication with a sub-process (via `getline' and
+ `print' and `printf').
+
+ `IGNORECASE' affecting all comparisons
+ The effects of the `IGNORECASE' variable may be generalized to all
+ string comparisons, and not just regular expression operations.
+
+ A way to mix command line source code and library files
+ There may be a new option that would make it possible to easily
+ use library functions from a program entered on the command line.
+
+ GNU-style long options
+ We will add GNU-style long options to `gawk' for compatibility
+ with other GNU programs. (For example, `--field-separator=:'
+ would be equivalent to `-F:'.)
+
+
+ File: gawk.info, Node: Improvements, Prev: Future Extensions, Up: Notes
+
+ Suggestions for Improvements
+ ============================
+
+ Here are some projects that would-be `gawk' hackers might like to
+ take on. They vary in size from a few days to a few weeks of
+ programming, depending on which one you choose and how fast a
+ programmer you are. Please send any improvements you write to the
+ maintainers at the GNU project.
+
+ 1. Compilation of `awk' programs: `gawk' uses a Bison (YACC-like)
+ parser to convert the script given it into a syntax tree; the
+ syntax tree is then executed by a simple recursive evaluator.
+ This method incurs a lot of overhead, since the recursive
+ evaluator performs many procedure calls to do even the simplest
+ things.
+
+ It should be possible for `gawk' to convert the script's parse tree
+ into a C program which the user would then compile, using the
+ normal C compiler and a special `gawk' library to provide all the
+ needed functions (regexps, fields, associative arrays, type
+ coercion, and so on).
+
+ An easier possibility might be for an intermediate phase of `awk'
+ to convert the parse tree into a linear byte code form like the
+ one used in GNU Emacs Lisp. The recursive evaluator would then be
+ replaced by a straight line byte code interpreter that would be
+ intermediate in speed between running a compiled program and doing
+ what `gawk' does now.
+
+ This may actually happen for the 3.0 version of `gawk'.
+
+ 2. An error message section has not been included in this version of
+ the manual. Perhaps some nice beta testers will document some of
+ the messages for the future.
+
+ 3. The programs in the test suite could use documenting in this
+ manual.
+
+ 4. The programs and data files in the manual should be available in
+ separate files to facilitate experimentation.
+
+ 5. See the `FUTURES' file for more ideas. Contact us if you would
+ seriously like to tackle any of the items listed there.
+
+
+ File: gawk.info, Node: Glossary, Next: Index, Prev: Notes, Up: Top
+
+ Glossary
+ ********
+
+ Action
+ A series of `awk' statements attached to a rule. If the rule's
+ pattern matches an input record, the `awk' language executes the
+ rule's action. Actions are always enclosed in curly braces.
+ *Note Overview of Actions: Actions.
+
+ Amazing `awk' Assembler
+ Henry Spencer at the University of Toronto wrote a retargetable
+ assembler completely as `awk' scripts. It is thousands of lines
+ long, including machine descriptions for several 8-bit
+ microcomputers. It is a good example of a program that would have
+ been better written in another language.
+
+ ANSI
+ The American National Standards Institute. This organization
+ produces many standards, among them the standard for the C
+ programming language.
+
+ Assignment
+ An `awk' expression that changes the value of some `awk' variable
+ or data object. An object that you can assign to is called an
+ "lvalue". *Note Assignment Expressions: Assignment Ops.
+
+ `awk' Language
+ The language in which `awk' programs are written.
+
+ `awk' Program
+ An `awk' program consists of a series of "patterns" and "actions",
+ collectively known as "rules". For each input record given to the
+ program, the program's rules are all processed in turn. `awk'
+ programs may also contain function definitions.
+
+ `awk' Script
+ Another name for an `awk' program.
+
+ Built-in Function
+ The `awk' language provides built-in functions that perform various
+ numerical, time stamp related, and string computations. Examples
+ are `sqrt' (for the square root of a number) and `substr' (for a
+ substring of a string). *Note Built-in Functions: Built-in.
+
+ Built-in Variable
+ `ARGC', `ARGIND', `ARGV', `CONVFMT', `ENVIRON', `ERRNO',
+ `FIELDWIDTHS', `FILENAME', `FNR', `FS', `IGNORECASE', `NF', `NR',
+ `OFMT', `OFS', `ORS', `RLENGTH', `RSTART', `RS', and `SUBSEP', are
+ the variables that have special meaning to `awk'. Changing some
+ of them affects `awk''s running environment. *Note Built-in
+ Variables::.
+
+ Braces
+ See "Curly Braces."
+
+ C
+ The system programming language that most GNU software is written
+ in. The `awk' programming language has C-like syntax, and this
+ manual points out similarities between `awk' and C when
+ appropriate.
+
+ CHEM
+ A preprocessor for `pic' that reads descriptions of molecules and
+ produces `pic' input for drawing them. It was written by Brian
+ Kernighan, and is available from `netlib@research.att.com'.
+
+ Compound Statement
+ A series of `awk' statements, enclosed in curly braces. Compound
+ statements may be nested. *Note Control Statements in Actions:
+ Statements.
+
+ Concatenation
+ Concatenating two strings means sticking them together, one after
+ another, giving a new string. For example, the string `foo'
+ concatenated with the string `bar' gives the string `foobar'.
+ *Note String Concatenation: Concatenation.
+
+ Conditional Expression
+ An expression using the `?:' ternary operator, such as `EXPR1 ?
+ EXPR2 : EXPR3'. The expression EXPR1 is evaluated; if the result
+ is true, the value of the whole expression is the value of EXPR2
+ otherwise the value is EXPR3. In either case, only one of EXPR2
+ and EXPR3 is evaluated. *Note Conditional Expressions:
+ Conditional Exp.
+
+ Constant Regular Expression
+ A constant regular expression is a regular expression written
+ within slashes, such as `/foo/'. This regular expression is chosen
+ when you write the `awk' program, and cannot be changed doing its
+ execution. *Note How to Use Regular Expressions: Regexp Usage.
+
+ Comparison Expression
+ A relation that is either true or false, such as `(a < b)'.
+ Comparison expressions are used in `if', `while', and `for'
+ statements, and in patterns to select which input records to
+ process. *Note Comparison Expressions: Comparison Ops.
+
+ Curly Braces
+ The characters `{' and `}'. Curly braces are used in `awk' for
+ delimiting actions, compound statements, and function bodies.
+
+ Data Objects
+ These are numbers and strings of characters. Numbers are
+ converted into strings and vice versa, as needed. *Note
+ Conversion of Strings and Numbers: Conversion.
+
+ Dynamic Regular Expression
+ A dynamic regular expression is a regular expression written as an
+ ordinary expression. It could be a string constant, such as
+ `"foo"', but it may also be an expression whose value may vary.
+ *Note How to Use Regular Expressions: Regexp Usage.
+
+ Escape Sequences
+ A special sequence of characters used for describing nonprinting
+ characters, such as `\n' for newline, or `\033' for the ASCII ESC
+ (escape) character. *Note Constant Expressions: Constants.
+
+ Field
+ When `awk' reads an input record, it splits the record into pieces
+ separated by whitespace (or by a separator regexp which you can
+ change by setting the built-in variable `FS'). Such pieces are
+ called fields. If the pieces are of fixed length, you can use the
+ built-in variable `FIELDWIDTHS' to describe their lengths. *Note
+ How Input is Split into Records: Records.
+
+ Format
+ Format strings are used to control the appearance of output in the
+ `printf' statement. Also, data conversions from numbers to strings
+ are controlled by the format string contained in the built-in
+ variable `CONVFMT'. *Note Format-Control Letters: Control Letters.
+
+ Function
+ A specialized group of statements often used to encapsulate general
+ or program-specific tasks. `awk' has a number of built-in
+ functions, and also allows you to define your own. *Note Built-in
+ Functions: Built-in. Also, see *Note User-defined Functions:
+ User-defined.
+
+ `gawk'
+ The GNU implementation of `awk'.
+
+ GNU
+ "GNU's not Unix". An on-going project of the Free Software
+ Foundation to create a complete, freely distributable,
+ POSIX-compliant computing environment.
+
+ Input Record
+ A single chunk of data read in by `awk'. Usually, an `awk' input
+ record consists of one line of text. *Note How Input is Split
+ into Records: Records.
+
+ Keyword
+ In the `awk' language, a keyword is a word that has special
+ meaning. Keywords are reserved and may not be used as variable
+ names.
+
+ `awk''s keywords are: `if', `else', `while', `do...while', `for',
+ `for...in', `break', `continue', `delete', `next', `function',
+ `func', and `exit'.
+
+ Lvalue
+ An expression that can appear on the left side of an assignment
+ operator. In most languages, lvalues can be variables or array
+ elements. In `awk', a field designator can also be used as an
+ lvalue.
+
+ Number
+ A numeric valued data object. The `gawk' implementation uses
+ double precision floating point to represent numbers.
+
+ Pattern
+ Patterns tell `awk' which input records are interesting to which
+ rules.
+
+ A pattern is an arbitrary conditional expression against which
+ input is tested. If the condition is satisfied, the pattern is
+ said to "match" the input record. A typical pattern might compare
+ the input record against a regular expression. *Note Patterns::.
+
+ POSIX
+ The name for a series of standards being developed by the IEEE
+ that specify a Portable Operating System interface. The "IX"
+ denotes the Unix heritage of these standards. The main standard
+ of interest for `awk' users is P1003.2, the Command Language and
+ Utilities standard.
+
+ Range (of input lines)
+ A sequence of consecutive lines from the input file. A pattern
+ can specify ranges of input lines for `awk' to process, or it can
+ specify single lines. *Note Patterns::.
+
+ Recursion
+ When a function calls itself, either directly or indirectly. If
+ this isn't clear, refer to the entry for "recursion."
+
+ Redirection
+ Redirection means performing input from other than the standard
+ input stream, or output to other than the standard output stream.
+
+ You can redirect the output of the `print' and `printf' statements
+ to a file or a system command, using the `>', `>>', and `|'
+ operators. You can redirect input to the `getline' statement using
+ the `<' and `|' operators. *Note Redirecting Output of `print'
+ and `printf': Redirection.
+
+ Regular Expression
+ See "regexp."
+
+ Regexp
+ Short for "regular expression". A regexp is a pattern that
+ denotes a set of strings, possibly an infinite set. For example,
+ the regexp `R.*xp' matches any string starting with the letter `R'
+ and ending with the letters `xp'. In `awk', regexps are used in
+ patterns and in conditional expressions. Regexps may contain
+ escape sequences. *Note Regular Expressions as Patterns: Regexp.
+
+ Rule
+ A segment of an `awk' program, that specifies how to process single
+ input records. A rule consists of a "pattern" and an "action".
+ `awk' reads an input record; then, for each rule, if the input
+ record satisfies the rule's pattern, `awk' executes the rule's
+ action. Otherwise, the rule does nothing for that input record.
+
+ Side Effect
+ A side effect occurs when an expression has an effect aside from
+ merely producing a value. Assignment expressions, increment
+ expressions and function calls have side effects. *Note
+ Assignment Expressions: Assignment Ops.
+
+ Special File
+ A file name interpreted internally by `gawk', instead of being
+ handed directly to the underlying operating system. For example,
+ `/dev/stdin'. *Note Standard I/O Streams: Special Files.
+
+ Stream Editor
+ A program that reads records from an input stream and processes
+ them one or more at a time. This is in contrast with batch
+ programs, which may expect to read their input files in entirety
+ before starting to do anything, and with interactive programs,
+ which require input from the user.
+
+ String
+ A datum consisting of a sequence of characters, such as `I am a
+ string'. Constant strings are written with double-quotes in the
+ `awk' language, and may contain escape sequences. *Note Constant
+ Expressions: Constants.
+
+ Whitespace
+ A sequence of blank or tab characters occurring inside an input
+ record or a string.
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.info-9 gawk-2.15.5/gawk.info-9
*** /src/baseline/gawk-2.15.5/gawk.info-9 Thu Jan 1 00:00:00 1970
--- gawk-2.15.5/gawk.info-9 Sun Jun 12 22:28:56 1994
***************
*** 0 ****
--- 1,359 ----
+ This is Info file gawk.info, produced by Makeinfo-1.55 from the input
+ file /gnu/src/amiga/gawk-2.15.5/gawk.texi.
+
+ This file documents `awk', a program that you can use to select
+ particular records in a file and perform operations upon them.
+
+ This is Edition 0.15 of `The GAWK Manual',
+ for the 2.15 version of the GNU implementation
+ of AWK.
+
+ Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+ manual provided the copyright notice and this permission notice are
+ preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+ this manual under the conditions for verbatim copying, provided that
+ the entire resulting derived work is distributed under the terms of a
+ permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+ manual into another language, under the above conditions for modified
+ versions, except that this permission notice may be stated in a
+ translation approved by the Foundation.
+
+
+ File: gawk.info, Node: Index, Prev: Glossary, Up: Top
+
+ Index
+ *****
+
+ * Menu:
+
+ * $ (field operator): Fields.
+ * -assign option: Options.
+ * -compat option: Options.
+ * -copyleft option: Options.
+ * -copyright option: Options.
+ * -field-separator option: Options.
+ * -file option: Options.
+ * -help option: Options.
+ * -lint option: Options.
+ * -posix option: Options.
+ * -source option: Options.
+ * -usage option: Options.
+ * -version option: Options.
+ * AWKPATH environment variable: AWKPATH Variable.
+ * awk language: This Manual.
+ * awk program: This Manual.
+ * BEGIN special pattern: BEGIN/END.
+ * break statement: Break Statement.
+ * continue statement: Continue Statement.
+ * delete statement: Delete.
+ * END special pattern: BEGIN/END.
+ * exit statement: Exit Statement.
+ * for (x in ...): Scanning an Array.
+ * for statement: For Statement.
+ * if statement: If Statement.
+ * next file statement: Next File Statement.
+ * next statement: Next Statement.
+ * printf statement, syntax of: Basic Printf.
+ * printf, format-control characters: Control Letters.
+ * printf, modifiers: Format Modifiers.
+ * print statement: Print.
+ * return statement: Return Statement.
+ * while statement: While Statement.
+ * /dev/fd/: Special Files.
+ * /dev/pgrpid: Special Files.
+ * /dev/pid: Special Files.
+ * /dev/ppid: Special Files.
+ * /dev/stderr: Special Files.
+ * /dev/stdin: Special Files.
+ * /dev/stdout: Special Files.
+ * /dev/user: Special Files.
+ * BBS-list file: Sample Data Files.
+ * inventory-shipped file: Sample Data Files.
+ * #!: Executable Scripts.
+ * #: Comments.
+ * -F option: Field Separators.
+ * -f option: Long.
+ * -v option: Options.
+ * -W option: Options.
+ * print $0: Very Simple.
+ * accessing fields: Fields.
+ * acronym: History.
+ * action, curly braces: Actions.
+ * action, default: Very Simple.
+ * action, definition of: Actions.
+ * action, separating statements: Actions.
+ * addition: Arithmetic Ops.
+ * and operator: Boolean Ops.
+ * anonymous ftp: Extracting.
+ * anonymous uucp: Extracting.
+ * applications of awk: When.
+ * ARGIND: Auto-set.
+ * arguments in function call: Function Calls.
+ * arguments, command line: Command Line.
+ * ARGV: Other Arguments.
+ * arithmetic operators: Arithmetic Ops.
+ * array assignment: Assigning Elements.
+ * array reference: Reference to Elements.
+ * arrays: Array Intro.
+ * arrays, definition of: Array Intro.
+ * arrays, deleting an element: Delete.
+ * arrays, multi-dimensional subscripts: Multi-dimensional.
+ * arrays, presence of elements: Reference to Elements.
+ * arrays, special for statement: Scanning an Array.
+ * assignment operators: Assignment Ops.
+ * assignment to fields: Changing Fields.
+ * associative arrays: Array Intro.
+ * backslash continuation: Statements/Lines.
+ * basic function of gawk: Getting Started.
+ * body of a loop: While Statement.
+ * boolean expressions: Boolean Ops.
+ * boolean operators: Boolean Ops.
+ * boolean patterns: Boolean Patterns.
+ * buffering output: I/O Functions.
+ * buffers, flushing: I/O Functions.
+ * built-in functions: Built-in.
+ * built-in variables: Built-in Variables.
+ * built-in variables, user modifiable: User-modified.
+ * call by reference: Function Caveats.
+ * call by value: Function Caveats.
+ * calling a function: Function Calls.
+ * case sensitivity: Read Terminal.
+ * changing contents of a field: Changing Fields.
+ * close: Close Input.
+ * close: Close Output.
+ * closing input files and pipes: Close Input.
+ * closing output files and pipes: Close Output.
+ * command line: Command Line.
+ * command line formats: Running gawk.
+ * command line, setting FS on: Field Separators.
+ * comments: Comments.
+ * comparison expressions: Comparison Ops.
+ * comparison expressions as patterns: Comparison Patterns.
+ * computed regular expressions: Regexp Usage.
+ * concatenation: Concatenation.
+ * conditional expression: Conditional Exp.
+ * constants, types of: Constants.
+ * continuation of lines: Statements/Lines.
+ * control statement: Statements.
+ * conversion of strings and numbers: Conversion.
+ * conversion of strings and numbers: Values.
+ * conversions, during subscripting: Numeric Array Subscripts.
+ * CONVFMT: Numeric Array Subscripts.
+ * CONVFMT: Comparison Ops.
+ * CONVFMT: Conversion.
+ * curly braces: Actions.
+ * default action: Very Simple.
+ * default pattern: Very Simple.
+ * defining functions: Definition Syntax.
+ * deleting elements of arrays: Delete.
+ * deprecated features: Obsolete.
+ * deprecated options: Obsolete.
+ * differences: gawk and awk: Getline.
+ * directory search: AWKPATH Variable.
+ * division: Arithmetic Ops.
+ * documenting awk programs: Comments.
+ * dynamic regular expressions: Regexp Usage.
+ * element assignment: Assigning Elements.
+ * element of array: Reference to Elements.
+ * empty pattern: Empty.
+ * ENVIRON: Auto-set.
+ * ERRNO: Getline.
+ * escape sequence notation: Constants.
+ * examining fields: Fields.
+ * executable scripts: Executable Scripts.
+ * explicit input: Getline.
+ * exponentiation: Arithmetic Ops.
+ * expression: Expressions.
+ * expression, conditional: Conditional Exp.
+ * expressions, assignment: Assignment Ops.
+ * expressions, boolean: Boolean Ops.
+ * expressions, comparison: Comparison Ops.
+ * field separator, FS: Field Separators.
+ * field separator, choice of: Field Separators.
+ * field separator: on command line: Field Separators.
+ * field, changing contents of: Changing Fields.
+ * fields: Fields.
+ * fields, separating: Field Separators.
+ * file descriptors: Special Files.
+ * file, awk program: Long.
+ * FILENAME: Reading Files.
+ * flushing buffers: I/O Functions.
+ * FNR: Records.
+ * format specifier: Control Letters.
+ * format string: Basic Printf.
+ * formatted output: Printf.
+ * FS: Field Separators.
+ * ftp, anonymous: Extracting.
+ * function call: Function Calls.
+ * function definition: Definition Syntax.
+ * functions, user-defined: User-defined.
+ * getline: Getline.
+ * getting gawk: Extracting.
+ * gsub: String Functions.
+ * history of awk: History.
+ * how awk works: Two Rules.
+ * increment operators: Increment Ops.
+ * input: Reading Files.
+ * input file, sample: Sample Data Files.
+ * input redirection: Getline.
+ * input, getline command: Getline.
+ * input, explicit: Getline.
+ * input, multiple line records: Multiple Line.
+ * input, standard: Read Terminal.
+ * installation, atari: Atari Installation.
+ * installation, ms-dos: MS-DOS Installation.
+ * installation, unix: Quick Installation.
+ * installation, vms: VMS Installation.
+ * interaction, awk and other programs: I/O Functions.
+ * invocation of gawk: Command Line.
+ * language, awk: This Manual.
+ * length: String Functions.
+ * logical operations: Boolean Ops.
+ * long options: Command Line.
+ * loop: While Statement.
+ * loops, exiting: Break Statement.
+ * lvalue: Assignment Ops.
+ * manual, using this: This Manual.
+ * match: String Functions.
+ * match: String Functions.
+ * metacharacters: Regexp Operators.
+ * modifiers (in format specifiers): Format Modifiers.
+ * multi-dimensional subscripts: Multi-dimensional.
+ * multiple line records: Multiple Line.
+ * multiple passes over data: Other Arguments.
+ * multiple statements on one line: Statements/Lines.
+ * multiplication: Arithmetic Ops.
+ * NF: Fields.
+ * not operator: Boolean Ops.
+ * NR: Records.
+ * number of fields, NF: Fields.
+ * number of records, NR or FNR: Records.
+ * numbers, used as subscripts: Numeric Array Subscripts.
+ * numeric constant: Constants.
+ * numeric value: Constants.
+ * obsolete features: Obsolete.
+ * obsolete options: Obsolete.
+ * OFMT: Conversion.
+ * OFMT: OFMT.
+ * OFS: Output Separators.
+ * one-liners: One-liners.
+ * operator precedence: Precedence.
+ * operators, $: Fields.
+ * operators, arithmetic: Arithmetic Ops.
+ * operators, assignment: Assignment Ops.
+ * operators, boolean: Boolean Ops.
+ * operators, increment: Increment Ops.
+ * operators, regexp matching: Regexp Usage.
+ * operators, relational: Comparison Patterns.
+ * operators, relational: Comparison Ops.
+ * operators, string: Concatenation.
+ * operators, string-matching: Regexp Usage.
+ * options, command line: Command Line.
+ * options, long: Command Line.
+ * or operator: Boolean Ops.
+ * ORS: Output Separators.
+ * output: Printing.
+ * output field separator, OFS: Output Separators.
+ * output record separator, ORS: Output Separators.
+ * output redirection: Redirection.
+ * output, buffering: I/O Functions.
+ * output, formatted: Printf.
+ * output, piping: File/Pipe Redirection.
+ * passes, multiple: Other Arguments.
+ * path, search: AWKPATH Variable.
+ * pattern, case sensitive: Read Terminal.
+ * pattern, comparison expressions: Comparison Patterns.
+ * pattern, default: Very Simple.
+ * pattern, definition of: Patterns.
+ * pattern, empty: Empty.
+ * pattern, regular expressions: Regexp.
+ * patterns, BEGIN: BEGIN/END.
+ * patterns, END: BEGIN/END.
+ * patterns, boolean: Boolean Patterns.
+ * patterns, range: Ranges.
+ * patterns, types of: Kinds of Patterns.
+ * pipes for output: File/Pipe Redirection.
+ * precedence: Precedence.
+ * printing: Printing.
+ * program file: Long.
+ * program, awk: This Manual.
+ * program, definition of: Getting Started.
+ * program, self contained: Executable Scripts.
+ * programs, documenting: Comments.
+ * quotient: Arithmetic Ops.
+ * range pattern: Ranges.
+ * reading files: Reading Files.
+ * reading files, getline command: Getline.
+ * reading files, multiple line records: Multiple Line.
+ * record separator: Records.
+ * records, multiple line: Multiple Line.
+ * redirection of input: Getline.
+ * redirection of output: Redirection.
+ * reference to array: Reference to Elements.
+ * regexp: Regexp.
+ * regexp as expression: Comparison Ops.
+ * regexp operators: Comparison Ops.
+ * regexp search operators: Regexp Usage.
+ * regular expression matching operators: Regexp Usage.
+ * regular expression metacharacters: Regexp Operators.
+ * regular expressions as field separators: Field Separators.
+ * regular expressions as patterns: Regexp.
+ * regular expressions, computed: Regexp Usage.
+ * relational operators: Comparison Ops.
+ * relational operators: Comparison Patterns.
+ * remainder: Arithmetic Ops.
+ * removing elements of arrays: Delete.
+ * RLENGTH: String Functions.
+ * RS: Records.
+ * RSTART: String Functions.
+ * rule, definition of: Getting Started.
+ * running awk programs: Running gawk.
+ * running long programs: Long.
+ * sample input file: Sample Data Files.
+ * scanning an array: Scanning an Array.
+ * script, definition of: Getting Started.
+ * scripts, executable: Executable Scripts.
+ * scripts, shell: Executable Scripts.
+ * search path: AWKPATH Variable.
+ * self contained programs: Executable Scripts.
+ * shell scripts: Executable Scripts.
+ * side effect: Assignment Ops.
+ * single quotes, why needed: One-shot.
+ * split: String Functions.
+ * sprintf: String Functions.
+ * standard error output: Special Files.
+ * standard input: Read Terminal.
+ * standard input: Reading Files.
+ * standard input: Special Files.
+ * standard output: Special Files.
+ * strftime: Time Functions.
+ * string constants: Constants.
+ * string operators: Concatenation.
+ * string-matching operators: Regexp Usage.
+ * sub: String Functions.
+ * subscripts in arrays: Multi-dimensional.
+ * SUBSEP: Multi-dimensional.
+ * substr: String Functions.
+ * subtraction: Arithmetic Ops.
+ * system: I/O Functions.
+ * systime: Time Functions.
+ * time of day: Time Functions.
+ * time stamps: Time Functions.
+ * tolower: String Functions.
+ * toupper: String Functions.
+ * use of comments: Comments.
+ * user-defined functions: User-defined.
+ * user-defined variables: Variables.
+ * uses of awk: Preface.
+ * using this manual: This Manual.
+ * uucp, anonymous: Extracting.
+ * variables, user-defined: Variables.
+ * when to use awk: When.
+
+
diff -rc --new-file /src/baseline/gawk-2.15.5/gawk.texi gawk-2.15.5/gawk.texi
*** /src/baseline/gawk-2.15.5/gawk.texi Thu May 6 13:37:11 1993
--- gawk-2.15.5/gawk.texi Sun Jun 12 21:44:03 1994
***************
*** 7567,7573 ****
Here is the @code{awk} version of the @code{date} utility.
@smallexample
! #! /usr/bin/gawk -f
#
# date --- implement the P1003.2 Draft 11 'date' command
#
--- 7567,7573 ----
Here is the @code{awk} version of the @code{date} utility.
@smallexample
! #! /bin/gawk -f
#
# date --- implement the P1003.2 Draft 11 'date' command
#
***************
*** 8555,8561 ****
separated by colons. @code{gawk} gets its search path from the
@code{AWKPATH} environment variable. If that variable does not exist,
@code{gawk} uses the default path, which is
! @samp{.:/usr/lib/awk:/usr/local/lib/awk}. (Programs written by
system administrators should use an @code{AWKPATH} variable that
does not include the current directory, @samp{.}.)@refill
--- 8555,8561 ----
separated by colons. @code{gawk} gets its search path from the
@code{AWKPATH} environment variable. If that variable does not exist,
@code{gawk} uses the default path, which is
! @samp{.:/local/lib/awk:/gnu/lib/awk}. (Programs written by
system administrators should use an @code{AWKPATH} variable that
does not include the current directory, @samp{.}.)@refill
***************
*** 9674,9680 ****
The environment variable @code{AWKPATH} specifies a search path to use
when finding source files named with the @samp{-f} option. The default
path, which is
! @samp{.:/usr/lib/awk:/usr/local/lib/awk} is used if @code{AWKPATH} is not set.
If a file name given to the @samp{-f} option contains a @samp{/} character,
no path search is performed.
@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable},
--- 9674,9680 ----
The environment variable @code{AWKPATH} specifies a search path to use
when finding source files named with the @samp{-f} option. The default
path, which is
! @samp{.:/local/lib/awk:/gnu/lib/awk} is used if @code{AWKPATH} is not set.
If a file name given to the @samp{-f} option contains a @samp{/} character,
no path search is performed.
@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable},
diff -rc --new-file /src/baseline/gawk-2.15.5/io.c gawk-2.15.5/io.c
*** /src/baseline/gawk-2.15.5/io.c Wed May 11 22:50:41 1994
--- gawk-2.15.5/io.c Sun Jun 12 21:44:08 1994
***************
*** 556,563 ****
--- 556,565 ----
status++;
}
if (fflush(stderr)) {
+ #ifndef __amigados__ /* HACK (fnf) */
warning("error writing standard error (%s).", strerror(errno));
status++;
+ #endif
}
for (rp = red_head; rp != NULL; rp = rp->next)
/* flush both files and pipes, what the heck */
***************
*** 598,605 ****
--- 600,609 ----
status++;
}
if (fflush(stderr)) {
+ #ifndef __amigados__ /* HACK (fnf) */
warning("error writing standard error (%s).", strerror(errno));
status++;
+ #endif
}
return status;
}
***************
*** 909,915 ****
wait_any(interesting)
int interesting; /* pid of interest, if any */
{
! SIGTYPE (*hstat)(), (*istat)(), (*qstat)();
int pid;
int status = 0;
struct redirect *redp;
--- 913,919 ----
wait_any(interesting)
int interesting; /* pid of interest, if any */
{
! RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)();
int pid;
int status = 0;
struct redirect *redp;
diff -rc --new-file /src/baseline/gawk-2.15.5/main.c gawk-2.15.5/main.c
*** /src/baseline/gawk-2.15.5/main.c Sun May 1 18:38:49 1994
--- gawk-2.15.5/main.c Sun Jun 12 21:44:12 1994
***************
*** 33,39 ****
static void init_args P((int argc0, int argc, char *argv0, char **argv));
static void init_vars P((void));
static void pre_assign P((char *v));
! SIGTYPE catchsig P((int sig, int code));
static void gawk_option P((char *optstr));
static void nostalgia P((void));
static void version P((void));
--- 33,39 ----
static void init_args P((int argc0, int argc, char *argv0, char **argv));
static void init_vars P((void));
static void pre_assign P((char *v));
! RETSIGTYPE catchsig P((int sig, int code));
static void gawk_option P((char *optstr));
static void nostalgia P((void));
static void version P((void));
***************
*** 148,157 ****
setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
#endif
! (void) signal(SIGFPE, (SIGTYPE (*) P((int))) catchsig);
! (void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig);
#ifdef SIGBUS
! (void) signal(SIGBUS, (SIGTYPE (*) P((int))) catchsig);
#endif
myname = gawk_name(argv[0]);
--- 148,157 ----
setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
#endif
! (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig);
! (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig);
#ifdef SIGBUS
! (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig);
#endif
myname = gawk_name(argv[0]);
***************
*** 628,634 ****
}
}
! SIGTYPE
catchsig(sig, code)
int sig, code;
{
--- 628,634 ----
}
}
! RETSIGTYPE
catchsig(sig, code)
int sig, code;
{
diff -rc --new-file /src/baseline/gawk-2.15.5/pc/config.h gawk-2.15.5/pc/config.h
*** /src/baseline/gawk-2.15.5/pc/config.h Wed Dec 29 11:44:38 1993
--- gawk-2.15.5/pc/config.h Sun Jun 12 21:44:14 1994
***************
*** 258,264 ****
* this.
*/
! /* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
/* #define ENVSEP ':' */
#define ENVSEP ';'
--- 258,264 ----
* this.
*/
! /* #define DEFPATH ".:/local/lib/awk:/gnu/lib/awk" */
/* #define ENVSEP ':' */
#define ENVSEP ';'
diff -rc --new-file /src/baseline/gawk-2.15.5/protos.h gawk-2.15.5/protos.h
*** /src/baseline/gawk-2.15.5/protos.h Sun May 1 18:20:21 1994
--- gawk-2.15.5/protos.h Sun Jun 12 22:08:10 1994
***************
*** 69,75 ****
--- 69,77 ----
extern int fprintf P((FILE *, const char *, ...));
#if !defined(MSDOS) && !defined(__GNU_LIBRARY__)
#ifdef __STDC__
+ #ifndef __amigados__ /* HACK - conflicts with stdio.h - FIXME */
extern size_t fwrite P((const aptr_t, size_t, size_t, FILE *));
+ #endif
#else
extern int fwrite();
#endif
***************
*** 84,90 ****
extern int isatty P((int));
extern void exit P((int));
extern int system P((const char *));
! extern int sscanf P((const char *, const char *, ...));
#ifndef toupper
extern int toupper P((int));
#endif
--- 86,92 ----
extern int isatty P((int));
extern void exit P((int));
extern int system P((const char *));
! /* extern int sscanf P((const char *, const char *, ...)); */
#ifndef toupper
extern int toupper P((int));
#endif
diff -rc --new-file /src/baseline/gawk-2.15.5/test/Makefile gawk-2.15.5/test/Makefile
*** /src/baseline/gawk-2.15.5/test/Makefile Tue May 10 22:57:41 1994
--- gawk-2.15.5/test/Makefile Sun Jun 12 21:49:01 1994
***************
*** 1,4 ****
--- 1,6 ----
SHELL = /bin/sh
+ #COMPARE = cmp
+ COMPARE = diff
bigtest: basic poundbang gawk.extensions
***************
*** 14,20 ****
poundbang::
cp ../gawk /tmp && chmod +x poundbang && ./poundbang poundbang >tmp
rm -f /tmp/gawk
! cmp poundbang.good tmp && rm -f tmp
msg::
@echo 'Any output from "cmp" is bad news, although some differences'
--- 16,22 ----
poundbang::
cp ../gawk /tmp && chmod +x poundbang && ./poundbang poundbang >tmp
rm -f /tmp/gawk
! -$(COMPARE) poundbang.good tmp && rm -f tmp
msg::
@echo 'Any output from "cmp" is bad news, although some differences'
***************
*** 24,30 ****
swaplns::
@../gawk -f swaplns.awk data >tmp
! cmp swaplns.good tmp && rm -f tmp
messages::
@../gawk -f messages.awk >out2 2>out3
--- 26,32 ----
swaplns::
@../gawk -f swaplns.awk data >tmp
! -$(COMPARE) swaplns.good tmp && rm -f tmp
messages::
@../gawk -f messages.awk >out2 2>out3
***************
*** 32,62 ****
argarray::
@TEST=test echo just a test | ../gawk -f argarray.awk argarray.awk - >tmp
! cmp argarray.good tmp && rm -f tmp
fstabplus::
@echo '1 2' | ../gawk -f fstabplus >tmp
! cmp fstabplus.good tmp && rm -f tmp
fsrs::
@../gawk -f fsrs.awk fsrs.in >tmp
! cmp fsrs.good tmp && rm -f tmp
igncfs::
@../gawk -f igncfs.awk igncfs.in >tmp
! cmp igncfs.good tmp && rm -f tmp
longwrds::
@../gawk -f longwrds.awk manpage | sort >tmp
! cmp longwrds.good tmp && rm -f tmp
fieldwdth::
@echo '123456789' | ../gawk -v FIELDWIDTHS="2 3 4" '{ print $$2}' >tmp
! cmp fieldwdth.good tmp && rm -f tmp
ignrcase::
@echo xYz | ../gawk -v IGNORECASE=1 '{ sub(/y/, ""); print}' >tmp
! cmp ignrcase.good tmp && rm -f tmp
regtest::
@echo 'Some of the output from regtest is very system specific, do not'
--- 34,64 ----
argarray::
@TEST=test echo just a test | ../gawk -f argarray.awk argarray.awk - >tmp
! -$(COMPARE) argarray.good tmp && rm -f tmp
fstabplus::
@echo '1 2' | ../gawk -f fstabplus >tmp
! -$(COMPARE) fstabplus.good tmp && rm -f tmp
fsrs::
@../gawk -f fsrs.awk fsrs.in >tmp
! -$(COMPARE) fsrs.good tmp && rm -f tmp
igncfs::
@../gawk -f igncfs.awk igncfs.in >tmp
! -$(COMPARE) igncfs.good tmp && rm -f tmp
longwrds::
@../gawk -f longwrds.awk manpage | sort >tmp
! -$(COMPARE) longwrds.good tmp && rm -f tmp
fieldwdth::
@echo '123456789' | ../gawk -v FIELDWIDTHS="2 3 4" '{ print $$2}' >tmp
! -$(COMPARE) fieldwdth.good tmp && rm -f tmp
ignrcase::
@echo xYz | ../gawk -v IGNORECASE=1 '{ sub(/y/, ""); print}' >tmp
! -$(COMPARE) ignrcase.good tmp && rm -f tmp
regtest::
@echo 'Some of the output from regtest is very system specific, do not'
***************
*** 66,72 ****
posix::
@echo '1:2,3 4' | ../gawk -f posix >tmp
! cmp posix.good tmp && rm -f tmp
manyfiles::
@rm -rf junk
--- 68,74 ----
posix::
@echo '1:2,3 4' | ../gawk -f posix >tmp
! -$(COMPARE) posix.good tmp && rm -f tmp
manyfiles::
@rm -rf junk
***************
*** 79,106 ****
compare::
@../gawk -f compare.awk 0 1 compare.in >tmp
! cmp compare.good tmp && rm -f tmp
arrayref::
@../gawk -f arrayref >tmp
! cmp arrayref.good tmp && rm -f tmp
rs::
@../gawk -v RS="" '{ print $$1, $$2}' rs.data >tmp
! cmp rs.good tmp && rm -f tmp
fsbs::
@../gawk -v FS='\' '{ print $$1, $$2 }' fsbs.in >tmp
! cmp fsbs.good tmp && rm -f tmp
inftest::
@echo This test is very machine specific...
@../gawk -f inftest.awk >tmp
! cmp inftest.good tmp && rm -f tmp
getline::
@../gawk -f getline.awk getline.awk getline.awk >tmp
! cmp getline.good tmp && rm -f tmp
rand::
@echo The following line should just be 19 random numbers between 1 and 100
--- 81,108 ----
compare::
@../gawk -f compare.awk 0 1 compare.in >tmp
! -$(COMPARE) compare.good tmp && rm -f tmp
arrayref::
@../gawk -f arrayref >tmp
! -$(COMPARE) arrayref.good tmp && rm -f tmp
rs::
@../gawk -v RS="" '{ print $$1, $$2}' rs.data >tmp
! -$(COMPARE) rs.good tmp && rm -f tmp
fsbs::
@../gawk -v FS='\' '{ print $$1, $$2 }' fsbs.in >tmp
! -$(COMPARE) fsbs.good tmp && rm -f tmp
inftest::
@echo This test is very machine specific...
@../gawk -f inftest.awk >tmp
! -$(COMPARE) inftest.good tmp && rm -f tmp
getline::
@../gawk -f getline.awk getline.awk getline.awk >tmp
! -$(COMPARE) getline.good tmp && rm -f tmp
rand::
@echo The following line should just be 19 random numbers between 1 and 100
***************
*** 123,130 ****
cmp splitargv.good tmp && rm -f tmp
awkpath::
! @AWKPATH=".:lib" ../gawk -f awkpath.awk >tmp
! cmp awkpath.good tmp && rm -f tmp
nfset::
@../gawk -f nfset.awk nfset.in >tmp
--- 125,132 ----
cmp splitargv.good tmp && rm -f tmp
awkpath::
! # @AWKPATH=".:lib" ../gawk -f awkpath.awk >tmp
! # cmp awkpath.good tmp && rm -f tmp
nfset::
@../gawk -f nfset.awk nfset.in >tmp
diff -rc --new-file /src/baseline/gawk-2.15.5/test/poundbang gawk-2.15.5/test/poundbang
*** /src/baseline/gawk-2.15.5/test/poundbang Tue Oct 19 20:07:58 1993
--- gawk-2.15.5/test/poundbang Sun Jun 12 21:44:24 1994
***************
*** 1,3 ****
! #! /tmp/gawk -f
{ ccount += length($0) }
END { printf "average line length is %2.4f\n", ccount/NR}
--- 1,3 ----
! #!/tmp/gawk -f
{ ccount += length($0) }
END { printf "average line length is %2.4f\n", ccount/NR}