home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Source Code 1992 March
/
Source_Code_CD-ROM_Walnut_Creek_March_1992.iso
/
usenet
/
altsrcs
/
2
/
2594
/
mtf.icn
< prev
next >
Wrap
Text File
|
1991-01-22
|
17KB
|
524 lines
#############################################################################
#
# NAME: mtf3.icn
#
# TITLE: map tar file
#
# AUTHOR: Richard Goerwitz
#
# VERSION: 3.3
#
#############################################################################
#
# This and future versions of mtf are hereby placed in the public domain -RLG
#
#############################################################################
#
# PURPOSE: Maps 15+ char. filenames in a tar archive to 14 chars.
# Handles both header blocks and the archive itself. Mtf is intended
# to facilitate installation of tar'd archives on systems subject to
# the System V 14-character filename limit.
#
# USAGE: mtf inputfile [-r reportfile] [-e .extensions] [-x exceptions]
#
# "Inputfile" is a tar archive. "Reportfile" is file containing a
# list of files already mapped by mtf in a previous run (used to
# avoid clashes with filenames in use outside the current archive).
# The -e switch precedes a list of filename .extensions which mtf is
# supposed to leave unscathed by the mapping process
# (single-character extensions such as .c and .o are automatically
# preserved; -e allows the user to specify additional extensions,
# such as .pxl, .cpi, and .icn). The final switch, -x, precedes a
# list of strings which should not be mapped at all. Use this switch
# if, say, you have a C file with a structure.field combination such
# as "thisisveryverybig.hashptr" in an archive that contains a file
# called "thisisveryverybig.h," and you want to avoid mapping that
# portion of the struct name which matches the name of the overlong
# file (to wit, "mtf inputfile -x thisisveryverybig.hashptr"). To
# prevent mapping of any string (including overlong filenames) begin-
# ning, say, with "thisisvery," use "mtf inputfile -x thisisvery."
# Be careful with this option, or you might end up defeating the
# whole point of using mtf in the first place.
#
# OUTPUT FORMAT: Mtf writes a mapped tar archive to the stdout.
# When finished, it leaves a file called "map.report" in the current
# directory which records what filenames were mapped and how. Rename
# and save this file, and use it as the "reportfile" argument to any
# subsequent runs of mtf in this same directory. Even if you don't
# plan to run mtf again, this file should still be examined, just to
# be sure that the new filenames are acceptable, and to see if
# perhaps additional .extensions and/or exceptions should be
# specified.
#
# BUGS: Mtf only maps filenames found in the main tar headers.
# Because of this, mtf cannot accept nested tar archives. If you try
# to map a tar archive within a tar file, mtf will abort with a nasty
# message about screwing up your files. Please note that, unless you
# give mtf a "reportfile" to consider, it knows nothing about files
# existing outside the archive. Hence, if an input archive refers to
# an overlong filename in another archive, mtf naturally will not
# know to shorten it. Mtf will, in fact, have no way of knowing that
# it is a filename, and not, say, an identifier in a C program.
# Final word of caution: Try not to use mtf on binaries. It cannot
# possibly preserve the correct format and alignment of strings in an
# executable. Same goes for compressed files. Mtf can't map
# filenames that it can't read!
#
####################################################################
global filenametbl, chunkset, short_chunkset # see procedure mappiece(s)
global extensions, no_nos # ditto
record hblock(name,junk,size,mtime,chksum, # tar header struct;
linkflag,linkname,therest) # see readtarhdr(s)
procedure main(a)
usage := "usage: mtf inputfile [-r reportfile] " ||
"[-e .extensions] [-x exceptions]"
*a = 0 & stop(usage)
intext := open_input_file(a[1]) & pop(a)
i := 0
extensions := []; no_nos := []
while (i +:= 1) <= *a do {
case a[i] of {
"-r" : readin_old_map_report(a[i+:=1])
"-e" : current_list := extensions
"-x" : current_list := no_nos
default : put(current_list,a[i])
}
}
every !extensions ?:= (=".", tab(0))
# Run through all the headers in the input file, filling
# (global) filenametbl with the names of overlong files;
# make_table_of_filenames fails if there are no such files.
make_table_of_filenames(intext) | {
write(&errout,"mtf: no overlong path names to map")
a[1] ? (tab(find(".tar")+4), pos(0)) |
write(&errout,"(Is ",a[1]," even a tar archive?)")
exit(1)
}
# Now that a table of overlong filenames exists, go back
# through the text, remapping all occurrences of these names
# to new, 14-char values; also, reset header checksums, and
# reformat text into correctly padded 512-byte blocks. Ter-
# minate output with 512 nulls.
seek(intext,1)
every writes(output_mapped_headers_and_texts(intext))
close(intext)
write_report() # Record mapped file and dir names for future ref.
exit(0)
end
procedure open_input_file(s)
intext := open("" ~== s,"r") |
stop("mtf: can't open ",s)
find("UNIX",&features) |
stop("mtf: I'm not tested on non-Unix systems.")
s[-2:0] == ".Z" &
stop("mtf: sorry, can't accept compressed files")
return intext
end
procedure readin_old_map_report(s)
initial {
filenametbl := table()
chunkset := set()
short_chunkset := set()
}
mapfile := open_input_file(s)
while line := read(mapfile) do {
line ? {
if chunk := tab(many(~' \t')) & tab(upto(~' \t')) &
lchunk := move(14) & pos(0) then {
filenametbl[chunk] := lchunk
insert(chunkset,chunk)
insert(short_chunkset,chunk[1:16])
}
if /chunk | /lchunk
then stop("mtf: report file, ",s," seems mangled.")
}
}
end
procedure make_table_of_filenames(intext)
local header # chunkset is global
# search headers for overlong filenames; for now
# ignore everything else
while header := readtarhdr(reads(intext,512)) do {
# tab upto the next header block
tab_nxt_hdr(intext,trim_str(header.size),1)
# record overlong filenames in several global tables, sets
fixpath(trim_str(header.name))
}
*\chunkset ~= 0 | fail
return &null
end
procedure output_mapped_headers_and_texts(intext)
# Remember that filenametbl, chunkset, and short_chunkset
# (which are used by various procedures below) are global.
local header, newtext, full_block, block, lastblock
# Read in headers, one at a time.
while header := readtarhdr(reads(intext,512)) do {
# Replace overlong filenames with shorter ones, according to
# the conversions specified in the global hash table filenametbl
# (which were generated by fixpath() on the first pass).
header.name := left(map_filenams(header.name),100,"\x00")
header.linkname := left(map_filenams(header.linkname),100,"\x00")
# Use header.size field to determine the size of the subsequent text.
# Read in the text as one string. Map overlong filenames found in it
# to shorter names as specified in the global hash table filenamtbl.
newtext := map_filenams(tab_nxt_hdr(intext,trim_str(header.size)))
# Now, find the length of newtext, and insert it into the size field.
header.size := right(exbase10(*newtext,8) || " ",12," ")
# Calculate the checksum of the newly retouched header.
header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ")
# Finally, join all the header fields into a new block and write it out
full_block := ""; every full_block ||:= !header
suspend left(full_block,512,"\x00")
# Now we're ready to write out the text, padding the final block
# out to an even 512 bytes if necessary; the next header must start
# right at the beginning of a 512-byte block.
newtext ? {
while block := move(512)
do suspend block
pos(0) & next
lastblock := left(tab(0),512,"\x00")
suspend lastblock
}
}
# Write out a final null-filled block. Some tar programs will write
# out 1024 nulls at the end. Dunno why.
return repl("\x00",512)
end
procedure trim_str(s)
# Knock out spaces, nulls from those crazy tar header
# block fields (some of which end in a space and a null,
# some just a space, and some just a null [anyone know
# why?]).
return s ? {
(tab(many(' ')) | &null) &
trim(tab(find("\x00")|0))
} \ 1
end
procedure tab_nxt_hdr(f,size_str,firstpass)
# Tab upto the next header block. Return the bypassed text
# as a string if not the first pass.
local hs, next_header_offset
hs := integer("8r" || size_str)
next_header_offset := (hs / 512) * 512
hs % 512 ~= 0 & next_header_offset +:= 512
if 0 = next_header_offset then return ""
else {
# if this is pass no. 1 don't bother returning a value; we're
# just collecting long filenames;
if \firstpass then {
seek(f,where(f)+next_header_offset)
return
}
else {
return reads(f,next_header_offset)[1:hs+1] |
stop("mtf: error reading in ",
string(next_header_offset)," bytes.")
}
}
end
procedure fixpath(s)
# Fixpath is a misnomer of sorts, since it is used on
# the first pass only, and merely examines each filename
# in a path, using the procedure mappiece to record any
# overlong ones in the global table filenametbl and in
# the global sets chunkset and short_chunkset; no fixing
# is actually done here.
s2 := ""
s ? {
while piece := tab(find("/")+1)
do s2 ||:= mappiece(piece)
s2 ||:= mappiece(tab(0))
}
return s2
end
procedure mappiece(s)
# Check s (the name of a file or dir as recorded in the tar header
# being examined) to see if it is over 14 chars long. If so,
# generate a unique 14-char version of the name, and store
# both values in the global hashtable filenametbl. Also store
# the original (overlong) file name in chunkset. Store the
# first fifteen chars of the original file name in short_chunkset.
# Sorry about all of the tables and sets. It actually makes for
# a reasonably efficient program. Doing away with both sets,
# while possible, causes a tenfold drop in execution speed!
# global filenametbl, chunkset, short_chunkset, extensions
local j, ending
initial {
/filenametbl := table()
/chunkset := set()
/short_chunkset := set()
}
chunk := trim(s,'/')
if chunk ? (tab(find(".tar")+4), pos(0)) then {
write(&errout, "mtf: Sorry, I can't let you do this.\n",
" You've nested a tar archive within\n",
" another tar archive, which makes it\n",
" likely I'll f your filenames ubar.")
exit(2)
}
if *chunk > 14 then {
i := 0
if /filenametbl[chunk] then {
# if we have not seen this file, then...
repeat {
# ...find a new unique 14-character name for it;
# preserve important suffixes like ".Z," ".c," etc.
# First, check to see if the original filename (chunk)
# ends in an important extension...
if chunk ?
(tab(find(".")),
ending := move(1) || tab(match(!extensions)|any(&ascii)),
pos(0)
)
# ...If so, then leave the extension alone; mess with the
# middle part of the filename (e.g. file.with.extension.c ->
# file.with001.c).
then {
j := (15 - *ending - 3)
lchunk:= chunk[1:j] || right(string(i+:=1),3,"0") || ending
}
# If no important extension is present, then reformat the
# end of the file (e.g. too.long.file.name -> too.long.fi01).
else lchunk := chunk[1:13] || right(string(i+:=1),2,"0")
# If the resulting shorter file name has already been used...
if lchunk == !filenametbl
# ...then go back and find another (i.e. increment i & try
# again; else break from the repeat loop, and...
then next else break
}
# ...record both the old filename (chunk) and its new,
# mapped name (lchunk) in filenametbl. Also record the
# mapped names in chunkset and short_chunkset.
filenametbl[chunk] := lchunk
insert(chunkset,chunk)
insert(short_chunkset,chunk[1:16])
}
}
# If the filename is overlong, return lchunk (the shortened
# name), else return the original name (chunk). If the name,
# as passed to the current function, contained a trailing /
# (i.e. if s[-1]=="/"), then put the / back. This could be
# done more elegantly.
return (\lchunk | chunk) || ((s[-1] == "/") | "")
end
procedure readtarhdr(s)
# Read the silly tar header into a record. Note that, as was
# complained about above, some of the fields end in a null, some
# in a space, and some in a space and a null. The procedure
# trim_str() may (and in fact often _is_) used to remove this
# extra garbage.
this_block := hblock()
s ? {
this_block.name := move(100) # <- to be looked at later
this_block.junk := move(8+8+8) # skip the permissions, uid, etc.
this_block.size := move(12) # <- to be looked at later
this_block.mtime := move(12)
this_block.chksum := move(8) # <- to be looked at later
this_block.linkflag := move(1)
this_block.linkname := move(100) # <- to be looked at later
this_block.therest := tab(0)
}
integer(this_block.size) | fail # If it's not an integer, we've hit
# the final (null-filled) block.
return this_block
end
procedure map_filenams(s)
# Chunkset is global, and contains all the overlong filenames
# found in the first pass through the input file; here the aim
# is to map these filenames to the shortened variants as stored
# in filenametbl (GLOBAL).
local s2, tmp_chunk_tbl, tmp_lst
static new_chunklist
initial {
# Make sure filenames are sorted, longest first. Say we
# have a file called long_file_name_here.1 and one called
# long_file_name_here.1a. We want to check for the longer
# one first. Otherwise the portion of the second file which
# matches the first file will get remapped.
tmp_chunk_tbl := table()
every el := !chunkset
do insert(tmp_chunk_tbl,el,*el)
tmp_lst := sort(tmp_chunk_tbl,4)
new_chunklist := list()
every put(new_chunklist,tmp_lst[*tmp_lst-1 to 1 by -2])
}
s2 := ""
s ? {
until pos(0) do {
# first narrow the possibilities, using short_chunkset
if member(short_chunkset,&subject[&pos:&pos+15])
# then try to map from a long to a shorter 14-char filename
then {
if match(ch := !new_chunklist) & not match(!no_nos)
then s2 ||:= filenametbl[=ch]
else s2 ||:= move(1)
}
else s2 ||:= move(1)
}
}
return s2
end
# From the IPL. Thanks, Ralph -
# Author: Ralph E. Griswold
# Date: June 10, 1988
# exbase10(i,j) convert base-10 integer i to base j
# The maximum base allowed is 36.
procedure exbase10(i,j)
static digits
local s, d, sign
initial digits := &digits || &lcase
if i = 0 then return 0
if i < 0 then {
sign := "-"
i := -i
}
else sign := ""
s := ""
while i > 0 do {
d := i % j
if d > 9 then d := digits[d + 1]
s := d || s
i /:= j
}
return sign || s
end
# end IPL material
procedure get_checksum(r)
# Calculates the new value of the checksum field for the
# current header block. Note that the specification say
# that, when calculating this value, the chksum field must
# be blank-filled.
sum := 0
r.chksum := " "
every field := !r
do every sum +:= ord(!field)
return sum
end
procedure write_report()
# This procedure writes out a list of filenames which were
# remapped (because they exceeded the SysV 14-char limit),
# and then notifies the user of the existence of this file.
local outtext, stbl, i, j, mapfile_name
# Get a unique name for the map.report (thereby preventing
# us from overwriting an older one).
mapfile_name := "map.report"; j := 1
until not close(open(mapfile_name,"r"))
do mapfile_name := (mapfile_name[1:11] || string(j+:=1))
(outtext := open(mapfile_name,"w")) |
open(mapfile_name := "/tmp/map.report","w") |
stop("mtf: Can't find a place to put map.report!")
stbl := sort(filenametbl,3)
every i := 1 to *stbl -1 by 2 do {
match(!no_nos,stbl[i]) |
write(outtext,left(stbl[i],35," ")," ",stbl[i+1])
}
write(&errout,"\nmtf: ",mapfile_name," contains the list of changes.")
write(&errout," Please save this list!")
close(outtext)
return &null
end