home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
HTML - Publishing on the Internet
/
html_cdrom.iso
/
tools
/
html
/
linux
/
check
/
entify.pl
< prev
next >
Wrap
Perl Script
|
1995-02-18
|
3KB
|
60 lines
#!/usr/local/bin/perl
#entify.pl: Change Latin-1 high alphabetics to HTML entities for 7-bit safety.
#
# Typical use:
#
# perl entify.pl infile.8bit > outfile.html
#
# If you have Latin 1 characters in a URL, they should actually be escaped
# using the %-hex-digits convention; the program ignores this consideration.
#
# Copyright H. Churchyard 1995 -- freely redistributable.
# Version 1.0 12/30/94 -- Converted to perl. Included in htmlchek 4.0 release.
# Version 1.1 2/17/95 -- Eliminated warning, may not have been appropriate in
# all circumstances.
#
# Shell bootstrap: on systems that ignore the #! line this file is handed to a
# Bourne-style shell, which executes the eval/exec below and re-launches the
# script under perl with the original arguments.  Perl itself parses the two
# lines as one statement guarded by a variable that is never set, so it never
# runs the exec.
#
# The string is single-quoted on purpose: perl must not interpolate it.  The
# former double-quoted form made perl compile a reference to the $* variable,
# which is a fatal error as of Perl 5.30.  ${1+"$@"} is the shell spelling of
# "all arguments, each correctly quoted, or nothing when there are none".
eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;

$, = ' ';     # output field separator
$\ = "\n";    # output record separator: every print is followed by a newline
# %enty maps a single Latin-1 byte (written as an octal escape) to the HTML
# character entity that represents it in 7-bit text.
#
# The values must be the literal entity text ("&Agrave;"), never the character
# itself, otherwise the output would still contain 8-bit data.
#
# \327 (multiplication sign) and \367 (division sign) have no entry; they are
# not alphabetic, which presumably is why they are left out.  The last two
# entries cover the registered-trademark and copyright signs.
%enty = (
    "\300" => '&Agrave;', "\301" => '&Aacute;', "\302" => '&Acirc;',
    "\303" => '&Atilde;', "\304" => '&Auml;',   "\305" => '&Aring;',
    "\306" => '&AElig;',  "\307" => '&Ccedil;',
    "\310" => '&Egrave;', "\311" => '&Eacute;', "\312" => '&Ecirc;',
    "\313" => '&Euml;',
    "\314" => '&Igrave;', "\315" => '&Iacute;', "\316" => '&Icirc;',
    "\317" => '&Iuml;',
    "\320" => '&ETH;',    "\321" => '&Ntilde;',
    "\322" => '&Ograve;', "\323" => '&Oacute;', "\324" => '&Ocirc;',
    "\325" => '&Otilde;', "\326" => '&Ouml;',   "\330" => '&Oslash;',
    "\331" => '&Ugrave;', "\332" => '&Uacute;', "\333" => '&Ucirc;',
    "\334" => '&Uuml;',
    "\335" => '&Yacute;', "\336" => '&THORN;',  "\337" => '&szlig;',
    "\340" => '&agrave;', "\341" => '&aacute;', "\342" => '&acirc;',
    "\343" => '&atilde;', "\344" => '&auml;',   "\345" => '&aring;',
    "\346" => '&aelig;',  "\347" => '&ccedil;',
    "\350" => '&egrave;', "\351" => '&eacute;', "\352" => '&ecirc;',
    "\353" => '&euml;',
    "\354" => '&igrave;', "\355" => '&iacute;', "\356" => '&icirc;',
    "\357" => '&iuml;',
    "\360" => '&eth;',    "\361" => '&ntilde;',
    "\362" => '&ograve;', "\363" => '&oacute;', "\364" => '&ocirc;',
    "\365" => '&otilde;', "\366" => '&ouml;',   "\370" => '&oslash;',
    "\371" => '&ugrave;', "\372" => '&uacute;', "\373" => '&ucirc;',
    "\374" => '&uuml;',
    "\375" => '&yacute;', "\376" => '&thorn;',  "\377" => '&yuml;',
    "\256" => '&reg;',    "\251" => '&copy;',
);
#
# Main
#
# entify_line(LINE)
#   Returns a copy of LINE in which every 8-bit byte that has an entry in
#   %enty is replaced by that entry; bytes without an entry, and all 7-bit
#   text, are returned unchanged.
#
#   Every byte in \200-\377 is looked up, so the table alone decides what is
#   converted.  (A pre-check limited to \300-\377 would skip lines whose only
#   8-bit bytes are \251 or \256, although the table has entries for them.)
#   The substitution is a single pass, so replacement text is never rescanned.
sub entify_line {
    my ($line) = @_;
    $line =~ s/([\200-\377])/exists $enty{$1} ? $enty{$1} : $1/ge;
    return $line;
}

# Filter the files named on the command line (or standard input) one line at
# a time.
while (<>) {
    chomp;                    # drop the record separator; a final line without
                              # one is left as it is
    print entify_line($_);    # $\ supplies the line terminator on output
}
##EOF