home *** CD-ROM | disk | FTP | other *** search
/ HTML - Publishing on the Internet / html_cdrom.iso / tools / html / windows / check / entify.awk < prev    next >
Text File  |  1995-02-18  |  3KB  |  54 lines

  1. #entify.awk: Change Latin-1 high alphabetics to HTML entities for 7-bit safety.
  2. #
  3. # Typical use:
  4. #
  5. #   awk -f entify.awk infile.8bit > outfile.html
  6. #
  7. #   If you have Latin 1 characters in a URL, they should actually be escaped
  8. # using the %-hex-digits convention; the program ignores this consideration.
  9. #
  10. #   This program is written in the ``awk'' programming language (on Sun systems
  11. # and some others, non-archaic ``awk'' is called ``nawk'', so that ``nawk''
  12. # should be used instead of ``awk'').  Also, a freely-redistributable ``awk''
  13. # interpreter called ``gawk'', which is free of the bugs that some of the
  14. # vendor-supplied ``awk''/``nawk'' programs suffer from, is available for most
  15. # platforms, and as source from the FSF GNU project.
  16. #
  17. # Copyright H. Churchyard 1994 -- freely redistributable.
  18. # Version 1.0 11/27/94
  19. # Version 1.1 2/17/95 -- Eliminated warning, may not have been appropriate in
  20. # all circumstances.
  21. #
  22. #This will test the 8-bit-cleanliness of your awk:
  # Build the translation table: enty[<Latin-1 byte>] = <gsub replacement text>.
  #
  # Keys are single bytes written as octal escapes, so the script itself stays
  # pure 7-bit ASCII.  Values are the HTML entity names for those characters.
  # Each value starts with "\\&" because a bare "&" in a gsub() replacement
  # means "the matched text"; the escaped form yields a literal ampersand.
  #
  # \327 (multiplication sign) and \367 (division sign) are deliberately
  # absent: only alphabetic characters plus (R) and (C) are translated.
  BEGIN {
    # Upper-case letters, \300-\336
    enty["\300"] = "\\&Agrave;"; enty["\301"] = "\\&Aacute;"; enty["\302"] = "\\&Acirc;"
    enty["\303"] = "\\&Atilde;"; enty["\304"] = "\\&Auml;";   enty["\305"] = "\\&Aring;"
    enty["\306"] = "\\&AElig;";  enty["\307"] = "\\&Ccedil;"; enty["\310"] = "\\&Egrave;"
    enty["\311"] = "\\&Eacute;"; enty["\312"] = "\\&Ecirc;";  enty["\313"] = "\\&Euml;"
    enty["\314"] = "\\&Igrave;"; enty["\315"] = "\\&Iacute;"; enty["\316"] = "\\&Icirc;"
    enty["\317"] = "\\&Iuml;";   enty["\320"] = "\\&ETH;";    enty["\321"] = "\\&Ntilde;"
    enty["\322"] = "\\&Ograve;"; enty["\323"] = "\\&Oacute;"; enty["\324"] = "\\&Ocirc;"
    enty["\325"] = "\\&Otilde;"; enty["\326"] = "\\&Ouml;";   enty["\330"] = "\\&Oslash;"
    enty["\331"] = "\\&Ugrave;"; enty["\332"] = "\\&Uacute;"; enty["\333"] = "\\&Ucirc;"
    enty["\334"] = "\\&Uuml;";   enty["\335"] = "\\&Yacute;"; enty["\336"] = "\\&THORN;"

    # Lower-case letters, \337-\377
    enty["\337"] = "\\&szlig;";  enty["\340"] = "\\&agrave;"; enty["\341"] = "\\&aacute;"
    enty["\342"] = "\\&acirc;";  enty["\343"] = "\\&atilde;"; enty["\344"] = "\\&auml;"
    enty["\345"] = "\\&aring;";  enty["\346"] = "\\&aelig;";  enty["\347"] = "\\&ccedil;"
    enty["\350"] = "\\&egrave;"; enty["\351"] = "\\&eacute;"; enty["\352"] = "\\&ecirc;"
    enty["\353"] = "\\&euml;";   enty["\354"] = "\\&igrave;"; enty["\355"] = "\\&iacute;"
    enty["\356"] = "\\&icirc;";  enty["\357"] = "\\&iuml;";   enty["\360"] = "\\&eth;"
    enty["\361"] = "\\&ntilde;"; enty["\362"] = "\\&ograve;"
    enty["\363"] = "\\&oacute;"; enty["\364"] = "\\&ocirc;";  enty["\365"] = "\\&otilde;"
    enty["\366"] = "\\&ouml;";   enty["\370"] = "\\&oslash;"; enty["\371"] = "\\&ugrave;"
    enty["\372"] = "\\&uacute;"; enty["\373"] = "\\&ucirc;";  enty["\374"] = "\\&uuml;"
    enty["\375"] = "\\&yacute;"; enty["\376"] = "\\&thorn;";  enty["\377"] = "\\&yuml;"

    # Registered-trademark and copyright signs
    enty["\256"] = "\\&reg;";    enty["\251"] = "\\&copy;"
  }
  47. #
  48. # Main
  49. #
  # Main rule: runs for every input line.
  #
  # Replaces each byte that has an entry in enty[] with its replacement text,
  # then prints the (possibly modified) line.  Lines with no such byte skip
  # the table scan entirely and are printed unchanged.
  #
  # The guard lists \251 and \256 explicitly because enty[] also holds the
  # copyright and registered signs, which lie below the \300-\377 letter range;
  # without them a line containing only those characters would pass through
  # untranslated.
  #
  # NOTE(review): the byte escapes are meant to match single 8-bit characters;
  # under a multibyte (e.g. UTF-8) locale some awks may treat them differently.
  # Running with LC_ALL=C is presumably the safe choice -- confirm for your awk.
  {
    if ($0 ~ /[\251\256\300-\377]/) {
      # gsub() is a no-op when the byte is absent, so no per-key pre-test is needed.
      for (ch in enty) {
        gsub(ch, enty[ch])
      }
    }
    print
  }
  53. ##EOF
  54.