home *** CD-ROM | disk | FTP | other *** search
/ DOS/V Power Report 2000 April / VPR0004A.BIN / OLS / HTMLLINT / htmllint.lzh / charsets.rul < prev    next >
Text File  |  1999-07-15  |  14KB  |  208 lines

  # $charsets: every charset name and alias known to this rule file, written as
  # one '|'-separated alternation.  Regex metacharacters inside the names are
  # backslash-escaped ('\.', '\(' and '\)'), so the value is presumably meant to
  # be interpolated into a pattern by the caller (verify against the caller).
  #
  # Fix: 'NF_Z_62-010_(1973)' used to carry unescaped parentheses.  As a regex
  # that matched 'NF_Z_62-010_1973' rather than the literal name, and added a
  # stray capture group; the parentheses are now escaped like the dots are.
  #
  # NOTE(review): spellings such as csIBBM904, csUnidoceIBM1268 and
  # csISO92JISC62991984b look like typos but appear to mirror the registered
  # alias spellings -- confirm against the IANA registry before "correcting".
  # NOTE(review): windows-1252 is absent between windows-1251 and windows-1253
  # -- confirm whether that is intentional.
  $charsets = 'ANSI_X3\.4-1968|iso-ir-6|ANSI_X3\.4-1986|ISO_646\.irv:1991|ASCII'.
              '|ISO646-US|US-ASCII|us|IBM367|cp367|csASCII|ISO-10646-UCS-2'.
              '|csUnicode|ISO-10646-UCS-4|csUCS4|ISO-10646-UTF-1|csISO10646UTF1'.
              '|ISO_646\.basic:1983|ref|csISO646basic1983|INVARIANT|csINVARIANT'.
              '|ISO_646\.irv:1983|iso-ir-2|irv|csISO2IntlRefVersion|BS_4730'.
              '|iso-ir-4|ISO646-GB|gb|uk|csISO4UnitedKingdom|NATS-SEFI'.
              '|iso-ir-8-1|csNATSSEFI|NATS-SEFI-ADD|iso-ir-8-2|csNATSSEFIADD'.
              '|NATS-DANO|iso-ir-9-1|csNATSDANO|NATS-DANO-ADD|iso-ir-9-2'.
              '|csNATSDANOADD|SEN_850200_B|iso-ir-10|FI|ISO646-FI|ISO646-SE|se'.
              '|csISO10Swedish|SEN_850200_C|iso-ir-11|ISO646-SE2|se2'.
              '|csISO11SwedishForNames|KS_C_5601-1987|iso-ir-149|KS_C_5601-1989'.
              '|KSC_5601|korean|csKSC56011987|ISO-2022-KR|csISO2022KR|EUC-KR'.
              '|csEUCKR|ISO-2022-JP|csISO2022JP|ISO-2022-JP-2|csISO2022JP2'.
              '|ISO-2022-CN|ISO-2022-CN-EXT|JIS_C6220-1969-jp|JIS_C6220-1969'.
              '|iso-ir-13|katakana|x0201-7|csISO13JISC6220jp|JIS_C6220-1969-ro'.
              '|iso-ir-14|jp|ISO646-JP|csISO14JISC6220ro|IT|iso-ir-15|ISO646-IT'.
              '|csISO15Italian|PT|iso-ir-16|ISO646-PT|csISO16Portuguese|ES'.
              '|iso-ir-17|ISO646-ES|csISO17Spanish|greek7-old|iso-ir-18'.
              '|csISO18Greek7Old|latin-greek|iso-ir-19|csISO19LatinGreek'.
              '|DIN_66003|iso-ir-21|de|ISO646-DE|csISO21German'.
              '|NF_Z_62-010_\(1973\)|iso-ir-25|ISO646-FR1|csISO25French'.
              '|Latin-greek-1|iso-ir-27|csISO27LatinGreek1|ISO_5427|iso-ir-37'.
              '|csISO5427Cyrillic|JIS_C6226-1978|iso-ir-42|csISO42JISC62261978'.
              '|BS_viewdata|iso-ir-47|csISO47BSViewdata|INIS|iso-ir-49'.
              '|csISO49INIS|INIS-8|iso-ir-50|csISO50INIS8|INIS-cyrillic'.
              '|iso-ir-51|csISO51INISCyrillic|ISO_5427:1981|iso-ir-54'.
              '|ISO5427Cyrillic1981|ISO_5428:1980|iso-ir-55|csISO5428Greek'.
              '|GB_1988-80|iso-ir-57|cn|ISO646-CN|csISO57GB1988|GB_2312-80'.
              '|iso-ir-58|chinese|csISO58GB231280|NS_4551-1|iso-ir-60|ISO646-NO'.
              '|no|csISO60DanishNorwegian|csISO60Norwegian1|NS_4551-2'.
              '|ISO646-NO2|iso-ir-61|no2|csISO61Norwegian2|NF_Z_62-010'.
              '|iso-ir-69|ISO646-FR|fr|csISO69French|videotex-suppl|iso-ir-70'.
              '|csISO70VideotexSupp1|PT2|iso-ir-84|ISO646-PT2'.
              '|csISO84Portuguese2|ES2|iso-ir-85|ISO646-ES2|csISO85Spanish2'.
              '|MSZ_7795\.3|iso-ir-86|ISO646-HU|hu|csISO86Hungarian'.
              '|JIS_C6226-1983|iso-ir-87|x0208|JIS_X0208-1983|csISO87JISX0208'.
              '|greek7|iso-ir-88|csISO88Greek7|ASMO_449|ISO_9036|arabic7'.
              '|iso-ir-89|csISO89ASMO449|iso-ir-90|csISO90|JIS_C6229-1984-a'.
              '|iso-ir-91|jp-ocr-a|csISO91JISC62291984a|JIS_C6229-1984-b'.
              '|iso-ir-92|ISO646-JP-OCR-B|jp-ocr-b|csISO92JISC62991984b'.
              '|JIS_C6229-1984-b-add|iso-ir-93|jp-ocr-b-add'.
              '|csISO93JIS62291984badd|JIS_C6229-1984-hand|iso-ir-94'.
              '|jp-ocr-hand|csISO94JIS62291984hand|JIS_C6229-1984-hand-add'.
              '|iso-ir-95|jp-ocr-hand-add|csISO95JIS62291984handadd'.
              '|JIS_C6229-1984-kana|iso-ir-96|csISO96JISC62291984kana'.
              '|ISO_2033-1983|iso-ir-98|e13b|csISO2033|ANSI_X3\.110-1983'.
              '|iso-ir-99|CSA_T500-1983|NAPLPS|csISO99NAPLPS|ISO_8859-1:1987'.
              '|iso-ir-100|ISO_8859-1|ISO-8859-1|latin1|l1|IBM819|CP819'.
              '|csISOLatin1|ISO_8859-2:1987|iso-ir-101|ISO_8859-2|ISO-8859-2'.
              '|latin2|l2|csISOLatin2|T\.61-7bit|iso-ir-102|csISO102T617bit'.
              '|T\.61-8bit|T\.61|iso-ir-103|csISO103T618bit|ISO_8859-3:1988'.
              '|iso-ir-109|ISO_8859-3|ISO-8859-3|latin3|l3|csISOLatin3'.
              '|ISO_8859-4:1988|iso-ir-110|ISO_8859-4|ISO-8859-4|latin4|l4'.
              '|csISOLatin4|ECMA-cyrillic|iso-ir-111|csISO111ECMACyrillic'.
              '|CSA_Z243\.4-1985-1|iso-ir-121|ISO646-CA|csa7-1|ca'.
              '|csISO121Canadian1|CSA_Z243\.4-1985-2|iso-ir-122|ISO646-CA2'.
              '|csa7-2|csISO122Canadian2|CSA_Z243\.4-1985-gr|iso-ir-123'.
              '|csISO123CSAZ24341985gr|ISO_8859-6:1987|iso-ir-127|ISO_8859-6'.
              '|ISO-8859-6|ECMA-114|ASMO-708|arabic|csISOLatinArabic'.
              '|ISO_8859-6-E|csISO88596E|ISO_8859-6-I|csISO88596I'.
              '|ISO_8859-7:1987|iso-ir-126|ISO_8859-7|ISO-8859-7|ELOT_928'.
              '|ECMA-118|greek|greek8|csISOLatinGreek|T\.101-G2|iso-ir-128'.
              '|csISO128T101G2|ISO_8859-8:1988|iso-ir-138|ISO_8859-8|ISO-8859-8'.
              '|hebrew|csISOLatinHebrew|ISO_8859-8-E|csISO88598E|ISO_8859-8-I'.
              '|csISO88598I|CSN_369103|iso-ir-139|csISO139CSN369103'.
              '|JUS_I\.B1\.002|iso-ir-141|ISO646-YU|js|yu|csISO141JUSIB1002'.
              '|ISO_6937-2-add|iso-ir-142|csISOTextComm|IEC_P27-1|iso-ir-143'.
              '|csISO143IECP271|ISO_8859-5:1988|iso-ir-144|ISO_8859-5'.
              '|ISO-8859-5|cyrillic|csISOLatinCyrillic|JUS_I\.B1\.003-serb'.
              '|iso-ir-146|serbian|csISO146Serbian|JUS_I\.B1\.003-mac'.
              '|macedonian|iso-ir-147|csISO147Macedonian|ISO_8859-9:1989'.
              '|iso-ir-148|ISO_8859-9|ISO-8859-9|latin5|l5|csISOLatin5'.
              '|greek-ccitt|iso-ir-150|csISO150|csISO150GreekCCITT'.
              '|NC_NC00-10:81|cuba|iso-ir-151|ISO646-CU|csISO151Cuba'.
              '|ISO_6937-2-25|iso-ir-152|csISO6937Add|GOST_19768-74'.
              '|ST_SEV_358-88|iso-ir-153|csISO153GOST1976874|ISO_8859-supp'.
              '|iso-ir-154|latin1-2-5|csISO8859Supp|ISO_10367-box|iso-ir-155'.
              '|csISO10367Box|latin6|iso-ir-157|l6|ISO_8859-10:1992|csISOLatin6'.
              '|latin-lap|lap|iso-ir-158|csISO158Lap|JIS_X0212-1990|x0212'.
              '|iso-ir-159|csISO159JISX02121990|DS_2089|DS2089|ISO646-DK|dk'.
              '|csISO646Danish|us-dk|csUSDK|dk-us|csDKUS|JIS_X0201|X0201'.
              '|csHalfWidthKatakana|KSC5636|ISO646-KR|csKSC5636|DEC-MCS|dec'.
              '|csDECMCS|hp-roman8|roman8|r8|csHPRoman8|macintosh|mac'.
              '|csMacintosh|IBM037|cp037|ebcdic-cp-us|ebcdic-cp-ca|ebcdic-cp-wt'.
              '|ebcdic-cp-nl|csIBM037|IBM038|EBCDIC-INT|cp038|csIBM038|IBM273'.
              '|CP273|csIBM273|IBM274|EBCDIC-BE|CP274|csIBM274|IBM275|EBCDIC-BR'.
              '|cp275|csIBM275|IBM277|EBCDIC-CP-DK|EBCDIC-CP-NO|csIBM277|IBM278'.
              '|CP278|ebcdic-cp-fi|ebcdic-cp-se|csIBM278|IBM280|CP280'.
              '|ebcdic-cp-it|csIBM280|IBM281|EBCDIC-JP-E|cp281|csIBM281|IBM284'.
              '|CP284|ebcdic-cp-es|csIBM284|IBM285|CP285|ebcdic-cp-gb|csIBM285'.
              '|IBM290|cp290|EBCDIC-JP-kana|csIBM290|IBM297|cp297|ebcdic-cp-fr'.
              '|csIBM297|IBM420|cp420|ebcdic-cp-ar1|csIBM420|IBM423|cp423'.
              '|ebcdic-cp-gr|csIBM423|IBM424|cp424|ebcdic-cp-he|csIBM424|IBM437'.
              '|cp437|437|csPC8CodePage437|IBM500|CP500|ebcdic-cp-be'.
              '|ebcdic-cp-ch|csIBM500|IBM775|cp775|csPC775Baltic|IBM850|cp850'.
              '|850|csPC850Multilingual|IBM851|cp851|851|csIBM851|IBM852|cp852'.
              '|852|csPCp852|IBM855|cp855|855|csIBM855|IBM857|cp857|857'.
              '|csIBM857|IBM860|cp860|860|csIBM860|IBM861|cp861|861|cp-is'.
              '|csIBM861|IBM862|cp862|862|csPC862LatinHebrew|IBM863|cp863|863'.
              '|csIBM863|IBM864|cp864|csIBM864|IBM865|cp865|865|csIBM865|IBM866'.
              '|cp866|866|csIBM866|IBM868|CP868|cp-ar|csIBM868|IBM869|cp869|869'.
              '|cp-gr|csIBM869|IBM870|CP870|ebcdic-cp-roece|ebcdic-cp-yu'.
              '|csIBM870|IBM871|CP871|ebcdic-cp-is|csIBM871|IBM880|cp880'.
              '|EBCDIC-Cyrillic|csIBM880|IBM891|cp891|csIBM891|IBM903|cp903'.
              '|csIBM903|IBM904|cp904|904|csIBBM904|IBM905|CP905|ebcdic-cp-tr'.
              '|csIBM905|IBM918|CP918|ebcdic-cp-ar2|csIBM918|IBM1026|CP1026'.
              '|csIBM1026|EBCDIC-AT-DE|csIBMEBCDICATDE|EBCDIC-AT-DE-A'.
              '|csEBCDICATDEA|EBCDIC-CA-FR|csEBCDICCAFR|EBCDIC-DK-NO'.
              '|csEBCDICDKNO|EBCDIC-DK-NO-A|csEBCDICDKNOA|EBCDIC-FI-SE'.
              '|csEBCDICFISE|EBCDIC-FI-SE-A|csEBCDICFISEA|EBCDIC-FR|csEBCDICFR'.
              '|EBCDIC-IT|csEBCDICIT|EBCDIC-PT|EBCDIC-ES|csEBCDICES|EBCDIC-ES-A'.
              '|csEBCDICESA|EBCDIC-ES-S|csEBCDICESS|EBCDIC-UK|csEBCDICUK'.
              '|EBCDIC-US|csEBCDICUS|UNKNOWN-8BIT|csUnknown8BiT|MNEMONIC'.
              '|csMnemonic|MNEM|csMnem|VISCII|csVISCII|VIQR|csVIQR|KOI8-R'.
              '|csKOI8R|UNICODE-1-1|csUnicode11|UNICODE-1-1-UTF-7'.
              '|csUnicode11UTF7|UTF-8|JIS_Encoding|csJISEncoding|Shift_JIS'.
              '|MS_Kanji|csShiftJIS'.
              '|Extended_UNIX_Code_Packed_Format_for_Japanese'.
              '|csEUCPkdFmtJapanese|EUC-JP'.
              '|Extended_UNIX_Code_Fixed_Width_for_Japanese|csEUCFixWidJapanese'.
              '|ISO-10646-UCS-Basic|csUnicodeASCII|ISO-10646-Unicode-Latin1'.
              '|csUnicodeLatin1|ISO-10646|ISO-10646-J-1|csUnicodeIBM2039'.
              '|ISO-Unicode-IBM-1261|csUnicodeIBM1261|ISO-Unicode-IBM-1268'.
              '|csUnidoceIBM1268|ISO-Unicode-IBM-1276|csUnicodeIBM1276'.
              '|ISO-Unicode-IBM-1264|csUnicodeIBM1264|ISO-Unicode-IBM-1265'.
              '|csUnicodeIBM1265|ISO-8859-1-Windows-3\.0-Latin-1'.
              '|csWindows30Latin1|ISO-8859-1-Windows-3\.1-Latin-1'.
              '|csWindows31Latin1|ISO-8859-2-Windows-Latin-2|csWindows31Latin2'.
              '|ISO-8859-9-Windows-Latin-5|csWindows31Latin5'.
              '|Adobe-Standard-Encoding|csAdobeStandardEncoding|Ventura-US'.
              '|csVenturaUS|Ventura-International|csVenturaInternational'.
              '|PC8-Danish-Norwegian|csPC8DanishNorwegian|PC8-Turkish'.
              '|csPC8Turkish|IBM-Symbols|csIBMSymbols|IBM-Thai|csIBMThai'.
              '|HP-Legal|csHPLegal|HP-Pi-font|csHPPiFont|HP-Math8|csHPMath8'.
              '|Adobe-Symbol-Encoding|csHPPSMath|HP-DeskTop|csHPDesktop'.
              '|Ventura-Math|csVenturaMath|Microsoft-Publishing'.
              '|csMicrosoftPublishing|Windows-31J|csWindows31J|GB2312|csGB2312'.
              '|HZ-GB-2312|Big5|csBig5|windows-1250|windows-1251|windows-1253'.
              '|windows-1254|windows-1255|windows-1256|windows-1257'.
              '|windows-1258';
  # $usascii: the names grouped here as US-ASCII, joined with '|' into the same
  # alternation format as the other lists ('.' is escaped as '\.').
  # qw[] keeps the backslash in '\.' exactly as a single-quoted string would.
  $usascii = join '|', qw[
    ANSI_X3\.4-1968 iso-ir-6 ANSI_X3\.4-1986 ISO_646\.irv:1991 ASCII
    ISO646-US US-ASCII us IBM367 cp367 csASCII
    ISO-10646-UTF-1 csISO10646UTF1
  ];
  # %iso8859sets: for each ISO 8859 part (keys iso8859_1 .. iso8859_10), the
  # '|'-separated alternation of names and aliases listed for that part.
  # Each value is built by joining a qw[] word list; qw[] leaves '\.' intact,
  # so the resulting strings are the same as hand-concatenated literals.
  %iso8859sets = (
    iso8859_1 => join('|', qw[
      ISO_8859-1:1987 iso-ir-100 ISO_8859-1 ISO-8859-1 latin1 l1
      IBM819 CP819 csISOLatin1 csUnicodeIBM2039
      ISO-8859-1-Windows-3\.0-Latin-1 csWindows30Latin1
      ISO-8859-1-Windows-3\.1-Latin-1 csWindows31Latin1
    ]),
    iso8859_2 => join('|', qw[
      ISO_8859-2:1987 iso-ir-101 ISO_8859-2 ISO-8859-2 latin2 l2
      csISOLatin2 ISO-8859-2-Windows-Latin-2 csWindows31Latin2
    ]),
    iso8859_3 => join('|', qw[
      ISO_8859-3:1988 iso-ir-109 ISO_8859-3 ISO-8859-3 latin3 l3
      csISOLatin3
    ]),
    iso8859_4 => join('|', qw[
      ISO_8859-4:1988 iso-ir-110 ISO_8859-4 ISO-8859-4 latin4 l4
      csISOLatin4
    ]),
    iso8859_5 => join('|', qw[
      ISO_8859-5:1988 iso-ir-144 ISO_8859-5 ISO-8859-5 cyrillic
      csISOLatinCyrillic
    ]),
    iso8859_6 => join('|', qw[
      ISO_8859-6:1987 iso-ir-127 ISO_8859-6 ISO-8859-6 ECMA-114
      ASMO-708 arabic csISOLatinArabic ISO_8859-6-E csISO88596E
      ISO_8859-6-I csISO88596I
    ]),
    iso8859_7 => join('|', qw[
      ISO_8859-7:1987 iso-ir-126 ISO_8859-7 ISO-8859-7 ELOT_928
      ECMA-118 greek greek8 csISOLatinGreek
    ]),
    iso8859_8 => join('|', qw[
      ISO_8859-8:1988 iso-ir-138 ISO_8859-8 ISO-8859-8 hebrew
      csISOLatinHebrew ISO_8859-8-E csISO88598E ISO_8859-8-I
      csISO88598I
    ]),
    iso8859_9 => join('|', qw[
      ISO_8859-9:1989 iso-ir-148 ISO_8859-9 ISO-8859-9 latin5 l5
      csISOLatin5 ISO-8859-9-Windows-Latin-5 csWindows31Latin5
    ]),
    iso8859_10 => join('|', qw[
      latin6 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6
    ]),
  );
  # %japanesesets: charset names grouped under the keys euc, jis, sjis and
  # utf8; each value is a '|'-separated alternation of names and aliases.
  %japanesesets = (
    euc => join('|', qw[
      Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
      EUC-JP Extended_UNIX_Code_Fixed_Width_for_Japanese
      csEUCFixWidJapanese
    ]),
    jis => join('|', qw[
      ISO-2022-JP csISO2022JP ISO-2022-JP-2 csISO2022JP2 JIS_C6226-1978
      iso-ir-42 csISO42JISC62261978 JIS_C6226-1983 iso-ir-87 x0208
      JIS_X0208-1983 csISO87JISX0208 JIS_X0212-1990 x0212 iso-ir-159
      csISO159JISX02121990 JIS_Encoding csJISEncoding
    ]),
    sjis => join('|', qw[
      Shift_JIS MS_Kanji csShiftJIS Windows-31J csWindows31J
    ]),
    utf8 => 'UTF-8',
  );

  # %escapeseq: escape-sequence patterns keyed by encoding.  The values are
  # single-quoted regex fragments ('\e' is the regex escape for ESC, and the
  # literal '(', '$', '@' and '.' characters are backslash-escaped).
  #
  # Fix: the ISO-8859 section used to be keyed in registration-number order,
  # so iso8859_5 held the Greek (8859-7) sequence, iso8859_7 the Hebrew
  # (8859-8) one and iso8859_8 the Cyrillic (8859-5) one.  That contradicted
  # both the per-line comments and the iso8859_N keys of %iso8859sets.  Each
  # key iso8859_N now maps to the sequence for ISO 8859-N.
  %escapeseq = (
  # ISO-2022-JP(-2)                Character Set                     ISOREG
    jis =>  '\e\(B'.             # ASCII                                6
           '|\e\(I'.             # JIS X 0201-1976 Katakana            13
           '|\e\(J'.             # JIS X 0201-1976 Roman               14
           '|\e\$\@'.            # JIS X 0208-1978                     42
           '|\e\$B'.             # JIS X 0208-1983                     87
           '|\e\$A'.             # GB 2312-1980                        58
           '|\e\$\(C'.           # KSC 5601-1987                      149
           '|\e\$\(D'.           # JIS X 0212-1990                    159
           '|\e\.A'.             # ISO 8859-1                         100
           '|\e\.F'.             # ISO 8859-7 Greek                   126
           '|\eN[\x20-\x7F]',    # Single Shift Char
  # ISO-8859-1..10
    iso8859_1  => '\e\.A',       # ISO 8859-1 Latin alphabet No.1     100
    iso8859_2  => '\e\.B',       # ISO 8859-2 Latin alphabet No.2     101
    iso8859_3  => '\e\.C',       # ISO 8859-3 Latin alphabet No.3     109
    iso8859_4  => '\e\.D',       # ISO 8859-4 Latin alphabet No.4     110
    iso8859_5  => '\e\.L',       # ISO 8859-5 Latin/Cyrillic alphabet 144
    iso8859_6  => '\e\.G',       # ISO 8859-6 Latin/Arabic alphabet   127
    iso8859_7  => '\e\.F',       # ISO 8859-7 Latin/Greek alphabet    126
    iso8859_8  => '\e\.H',       # ISO 8859-8 Latin/Hebrew alphabet   138
    iso8859_9  => '\e\.M',       # ISO 8859-9 Latin alphabet No.5     148
    iso8859_10 => '\e\.V',       # ISO 8859-10 Latin alphabet No.6    157
  );

  # Final true value, as Perl expects from a file loaded with require.
  1;