home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
DOS/V Power Report 2000 April
/
VPR0004A.BIN
/
OLS
/
HTMLLINT
/
htmllint.lzh
/
RFC2396.pm
< prev
next >
Wrap
Text File
|
1999-12-15
|
14KB
|
288 lines
# RFC2396.pm - Syntax for URI (RFC2396/RFC2368/RFC1738/RFC822)
#
# 0.0 1998/10/03
# 0.1 1999/01/19
# 0.2 1999/03/04 RFC2368
# 0.3 1999/07/24 fixed(?) bug in mailto:
# 0.40 1999/08/28 tune up regex / use Exporter / expire parse_URI
# 0.41 1999/12/05 fixed(?) bug in URL_mailto
# 0.42 1999/12/12 use Email:Valid
#
# by ISHINO Keiichiro <k16@softvision.co.jp>
package RFC2396;
require 5.002;
BEGIN {
use Exporter;
@ISA = qw(Exporter);
@EXPORT = qw($URI_reference &URI_reference
$URI_parsing
$URL_ftp &URL_ftp
$URL_file &URL_file
$URL_http &URL_http
$URL_gopher &URL_gopher
$URL_mailto &URL_mailto
$URL_news &URL_news
$URL_nntp &URL_nntp
$URL_telnet &URL_telnet
$URL_wais &URL_wais
$URL_prospero &URL_prospero
);
}
$VERSION = '0.42';
#$lowalpha = '[a-z]';
#$upalpha = '[A-Z]';
$_digit = '0-9'; # char class
$_alpha = 'A-Za-z'; # char class
$_alphanum = $_digit.$_alpha; # char class
$digit = '['.$_digit.']';
$alpha = '['.$_alpha.']';
$alphanum = '['.$_alphanum.']';
$hex = '[0-9A-Fa-f]';
$hex2 = $hex.$hex;
$escaped = '\%'.$hex2;
$control = '[^\x20-\x7E]';
$_space = '\x20'; # char class
$_delim = '<>#%"'; # char class
$_unwise = '{}|\x5C^\[\]`'; # char class
$spdelimunwise = '['.$_space.$_delim.$_unwise.']';
$delimunwise = '['.$_delim.$_unwise.']';
$_mark = '\-_.!~*\'()'; # char class
$_unreserved = $_alphanum.$_mark; # char class
$_reserved = ';/?:@&=+$,'; # char class
$uric = '(?:['.$_unreserved.$_reserved.']|'.$escaped.')';
$uric_no_slash = '(?:['.$_unreserved.';?:@&=+$,]|'.$escaped.')';
$fragment = $uric.'*';
$query = $uric.'*';
$pchar = '(?:['.$_unreserved.':@&=+$,]|'.$escaped.')';
$param = $pchar.'+';
$segment = '(?:'.$pchar.'+(?:;'.$param.')*|(?:;'.$param.')+)';
$path_segments = $segment.'(?:/(?:'.$segment.')?)*';
$rel_segment = '(?:['.$_unreserved.';@&=+$,]|'.$escaped.')+';
$IPv4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
$IPv4address = $IPv4part.'\.'.$IPv4part.'\.'.$IPv4part.'\.'.$IPv4part;
$toplabel = $alpha.'(?:['.$_alphanum.'\-]*'.$alphanum.')?';
$domainlabel = $alphanum.'(?:['.$_alphanum.'\-]*'.$alphanum.')?';
$hostname = '(?:'.$domainlabel.'\.)*(?:'.$toplabel.')\.?';
$host = $hostname.'|'.$IPv4address;
$port = $digit.'*';
$hostport = '(?:'.$host.')(?::'.$port.')?';
$reg_name = '(?:['.$_unreserved.';:@&=+$,]|'.$escaped.')+';
$userinfo = '(?:['.$_unreserved.';:&=+$,]|'.$escaped.')+';
$server = '(?:'.$userinfo.'\@)?'.$hostport;
$authority = '(?:'.$server.'|'.$reg_name.')';
$scheme = $alpha.'['.$_alphanum.'+\-.]*';
$abs_path = '/(?:'.$path_segments.')?';
$net_path = '//(?:'.$authority.'(?:'.$abs_path.')?|'.$abs_path.')';
$rel_path = $rel_segment.'(?:'.$abs_path.')?';
$hier_part = '(?:'.$net_path.'|'.$abs_path.')(?:\?'.$query.')?';
$opaque_part = $uric_no_slash.$uric.'*';
$absoluteURI = '(?:'.$scheme.':(?:'.$hier_part.'|'.$opaque_part.'))';
$relativeURI = '(?:'.$net_path.'|'.$abs_path.'|'.$rel_path.')(?:\?'.$query.')?';
$URI_reference = '(?:(?:'.$absoluteURI.'|'.$relativeURI.')(?:\#'.$fragment.')?|'.
'\#'.$fragment.')';
sub URI_reference { $_[0] =~ /^$URI_reference$/o; }
# RFC2396 Appendix B
$URI_parsing = '(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?';
##############
# FTP (see also RFC959)
$ftptype = '[AIDaid]';
$fsegment = '(?:['.$_unreserved.'?:@&=+$,]|'.$escaped.')*';
$fpath = $fsegment.'(?:/'.$fsegment.')*';
$password = $userinfo;
$login = '(?:'.$userinfo.'(?::'.$password.')?\@)?'.$hostport;
$URL_ftp = 'ftp://'.$login.'(?:/'.$fpath.'(?:;type='.$ftptype.')?)?';
sub URL_ftp { $_[0] =~ /^$URL_ftp$/o; }
# FILE (see also RFC1738)
$URL_file = 'file://(?:'. $host.'|localhost)?/'.$fpath;
sub URL_file { $_[0] =~ /^$URL_file$/o; }
# HTTP (see also RFC1738)
$hsegment = '(?:['.$_unreserved.';:@&=+$,]|'.$escaped.')*';
$hpath = $hsegment.'(?:/'.$hsegment.')*';
$URL_http = 'https?://'.$hostport.'(?:/'.$hpath.'(?:\?'.$query.')?)?';
sub URL_http { $_[0] =~ /^$URL_http$/o; }
# GOPHER (see also RFC1436)
$gopher_string = $uric.'*';
$selector = $uric.'*';
$gtype = $uric;
$URL_gopher = 'gopher://'.$hostport.'(?:/(?:'.$gtype.'(?:'.$selector.
'(?:\t'.$query.'(?:\t'.$gopher_string.')?)?)?)?)?';
sub URL_gopher { $_[0] =~ /^$URL_gopher$/o; }
# MAILTO (see also RFC822/RFC2368)
$_urlc = $_unreserved.';/:@+$,'; # char class
$urlc = '(?:['.$_urlc.']|'.$escaped.')';
$hname = $urlc.'*';
$hvalue = $urlc.'*';
$header = $hname.'='.$hvalue;
$headers = '(\?'.$header.'(?:&'.$header.')*)?';
$to = '('.$urlc.'*)';
$URL_mailto = 'mailto:'.$to.$headers;
# Regular expression built using Jeffrey Friedl's example in
# _Mastering Regular Expressions_ (http://www.ora.com/catalog/regexp/).
$RFC822PAT = <<'EOF';
[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\
xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xf
f\n\015()]*)*\)[\040\t]*)*(?:(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\x
ff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n\015
"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\
xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80
-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*
)*(?:\.[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\
\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\
x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x8
0-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n
\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x
80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^
\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040
\t]*)*)*@[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([
^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\
\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\
x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-
\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()
]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\
x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\04
0\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\
n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\
015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?!
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\
]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\
x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\01
5()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)*|(?:[^(\040)<>@,;:".
\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]
)|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[^
()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]*(?:(?:\([^\\\x80-\xff\n\0
15()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][
^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)|"[^\\\x80-\xff\
n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[^()<>@,;:".\\\[\]\
x80-\xff\000-\010\012-\037]*)*<[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?
:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-
\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:@[\040\t]*
(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015
()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()
]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\0
40)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\
[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\
xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*
)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80
-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x
80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t
]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\
\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])
*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x
80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80
-\xff\n\015()]*)*\)[\040\t]*)*)*(?:,[\040\t]*(?:\([^\\\x80-\xff\n\015(
)]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\
\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*@[\040\t
]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\0
15()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015
()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(
\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|
\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80
-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()
]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x
80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^
\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040
\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".
\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff
])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\
\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x
80-\xff\n\015()]*)*\)[\040\t]*)*)*)*:[\040\t]*(?:\([^\\\x80-\xff\n\015
()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\
\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)?(?:[^
(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-
\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\
n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|
\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))
[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80-\xff
\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\x
ff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(
?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\
000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\
xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\x
ff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)
*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)*@[\040\t]*(?:\([^\\\x80-\x
ff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-
\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)
*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\
]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\]
)[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-
\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\x
ff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(
?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80
-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<
>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x8
0-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:
\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]
*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)
*\)[\040\t]*)*)*>)
EOF
$RFC822PAT =~ s/\n//g;
sub URL_mailto {
unless ($_[0] =~ /^$URL_mailto$/o) { return 0; }
my ($to, $headers) = ($1, $2);
if ($to eq '') { return $headers ne ''; }
$to =~ s/\%($hex2)/chr(hex($1))/oge;
$to =~ /^$RFC822PAT$/o;
}
# NEWS (see also RFC1036)
$article = '(?:['.$_unreserved.';/?:&=+$,]|'.$escaped.')+\@'.$domain;
$newsgroup = '(?:'.$alpha.'['.$_alphanum.'\-.+_]*)';
$grouppart = '(?:\*|'.$newsgroup.'|'.$article.')';
$URL_news = 'news:'.$grouppart;
sub URL_news { $_[0] =~ /^$URL_news$/o; }
# NNTP (see also RFC977)
$URL_nntp = 'nntp://'.$hostport.'/'.$newsgroup.'(?:/'.$digit.'+)?';
sub URL_nntp { $_[0] =~ /^$URL_nntp$/o; }
# TELNET (see also RFC1738)
$URL_telnet = 'telnet://'.$login.'/?';
sub URL_telnet { $_[0] =~ /^$URL_telnet$/o; }
# WAIS (see also RFC1625)
$wpath = '(?:['.$_unreserved.'+$,]|'.$escaped.')*';
$wtype = '(?:['.$_unreserved.'+$,]|'.$escaped.')*';
$database = '(?:['.$_unreserved.'+$,]|'.$escaped.')*';
$waisdoc = $database.'/'.$wtype.'/'.$wpath;
$waisindex = $database.'\?'.$query;
$waisdatabase = $database;
$URL_wais = 'wais://'.$hostport.'/(?:'.$waisdatabase.'|'.$waisindex.'|'.$waisdoc.')';
sub URL_wais { $_[0] =~ /^$URL_wais$/o; }
# PROSPERO (see also RFC1738)
$fieldvalue = '(?:['.$_unreserved.'?:@&+$,]|'.$escaped.')*';
$fieldname = '(?:['.$_unreserved.'?:@&+$,]|'.$escaped.')*';
$fieldspec = ';'.$fieldname.'='.$fieldvalue;
$psegment = '(?:['.$_unreserved.'?:@&=+$,]|'.$escaped.')*';
$ppath = $psegment.'(?:/'.$psegment.')*';
$URL_prospero = 'prospero://'.$hostport.'/'.$ppath.'(?:'.$fieldspec.')*';
sub URL_prospero { $_[0] =~ /^$URL_prospero$/o; }
1;