home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Geek Gadgets 1
/
ADE-1.bin
/
ade-dist
/
gnat-2.06-src.tgz
/
tar.out
/
fsf
/
gnat
/
ada
/
scn.adb
< prev
next >
Wrap
Text File
|
1996-09-28
|
48KB
|
1,329 lines
------------------------------------------------------------------------------
-- --
-- GNAT COMPILER COMPONENTS --
-- --
-- S C N --
-- --
-- B o d y --
-- --
-- $Revision: 1.78 $ --
-- --
-- Copyright (c) 1992,1993,1994,1995 NYU, All Rights Reserved --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
-- ware Foundation; either version 2, or (at your option) any later ver- --
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
-- for more details. You should have received a copy of the GNU General --
-- Public License distributed with GNAT; see file COPYING. If not, write --
-- to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. --
-- --
------------------------------------------------------------------------------
with Atree; use Atree;
with Csets; use Csets;
with Debug; use Debug;
with Errout; use Errout;
with Features; use Features;
with Lib; use Lib;
with Namet; use Namet;
with Opt; use Opt;
with Output; use Output;
with Scans; use Scans;
with Sinput; use Sinput;
with Sinfo; use Sinfo;
with Snames; use Snames;
with Style;
with Widechar; use Widechar;
with System.Parameters;
with System.WCh_Con; use System.WCh_Con;
package body Scn is
use Ascii;
-- Make control characters visible
Used_As_Identifier : array (Token_Type) of Boolean;
-- Flags set True if a given keyword is used as an identifier (used to
-- make sure that we only post an error message for incorrect use of a
-- keyword as an identifier once for a given keyword).
Max_Allowed_Line_Length : Nat := System.Parameters.Max_Line_Length;
-- Maximum allowed line length (maybe reset by Style.Set_Max_Line_Length)
-----------------------
-- Local Subprograms --
-----------------------
function Double_Char_Token (C : Character) return Boolean;
-- This function is used for double character tokens like := or <>. It
-- checks if the character following Source (Scan_Ptr) is C, and if so
-- bumps Scan_Ptr past the pair of characters and returns True. A space
-- between the two characters is also recognized with an appropriate
-- error message being issued. If C is not present, False is returned.
-- Note that Double_Char_Token can only be used for tokens defined in
-- the Ada syntax.
procedure Error_Illegal_Character;
-- Give illegal character error, Scan_Ptr points to character. On return,
-- Scan_Ptr is bumped past the illegal character.
procedure Error_Illegal_Wide_Character;
-- Give illegal character in wide character escape sequence message. On
-- return, Scan_Ptr is bumped past the illegal character, which may still
-- leave us pointing to junk, not much we can do if the escape sequence
-- is messed up!
procedure Error_Long_Line;
-- Signal error of excessively long line
procedure Error_No_Double_Underline;
-- Signal error of double underline character
procedure Nlit;
-- This is the procedure for scanning out numeric literals
function Set_Start_Column return Column_Number;
-- This routine is called with Scan_Ptr pointing to the first character
-- of a line. On exit, Scan_Ptr is advanced to the first non-blank
-- character of this line (or to the terminating format effector if the
-- line contains no non-blank characters), and the returned result is the
-- column number of this non-blank character (zero origin), which is the
-- value to be stored in the Start_Column scan variable.
procedure Slit;
-- This is the procedure for scanning out string literals
--------------
-- Subunits --
--------------
-- For some reason, these must come early in the file or we run into an
-- infinite loop in GNAT, to be looked at some time ???
procedure Nlit is separate;
procedure Slit is separate;
----------------------------
-- Determine_Token_Casing --
----------------------------
function Determine_Token_Casing return Casing_Type is
All_Lower : Boolean := True;
-- Set False if upper case letter found
All_Upper : Boolean := True;
-- Set False if lower case letter found
Mixed : Boolean := True;
-- Set False if exception to mixed case rule found (lower case letter
-- at start or after underline, or upper case letter elsewhere).
Decisive : Boolean := False;
-- Set True if at least one instance of letter not after underline
After_Und : Boolean := True;
-- True at start of string, and after an underline character
begin
for S in Token_Ptr .. Scan_Ptr - 1 loop
if Source (S) = '_' or else Source (S) = '.' then
After_Und := True;
elsif Is_Lower_Case_Letter (Source (S)) then
All_Upper := False;
if not After_Und then
Decisive := True;
else
After_Und := False;
Mixed := False;
end if;
elsif Is_Upper_Case_Letter (Source (S)) then
All_Lower := False;
if not After_Und then
Decisive := True;
Mixed := False;
else
After_Und := False;
end if;
end if;
end loop;
-- Now we can figure out the result from the flags we set in that loop
if All_Lower then
return All_Lower_Case;
elsif not Decisive then
return Unknown;
elsif All_Upper then
return All_Upper_Case;
elsif Mixed then
return Mixed_Case;
else
return Unknown;
end if;
end Determine_Token_Casing;
-----------------------
-- Double_Char_Token --
-----------------------
function Double_Char_Token (C : Character) return Boolean is
begin
if Source (Scan_Ptr + 1) = C then
Scan_Ptr := Scan_Ptr + 2;
return True;
elsif Source (Scan_Ptr + 1) = ' '
and then Source (Scan_Ptr + 2) = C
then
Scan_Ptr := Scan_Ptr + 1;
Error_Msg_S ("no space allowed here");
Scan_Ptr := Scan_Ptr + 2;
return True;
else
return False;
end if;
end Double_Char_Token;
-----------------------------
-- Error_Illegal_Character --
-----------------------------
procedure Error_Illegal_Character is
begin
Error_Msg_S ("illegal character");
Scan_Ptr := Scan_Ptr + 1;
end Error_Illegal_Character;
----------------------------------
-- Error_Illegal_Wide_Character --
----------------------------------
procedure Error_Illegal_Wide_Character is
begin
Error_Msg_S ("illegal character in wide character escape sequence");
Scan_Ptr := Scan_Ptr + 1;
end Error_Illegal_Wide_Character;
---------------------
-- Error_Long_Line --
---------------------
procedure Error_Long_Line is
begin
Error_Msg ("this line is too long", Current_Line_Start);
end Error_Long_Line;
-------------------------------
-- Error_No_Double_Underline --
-------------------------------
procedure Error_No_Double_Underline is
begin
Error_Msg_S ("two consecutive underlines not permitted");
end Error_No_Double_Underline;
------------------------
-- Initialize_Scanner --
------------------------
procedure Initialize_Scanner (Unit : Unit_Number_Type) is
begin
-- Set up Token_Type values in Names Table entries for reserved keywords
-- We use the Pos value of the Token_Type value. Note we are relying on
-- the fact that Token_Type'Val (0) is not a reserved word!
Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort));
Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs));
Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract));
Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept));
Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access));
Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And));
Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased));
Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All));
Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array));
Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At));
Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin));
Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body));
Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case));
Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant));
Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare));
Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay));
Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta));
Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits));
Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do));
Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else));
Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif));
Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End));
Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry));
Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception));
Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit));
Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For));
Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function));
Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic));
Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto));
Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If));
Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In));
Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is));
Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited));
Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop));
Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod));
Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New));
Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not));
Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null));
Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of));
Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or));
Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others));
Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out));
Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package));
Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma));
Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private));
Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure));
Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected));
Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise));
Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range));
Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record));
Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem));
Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames));
Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue));
Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return));
Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse));
Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select));
Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate));
Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype));
Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged));
Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task));
Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate));
Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then));
Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type));
Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until));
Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use));
Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When));
Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While));
Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With));
Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor));
-- Initialize scan control variables
Current_Source_File := Source_Index (Unit);
Source := Source_Text (Current_Source_File);
Current_Source_Unit := Unit;
Scan_Ptr := Source_First (Current_Source_File);
Token := No_Token;
Token_Ptr := Scan_Ptr;
Current_Line_Start := Scan_Ptr;
Token_Node := Empty;
Token_Name := No_Name;
Start_Column := Set_Start_Column;
First_Non_Blank_Location := Scan_Ptr;
if Style_Check then
Style.Set_Max_Line_Length (Max_Allowed_Line_Length);
end if;
-- Set default for Comes_From_Source. All nodes built now until we
-- reenter the analyzer will have Comes_From_Source set to True
Set_Comes_From_Source_Default (True);
-- Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr)
Scan;
-- Clear flags for reserved words used as indentifiers
for I in Token_Type loop
Used_As_Identifier (I) := False;
end loop;
end Initialize_Scanner;
----------
-- Scan --
----------
procedure Scan is
begin
Prev_Token := Token;
Prev_Token_Ptr := Token_Ptr;
Token_Name := Error_Name;
-- The following loop runs more than once only if a format effector
-- (tab, vertical tab, form feed, line feed, carriage return) is
-- encountered and skipped, or some error situation, such as an
-- illegal character, is encountered.
loop
-- Skip past blanks, loop is opened up for speed
while Source (Scan_Ptr) = ' ' loop
if Source (Scan_Ptr + 1) /= ' ' then
Scan_Ptr := Scan_Ptr + 1;
exit;
end if;
if Source (Scan_Ptr + 2) /= ' ' then
Scan_Ptr := Scan_Ptr + 2;
exit;
end if;
if Source (Scan_Ptr + 3) /= ' ' then
Scan_Ptr := Scan_Ptr + 3;
exit;
end if;
if Source (Scan_Ptr + 4) /= ' ' then
Scan_Ptr := Scan_Ptr + 4;
exit;
end if;
if Source (Scan_Ptr + 5) /= ' ' then
Scan_Ptr := Scan_Ptr + 5;
exit;
end if;
if Source (Scan_Ptr + 6) /= ' ' then
Scan_Ptr := Scan_Ptr + 6;
exit;
end if;
if Source (Scan_Ptr + 7) /= ' ' then
Scan_Ptr := Scan_Ptr + 7;
exit;
end if;
Scan_Ptr := Scan_Ptr + 8;
end loop;
-- We are now at a non-blank character, which is the first character
-- of the token we will scan, and hence the value of Token_Ptr.
Token_Ptr := Scan_Ptr;
-- Here begins the main case statement which transfers control on
-- the basis of the non-blank character we have encountered.
case Source (Scan_Ptr) is
-- Line terminator characters
when CR | LF | FF | VT => Line_Terminator_Case : begin
if Int (Scan_Ptr) - Int (Current_Line_Start)
> Max_Allowed_Line_Length
then
Error_Long_Line;
end if;
if Style_Check then Style.Check_Line_Terminator; end if;
declare
Physical : Boolean;
begin
Skip_Line_Terminators (Scan_Ptr, Physical);
-- If we are at start of physical line, update scan pointers
-- to reflect the start of the new line.
if Physical then
Current_Line_Start := Scan_Ptr;
Start_Column := Set_Start_Column;
First_Non_Blank_Location := Scan_Ptr;
end if;
end;
end Line_Terminator_Case;
-- Horizontal tab, just skip past it
when HT =>
if Style_Check then Style.Check_HT; end if;
Scan_Ptr := Scan_Ptr + 1;
-- End of file character
when EOF =>
Token := Tok_EOF;
return;
-- Ampersand
when '&' =>
if Source (Scan_Ptr + 1) = '&' then
Error_Msg_S ("'&'& should be `AND THEN`");
Scan_Ptr := Scan_Ptr + 2;
Token := Tok_And;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Ampersand;
return;
end if;
-- Asterisk (can be multiplication operator or double asterisk
-- which is the exponentiation compound delimtier).
when '*' =>
if Source (Scan_Ptr + 1) = '*' then
Scan_Ptr := Scan_Ptr + 2;
Token := Tok_Double_Asterisk;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Asterisk;
return;
end if;
-- Colon, which can either be an isolated colon, or part of an
-- assignment compound delimiter.
when ':' =>
if Double_Char_Token ('=') then
Token := Tok_Colon_Equal;
if Style_Check then Style.Check_Colon_Equal; end if;
return;
elsif Source (Scan_Ptr + 1) = '-'
and then Source (Scan_Ptr + 2) /= '-'
then
Token := Tok_Colon_Equal;
Error_Msg (":- should be :=", Scan_Ptr);
Scan_Ptr := Scan_Ptr + 2;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Colon;
if Style_Check then Style.Check_Colon; end if;
return;
end if;
-- Left parenthesis
when '(' =>
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Left_Paren;
if Style_Check then Style.Check_Left_Paren; end if;
return;
-- Left bracket or left brace, treated as left paren
when '[' | '{' =>
Error_Msg_S ("illegal character, replaced by ""(""");
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Left_Paren;
return;
-- Comma
when ',' =>
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Comma;
if Style_Check then Style.Check_Comma; end if;
return;
-- Dot, which is either an isolated period, or part of a double
-- dot compound delimiter sequence. We also check for the case of
-- a digit following the period, to give a better error message.
when '.' =>
if Source (Scan_Ptr + 1) = '.' then
Scan_Ptr := Scan_Ptr + 2;
Token := Tok_Dot_Dot;
if Style_Check then Style.Check_Dot_Dot; end if;
return;
elsif Source (Scan_Ptr + 1) in '0' .. '9' then
Error_Msg_S ("numeric literal cannot start with point");
Scan_Ptr := Scan_Ptr + 1;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Dot;
return;
end if;
-- Equal, which can either be an equality operator, or part of the
-- arrow (=>) compound delimiter.
when '=' =>
if Double_Char_Token ('>') then
Token := Tok_Arrow;
if Style_Check then Style.Check_Arrow; end if;
return;
elsif Source (Scan_Ptr + 1) = '=' then
Error_Msg_S ("== should be =");
Scan_Ptr := Scan_Ptr + 1;
end if;
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Equal;
return;
-- Greater than, which can be a greater than operator, greater than
-- or equal operator, or first character of a right label bracket.
when '>' =>
if Double_Char_Token ('=') then
Token := Tok_Greater_Equal;
return;
elsif Double_Char_Token ('>') then
Token := Tok_Greater_Greater;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Greater;
return;
end if;
-- Less than, which can be a less than operator, less than or equal
-- operator, or the first character of a left label bracket, or the
-- first character of a box (<>) compound delimiter.
when '<' =>
if Double_Char_Token ('=') then
Token := Tok_Less_Equal;
return;
elsif Double_Char_Token ('>') then
Token := Tok_Box;
if Style_Check then Style.Check_Box; end if;
return;
elsif Double_Char_Token ('<') then
Token := Tok_Less_Less;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Less;
return;
end if;
-- Minus, which is either a subtraction operator, or the first
-- character of double minus starting a comment
when '-' => Minus_Case : begin
if Source (Scan_Ptr + 1) = '>' then
Error_Msg_S ("-> should be =>");
Scan_Ptr := Scan_Ptr + 2;
Token := Tok_Arrow;
return;
elsif Source (Scan_Ptr + 1) /= '-' then
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Minus;
return;
-- Comment
else -- Source (Scan_Ptr + 1) = '-' then
if Style_Check then Style.Check_Comment; end if;
Scan_Ptr := Scan_Ptr + 2;
-- Loop to scan comment (this loop runs more than once only if
-- a horizontal tab or other non-graphic character is scanned)
loop
-- Scan to non graphic character (opened up for speed)
loop
exit when Source (Scan_Ptr) not in Graphic_Character;
Scan_Ptr := Scan_Ptr + 1;
exit when Source (Scan_Ptr) not in Graphic_Character;
Scan_Ptr := Scan_Ptr + 1;
exit when Source (Scan_Ptr) not in Graphic_Character;
Scan_Ptr := Scan_Ptr + 1;
exit when Source (Scan_Ptr) not in Graphic_Character;
Scan_Ptr := Scan_Ptr + 1;
exit when Source (Scan_Ptr) not in Graphic_Character;
Scan_Ptr := Scan_Ptr + 1;
end loop;
-- Keep going if horizontal tab
if Source (Scan_Ptr) = HT then
if Style_Check then Style.Check_HT; end if;
Scan_Ptr := Scan_Ptr + 1;
-- Terminate scan of comment if line terminator or EOF
elsif Source (Scan_Ptr) in Line_Terminator
or else Source (Scan_Ptr) = EOF
then
if Int (Scan_Ptr) - Int (Current_Line_Start) >
Max_Allowed_Line_Length
then
Error_Long_Line;
end if;
exit;
-- Terminate scan of comment if end of file encountered
-- (embedded EOF character or real last character in file)
elsif Source (Scan_Ptr) = EOF then
exit;
-- Keep going if character in 80-FF range. These characters
-- are allowed in comments according to the approved AI.
-- Also allow ESC, which just got added to the AI (June 93)
elsif Source (Scan_Ptr) in Upper_Half_Character
or else Source (Scan_Ptr) = ESC
then
Scan_Ptr := Scan_Ptr + 1;
-- Otherwise we have an illegal comment character
else
Error_Illegal_Character;
end if;
end loop;
-- Note that we do NOT execute a return here, instead we fall
-- through to reexecute the scan loop to look for a token.
end if;
end Minus_Case;
-- Double quote or percent starting a string constant
when '"' | '%' =>
Slit;
return;
-- Apostrophe. This can either be the start of a character literal,
-- or an isolated apostrophe used in a qualified expression or an
-- attribute. We treat it as a character literal if it does not
-- follow a right parenthesis, identifier or literal. This means
-- that we correctly treat constructs like:
-- A := CHARACTER'('A');
-- which appears to be illegal according to 2.2(2) (since the rule
-- there would seem to require separators to avoid the confusion
-- with the character literal), but all compilers accept the above
-- statement, and there are at least six ACVC tests that use this
-- type of lexical sequence, expecting it to be legal, so in fact
-- all compilers must accept this and we must too!
when ''' => Char_Literal_Case : declare
Code : Char_Code;
Err : Boolean;
begin
Scan_Ptr := Scan_Ptr + 1;
-- Here is where we make the test to distinguish the cases. Treat
-- as apostrophe if previous token is an identifier, right paren
-- or the reserved word "all" (latter case as in A.all'Address)
-- Also treat it as apostrophe after a literal (wrong anyway, but
-- that's probably the better choice).
if Prev_Token = Tok_Identifier
or else Prev_Token = Tok_Right_Paren
or else Prev_Token = Tok_All
or else Prev_Token in Token_Class_Literal
then
Token := Tok_Apostrophe;
return;
-- Otherwise the apostrophe starts a character literal
else
-- Case of wide character literal
if (Source (Scan_Ptr) = ESC
and then Wide_Character_Encoding_Method /= WCEM_None)
or else (Upper_Half_Encoding
and then Source (Scan_Ptr) in Upper_Half_Character)
then
Note_Feature (Wide_Characters_And_Strings, Scan_Ptr);
Scan_Wide (Source, Scan_Ptr, Code, Err);
if Err then
Error_Illegal_Wide_Character;
end if;
if Source (Scan_Ptr) /= ''' then
Error_Msg_S ("missing apostrophe");
else
Scan_Ptr := Scan_Ptr + 1;
end if;
-- If we do not find a closing quote in the expected place then
-- assume that we have a misguided attempt at a string literal.
elsif Source (Scan_Ptr + 1) /= ''' then
Scan_Ptr := Scan_Ptr - 1;
Error_Msg_S
("strings are delimited by double quote character");
Scn.Slit;
return;
-- Otherwise we have a (non-wide) character literal
else
if Source (Scan_Ptr) not in Graphic_Character then
if Source (Scan_Ptr) in Upper_Half_Character then
Note_Feature (Latin_1, Scan_Ptr);
if Ada_83 then
Error_Illegal_Character;
end if;
else
Error_Illegal_Character;
end if;
end if;
Code := Get_Char_Code (Source (Scan_Ptr));
Scan_Ptr := Scan_Ptr + 2;
end if;
-- Fall through here with Scan_Ptr updated past the closing
-- quote, and Code set to the Char_Code value for the literal
Token := Tok_Char_Literal;
Token_Node := New_Node (N_Character_Literal, Token_Ptr);
Set_Char_Literal_Value (Token_Node, Code);
Set_Character_Literal_Name (Code);
Token_Name := Name_Find;
Set_Chars (Token_Node, Token_Name);
return;
end if;
end Char_Literal_Case;
-- Right parenthesis
when ')' =>
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Right_Paren;
if Style_Check then Style.Check_Right_Paren; end if;
return;
-- Right bracket or right brace, treated as right paren
when ']' | '}' =>
Error_Msg_S ("illegal character, replaced by "")""");
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Right_Paren;
return;
-- Slash (can be division operator or first character of not equal)
when '/' =>
if Double_Char_Token ('=') then
Token := Tok_Not_Equal;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Slash;
return;
end if;
-- Semicolon
when ';' =>
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Semicolon;
if Style_Check then Style.Check_Semicolon; end if;
return;
-- Vertical bar
when '|' => Vertical_Bar_Case : begin
-- Special check for || to give nice message
if Source (Scan_Ptr + 1) = '|' then
Error_Msg_S ("""||"" should be `OR ELSE`");
Scan_Ptr := Scan_Ptr + 2;
Token := Tok_Or;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Vertical_Bar;
if Style_Check then Style.Check_Vertical_Bar; end if;
return;
end if;
end Vertical_Bar_Case;
-- Exclamation, replacement character for vertical bar
when '!' => Exclamation_Case : begin
if Source (Scan_Ptr + 1) = '=' then
Error_Msg_S ("'!= should be /=");
Scan_Ptr := Scan_Ptr + 2;
Token := Tok_Not_Equal;
return;
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Vertical_Bar;
return;
end if;
end Exclamation_Case;
-- Plus
when '+' => Plus_Case : begin
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Plus;
return;
end Plus_Case;
-- Digits starting a numeric constant
when '0' .. '9' =>
Nlit;
if Identifier_Char (Source (Scan_Ptr)) then
Error_Msg_S
("delimiter required between literal and identifier");
end if;
return;
-- Lower case letters
when 'a' .. 'z' =>
Name_Len := 1;
Name_Buffer (1) := Source (Scan_Ptr);
Scan_Ptr := Scan_Ptr + 1;
goto Scan_Identifier;
-- Upper case letters
when 'A' .. 'Z' =>
Name_Len := 1;
Name_Buffer (1) :=
Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
Scan_Ptr := Scan_Ptr + 1;
goto Scan_Identifier;
-- Underline character
when '_' =>
Error_Msg_S ("identifier cannot start with underline");
Name_Len := 1;
Name_Buffer (1) := '_';
Scan_Ptr := Scan_Ptr + 1;
goto Scan_Identifier;
-- Space (not possible, because we scanned past blanks)
when ' ' =>
pragma Assert (False); null;
-- Characters in top half of ASCII 8-bit chart
when Upper_Half_Character =>
-- If wide character, then illegal, not allowed outside literal
if Upper_Half_Encoding then
Error_Illegal_Character;
-- Otherwise we have OK Latin-1 character
else
Note_Feature (Latin_1, Scan_Ptr);
-- Upper half characters may possibly be identifier letters
-- but can never be digits, so Identifier_Character can be
-- used to test for a valid start of identifier character.
if Identifier_Char (Source (Scan_Ptr)) then
Name_Len := 0;
goto Scan_Identifier;
else
Error_Illegal_Character;
end if;
end if;
when ESC =>
-- ESC character, possible start of identifier if wide characters
-- are allowed in identifiers, which we can tell by looking at
-- the Identifier_Char flag for ESC.
if Identifier_Char (ESC) then
Name_Len := 0;
goto Scan_Identifier;
else
Error_Illegal_Wide_Character;
end if;
-- Invalid control characters
when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
EM | FS | GS | RS | US | DEL
=> Error_Illegal_Character;
-- Invalid graphic characters
when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
Error_Illegal_Character;
-- End switch on non-blank character
end case;
-- End loop past format effectors. The exit from this loop is by
-- executing a return statement following completion of token scan
-- (control never falls out of this loop to the code which follows)
end loop;
-- Identifier scanning routine. On entry, some initial characters
-- of the identifier may have already been stored in Name_Buffer.
-- If so, Name_Len has the number of characters stored. otherwise
-- Name_Len is set to zero on entry.
<<Scan_Identifier>>
-- This loop scans as fast as possible past lower half letters
-- and digits, which we expect to be the most common characters.
loop
if Source (Scan_Ptr) in 'a' .. 'z'
or else Source (Scan_Ptr) in '0' .. '9'
then
Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
elsif Source (Scan_Ptr) in 'A' .. 'Z' then
Name_Buffer (Name_Len + 1) :=
Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
else
exit;
end if;
-- Open out the loop a couple of times for speed
if Source (Scan_Ptr + 1) in 'a' .. 'z'
or else Source (Scan_Ptr + 1) in '0' .. '9'
then
Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
Name_Buffer (Name_Len + 2) :=
Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
else
Scan_Ptr := Scan_Ptr + 1;
Name_Len := Name_Len + 1;
exit;
end if;
if Source (Scan_Ptr + 2) in 'a' .. 'z'
or else Source (Scan_Ptr + 2) in '0' .. '9'
then
Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
Name_Buffer (Name_Len + 3) :=
Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
else
Scan_Ptr := Scan_Ptr + 2;
Name_Len := Name_Len + 2;
exit;
end if;
if Source (Scan_Ptr + 3) in 'a' .. 'z'
or else Source (Scan_Ptr + 3) in '0' .. '9'
then
Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
Name_Buffer (Name_Len + 4) :=
Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
else
Scan_Ptr := Scan_Ptr + 3;
Name_Len := Name_Len + 3;
exit;
end if;
Scan_Ptr := Scan_Ptr + 4;
Name_Len := Name_Len + 4;
end loop;
-- If we fall through, then we have encountered either an underline
-- character, or an extended identifier character (i.e. one from the
-- upper half), or a wide character, or an identifier terminator.
-- The initial test speeds us up in the most common case where we
-- have an identifier terminator. Note that ESC is an identifier
-- character only if a wide character encoding method is active.
if Identifier_Char (Source (Scan_Ptr)) then
-- Case of underline, check for error cases of double underline,
-- and for a trailing underline character
if Source (Scan_Ptr) = '_' then
Name_Len := Name_Len + 1;
Name_Buffer (Name_Len) := '_';
if Identifier_Char (Source (Scan_Ptr + 1)) then
Scan_Ptr := Scan_Ptr + 1;
if Source (Scan_Ptr) = '_' then
Error_No_Double_Underline;
end if;
else
Error_Msg_S ("identifier cannot end with underline");
Scan_Ptr := Scan_Ptr + 1;
end if;
-- We know we have either an ESC or an upper half character.
-- First test for wide character case.
elsif (Source (Scan_Ptr) = ESC
and then Wide_Character_Encoding_Method /= WCEM_None)
or else Upper_Half_Encoding
then
if Identifier_Character_Set /= 'w' then
Error_Msg_S ("wide character not allowed in identifier");
end if;
-- Scan out the wide character and insert the appropriate
-- encoding into the name table entry for the identifier.
declare
Code : Char_Code;
Err : Boolean;
begin
Note_Feature (Wide_Characters_And_Strings, Scan_Ptr);
Scan_Wide (Source, Scan_Ptr, Code, Err);
if Err then
Error_Illegal_Wide_Character;
else
Store_Encoded_Character (Code);
end if;
end;
-- Case of an extended character from the upper half. We insert
-- the appropriate encoding of the character, folded to lower
-- case (see Namet for details of the encoding).
else
Note_Feature (Latin_1, Scan_Ptr);
Store_Encoded_Character
(Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
Scan_Ptr := Scan_Ptr + 1;
end if;
-- In all above cases, keep scanning identifier characters
goto Scan_Identifier;
end if;
-- Scan of identifier is complete. The identifier is stored in
-- Name_Buffer, and Scan_Ptr points past the last character.
Token_Name := Name_Find;
-- Here is where we check if it was a keyword
if Get_Name_Table_Byte (Token_Name) /= 0
and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
then
Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
-- Deal with possible style check for non-lower case keyword,
-- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
-- for this purpose if they appear as attribute designators.
-- Actually we only check the first character for speed.
if Style_Check
and then Source (Token_Ptr) <= 'Z'
and then (Prev_Token /= Tok_Apostrophe
or else
(Token /= Tok_Access
and then Token /= Tok_Delta
and then Token /= Tok_Digits
and then Token /= Tok_Range))
then
Style.Non_Lower_Case_Keyword;
end if;
-- We must reset Token_Name since this is not an identifier
-- and if we leave Token_Name set, the parser gets confused
-- because it thinks it is dealing with an identifier instead
-- of the corresponding keyword.
Token_Name := No_Name;
return;
-- It is an identifier after all
else
Token_Node := New_Node (N_Identifier, Token_Ptr);
Set_Chars (Token_Node, Token_Name);
Token := Tok_Identifier;
return;
end if;
end Scan;
---------------------
-- Scan_First_Char --
---------------------
function Scan_First_Char return Source_Ptr is
Ptr : Source_Ptr := Current_Line_Start;
begin
loop
if Source (Ptr) = ' ' then
Ptr := Ptr + 1;
elsif Source (Ptr) = HT then
if Style_Check then Style.Check_HT; end if;
Ptr := Ptr + 1;
else
return Ptr;
end if;
end loop;
end Scan_First_Char;
------------------------------
-- Scan_Reserved_Identifier --
------------------------------
procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is
Token_Chars : constant String := Token_Type'Image (Token);
begin
-- We have in Token_Chars the image of the Token name, i.e. Tok_xxx.
-- This code extracts the xxx and makes an identifier out of it.
Name_Len := 0;
for J in 5 .. Token_Chars'Length loop
Name_Len := Name_Len + 1;
Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J));
end loop;
Token_Name := Name_Find;
if not Used_As_Identifier (Token) or else Force_Msg then
Error_Msg_Name_1 := Token_Name;
Error_Msg_SC ("reserved word* cannot be used as identifier!");
Used_As_Identifier (Token) := True;
end if;
Token := Tok_Identifier;
Token_Node := New_Node (N_Identifier, Token_Ptr);
Set_Chars (Token_Node, Token_Name);
end Scan_Reserved_Identifier;
----------------------
-- Set_Start_Column --
----------------------
-- Note: it seems at first glance a little expensive to compute this value
-- for every source line (since it is certainly not used for all source
-- lines). On the other hand, it doesn't take much more work to skip past
-- the initial white space on the line counting the columns than it would
-- to scan past the white space using the standard scanning circuits.
function Set_Start_Column return Column_Number is
Start_Column : Column_Number := 0;
begin
-- Outer loop scans past horizontal tab characters
Tabs_Loop : loop
-- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
-- past the blanks and adjusting Start_Column to account for them.
Blanks_Loop : loop
if Source (Scan_Ptr) = ' ' then
if Source (Scan_Ptr + 1) = ' ' then
if Source (Scan_Ptr + 2) = ' ' then
if Source (Scan_Ptr + 3) = ' ' then
if Source (Scan_Ptr + 4) = ' ' then
if Source (Scan_Ptr + 5) = ' ' then
if Source (Scan_Ptr + 6) = ' ' then
Scan_Ptr := Scan_Ptr + 7;
Start_Column := Start_Column + 7;
else
Scan_Ptr := Scan_Ptr + 6;
Start_Column := Start_Column + 6;
exit Blanks_Loop;
end if;
else
Scan_Ptr := Scan_Ptr + 5;
Start_Column := Start_Column + 5;
exit Blanks_Loop;
end if;
else
Scan_Ptr := Scan_Ptr + 4;
Start_Column := Start_Column + 4;
exit Blanks_Loop;
end if;
else
Scan_Ptr := Scan_Ptr + 3;
Start_Column := Start_Column + 3;
exit Blanks_Loop;
end if;
else
Scan_Ptr := Scan_Ptr + 2;
Start_Column := Start_Column + 2;
exit Blanks_Loop;
end if;
else
Scan_Ptr := Scan_Ptr + 1;
Start_Column := Start_Column + 1;
exit Blanks_Loop;
end if;
else
exit Blanks_Loop;
end if;
end loop Blanks_Loop;
-- Outer loop keeps going only if a horizontal tab follows
if Source (Scan_Ptr) = HT then
if Style_Check then Style.Check_HT; end if;
Scan_Ptr := Scan_Ptr + 1;
Start_Column := (Start_Column / 8) * 8 + 8;
else
exit Tabs_Loop;
end if;
end loop Tabs_Loop;
return Start_Column;
end Set_Start_Column;
end Scn;