Geek Gadgets 1

home *** CD-ROM | disk | FTP | other *** search

/ Geek Gadgets 1 / ADE-1.bin / ade-dist / gnat-2.06-src.tgz / tar.out / fsf / gnat / ada / scn.adb < prev next >

Wrap

Text File | 1996-09-28 | 48KB | 1,329 lines

------------------------------------------------------------------------------ -- -- -- GNAT COMPILER COMPONENTS -- -- -- -- S C N -- -- -- -- B o d y -- -- -- -- $Revision: 1.78 $ -- -- -- -- Copyright (c) 1992,1993,1994,1995 NYU, All Rights Reserved -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- -- ware Foundation; either version 2, or (at your option) any later ver- -- -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- -- for more details. You should have received a copy of the GNU General -- -- Public License distributed with GNAT; see file COPYING. If not, write -- -- to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. -- -- -- ------------------------------------------------------------------------------ with Atree; use Atree; with Csets; use Csets; with Debug; use Debug; with Errout; use Errout; with Features; use Features; with Lib; use Lib; with Namet; use Namet; with Opt; use Opt; with Output; use Output; with Scans; use Scans; with Sinput; use Sinput; with Sinfo; use Sinfo; with Snames; use Snames; with Style; with Widechar; use Widechar; with System.Parameters; with System.WCh_Con; use System.WCh_Con; package body Scn is use Ascii; -- Make control characters visible Used_As_Identifier : array (Token_Type) of Boolean; -- Flags set True if a given keyword is used as an identifier (used to -- make sure that we only post an error message for incorrect use of a -- keyword as an identifier once for a given keyword). Max_Allowed_Line_Length : Nat := System.Parameters.Max_Line_Length; -- Maximum allowed line length (maybe reset by Style.Set_Max_Line_Length) ----------------------- -- Local Subprograms -- ----------------------- function Double_Char_Token (C : Character) return Boolean; -- This function is used for double character tokens like := or <>. It -- checks if the character following Source (Scan_Ptr) is C, and if so -- bumps Scan_Ptr past the pair of characters and returns True. A space -- between the two characters is also recognized with an appropriate -- error message being issued. If C is not present, False is returned. -- Note that Double_Char_Token can only be used for tokens defined in -- the Ada syntax. procedure Error_Illegal_Character; -- Give illegal character error, Scan_Ptr points to character. On return, -- Scan_Ptr is bumped past the illegal character. procedure Error_Illegal_Wide_Character; -- Give illegal character in wide character escape sequence message. On -- return, Scan_Ptr is bumped past the illegal character, which may still -- leave us pointing to junk, not much we can do if the escape sequence -- is messed up! procedure Error_Long_Line; -- Signal error of excessively long line procedure Error_No_Double_Underline; -- Signal error of double underline character procedure Nlit; -- This is the procedure for scanning out numeric literals function Set_Start_Column return Column_Number; -- This routine is called with Scan_Ptr pointing to the first character -- of a line. On exit, Scan_Ptr is advanced to the first non-blank -- character of this line (or to the terminating format effector if the -- line contains no non-blank characters), and the returned result is the -- column number of this non-blank character (zero origin), which is the -- value to be stored in the Start_Column scan variable. procedure Slit; -- This is the procedure for scanning out string literals -------------- -- Subunits -- -------------- -- For some reason, these must come early in the file or we run into an -- infinite loop in GNAT, to be looked at some time ??? procedure Nlit is separate; procedure Slit is separate; ---------------------------- -- Determine_Token_Casing -- ---------------------------- function Determine_Token_Casing return Casing_Type is All_Lower : Boolean := True; -- Set False if upper case letter found All_Upper : Boolean := True; -- Set False if lower case letter found Mixed : Boolean := True; -- Set False if exception to mixed case rule found (lower case letter -- at start or after underline, or upper case letter elsewhere). Decisive : Boolean := False; -- Set True if at least one instance of letter not after underline After_Und : Boolean := True; -- True at start of string, and after an underline character begin for S in Token_Ptr .. Scan_Ptr - 1 loop if Source (S) = '_' or else Source (S) = '.' then After_Und := True; elsif Is_Lower_Case_Letter (Source (S)) then All_Upper := False; if not After_Und then Decisive := True; else After_Und := False; Mixed := False; end if; elsif Is_Upper_Case_Letter (Source (S)) then All_Lower := False; if not After_Und then Decisive := True; Mixed := False; else After_Und := False; end if; end if; end loop; -- Now we can figure out the result from the flags we set in that loop if All_Lower then return All_Lower_Case; elsif not Decisive then return Unknown; elsif All_Upper then return All_Upper_Case; elsif Mixed then return Mixed_Case; else return Unknown; end if; end Determine_Token_Casing; ----------------------- -- Double_Char_Token -- ----------------------- function Double_Char_Token (C : Character) return Boolean is begin if Source (Scan_Ptr + 1) = C then Scan_Ptr := Scan_Ptr + 2; return True; elsif Source (Scan_Ptr + 1) = ' ' and then Source (Scan_Ptr + 2) = C then Scan_Ptr := Scan_Ptr + 1; Error_Msg_S ("no space allowed here"); Scan_Ptr := Scan_Ptr + 2; return True; else return False; end if; end Double_Char_Token; ----------------------------- -- Error_Illegal_Character -- ----------------------------- procedure Error_Illegal_Character is begin Error_Msg_S ("illegal character"); Scan_Ptr := Scan_Ptr + 1; end Error_Illegal_Character; ---------------------------------- -- Error_Illegal_Wide_Character -- ---------------------------------- procedure Error_Illegal_Wide_Character is begin Error_Msg_S ("illegal character in wide character escape sequence"); Scan_Ptr := Scan_Ptr + 1; end Error_Illegal_Wide_Character; --------------------- -- Error_Long_Line -- --------------------- procedure Error_Long_Line is begin Error_Msg ("this line is too long", Current_Line_Start); end Error_Long_Line; ------------------------------- -- Error_No_Double_Underline -- ------------------------------- procedure Error_No_Double_Underline is begin Error_Msg_S ("two consecutive underlines not permitted"); end Error_No_Double_Underline; ------------------------ -- Initialize_Scanner -- ------------------------ procedure Initialize_Scanner (Unit : Unit_Number_Type) is begin -- Set up Token_Type values in Names Table entries for reserved keywords -- We use the Pos value of the Token_Type value. Note we are relying on -- the fact that Token_Type'Val (0) is not a reserved word! Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort)); Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs)); Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract)); Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept)); Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access)); Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And)); Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased)); Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All)); Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array)); Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At)); Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin)); Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body)); Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case)); Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant)); Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare)); Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay)); Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta)); Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits)); Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do)); Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else)); Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif)); Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End)); Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry)); Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception)); Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit)); Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For)); Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function)); Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic)); Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto)); Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If)); Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In)); Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is)); Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited)); Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop)); Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod)); Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New)); Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not)); Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null)); Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of)); Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or)); Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others)); Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out)); Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package)); Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma)); Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private)); Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure)); Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected)); Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise)); Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range)); Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record)); Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem)); Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames)); Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue)); Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return)); Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse)); Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select)); Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate)); Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype)); Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged)); Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task)); Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate)); Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then)); Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type)); Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until)); Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use)); Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When)); Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While)); Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With)); Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor)); -- Initialize scan control variables Current_Source_File := Source_Index (Unit); Source := Source_Text (Current_Source_File); Current_Source_Unit := Unit; Scan_Ptr := Source_First (Current_Source_File); Token := No_Token; Token_Ptr := Scan_Ptr; Current_Line_Start := Scan_Ptr; Token_Node := Empty; Token_Name := No_Name; Start_Column := Set_Start_Column; First_Non_Blank_Location := Scan_Ptr; if Style_Check then Style.Set_Max_Line_Length (Max_Allowed_Line_Length); end if; -- Set default for Comes_From_Source. All nodes built now until we -- reenter the analyzer will have Comes_From_Source set to True Set_Comes_From_Source_Default (True); -- Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr) Scan; -- Clear flags for reserved words used as indentifiers for I in Token_Type loop Used_As_Identifier (I) := False; end loop; end Initialize_Scanner; ---------- -- Scan -- ---------- procedure Scan is begin Prev_Token := Token; Prev_Token_Ptr := Token_Ptr; Token_Name := Error_Name; -- The following loop runs more than once only if a format effector -- (tab, vertical tab, form feed, line feed, carriage return) is -- encountered and skipped, or some error situation, such as an -- illegal character, is encountered. loop -- Skip past blanks, loop is opened up for speed while Source (Scan_Ptr) = ' ' loop if Source (Scan_Ptr + 1) /= ' ' then Scan_Ptr := Scan_Ptr + 1; exit; end if; if Source (Scan_Ptr + 2) /= ' ' then Scan_Ptr := Scan_Ptr + 2; exit; end if; if Source (Scan_Ptr + 3) /= ' ' then Scan_Ptr := Scan_Ptr + 3; exit; end if; if Source (Scan_Ptr + 4) /= ' ' then Scan_Ptr := Scan_Ptr + 4; exit; end if; if Source (Scan_Ptr + 5) /= ' ' then Scan_Ptr := Scan_Ptr + 5; exit; end if; if Source (Scan_Ptr + 6) /= ' ' then Scan_Ptr := Scan_Ptr + 6; exit; end if; if Source (Scan_Ptr + 7) /= ' ' then Scan_Ptr := Scan_Ptr + 7; exit; end if; Scan_Ptr := Scan_Ptr + 8; end loop; -- We are now at a non-blank character, which is the first character -- of the token we will scan, and hence the value of Token_Ptr. Token_Ptr := Scan_Ptr; -- Here begins the main case statement which transfers control on -- the basis of the non-blank character we have encountered. case Source (Scan_Ptr) is -- Line terminator characters when CR | LF | FF | VT => Line_Terminator_Case : begin if Int (Scan_Ptr) - Int (Current_Line_Start) > Max_Allowed_Line_Length then Error_Long_Line; end if; if Style_Check then Style.Check_Line_Terminator; end if; declare Physical : Boolean; begin Skip_Line_Terminators (Scan_Ptr, Physical); -- If we are at start of physical line, update scan pointers -- to reflect the start of the new line. if Physical then Current_Line_Start := Scan_Ptr; Start_Column := Set_Start_Column; First_Non_Blank_Location := Scan_Ptr; end if; end; end Line_Terminator_Case; -- Horizontal tab, just skip past it when HT => if Style_Check then Style.Check_HT; end if; Scan_Ptr := Scan_Ptr + 1; -- End of file character when EOF => Token := Tok_EOF; return; -- Ampersand when '&' => if Source (Scan_Ptr + 1) = '&' then Error_Msg_S ("'&'& should be `AND THEN`"); Scan_Ptr := Scan_Ptr + 2; Token := Tok_And; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Ampersand; return; end if; -- Asterisk (can be multiplication operator or double asterisk -- which is the exponentiation compound delimtier). when '*' => if Source (Scan_Ptr + 1) = '*' then Scan_Ptr := Scan_Ptr + 2; Token := Tok_Double_Asterisk; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Asterisk; return; end if; -- Colon, which can either be an isolated colon, or part of an -- assignment compound delimiter. when ':' => if Double_Char_Token ('=') then Token := Tok_Colon_Equal; if Style_Check then Style.Check_Colon_Equal; end if; return; elsif Source (Scan_Ptr + 1) = '-' and then Source (Scan_Ptr + 2) /= '-' then Token := Tok_Colon_Equal; Error_Msg (":- should be :=", Scan_Ptr); Scan_Ptr := Scan_Ptr + 2; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Colon; if Style_Check then Style.Check_Colon; end if; return; end if; -- Left parenthesis when '(' => Scan_Ptr := Scan_Ptr + 1; Token := Tok_Left_Paren; if Style_Check then Style.Check_Left_Paren; end if; return; -- Left bracket or left brace, treated as left paren when '[' | '{' => Error_Msg_S ("illegal character, replaced by ""("""); Scan_Ptr := Scan_Ptr + 1; Token := Tok_Left_Paren; return; -- Comma when ',' => Scan_Ptr := Scan_Ptr + 1; Token := Tok_Comma; if Style_Check then Style.Check_Comma; end if; return; -- Dot, which is either an isolated period, or part of a double -- dot compound delimiter sequence. We also check for the case of -- a digit following the period, to give a better error message. when '.' => if Source (Scan_Ptr + 1) = '.' then Scan_Ptr := Scan_Ptr + 2; Token := Tok_Dot_Dot; if Style_Check then Style.Check_Dot_Dot; end if; return; elsif Source (Scan_Ptr + 1) in '0' .. '9' then Error_Msg_S ("numeric literal cannot start with point"); Scan_Ptr := Scan_Ptr + 1; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Dot; return; end if; -- Equal, which can either be an equality operator, or part of the -- arrow (=>) compound delimiter. when '=' => if Double_Char_Token ('>') then Token := Tok_Arrow; if Style_Check then Style.Check_Arrow; end if; return; elsif Source (Scan_Ptr + 1) = '=' then Error_Msg_S ("== should be ="); Scan_Ptr := Scan_Ptr + 1; end if; Scan_Ptr := Scan_Ptr + 1; Token := Tok_Equal; return; -- Greater than, which can be a greater than operator, greater than -- or equal operator, or first character of a right label bracket. when '>' => if Double_Char_Token ('=') then Token := Tok_Greater_Equal; return; elsif Double_Char_Token ('>') then Token := Tok_Greater_Greater; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Greater; return; end if; -- Less than, which can be a less than operator, less than or equal -- operator, or the first character of a left label bracket, or the -- first character of a box (<>) compound delimiter. when '<' => if Double_Char_Token ('=') then Token := Tok_Less_Equal; return; elsif Double_Char_Token ('>') then Token := Tok_Box; if Style_Check then Style.Check_Box; end if; return; elsif Double_Char_Token ('<') then Token := Tok_Less_Less; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Less; return; end if; -- Minus, which is either a subtraction operator, or the first -- character of double minus starting a comment when '-' => Minus_Case : begin if Source (Scan_Ptr + 1) = '>' then Error_Msg_S ("-> should be =>"); Scan_Ptr := Scan_Ptr + 2; Token := Tok_Arrow; return; elsif Source (Scan_Ptr + 1) /= '-' then Scan_Ptr := Scan_Ptr + 1; Token := Tok_Minus; return; -- Comment else -- Source (Scan_Ptr + 1) = '-' then if Style_Check then Style.Check_Comment; end if; Scan_Ptr := Scan_Ptr + 2; -- Loop to scan comment (this loop runs more than once only if -- a horizontal tab or other non-graphic character is scanned) loop -- Scan to non graphic character (opened up for speed) loop exit when Source (Scan_Ptr) not in Graphic_Character; Scan_Ptr := Scan_Ptr + 1; exit when Source (Scan_Ptr) not in Graphic_Character; Scan_Ptr := Scan_Ptr + 1; exit when Source (Scan_Ptr) not in Graphic_Character; Scan_Ptr := Scan_Ptr + 1; exit when Source (Scan_Ptr) not in Graphic_Character; Scan_Ptr := Scan_Ptr + 1; exit when Source (Scan_Ptr) not in Graphic_Character; Scan_Ptr := Scan_Ptr + 1; end loop; -- Keep going if horizontal tab if Source (Scan_Ptr) = HT then if Style_Check then Style.Check_HT; end if; Scan_Ptr := Scan_Ptr + 1; -- Terminate scan of comment if line terminator or EOF elsif Source (Scan_Ptr) in Line_Terminator or else Source (Scan_Ptr) = EOF then if Int (Scan_Ptr) - Int (Current_Line_Start) > Max_Allowed_Line_Length then Error_Long_Line; end if; exit; -- Terminate scan of comment if end of file encountered -- (embedded EOF character or real last character in file) elsif Source (Scan_Ptr) = EOF then exit; -- Keep going if character in 80-FF range. These characters -- are allowed in comments according to the approved AI. -- Also allow ESC, which just got added to the AI (June 93) elsif Source (Scan_Ptr) in Upper_Half_Character or else Source (Scan_Ptr) = ESC then Scan_Ptr := Scan_Ptr + 1; -- Otherwise we have an illegal comment character else Error_Illegal_Character; end if; end loop; -- Note that we do NOT execute a return here, instead we fall -- through to reexecute the scan loop to look for a token. end if; end Minus_Case; -- Double quote or percent starting a string constant when '"' | '%' => Slit; return; -- Apostrophe. This can either be the start of a character literal, -- or an isolated apostrophe used in a qualified expression or an -- attribute. We treat it as a character literal if it does not -- follow a right parenthesis, identifier or literal. This means -- that we correctly treat constructs like: -- A := CHARACTER'('A'); -- which appears to be illegal according to 2.2(2) (since the rule -- there would seem to require separators to avoid the confusion -- with the character literal), but all compilers accept the above -- statement, and there are at least six ACVC tests that use this -- type of lexical sequence, expecting it to be legal, so in fact -- all compilers must accept this and we must too! when ''' => Char_Literal_Case : declare Code : Char_Code; Err : Boolean; begin Scan_Ptr := Scan_Ptr + 1; -- Here is where we make the test to distinguish the cases. Treat -- as apostrophe if previous token is an identifier, right paren -- or the reserved word "all" (latter case as in A.all'Address) -- Also treat it as apostrophe after a literal (wrong anyway, but -- that's probably the better choice). if Prev_Token = Tok_Identifier or else Prev_Token = Tok_Right_Paren or else Prev_Token = Tok_All or else Prev_Token in Token_Class_Literal then Token := Tok_Apostrophe; return; -- Otherwise the apostrophe starts a character literal else -- Case of wide character literal if (Source (Scan_Ptr) = ESC and then Wide_Character_Encoding_Method /= WCEM_None) or else (Upper_Half_Encoding and then Source (Scan_Ptr) in Upper_Half_Character) then Note_Feature (Wide_Characters_And_Strings, Scan_Ptr); Scan_Wide (Source, Scan_Ptr, Code, Err); if Err then Error_Illegal_Wide_Character; end if; if Source (Scan_Ptr) /= ''' then Error_Msg_S ("missing apostrophe"); else Scan_Ptr := Scan_Ptr + 1; end if; -- If we do not find a closing quote in the expected place then -- assume that we have a misguided attempt at a string literal. elsif Source (Scan_Ptr + 1) /= ''' then Scan_Ptr := Scan_Ptr - 1; Error_Msg_S ("strings are delimited by double quote character"); Scn.Slit; return; -- Otherwise we have a (non-wide) character literal else if Source (Scan_Ptr) not in Graphic_Character then if Source (Scan_Ptr) in Upper_Half_Character then Note_Feature (Latin_1, Scan_Ptr); if Ada_83 then Error_Illegal_Character; end if; else Error_Illegal_Character; end if; end if; Code := Get_Char_Code (Source (Scan_Ptr)); Scan_Ptr := Scan_Ptr + 2; end if; -- Fall through here with Scan_Ptr updated past the closing -- quote, and Code set to the Char_Code value for the literal Token := Tok_Char_Literal; Token_Node := New_Node (N_Character_Literal, Token_Ptr); Set_Char_Literal_Value (Token_Node, Code); Set_Character_Literal_Name (Code); Token_Name := Name_Find; Set_Chars (Token_Node, Token_Name); return; end if; end Char_Literal_Case; -- Right parenthesis when ')' => Scan_Ptr := Scan_Ptr + 1; Token := Tok_Right_Paren; if Style_Check then Style.Check_Right_Paren; end if; return; -- Right bracket or right brace, treated as right paren when ']' | '}' => Error_Msg_S ("illegal character, replaced by "")"""); Scan_Ptr := Scan_Ptr + 1; Token := Tok_Right_Paren; return; -- Slash (can be division operator or first character of not equal) when '/' => if Double_Char_Token ('=') then Token := Tok_Not_Equal; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Slash; return; end if; -- Semicolon when ';' => Scan_Ptr := Scan_Ptr + 1; Token := Tok_Semicolon; if Style_Check then Style.Check_Semicolon; end if; return; -- Vertical bar when '|' => Vertical_Bar_Case : begin -- Special check for || to give nice message if Source (Scan_Ptr + 1) = '|' then Error_Msg_S ("""||"" should be `OR ELSE`"); Scan_Ptr := Scan_Ptr + 2; Token := Tok_Or; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Vertical_Bar; if Style_Check then Style.Check_Vertical_Bar; end if; return; end if; end Vertical_Bar_Case; -- Exclamation, replacement character for vertical bar when '!' => Exclamation_Case : begin if Source (Scan_Ptr + 1) = '=' then Error_Msg_S ("'!= should be /="); Scan_Ptr := Scan_Ptr + 2; Token := Tok_Not_Equal; return; else Scan_Ptr := Scan_Ptr + 1; Token := Tok_Vertical_Bar; return; end if; end Exclamation_Case; -- Plus when '+' => Plus_Case : begin Scan_Ptr := Scan_Ptr + 1; Token := Tok_Plus; return; end Plus_Case; -- Digits starting a numeric constant when '0' .. '9' => Nlit; if Identifier_Char (Source (Scan_Ptr)) then Error_Msg_S ("delimiter required between literal and identifier"); end if; return; -- Lower case letters when 'a' .. 'z' => Name_Len := 1; Name_Buffer (1) := Source (Scan_Ptr); Scan_Ptr := Scan_Ptr + 1; goto Scan_Identifier; -- Upper case letters when 'A' .. 'Z' => Name_Len := 1; Name_Buffer (1) := Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); Scan_Ptr := Scan_Ptr + 1; goto Scan_Identifier; -- Underline character when '_' => Error_Msg_S ("identifier cannot start with underline"); Name_Len := 1; Name_Buffer (1) := '_'; Scan_Ptr := Scan_Ptr + 1; goto Scan_Identifier; -- Space (not possible, because we scanned past blanks) when ' ' => pragma Assert (False); null; -- Characters in top half of ASCII 8-bit chart when Upper_Half_Character => -- If wide character, then illegal, not allowed outside literal if Upper_Half_Encoding then Error_Illegal_Character; -- Otherwise we have OK Latin-1 character else Note_Feature (Latin_1, Scan_Ptr); -- Upper half characters may possibly be identifier letters -- but can never be digits, so Identifier_Character can be -- used to test for a valid start of identifier character. if Identifier_Char (Source (Scan_Ptr)) then Name_Len := 0; goto Scan_Identifier; else Error_Illegal_Character; end if; end if; when ESC => -- ESC character, possible start of identifier if wide characters -- are allowed in identifiers, which we can tell by looking at -- the Identifier_Char flag for ESC. if Identifier_Char (ESC) then Name_Len := 0; goto Scan_Identifier; else Error_Illegal_Wide_Character; end if; -- Invalid control characters when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO | SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM | FS | GS | RS | US | DEL => Error_Illegal_Character; -- Invalid graphic characters when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' => Error_Illegal_Character; -- End switch on non-blank character end case; -- End loop past format effectors. The exit from this loop is by -- executing a return statement following completion of token scan -- (control never falls out of this loop to the code which follows) end loop; -- Identifier scanning routine. On entry, some initial characters -- of the identifier may have already been stored in Name_Buffer. -- If so, Name_Len has the number of characters stored. otherwise -- Name_Len is set to zero on entry. <<Scan_Identifier>> -- This loop scans as fast as possible past lower half letters -- and digits, which we expect to be the most common characters. loop if Source (Scan_Ptr) in 'a' .. 'z' or else Source (Scan_Ptr) in '0' .. '9' then Name_Buffer (Name_Len + 1) := Source (Scan_Ptr); elsif Source (Scan_Ptr) in 'A' .. 'Z' then Name_Buffer (Name_Len + 1) := Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); else exit; end if; -- Open out the loop a couple of times for speed if Source (Scan_Ptr + 1) in 'a' .. 'z' or else Source (Scan_Ptr + 1) in '0' .. '9' then Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1); elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then Name_Buffer (Name_Len + 2) := Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32); else Scan_Ptr := Scan_Ptr + 1; Name_Len := Name_Len + 1; exit; end if; if Source (Scan_Ptr + 2) in 'a' .. 'z' or else Source (Scan_Ptr + 2) in '0' .. '9' then Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2); elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then Name_Buffer (Name_Len + 3) := Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32); else Scan_Ptr := Scan_Ptr + 2; Name_Len := Name_Len + 2; exit; end if; if Source (Scan_Ptr + 3) in 'a' .. 'z' or else Source (Scan_Ptr + 3) in '0' .. '9' then Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3); elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then Name_Buffer (Name_Len + 4) := Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32); else Scan_Ptr := Scan_Ptr + 3; Name_Len := Name_Len + 3; exit; end if; Scan_Ptr := Scan_Ptr + 4; Name_Len := Name_Len + 4; end loop; -- If we fall through, then we have encountered either an underline -- character, or an extended identifier character (i.e. one from the -- upper half), or a wide character, or an identifier terminator. -- The initial test speeds us up in the most common case where we -- have an identifier terminator. Note that ESC is an identifier -- character only if a wide character encoding method is active. if Identifier_Char (Source (Scan_Ptr)) then -- Case of underline, check for error cases of double underline, -- and for a trailing underline character if Source (Scan_Ptr) = '_' then Name_Len := Name_Len + 1; Name_Buffer (Name_Len) := '_'; if Identifier_Char (Source (Scan_Ptr + 1)) then Scan_Ptr := Scan_Ptr + 1; if Source (Scan_Ptr) = '_' then Error_No_Double_Underline; end if; else Error_Msg_S ("identifier cannot end with underline"); Scan_Ptr := Scan_Ptr + 1; end if; -- We know we have either an ESC or an upper half character. -- First test for wide character case. elsif (Source (Scan_Ptr) = ESC and then Wide_Character_Encoding_Method /= WCEM_None) or else Upper_Half_Encoding then if Identifier_Character_Set /= 'w' then Error_Msg_S ("wide character not allowed in identifier"); end if; -- Scan out the wide character and insert the appropriate -- encoding into the name table entry for the identifier. declare Code : Char_Code; Err : Boolean; begin Note_Feature (Wide_Characters_And_Strings, Scan_Ptr); Scan_Wide (Source, Scan_Ptr, Code, Err); if Err then Error_Illegal_Wide_Character; else Store_Encoded_Character (Code); end if; end; -- Case of an extended character from the upper half. We insert -- the appropriate encoding of the character, folded to lower -- case (see Namet for details of the encoding). else Note_Feature (Latin_1, Scan_Ptr); Store_Encoded_Character (Get_Char_Code (Fold_Lower (Source (Scan_Ptr)))); Scan_Ptr := Scan_Ptr + 1; end if; -- In all above cases, keep scanning identifier characters goto Scan_Identifier; end if; -- Scan of identifier is complete. The identifier is stored in -- Name_Buffer, and Scan_Ptr points past the last character. Token_Name := Name_Find; -- Here is where we check if it was a keyword if Get_Name_Table_Byte (Token_Name) /= 0 and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words) then Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); -- Deal with possible style check for non-lower case keyword, -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords -- for this purpose if they appear as attribute designators. -- Actually we only check the first character for speed. if Style_Check and then Source (Token_Ptr) <= 'Z' and then (Prev_Token /= Tok_Apostrophe or else (Token /= Tok_Access and then Token /= Tok_Delta and then Token /= Tok_Digits and then Token /= Tok_Range)) then Style.Non_Lower_Case_Keyword; end if; -- We must reset Token_Name since this is not an identifier -- and if we leave Token_Name set, the parser gets confused -- because it thinks it is dealing with an identifier instead -- of the corresponding keyword. Token_Name := No_Name; return; -- It is an identifier after all else Token_Node := New_Node (N_Identifier, Token_Ptr); Set_Chars (Token_Node, Token_Name); Token := Tok_Identifier; return; end if; end Scan; --------------------- -- Scan_First_Char -- --------------------- function Scan_First_Char return Source_Ptr is Ptr : Source_Ptr := Current_Line_Start; begin loop if Source (Ptr) = ' ' then Ptr := Ptr + 1; elsif Source (Ptr) = HT then if Style_Check then Style.Check_HT; end if; Ptr := Ptr + 1; else return Ptr; end if; end loop; end Scan_First_Char; ------------------------------ -- Scan_Reserved_Identifier -- ------------------------------ procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is Token_Chars : constant String := Token_Type'Image (Token); begin -- We have in Token_Chars the image of the Token name, i.e. Tok_xxx. -- This code extracts the xxx and makes an identifier out of it. Name_Len := 0; for J in 5 .. Token_Chars'Length loop Name_Len := Name_Len + 1; Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J)); end loop; Token_Name := Name_Find; if not Used_As_Identifier (Token) or else Force_Msg then Error_Msg_Name_1 := Token_Name; Error_Msg_SC ("reserved word* cannot be used as identifier!"); Used_As_Identifier (Token) := True; end if; Token := Tok_Identifier; Token_Node := New_Node (N_Identifier, Token_Ptr); Set_Chars (Token_Node, Token_Name); end Scan_Reserved_Identifier; ---------------------- -- Set_Start_Column -- ---------------------- -- Note: it seems at first glance a little expensive to compute this value -- for every source line (since it is certainly not used for all source -- lines). On the other hand, it doesn't take much more work to skip past -- the initial white space on the line counting the columns than it would -- to scan past the white space using the standard scanning circuits. function Set_Start_Column return Column_Number is Start_Column : Column_Number := 0; begin -- Outer loop scans past horizontal tab characters Tabs_Loop : loop -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr -- past the blanks and adjusting Start_Column to account for them. Blanks_Loop : loop if Source (Scan_Ptr) = ' ' then if Source (Scan_Ptr + 1) = ' ' then if Source (Scan_Ptr + 2) = ' ' then if Source (Scan_Ptr + 3) = ' ' then if Source (Scan_Ptr + 4) = ' ' then if Source (Scan_Ptr + 5) = ' ' then if Source (Scan_Ptr + 6) = ' ' then Scan_Ptr := Scan_Ptr + 7; Start_Column := Start_Column + 7; else Scan_Ptr := Scan_Ptr + 6; Start_Column := Start_Column + 6; exit Blanks_Loop; end if; else Scan_Ptr := Scan_Ptr + 5; Start_Column := Start_Column + 5; exit Blanks_Loop; end if; else Scan_Ptr := Scan_Ptr + 4; Start_Column := Start_Column + 4; exit Blanks_Loop; end if; else Scan_Ptr := Scan_Ptr + 3; Start_Column := Start_Column + 3; exit Blanks_Loop; end if; else Scan_Ptr := Scan_Ptr + 2; Start_Column := Start_Column + 2; exit Blanks_Loop; end if; else Scan_Ptr := Scan_Ptr + 1; Start_Column := Start_Column + 1; exit Blanks_Loop; end if; else exit Blanks_Loop; end if; end loop Blanks_Loop; -- Outer loop keeps going only if a horizontal tab follows if Source (Scan_Ptr) = HT then if Style_Check then Style.Check_HT; end if; Scan_Ptr := Scan_Ptr + 1; Start_Column := (Start_Column / 8) * 8 + 8; else exit Tabs_Loop; end if; end loop Tabs_Loop; return Start_Column; end Set_Start_Column; end Scn;