/*************************************************************
file:
    isql.l
purpose:
    the lexical scanner prepared for the JFlex generator;
new features:(compared to DM3's lexical scanner)
    1. -- style comments
    2. c-style comments (can be nested!)
    3. double quoted identifier
    4. string can be merged by "\"
    5. escape char supported in string literal
special notes:
    to generate Yylex.java, just type: jflex isql.lex;
history:
    Date        Who         RefDoc      Memo
    2002-03-24  Joe Han     N/A         Created
*************************************************************/
package dm.jdbc.lex;
import java.util.ArrayList;
import java.util.Vector;

import dm.jdbc.dbaccess.Const;
import dm.jdbc.innerData.DmdbLexWord;

%%

/* definitions for flex/lex comes here */

%byaccj
%line
%column
%unicode
%public
//%option noyywrap
//%option yylineno


%{

/* Line and column recorded by the nested-comment opener rule (<xc>{xcstart}). */
private int commentStartLine = 0;
private int commentStartColumn = 0;
/* Second constructor argument; only stored, never read in this file. */
private String text = "";
/* Not referenced anywhere else in this file. */
private boolean keyWordFlag = false;
/* Not referenced anywhere else in this file. */
private Vector startAndEndVec = new Vector();
/* Receives one DmdbLexWord per scanned word; null disables recording. */
private ArrayList m_wordList = null;
/* When true the {assign} rule reports " ASSIGN " instead of " := ". */
private boolean m_escapePro	= false;

/**
 * Creates a scanner over the given reader and stores the caller context.
 *
 * @param r          character source passed to the generated constructor
 * @param s          value stored in the text field
 * @param wordList   list that receives the scanned words; when null no
 *                   words are recorded
 * @param escapePro  when true the assign rule reports " ASSIGN " instead
 *                   of " := "
 */
public Yylex(java.io.Reader r, String s, ArrayList wordList, boolean escapePro) {
  this(r);
  m_escapePro = escapePro;
  m_wordList = wordList;
  text = s;
}

/**
 * Records one scanned word in the word list.
 *
 * A whitespace word that directly follows another whitespace word is
 * appended to that previous entry instead of creating a new one. When no
 * word list was supplied nothing is stored.
 *
 * @param word  text of the word
 * @param type  one of the Const.LEX_TYPE_* values
 */
private void sqlWordList_add(String word, int type)
{
	final boolean mayMerge = type == Const.LEX_TYPE_WHITESPACE
			&& m_wordList != null
			&& m_wordList.size() > 0;

	if (mayMerge)
	{
		DmdbLexWord last = (DmdbLexWord)m_wordList.get(m_wordList.size() - 1);
		if (last.getM_type() == Const.LEX_TYPE_WHITESPACE)
		{
			/* extend the previous whitespace entry in place */
			last.setM_value(last.getM_value() + word);
			last.setM_valLen(last.getM_valLen() + word.length());
			return;
		}
	}

	DmdbLexWord entry = new DmdbLexWord(word, type, word.length());
	if (m_wordList != null)
	{
		m_wordList.add(entry);
	}
}

/**
 * Returns the list that sqlWordList_add appends to.
 *
 * @return the word list given to the constructor, or null if none was given
 */
public ArrayList getWordList() {
	return this.m_wordList;
}

//#define MAX_INCLUDE_DEPTH 100
/* Size of the fn_stack and ln_stack arrays below. */
public final int MAX_INCLUDE_DEPTH = 100;

/* Decremented by the <YYINITIAL><<EOF>> rule; a negative value ends the scan. */
int include_stack_ptr = 0;

/* The three fields below are not used by any rule or method in this file. */
byte[] fn_stack = new byte[MAX_INCLUDE_DEPTH];
int[] ln_stack = new int[MAX_INCLUDE_DEPTH];
byte[] file_name = null;

/* NOTE(review): static, so the last error is shared by every Yylex instance -
 * confirm that scanners are never used concurrently. */
static LexError lex_error = new LexError();           /* lexical error message */


/* Nesting level of the comment being scanned; 0 means the outermost comment.
 * NOTE(review): also static and therefore shared between instances. */
static int xcdepth = 0;          /* for nested c-style comments */

/* Concatenation of every matched text, appended by strcat_input(). */
String inputbuf = "";
/* Never updated in this file. */
int inputbuf_len = 0;

/* When true a backslash inside a quoted string or identifier switches to the
 * xqbs / xdbs state; changed through setBackSlash(). */
boolean	backslash_need_escape = true;

/*********************************************************************
 name:
     lex_scanstr
 purpose:
     normalize the body of a scanned string literal. No escape-code
     mapping is performed here: the text is handed back unchanged.
 return:
     "" when str is null, empty, or starts with a NUL character;
     otherwise str itself
 ********************************************************************/
static
String
lex_scanstr(String str)  /* in: the string should be processed */
{
	/* the length check must come before charAt(0), which throws
	 * StringIndexOutOfBoundsException on an empty string */
	if (str == null || str.length() == 0 || str.charAt(0) == '\0')
		return "";

	return str;
}


/**
 * Stores an error code and message in lex_error and hands the code back so
 * that a rule can write "return LEXERR(...)".
 *
 * @param errcode  code to record and return
 * @param errmsg   message to record
 * @return errcode, unchanged
 */
public int LEXERR(int errcode, String errmsg) {
	lex_error.errmsg = errmsg;
	lex_error.errcode = errcode;
	return errcode;
}

/**
 * @return the error code most recently stored by LEXERR
 */
public int getErrCode() {
	return lex_error.errcode;
}

/**
 * @return the error message most recently stored by LEXERR
 */
public String getErrMsg() {
	return lex_error.errmsg;
}

/* Literal currently being assembled by startlit()/addlit(); null until the
 * first addlit() call after startlit().
 * NOTE(review): these three are static and therefore shared by all instances. */
static String lex_litbuf = null;   /* literal buffer */
static int  lex_litleng = 0;      /* literal length, summed by addlit() */
static int  lex_litbufsize = 0;   /* literal buffer size; only reset to 0 by startlit() */

/* Set by rules that build their own token text; yytext2() then returns
 * ret_lex_litbuf instead of yytext() and clears the flag. */
boolean lex_litbuf_flag = false;
String  ret_lex_litbuf = null;


/**
 * Hook called first in almost every rule action; appends the matched text
 * to inputbuf through strcat_input().
 *
 * @return the value returned by strcat_input()
 */
public int YY_USER_ACTION() {
	final int status = strcat_input();
	return status;
}

/* Appends the current match to inputbuf; always returns 1. */
int
strcat_input()
{
	inputbuf = inputbuf + yytext();
	return 1;
}

/*
 * Counterpart of yyless(n) for inputbuf: keeps the first n characters of
 * the current match and removes the rest from the end of inputbuf, so that
 * text which the caller is about to push back is not recorded twice.
 *
 * Must be called before yypushback(), while yytext() still holds the full
 * match.
 *
 * n: number of characters of the current match to keep
 */
void my_yyless(int n)
{
	int n_cut = yytext().length() - n;

	/* String is immutable: the result has to be assigned back, otherwise
	 * the call has no effect */
	inputbuf = inputbuf.substring(0, inputbuf.length() - n_cut);
}



/* Called when a literal starts: empties the literal buffer and zeroes its
 * length and size counters. */
static void startlit()
{
   lex_litbuf     = null;
   lex_litleng    = 0;
   lex_litbufsize = 0;
}

/*
 * Appends the first yleng characters of ytext to the literal buffer and
 * adds yleng to the recorded literal length.
 */
void
addlit(String ytext, int yleng)
{
	String piece = ytext.substring(0, yleng);

	/* the first piece after startlit() replaces the null buffer */
	lex_litbuf = (lex_litbuf == null) ? piece : lex_litbuf + piece;
	lex_litleng += yleng;
}

/* Not referenced by any code in this file.
 * NOTE(review): presumably a buffer limit kept from the C scanner - confirm. */
int LEX_MAX_N_BUFFER = 8192;


/*
 * Classifies the current match as an integer token.
 *
 * The text is recorded as LEX_TYPE_INT when it parses as a decimal long,
 * otherwise (for example when it is out of range) as LEX_TYPE_NORMAL.
 * Returns the type that was recorded.
 */
int
lex_ret_integer()
{
	int type = Const.LEX_TYPE_INT;

	try {
		/* parsed only to validate; the value itself is not needed */
		Long.parseLong(yytext(), 10);
	}
	catch (NumberFormatException ex) {
		type = Const.LEX_TYPE_NORMAL;
	}

	sqlWordList_add(yytext(), type);
	return type;
}

  /**
   * Sets backslash_need_escape, which decides whether a backslash inside a
   * quoted string or identifier switches to the xqbs / xdbs state.
   *
   * @param backslash_esc  new value of the flag
   */
  public void setBackSlash(boolean backslash_esc) {
    this.backslash_need_escape = backslash_esc;
  }
  
  /**
   * Returns the text of the token just scanned.
   *
   * When a rule stored its own token text (lex_litbuf_flag is set), that
   * text is returned once and the literal buffers and the flag are reset;
   * otherwise the raw match from yytext() is returned.
   *
   * @return the token text
   */
  public final String yytext2() {
    if (!lex_litbuf_flag)
      return yytext();

    final String pending = ret_lex_litbuf;
    lex_litbuf_flag = false;
    ret_lex_litbuf = "";
    lex_litbuf = "";
    return pending;
  }

%}

/*
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xbit> bit string literal
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xh> hexadecimal numeric string
 *  <xq> quoted strings
 */
/* Bit string literal: the letter b or B directly followed by a quote.
 * xbitinside accepts any run of non-quote characters; the digits are
 * validated by the <xbit>{xbitstop} rule.
 * xbitcat is a closing quote, whitespace containing a newline, and a
 * reopening quote; its rule discards the match so the literal continues.
 */
xbitstart	= 	[bB]{quote}
xbitstop	=	{quote}
xbitinside	=	[^']*
xbitcat		=	{quote}{whitespace_with_newline}{quote}

/* Hexadecimal number: the letter x or X directly followed by a quote.
 * The body is validated by the <xh>{xhstop} rule; xhcat works like xbitcat.
 */
xhstart		=	[xX]{quote}
xhstop		=	{quote}
xhinside	=	[^']+
xhcat		=	{quote}{whitespace_with_newline}{quote}

/* A single backslash character. */
bslash		=	\\

/* Extended quote
 * xqdouble implements SQL92 embedded quote
 * xqcat allows strings to cross input lines
 * Note: the comment inherited from the C scanner says that '' and \ sequences
 * are reduced in scanstr(); lex_scanstr() in this file returns the text
 * unchanged, so no reduction happens. xqcat sequences are dropped by the rules.
 * NOTE(review): xqinside also matches backslashes and its rule is listed
 * before <xq>{bslash}, so the bslash rule looks unreachable - confirm.
 */
quote		=	'
xqstart		=	{quote}
xqstop		=	{quote}
xqdouble	=	{quote}{quote}
xqinside	=	[^']+
xqbsinside	=	[^']
xqcat		=	{quote}{whitespace_with_newline}{quote}

/* Delimited quote
 * Allows embedded spaces and other special characters into identifiers.
 * xddouble is a doubled double-quote inside the identifier.
 * xdinside is a run of characters other than backslash and double-quote.
 * xdbsinside is the single backslash or double-quote accepted directly
 * after a backslash (state xdbs).
 */
dquote		=	\"
xdstart		=	{dquote}
xdstop		=	{dquote}
xddouble	=	{dquote}{dquote}
xdinside	=	[^\\\"]+
xdbsinside	=	[\\\"]

/* C-style comments
 * xcstart is slash-star followed by any run of operator characters; the
 * rules push everything after the two-character opener back to the input.
 * xcstop is one or more stars followed by a slash.
 * xcinside is a run of characters other than star and slash; single stars
 * and slashes are handled by their own rules in state xc.
 */
xcstart		=	\/\*{op_chars}*
xcstop		=	\*+\/
xcinside	=	[^*/]+

digit		=	[0-9]

/* Identifier characters: underscore, ASCII letters, $ and #, plus every
 * character outside the 7-bit range (this is what admits Chinese text).
 * The trailing // parts are the old 8-bit byte classes kept for reference.
 */
letter		=	[_A-Za-z$#] | [^\000-\177] //[\200-\377_A-Za-z$#]
letter_or_digit	= 	[_A-Za-z0-9$#] | [^\000-\177] //[\200-\377_A-Za-z0-9$#]

identifier	=	{letter}{letter_or_digit}*

/* Two-character tokens with dedicated rules. */
//typecast	=	"::"
assign        =  ":="
boundary      =  ".."

/*********************
*	include
*	include is a single backquote; xincinside / xincstop split the input
*	at tab, newline, carriage return (octal 015) and semicolon.
*	NOTE(review): no rule in this file enters state xinc - confirm whether
*	the include feature is still used.
*********************/
include		=	[\`]
xincinside	=	[^\t\n\;\015]+
xincstop	=	[\t\n\015\;]


/* modified by gy 2002-05-23
 * '?' should not be recognized as an operator
 * NOTE(review): op_chars below still contains \? (and lists \! twice),
 * which contradicts the line above - confirm which one is intended.
 * self is a single punctuation character returned as its own token;
 * operator is a run of one to three operator characters.
 */
self		=	[\,()\.;\:\~\{\}\[\]]
op_chars	=	[\!\<\>\=\+\-\!\&\|\*\%\/\^\?\@]
operator	=	{op_chars}{1,3}
/* end modify */

/* integer: decimal digits only.
 * binteger: 0x or 0X followed by hexadecimal digits.
 * decimal: digits with a decimal point; digits may be missing on one side.
 * real: a decimal or integer mantissa followed by a mandatory exponent.
 */
integer			={digit}+
binteger		=0[xX]({digit}|[a-fA-F])+
decimal			=(({digit}*\.{digit}+)|({digit}+\.{digit}*))
real			=((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))

/* Digits directly followed by two dots. Being longer than the decimal match
 * of digits plus one dot, it wins, and its rule returns the dots to the input.
 */
integer_with_boundary	=({digit}+\.\.)

/*param			\${integer}*/

/* Character classes for whitespace handling. */
space			=[ \t\n\r\f]
horiz_space		=[ \t\f]
newline			=[\n\r]
non_newline		=[^\n\r]

/* A double-dash comment runs to the end of the line. */
comment			=("--"{non_newline}*)

/* One whitespace character or one whole double-dash comment. */
whitespace		=({space}|{comment})

/* whitespace_with_newline: optional horizontal whitespace, one newline
 * character, then any further whitespace. Used by the literal
 * continuation macros (xbitcat, xhcat, xqcat).
 */
horiz_whitespace	=({horiz_space}|{comment})
whitespace_with_newline	=({horiz_whitespace}*{newline}{whitespace}*)

/* selstar is a dot followed by a star; other is any single character not
 * covered by a longer or earlier rule.
 */
selstar         = \.\*
//constr_op       = \|\|
other		= .

/* Attribute keywords: a percent sign, optional whitespace or comments, then
 * the case-insensitive keyword. Each is returned as one token. */
pt_kw_toomanyrows 	=[%]{whitespace}*[Tt][Oo][Oo][Mm][Aa][Nn][Yy][Rr][Oo][Ww][Ss]
pt_kw_rowcount  	=[%]{whitespace}*[Rr][Oo][Ww][Cc][Oo][Uu][Nn][Tt]
pt_kw_isopen    	=[%]{whitespace}*[Ii][Ss][Oo][Pp][Ee][Nn]
pt_kw_found     	=[%]{whitespace}*[Ff][Oo][Uu][Nn][Dd]
pt_kw_notfound  	=[%]{whitespace}*[Nn][Oo][Tt][Ff][Oo][Uu][Nn][Dd]
pt_kw_type		=[%]{whitespace}*[Tt][Yy][Pp][Ee]
pt_kw_rowtype		=[%]{whitespace}*[Rr][Oo][Ww][Tt][Yy][Pp][Ee]

/* Two-word keywords, case-insensitive, separated by at least one
 * whitespace item; each pair is returned as a single token. */
startwith  	=	[Ss][Tt][Aa][Rr][Tt]{whitespace}+[Ww][Ii][Tt][Hh]
packagebody 	=	[Pp][Aa][Cc][Kk][Aa][Gg][Ee]{whitespace}+[Bb][Oo][Dd][Yy]

/* Two at-signs followed by an identifier. */
global_var	=	@@{identifier}

/*unionall	=	[Uu][Nn][Ii][Oo][Nn]{whitespace}[Aa][Ll][Ll]*/

/* Exclusive states:
 *  xbit  bit string literal
 *  xc    extended C-style comment
 *  xd    delimited (double-quoted) identifier
 *  xh    hexadecimal numeric string
 *  xq    quoted string
 *  xinc  include command; has rules but no rule in this file enters it
 *  xsb   has only an EOF rule and no rule in this file enters it
 *  xqbs  character directly after a backslash inside a quoted string
 *  xdbs  character directly after a backslash inside a delimited identifier
 */
%xstate xbit
%xstate xc
%xstate xd
%xstate xh
%xstate xq
%xstate xinc
%xstate xsb
%xstate xqbs
%xstate xdbs


/* the rules starts here */
%%

/*{unionall}		{
				YY_USER_ACTION();
			}*/

{whitespace}		{
				/* Whitespace and double-dash comments are reported as tokens,
				 * one item per match; sqlWordList_add merges adjacent items. */
				YY_USER_ACTION();
				sqlWordList_add(yytext(), Const.LEX_TYPE_WHITESPACE);

				return Const.LEX_TYPE_WHITESPACE;
			}

{include}		{
				/* The backquote is only appended to inputbuf: no token is
				 * returned and the state is not changed. */
				YY_USER_ACTION();
			}

<xinc><<EOF>>		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				return LEXERR(-8, "Unterminated command");
			}

<xinc>{xincinside}	|
<xinc>{xincstop}	{
				/* Include text and its terminator are only recorded in inputbuf. */
				YY_USER_ACTION();
			}
		
<YYINITIAL><<EOF>>	{
				YY_USER_ACTION();

				/* End of input: once the include counter drops below zero the
				 * scan is finished and LEX_END is reported. */
				include_stack_ptr--;
				if (include_stack_ptr < 0)
				{
					yybegin(YYINITIAL);
					return LEXERR(Const.LEX_END, "unterminated command");
				}
			}
				
						
{xcstart}		{
				YY_USER_ACTION();

				/* Outermost comment: clear the literal buffer, reset the
				 * nesting level and enter the comment state. */
				startlit();
				xcdepth = 0;
				yybegin(xc);

				/* xcstart may have matched operator characters after the
				 * slash-star; keep only the two-character opener. */
				my_yyless(2);
				yypushback(yylength() - 2);
			}

<xc>{xcstart}		{
				YY_USER_ACTION();

				/* Nested opener: go one level deeper. */
				xcdepth++;

				/* Keep only the two-character opener, as for the outer comment. */
				my_yyless(2);
				yypushback(yylength() - 2);

				/* Remember where the nested comment started. */
				commentStartLine = yyline;
				commentStartColumn = yycolumn - 1;
			}

<xc>{xcstop}		{
				YY_USER_ACTION();

				if (xcdepth <= 0)
				{
					/* Closing the outermost comment: emit the whole comment
					 * as one word and go back to normal scanning. */
					yybegin(YYINITIAL);

					/* An empty comment never reaches addlit, so the buffer is
					 * still null here; use the empty string so that the
					 * word does not contain the text null. */
					lex_litbuf_flag = true;
					ret_lex_litbuf = "/*" + (lex_litbuf == null ? "" : lex_litbuf) + "*/";

					sqlWordList_add(ret_lex_litbuf, Const.LEX_TYPE_NORMAL);
				}
				else
				{
					/* Closing a nested comment: one level up, still in xc. */
					xcdepth--;
				}

				return Const.LEX_TYPE_NORMAL;
			}

<xc>{xcinside}		|
<xc>[\/]		|
<xc>[\*]		{
				YY_USER_ACTION();

				/* Comment text, including lone stars and slashes, is collected
				 * verbatim in the literal buffer. */
				addlit(yytext(), yylength());
			}

<xc><<EOF>>		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				return LEXERR(-1, "Unterminated /* comment");
			}
			
{xbitstart}		{
				YY_USER_ACTION();

				/* Start a fresh literal that begins with a lower-case b,
				 * then collect the body in state xbit. */
				startlit();
				addlit("b", 1);
				yybegin(xbit);
			}
			
<xbit>{xbitstop}	{
				YY_USER_ACTION();

				yybegin(YYINITIAL);

				/* The buffer holds the b prefix followed by the body. Every
				 * body character must be 0 or 1. The previous check searched
				 * for the substring 01 and rejected every literal. */
				if (!lex_litbuf.substring(1).matches("[01]*"))
				{
					return LEXERR(-1, "invalid bit string input:");
				}

				lex_litbuf_flag = true;
				ret_lex_litbuf = lex_litbuf;

				sqlWordList_add(ret_lex_litbuf, Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}
<xh>{xhinside}		|
<xbit>{xbitinside}	{
				YY_USER_ACTION();

				/* Body characters of a hex or bit literal go to the buffer. */
				addlit(yytext(), yylength());
			}

<xh>{xhcat}		|
<xbit>{xbitcat}		{
				/* Quote, line break, quote: the literal continues and the
				 * separator itself is dropped. */
				YY_USER_ACTION();
			}

<xbit><<EOF>>		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				return LEXERR(-2, "unterminated bit string literal");
			}

{xhstart}		{
				YY_USER_ACTION();

				startlit();
				yybegin(xh);
			}

<xh>{xhstop}		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				/* The body must parse as a hexadecimal long; the value is not
				 * used, only the check matters. */
				try {
					Long.parseLong(lex_litbuf, 16);
				}
				catch (NumberFormatException ex) {
					return LEXERR(-3, "Ivalided hexadecimal integer");
				}

				sqlWordList_add(lex_litbuf, Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}

<xh><<EOF>>		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				return LEXERR(-3, "Unterminated hexadecimal integer");
			}

{xqstart}		{
				YY_USER_ACTION();

				/* Opening quote: start a fresh literal in state xq. */
				startlit();
				yybegin(xq);
			}

<xqbs>{quote}		|
<xq>{xqstop}		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				/* The word list gets the opening quote, the body when it is
				 * not empty, and the closing quote as separate entries. */
				sqlWordList_add("'", Const.LEX_TYPE_CHAR);
				if (lex_scanstr(lex_litbuf).length() > 0)
				{
					sqlWordList_add(lex_litbuf, Const.LEX_TYPE_CHAR);
				}
				sqlWordList_add("'", Const.LEX_TYPE_CHAR);

				/* yytext2 returns the whole quoted literal. */
				ret_lex_litbuf = "'" + lex_scanstr(lex_litbuf) + "'";
				lex_litbuf_flag = true;

				return Const.LEX_TYPE_CHAR;
			}
<xq>{xqdouble}		|
<xq>{xqinside}		{
				YY_USER_ACTION();

				/* Doubled quotes and ordinary text are stored unchanged. */
				addlit(yytext(), yylength());
			}

<xq>{bslash}		{
				/* NOTE(review): xqinside above also matches a backslash and is
				 * listed first, so this rule looks unreachable - confirm. */
				YY_USER_ACTION();

				addlit(yytext(), yylength());
				if (backslash_need_escape)
				{
					yybegin(xqbs);
				}
			}

<xq>{xqcat}		{
				/* Quote, line break, quote: the string continues. */
				YY_USER_ACTION();
			}

<xq><<EOF>>		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				return LEXERR(-4, "Unterminated quoted string");
			}

<xqbs>{xqbsinside}	{
				YY_USER_ACTION();

				/* The character after the backslash is stored as is. */
				addlit(yytext(), yylength());
				yybegin(xq);
			}

<xqbs><<EOF>>		{
				yybegin(YYINITIAL);

				return LEXERR(-4, "Unterminated quoted string");
			}

{xdstart}		{
				YY_USER_ACTION();

				/* Opening double quote: start a fresh literal in state xd. */
				startlit();
				yybegin(xd);
			}
<xd>{xdstop}		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				/* An identifier with nothing between the quotes is an error. */
				if (lex_litbuf == null || lex_litbuf.length() == 0)
				{
					return LEXERR(-5, "zero-length delimited identifier");
				}

				/* The identifier is reported with its surrounding quotes. */
				ret_lex_litbuf = "\"" + lex_litbuf + "\"";
				lex_litbuf_flag = true;

				sqlWordList_add(ret_lex_litbuf, Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}
			
<xd>{xddouble}		|
<xd>{xdinside}		{
				YY_USER_ACTION();

				/* Doubled quotes and ordinary text are stored unchanged. */
				addlit(yytext(), yylength());
			}

<xd>{bslash}		{
				YY_USER_ACTION();

				/* The backslash is stored; with escaping enabled the next
				 * character is read in state xdbs. */
				addlit(yytext(), yylength());
				if (backslash_need_escape)
				{
					yybegin(xdbs);
				}
			}

<xd><<EOF>>		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				return LEXERR(-5, "Unterminated quoted identifier");
			}

<xdbs>{xdbsinside}	|
<xdbs>{xdinside}	{
				YY_USER_ACTION();

				/* Text after the backslash is stored, then back to xd. */
				addlit(yytext(), yylength());
				yybegin(xd);
			}

<xdbs><<EOF>>		{
				yybegin(YYINITIAL);

				return LEXERR(-5, "Unterminated quoted identifier");
			}

<xsb><<EOF>>		{
				YY_USER_ACTION();
				yybegin(YYINITIAL);

				return LEXERR(-5, "Unterminated delimited identifier");
			}

{self}			|
{operator}		{
				/* Punctuation and operators are plain words. */
				YY_USER_ACTION();
				sqlWordList_add(yytext(), Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}

{binteger}		{
				/* Hexadecimal integer written with a 0x prefix. */
				YY_USER_ACTION();
				sqlWordList_add(yytext(), Const.LEX_TYPE_BINT);

				return Const.LEX_TYPE_BINT;
			}

{integer}		{
				/* lex_ret_integer records the word and picks its type. */
				YY_USER_ACTION();

				return lex_ret_integer();
			}

{integer_with_boundary}		{
					YY_USER_ACTION();

					/* Return the two trailing dots to the input so they are
					 * scanned as the boundary token; only the digits remain
					 * in the match. The old code pushed back length - 2
					 * characters, which was right only for two digits. */
					my_yyless(yylength() - 2);
					yypushback(2);

					return lex_ret_integer();
				}
			
{decimal}		{
				YY_USER_ACTION();

				/* Text longer than DEC_MAX_LEN is reported as a plain word. */
				if (yytext().length() > Const.DEC_MAX_LEN)
				{
					sqlWordList_add(yytext(), Const.LEX_TYPE_NORMAL);
					return Const.LEX_TYPE_NORMAL;
				}

				sqlWordList_add(yytext(), Const.LEX_TYPE_DECIMAL);
				return Const.LEX_TYPE_DECIMAL;
			}

{real}			{
				/* Number with an exponent. */
				YY_USER_ACTION();
				sqlWordList_add(yytext(), Const.LEX_TYPE_REAL);

				return Const.LEX_TYPE_REAL;
			}

{pt_kw_toomanyrows}	|
{pt_kw_rowcount}	|
{pt_kw_isopen}		|
{pt_kw_found}		|
{pt_kw_notfound}	|
{pt_kw_type}		|
{pt_kw_rowtype}		{
				/* Percent-sign attribute keywords are kept together with the
				 * percent sign as one plain word. */
				YY_USER_ACTION();
				sqlWordList_add(yytext(), Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}

{identifier}		|
{global_var}		{
				/* Identifiers and double-at-sign variables are plain words. */
				YY_USER_ACTION();
				sqlWordList_add(yytext(), Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}
                                  	
{assign}		{
				YY_USER_ACTION();

				if (!m_escapePro)
				{
					/* Default: keep the operator, padded with blanks. */
					sqlWordList_add(" := ", Const.LEX_TYPE_NORMAL);
				}
				else
				{
					/* Escape processing: report the word ASSIGN instead,
					 * also through yytext2. */
					ret_lex_litbuf = " ASSIGN ";
					lex_litbuf_flag = true;

					sqlWordList_add(" ASSIGN ", Const.LEX_TYPE_NORMAL);
				}

				return Const.LEX_TYPE_NORMAL;
			}

{boundary}		{
				YY_USER_ACTION();

				/* The two dots are one word; yytext2 returns them as well. */
				ret_lex_litbuf = "..";
				lex_litbuf_flag = true;
				sqlWordList_add("..", Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}

{selstar}		|
{other}			|
{startwith}		|
{packagebody}		{
				/* Dot-star, any otherwise unmatched character, and the two-word
				 * keywords are all recorded as plain words. The longest match
				 * decides, so the single-character rule does not hide the
				 * keyword rules listed after it. */
				YY_USER_ACTION();
				sqlWordList_add(yytext(), Const.LEX_TYPE_NORMAL);

				return Const.LEX_TYPE_NORMAL;
			}