2020-12-28 13:51:59 +00:00
# include "lexer.h"
# include "types.h"
# include "args.h"
# include <mem/heap.h>
/* Returns 1 if c may start an identifier (ASCII letter or underscore), else 0. */
static inline int isValidWord(char c) {
    if (c == '_')
        return 1;

    /* Setting bit 5 folds 'A'..'Z' onto 'a'..'z', so one range test covers both cases. */
    char lower = c | 0x20;
    return lower >= 'a' && lower <= 'z';
}
/* Returns 1 if c is an ASCII decimal digit, else 0. */
static inline int isValidNum(char c) {
    return !(c < '0' || c > '9');
}
/* Returns 1 if c may appear after the first character of an identifier
 * (letter, underscore or decimal digit), else 0. */
static inline int isValidVar(char c) {
    if (isValidWord(c))
        return 1;
    return isValidNum(c);
}
/* Returns 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static inline int isValidHexNum(char c) {
    /* Test digits on the raw character: OR-ing with 0x20 would map the control
     * characters 0x10..0x19 onto '0'..'9' and wrongly accept them. */
    if (c >= '0' && c <= '9')
        return 1;

    /* Setting bit 5 folds 'A'..'F' onto 'a'..'f'. */
    char lower = c | 0x20;
    return lower >= 'a' && lower <= 'f';
}
/* Builds a lexarToken_t compound literal from a token id and its payload.
 * The initializer is positional, so `token` and `var` fill the first two
 * members of lexarToken_t in declaration order (the struct is declared
 * outside this file - presumably in lexer.h; verify the member order there). */
# define makeLexarToken(token, var) ((lexarToken_t) {token, var})
/* One row of the character-to-token lookup table. */
typedef struct {
    u8 tokenC; /* source character; '\0' marks the end of the table */
    u8 tokenN; /* token id emitted for that character */
} lexarTranslation_t;
lexarTranslation_t lexarTranslations [ ] = {
{ ' } ' , RCBracket } ,
{ ' , ' , Seperator } ,
{ ' + ' , Plus } ,
{ ' - ' , Minus } ,
{ ' * ' , Multiply } ,
{ ' / ' , Division } ,
{ ' % ' , Mod } ,
{ ' ! ' , Not } ,
2020-12-31 20:20:48 +00:00
{ ' : ' , Selector } ,
2020-12-28 13:51:59 +00:00
{ ' ) ' , RBracket } ,
{ ' ] ' , RSBracket } ,
{ ' ( ' , LBracket } ,
{ ' { ' , LCBracket } ,
{ ' = ' , Equal } ,
{ ' [ ' , LSBracket } ,
2021-01-02 10:39:14 +00:00
{ ' < ' , Smaller } ,
{ ' > ' , Bigger } ,
2020-12-28 13:51:59 +00:00
{ ' \0 ' , 0 } ,
} ;
/*
Open question: should a variable whose next character is '(' be retagged as a function,
and a variable followed by '=' (or by "[x] =" - how would we detect that case?) be retagged as an assignment variable?
*/
/* Debug helper: returns a printable character for a token id.
 *
 * Looks the id up in lexarTranslations; EquationSeperator, which has no table
 * row, is reported as ';'. Any other id yields '?'. */
char lexarDebugGetTokenC(u8 tokenN) {
    const lexarTranslation_t *entry = lexarTranslations;

    /* The table ends at the row whose character is '\0'. */
    while (entry->tokenC) {
        if (entry->tokenN == tokenN)
            return entry->tokenC;
        entry++;
    }

    return (tokenN == EquationSeperator) ? ';' : '?';
}
/*
* ! ! we need to remake this
void lexarVectorClear ( lexarVector_t * vec ) {
for ( int i = 0 ; i < vec - > stored ; i + + ) {
if ( vec - > tokens [ i ] . token = = Variable | | vec - > tokens [ i ] . token = = StrLit )
if ( vec - > tokens [ i ] . text ! = NULL )
free ( vec - > tokens [ i ] . text ) ;
}
free ( vec - > tokens ) ;
}
*/
/* Releases a token vector produced by the lexer.
 *
 * Frees the text attached to every token that is not an IntLit and has a
 * non-NULL text pointer, then frees the vector itself via vecFreePtr.
 * IntLit tokens are skipped - presumably their payload is an integer stored
 * in the same slot as text (verify against the lexarToken_t declaration). */
void lexarVectorClear(Vector_t *v) {
    vecPDefArray(lexarToken_t *, toks, v);

    for (int idx = 0; idx < v->count; idx++) {
        if (toks[idx].token == IntLit || toks[idx].text == NULL)
            continue;

        free(toks[idx].text);
    }

    vecFreePtr(v);
}
/* Shorthand for one arm of a character dispatch chain: expands to
 * `else if (*in == c)`. It therefore only works where a pointer named `in`
 * is in scope and directly after the body of a preceding if / else-if. */
# define ELIFC(c) else if (*in == c)
2021-01-09 23:10:28 +00:00
Vector_t runLexer ( const char * in , u32 len ) {
2020-12-28 13:51:59 +00:00
const char * start = in ;
Vector_t vec = newVec ( sizeof ( lexarToken_t ) , 16 ) ;
// store last var for re-assignment
// var -> func if next obj is '('
// var -> assignment if next obj is '='
// var -> arrassignment if next obj is '[' and before '=' is ']'
// maybe measure len between ( ) and [ ], so this doesn't have to be done during runtime?
// We also have to support (()). maybe if '(' set indent level, then if ')' minus indent level, set len. indent level contains {u8 level, u16 token, u16 startoffset}
2021-01-01 17:30:51 +00:00
u32 lastAssignment = 0 ;
2020-12-28 13:51:59 +00:00
while ( ( in - start ) < len ) {
lexarToken_t * lx = vecGetArray ( lexarToken_t * , vec ) ;
if ( ( lx [ vec . count - 2 ] . token = = StrLit | | lx [ vec . count - 2 ] . token = = IntLit | | lx [ vec . count - 2 ] . token = = Variable | | lx [ vec . count - 2 ] . token = = RSBracket | | lx [ vec . count - 2 ] . token = = RBracket )
& & ( lx [ vec . count - 1 ] . token = = Variable | | lx [ vec . count - 1 ] . token = = LCBracket | | lx [ vec . count - 1 ] . token = = RCBracket ) ) {
2021-01-01 17:30:51 +00:00
if ( ! ( lx [ lastAssignment ] . token = = ArrayVariableAssignment & & lx [ vec . count - 1 ] . token = = Variable & & lx [ vec . count - 2 ] . token = = RSBracket ) ) {
lexarToken_t holder = lx [ vec . count - 1 ] ;
lx [ vec . count - 1 ] = makeLexarToken ( EquationSeperator , 0 ) ;
vecAddElement ( & vec , holder ) ;
lx = vecGetArray ( lexarToken_t * , vec ) ;
}
2020-12-28 13:51:59 +00:00
}
if ( isValidWord ( * in ) ) {
char * startWord = in ;
in + + ;
while ( isValidVar ( * in ) )
in + + ;
vecAddElement ( & vec , ( makeLexarToken ( Variable , utils_copyStringSize ( startWord , in - startWord ) ) ) ) ;
continue ;
}
else if ( isValidNum ( * in ) | | ( * in = = ' - ' & & isValidNum ( in [ 1 ] ) ) ) {
int parse = 0 ;
u8 negative = ( * in = = ' - ' ) ;
if ( negative )
in + + ;
if ( * in = = ' 0 ' & & ( in [ 1 ] | 0x20 ) = = ' x ' ) {
in + = 2 ;
while ( isValidHexNum ( * in ) ) {
parse = parse * 16 + ( * in & 0x0F ) + ( * in > = ' A ' ? 9 : 0 ) ;
in + + ;
}
}
else while ( isValidNum ( * in ) ) {
parse = parse * 10 + * in + + - ' 0 ' ;
}
if ( negative )
parse * = - 1 ;
vecAddElement ( & vec , makeLexarToken ( IntLit , parse ) ) ;
continue ;
}
ELIFC ( ' ( ' ) {
if ( lx [ vec . count - 1 ] . token = = Variable )
lx [ vec . count - 1 ] . token = Function ;
vecAddElement ( & vec , makeLexarToken ( LBracket , 0 ) ) ;
}
ELIFC ( ' [ ' ) {
if ( lx [ vec . count - 1 ] . token = = Variable )
lx [ vec . count - 1 ] . token = ArrayVariable ;
vecAddElement ( & vec , makeLexarToken ( LSBracket , 0 ) ) ;
}
ELIFC ( ' = ' ) { // Do we need to keep = if the vars are assignments anyway?
2021-01-04 17:27:17 +00:00
if ( in [ 1 ] = = ' = ' ) {
vecAddElement ( & vec , makeLexarToken ( EqualEqual , 0 ) ) ;
in + + ;
continue ;
}
2020-12-28 13:51:59 +00:00
if ( lx [ vec . count - 1 ] . token = = Variable )
lx [ vec . count - 1 ] . token = VariableAssignment ;
else if ( lx [ vec . count - 1 ] . token = = RSBracket ) {
int back = 1 ;
while ( lx [ vec . count - back ] . token ! = ArrayVariable ) {
back + + ;
if ( vec . count - back < 0 )
break ; // major error
}
if ( lx [ vec . count - back ] . token = = ArrayVariable ) {
lx [ vec . count - back ] . token = ArrayVariableAssignment ;
2021-01-01 17:30:51 +00:00
lastAssignment = vec . count - back ;
in + + ;
continue ;
2020-12-28 13:51:59 +00:00
}
}
2021-01-01 17:30:51 +00:00
lastAssignment = 0 ;
2020-12-28 13:51:59 +00:00
}
ELIFC ( ' { ' ) {
if ( lx [ vec . count - 1 ] . token = = VariableAssignment ) {
lx [ vec . count - 1 ] . token = FunctionAssignment ;
}
vecAddElement ( & vec , makeLexarToken ( LCBracket , 0 ) ) ;
}
ELIFC ( ' " ' ) {
char * startStr = + + in ;
int len = 0 ;
while ( * in ! = ' " ' ) {
in + + ;
}
len = in - startStr ;
char * storage = malloc ( len + 1 ) ;
int pos = 0 ;
for ( int i = 0 ; i < len ; i + + ) {
if ( startStr [ i ] = = ' \\ ' ) {
if ( startStr [ i + 1 ] = = ' n ' ) {
storage [ pos + + ] = ' \n ' ;
i + + ;
continue ;
}
if ( startStr [ i + 1 ] = = ' r ' ) {
storage [ pos + + ] = ' \r ' ;
i + + ;
continue ;
}
}
storage [ pos + + ] = startStr [ i ] ;
}
storage [ pos ] = ' \0 ' ;
vecAddElement ( & vec , makeLexarToken ( StrLit , storage ) ) ;
}
ELIFC ( ' # ' ) {
while ( * in ! = ' \n ' )
in + + ;
}
ELIFC ( ' & ' ) {
if ( in [ 1 ] = = ' & ' ) {
vecAddElement ( & vec , makeLexarToken ( LogicAND , 0 ) ) ;
in + + ;
}
else {
vecAddElement ( & vec , makeLexarToken ( AND , 0 ) ) ;
}
}
ELIFC ( ' | ' ) {
if ( in [ 1 ] = = ' | ' ) {
vecAddElement ( & vec , makeLexarToken ( LogicOR , 0 ) ) ;
in + + ;
}
else {
vecAddElement ( & vec , makeLexarToken ( OR , 0 ) ) ;
}
}
2021-01-02 10:39:14 +00:00
ELIFC ( ' > ' ) {
if ( in [ 1 ] = = ' > ' ) {
vecAddElement ( & vec , makeLexarToken ( BitShiftRight , 0 ) ) ;
in + + ;
}
2021-01-10 22:19:51 +00:00
else {
int a = ( in [ 1 ] = = ' = ' ) ? 1 : 0 ;
2021-01-02 10:39:14 +00:00
vecAddElement ( & vec , makeLexarToken ( Bigger , 0 ) ) ;
2021-01-10 22:19:51 +00:00
in + = a ;
}
2021-01-02 10:39:14 +00:00
}
ELIFC ( ' < ' ) {
if ( in [ 1 ] = = ' < ' ) {
vecAddElement ( & vec , makeLexarToken ( BitShiftLeft , 0 ) ) ;
in + + ;
}
2021-01-10 22:19:51 +00:00
else {
int a = ( in [ 1 ] = = ' = ' ) ? 1 : 0 ;
vecAddElement ( & vec , makeLexarToken ( Smaller + a , 0 ) ) ;
in + = a ;
}
2021-01-02 10:39:14 +00:00
}
2020-12-28 13:51:59 +00:00
else {
int val = 0 ;
for ( int i = 0 ; lexarTranslations [ i ] . tokenC ; i + + ) {
if ( lexarTranslations [ i ] . tokenC = = * in ) {
val = lexarTranslations [ i ] . tokenN ;
break ;
}
}
in + + ;
if ( * in = = ' = ' & & val > = Smaller & & val < = Not ) {
val + + ;
in + + ;
}
if ( val ! = Invalid )
vecAddElement ( & vec , makeLexarToken ( val , 0 ) ) ;
continue ;
}
in + + ;
}
lexarToken_t * lx = vecGetArray ( lexarToken_t * , vec ) ;
if ( ( lx [ vec . count - 2 ] . token = = StrLit | | lx [ vec . count - 2 ] . token = = IntLit | | lx [ vec . count - 2 ] . token = = Variable | | lx [ vec . count - 2 ] . token = = RSBracket | | lx [ vec . count - 2 ] . token = = RBracket )
& & ( lx [ vec . count - 1 ] . token = = Variable | | lx [ vec . count - 1 ] . token = = LCBracket | | lx [ vec . count - 1 ] . token = = RCBracket ) ) {
lexarToken_t holder = lx [ vec . count - 1 ] ;
lx [ vec . count - 1 ] = makeLexarToken ( EquationSeperator , 0 ) ;
vecAddElement ( & vec , holder ) ;
}
vecAddElement ( & vec , makeLexarToken ( EquationSeperator , 0 ) ) ;
return vec ;
}