TegraExplorer/source/script/lexer.c

#include "lexer.h"
#include "types.h"
#include "args.h"
#include <mem/heap.h>

/* Returns 1 when c may start an identifier: an ASCII letter of either case, or '_'. Otherwise 0. */
static inline int isValidWord(char c) {
	if (c == '_')
		return 1;

	// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z', so a single range test covers both cases.
	char lowered = c | 0x20;
	return (lowered >= 'a' && lowered <= 'z');
}

/* Returns 1 when c is an ASCII decimal digit ('0'..'9'), otherwise 0. */
static inline int isValidNum(char c) {
	if (c < '0')
		return 0;

	return (c <= '9');
}

/* Returns 1 when c may appear inside an identifier after its first character: a letter, '_' or a decimal digit. */
static inline int isValidVar(char c) {
	if (isValidWord(c))
		return 1;

	return isValidNum(c);
}

/*
	Returns 1 when c is a hexadecimal digit ('0'..'9', 'a'..'f', 'A'..'F'), otherwise 0.

	The digit test is done on the unmodified character: OR-ing 0x20 into the control
	bytes 0x10..0x19 would turn them into '0'..'9', so only the letter test uses the
	case-folded value.
*/
static inline int isValidHexNum(char c) {
	if (c >= '0' && c <= '9')
		return 1;

	char r = c | 0x20; // folds 'A'..'F' onto 'a'..'f'
	return (r >= 'a' && r <= 'f');
}

/*
	Builds a lexarToken_t compound literal from two positional initializers: the token id
	first, then its payload. This file passes 0, an integer literal value or a heap string
	as the payload.
*/
#define makeLexarToken(token, var) ((lexarToken_t) {token, var})

/* One row of the single-character token table: a source character and the token id it stands for. */
typedef struct {
	u8 tokenC; // character as it appears in the script text; 0 marks the end of the table
	u8 tokenN; // token id emitted for that character
} lexarTranslation_t;

lexarTranslation_t lexarTranslations[] = {
	{'}', RCBracket},
	{',', Seperator},
	{'+', Plus},
	{'-', Minus},
	{'*', Multiply},
	{'/', Division},
	{'%', Mod},
	{'!', Not},
	{':', Selector},
	{')', RBracket},
	{']', RSBracket},
	{'(', LBracket},
	{'{', LCBracket},
	{'=', Equal},
	{'[', LSBracket},
	{'<', Smaller},
	{'>', Bigger},
	{'\0', 0},
};

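/*
	Design question: should a variable whose next character is '(' be tagged as a function,
	and a variable followed by '=' be tagged as an assignment variable?
	(Open point: how do we spot the "[x] =" form, where an index sits between the name and the '='?)
*/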

/*
	Debug helper: maps a token id back to a printable character.

	tokenN: token id to look up.
	Returns the character of the first lexarTranslations row with that id, ';' for
	EquationSeperator (which has no table row), and '?' for any other id.
*/
char lexarDebugGetTokenC(u8 tokenN) {
	const lexarTranslation_t *entry = lexarTranslations;

	// The table ends at the row whose character is 0.
	while (entry->tokenC != 0) {
		if (entry->tokenN == tokenN)
			return entry->tokenC;
		entry++;
	}

	return (tokenN == EquationSeperator) ? ';' : '?';
}

/*
* !! we need to remake this
void lexarVectorClear(lexarVector_t* vec) {
	for (int i = 0; i < vec->stored; i++) {
		if (vec->tokens[i].token == Variable || vec->tokens[i].token == StrLit)
			if (vec->tokens[i].text != NULL)
				free(vec->tokens[i].text);
	}
	free(vec->tokens);
}
*/

/*
	Releases a token vector: frees the text of every token that is not an IntLit and has
	a non-NULL text pointer, then frees the vector's own storage through vecFreePtr.

	v: vector of lexarToken_t to release.
*/
void lexarVectorClear(Vector_t *v){
	vecPDefArray(lexarToken_t*, entries, v);

	for (int i = 0; i < v->count; i++){
		// IntLit tokens are skipped before their text field is even looked at.
		if (entries[i].token == IntLit)
			continue;

		if (entries[i].text == NULL)
			continue;

		free(entries[i].text);
	}

	vecFreePtr(v);
}

/*
	Shorthand for one more `else if` arm that tests the current input character.
	Expands against a pointer named `in`, which must be in scope where the macro is used.
*/
#define ELIFC(c) else if (*in == c)

/*
	Tokenizes script text into a vector of lexarToken_t.

	in:  start of the script text. Only the bytes in [in, in + len) are read by this
	     function itself; every scan and one-character lookahead is bounded by that range.
	len: number of bytes to lex.

	Returns a Vector_t of lexarToken_t that always ends with an EquationSeperator token.
	Variable tokens carry the string returned by utils_copyStringSize and StrLit tokens a
	malloc'd, escape-processed copy of the literal; lexarVectorClear frees the text of
	every non-IntLit token.

	While lexing, the most recent token is retagged based on what follows it:
		Variable + '('            -> Function
		Variable + '['            -> ArrayVariable
		Variable + '='            -> VariableAssignment
		ArrayVariable ... ']' '=' -> ArrayVariableAssignment
		VariableAssignment + '{'  -> FunctionAssignment
	A plain '=' emits no token of its own; only the retagging above records it.

	An EquationSeperator is inserted between two adjacent tokens when the first one ends
	an operand (StrLit, IntLit, Variable, ']' or ')') and the second is a Variable, '{' or '}'.

	Idea kept from the original notes (not implemented): measure the span between ( ) and
	[ ] here, including nested pairs, so it does not have to be done at runtime.
*/
Vector_t runLexer(const char* in, u32 len) {
	const char* end = in + len;
	Vector_t vec = newVec(sizeof(lexarToken_t), 16);

	// Index of the most recent ArrayVariableAssignment token. Index 0 is a valid token
	// position, so a separate flag says whether the index is meaningful.
	u32 lastAssignment = 0;
	u8 hasLastAssignment = 0;

	for (;;) {
		lexarToken_t* lx = vecGetArray(lexarToken_t*, vec);

		// Statement-boundary check on the two newest tokens. It runs before every
		// character and once more after the input is exhausted, so the final pair is
		// treated exactly like any other.
		if (vec.count >= 2) {
			lexarToken_t* prev = &lx[vec.count - 2];
			lexarToken_t* cur = &lx[vec.count - 1];

			int prevEndsOperand = (prev->token == StrLit || prev->token == IntLit || prev->token == Variable || prev->token == RSBracket || prev->token == RBracket);
			int curStartsNext = (cur->token == Variable || cur->token == LCBracket || cur->token == RCBracket);

			// "arr[i] = value": the Variable right after ']' is the assigned value, not a new statement.
			int isArrayAssignValue = (hasLastAssignment
				&& lx[lastAssignment].token == ArrayVariableAssignment
				&& cur->token == Variable
				&& prev->token == RSBracket);

			if (prevEndsOperand && curStartsNext && !isArrayAssignValue) {
				lexarToken_t holder = *cur;
				*cur = makeLexarToken(EquationSeperator, 0);
				vecAddElement(&vec, holder);
				lx = vecGetArray(lexarToken_t*, vec); // adding may have moved the storage
			}
		}

		if (in >= end)
			break;

		// Newest token (NULL while the vector is empty) and the next input character
		// ('\0' when the current character is the last one).
		lexarToken_t* last = (vec.count > 0) ? &lx[vec.count - 1] : NULL;
		char next = (end - in > 1) ? in[1] : '\0';

		if (isValidWord(*in)) {
			const char* startWord = in;
			in++;
			while (in < end && isValidVar(*in))
				in++;

			vecAddElement(&vec, (makeLexarToken(Variable, utils_copyStringSize(startWord, in - startWord))));
			continue;
		}
		else if (isValidNum(*in) || (*in == '-' && isValidNum(next))) {
			// Accumulate unsigned so an over-long literal wraps instead of overflowing a signed int.
			u32 acc = 0;
			u8 negative = (*in == '-');
			if (negative)
				in++;

			if (*in == '0' && end - in > 1 && (in[1] | 0x20) == 'x') {
				in += 2;
				while (in < end && isValidHexNum(*in)) {
					// Low nibble gives 0..9 for digits and 1..6 for letters; letters get +9.
					acc = acc * 16 + (*in & 0x0F) + (*in >= 'A' ? 9 : 0);
					in++;
				}
			}
			else {
				while (in < end && isValidNum(*in)) {
					acc = acc * 10 + (u32)(*in - '0');
					in++;
				}
			}

			if (negative)
				acc = 0u - acc;

			int parse = (int)acc;
			vecAddElement(&vec, makeLexarToken(IntLit, parse));
			continue;
		}
		ELIFC('(') {
			if (last != NULL && last->token == Variable)
				last->token = Function;

			vecAddElement(&vec, makeLexarToken(LBracket, 0));
		}
		ELIFC('[') {
			if (last != NULL && last->token == Variable)
				last->token = ArrayVariable;

			vecAddElement(&vec, makeLexarToken(LSBracket, 0));
		}
		ELIFC('=') {
			if (next == '=') {
				// "==" is two characters; consume both so the second '=' is not lexed again as an assignment.
				vecAddElement(&vec, makeLexarToken(EqualEqual, 0));
				in += 2;
				continue;
			}

			if (last != NULL && last->token == Variable) {
				last->token = VariableAssignment;
			}
			else if (last != NULL && last->token == RSBracket) {
				// Walk back to the nearest ArrayVariable without leaving the vector.
				// NOTE(review): "nearest" means a nested index such as a[b[0]] = ... tags the inner
				// variable; confirm whether the parser relies on that.
				u32 idx = vec.count;
				while (idx > 0 && lx[idx - 1].token != ArrayVariable)
					idx--;

				if (idx > 0) {
					lx[idx - 1].token = ArrayVariableAssignment;
					lastAssignment = idx - 1;
					hasLastAssignment = 1;
					in++;
					continue;
				}
			}

			lastAssignment = 0;
			hasLastAssignment = 0;
		}
		ELIFC('{') {
			if (last != NULL && last->token == VariableAssignment)
				last->token = FunctionAssignment;

			vecAddElement(&vec, makeLexarToken(LCBracket, 0));
		}
		ELIFC('"') {
			const char* startStr = ++in;
			while (in < end && *in != '"')
				in++;

			int strLen = in - startStr;

			char* storage = malloc(strLen + 1);

			// Copy the literal, translating the escapes \n and \r; any other backslash is kept as-is.
			int pos = 0;
			for (int i = 0; i < strLen; i++) {
				if (startStr[i] == '\\' && i + 1 < strLen) {
					if (startStr[i + 1] == 'n') {
						storage[pos++] = '\n';
						i++;
						continue;
					}

					if (startStr[i + 1] == 'r') {
						storage[pos++] = '\r';
						i++;
						continue;
					}
				}
				storage[pos++] = startStr[i];
			}
			storage[pos] = '\0';

			vecAddElement(&vec, makeLexarToken(StrLit, storage));

			// Unterminated literal: there is no closing quote left to step over.
			if (in >= end)
				continue;
		}
		ELIFC('#') {
			// Comment: skip up to the newline, or to the end of input when the last line has none.
			while (in < end && *in != '\n')
				in++;

			if (in >= end)
				continue;
		}
		ELIFC('&') {
			if (next == '&') {
				vecAddElement(&vec, makeLexarToken(LogicAND, 0));
				in++;
			}
			else {
				vecAddElement(&vec, makeLexarToken(AND, 0));
			}
		}
		ELIFC('|') {
			if (next == '|') {
				vecAddElement(&vec, makeLexarToken(LogicOR, 0));
				in++;
			}
			else {
				vecAddElement(&vec, makeLexarToken(OR, 0));
			}
		}
		ELIFC('>'){
			if (next == '>'){
				vecAddElement(&vec, makeLexarToken(BitShiftRight, 0));
				in++;
			}
			else {
				// ">=" must yield the "-or-equal" id, which (as for '<' below and in the generic
				// branch) is taken to be the id directly after the plain comparison.
				int a = (next == '=') ? 1 : 0;
				vecAddElement(&vec, makeLexarToken(Bigger + a, 0));
				in += a;
			}
		}
		ELIFC('<'){
			if (next == '<'){
				vecAddElement(&vec, makeLexarToken(BitShiftLeft, 0));
				in++;
			}
			else {
				int a = (next == '=') ? 1 : 0;
				vecAddElement(&vec, makeLexarToken(Smaller + a, 0));
				in += a;
			}
		}
		else {
			// Any other character: look it up in the single-character table.
			// Characters without a row (whitespace, etc.) leave val at 0.
			int val = 0;

			for (int i = 0; lexarTranslations[i].tokenC; i++) {
				if (lexarTranslations[i].tokenC == *in) {
					val = lexarTranslations[i].tokenN;
					break;
				}
			}

			in++;

			// A trailing '=' upgrades ids in the Smaller..Not range to the id that follows them.
			if (in < end && *in == '=' && val >= Smaller && val <= Not) {
				val++;
				in++;
			}

			if (val != Invalid)
				vecAddElement(&vec, makeLexarToken(val, 0));

			continue;
		}

		in++;
	}

	vecAddElement(&vec, makeLexarToken(EquationSeperator, 0));
	return vec;
}