mirror of
https://github.com/suchmememanyskill/TegraExplorer.git
synced 2024-11-25 21:32:08 +00:00
update ts-minifier
This commit is contained in:
parent
9eef3d7b47
commit
6ad5c7b5f6
1 changed files with 296 additions and 97 deletions
385
ts-minifier.py
385
ts-minifier.py
|
@ -1,29 +1,66 @@
|
|||
# Copyright (c) 2021 bleck9999
|
||||
# https://github.com/bleck9999/ts-minifier
|
||||
# Version: 91345ce1
|
||||
# Version: 700ce8d
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import itertools
|
||||
from string import ascii_letters
|
||||
|
||||
# if is not included because it's already 2 characters
|
||||
sub_funcs = {'while': "_h", 'print': "_p", 'println': "_l", 'mountsys': "_s", 'mountemu': "_e", 'readsave': "_r",
|
||||
'exit': "_q", 'break': "_b", 'dict': "_d", 'setpixel': "_y", 'readdir': "_i", 'copyfile': "_c",
|
||||
'mkdir': "_k", 'ncatype': "_n", 'pause': "_w", 'color': "_a", 'menu': "__", 'emu': "_u",
|
||||
'clear': "_x", 'timer': "_t", 'deldir': "_g", 'fsexists': "_f", 'delfile': "_z", "copydir": "c_",
|
||||
"movefile": "_v", "payload": "_j", "readfile": "_o", "writefile": "_W", "setpixels": "_Y", "printpos": "_P",
|
||||
"emmcread": "_E", "emmcwrite": "_F", "emummcread": "_R", "emummcwrite": "_S", "escapepath": "_X",
|
||||
"combinepath": "_A", "cwd": "_D", "power": "_O", "fuse_patched": "_M", "fuse_hwtype": "_N"}
|
||||
replace_functions = False
|
||||
auto_replace = False
|
||||
stdlib = ['if', 'while', 'print', 'println', 'mountsys', 'mountemu', 'readsave', 'exit', 'break', 'dict', 'setpixel',
|
||||
'readdir', 'copyfile', 'mkdir', 'ncatype', 'pause', 'color', 'menu', 'emu', 'clear', 'timer', 'deldir',
|
||||
'fsexists', 'delfile', 'copydir', 'movefile', 'payload', 'readfile', 'writefile', 'setpixels', 'printpos',
|
||||
'emmcread', 'emmcwrite', 'emummcread', 'emummcwrite', 'escapepath', 'combinepath', 'cwd', 'power',
|
||||
'fuse_patched', 'fuse_hwtype']
|
||||
|
||||
|
||||
def wantsumspace(s: str):
|
||||
for c in s.lower():
|
||||
if (ord(c) < 97 or ord(c) > 122) and (ord(c) != 95) and not (c.isnumeric()):
|
||||
class Code:
    """Split a script into string, comment and plain-code sections.

    Every section is a ``(start, end, content)`` tuple; ``start`` and ``end``
    are offsets into the script text handed to the constructor and ``content``
    is ``script[start:end]``.

    Attributes:
        sections: all strings, comments and code sections, sorted.
        strings:  the string sections, exactly as passed in.
        comments: the comment sections, exactly as passed in.
        code:     the sections found between the strings and comments.
        rawcode:  contents of the code and string sections joined in sorted
                  order, i.e. the script without its comments.
    """

    def __init__(self, strings, comments, script):
        """Work out the code sections lying between *strings* and *comments*.

        :param strings: ``(start, end, content)`` tuples, one per string
        :param comments: ``(start, end, content)`` tuples, one per comment
        :param script: the text the offsets refer to
        """
        strings_comments = sorted(strings + comments)
        # `bounds` is a flat list of offsets at which the text switches
        # between code and non-code. It is seeded with 0 when the script opens
        # with code; that includes a script without any string or comment,
        # which used to raise IndexError on strings_comments[0].
        bounds = []
        if not strings_comments or strings_comments[0][0] != 0:
            bounds.append(0)
        for val in strings_comments:
            # bounds[-1] is the end of the previous non-code run (or the seed
            # 0, which can never equal a start here). The original indexed
            # bounds[counter - 1], which pointed one slot too early whenever
            # the seed was present, so adjacent runs were never merged.
            if bounds and bounds[-1] == val[0]:
                bounds[-1] = val[1]
            else:
                bounds += [val[0], val[1]]
        bounds.append(len(script))

        # With the seed the list has even length and code runs are the pairs
        # (0, 1), (2, 3), ...; without it they are (1, 2), (3, 4), ...
        first = 2 if len(bounds) % 2 else 1
        code = [(bounds[i - 1], bounds[i], script[bounds[i - 1]:bounds[i]])
                for i in range(first, len(bounds), 2)]

        self.sections = sorted(strings_comments + code)
        self.strings = strings
        self.comments = comments
        self.code = code
        self.rawcode = "".join([x[2] for x in sorted(self.code + self.strings)])

    def getafter(self, ch: int):
        """Return the first string section starting at or after *ch*.

        *ch* is first shifted by the end offset of the last comment (when
        there is one). Returns ``None`` if no string qualifies.
        """
        # NOTE(review): the shift only maps a rawcode offset back to a script
        # offset if every comment sits before all code - confirm with callers.
        ch += self.comments[-1][1] if self.comments else 0
        for strcom in self.strings:
            if strcom[0] >= ch:
                return strcom
        return None

    def nextch(self, ch: int, reverse: bool):
        """Return the ``rawcode`` character next to index *ch*.

        With *reverse* false this is the character after *ch*, otherwise the
        one before it. An empty string is returned when that neighbour would
        fall off the respective end of ``rawcode``.
        """
        if reverse:
            return self.rawcode[ch - 1] if ch - 1 >= 0 else ''
        return self.rawcode[ch + 1] if ch + 1 < len(self.rawcode) else ''
|
||||
|
||||
|
||||
def isidentifier(s: str):
    """Return True when *s* consists only of ASCII letters and underscores.

    Digits are rejected, and the empty string is accepted because it contains
    no offending character.
    """
    # build the allowed alphabet once instead of on every loop iteration
    allowed = ascii_letters + '_'
    return all(c in allowed for c in s)
|
||||
|
||||
|
||||
def commentstartswhere(s: str):
|
||||
def hascomment(s: str):
|
||||
quoted = False
|
||||
for c in range(len(s)):
|
||||
if s[c] == '"':
|
||||
|
@ -33,29 +70,233 @@ def commentstartswhere(s: str):
|
|||
return None
|
||||
|
||||
|
||||
def minify(script: str):
|
||||
# currently ts does not seem to allow 's to mark a quote
|
||||
# (https://github.com/suchmememanyskill/TegraExplorer/blob/tsv3/source/script/parser.c#L173)
|
||||
# im fine with that, it makes doing this a lot easier
|
||||
# strings = script.split(sep='"')
|
||||
def parser(script: str):
    """Index the strings, comments and identifiers of *script*, then minify it.

    A first pass records every string literal and comment as a
    ``(start, end, content)`` tuple and wraps the result in a ``Code`` object.
    A second pass walks ``Code.rawcode`` and fills two dicts:

    * ``usages``      - identifier -> list of start offsets in ``rawcode``
    * ``userobjects`` - identifier -> ``"func"`` or ``"var"`` for names that
      are assigned to or are not listed in ``stdlib``

    :param script: script source text
    :return: the result of ``minify(code, userobjects, usages)``
    """
    # NOTE(review): this function was recovered from a rendered diff that had
    # lost its indentation. The nesting of the '.', '(' and ')' handling
    # relative to the "identifier just ended" check is a reconstruction -
    # confirm against the upstream file.
    comments = []  # [(start, end, content)]
    strings = []
    commented = False
    quoted = False
    strstart = -1
    commentstart = -1
    for c, char in enumerate(script):
        if char == '#' and not quoted:
            commented = True
            commentstart = c
        elif char == '\n' and not quoted and commented:
            # a comment runs up to and including its newline
            comments.append((commentstart, c + 1, script[commentstart:c + 1]))
            commented = False
        elif char == '"' and not commented:
            if not quoted:
                strstart = c
                quoted = True
            else:
                # the quotation marks are part of the recorded string
                strings.append((strstart, c + 1, script[strstart:c + 1]))
                quoted = False

    code = Code(strings, comments, script)

    # What the second pass has to cope with:
    #   variable creation   | a = 15, array.foreach("a")
    #   defining a function | funcname = {function body}
    #   calling a function  | funcname(arguments)
    #   member calling      | object.member(possible args), which can be chained
    #   *statements*        | a
    # Identifiers can be delimited by anything that is not a valid identifier
    # character. The input is assumed to be valid syntax and to have been
    # stripped of unnecessary whitespace and comments beforehand.
    userobjects = {}
    usages = {}
    hexxed = False
    ismember = False
    quoted = False
    strscript = code.rawcode
    unset = len(strscript) + 1  # sentinel: no identifier is currently open
    start = unset
    for ch, char in enumerate(strscript):
        # `ch > 0` keeps strscript[ch - 1] from wrapping around to the last
        # character of the script on the very first iteration
        if ch > 0 and strscript[ch - 1] == '0' and char == 'x' and not quoted:
            hexxed = True
        elif isidentifier(char) and not (hexxed or quoted):
            if start > ch:
                start = ch
        elif hexxed and char.upper() not in "0123456789ABCDEF":
            hexxed = False
        elif char == '"':
            quoted = not quoted
        elif not quoted:
            if start != unset:  # an identifier ended right before this char
                identifier = strscript[start:ch]
                # nextch() yields '' at the end of the script, where indexing
                # strscript[ch + 1] directly would raise IndexError
                following = code.nextch(ch, False)
                if identifier in usages:
                    usages[identifier].append(start)
                elif char == '=' and following != '=':
                    userobjects[identifier] = "func" if following == '{' else "var"
                    usages[identifier] = [start]  # the declaration counts as a usage
                elif not ismember:
                    # not an assignment (or member), and a name not seen before
                    usages[identifier] = [start]
                    # only names missing from the stdlib list are user objects
                    if identifier not in stdlib:
                        userobjects[identifier] = "var"
            if char == '.':
                if ismember:
                    # a '.' right after a ')' that already flagged the member
                    # access: nothing left to do for this character
                    continue
                ismember = True
            elif char == '(':
                # array.foreach takes a variable name as a string argument
                if ismember and "foreach" == strscript[start:ch]:
                    target = code.getafter(ch)
                    if target is not None:  # guard: no string follows
                        name = target[2].replace('"', '')
                        if name in userobjects:
                            # NOTE(review): this records the offset of the
                            # `foreach` identifier, not of the quoted name -
                            # confirm that is intended
                            usages[name].append(start)
                        else:
                            usages[name] = []
                            userobjects[name] = "var"
            elif char == ')':
                ismember = code.nextch(ch, False) == '.'
            start = unset

    return minify(code, userobjects, usages)
|
||||
|
||||
|
||||
def _substitute(code, positions, width, name):
    """Overwrite the *width* characters at each offset in *positions* with *name*.

    *name* is followed by ``width - len(name)`` spaces, so when it is not
    longer than *width* the text keeps its length and all other offsets stay
    valid. With no positions *code* is returned unchanged.
    """
    pieces = []
    prev = 0
    padding = ' ' * (width - len(name))
    for pos in positions:
        pieces.append(code[prev:pos] + name + padding)
        prev = pos + width
    # splice from `prev`, not from the loop variable: the latter is undefined
    # (or stale from an earlier call site) when `positions` is empty
    pieces.append(code[prev:])
    return "".join(pieces)


def minify(script: Code, userobjects, usages):
    """Shorten user names, alias stdlib functions and de-duplicate strings.

    Works on ``script.rawcode`` in three passes: renaming user objects,
    aliasing standard library functions that are used more than once, and
    introducing variables for strings that are used more than once. Changes
    are only applied when the module-level ``auto_replace`` flag is set;
    otherwise the possible savings are just printed. Names picked along the
    way are added to *userobjects* (values ``"TRN"``, ``"TRP"``, ``"TIV"``).

    :param script: analysed script
    :param userobjects: identifier -> ``"var"``/``"func"``; mutated
    :param usages: identifier -> list of start offsets in ``script.rawcode``
    :return: the result of ``whitespacent`` on the comments, the alias
             definitions and the rewritten code, concatenated in that order
    """
    # NOTE(review): recovered from a rendered diff without indentation; the
    # nesting below is the most sensible reading - confirm against upstream.

    # the space saved by an alias is the amount of characters currently used
    # by calling the function (uses*len(func)) minus the amount of characters
    # it takes to define the alias (len(alias)+len(func)+2), the 2 being the
    # equals sign and the whitespace needed for a definition.
    # a rename is already defined, so it saves the length difference per use.
    alphabet = ascii_letters + '_'
    short_idents = list(alphabet) + [a + b for a, b in itertools.permutations(alphabet, 2)]
    short_idents.remove("if")
    single_char = short_idents[:53]  # the one-character names come first
    mcode = script.rawcode
    aliases = []

    # pass 1: rename user defined variables and functions
    for uo in list(userobjects):  # snapshot: the loop adds keys
        otype = userobjects[uo]
        if otype not in ["var", "func"]:
            continue
        uses = len(usages[uo])
        uolen = len(uo)
        if uolen <= 1:
            continue
        kind = 'Function' if otype == 'func' else 'Variable'
        minName = ''
        for i in (single_char if uolen == 2 else short_idents):
            if i not in userobjects:
                minName = i
                userobjects[minName] = "TRN"
                break
        if not minName:
            # assumes the two character names are never exhausted, so `uo` is
            # two characters long and exactly one byte per use is at stake
            print(f"{kind} name {uo} could be shortened but no available "
                  f"names found (would save {uses} bytes)")
            continue
        if not auto_replace:
            print(f"{kind} name {uo} could be shortened ({uo}->{minName}, "
                  f"would save {uses*(uolen - len(minName))} bytes")
            continue
        print(f"Renaming {kind} {uo} to {minName} "
              f"(saving {uses*(uolen - len(minName))} bytes)")
        # use the stored character offsets instead of a blind str.replace()
        mcode = _substitute(mcode, usages[uo], uolen, minName)

    # pass 2: alias stdlib functions that are used more than once
    for func in usages:
        minName = ''
        savings = 0
        uses = len(usages[func])
        if func in userobjects or uses < 2:
            continue
        candidates = short_idents
        if func == "if":
            candidates = single_char
            savings = uses * 2 - 5  # 5 = length of an alias declaration (a=if<space>)
        for i in candidates:
            if i not in userobjects:
                minName = i
                userobjects[minName] = "TRP"
                break
        # again assumes only `if` can run out of candidate names
        if not minName and (uses - 4) > 0:
            print(f"Standard library function {func} could be aliased but no available names found "
                  f"(would save {uses-4} bytes)")
        else:
            if not savings:
                savings = uses*len(func) - (len(func)+len(minName)+2)
            if savings <= 0 or not auto_replace:
                print(f"Not aliasing standard library function {func} (would save {savings} bytes)")
            else:
                print(f"Aliasing standard library function {func} to {minName} (saving {savings} bytes)")
                mcode = _substitute(mcode, usages[func], len(func), minName)
                aliases.append(f"{minName}={func} ")

    # pass 3: introduce variables for strings used more than once
    str_reuse = {}
    for string in script.strings:
        str_reuse.setdefault(string[2], []).append(string[0])
    # string offsets refer to the original script; subtracting the end of the
    # last comment turns them into offsets into mcode
    # NOTE(review): only correct if every comment precedes all code - confirm
    offset = script.comments[-1][1] if script.comments else 0
    for string in str_reuse:
        uses = len(str_reuse[string])
        minName = ""
        if uses > 1 and len(string) > 1:
            for i in (single_char if len(string) == 2 else short_idents):
                if i not in userobjects:
                    minName = i
                    userobjects[minName] = "TIV"
                    break
            # the quotation marks are included in string
            savings = uses * len(string) - (len(string) + len(minName) + 2)
            if savings <= 0 or not auto_replace:
                print(f"Not introducing variable for string {string} reused {uses} times (would save {savings} bytes)")
            else:
                print(f"Introducing variable {minName} with value {string} (saving {savings} bytes)")
                positions = [bound - offset for bound in str_reuse[string]]
                mcode = _substitute(mcode, positions, len(string), minName)
                aliases.append(f"{minName}={string} ")

    print("Reintroducing REQUIREs")
    mcode = "".join([x[2] for x in script.comments]) + "".join(aliases) + mcode
    print("Stripping whitespace")
    return whitespacent(mcode)
|
||||
|
||||
|
||||
def whitespacent(script: str):
|
||||
# also happens to remove unneeded comments and push REQUIREs to the top of the file
|
||||
requires = ""
|
||||
mcode = ""
|
||||
stl_counts = {}.fromkeys(sub_funcs, 0)
|
||||
# while part < len(strings):
|
||||
for line in script.split(sep='\n'):
|
||||
# maybe in future it'll shrink user defined names
|
||||
# dont hold out hope for that because `a.files.foreach("b") {println(b)}` is valid syntax
|
||||
# and i dont have the skill or patience to deal with that
|
||||
|
||||
# # in theory all the even numbered indexes should be outside quotes, so we ignore any parts with an odd index
|
||||
# if part % 2 == 1:
|
||||
# if strings[part] not in str_reuse:
|
||||
# str_reuse[strings[part]] = 0
|
||||
# else:
|
||||
# str_reuse[strings[part]] += 1
|
||||
# mcode += f'"{strings[part]}"'
|
||||
start = commentstartswhere(line)
|
||||
start = hascomment(line)
|
||||
if start is None:
|
||||
start = -1
|
||||
|
||||
|
@ -74,27 +315,20 @@ def minify(script: str):
|
|||
line = line.split(sep='"')
|
||||
|
||||
if len(line) % 2 == 0:
|
||||
print("You appear to have string literals spanning multiple lines. Please seek professional help")
|
||||
raise Exception("Too much hatred")
|
||||
raise Exception("Unmatched quote or hard newline in string")
|
||||
part = 0
|
||||
while part < len(line):
|
||||
# all the odd numbered indexes should be inside quotes
|
||||
if part % 2 == 0:
|
||||
if not line[part]:
|
||||
break
|
||||
for s in sub_funcs:
|
||||
stl_counts[s] += len(re.findall("(?<!\\.)%s\\(" % s, line[part]))
|
||||
mcode += line[part].replace('\t', '') + ' '
|
||||
else:
|
||||
if line[part] not in str_reuse:
|
||||
str_reuse[line[part]] = 0
|
||||
else:
|
||||
str_reuse[line[part]] += 1
|
||||
mcode += f'"{line[part]}"'
|
||||
|
||||
part += 1
|
||||
|
||||
|
||||
mcode = " ".join(mcode.split()) # turn lots of whitespace into one whitespace with one easy trick!
|
||||
# tsv3 is still an absolute nightmare
|
||||
# so spaces have a couple edge cases
|
||||
# 1. the - operator which requires space between the right operand
|
||||
|
@ -107,12 +341,10 @@ def minify(script: str):
|
|||
newline = list(mcode)
|
||||
while index < (len(mcode) - 3):
|
||||
sec = mcode[index:index + 3]
|
||||
if not inquote and sec[1] == '"':
|
||||
inquote = True
|
||||
elif inquote and sec[1] == '"':
|
||||
inquote = False
|
||||
if sec[1] == '"':
|
||||
inquote = not inquote
|
||||
if (sec[1] == ' ') and not inquote:
|
||||
if wantsumspace(sec[0]) and wantsumspace(sec[2]):
|
||||
if (isidentifier(sec[0]) or sec[0].isnumeric()) and (isidentifier(sec[2]) or sec[2].isnumeric()):
|
||||
pass
|
||||
elif sec[0] == '-' and sec[2].isnumeric():
|
||||
pass
|
||||
|
@ -121,62 +353,29 @@ def minify(script: str):
|
|||
index += 1
|
||||
mmcode += ''.join(newline).strip()
|
||||
|
||||
for func in sub_funcs:
|
||||
# space saved here is given by n * (len(func) - len(min_func)) - (len(min_func)+1 + len(func))
|
||||
# as such with one usage space is always lost (len(func)-2 is never > len(func)+3) so dont even try
|
||||
if stl_counts[func] >= 2:
|
||||
savings = stl_counts[func] * (len(func) - 2) - (len(func) + 3)
|
||||
print(f"Replacing all {stl_counts[func]} usages of {func} would save {savings}byte{'s' if savings != 1 else ''}")
|
||||
if (savings < 0) or not replace_functions:
|
||||
print("Savings negative or automatic replacement disabled, continuing")
|
||||
continue
|
||||
func_min = sub_funcs[func] # now here we have to assume nobody is using any of our substitute vars
|
||||
# should be a pretty safe assumption but knowing for sure would require about the same amount of effort
|
||||
# as it would to replace all user defined variables
|
||||
ucode = "" # this is rather hacky
|
||||
sections = [0]
|
||||
for m in re.finditer(r"(?<!\.)%s\(" % func, mmcode):
|
||||
sections.append(m.span()[0])
|
||||
sections.append(m.span()[1])
|
||||
sections.append(len(mmcode))
|
||||
i = 2 # change rather to very
|
||||
while i < len(sections):
|
||||
ucode += mmcode[sections[i-2]:sections[i-1]] + func_min + '('
|
||||
i += 2
|
||||
ucode += mmcode[sections[i-2]:]
|
||||
|
||||
ucode = f"{func_min}={func}\n" + ucode
|
||||
mmcode = ucode
|
||||
# a space isn't any shorter than \n so why not use \n
|
||||
|
||||
for string, count in str_reuse.items():
|
||||
if count >= 2:
|
||||
# we can't auto replace strings without a full parser
|
||||
# unlike with the stdlib functions we cant make a lookup table ahead of time
|
||||
# and generating shorter names on the fly sounds like an absolute nightmare no thanks
|
||||
print(f'Warning: string "{string}" of len {len(string)} reused {count} times')
|
||||
|
||||
return requires + mmcode.strip()
|
||||
return requires + mmcode.strip().replace('\n', ' ')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description="Minify tsv3 scripts, useful for embedding",
|
||||
argparser = argparse.ArgumentParser(description="Minify tsv3 scripts, useful for embedding",
|
||||
formatter_class=argparse.RawTextHelpFormatter)
|
||||
parser.add_argument("source", type=str, nargs='+', help="source files to minify")
|
||||
parser.add_argument("-d", type=str, nargs='?', help="destination folder for minified scripts"
|
||||
argparser.add_argument("source", type=str, nargs='+', help="source files to minify")
|
||||
argparser.add_argument("-d", type=str, nargs='?', help="destination folder for minified scripts"
|
||||
"\ndefault: ./", default='./')
|
||||
parser.add_argument("--replace-functions", action="store_true", default=False,
|
||||
help="automatically replace reused functions instead of just warning\ndefault: false")
|
||||
argparser.add_argument("--auto-replace", action="store_true", default=False,
|
||||
help="automatically replace reused functions and variables instead of just warning\n"
|
||||
"and attempt to generate shorter names for reused variables \ndefault: false")
|
||||
|
||||
args = parser.parse_args()
|
||||
args = argparser.parse_args()
|
||||
files = args.source
|
||||
dest = args.d[:-1] if args.d[-1] == '/' else args.d
|
||||
replace_functions = args.replace_functions if args.replace_functions is not None else False
|
||||
auto_replace = args.auto_replace if args.auto_replace is not None else False
|
||||
|
||||
for file in files:
|
||||
print(f"Minifying {file}")
|
||||
print(f"\nMinifying {file}")
|
||||
with open(file, 'r') as f:
|
||||
r = minify(f.read())
|
||||
print("Stripping comments")
|
||||
r = parser(whitespacent(f.read()))
|
||||
file = file.split(sep='.')[0].split(sep='/')[-1]
|
||||
if dest != '.':
|
||||
f = open(f"{dest}/{file}.te", 'w')
|
||||
|
|
Loading…
Reference in a new issue