ts-minifier gets some spring cleaning

i dont care if its not spring just try and stop me
2024-11-22 11:56:42 +00:00 · 2021-09-12 15:52:06 +01:00 · 2021-09-12 15:52:06 +01:00 · 13d320a355
commit 13d320a355
parent b9b6e07eb0
1 changed files with 115 additions and 107 deletions
--- a/ts-minifier.py
+++ b/ts-minifier.py
@ -1,10 +1,11 @@
 # Copyright (c) 2021 bleck9999
 # https://github.com/bleck9999/ts-minifier
-# Version: d7106796
+# Version: 9d56e91a
 import argparse
 import itertools
-import string
+import logging
 from os import path
 from string import ascii_letters, digits, hexdigits
 auto_replace = False
@ -32,21 +33,7 @@ class Code:
        self.comments = comments
        self.code = code
        self.varstrs = []
-        self.rawcode = "".join([x[2] for x in sorted(self.code+self.strings)])
+        self.rawcode = "".join([x[2] for x in sorted(self.code + self.strings)])
    def getafter(self, ch: int):
        ch += self.comments[-1][1] if self.comments else 0
        for strcom in self.strings:
            if strcom[0] >= ch:
                return strcom
        return None
    def nextch(self, ch: int, reverse: bool):
        rawcontent = self.rawcode
        if ((ch+1 >= len(rawcontent)) and not reverse) or \
                ((ch-1 < 0) and reverse):
            return ''
        return rawcontent[ch-1] if reverse else rawcontent[ch+1]
 def isidentifier(s: str):
@ -110,7 +97,7 @@ def parser(script: str):
    # last identifier in a script (eg if script.rawcode was "a=12" the 12 wouldn't be detected without the trailing ' ')
    start = len(strscript) + 1
    for ch in range(len(strscript)):
-        if (strscript[ch-1] == '0' and strscript[ch] == 'x') and not quoted:
+        if (strscript[ch - 1] == '0' and strscript[ch] == 'x') and not quoted:
            hexxed = True
        elif isidentifier(strscript[ch]) and not (hexxed or quoted):
            if start > ch:
@ -122,8 +109,8 @@ def parser(script: str):
        elif strscript[ch] == '"':
            quoted = not quoted
        elif not quoted:
-            if start != len(strscript)+1 and not ismember:  # if we actually had an identifier before this char
+            if start != len(strscript) + 1 and not ismember:  # if we actually had an identifier before this char
-                identifier = strscript[start:ch]            # and this isnt a member of anything
+                identifier = strscript[start:ch]  # and this isnt a member of anything
                if identifier in usages:
                    usages[identifier].append(start)
                elif identifier.isnumeric():  # numbers are legally valid identifiers because fuckyou
@ -131,8 +118,8 @@ def parser(script: str):
                    userobjects[identifier] = "INT"
                elif identifier == "0x":
                    pass
-                elif strscript[ch] == '=' and strscript[ch+1] != '=':
+                elif strscript[ch] == '=' and strscript[ch + 1] != '=':
-                    isfunc = script.nextch(ch, False) == '{'
+                    isfunc = strscript[ch + 1] == '{'
                    userobjects[identifier] = "func" if isfunc else "var"
                    usages[identifier] = [start]  # declaration is a usage because i cant be arsed
                else:  # not an assignment (or member) but also haven't seen this name before
@ -155,7 +142,7 @@ def parser(script: str):
                        ismember = False
                        pass
            elif strscript[ch] in ')}]':
-                ismember = script.nextch(ch, False) == '.'
+                ismember = strscript[ch + 1] == '.'
            start = len(strscript) + 1
    return script, userobjects, usages
@ -170,10 +157,13 @@ def minify(script: Code, userobjects, usages):
    #                                                                          ^ 2 for = and whitespace, 2 for ""
    #
    # obviously for a rename you're already defining it so it's just the difference between lengths multiplied by uses
-    short_idents = [x for x in (ascii_letters+'_')] + [x[0]+x[1] for x in itertools.product(ascii_letters+'_', repeat=2)]
+    short_idents = [x for x in (ascii_letters + '_')] + [x[0] + x[1] for x in
                                                         itertools.product(ascii_letters + '_', repeat=2)]
    short_idents.pop(short_idents.index("if"))
    mcode = script.rawcode
    aliases = []
    logging.info("Renaming user functions and variables" if auto_replace else
                 "Checking user function and variable names")
    for uo in [x for x in userobjects]:
        if userobjects[uo] not in ["var", "func"]:
            continue
@ -189,23 +179,23 @@ def minify(script: Code, userobjects, usages):
            for i in candidates:
                if i not in userobjects:
                    minName = i
                    userobjects[minName] = "TRN"
                    break
-            if verbose and not minName:
+            if not minName:
-                print(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened but "
+                logging.info(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened but "
-                      f"no available names found (would save {uses} bytes)")
+                             f"no available names found (would save {uses} bytes)")
                continue
                # we assume that nobody is insane enough to exhaust all *2,808* 2 character names,
                # instead that uo is len 2 and all the 1 character names are in use (because of that we dont multiply
                # uses by anything as multiplying by a difference of 1 would be redundant)
            if not auto_replace:
-                print(f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened ({uo}->{minName}, "
+                logging.warning(
-                      f"would save {uses*(uolen - len(minName))} bytes)")
+                    f"{'Function' if otype == 'func' else 'Variable'} name {uo} could be shortened ({uo}->{minName}, "
                    f"would save {uses * (uolen - len(minName))} bytes)")
                continue
            else:
-                userobjects[minName] = "TRN"
+                logging.info(f"Renaming {'Function' if otype == 'func' else 'Variable'} {uo} to {minName} "
-                if verbose:
+                             f"(saving {uses * (uolen - len(minName))} bytes)")
                    print(f"Renaming {'Function' if otype == 'func' else 'Variable'} {uo} to {minName} "
                          f"(saving {uses*(uolen - len(minName))} bytes)")
                diff = uolen - len(minName)
                # the foreach syntax is literally the worst part of ts
@ -213,8 +203,7 @@ def minify(script: Code, userobjects, usages):
                    struo = f'"{uo}"'
                    for varstr in script.varstrs:
                        if varstr[2] == struo:
-                            if verbose:
+                            logging.info(f"Replacing declaration of {varstr[2]} at {varstr[0]}-{varstr[1]}")
                                print(f"Replacing declaration of {varstr[2]} at {varstr[0]}-{varstr[1]}")
                            start = varstr[0] - (script.comments[-1][1] if script.comments else 0)
                            end = varstr[1] - (script.comments[-1][1] if script.comments else 0)
                            newend = start + len(minName)
@ -223,11 +212,13 @@ def minify(script: Code, userobjects, usages):
                # rather than just blindly str.replace()ing we're going to actually use the character indices that we stored
                prev = 0
                for bound in usages[uo]:
-                    tmpcode += mcode[prev:bound] + minName + ' '*diff
+                    tmpcode += mcode[prev:bound] + minName + ' ' * diff
                    prev = bound + diff + len(minName)
                # actually shut up about "bound might be referenced before assignment" or show me what possible
                # execution path that could lead to usages[uo] being an empty list
-                mcode = tmpcode + mcode[bound+diff+len(minName):]
+                mcode = tmpcode + mcode[bound + diff + len(minName):]
    logging.info("Aliasing standard library functions" if auto_replace else
                 "Checking for standard library aliases")
    for func in usages:
        tmpcode = ""
        candidates = short_idents
@ -244,27 +235,28 @@ def minify(script: Code, userobjects, usages):
                minName = i
                break
        # once again we assume it's only `if` that could trigger this message
-        # uses - 4 is the minimum amount of uses needed to save space, 1*(uses - 4) is the space it would save
+        # 4 is the minimum amount of uses needed to save space, 1*(uses - 4) is the space it would save
-        if not minName and (uses - 4) > 0:
+        if not minName and uses > 4:
-            if verbose:
+            logging.info(f"Standard library function {func} could be aliased but no available names found "
-                print(f"Standard library function {func} could be aliased but no available names found "
+                         f"(would save {uses - 4} bytes)")
                      f"(would save {uses-4} bytes)")
        else:
            if not savings:
-                savings = uses*len(func) - (len(func)+len(minName)+2)
+                savings = uses * len(func) - (len(func) + len(minName) + 2)
-            if (verbose and savings <= 0) or (not auto_replace and savings > 0):
+            if savings > 0:
                print(f"Not aliasing standard library function {func} (would save {savings} bytes)")
            elif auto_replace and savings > 0:
                userobjects[minName] = "TRP"
-                if verbose:
+                if auto_replace:
-                    print(f"Aliasing standard library function {func} to {minName} (saving {savings} bytes)")
+                    logging.info(f"Aliasing standard library function {func} to {minName} (saving {savings} bytes)")
-                diff = len(func) - len(minName)
+                    diff = len(func) - len(minName)
-                prev = 0
+                    prev = 0
-                for bound in usages[func]:
+                    for bound in usages[func]:
-                    tmpcode += mcode[prev:bound] + minName + ' ' * diff
+                        tmpcode += mcode[prev:bound] + minName + ' ' * diff
-                    prev = bound + diff + len(minName)
+                        prev = bound + diff + len(minName)
-                mcode = tmpcode + mcode[bound + diff + len(minName):]
+                    mcode = tmpcode + mcode[bound + diff + len(minName):]
-                aliases.append(f"{minName}={func} ")
+                    aliases.append(f"{minName}={func} ")
                else:
                    logging.warning(f"Not aliasing standard library function {func} (would save {savings} bytes)")
            else:
                logging.info(f"Not aliasing standard library function {func} (would save {savings} bytes)")
    str_reuse = {}
    for string in script.strings:
@ -272,6 +264,8 @@ def minify(script: Code, userobjects, usages):
            str_reuse[string[2]].append(string[0])
        else:
            str_reuse[string[2]] = [string[0]]
    logging.info("Introducing variables for reused literals" if auto_replace else
                 "Checking for reused literals")
    for string in str_reuse:
        tmpcode = ""
        candidates = short_idents
@ -286,29 +280,32 @@ def minify(script: Code, userobjects, usages):
                    break
            if not minName:
                savings = len(string) * uses - (len(string) + 5)  # 5 comes from id="{string}"
-                if verbose:
+                logging.info(f"Could introduce variable for reused string {string} but no available names found "
-                    print(f"Could introduce variable for reused string {string} but no available names found "
+                             f"(would save {savings} bytes)")
                          f"(would save {savings} bytes)")
                continue
            # the quotation marks are included in string
            savings = uses * len(string) - (len(string) + len(minName) + 2)
-            if (verbose and savings <= 0) or (not auto_replace and savings > 0):
+            if savings > 0:
                print(f"Not introducing variable for string {string} reused {uses} times (would save {savings} bytes)")
            elif auto_replace and savings > 0:
                # "duplicated code fragment" do i look like i give a shit
                userobjects[minName] = "TIV"
-                if verbose:
+                if auto_replace:
-                    print(f"Introducing variable {minName} with value {string} (saving {savings} bytes)")
+                    # "duplicated code fragment" do i look like i give a shit
-                diff = len(string) - len(minName)
+                    logging.info(f"Introducing variable {minName} with value {string} (saving {savings} bytes)")
-                prev = 0
+                    diff = len(string) - len(minName)
-                for bound in str_reuse[string]:
+                    prev = 0
-                    bound -= script.comments[-1][1] if script.comments else 0
+                    for bound in str_reuse[string]:
-                    tmpcode += mcode[prev:bound] + minName + ' ' * diff
+                        bound -= script.comments[-1][1] if script.comments else 0
-                    prev = bound + diff + len(minName)
+                        tmpcode += mcode[prev:bound] + minName + ' ' * diff
-                mcode = tmpcode + mcode[bound + diff + len(minName):]
+                        prev = bound + diff + len(minName)
-                aliases.append(f"{minName}={string}")
+                    mcode = tmpcode + mcode[bound + diff + len(minName):]
-        elif verbose:
+                    aliases.append(f"{minName}={string}")
-            print(f"Not introducing variable for string {string} (only used once)")
+                else:
                    logging.warning(f"Not introducing variable for string {string} reused {uses} times "
                                    f"(would save {savings} bytes)")
            else:
                logging.info(f"Not introducing variable for string {string} reused {uses} times "
                             f"(would save {savings} bytes)")
        else:
            logging.info(f"Not introducing variable for string {string} (only used once)")
    for uint in [x for x in userobjects]:
        if userobjects[uint] != "INT" or len(uint) < 2:
@ -328,35 +325,37 @@ def minify(script: Code, userobjects, usages):
            if not minName:
                # yet another case of "nobody could possibly use up all the 2 char names we hope"
                savings = uilen * uses - (uilen + 4)  # 4 comes from id={uint}<whitespace>
-                if verbose:
+                logging.info(f"Could introduce variable for reused integer {uint} but no available names found "
-                    print(f"Could introduce variable for reused integer {uint} but no available names found "
+                             f"(would save {savings} bytes)")
                          f"(would save {savings} bytes)")
                continue
            savings = uilen * uses - (uilen + len(minName) + 2)
-            if (verbose and savings <= 0) or (not auto_replace and savings > 0):
+            if savings > 0:
                print(f"Not introducing variable for string {uint} reused {uses} times (would save {savings} bytes)")
            elif auto_replace and savings > 0:
                userobjects[minName] = "TIV"
-                if verbose:
+                if auto_replace:
-                    print(f"Introducing variable {minName} with value {uint} (saving {savings} bytes)")
+                    logging.info(f"Introducing variable {minName} with value {uint} (saving {savings} bytes)")
-                diff = len(uint) - len(minName)
+                    diff = len(uint) - len(minName)
-                prev = 0
+                    prev = 0
-                for bound in usages[uint]:
+                    for bound in usages[uint]:
-                    tmpcode += mcode[prev:bound] + minName + ' ' * diff
+                        tmpcode += mcode[prev:bound] + minName + ' ' * diff
-                    prev = bound + diff + len(minName)
+                        prev = bound + diff + len(minName)
-                mcode = tmpcode + mcode[bound + diff + len(minName):]
+                    mcode = tmpcode + mcode[bound + diff + len(minName):]
-                aliases.append(f"{minName}={uint} ")
+                    aliases.append(f"{minName}={uint} ")
-        elif verbose:
+                else:
-            print(f"Not introducing variable for int {uint} (only used once)")
+                    logging.warning(f"Not introducing variable for string {uint} reused {uses} times "
                                    f"(would save {savings} bytes)")
            else:
                logging.info(f"Not introducing variable for string {uint} reused {uses} times "
                             f"(would save {savings} bytes)")
        else:
            logging.info(f"Not introducing variable for int {uint} (only used once)")
-    print("Reintroducing REQUIREs")
+    logging.info("Reintroducing REQUIREs")
    mcode = "".join([x[2] for x in script.comments]) + "".join(aliases) + mcode
-    print("Stripping whitespace")
+    return mcode
    return whitespacent(mcode)
 def whitespacent(script: str):
-    # also removes unneeded comments and push REQUIREs to the top of the file
+    # also removes unneeded comments and pushes REQUIREs to the top of the file
    requires = ""
    mcode = ""
    for line in script.split(sep='\n'):
@ -393,9 +392,9 @@ def whitespacent(script: str):
            part += 1
    # tsv3 is still an absolute nightmare
-    # so spaces are required under two situations
+    # so spaces should be preserved under two situations
-    # 1. the minus operator which requires space between the right operand but only if the right operand is a literal
+    # 1. the subtraction operator which requires space between the right operand but only if the right operand is a literal
-    # 2. between 2 characters that are either valid identifiers (aA-zZ or _) or integers
+    # 2. between 2 characters that are valid identifiers (aA-zZ, _ or integers)
    inquote = False
    mmcode = ""
    index = 0
@ -425,25 +424,34 @@ if __name__ == '__main__':
                                                           "\ndefault: ./", default='./')
    argparser.add_argument("--auto-replace", action="store_true", default=False,
                           help="automatically replace reused functions, variables and strings instead of just warning\n"
-                           "and attempt to generate shorter names for reused variables \ndefault: false")
+                                "and attempt to generate shorter names for reused variables \ndefault: false")
    argparser.add_argument("-v", action="store_true", default=False,
                           help="prints even more information to the console than usual")
    args = argparser.parse_args()
    files = args.source
-    dest = args.d[:-1] if args.d[-1] == '/' else args.d
+    dest = args.d[:-1] if args.d[-1] in '/\\' else args.d
    auto_replace = args.auto_replace
-    verbose = args.v
+    verbose = "INFO" if args.v else "WARNING"
    logging.basicConfig(level=verbose, format="{message}", style='{')
    print(f"Automatic replacement: {'ENABLED' if auto_replace else 'DISABLED'}")
    for file in files:
        print(f"\nMinifying {file}")
        with open(file, 'r') as f:
-            print("Stripping comments")
+            logging.info("Stripping comments and whitespace (pass 1)")
-            res = parser(whitespacent(f.read()))
+            r = whitespacent(f.read())
-            r = minify(res[0], res[1], res[2])
+            logging.info("Parsing file")
            r = parser(r)
            logging.info("Searching for optimisations")
            r = minify(r[0], r[1], r[2])
            logging.info("Stripping whitespace (pass 2)")
            r = whitespacent(r)
        file = file.split(sep='.')[0].split(sep='/')[-1]
-        if dest != '.':
+        if path.exists(f"{dest}/{file}.te"):
            f = open(f"{dest}/{file}.te", 'w')
        else:
            f = open(f"{dest}/{file}_min.te", 'w')
        else:
            f = open(f"{dest}/{file}.te", 'w')
        logging.info(f"Writing to {f.name}")
        f.write(r)
        print("Done!")