diff --git a/source/elf_symbol.h b/source/elf_symbol.h
new file mode 100644
index 0000000..b3e0c06
--- /dev/null
+++ b/source/elf_symbol.h
@@ -0,0 +1,91 @@
+/*
+ * elf_symbol.h
+ *
+ * Copyright (c) 1991-2020, Linus Torvalds and Linux Kernel project contributors.
+ * Copyright (c) 2020, DarkMatterCore <pabloacurielz@gmail.com>.
+ *
+ * This file is part of nxdumptool (https://github.com/DarkMatterCore/nxdumptool).
+ *
+ * nxdumptool is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * nxdumptool is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#ifndef __ELF_SYMBOL_H__
+#define __ELF_SYMBOL_H__
+
+/* Symbol types. */
+#define STT_NOTYPE              0           /* The symbol type not defined. */
+#define STT_OBJECT              1           /* The symbol is associated with a data object. */
+#define STT_FUNC                2           /* The symbol is associated with a function or other executable code. */
+#define STT_SECTION             3           /* The symbol is associated with a section. */
+#define STT_FILE                4           /* The symbol's name gives the name of the source file associated with the object file. */
+#define STT_COMMON              5           /* The symbol labels an uninitialized common block. */
+#define STT_TLS                 6           /* The symbol specifies a Thread-Local Storage entity. */
+#define STT_LOOS                10          /* Reserved for operating system-specific semantics. */
+#define STT_HIOS                12          /* Reserved for operating system-specific semantics. */
+#define STT_LOPROC              13          /* Reserved for processor-specific semantics. */
+#define STT_HIPROC              15          /* Reserved for processor-specific semantics. */
+
+/* Symbol binding attributes. */
+#define STB_LOCAL               0           /* Local symbols are not visible outside the object file containing their definition. */
+#define STB_GLOBAL              1           /* Global symbols are visible to all object files being combined. */
+#define STB_WEAK                2           /* Weak symbols resemble global symbols, but their definitions have lower precedence. */
+#define STB_LOOS                10          /* Reserved for operating system-specific semantics. */
+#define STB_HIOS                12          /* Reserved for operating system-specific semantics. */
+#define STB_LOPROC              13          /* Reserved for processor-specific semantics. */
+#define STB_HIPROC              15          /* Reserved for processor-specific semantics. */
+
+/* Symbol visibility. */
+#define STV_DEFAULT             0           /* The symbol visibility is specified by the its binding attribute. */
+#define STV_INTERNAL            1           /* The meaning of this visibility attribute may be defined by processor supplements to further constrain hidden symbols. */
+#define STV_HIDDEN              2           /* The symbol name is not visible to other components. */
+#define STV_PROTECTED           3           /* The symbol is visible in other components but not preemptable. */
+
+/* Section header index. */
+#define SHN_UNDEF               0           /* The symbol is undefined. */
+#define SHN_LORESERVE           0xFF00
+#define SHN_LOPROC              0xFF00
+#define SHN_HIPROC              0xFF1F
+#define SHN_LOOS                0xFF20
+#define SHN_LIVEPATCH           0xFF20
+#define SHN_HIOS                0xFF3F
+#define SHN_ABS                 0xFFF1      /* The symbol has an absolute value that will not change because of relocation. */
+#define SHN_COMMON              0xFFF2      /* Labels a common block that has not yet been allocated. */
+#define SHN_XINDEX              0xFFFF      /* The symbol refers to a specific location within a section, but the section header index is too large to be represented directly. */
+#define SHN_HIRESERVE           0xFFFF      
+
+/* Macros. */
+#define ELF_ST_BIND(x)          ((x) >> 4)  /* Extracts binding value from a st_info field. */
+#define ELF_ST_TYPE(x)          ((x) & 0xF) /* Extracts type value from a st_info field. */
+#define ELF_ST_VISIBILITY(x)    ((x) & 0x3) /* Extracts visibility value from a st_other field. */
+
+typedef struct {
+    u32 st_name;    ///< Symbol name offset within dynamic string table.
+    u32 st_value;   ///< Symbol value.
+    u32 st_size;    ///< Symbol size.
+    u8 st_info;     ///< Symbol type (lower nibble) and binding attributes (upper nibble).
+    u8 st_other;    ///< Currently specifies a symbol's visibility  (lower 3 bits).
+    u16 st_shndx;   ///< Holds the relevant section header table index.
+} Elf32Symbol;
+
+typedef struct {
+    u32 st_name;    ///< Symbol name offset within dynamic string table.
+    u8 st_info;     ///< Symbol type (lower nibble) and binding attributes (upper nibble).
+    u8 st_other;    ///< Currently specifies a symbol's visibility (lower 3 bits).
+    u16 st_shndx;   ///< Holds the relevant section header table index.
+    u64 st_value;   ///< Symbol value.
+    u64 st_size;    ///< Symbol size.
+} Elf64Symbol;
+
+#endif /* __ELF_SYMBOL_H__ */
diff --git a/source/lz4.c b/source/lz4.c
index e614c45..9808d70 100644
--- a/source/lz4.c
+++ b/source/lz4.c
@@ -106,6 +106,7 @@
 #define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
 #endif
 
+#define LZ4_STATIC_LINKING_ONLY  /* LZ4_DISTANCE_MAX */
 #include "lz4.h"
 /* see also "memory routines" below */
 
@@ -182,6 +183,60 @@
 #define MEM_INIT(p,v,s)   memset((p),(v),(s))
 
 
+/*-************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
+#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+*  Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                                  \
+                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+                    fprintf(stderr, __FILE__ ": ");           \
+                    fprintf(stderr, __VA_ARGS__);             \
+                    fprintf(stderr, " \n");                   \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
+
+
 /*-************************************
 *  Types
 **************************************/
@@ -317,6 +372,11 @@ static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
 #ifndef LZ4_FAST_DEC_LOOP
 #  if defined(__i386__) || defined(__x86_64__)
 #    define LZ4_FAST_DEC_LOOP 1
+#  elif defined(__aarch64__) && !defined(__clang__)
+     /* On aarch64, we disable this optimization for clang because on certain
+      * mobile chipsets and clang, it reduces performance. For more information
+      * refer to https://github.com/lz4/lz4/pull/707. */
+#    define LZ4_FAST_DEC_LOOP 1
 #  else
 #    define LZ4_FAST_DEC_LOOP 0
 #  endif
@@ -358,29 +418,35 @@ LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
     do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
 }
 
+/* LZ4_memcpy_using_offset()  presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there is at least 8 bytes available to write after dstEnd */
 LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
 {
     BYTE v[8];
+
+    assert(dstEnd >= dstPtr + MINMATCH);
+    LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
+
     switch(offset) {
     case 1:
         memset(v, *srcPtr, 8);
-        goto copy_loop;
+        break;
     case 2:
         memcpy(v, srcPtr, 2);
         memcpy(&v[2], srcPtr, 2);
         memcpy(&v[4], &v[0], 4);
-        goto copy_loop;
+        break;
     case 4:
         memcpy(v, srcPtr, 4);
         memcpy(&v[4], srcPtr, 4);
-        goto copy_loop;
+        break;
     default:
         LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
         return;
     }
 
- copy_loop:
     memcpy(dstPtr, v, 8);
     dstPtr += 8;
     while (dstPtr < dstEnd) {
@@ -391,63 +457,6 @@ LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const si
 #endif
 
 
-/*-************************************
-*  Common Constants
-**************************************/
-#define MINMATCH 4
-
-#define WILDCOPYLENGTH 8
-#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
-#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
-#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
-#define FASTLOOP_SAFE_DISTANCE 64
-static const int LZ4_minLength = (MFLIMIT+1);
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#ifndef LZ4_DISTANCE_MAX   /* can be user - defined at compile time */
-#  define LZ4_DISTANCE_MAX 65535
-#endif
-
-#if (LZ4_DISTANCE_MAX > 65535)   /* max supported by LZ4 format */
-#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
-#endif
-
-#define ML_BITS  4
-#define ML_MASK  ((1U<<ML_BITS)-1)
-#define RUN_BITS (8-ML_BITS)
-#define RUN_MASK ((1U<<RUN_BITS)-1)
-
-
-/*-************************************
-*  Error detection
-**************************************/
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
-#  include <assert.h>
-#else
-#  ifndef assert
-#    define assert(condition) ((void)0)
-#  endif
-#endif
-
-#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
-
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
-#  include <stdio.h>
-static int g_debuglog_enable = 1;
-#  define DEBUGLOG(l, ...) {                                  \
-                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
-                    fprintf(stderr, __FILE__ ": ");           \
-                    fprintf(stderr, __VA_ARGS__);             \
-                    fprintf(stderr, " \n");                   \
-            }   }
-#else
-#  define DEBUGLOG(l, ...)      {}    /* disabled */
-#endif
-
-
 /*-************************************
 *  Common functions
 **************************************/
@@ -460,7 +469,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanForward64( &r, (U64)val );
             return (int)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctzll((U64)val) >> 3);
+            return (unsigned)__builtin_ctzll((U64)val) >> 3;
 #       else
             static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                      0, 3, 1, 3, 1, 4, 2, 7,
@@ -478,7 +487,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanForward( &r, (U32)val );
             return (int)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctz((U32)val) >> 3);
+            return (unsigned)__builtin_ctz((U32)val) >> 3;
 #       else
             static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                      3, 2, 2, 1, 3, 2, 0, 1,
@@ -494,7 +503,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanReverse64( &r, val );
             return (unsigned)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clzll((U64)val) >> 3);
+            return (unsigned)__builtin_clzll((U64)val) >> 3;
 #       else
             static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
                 Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
@@ -511,7 +520,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
             _BitScanReverse( &r, (unsigned long)val );
             return (unsigned)(r>>3);
 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clz((U32)val) >> 3);
+            return (unsigned)__builtin_clz((U32)val) >> 3;
 #       else
             unsigned r;
             if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
@@ -606,9 +615,11 @@ int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
 extern "C" {
 #endif
 
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
 
-int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize);
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize);
 
 #if defined (__cplusplus)
 }
@@ -643,6 +654,18 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
     return LZ4_hash4(LZ4_read32(p), tableType);
 }
 
+static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+    switch (tableType)
+    {
+    default: /* fallthrough */
+    case clearedTable: { /* illegal! */ assert(0); return; }
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
+    }
+}
+
 static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
 {
     switch (tableType)
@@ -703,18 +726,19 @@ static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType
     { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
 }
 
-LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p,
-                                             const void* tableBase, tableType_t tableType,
-                                             const BYTE* srcBase)
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+                const void* tableBase, tableType_t tableType,
+                const BYTE* srcBase)
 {
     U32 const h = LZ4_hashPosition(p, tableType);
     return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
 }
 
-LZ4_FORCE_INLINE void LZ4_prepareTable(
-        LZ4_stream_t_internal* const cctx,
-        const int inputSize,
-        const tableType_t tableType) {
+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+           const int inputSize,
+           const tableType_t tableType) {
     /* If compression failed during the previous step, then the context
      * is marked as dirty, therefore, it has to be fully reset.
      */
@@ -729,9 +753,10 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(
      * out if it's safe to leave as is or whether it needs to be reset.
      */
     if (cctx->tableType != clearedTable) {
+        assert(inputSize >= 0);
         if (cctx->tableType != tableType
-          || (tableType == byU16 && cctx->currentOffset + inputSize >= 0xFFFFU)
-          || (tableType == byU32 && cctx->currentOffset > 1 GB)
+          || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
+          || ((tableType == byU32) && cctx->currentOffset > 1 GB)
           || tableType == byPtr
           || inputSize >= 4 KB)
         {
@@ -811,9 +836,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
     /* If init conditions are not met, we don't have to mark stream
      * as having dirty context, since no action was taken yet */
-    if (outputDirective == fillOutput && maxOutputSize < 1) return 0;   /* Impossible to store anything */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;           /* Unsupported inputSize, too large (or negative) */
-    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
+    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }           /* Unsupported inputSize, too large (or negative) */
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; }  /* Size too large (not within 64K limit) */
     if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
     assert(acceleration >= 1);
 
@@ -841,6 +866,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     for ( ; ; ) {
         const BYTE* match;
         BYTE* token;
+        const BYTE* filledIp;
 
         /* Find a match */
         if (tableType == byPtr) {
@@ -909,10 +935,14 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
                 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
 
-                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
+                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
                 assert(matchIndex < current);
-                if ((tableType != byU16) && (matchIndex+LZ4_DISTANCE_MAX < current)) continue;  /* too far */
-                if (tableType == byU16) assert((current - matchIndex) <= LZ4_DISTANCE_MAX);     /* too_far presumed impossible with byU16 */
+                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+                    continue;
+                } /* too far */
+                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */
 
                 if (LZ4_read32(match) == LZ4_read32(ip)) {
                     if (maybe_extMem) offset = current - matchIndex;
@@ -923,15 +953,16 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         }
 
         /* Catch up */
+        filledIp = ip;
         while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
 
         /* Encode Literals */
         {   unsigned const litLength = (unsigned)(ip - anchor);
             token = op++;
             if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
-                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) )
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
                 return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
-
+            }
             if ((outputDirective == fillOutput) &&
                 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                 op--;
@@ -1002,12 +1033,26 @@ _next_match:
             }
 
             if ((outputDirective) &&    /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
                 if (outputDirective == fillOutput) {
                     /* Match description too long : reduce it */
-                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                     ip -= matchCode - newMatchCode;
+                    assert(newMatchCode < matchCode);
                     matchCode = newMatchCode;
+                    if (unlikely(ip <= filledIp)) {
+                        /* We have already filled up to filledIp so if ip ends up less than filledIp
+                         * we have positions in the hash table beyond the current position. This is
+                         * a problem if we reuse the hash table. So we have to remove these positions
+                         * from the hash table.
+                         */
+                        const BYTE* ptr;
+                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+                        for (ptr = ip; ptr <= filledIp; ++ptr) {
+                            U32 const h = LZ4_hashPosition(ptr, tableType);
+                            LZ4_clearHash(h, cctx->hashTable, tableType);
+                        }
+                    }
                 } else {
                     assert(outputDirective == limitedOutput);
                     return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
@@ -1027,6 +1072,8 @@ _next_match:
             } else
                 *token += (BYTE)(matchCode);
         }
+        /* Ensure we have enough space for the last literals. */
+        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
 
         anchor = ip;
 
@@ -1076,7 +1123,7 @@ _next_match:
             LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
             assert(matchIndex < current);
             if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
-              && ((tableType==byU16) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
+              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
               && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                 token=op++;
                 *token=0;
@@ -1143,7 +1190,7 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
         }
     } else {
-        if (inputSize < LZ4_64Klimit) {;
+        if (inputSize < LZ4_64Klimit) {
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
         } else {
             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
@@ -1306,12 +1353,12 @@ static size_t LZ4_stream_t_alignment(void)
 LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
 {
     DEBUGLOG(5, "LZ4_initStream");
-    if (buffer == NULL) return NULL;
-    if (size < sizeof(LZ4_stream_t)) return NULL;
+    if (buffer == NULL) { return NULL; }
+    if (size < sizeof(LZ4_stream_t)) { return NULL; }
 #ifndef _MSC_VER  /* for some reason, Visual fails the aligment test on 32-bit x86 :
                      it reports an aligment of 8-bytes,
                      while actually aligning LZ4_stream_t on 4 bytes. */
-    if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) return NULL;  /* alignment check */
+    if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */
 #endif
     MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
     return (LZ4_stream_t*)buffer;
@@ -1361,18 +1408,18 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
      * there are only valid offsets in the window, which allows an optimization
      * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
      * dictionary isn't a full 64k. */
-
-    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
-    base = dictEnd - 64 KB - dict->currentOffset;
-    dict->dictionary = p;
-    dict->dictSize = (U32)(dictEnd - p);
     dict->currentOffset += 64 KB;
-    dict->tableType = tableType;
 
     if (dictSize < (int)HASH_UNIT) {
         return 0;
     }
 
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+    base = dictEnd - dict->currentOffset;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->tableType = tableType;
+
     while (p <= dictEnd-HASH_UNIT) {
         LZ4_putPosition(p, dict->hashTable, tableType, base);
         p+=3;
@@ -1381,26 +1428,37 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
     return (int)dict->dictSize;
 }
 
-void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) {
+    const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL :
+        &(dictionaryStream->internal_donotuse);
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+             workingStream, dictionaryStream,
+             dictCtx != NULL ? dictCtx->dictSize : 0);
+
     /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
      * erased by subsequent calls to LZ4_resetStream_fast() in case stream was
      * marked as having dirty context, e.g. requiring full reset.
      */
-    LZ4_resetStream_fast(working_stream);
+    LZ4_resetStream_fast(workingStream);
 
-    if (dictionary_stream != NULL) {
+    if (dictCtx != NULL) {
         /* If the current offset is zero, we will never look in the
          * external dictionary context, since there is no value a table
          * entry can take that indicate a miss. In that case, we need
          * to bump the offset to something non-zero.
          */
-        if (working_stream->internal_donotuse.currentOffset == 0) {
-            working_stream->internal_donotuse.currentOffset = 64 KB;
+        if (workingStream->internal_donotuse.currentOffset == 0) {
+            workingStream->internal_donotuse.currentOffset = 64 KB;
+        }
+
+        /* Don't actually attach an empty dictionary.
+         */
+        if (dictCtx->dictSize == 0) {
+            dictCtx = NULL;
         }
-        working_stream->internal_donotuse.dictCtx = &(dictionary_stream->internal_donotuse);
-    } else {
-        working_stream->internal_donotuse.dictCtx = NULL;
     }
+    workingStream->internal_donotuse.dictCtx = dictCtx;
 }
 
 
@@ -1435,7 +1493,7 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
 
     DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
 
-    if (streamPtr->dirty) return 0;   /* Uninitialized structure detected */
+    if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */
     LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
 
@@ -1532,8 +1590,8 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
     LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
     const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
 
-    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
-    if ((U32)dictSize > dict->dictSize) dictSize = (int)dict->dictSize;
+    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
 
     memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
 
@@ -1607,7 +1665,7 @@ LZ4_decompress_generic(
                  const size_t dictSize         /* note : = 0 if noDict */
                  )
 {
-    if (src == NULL) return -1;
+    if (src == NULL) { return -1; }
 
     {   const BYTE* ip = (const BYTE*) src;
         const BYTE* const iend = ip + srcSize;
@@ -1636,9 +1694,13 @@ LZ4_decompress_generic(
 
         /* Special cases */
         assert(lowPrefix <= op);
-        if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
-        if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
-        if ((endOnInput) && unlikely(srcSize==0)) return -1;
+        if ((endOnInput) && (unlikely(outputSize==0))) {
+            /* Empty output buffer */
+            if (partialDecoding) return 0;
+            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
+        }
+        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
+        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
 
 	/* Currently the fast loop shows a regression on qualcomm arm chips. */
 #if LZ4_FAST_DEC_LOOP
@@ -1651,7 +1713,7 @@ LZ4_decompress_generic(
         while (1) {
             /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
             assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
-            if (endOnInput) assert(ip < iend);
+            if (endOnInput) { assert(ip < iend); }
             token = *ip++;
             length = token >> ML_BITS;  /* literal length */
 
@@ -1661,18 +1723,18 @@ LZ4_decompress_generic(
             if (length == RUN_MASK) {
                 variable_length_error error = ok;
                 length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
-                if (error == initial_error) goto _output_error;
-                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
-                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
 
                 /* copy literals */
                 cpy = op+length;
                 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
                 if (endOnInput) {  /* LZ4_decompress_safe() */
-                    if ((cpy>oend-32) || (ip+length>iend-32)) goto safe_literal_copy;
+                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
                     LZ4_wildCopy32(op, ip, cpy);
                 } else {   /* LZ4_decompress_fast() */
-                    if (cpy>oend-8) goto safe_literal_copy;
+                    if (cpy>oend-8) { goto safe_literal_copy; }
                     LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                                                  * it doesn't know input length, and only relies on end-of-block properties */
                 }
@@ -1682,14 +1744,14 @@ LZ4_decompress_generic(
                 if (endOnInput) {  /* LZ4_decompress_safe() */
                     DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
                     /* We don't need to check oend, since we check it once for each loop below */
-                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) goto safe_literal_copy;
+                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
                     /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
                     memcpy(op, ip, 16);
                 } else {  /* LZ4_decompress_fast() */
                     /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                      * it doesn't know input length, and relies on end-of-block properties */
                     memcpy(op, ip, 8);
-                    if (length > 8) memcpy(op+8, ip+8, 8);
+                    if (length > 8) { memcpy(op+8, ip+8, 8); }
                 }
                 ip += length; op = cpy;
             }
@@ -1697,17 +1759,17 @@ LZ4_decompress_generic(
             /* get offset */
             offset = LZ4_readLE16(ip); ip+=2;
             match = op - offset;
+            assert(match <= op);
 
             /* get matchlength */
             length = token & ML_MASK;
 
-            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
-
             if (length == ML_MASK) {
               variable_length_error error = ok;
+              if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
               length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
-              if (error != ok) goto _output_error;
-                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
+              if (error != ok) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
                 length += MINMATCH;
                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                     goto safe_match_copy;
@@ -1719,8 +1781,12 @@ LZ4_decompress_generic(
                 }
 
                 /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
-                if (!(dict == usingExtDict) || (match >= lowPrefix)) {
+                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
                     if (offset >= 8) {
+                        assert(match >= lowPrefix);
+                        assert(match <= op);
+                        assert(op + 18 <= oend);
+
                         memcpy(op, match, 8);
                         memcpy(op+8, match+8, 8);
                         memcpy(op+16, match+16, 2);
@@ -1728,12 +1794,15 @@ LZ4_decompress_generic(
                         continue;
             }   }   }
 
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
             /* match starting within external dictionary */
             if ((dict==usingExtDict) && (match < lowPrefix)) {
                 if (unlikely(op+length > oend-LASTLITERALS)) {
-                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
-                    else goto _output_error;   /* doesn't respect parsing restriction */
-                }
+                    if (partialDecoding) {
+                        length = MIN(length, (size_t)(oend-op));  /* reach end of buffer */
+                    } else {
+                        goto _output_error;  /* end-of-block condition violated */
+                }   }
 
                 if (length <= (size_t)(lowPrefix-match)) {
                     /* match fits entirely within external dictionary : just copy */
@@ -1748,7 +1817,7 @@ LZ4_decompress_generic(
                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                         BYTE* const endOfMatch = op + restSize;
                         const BYTE* copyFrom = lowPrefix;
-                        while (op < endOfMatch) *op++ = *copyFrom++;
+                        while (op < endOfMatch) { *op++ = *copyFrom++; }
                     } else {
                         memcpy(op, lowPrefix, restSize);
                         op += restSize;
@@ -1821,11 +1890,11 @@ LZ4_decompress_generic(
 
             /* decode literal length */
             if (length == RUN_MASK) {
-              variable_length_error error = ok;
-              length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
-              if (error == initial_error) goto _output_error;
-                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
-                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
             }
 
             /* copy literals */
@@ -1837,21 +1906,50 @@ LZ4_decompress_generic(
             if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
               || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
             {
+                /* We've either hit the input parsing restriction or the output parsing restriction.
+                 * If we've hit the input parsing condition then this must be the last sequence.
+                 * If we've hit the output parsing condition then we are either using partialDecoding
+                 * or we've hit the output parsing condition.
+                 */
                 if (partialDecoding) {
-                    if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); }  /* Partial decoding : stop in the middle of literal segment */
-                    if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+                    /* Since we are partial decoding we may be in this block because of the output parsing
+                     * restriction, which is not valid since the output buffer is allowed to be undersized.
+                     */
+                    assert(endOnInput);
+                    /* If we're in this block because of the input parsing condition, then we must be on the
+                     * last sequence (or invalid), so we must check that we exactly consume the input.
+                     */
+                    if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; }
+                    assert(ip+length <= iend);
+                    /* We are finishing in the middle of a literals segment.
+                     * Break after the copy.
+                     */
+                    if (cpy > oend) {
+                        cpy = oend;
+                        assert(op<=oend);
+                        length = (size_t)(oend-op);
+                    }
+                    assert(ip+length <= iend);
                 } else {
-                    if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
-                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+                    /* We must be on the last sequence because of the parsing limitations so check
+                     * that we exactly regenerate the original size (must be exact when !endOnInput).
+                     */
+                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
+                     /* We must be on the last sequence (or invalid) because of the parsing limitations
+                      * so check that we exactly consume the input and don't overrun the output buffer.
+                      */
+                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
                 }
-                memcpy(op, ip, length);
+                memmove(op, ip, length);  /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
                 ip += length;
                 op += length;
-                if (!partialDecoding || (cpy == oend)) {
-                    /* Necessarily EOF, due to parsing restrictions */
+                /* Necessarily EOF when !partialDecoding. When partialDecoding
+                 * it is EOF if we've either filled the output buffer or hit
+                 * the input parsing restriction.
+                 */
+                if (!partialDecoding || (cpy == oend) || (ip == iend)) {
                     break;
                 }
-
             } else {
                 LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
                 ip += length; op = cpy;
@@ -1865,13 +1963,6 @@ LZ4_decompress_generic(
             length = token & ML_MASK;
 
     _copy_match:
-            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
-            if (!partialDecoding) {
-                assert(oend > op);
-                assert(oend - op >= 4);
-                LZ4_write32(op, 0);   /* silence an msan warning when offset==0; costs <1%; */
-            }   /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
-
             if (length == ML_MASK) {
               variable_length_error error = ok;
               length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
@@ -1883,6 +1974,7 @@ LZ4_decompress_generic(
 #if LZ4_FAST_DEC_LOOP
         safe_match_copy:
 #endif
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
             /* match starting within external dictionary */
             if ((dict==usingExtDict) && (match < lowPrefix)) {
                 if (unlikely(op+length > oend-LASTLITERALS)) {
@@ -1910,6 +2002,7 @@ LZ4_decompress_generic(
                 }   }
                 continue;
             }
+            assert(match >= lowPrefix);
 
             /* copy match within block */
             cpy = op + length;
@@ -1921,16 +2014,17 @@ LZ4_decompress_generic(
                 const BYTE* const matchEnd = match + mlen;
                 BYTE* const copyEnd = op + mlen;
                 if (matchEnd > op) {   /* overlap copy */
-                    while (op < copyEnd) *op++ = *match++;
+                    while (op < copyEnd) { *op++ = *match++; }
                 } else {
                     memcpy(op, match, mlen);
                 }
                 op = copyEnd;
-                if (op==oend) break;
+                if (op == oend) { break; }
                 continue;
             }
 
             if (unlikely(offset<8)) {
+                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
                 op[0] = match[0];
                 op[1] = match[1];
                 op[2] = match[2];
@@ -1946,25 +2040,26 @@ LZ4_decompress_generic(
 
             if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
-                if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                 if (op < oCopyLimit) {
                     LZ4_wildCopy8(op, match, oCopyLimit);
                     match += oCopyLimit - op;
                     op = oCopyLimit;
                 }
-                while (op < cpy) *op++ = *match++;
+                while (op < cpy) { *op++ = *match++; }
             } else {
                 memcpy(op, match, 8);
-                if (length > 16) LZ4_wildCopy8(op+8, match+8, cpy);
+                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
             }
             op = cpy;   /* wildcopy correction */
         }
 
         /* end of decoding */
-        if (endOnInput)
+        if (endOnInput) {
            return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
-        else
+       } else {
            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
+       }
 
         /* Overflow error detected */
     _output_error:
@@ -2079,7 +2174,7 @@ LZ4_streamDecode_t* LZ4_createStreamDecode(void)
 
 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
 {
-    if (LZ4_stream == NULL) return 0;   /* support free on NULL */
+    if (LZ4_stream == NULL) { return 0; }  /* support free on NULL */
     FREEMEM(LZ4_stream);
     return 0;
 }
@@ -2214,18 +2309,22 @@ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressed
     if (dictSize==0)
         return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
     if (dictStart+dictSize == dest) {
-        if (dictSize >= 64 KB - 1)
+        if (dictSize >= 64 KB - 1) {
             return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
-        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
+        }
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
     }
-    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize);
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
 }
 
 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
 {
     if (dictSize==0 || dictStart+dictSize == dest)
         return LZ4_decompress_fast(source, dest, originalSize);
-    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize);
+    assert(dictSize >= 0);
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
 }
 
 
@@ -2237,9 +2336,9 @@ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, in
 {
     return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
 }
-int LZ4_compress(const char* source, char* dest, int inputSize)
+int LZ4_compress(const char* src, char* dest, int srcSize)
 {
-    return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize));
+    return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
 }
 int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
 {
diff --git a/source/lz4.h b/source/lz4.h
index a9c932c..32108e2 100644
--- a/source/lz4.h
+++ b/source/lz4.h
@@ -46,7 +46,7 @@ extern "C" {
 /**
   Introduction
 
-  LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core,
+  LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
   scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
   multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
 
@@ -58,16 +58,19 @@ extern "C" {
     - unbounded multiple steps (described as Streaming compression)
 
   lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
-  Decompressing a block requires additional metadata, such as its compressed size.
+  Decompressing such a compressed block requires additional metadata.
+  Exact metadata depends on exact decompression function.
+  For the typical case of LZ4_decompress_safe(),
+  metadata includes block's compressed size, and maximum bound of decompressed size.
   Each application is free to encode and pass such metadata in whichever way it wants.
 
   lz4.h only handle blocks, it can not generate Frames.
 
   Blocks are different from Frames (doc/lz4_Frame_format.md).
   Frames bundle both blocks and metadata in a specified manner.
-  This are required for compressed data to be self-contained and portable.
+  Embedding metadata is required for compressed data to be self-contained and portable.
   Frame format is delivered through a companion API, declared in lz4frame.h.
-  Note that the `lz4` CLI can only manage frames.
+  The `lz4` CLI can only manage frames.
 */
 
 /*^***************************************************************
@@ -97,7 +100,7 @@ extern "C" {
 /*------   Version   ------*/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
 #define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
 
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 
@@ -129,29 +132,35 @@ LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string;
 *  Simple Functions
 **************************************/
 /*! LZ4_compress_default() :
-    Compresses 'srcSize' bytes from buffer 'src'
-    into already allocated 'dst' buffer of size 'dstCapacity'.
-    Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
-    It also runs faster, so it's a recommended setting.
-    If the function cannot compress 'src' into a more limited 'dst' budget,
-    compression stops *immediately*, and the function result is zero.
-    In which case, 'dst' content is undefined (invalid).
-        srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
-        dstCapacity : size of buffer 'dst' (which must be already allocated)
-       @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
-                  or 0 if compression fails
-    Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
-*/
+ *  Compresses 'srcSize' bytes from buffer 'src'
+ *  into already allocated 'dst' buffer of size 'dstCapacity'.
+ *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ *  It also runs faster, so it's a recommended setting.
+ *  If the function cannot compress 'src' into a more limited 'dst' budget,
+ *  compression stops *immediately*, and the function result is zero.
+ *  In which case, 'dst' content is undefined (invalid).
+ *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ *      dstCapacity : size of buffer 'dst' (which must be already allocated)
+ *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ *                or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+ */
 LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
 
 /*! LZ4_decompress_safe() :
-    compressedSize : is the exact complete size of the compressed block.
-    dstCapacity : is the size of destination buffer, which must be already allocated.
-   @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
-             If destination buffer is not large enough, decoding will stop and output an error code (negative value).
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-    Note : This function is protected against malicious data packets (never writes outside 'dst' buffer, nor read outside 'source' buffer).
-*/
+ *  compressedSize : is the exact complete size of the compressed block.
+ *  dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ *          it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
+ *          even if the compressed block is maliciously modified to order the decoder to do these actions.
+ *          In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ *          The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ *          If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
 LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
 
 
@@ -388,6 +397,8 @@ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecod
  */
 LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
 
+#endif /* LZ4_H_2983827168210 */
+
 
 /*^*************************************
  * !!!!!!   STATIC LINKING ONLY   !!!!!!
@@ -413,14 +424,17 @@ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int sr
  * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
  ******************************************************************************/
 
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+#ifndef LZ4_STATIC_3504398509
+#define LZ4_STATIC_3504398509
+
 #ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
 #define LZ4LIB_STATIC_API LZ4LIB_API
 #else
 #define LZ4LIB_STATIC_API
 #endif
 
-#ifdef LZ4_STATIC_LINKING_ONLY
-
 
 /*! LZ4_compress_fast_extState_fastReset() :
  *  A variant of LZ4_compress_fast_extState().
@@ -462,8 +476,75 @@ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const c
  */
 LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
 
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly contrained memory environments.
+ * In both cases, it requires input to lay at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ *                             |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ *                                                  |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ *   Note that it is a compile-time constant, so all compressions will apply this limit.
+ *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ *   so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ *   in which case, the return code will be 0 (zero).
+ *   The caller must be ready for these cases to happen,
+ *   and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * which size is >= (maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by playing with them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
+#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
 #endif
 
+#define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+#endif   /* LZ4_STATIC_3504398509 */
+#endif   /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
 
 /*-************************************************************
  *  PRIVATE DEFINITIONS
@@ -567,6 +648,7 @@ union LZ4_streamDecode_u {
 } ;   /* previously typedef'd to LZ4_streamDecode_t */
 
 
+
 /*-************************************
 *  Obsolete Functions
 **************************************/
@@ -601,8 +683,8 @@ union LZ4_streamDecode_u {
 #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
 
 /* Obsolete compression functions */
-LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* source, char* dest, int sourceSize);
-LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
@@ -674,7 +756,7 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or
 LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
 
 
-#endif /* LZ4_H_2983827168210 */
+#endif /* LZ4_H_98237428734687 */
 
 
 #if defined (__cplusplus)
diff --git a/source/nso.c b/source/nso.c
index fd8a1ac..0b002f8 100644
--- a/source/nso.c
+++ b/source/nso.c
@@ -18,6 +18,263 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#define LZ4_STATIC_LINKING_ONLY /* Required by LZ4 to enable in-place decompression. */
+
 #include "utils.h"
 #include "nso.h"
+#include "lz4.h"
 
+/* Function prototypes. */
+
+static bool nsoGetModuleName(NsoContext *nso_ctx);
+static u8 *nsoGetRodataSegment(NsoContext *nso_ctx);
+static bool nsoGetModuleInfoName(NsoContext *nso_ctx, u8 *rodata_buf);
+static bool nsoGetSectionFromRodataSegment(NsoContext *nso_ctx, u8 *rodata_buf, u8 **section_ptr, u64 section_offset, u64 section_size);
+
+bool nsoInitializeContext(NsoContext *out, PartitionFileSystemContext *pfs_ctx, PartitionFileSystemEntry *pfs_entry)
+{
+    NcaContext *nca_ctx = NULL;
+    u8 *rodata_buf = NULL;
+    bool success = false;
+    
+    if (!out || !pfs_ctx || !pfs_ctx->nca_fs_ctx || !(nca_ctx = (NcaContext*)pfs_ctx->nca_fs_ctx->nca_ctx) || nca_ctx->content_type != NcmContentType_Program || !pfs_ctx->offset || !pfs_ctx->size || \
+        !pfs_ctx->is_exefs || pfs_ctx->header_size <= sizeof(PartitionFileSystemHeader) || !pfs_ctx->header || !pfs_entry)
+    {
+        LOGFILE("Invalid parameters!");
+        return false;
+    }
+    
+    /* Free output context beforehand. */
+    nsoFreeContext(out);
+    
+    /* Update output context. */
+    out->pfs_ctx = pfs_ctx;
+    out->pfs_entry = pfs_entry;
+    
+    /* Get entry filename. */
+    if (!(out->nso_filename = pfsGetEntryName(pfs_ctx, pfs_entry)) || !strlen(out->nso_filename))
+    {
+        LOGFILE("Invalid Partition FS entry filename!");
+        goto end;
+    }
+    
+    /* Read NSO header. */
+    if (!pfsReadEntryData(pfs_ctx, pfs_entry, &(out->nso_header), sizeof(NsoHeader), 0))
+    {
+        LOGFILE("Failed to read NSO \"%s\" header!", out->nso_filename);;
+        goto end;
+    }
+    
+    /* Verify NSO header. */
+    if (__builtin_bswap32(out->nso_header.magic) != NSO_HEADER_MAGIC)
+    {
+        LOGFILE("Invalid NSO \"%s\" header magic word! (0x%08X != 0x%08X).", out->nso_filename, __builtin_bswap32(out->nso_header.magic), __builtin_bswap32(NSO_HEADER_MAGIC));
+        goto end;
+    }
+    
+    if (out->nso_header.text_segment_header.file_offset < sizeof(NsoHeader) || !out->nso_header.text_segment_header.size || \
+        ((out->nso_header.flags & NsoFlags_TextCompress) && (!out->nso_header.text_file_size || out->nso_header.text_file_size > out->nso_header.text_segment_header.size)) || \
+        (!(out->nso_header.flags & NsoFlags_TextCompress) && out->nso_header.text_file_size != out->nso_header.text_segment_header.size) || \
+        (out->nso_header.text_segment_header.file_offset + out->nso_header.text_file_size) > pfs_entry->size)
+    {
+        LOGFILE("Invalid .text segment offset/size for NSO \"%s\"! (0x%08X, 0x%08X, 0x%08X).", out->nso_filename, out->nso_header.text_segment_header.file_offset, out->nso_header.text_file_size, \
+                out->nso_header.text_segment_header.size);
+        goto end;
+    }
+    
+    if (out->nso_header.rodata_segment_header.file_offset < sizeof(NsoHeader) || !out->nso_header.rodata_segment_header.size || \
+        ((out->nso_header.flags & NsoFlags_RoCompress) && (!out->nso_header.rodata_file_size || out->nso_header.rodata_file_size > out->nso_header.rodata_segment_header.size)) || \
+        (!(out->nso_header.flags & NsoFlags_RoCompress) && out->nso_header.rodata_file_size != out->nso_header.rodata_segment_header.size) || \
+        (out->nso_header.rodata_segment_header.file_offset + out->nso_header.rodata_file_size) > pfs_entry->size)
+    {
+        LOGFILE("Invalid .rodata segment offset/size for NSO \"%s\"! (0x%08X, 0x%08X, 0x%08X).", out->nso_filename, out->nso_header.rodata_segment_header.file_offset, out->nso_header.rodata_file_size, \
+                out->nso_header.rodata_segment_header.size);
+        goto end;
+    }
+    
+    if (out->nso_header.data_segment_header.file_offset < sizeof(NsoHeader) || !out->nso_header.data_segment_header.size || \
+        ((out->nso_header.flags & NsoFlags_DataCompress) && (!out->nso_header.data_file_size || out->nso_header.data_file_size > out->nso_header.data_segment_header.size)) || \
+        (!(out->nso_header.flags & NsoFlags_DataCompress) && out->nso_header.data_file_size != out->nso_header.data_segment_header.size) || \
+        (out->nso_header.data_segment_header.file_offset + out->nso_header.data_file_size) > pfs_entry->size)
+    {
+        LOGFILE("Invalid .data segment offset/size for NSO \"%s\"! (0x%08X, 0x%08X, 0x%08X).", out->nso_filename, out->nso_header.data_segment_header.file_offset, out->nso_header.data_file_size, \
+                out->nso_header.data_segment_header.size);
+        goto end;
+    }
+    
+    if (out->nso_header.module_name_offset < sizeof(NsoHeader) || !out->nso_header.module_name_size || (out->nso_header.module_name_offset + out->nso_header.module_name_size) > pfs_entry->size)
+    {
+        LOGFILE("Invalid module name offset/size for NSO \"%s\"! (0x%08X, 0x%08X).", out->nso_filename, out->nso_header.module_name_offset, out->nso_header.module_name_size);
+        goto end;
+    }
+    
+    if (!out->nso_header.api_info_section_header.size || (out->nso_header.api_info_section_header.offset + out->nso_header.api_info_section_header.size) > out->nso_header.rodata_segment_header.size)
+    {
+        LOGFILE("Invalid .api_info section offset/size for NSO \"%s\"! (0x%08X, 0x%08X).", out->nso_filename, out->nso_header.api_info_section_header.offset, out->nso_header.api_info_section_header.size);
+        goto end;
+    }
+    
+    if (!out->nso_header.dynstr_section_header.size || (out->nso_header.dynstr_section_header.offset + out->nso_header.dynstr_section_header.size) > out->nso_header.rodata_segment_header.size)
+    {
+        LOGFILE("Invalid .dynstr section offset/size for NSO \"%s\"! (0x%08X, 0x%08X).", out->nso_filename, out->nso_header.dynstr_section_header.offset, out->nso_header.dynstr_section_header.size);
+        goto end;
+    }
+    
+    if (!out->nso_header.dynsym_section_header.size || (out->nso_header.dynsym_section_header.offset + out->nso_header.dynsym_section_header.size) > out->nso_header.rodata_segment_header.size)
+    {
+        LOGFILE("Invalid .dynsym section offset/size for NSO \"%s\"! (0x%08X, 0x%08X).", out->nso_filename, out->nso_header.dynsym_section_header.offset, out->nso_header.dynsym_section_header.size);
+        goto end;
+    }
+    
+    /* Get module name. */
+    if (!nsoGetModuleName(out)) goto end;
+    
+    /* Get .rodata segment. */
+    if (!(rodata_buf = nsoGetRodataSegment(out))) goto end;
+    
+    /* Get module info name. */
+    if (!nsoGetModuleInfoName(out, rodata_buf)) goto end;
+    
+    /* Get .api_info section data. */
+    if (!nsoGetSectionFromRodataSegment(out, rodata_buf, (u8**)&(out->rodata_api_info_section), out->nso_header.api_info_section_header.offset, out->nso_header.api_info_section_header.size)) goto end;
+    out->rodata_api_info_section_size = out->nso_header.api_info_section_header.size;
+    
+    /* Get .dynstr section data. */
+    if (!nsoGetSectionFromRodataSegment(out, rodata_buf, (u8**)&(out->rodata_dynstr_section), out->nso_header.dynstr_section_header.offset, out->nso_header.dynstr_section_header.size)) goto end;
+    out->rodata_dynstr_section_size = out->nso_header.dynstr_section_header.size;
+    
+    /* Get .dynsym section data. */
+    if (!nsoGetSectionFromRodataSegment(out, rodata_buf, &(out->rodata_dynsym_section), out->nso_header.dynsym_section_header.offset, out->nso_header.dynsym_section_header.size)) goto end;
+    out->rodata_dynsym_section_size = out->nso_header.dynsym_section_header.size;
+    
+    success = true;
+    
+end:
+    if (rodata_buf) free(rodata_buf);
+    
+    if (!success) nsoFreeContext(out);
+    
+    return success;
+}
+
+static bool nsoGetModuleName(NsoContext *nso_ctx)
+{
+    if (nso_ctx->nso_header.module_name_size <= 1) return true;
+    
+    NsoModuleName module_name = {0};
+    
+    /* Get module name. */
+    if (!pfsReadEntryData(nso_ctx->pfs_ctx, nso_ctx->pfs_entry, &module_name, sizeof(NsoModuleName), nso_ctx->nso_header.module_name_offset))
+    {
+        LOGFILE("Failed to read NSO \"%s\" module name length!", nso_ctx->nso_filename);
+        return false;
+    }
+    
+    /* Verify module name length. */
+    if (module_name.name_length != ((u8)nso_ctx->nso_header.module_name_size - 1))
+    {
+        LOGFILE("NSO \"%s\" module name length mismatch! (0x%02X != 0x%02X).", nso_ctx->nso_filename, module_name.name_length, (u8)nso_ctx->nso_header.module_name_size - 1);
+        return false;
+    }
+    
+    /* Allocate memory for the module name. */
+    nso_ctx->module_name = calloc(nso_ctx->nso_header.module_name_size, sizeof(char));
+    if (!nso_ctx->module_name)
+    {
+        LOGFILE("Failed to allocate memory for NSO \"%s\" module name!", nso_ctx->nso_filename);
+        return false;
+    }
+    
+    /* Read module name string. */
+    if (!pfsReadEntryData(nso_ctx->pfs_ctx, nso_ctx->pfs_entry, nso_ctx->module_name, module_name.name_length, nso_ctx->nso_header.module_name_offset + 1))
+    {
+        LOGFILE("Failed to read NSO \"%s\" module name string!", nso_ctx->nso_filename);
+        return false;
+    }
+    
+    return true;
+}
+
+static u8 *nsoGetRodataSegment(NsoContext *nso_ctx)
+{
+    bool compressed = (nso_ctx->nso_header.flags & NsoFlags_RoCompress);
+    
+    u8 *rodata_buf = NULL;
+    u64 rodata_buf_size = (compressed ? LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(nso_ctx->nso_header.rodata_segment_header.size) : nso_ctx->nso_header.rodata_segment_header.size);
+    
+    u8 *rodata_read_ptr = (compressed ? (rodata_buf + (rodata_buf_size - nso_ctx->nso_header.rodata_file_size)) : rodata_buf);
+    u64 rodata_read_size = (compressed ? nso_ctx->nso_header.rodata_file_size : nso_ctx->nso_header.rodata_segment_header.size);
+    
+    int lz4_res = 0;
+    bool success = false;
+    
+    /* Allocate memory for the .rodata buffer. */
+    if (!(rodata_buf = calloc(rodata_buf_size, sizeof(u8))))
+    {
+        LOGFILE("Failed to allocate 0x%lX bytes for the .rodata segment in NSO \"%s\"!", rodata_buf_size, nso_ctx->nso_filename);
+        return NULL;
+    }
+    
+    /* Read .rodata segment data. */
+    if (!pfsReadEntryData(nso_ctx->pfs_ctx, nso_ctx->pfs_entry, rodata_read_ptr, rodata_read_size, nso_ctx->nso_header.rodata_segment_header.file_offset))
+    {
+        LOGFILE("Failed to read %s .rodata segment in NRO \"%s\"!", nso_ctx->nso_filename);
+        goto end;
+    }
+    
+    if (compressed)
+    {
+        /* Decompress .rodata segment in-place. */
+        if ((lz4_res = LZ4_decompress_safe((char*)rodata_read_ptr, (char*)rodata_buf, (int)nso_ctx->nso_header.rodata_file_size, (int)rodata_buf_size)) != \
+            (int)nso_ctx->nso_header.rodata_segment_header.size)
+        {
+            LOGFILE("LZ4 decompression failed for NRO \"%s\"! (0x%08X).", nso_ctx->nso_filename, (u32)lz4_res);
+            goto end;
+        }
+    }
+    
+    success = true;
+    
+end:
+    if (!success && rodata_buf)
+    {
+        free(rodata_buf);
+        rodata_buf = NULL;
+    }
+    
+    return rodata_buf;
+}
+
+static bool nsoGetModuleInfoName(NsoContext *nso_ctx, u8 *rodata_buf)
+{
+    NsoModuleInfo *module_info = (NsoModuleInfo*)(rodata_buf + 0x4);
+    if (!module_info->name_length) return true;
+    
+    /* Allocate memory for the module info name. */
+    nso_ctx->module_info_name = calloc(module_info->name_length + 1, sizeof(char));
+    if (!nso_ctx->module_info_name)
+    {
+        LOGFILE("Failed to allocate memory for NSO \"%s\" module info name!", nso_ctx->nso_filename);
+        return false;
+    }
+    
+    /* Copy module info name. */
+    sprintf(nso_ctx->module_info_name, "%.*s", (int)module_info->name_length, module_info->name);
+    
+    return true;
+}
+
+static bool nsoGetSectionFromRodataSegment(NsoContext *nso_ctx, u8 *rodata_buf, u8 **section_ptr, u64 section_offset, u64 section_size)
+{
+    /* Allocate memory for the desired .rodata section. */
+    if (!(*section_ptr = malloc(section_size)))
+    {
+        LOGFILE("Failed to allocate 0x%lX bytes for section at .rodata offset 0x%lX in NSO \"%s\"!", section_size, section_offset, nso_ctx->nso_filename);
+        return false;
+    }
+    
+    /* Copy .rodata section data. */
+    memcpy(*section_ptr, rodata_buf + section_offset, section_size);
+    
+    return true;
+}
diff --git a/source/nso.h b/source/nso.h
index f7c9fc3..ec986a6 100644
--- a/source/nso.h
+++ b/source/nso.h
@@ -23,6 +23,8 @@
 #ifndef __NSO_H__
 #define __NSO_H__
 
+#include "pfs.h"
+
 #define NSO_HEADER_MAGIC    0x4E534F30  /* "NSO0". */
 #define NSO_MOD_MAGIC       0x4D4F4430  /* "MOD0". */
 
@@ -88,7 +90,7 @@ typedef struct {
 /// This is essentially a replacement for the PT_DYNAMIC program header available in ELF binaries.
 /// All offsets are signed 32-bit values relative to the start of this header.
 /// This is usually placed at the start of the decompressed .text segment, right after a NsoModStart block.
-/// However, in some NSOs, it can instead be placed at the start of the uncompressed .rodata segment, right after its NsoModuleInfo block.
+/// However, in some NSOs, it can instead be placed at the start of the decompressed .rodata segment, right after its NsoModuleInfo block.
 /// In these cases, the 'mod_offset' value from the NsoModStart block will point to an offset within the .rodata segment.
 typedef struct  {
     u32 magic;                      ///< "MOD0".
@@ -97,7 +99,7 @@ typedef struct  {
     s32 bss_end_offset;
     s32 eh_frame_hdr_start_offset;
     s32 eh_frame_hdr_end_offset;
-    s32 module_object_offset;       ///< Typically equal to .bss base.
+    s32 module_object_offset;       ///< Typically equal to bss_start_offset.
 } NsoModHeader;
 
 /// Placed at the start of the decompressed .rodata segment + 0x4.
@@ -107,36 +109,35 @@ typedef struct {
     char name[];
 } NsoModuleInfo;
 
-
-
-
-/*
-
-
 typedef struct {
-    PartitionFileSystemContext *pfs_ctx;                        ///< PartitionFileSystemContext for the Program NCA FS section #0, which is where this NSO is stored.
-    PartitionFileSystemEntry *pfs_entry;                        ///< PartitionFileSystemEntry for this NSO in the Program NCA FS section #0. Used to read NSO data.
-    NsoHeader nso_header;                                       ///< Copy of the NSO header.
-    
-    
-    
-    
-    
-    
+    PartitionFileSystemContext *pfs_ctx;    ///< PartitionFileSystemContext for the Program NCA FS section #0, which is where this NSO is stored.
+    PartitionFileSystemEntry *pfs_entry;    ///< PartitionFileSystemEntry for this NSO in the Program NCA FS section #0. Used to read NSO data.
+    char *nso_filename;                     ///< Pointer to the NSO filename in the Program NCA FS section #0.
+    NsoHeader nso_header;                   ///< NSO header.
+    char *module_name;                      ///< Pointer to a dynamically allocated buffer that holds the NSO module name, if available. Otherwise, this is set to NULL.
+    char *module_info_name;                 ///< Pointer to a dynamically allocated buffer that holds the .rodata module info module name, if available. Otherwise, this is set to NULL.
+    char *rodata_api_info_section;          ///< Pointer to a dynamically allocated buffer that holds the .rodata API info section data. Middleware/GuidelineApi data is retrieved from this section.
+    u64 rodata_api_info_section_size;       ///< .rodata API info section size. Kept here for convenience - this is part of 'nso_header'.
+    char *rodata_dynstr_section;            ///< Pointer to a dynamically allocated buffer that holds the .rodata dynamic string section data. UnresolvedApi data is retrieved from this section.
+    u64 rodata_dynstr_section_size;         ///< .rodata dynamic string section size. Kept here for convenience - this is part of 'nso_header'.
+    u8 *rodata_dynsym_section;              ///< Pointer to a dynamically allocated buffer that holds the .rodata dynamic symbol section data. Used to retrieve pointers to symbol strings within dynstr.
+    u64 rodata_dynsym_section_size;         ///< .rodata dynamic symbol section size. Kept here for convenience - this is part of 'nso_header'.
 } NsoContext;
 
+/// Initializes a NsoContext using a previously initialized PartitionFileSystemContext (which must belong to the ExeFS from a Program NCA) and a PartitionFileSystemEntry belonging to an underlying NSO.
+bool nsoInitializeContext(NsoContext *out, PartitionFileSystemContext *pfs_ctx, PartitionFileSystemEntry *pfs_entry);
 
+/// Helper inline functions.
 
-
-
-*/
-
-
-
-
-
-
-
-
+NX_INLINE void nsoFreeContext(NsoContext *nso_ctx)
+{
+    if (!nso_ctx) return;
+    if (nso_ctx->module_name) free(nso_ctx->module_name);
+    if (nso_ctx->module_info_name) free(nso_ctx->module_info_name);
+    if (nso_ctx->rodata_api_info_section) free(nso_ctx->rodata_api_info_section);
+    if (nso_ctx->rodata_dynstr_section) free(nso_ctx->rodata_dynstr_section);
+    if (nso_ctx->rodata_dynsym_section) free(nso_ctx->rodata_dynsym_section);
+    memset(nso_ctx, 0, sizeof(NsoContext));
+}
 
 #endif /* __NSO_H__ */
diff --git a/source/pfs.h b/source/pfs.h
index 8525a7d..d07fc67 100644
--- a/source/pfs.h
+++ b/source/pfs.h
@@ -101,6 +101,13 @@ NX_INLINE char *pfsGetNameTable(PartitionFileSystemContext *ctx)
     return (char*)(ctx->header + sizeof(PartitionFileSystemHeader) + (entry_count * sizeof(PartitionFileSystemEntry)));
 }
 
+NX_INLINE char *pfsGetEntryName(PartitionFileSystemContext *ctx, PartitionFileSystemEntry *fs_entry)
+{
+    char *name_table = pfsGetNameTable(ctx);
+    if (!name_table || !fs_entry || fs_entry->name_offset >= ((PartitionFileSystemHeader*)ctx->header)->name_table_size || !name_table[fs_entry->name_offset]) return NULL;
+    return (name_table + fs_entry->name_offset);
+}
+
 NX_INLINE char *pfsGetEntryNameByIndex(PartitionFileSystemContext *ctx, u32 idx)
 {
     PartitionFileSystemEntry *fs_entry = pfsGetEntryByIndex(ctx, idx);