Working with Gzip and tar.gz files in Kotlin

kotlin tar.gz file

This article shows how we can compress, update, and decompress multiple files and folders into a tar.gz file using Kotlin.

Table of contents:

Gzip is widely used for file compression to save disk space and speed up data transfer.

1. Gzip vs. Java Zip

Java commonly uses Zip format, handling multiple files directly. However, gzip typically compresses a single file or data stream. For multiple files or folders, gzip is combined with tar, creating .tar.gz archives.

2. Add Apache Commons Compress Dependency

We need Apache Commons Compress to handle tar.gz files in Kotlin.

pom.xml

<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-compress</artifactId>
    <version>1.27.0</version>
</dependency>

3. Compressing Files and Folders into a tar.gz

Here’s how we compress files and folders:

CompressFiles.kt

package com.mkyong.zip

import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream
import java.io.*

/**
 * Packs [source] (a single file or a whole directory tree) into a gzip-compressed tar archive.
 *
 * The archive is written to [tarGzFile] through a `FileOutputStream`, so an existing file at
 * that path is overwritten. Entry names start with [source]'s own name, because the recursion
 * begins with an empty parent path.
 *
 * @param source file or directory to archive
 * @param tarGzFile destination `.tar.gz` file
 * @throws FileNotFoundException if [source] does not exist
 */
fun compressToTarGz(source: File, tarGzFile: File) {
    // Validate before opening the output stream so a missing source never creates an archive file
    if (!source.exists()) throw FileNotFoundException("File or directory not found: ${source.path}")
    GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile))).use { gzOut ->
        TarArchiveOutputStream(gzOut).apply {
            // Allow file names longer than 100 characters
            setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
        }.use { tarOut ->
            addToTar(source, "", tarOut)
        }
    }
}

/**
 * Writes [file] into [tarOut] under the name `parentPath + file.name`, then descends into it
 * when it is not a regular file so that nested files and folders are archived as well.
 *
 * @param file file or directory to add
 * @param parentPath archive path prefix for this entry ("" for the top level, otherwise ending in "/")
 * @param tarOut open tar stream that receives the entries
 */
fun addToTar(file: File, parentPath: String, tarOut: TarArchiveOutputStream) {
    val nameInArchive = parentPath + file.name
    tarOut.putArchiveEntry(tarOut.createArchiveEntry(file, nameInArchive))

    if (!file.isFile) {
        // Nothing is written for the entry itself; close it before visiting the children
        tarOut.closeArchiveEntry()
        val children = file.listFiles() ?: return
        for (child in children) {
            addToTar(child, "$nameInArchive/", tarOut)
        }
        return
    }

    // Regular file: stream its bytes into the open entry, then finish the entry
    file.inputStream().use { input -> input.copyTo(tarOut) }
    tarOut.closeArchiveEntry()
}

// Demo entry point: archives the "project" folder into project.tar.gz
fun main() {
    val sourceDir = File("project")
    val archive = File("project.tar.gz")
    compressToTarGz(sourceDir, archive)
}

3.1 file name is too long (>100 bytes)

The default TAR format only supports file names of up to 100 characters. If we add a file inside a deep folder structure, we may hit the "file name is too long (>100 bytes)" error.

How to fix this issue:
Always call setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX) explicitly when compressing complex projects or deep folder structures, to avoid errors caused by the file name length limit.


// Creates tarGzFile from source, with POSIX long-name support switched on
fun compressToTarGz(source: File, tarGzFile: File) {
    val gzipStream = GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile)))
    gzipStream.use { gz ->
        val archive = TarArchiveOutputStream(gz)
        // LONGFILE_POSIX lifts the 100-character limit on entry names
        archive.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
        archive.use { openArchive ->
            addToTar(source, "", openArchive)
        }
    }
}

4. Decompressing a tar.gz File

Here’s how to decompress a .tar.gz archive while preventing the Zip Slip Vulnerability.

Decompress.kt

package com.mkyong.zip

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import java.io.BufferedInputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileNotFoundException

/**
 * Extracts every entry of the gzip-compressed tar archive [tarGzFile] into [outputDir].
 *
 * Each entry's target path is canonicalized and must stay inside the canonical [outputDir];
 * otherwise extraction stops with a [SecurityException] (Zip Slip protection).
 *
 * @param tarGzFile the `.tar.gz` archive to read
 * @param outputDir directory that receives the extracted files and folders
 * @throws FileNotFoundException if [tarGzFile] does not exist
 * @throws SecurityException if an entry would be written outside [outputDir]
 * @throws java.io.IOException if a required directory cannot be created
 */
fun decompressTarGz(tarGzFile: File, outputDir: File) {

    // Fail fast on a missing archive before doing any other work
    if (!tarGzFile.exists()) throw FileNotFoundException("File not found: ${tarGzFile.path}")

    // Ensure canonical path for security
    val canonicalOutputDir = outputDir.canonicalFile

    GzipCompressorInputStream(BufferedInputStream(FileInputStream(tarGzFile))).use { gzIn ->
        TarArchiveInputStream(gzIn).use { tarIn ->
            generateSequence { tarIn.nextEntry }.forEach { entry ->

                val outputFile = File(canonicalOutputDir, entry.name).canonicalFile

                // Check if the extracted file stays inside outputDir
                // Prevent Zip Slip Vulnerability
                if (!outputFile.toPath().startsWith(canonicalOutputDir.toPath())) {
                    throw SecurityException("Zip Slip vulnerability detected! Malicious entry: ${entry.name}")
                }

                // Directory entries create themselves; file entries need their parent folder
                val requiredDir = if (entry.isDirectory) outputFile else outputFile.parentFile

                // mkdirs() also returns false when the directory already exists, so re-check
                // with isDirectory before reporting a failure
                if (!requiredDir.mkdirs() && !requiredDir.isDirectory) {
                    throw java.io.IOException("Could not create directory: ${requiredDir.path}")
                }

                if (!entry.isDirectory) {
                    outputFile.outputStream().use { outStream ->
                        tarIn.copyTo(outStream)
                    }
                }
            }
        }
    }
}

// Demo entry point: unpacks project.tar.gz into the "extracted_project" folder
fun main() {
    val archive = File("project.tar.gz")
    val destination = File("extracted_project")
    decompressTarGz(archive, destination)
}

The decompressTarGz function ensures both outputDir and extracted files are resolved to their canonical paths, preventing Zip Slip vulnerabilities from symbolic links and malicious relative paths.

5. Adding Files and Folders to an Existing tar.gz

We can update an existing tar.gz file by decompressing, adding files, and recompressing:

UpdateExisting.kt

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream
import java.io.*

/**
 * Adds files and folders to an existing tar.gz archive by extracting it into [tempDir],
 * copying [filesToAdd] next to the extracted content, and re-creating the archive from the result.
 *
 * Any existing content of [tempDir] is deleted before use, and [tempDir] is deleted again
 * afterwards, including when one of the steps throws.
 *
 * @param existingTarGz archive to update; it is rewritten in place
 * @param filesToAdd files or directories copied into the archive root under their own names,
 *        overwriting extracted content with the same name
 * @param tempDir scratch directory used for the extract/re-archive round trip
 */
fun addFilesToExistingTarGz(existingTarGz: File, filesToAdd: List<File>, tempDir: File) {

    if (tempDir.exists()) tempDir.deleteRecursively()
    tempDir.mkdirs()

    try {
        // Decompress current archive
        decompressTarGz(existingTarGz, tempDir)

        // Copy new files to temporary directory
        filesToAdd.forEach { file ->
            file.copyRecursively(File(tempDir, file.name), overwrite = true)
        }

        // Recompress only the contents, so tempDir itself does not become a top-level folder
        compressToTarGzContents(tempDir, existingTarGz)
    } finally {
        // Clean up temporary directory even when a step above failed
        tempDir.deleteRecursively()
    }
}

/**
 * Archives the direct children of [sourceDir] into [tarGzFile], so that [sourceDir] itself
 * does not show up as a top-level folder inside the archive.
 */
fun compressToTarGzContents(sourceDir: File, tarGzFile: File) {
    val gzipStream = GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile)))
    gzipStream.use { gz ->
        val archive = TarArchiveOutputStream(gz)
        // Allow entry names longer than 100 characters
        archive.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
        archive.use { openArchive ->
            // Each child is added with an empty parent path, i.e. at the archive root
            for (child in sourceDir.listFiles().orEmpty()) {
                addToTar(child, "", openArchive)
            }
        }
    }
}

/**
 * Packs [source] (a single file or a whole directory tree) into a gzip-compressed tar archive
 * written to [tarGzFile]. An existing file at that path is overwritten.
 *
 * @param source file or directory to archive
 * @param tarGzFile destination `.tar.gz` file
 * @throws FileNotFoundException if [source] does not exist
 */
fun compressToTarGz(source: File, tarGzFile: File) {
    // Check first: opening the FileOutputStream below would otherwise create or truncate
    // the archive even though there is nothing to put into it
    if (!source.exists()) throw FileNotFoundException("File or directory not found: ${source.path}")
    GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile))).use { gzOut ->
        TarArchiveOutputStream(gzOut).apply {
            // Allow file names longer than 100 characters
            setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
        }.use { tarOut ->
            addToTar(source, "", tarOut)
        }
    }
}

/**
 * Adds [file] to [tarOut] as the entry `parentPath + file.name`. Anything that is not a
 * regular file is treated as a folder: its entry is closed immediately and its children,
 * if any can be listed, are added beneath it.
 */
fun addToTar(file: File, parentPath: String, tarOut: TarArchiveOutputStream) {
    val archivePath = parentPath + file.name
    val tarEntry = tarOut.createArchiveEntry(file, archivePath)
    tarOut.putArchiveEntry(tarEntry)

    when {
        file.isFile -> {
            // Copy the file's bytes into the entry that was just opened
            file.inputStream().use { source -> source.copyTo(tarOut) }
            tarOut.closeArchiveEntry()
        }
        else -> {
            tarOut.closeArchiveEntry()
            // Children are stored under "<this entry>/"
            val childPrefix = "$archivePath/"
            file.listFiles()?.forEach { addToTar(it, childPrefix, tarOut) }
        }
    }
}

/**
 * Decompresses the tar.gz archive [tarGzFile] into [outputDir].
 *
 * Every entry's target path is canonicalized and must remain inside the canonical
 * [outputDir]; an entry that escapes it aborts extraction (Zip Slip protection).
 *
 * @param tarGzFile the `.tar.gz` archive to read
 * @param outputDir directory that receives the extracted files and folders
 * @throws FileNotFoundException if [tarGzFile] does not exist
 * @throws SecurityException if an entry would be written outside [outputDir]
 * @throws IOException if a required directory cannot be created
 */
fun decompressTarGz(tarGzFile: File, outputDir: File) {

    // Fail fast on a missing archive before doing any other work
    if (!tarGzFile.exists()) throw FileNotFoundException("File not found: ${tarGzFile.path}")

    // Ensure canonical path for security
    val canonicalOutputDir = outputDir.canonicalFile

    GzipCompressorInputStream(BufferedInputStream(FileInputStream(tarGzFile))).use { gzIn ->
        TarArchiveInputStream(gzIn).use { tarIn ->
            generateSequence { tarIn.nextEntry }.forEach { entry ->

                val outputFile = File(canonicalOutputDir, entry.name).canonicalFile

                // Check if the extracted file stays inside outputDir
                // Prevent Zip Slip Vulnerability
                if (!outputFile.toPath().startsWith(canonicalOutputDir.toPath())) {
                    throw SecurityException("Zip Slip vulnerability detected! Malicious entry: ${entry.name}")
                }

                // Directory entries create themselves; file entries need their parent folder
                val requiredDir = if (entry.isDirectory) outputFile else outputFile.parentFile

                // mkdirs() also returns false when the directory already exists, so re-check
                // with isDirectory before reporting a failure
                if (!requiredDir.mkdirs() && !requiredDir.isDirectory) {
                    throw IOException("Could not create directory: ${requiredDir.path}")
                }

                if (!entry.isDirectory) {
                    outputFile.outputStream().use { outStream ->
                        tarIn.copyTo(outStream)
                    }
                }
            }
        }
    }
}

// Demo entry point: builds project.tar.gz from folder1, then adds pom.xml and target to it
fun main() {
    val archive = File("project.tar.gz")
    compressToTarGz(File("folder1"), archive)

    val extras = listOf(File("pom.xml"), File("target"))
    val scratchDir = File("temp_extracted")
    addFilesToExistingTarGz(archive, extras, scratchDir)
}

6. References:

mkyong

Founder of Mkyong.com, passionate about Java and open-source technologies. If you enjoy my tutorials, consider making a donation to these charities.

0 Comments
Most Voted
Newest Oldest
Inline Feedbacks
View all comments