This article shows how to compress multiple files and folders into a tar.gz file, decompress a tar.gz file, and add files to an existing tar.gz archive using Kotlin.
Table of contents:
- 1. Gzip vs. Java Zip
- 2. Add Apache Commons Compress Dependency
- 3. Compressing Files and Folders into a tar.gz
- 4. Decompressing a tar.gz File
- 5. Adding Files and Folders to an Existing tar.gz
- 6. References:
Gzip is widely used for file compression to save disk space and speed up data transfer.
1. Gzip vs. Java Zip
Java commonly uses the Zip format, which can store multiple files directly. Gzip, however, compresses only a single file or data stream. To handle multiple files or folders, gzip is combined with tar: tar bundles the files into one archive and gzip compresses it, producing a .tar.gz file.
2. Add Apache Commons Compress Dependency
We need Apache Commons Compress to handle tar.gz files in Kotlin.
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.27.0</version>
</dependency>
3. Compressing Files and Folders into a tar.gz
Here’s how we compress files and folders:
package com.mkyong.zip
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream
import java.io.*
/**
 * Compresses a file or directory into a gzip-compressed tar archive.
 *
 * @param source file or directory to archive; it is added under its own name at the archive root
 * @param tarGzFile archive to write; created, or overwritten if it already exists
 * @throws FileNotFoundException if [source] does not exist
 */
fun compressToTarGz(source: File, tarGzFile: File) {
    // Validate before opening the output so a missing source never creates or truncates the archive
    if (!source.exists()) throw FileNotFoundException("File or directory not found: ${source.path}")

    GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile))).use { gzOut ->
        TarArchiveOutputStream(gzOut).apply {
            // Allow file names longer than 100 bytes
            setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
            // Allow sizes and owner ids that do not fit the classic tar header fields
            setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX)
        }.use { tarOut ->
            addToTar(source, "", tarOut)
        }
    }
}
/**
 * Recursively adds a file or directory to the tar archive.
 *
 * The entry is named [parentPath] followed by the file's own name. A regular
 * file has its content copied into the entry; anything else gets an entry
 * without content, after which its children (if any) are added beneath it.
 *
 * @param file file or directory to add
 * @param parentPath prefix for the entry name; empty for the archive root, otherwise ending in "/"
 * @param tarOut open tar stream that receives the entries
 */
fun addToTar(file: File, parentPath: String, tarOut: TarArchiveOutputStream) {
    val pathInArchive = parentPath + file.name
    tarOut.putArchiveEntry(tarOut.createArchiveEntry(file, pathInArchive))

    if (file.isFile) {
        // Stream the file content into the entry that was just opened
        file.inputStream().use { input -> input.copyTo(tarOut) }
        tarOut.closeArchiveEntry()
        return
    }

    // The entry carries no content, so it is closed before descending
    tarOut.closeArchiveEntry()
    val children = file.listFiles() ?: return
    for (child in children) {
        addToTar(child, "$pathInArchive/", tarOut)
    }
}
/** Example: packs `project` into `project.tar.gz`, both resolved against the working directory. */
fun main() {
    val source = File("project")
    val archive = File("project.tar.gz")
    compressToTarGz(source, archive)
}
3.1 file name is too long (>100 bytes)
The default TAR format supports file names of up to 100 bytes only. If we add a file inside a deep folder structure, we may hit the "file name is too long (>100 bytes)" error.
How to fix this issue:
Always call setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX) when compressing complex projects or deep folder structures to avoid errors caused by the file name length limit.
/**
 * Compresses a file or directory into a gzip-compressed tar archive,
 * with POSIX long-file-name support enabled.
 *
 * @param source file or directory to archive; it is added under its own name at the archive root
 * @param tarGzFile archive to write; created, or overwritten if it already exists
 * @throws FileNotFoundException if [source] does not exist
 */
fun compressToTarGz(source: File, tarGzFile: File) {
    // Validate before opening the output so a missing source never creates or truncates the archive
    if (!source.exists()) throw FileNotFoundException("File or directory not found: ${source.path}")

    GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile))).use { gzOut ->
        TarArchiveOutputStream(gzOut).apply {
            // Supports file names longer than 100 bytes
            setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
            // Supports sizes and owner ids that do not fit the classic tar header fields
            setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX)
        }.use { tarOut ->
            addToTar(source, "", tarOut)
        }
    }
}
4. Decompressing a tar.gz File
Here’s how to decompress a .tar.gz archive while preventing the Zip Slip Vulnerability.
package com.mkyong.zip
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import java.io.BufferedInputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileNotFoundException
/**
 * Extracts a gzip-compressed tar archive into a directory.
 *
 * Each entry's target is resolved to its canonical path and must stay inside
 * the canonical [outputDir]; otherwise extraction stops with a
 * [SecurityException] (Zip Slip protection).
 *
 * @param tarGzFile archive to extract
 * @param outputDir directory that receives the extracted entries
 * @throws FileNotFoundException if [tarGzFile] does not exist
 * @throws SecurityException if an entry would be written outside [outputDir]
 */
fun decompressTarGz(tarGzFile: File, outputDir: File) {
    // Canonical form of the extraction root, used to validate every entry
    val canonicalOutputDir = outputDir.canonicalFile
    if (!tarGzFile.exists()) {
        throw FileNotFoundException("File not found: ${tarGzFile.path}")
    }

    GzipCompressorInputStream(BufferedInputStream(FileInputStream(tarGzFile))).use { gzIn ->
        TarArchiveInputStream(gzIn).use { tarIn ->
            while (true) {
                // A null entry marks the end of the archive
                val entry = tarIn.nextEntry ?: break
                val target = File(outputDir, entry.name).canonicalFile

                // Prevent Zip Slip: the resolved target must stay inside outputDir
                val staysInside = target.toPath().startsWith(canonicalOutputDir.toPath())
                if (!staysInside) {
                    throw SecurityException("Zip Slip vulnerability detected! Malicious entry: ${entry.name}")
                }

                if (entry.isDirectory) {
                    target.mkdirs()
                    continue
                }

                // Regular entry: make sure the parent exists, then copy the entry's bytes
                target.parentFile.mkdirs()
                target.outputStream().use { sink -> tarIn.copyTo(sink) }
            }
        }
    }
}
/** Example: extracts `project.tar.gz` into `extracted_project`, both resolved against the working directory. */
fun main() {
    val archive = File("project.tar.gz")
    val destination = File("extracted_project")
    decompressTarGz(archive, destination)
}
The decompressTarGz function ensures both outputDir and extracted files are resolved to their canonical paths, preventing Zip Slip vulnerabilities from symbolic links and malicious relative paths.
5. Adding Files and Folders to an Existing tar.gz
We can update an existing tar.gz file by decompressing, adding files, and recompressing:
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream
import java.io.*
/**
 * Adds files and folders to an existing tar.gz archive by extracting it,
 * copying the new items next to the extracted content, and recompressing.
 *
 * The updated archive is first written to a sibling "<name>.tmp" file and only
 * then moved over [existingTarGz], so a failure part-way through leaves the
 * original archive untouched. [tempDir] and the staging file are removed even
 * when a step fails.
 *
 * @param existingTarGz archive to update; replaced on success
 * @param filesToAdd files or directories copied into the archive root under their own names (with `overwrite = true`)
 * @param tempDir scratch directory; it is deleted first if it already exists, and deleted again afterwards
 */
fun addFilesToExistingTarGz(existingTarGz: File, filesToAdd: List<File>, tempDir: File) {
    // Start from an empty scratch directory
    if (tempDir.exists()) tempDir.deleteRecursively()
    tempDir.mkdirs()

    // Staging file beside the archive, so the final move stays in the same directory
    val stagedTarGz = File(existingTarGz.absoluteFile.parentFile, "${existingTarGz.name}.tmp")
    try {
        // Decompress current archive
        decompressTarGz(existingTarGz, tempDir)

        // Copy new files to temporary directory
        filesToAdd.forEach { file ->
            file.copyRecursively(File(tempDir, file.name), overwrite = true)
        }

        // Recompress the updated content (without tempDir itself) into the staging file
        compressToTarGzContents(tempDir, stagedTarGz)

        // Replace the original only after the new archive was written completely
        java.nio.file.Files.move(
            stagedTarGz.toPath(),
            existingTarGz.toPath(),
            java.nio.file.StandardCopyOption.REPLACE_EXISTING
        )
    } finally {
        // Clean up; after a successful move the staging file is already gone
        stagedTarGz.delete()
        tempDir.deleteRecursively()
    }
}
/**
 * Compresses the children of a directory into a tar.gz archive, so that the
 * directory itself does not appear as a top-level entry.
 *
 * @param sourceDir directory whose direct children become the archive's top-level entries
 * @param tarGzFile archive to write; created, or overwritten if it already exists
 * @throws FileNotFoundException if [sourceDir] cannot be listed (for example, it is missing or not a directory)
 */
fun compressToTarGzContents(sourceDir: File, tarGzFile: File) {
    // List before opening the output so an unusable source never creates or truncates the archive
    val children = sourceDir.listFiles()
        ?: throw FileNotFoundException("Directory not found or not readable: ${sourceDir.path}")

    GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile))).use { gzOut ->
        TarArchiveOutputStream(gzOut).apply {
            // Allow file names longer than 100 bytes
            setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
            // Allow sizes and owner ids that do not fit the classic tar header fields
            setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX)
        }.use { tarOut ->
            children.forEach { file ->
                addToTar(file, "", tarOut)
            }
        }
    }
}
/**
 * Compresses a file or directory into a gzip-compressed tar archive.
 *
 * @param source file or directory to archive; it is added under its own name at the archive root
 * @param tarGzFile archive to write; created, or overwritten if it already exists
 * @throws FileNotFoundException if [source] does not exist
 */
fun compressToTarGz(source: File, tarGzFile: File) {
    // Validate before opening the output so a missing source never creates or truncates the archive
    if (!source.exists()) throw FileNotFoundException("File or directory not found: ${source.path}")

    GzipCompressorOutputStream(BufferedOutputStream(FileOutputStream(tarGzFile))).use { gzOut ->
        TarArchiveOutputStream(gzOut).apply {
            // Allow file names longer than 100 bytes
            setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX)
            // Allow sizes and owner ids that do not fit the classic tar header fields
            setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX)
        }.use { tarOut ->
            addToTar(source, "", tarOut)
        }
    }
}
/**
 * Recursively adds a file or directory to the tar archive.
 *
 * The entry is named [parentPath] followed by the file's own name. A regular
 * file has its content copied into the entry; anything else gets an entry
 * without content, after which its children (if any) are added beneath it.
 *
 * @param file file or directory to add
 * @param parentPath prefix for the entry name; empty for the archive root, otherwise ending in "/"
 * @param tarOut open tar stream that receives the entries
 */
fun addToTar(file: File, parentPath: String, tarOut: TarArchiveOutputStream) {
    val pathInArchive = parentPath + file.name
    tarOut.putArchiveEntry(tarOut.createArchiveEntry(file, pathInArchive))

    if (file.isFile) {
        // Stream the file content into the entry that was just opened
        file.inputStream().use { input -> input.copyTo(tarOut) }
        tarOut.closeArchiveEntry()
        return
    }

    // The entry carries no content, so it is closed before descending
    tarOut.closeArchiveEntry()
    val children = file.listFiles() ?: return
    for (child in children) {
        addToTar(child, "$pathInArchive/", tarOut)
    }
}
/**
 * Extracts a gzip-compressed tar archive into a directory.
 *
 * Each entry's target is resolved to its canonical path and must stay inside
 * the canonical [outputDir]; otherwise extraction stops with a
 * [SecurityException] (Zip Slip protection).
 *
 * @param tarGzFile archive to extract
 * @param outputDir directory that receives the extracted entries
 * @throws FileNotFoundException if [tarGzFile] does not exist
 * @throws SecurityException if an entry would be written outside [outputDir]
 */
fun decompressTarGz(tarGzFile: File, outputDir: File) {
    // Canonical form of the extraction root, used to validate every entry
    val canonicalOutputDir = outputDir.canonicalFile
    if (!tarGzFile.exists()) {
        throw FileNotFoundException("File not found: ${tarGzFile.path}")
    }

    GzipCompressorInputStream(BufferedInputStream(FileInputStream(tarGzFile))).use { gzIn ->
        TarArchiveInputStream(gzIn).use { tarIn ->
            while (true) {
                // A null entry marks the end of the archive
                val entry = tarIn.nextEntry ?: break
                val target = File(outputDir, entry.name).canonicalFile

                // Prevent Zip Slip: the resolved target must stay inside outputDir
                val staysInside = target.toPath().startsWith(canonicalOutputDir.toPath())
                if (!staysInside) {
                    throw SecurityException("Zip Slip vulnerability detected! Malicious entry: ${entry.name}")
                }

                if (entry.isDirectory) {
                    target.mkdirs()
                    continue
                }

                // Regular entry: make sure the parent exists, then copy the entry's bytes
                target.parentFile.mkdirs()
                target.outputStream().use { sink -> tarIn.copyTo(sink) }
            }
        }
    }
}
/**
 * Example: creates `project.tar.gz` from `folder1`, then adds `pom.xml` and
 * `target` to it, using `temp_extracted` as the scratch directory.
 */
fun main() {
    val archive = File("project.tar.gz")
    compressToTarGz(File("folder1"), archive)

    val extras = listOf(File("pom.xml"), File("target"))
    val scratchDir = File("temp_extracted")
    addFilesToExistingTarGz(archive, extras, scratchDir)
}
6. References:
- Gzip – GNU
- Zip Slip Vulnerability
- Apache Commons Compress
- Apache Commons Compress – Archivers and Compressors
- Kotlin Official Docs
- How to compress and decompress zip file in Kotlin – Mkyong.com
- How to create tar.gz in Java – Mkyong.com
- Java – Compress a file in Gzip format – Mkyong.com
- Java – Decompress a Gzip file – Mkyong.com