Extract tar.gz-archives with tar-cs library
2013-10-06 16:51:32 Moscow time
The code below demonstrates how to extract files and directories from tar.gz archives.
- tar-cs is a C# library for working with tar archives. It is released under the New BSD License.
- To work with gzip archives, the GZipStream class from the .NET BCL is sufficient.
The code:
using System;
using System.IO;
using System.IO.Compression;
using tar_cs;
/// <summary>
/// Example of tar-cs library usage to extract tar.gz-archives.
/// Please use the latest version (from trunk) of the library.
/// </summary>
public static class TarGZip
{
    /// <summary>
    /// Extracts every entry of a tar.gz archive into a directory.
    /// </summary>
    /// <param name="inputFile">Path of the tar.gz archive to read.</param>
    /// <param name="outputDirectory">
    /// Directory that receives the extracted entries. An empty string means the current directory.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="inputFile"/> or <paramref name="outputDirectory"/> is null.
    /// </exception>
    /// <exception cref="InvalidDataException">
    /// An entry name resolves to a location outside <paramref name="outputDirectory"/>.
    /// </exception>
    /// <exception cref="NotSupportedException">
    /// An entry is neither a regular file nor a directory.
    /// </exception>
    public static void Extract(string inputFile, string outputDirectory)
    {
        if (inputFile == null)
        {
            throw new ArgumentNullException("inputFile");
        }

        if (outputDirectory == null)
        {
            throw new ArgumentNullException("outputDirectory");
        }

        // Resolve the extraction root once so every entry is validated against the same absolute path.
        // Path.GetFullPath rejects an empty string, so map it to the current directory explicitly.
        string outputRoot = outputDirectory.Length == 0
            ? Directory.GetCurrentDirectory()
            : Path.GetFullPath(outputDirectory);

        using (FileStream inputStream = File.OpenRead(inputFile))
        using (Stream tarStream = UnGZipStream(inputStream))
        {
            var tarReader = new TarReader(tarStream);
            while (tarReader.MoveNext(false)) // Moves pointer to the next file in the tar archive.
            {
                ExtractTarEntry(tarReader, outputRoot);
            }
        }
    }

    /// <summary>
    /// Decompresses a gzip stream completely into memory.
    /// Since GZipStream.Position Property is not implemented,
    /// it is necessary to use MemoryStream as intermediate storage.
    /// </summary>
    /// <param name="inputStream">The gzip-compressed input stream.</param>
    /// <returns>Un-gzipped stream, positioned at its beginning. The caller owns and disposes it.</returns>
    private static Stream UnGZipStream(Stream inputStream)
    {
        MemoryStream memoryStream = new MemoryStream();
        try
        {
            using (GZipStream gZipStream = new GZipStream(inputStream, CompressionMode.Decompress))
            {
                gZipStream.CopyTo(memoryStream);
            }

            memoryStream.Position = 0;
            return memoryStream;
        }
        catch
        {
            // Do not leave the buffer behind when decompression fails half-way.
            memoryStream.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Writes the entry the reader currently points at below the extraction root.
    /// </summary>
    /// <param name="tarReader">Reader positioned on the entry to extract.</param>
    /// <param name="outputDirectory">Absolute path of the extraction root.</param>
    private static void ExtractTarEntry(TarReader tarReader, string outputDirectory)
    {
        string relativePath = tarReader.FileInfo.FileName;

        // Relative path can contain slash, not backslash.
        // Path.GetFullPath() converts separators and collapses "." / ".." segments.
        string fullPath = Path.GetFullPath(Path.Combine(outputDirectory, relativePath));

        // Entry names come from the archive and are untrusted: "../x" or an absolute
        // name would otherwise be written outside the extraction root.
        if (!IsInsideDirectory(fullPath, outputDirectory))
        {
            throw new InvalidDataException("Entry is outside of the output directory: " + relativePath);
        }

        switch (tarReader.FileInfo.EntryType)
        {
            case EntryType.File:
            case EntryType.FileObsolete:
                // Archives are not required to contain an entry for every parent directory.
                string parentDirectory = Path.GetDirectoryName(fullPath);
                if (!string.IsNullOrEmpty(parentDirectory))
                {
                    Directory.CreateDirectory(parentDirectory);
                }

                using (FileStream outputStream = File.Create(fullPath))
                {
                    // Read data from a current file to a Stream.
                    tarReader.Read(outputStream);
                }
                break;
            case EntryType.Directory:
                Directory.CreateDirectory(fullPath);
                break;
            default:
                throw new NotSupportedException("Not supported entry type: " + tarReader.FileInfo.EntryType);
        }
    }

    /// <summary>
    /// Checks whether an absolute path is the given directory itself or lies below it.
    /// </summary>
    /// <param name="fullPath">Absolute, normalized path to test.</param>
    /// <param name="directory">Absolute, normalized directory path.</param>
    /// <returns>true when <paramref name="fullPath"/> does not escape <paramref name="directory"/>.</returns>
    private static bool IsInsideDirectory(string fullPath, string directory)
    {
        string separator = Path.DirectorySeparatorChar.ToString();

        // Compare against "root + separator" so that "/out-evil" is not accepted for root "/out".
        string directoryPrefix = directory.EndsWith(separator, StringComparison.Ordinal)
            ? directory
            : directory + separator;

        return fullPath.StartsWith(directoryPrefix, StringComparison.Ordinal)
            || string.Equals(fullPath + separator, directoryPrefix, StringComparison.Ordinal);
    }
}
Please note that because the GZipStream.Position property is not implemented, it is necessary either to use a MemoryStream as intermediate storage or to implement a GZipStream wrapper that supports the Position property.