Just looking for a simple solution to downloading and unzipping .zip
or .tar.gz
files in Node.js on any operating system.
Not sure if this is built in or I have to use a separate library. Any ideas? Looking for just a couple lines of code so when the next zip file comes that I want to download in node, it's a no brainer. Feel like this should be easy and/or built in, but I can't find anything. Thanks!
I tried a few of the nodejs unzip libraries including adm-zip and unzip, then settled on extract-zip which is a wrapper around yauzl. Seemed the simplest to implement.
https://www.npmjs.com/package/extract-zip
// Unpack the archive `zipfile` into the directory `outputPath` with extract-zip.
const extract = require('extract-zip');

// Invoked by extract() once it is done; `err` carries the failure, if any.
const onExtracted = (err) => {
  // handle err
};

extract(zipfile, { dir: outputPath }, onExtracted);
You can also extract existing zip files with the "unzip" package. It works for files of any size; you need to add it as a dependency from npm.
// Stream the zip file at `filePath` through unzip's extractor into `moveIntoFolder`,
// then notify the caller once the extractor emits 'close'.
fs.createReadStream(filePath)
  .pipe(unzip.Extract({ path: moveIntoFolder }))
  .on('close', function () {
    // To do after unzip
    callback();
  });
yauzl is a robust library for unzipping. Design principles:
Currently has 97% test coverage.
Download and extract for .tar.gz
:
const https = require("https");
const tar = require("tar");

// Download a .tar.gz over HTTPS and hand the response body straight to tar.x(),
// so the archive is unpacked while it is still being received.
https
  .get("https://url.to/your.tar.gz", function (response) {
    if (response.statusCode !== 200) {
      // Anything but a plain 200 is not the archive (error page, redirect, ...):
      // discard the body instead of feeding it to the extractor.
      response.resume();
      console.error("Download failed with HTTP status " + response.statusCode);
      return;
    }

    response
      .pipe(
        tar.x({
          strip: 1,
          C: "some-dir"
        })
      )
      .on("error", function (err) {
        console.error("Extraction failed:", err);
      });
  })
  .on("error", function (err) {
    // Without this listener a network failure is thrown as an uncaught 'error' event.
    console.error("Download failed:", err);
  });
Node has builtin support for gzip and deflate via the zlib module:
const zlib = require('zlib');

// Completion handler for zlib.gunzip(): log the failure, otherwise log the result.
const onGunzip = (error, inflated) => {
  if (error) {
    console.error(error);
    return;
  }
  console.log(inflated);
};

// Decompress the gzip data held in `gzipBuffer` in a single call.
zlib.gunzip(gzipBuffer, onGunzip);
Edit: You can even pipe
the data directly through e.g. Gunzip
(using request):
const request = require('request');
const zlib = require('zlib');
const fs = require('fs');

// Destination file for the decompressed data.
const out = fs.createWriteStream('out');

// Fetch http://example.com/foo.gz, gunzip it and store the results in 'out'
const download = request('http://example.com/foo.gz');
const gunzipped = download.pipe(zlib.createGunzip());
gunzipped.pipe(out);
For tar archives, there is Isaacs' tar module, which is used by npm.
Edit 2: Updated answer as zlib
doesn't support the zip
format. This will only work for gzip
.
I found success with the following, works with .zip
(Simplified here for posting: no error checking & just unzips all files to current folder)
/**
 * Download a .zip archive and extract all of its files into the current folder.
 *
 * @param {string|Object} URL - Address of the archive; handed unchanged to `get()`.
 *   When it is not a string, its `protocol` property (if any) selects the client.
 * @returns {Object} The request object returned by `get()`, so callers can attach
 *   their own listeners (for example for 'error').
 */
function DownloadAndUnzip(URL) {
  var unzip = require('unzip');

  // http.get() refuses https: addresses, so pick the client module from the protocol.
  var protocol = typeof URL === 'string' ? URL : String((URL && URL.protocol) || '');
  var client = /^https:/i.test(protocol) ? require('https') : require('http');

  var request = client.get(URL, function (response) {
    response.pipe(unzip.Extract({ path: './' }));
  });

  return request;
}
Another working example:
var zlib = require('zlib');
var tar = require('tar');
var ftp = require('ftp');

// Text content of every archive entry, keyed by the entry's path.
// A prototype-less object is used so that entry names such as "constructor"
// or "length" cannot collide with inherited members.
var files = Object.create(null);
var conn = new ftp();

// Connect to the FTP server, fetch the tarball, gunzip + untar it in memory
// and print every entry once the transfer has succeeded.
conn.on('connect', function (e) {
  conn.auth(function (e) {
    if (e) {
      throw e;
    }

    conn.get('/tz/tzdata-latest.tar.gz', function (e, stream) {
      if (e) {
        // No stream to read from when the transfer could not be started.
        console.log('ERROR during get(): ' + e);
        conn.end();
        return;
      }

      stream.on('success', function () {
        conn.end();
        console.log("Processing files ...");
        Object.keys(files).forEach(function (name) {
          var file = files[name];
          console.log("filename: " + name);
          console.log(file);
        });
        console.log("OK");
      });

      stream.on('error', function (e) {
        console.log('ERROR during get(): ' + e);
        conn.end();
      });

      console.log("Reading ...");
      stream
        .pipe(zlib.createGunzip())
        .pipe(tar.Parse())
        .on("entry", function (e) {
          var filename = e.props["path"];
          console.log("filename:" + filename);
          if (files[filename] == null) {
            files[filename] = "";
          }
          // Append each chunk of the entry as text.
          // NOTE(review): converting chunk by chunk may split a multi-byte
          // character that straddles two chunks; confirm the data is ASCII.
          e.on("data", function (c) {
            files[filename] += c.toString();
          });
        });
    });
  });
})
.connect(21, "ftp.iana.org");
I was looking for this for a long time and found no simple working example, but based on these answers I created the downloadAndUnzip()
function.
The usage is quite simple:
// Request yourfile.xml out of archive.zip; print its content, or the error on failure.
const pending = downloadAndUnzip('http://your-domain.com/archive.zip', 'yourfile.xml');

pending
  .then((content) => {
    // unzipped content of yourfile.xml in root of archive.zip
    console.log(content);
  })
  .catch((failure) => {
    console.error(failure);
  });
And here is the declaration:
var AdmZip = require('adm-zip');
var request = require('request');

/**
 * Download a zip archive and return the text of one file stored in it.
 *
 * @param {string} url - Address of the zip archive.
 * @param {string} fileName - Name of the wanted file, matched against each
 *   archive entry's `entryName`.
 * @returns {Promise<string>} Resolves with the entry's data decoded as UTF-8.
 *   Rejects when the request fails, when the server answers with a non-2xx
 *   status, or when no entry is named `fileName`.
 */
var downloadAndUnzip = function (url, fileName) {
  /**
   * Download a file
   *
   * @param url
   * @returns {Promise} resolves with the response body
   */
  var download = function (url) {
    return new Promise(function (resolve, reject) {
      request({
        url: url,
        method: 'GET',
        encoding: null
      }, function (err, response, body) {
        if (err) {
          return reject(err);
        }
        // An error page is not a zip archive: fail here with a clear message
        // instead of letting the unzip step choke on it.
        if (response.statusCode < 200 || response.statusCode >= 300) {
          return reject(new Error('Download failed with HTTP status ' + response.statusCode + ': ' + url));
        }
        resolve(body);
      });
    });
  };

  /**
   * Unzip a Buffer
   *
   * @param buffer
   * @returns {Promise} resolves with the text of the entry named `fileName`
   */
  var unzip = function (buffer) {
    return new Promise(function (resolve, reject) {
      var zip = new AdmZip(buffer);
      var wanted = String(fileName);
      // First entry whose name matches; later duplicates are ignored.
      var match = zip.getEntries().find(function (zipEntry) {
        return zipEntry.entryName === wanted;
      });

      if (!match) {
        return reject(new Error('No file found in archive: ' + fileName));
      }
      resolve(match.getData().toString('utf8'));
    });
  };

  return download(url)
    .then(unzip);
};
Check out gunzip-file
import fs from 'fs';
import gunzip from 'gunzip-file';

/**
 * Decompress every `.gz` file found in the `tmp` directory, writing each result
 * next to its source under the same name without the `.gz` suffix.
 *
 * The returned promise settles only after gunzip-file has signalled completion
 * for every file. Errors are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
const unzipAll = async () => {
  try {
    const compFiles = fs.readdirSync('tmp');
    const pending = compFiles
      .filter((file) => file.endsWith('.gz'))
      .map(
        (file) =>
          new Promise((resolve) => {
            // gunzip-file is callback-based: its third argument is invoked when
            // the destination file has been written, so wrap it to make it awaitable.
            gunzip(`tmp/${file}`, `tmp/${file.slice(0, -3)}`, () => resolve());
          })
      );
    await Promise.all(pending);
  } catch (err) {
    console.log(err);
  }
};
It's 2017 (October 26th, to be exact).
For an ancient and pervasive technology such as unzip I would expect there to exist a fairly popular, mature node.js unzip library that is "stagnant" and "unmaintained" because it is "complete".
However, most libraries appear either to be completely terrible or to have commits as recently as just a few months ago. This is quite concerning... so I've gone through several unzip libraries, read their docs, and tried their examples to try to figure out WTF. For example, I've tried these:
yauzl
node-stream-zip
node-unzipper
node-unzip
jszip
zip
Update 2020: Haven't tried it yet, but there's also archiver
yauzl
Works great for completely downloaded file. Not as great for streaming.
Well documented. Works well. Makes sense.
node-stream-zip
antelle's node-stream-zip
seems to be the best
Install:
npm install --save node-stream-zip
Usage:
'use strict';

var fs = require('fs');
var path = require('path');
var StreamZip = require('node-stream-zip');

// Directory that archive entries are extracted into.
var outputDir = './temp';

var zip = new StreamZip({
  file: './example.zip'
, storeEntries: true
});

zip.on('error', function (err) { console.error('[ERROR]', err); });

zip.on('ready', function () {
  console.log('All entries read: ' + zip.entriesCount);
  //console.log(zip.entries());
});

// Extract each file entry into outputDir, skipping directories and any entry
// whose name would resolve to a location outside outputDir.
zip.on('entry', function (entry) {
  var pathname = path.resolve(outputDir, entry.name);
  var relative = path.relative(outputDir, pathname);
  // Only a leading ".." segment (or an absolute result) means the target lies
  // outside outputDir; a name that merely contains ".." (e.g. "a..b.txt") is fine.
  var escapesOutputDir =
    relative === '..' ||
    relative.startsWith('..' + path.sep) ||
    path.isAbsolute(relative);

  if (escapesOutputDir) {
    console.warn("[zip warn]: ignoring maliciously crafted paths in zip file:", entry.name);
    return;
  }

  if ('/' === entry.name[entry.name.length - 1]) {
    console.log('[DIR]', entry.name);
    return;
  }

  console.log('[FILE]', entry.name);
  zip.stream(entry.name, function (err, stream) {
    if (err) { console.error('Error:', err.toString()); return; }

    stream.on('error', function (err) { console.log('[ERROR]', err); return; });

    // example: print contents to screen
    //stream.pipe(process.stdout);

    // example: save contents to file
    fs.mkdir(
      path.dirname(pathname),
      { recursive: true },
      function (err) {
        if (err) {
          // The parent directory is missing, so the write below could not succeed.
          console.error('[ERROR]', err);
          return;
        }
        stream.pipe(fs.createWriteStream(pathname));
      }
    );
  });
});
Security Warning:
Not sure if this checks entry.name
for maliciously crafted paths that would resolve incorrectly (such as ../../../foo
or /etc/passwd
).
You can easily check this yourself by comparing /\.\./.test(path.relative('./to/dir', path.resolve('./to/dir', entry.name)))
.
Pros: (Why do I think it's the best?)
Cons:
zip.extract()
doesn't seem to work (hence I used zip.stream()
in my example)
Install:
npm install --save unzipper
Usage:
'use strict';

var fs = require('fs');
var unzipper = require('unzipper');

// Walk through ./example.zip entry by entry: log directories, and write the
// content of every file entry to stdout.
fs.createReadStream('./example.zip')
  .pipe(unzipper.Parse())
  .on('entry', function (entry) {
    var fileName = entry.path;
    var type = entry.type; // 'Directory' or 'File'

    console.log();
    if (/\/$/.test(fileName)) {
      console.log('[DIR]', fileName, type);
      // An entry that is not piped anywhere has to be drained explicitly.
      entry.autodrain();
      return;
    }

    console.log('[FILE]', fileName, type);

    // TODO: probably also needs the security check

    entry.pipe(process.stdout/*fs.createWriteStream('output/path')*/);
    // NOTE: To ignore use entry.autodrain() instead of entry.pipe()
  })
  .on('error', function (err) {
    // Without a listener a parse failure is thrown as an uncaught 'error' event.
    console.error('[ERROR]', err);
  });
Pros:
node-stream-zip
, but less controlunzip
Cons:
Source: Stackoverflow.com