I'd like to find all *.html files in src folder and all its sub folders using nodejs. What is the best way to do it?
var folder = '/project1/src';
var extension = 'html';
var cb = function(err, results) {
// results is an array of the files with path relative to the folder
console.log(results);
}
// This function is what I am looking for. It has to recursively traverse all sub folders.
findFiles(folder, extension, cb);
I think a lot developers should have great and tested solution and it is better to use it than writing one myself.
This question is related to
node.js
find
file-extension
you can install this package walk-sync by
yarn add walk-sync
const walkSync = require("walk-sync");
const paths = walkSync("./project1/src", {globs: ["**/*.html"]});
console.log(paths); //all html file path array
The following code does a recursive search inside ./ (change it appropriately) and returns an array of absolute file names ending with .html
var fs = require('fs');
var path = require('path');
var searchRecursive = function(dir, pattern) {
// This is where we store pattern matches of all files inside the directory
var results = [];
// Read contents of directory
fs.readdirSync(dir).forEach(function (dirInner) {
// Obtain absolute path
dirInner = path.resolve(dir, dirInner);
// Get stats to determine if path is a directory or a file
var stat = fs.statSync(dirInner);
// If path is a directory, scan it and combine results
if (stat.isDirectory()) {
results = results.concat(searchRecursive(dirInner, pattern));
}
// If path is a file and ends with pattern then push it onto results
if (stat.isFile() && dirInner.endsWith(pattern)) {
results.push(dirInner);
}
});
return results;
};
var files = searchRecursive('./', '.html'); // replace dir and pattern
// as you seem fit
console.log(files);
Take a look into file-regex
let findFiles = require('file-regex')
let pattern = '\.js'
findFiles(__dirname, pattern, (err, files) => {
console.log(files);
})
This above snippet would print all the js
files in the current directory.
I just noticed, you are using sync fs methods, that might block you application, here is a promise-based async way using async and q, you can execute it with START=/myfolder FILTER=".jpg" node myfile.js, assuming you put the following code in a file called myfile.js:
Q = require("q")
async = require("async")
path = require("path")
fs = require("fs")
function findFiles(startPath, filter, files){
var deferred;
deferred = Q.defer(); //main deferred
//read directory
Q.nfcall(fs.readdir, startPath).then(function(list) {
var ideferred = Q.defer(); //inner deferred for resolve of async each
//async crawling through dir
async.each(list, function(item, done) {
//stat current item in dirlist
return Q.nfcall(fs.stat, path.join(startPath, item))
.then(function(stat) {
//check if item is a directory
if (stat.isDirectory()) {
//recursive!! find files in subdirectory
return findFiles(path.join(startPath, item), filter, files)
.catch(function(error){
console.log("could not read path: " + error.toString());
})
.finally(function() {
//resolve async job after promise of subprocess of finding files has been resolved
return done();
});
//check if item is a file, that matches the filter and add it to files array
} else if (item.indexOf(filter) >= 0) {
files.push(path.join(startPath, item));
return done();
//file is no directory and does not match the filefilter -> don't do anything
} else {
return done();
}
})
.catch(function(error){
ideferred.reject("Could not stat: " + error.toString());
});
}, function() {
return ideferred.resolve(); //async each has finished, so resolve inner deferred
});
return ideferred.promise;
}).then(function() {
//here you could do anything with the files of this recursion step (otherwise you would only need ONE deferred)
return deferred.resolve(files); //resolve main deferred
}).catch(function(error) {
deferred.reject("Could not read dir: " + error.toString());
return
});
return deferred.promise;
}
findFiles(process.env.START, process.env.FILTER, [])
.then(function(files){
console.log(files);
})
.catch(function(error){
console.log("Problem finding files: " + error);
})
I have looked at the above answers and have mixed together this version which works for me:
function getFilesFromPath(path, extension) {
let files = fs.readdirSync( path );
return files.filter( file => file.match(new RegExp(`.*\.(${extension})`, 'ig')));
}
console.log(getFilesFromPath("./testdata", ".txt"));
This test will return an array of filenames from the files found in the folder at the path ./testdata
. Working on node version 8.11.3.
Can't add a comment because of reputation, but notice the following:
Using fs.readdir or node-glob to find a wildcard set of files in a folder of 500,000 files took ~2s. Using exec with DIR took ~0.05s (non recursive) or ~0.45s (recursive). (I was looking for ~14 files matching my pattern in a single directory).
So far, I have failed to find any nodejs implementation which uses low level OS wildcard searching for efficiency. But the above DIR/ls based code works wonderfully in windows in terms of efficiency. linux find, however, will likely be very slow for large directories.
What, hang on?! ... Okay ya, maybe this makes more sense to someones else too.
[nodejs 7 mind you]
fs = import('fs');
let dirCont = fs.readdirSync( dir );
let files = dirCont.filter( function( elm ) {return elm.match(/.*\.(htm?html)/ig);});
Do whatever with regex make it an argument you set in the function with a default etc.
i like using the glob package:
const glob = require('glob');
glob(__dirname + '/**/*.html', {}, (err, files)=>{
console.log(files)
})
You can use OS help for this. Here is a cross-platform solution:
1. The bellow function uses ls
and dir
and does not search recursively but it has relative paths
var exec = require('child_process').exec;
function findFiles(folder,extension,cb){
var command = "";
if(/^win/.test(process.platform)){
command = "dir /B "+folder+"\\*."+extension;
}else{
command = "ls -1 "+folder+"/*."+extension;
}
exec(command,function(err,stdout,stderr){
if(err)
return cb(err,null);
//get rid of \r from windows
stdout = stdout.replace(/\r/g,"");
var files = stdout.split("\n");
//remove last entry because it is empty
files.splice(-1,1);
cb(err,files);
});
}
findFiles("folderName","html",function(err,files){
console.log("files:",files);
})
2. The bellow function uses find
and dir
, searches recursively but on windows it has absolute paths
var exec = require('child_process').exec;
function findFiles(folder,extension,cb){
var command = "";
if(/^win/.test(process.platform)){
command = "dir /B /s "+folder+"\\*."+extension;
}else{
command = 'find '+folder+' -name "*.'+extension+'"'
}
exec(command,function(err,stdout,stderr){
if(err)
return cb(err,null);
//get rid of \r from windows
stdout = stdout.replace(/\r/g,"");
var files = stdout.split("\n");
//remove last entry because it is empty
files.splice(-1,1);
cb(err,files);
});
}
findFiles("folder","html",function(err,files){
console.log("files:",files);
})
Old post but ES6 now handles this out of the box with the includes
method.
let files = ['file.json', 'other.js'];
let jsonFiles = files.filter(file => file.includes('.json'));
console.log("Files: ", jsonFiles) ==> //file.json
Based on Lucio's code, I made a module. It will return an away with all the files with specific extensions under the one. Just post it here in case anybody needs it.
var path = require('path'),
fs = require('fs');
/**
* Find all files recursively in specific folder with specific extension, e.g:
* findFilesInDir('./project/src', '.html') ==> ['./project/src/a.html','./project/src/build/index.html']
* @param {String} startPath Path relative to this file or other file which requires this files
* @param {String} filter Extension name, e.g: '.html'
* @return {Array} Result files with path string in an array
*/
function findFilesInDir(startPath,filter){
var results = [];
if (!fs.existsSync(startPath)){
console.log("no dir ",startPath);
return;
}
var files=fs.readdirSync(startPath);
for(var i=0;i<files.length;i++){
var filename=path.join(startPath,files[i]);
var stat = fs.lstatSync(filename);
if (stat.isDirectory()){
results = results.concat(findFilesInDir(filename,filter)); //recurse
}
else if (filename.indexOf(filter)>=0) {
console.log('-- found: ',filename);
results.push(filename);
}
}
return results;
}
module.exports = findFilesInDir;
my two pence, using map in place of for-loop
var path = require('path'), fs = require('fs');
var findFiles = function(folder, pattern = /.*/, callback) {
var flist = [];
fs.readdirSync(folder).map(function(e){
var fname = path.join(folder, e);
var fstat = fs.lstatSync(fname);
if (fstat.isDirectory()) {
// don't want to produce a new array with concat
Array.prototype.push.apply(flist, findFiles(fname, pattern, callback));
} else {
if (pattern.test(fname)) {
flist.push(fname);
if (callback) {
callback(fname);
}
}
}
});
return flist;
};
// HTML files
var html_files = findFiles(myPath, /\.html$/, function(o) { console.log('look what we have found : ' + o} );
// All files
var all_files = findFiles(myPath);
You can use Filehound to do this.
For example: find all .html files in /tmp:
const Filehound = require('filehound');
Filehound.create()
.ext('html')
.paths("/tmp")
.find((err, htmlFiles) => {
if (err) return console.error("handle err", err);
console.log(htmlFiles);
});
For further information (and examples), check out the docs: https://github.com/nspragg/filehound
Disclaimer: I'm the author.
Source: Stackoverflow.com