[node.js] find files by extension, *.html under a folder in nodejs

I'd like to find all *.html files in src folder and all its sub folders using nodejs. What is the best way to do it?

var folder = '/project1/src';
var extension = 'html';
var cb = function(err, results) {
   // results is an array of the files with path relative to the folder
   console.log(results);

}
// This function is what I am looking for. It has to recursively traverse all sub folders. 
findFiles(folder, extension, cb);

I think a lot developers should have great and tested solution and it is better to use it than writing one myself.

This question is related to node.js find file-extension

The answer is


Install

you can install this package walk-sync by

yarn add walk-sync

Usage

const walkSync = require("walk-sync");
const paths = walkSync("./project1/src", {globs: ["**/*.html"]});
console.log(paths);   //all html file path array

The following code does a recursive search inside ./ (change it appropriately) and returns an array of absolute file names ending with .html

var fs = require('fs');
var path = require('path');

var searchRecursive = function(dir, pattern) {
  // This is where we store pattern matches of all files inside the directory
  var results = [];

  // Read contents of directory
  fs.readdirSync(dir).forEach(function (dirInner) {
    // Obtain absolute path
    dirInner = path.resolve(dir, dirInner);

    // Get stats to determine if path is a directory or a file
    var stat = fs.statSync(dirInner);

    // If path is a directory, scan it and combine results
    if (stat.isDirectory()) {
      results = results.concat(searchRecursive(dirInner, pattern));
    }

    // If path is a file and ends with pattern then push it onto results
    if (stat.isFile() && dirInner.endsWith(pattern)) {
      results.push(dirInner);
    }
  });

  return results;
};

var files = searchRecursive('./', '.html'); // replace dir and pattern
                                                // as you seem fit

console.log(files);

Take a look into file-regex

let findFiles = require('file-regex')
let pattern = '\.js'

findFiles(__dirname, pattern, (err, files) => {  
   console.log(files);
})

This above snippet would print all the js files in the current directory.


I just noticed, you are using sync fs methods, that might block you application, here is a promise-based async way using async and q, you can execute it with START=/myfolder FILTER=".jpg" node myfile.js, assuming you put the following code in a file called myfile.js:

Q = require("q")
async = require("async")
path = require("path")
fs = require("fs")

function findFiles(startPath, filter, files){
    var deferred;
    deferred = Q.defer(); //main deferred

    //read directory
    Q.nfcall(fs.readdir, startPath).then(function(list) {
        var ideferred = Q.defer(); //inner deferred for resolve of async each
        //async crawling through dir
        async.each(list, function(item, done) {

            //stat current item in dirlist
            return Q.nfcall(fs.stat, path.join(startPath, item))
                .then(function(stat) {
                    //check if item is a directory
                    if (stat.isDirectory()) {
                        //recursive!! find files in subdirectory
                        return findFiles(path.join(startPath, item), filter, files)
                            .catch(function(error){
                                console.log("could not read path: " + error.toString());
                            })
                            .finally(function() {
                                //resolve async job after promise of subprocess of finding files has been resolved
                                return done();
                             });
                    //check if item is a file, that matches the filter and add it to files array
                    } else if (item.indexOf(filter) >= 0) {
                        files.push(path.join(startPath, item));
                        return done();
                    //file is no directory and does not match the filefilter -> don't do anything
                    } else {
                        return done();
                    }
                })
                .catch(function(error){
                    ideferred.reject("Could not stat: " + error.toString());
                });
        }, function() {
            return ideferred.resolve(); //async each has finished, so resolve inner deferred
        });
        return ideferred.promise;
    }).then(function() {
        //here you could do anything with the files of this recursion step (otherwise you would only need ONE deferred)
        return deferred.resolve(files); //resolve main deferred
    }).catch(function(error) {
        deferred.reject("Could not read dir: " + error.toString());
        return
    });
    return deferred.promise;
}


findFiles(process.env.START, process.env.FILTER, [])
    .then(function(files){
        console.log(files);
    })
    .catch(function(error){
        console.log("Problem finding files: " + error);
})

I have looked at the above answers and have mixed together this version which works for me:

function getFilesFromPath(path, extension) {
    let files = fs.readdirSync( path );
    return files.filter( file => file.match(new RegExp(`.*\.(${extension})`, 'ig')));
}

console.log(getFilesFromPath("./testdata", ".txt"));

This test will return an array of filenames from the files found in the folder at the path ./testdata. Working on node version 8.11.3.


Can't add a comment because of reputation, but notice the following:

Using fs.readdir or node-glob to find a wildcard set of files in a folder of 500,000 files took ~2s. Using exec with DIR took ~0.05s (non recursive) or ~0.45s (recursive). (I was looking for ~14 files matching my pattern in a single directory).

So far, I have failed to find any nodejs implementation which uses low level OS wildcard searching for efficiency. But the above DIR/ls based code works wonderfully in windows in terms of efficiency. linux find, however, will likely be very slow for large directories.


What, hang on?! ... Okay ya, maybe this makes more sense to someones else too.

[nodejs 7 mind you]

fs = import('fs');
let dirCont = fs.readdirSync( dir );
let files = dirCont.filter( function( elm ) {return elm.match(/.*\.(htm?html)/ig);});

Do whatever with regex make it an argument you set in the function with a default etc.


i like using the glob package:

const glob = require('glob');

glob(__dirname + '/**/*.html', {}, (err, files)=>{
  console.log(files)
})

You can use OS help for this. Here is a cross-platform solution:

1. The bellow function uses ls and dir and does not search recursively but it has relative paths

var exec = require('child_process').exec;
function findFiles(folder,extension,cb){
    var command = "";
    if(/^win/.test(process.platform)){
        command = "dir /B "+folder+"\\*."+extension;
    }else{
        command = "ls -1 "+folder+"/*."+extension;
    }
    exec(command,function(err,stdout,stderr){
        if(err)
            return cb(err,null);
        //get rid of \r from windows
        stdout = stdout.replace(/\r/g,"");
        var files = stdout.split("\n");
        //remove last entry because it is empty
        files.splice(-1,1);
        cb(err,files);
    });
}

findFiles("folderName","html",function(err,files){
    console.log("files:",files);
})

2. The bellow function uses find and dir, searches recursively but on windows it has absolute paths

var exec = require('child_process').exec;
function findFiles(folder,extension,cb){
    var command = "";
    if(/^win/.test(process.platform)){
        command = "dir /B /s "+folder+"\\*."+extension;
    }else{
        command = 'find '+folder+' -name "*.'+extension+'"'
    }
    exec(command,function(err,stdout,stderr){
        if(err)
            return cb(err,null);
        //get rid of \r from windows
        stdout = stdout.replace(/\r/g,"");
        var files = stdout.split("\n");
        //remove last entry because it is empty
        files.splice(-1,1);
        cb(err,files);
    });
}

findFiles("folder","html",function(err,files){
    console.log("files:",files);
})

Old post but ES6 now handles this out of the box with the includes method.

let files = ['file.json', 'other.js'];

let jsonFiles = files.filter(file => file.includes('.json'));

console.log("Files: ", jsonFiles) ==> //file.json

Based on Lucio's code, I made a module. It will return an away with all the files with specific extensions under the one. Just post it here in case anybody needs it.

var path = require('path'), 
    fs   = require('fs');


/**
 * Find all files recursively in specific folder with specific extension, e.g:
 * findFilesInDir('./project/src', '.html') ==> ['./project/src/a.html','./project/src/build/index.html']
 * @param  {String} startPath    Path relative to this file or other file which requires this files
 * @param  {String} filter       Extension name, e.g: '.html'
 * @return {Array}               Result files with path string in an array
 */
function findFilesInDir(startPath,filter){

    var results = [];

    if (!fs.existsSync(startPath)){
        console.log("no dir ",startPath);
        return;
    }

    var files=fs.readdirSync(startPath);
    for(var i=0;i<files.length;i++){
        var filename=path.join(startPath,files[i]);
        var stat = fs.lstatSync(filename);
        if (stat.isDirectory()){
            results = results.concat(findFilesInDir(filename,filter)); //recurse
        }
        else if (filename.indexOf(filter)>=0) {
            console.log('-- found: ',filename);
            results.push(filename);
        }
    }
    return results;
}

module.exports = findFilesInDir;

my two pence, using map in place of for-loop

var path = require('path'), fs = require('fs');

var findFiles = function(folder, pattern = /.*/, callback) {
  var flist = [];

  fs.readdirSync(folder).map(function(e){ 
    var fname = path.join(folder, e);
    var fstat = fs.lstatSync(fname);
    if (fstat.isDirectory()) {
      // don't want to produce a new array with concat
      Array.prototype.push.apply(flist, findFiles(fname, pattern, callback)); 
    } else {
      if (pattern.test(fname)) {
        flist.push(fname);
        if (callback) {
          callback(fname);
        }
      }
    }
  });
  return flist;
};

// HTML files   
var html_files = findFiles(myPath, /\.html$/, function(o) { console.log('look what we have found : ' + o} );

// All files
var all_files = findFiles(myPath);

You can use Filehound to do this.

For example: find all .html files in /tmp:

const Filehound = require('filehound');

Filehound.create()
  .ext('html')
  .paths("/tmp")
  .find((err, htmlFiles) => {
    if (err) return console.error("handle err", err);

    console.log(htmlFiles);
});

For further information (and examples), check out the docs: https://github.com/nspragg/filehound

Disclaimer: I'm the author.


Examples related to node.js

Hide Signs that Meteor.js was Used Querying date field in MongoDB with Mongoose SyntaxError: Cannot use import statement outside a module Server Discovery And Monitoring engine is deprecated How to fix ReferenceError: primordials is not defined in node UnhandledPromiseRejectionWarning: This error originated either by throwing inside of an async function without a catch block dyld: Library not loaded: /usr/local/opt/icu4c/lib/libicui18n.62.dylib error running php after installing node with brew on Mac internal/modules/cjs/loader.js:582 throw err DeprecationWarning: Buffer() is deprecated due to security and usability issues when I move my script to another server Please run `npm cache clean`

Examples related to find

Find a file by name in Visual Studio Code Explaining the 'find -mtime' command find files by extension, *.html under a folder in nodejs MongoDB Show all contents from all collections How can I find a file/directory that could be anywhere on linux command line? Get all files modified in last 30 days in a directory FileNotFoundError: [Errno 2] No such file or directory Linux find and grep command together find . -type f -exec chmod 644 {} ; Find all stored procedures that reference a specific column in some table

Examples related to file-extension

find files by extension, *.html under a folder in nodejs JPG vs. JPEG image formats What is phtml, and when should I use a .phtml extension rather than .php? How to get the file extension in PHP? Given a filesystem path, is there a shorter way to extract the filename without its extension? What file uses .md extension and how should I edit them? How can I check the extension of a file? Changing file extension in Python How can I find all of the distinct file extensions in a folder hierarchy? Should you use .htm or .html file extension? What is the difference, and which file is correct?