How do I limit os.walk
to only return files in the directory I provide it?
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
return outputList
Use the walklevel
function.
import os
def walklevel(some_dir, level=1):
some_dir = some_dir.rstrip(os.path.sep)
assert os.path.isdir(some_dir)
num_sep = some_dir.count(os.path.sep)
for root, dirs, files in os.walk(some_dir):
yield root, dirs, files
num_sep_this = root.count(os.path.sep)
if num_sep + level <= num_sep_this:
del dirs[:]
It works just like os.walk
, but you can pass it a level
parameter that indicates how deep the recursion will go.
If you have more complex requirements than just the top directory (eg ignore VCS dirs etc), you can also modify the list of directories to prevent os.walk recursing through them.
ie:
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
dirs[:] = [d for d in dirs if is_good(d)]
for f in files:
do_stuff()
Note - be careful to mutate the list, rather than just rebind it. Obviously os.walk doesn't know about the external rebinding.
root folder changes for every directory os.walk finds. I solver that checking if root == directory
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
if root == dir_name: #This only meet parent folder
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
return outputList
You could also do the following:
for path, subdirs, files in os.walk(dir_name):
for name in files:
if path == ".": #this will filter the files in the current directory
#code here
You could use os.listdir()
which returns a list of names (for both files and directories) in a given directory. If you need to distinguish between files and directories, call os.stat()
on each name.
root folder changes for every directory os.walk finds. I solver that checking if root == directory
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
if root == dir_name: #This only meet parent folder
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
return outputList
A slight change to Alex's answer, but using __next__()
:
print(next(os.walk('d:/'))[2])
or
print(os.walk('d:/').__next__()[2])
with the [2]
being the file
in root, dirs, file
mentioned in other answers
I think the solution is actually very simple.
use
break
to only do first iteration of the for loop, there must be a more elegant way.
for root, dirs, files in os.walk(dir_name):
for f in files:
...
...
break
...
The first time you call os.walk, it returns tulips for the current directory, then on next loop the contents of the next directory.
Take original script and just add a break.
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
break
return outputList
You could use os.listdir()
which returns a list of names (for both files and directories) in a given directory. If you need to distinguish between files and directories, call os.stat()
on each name.
If you have more complex requirements than just the top directory (eg ignore VCS dirs etc), you can also modify the list of directories to prevent os.walk recursing through them.
ie:
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
dirs[:] = [d for d in dirs if is_good(d)]
for f in files:
do_stuff()
Note - be careful to mutate the list, rather than just rebind it. Obviously os.walk doesn't know about the external rebinding.
Why not simply use a range
and os.walk
combined with the zip
? Is not the best solution, but would work too.
For example like this:
# your part before
for count, (root, dirs, files) in zip(range(0, 1), os.walk(dir_name)):
# logic stuff
# your later part
Works for me on python 3.
Also: A break
is simpler too btw. (Look at the answer from @Pieter)
You can use this snippet
for root, dirs, files in os.walk(directory):
if level > 0:
# do some stuff
else:
break
level-=1
A slight change to Alex's answer, but using __next__()
:
print(next(os.walk('d:/'))[2])
or
print(os.walk('d:/').__next__()[2])
with the [2]
being the file
in root, dirs, file
mentioned in other answers
In Python 3, I was able to do this:
import os
dir = "/path/to/files/"
#List all files immediately under this folder:
print ( next( os.walk(dir) )[2] )
#List all folders immediately under this folder:
print ( next( os.walk(dir) )[1] )
There is a catch when using listdir. The os.path.isdir(identifier) must be an absolute path. To pick subdirectories you do:
for dirname in os.listdir(rootdir):
if os.path.isdir(os.path.join(rootdir, dirname)):
print("I got a subdirectory: %s" % dirname)
The alternative is to change to the directory to do the testing without the os.path.join().
I think the solution is actually very simple.
use
break
to only do first iteration of the for loop, there must be a more elegant way.
for root, dirs, files in os.walk(dir_name):
for f in files:
...
...
break
...
The first time you call os.walk, it returns tulips for the current directory, then on next loop the contents of the next directory.
Take original script and just add a break.
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
break
return outputList
Felt like throwing my 2 pence in.
baselevel = len(rootdir.split("\\"))
for subdirs, dirs, files in os.walk(rootdir):
curlevel = len(subdirs.split("\\"))
if curlevel <= baselevel + 1:
[do stuff]
Don't use os.walk.
Example:
import os
root = "C:\\"
for item in os.listdir(root):
if os.path.isfile(os.path.join(root, item)):
print item
Since Python 3.5 you can use os.scandir
instead of os.listdir
. Instead of strings you get an iterator of DirEntry
objects in return. From the docs:
Using
scandir()
instead oflistdir()
can significantly increase the performance of code that also needs file type or file attribute information, becauseDirEntry
objects expose this information if the operating system provides it when scanning a directory. AllDirEntry
methods may perform a system call, butis_dir()
andis_file()
usually only require a system call for symbolic links;DirEntry.stat()
always requires a system call on Unix but only requires one for symbolic links on Windows.
You can access the name of the object via DirEntry.name
which is then equivalent to the output of os.listdir
Since Python 3.5 you can use os.scandir
instead of os.listdir
. Instead of strings you get an iterator of DirEntry
objects in return. From the docs:
Using
scandir()
instead oflistdir()
can significantly increase the performance of code that also needs file type or file attribute information, becauseDirEntry
objects expose this information if the operating system provides it when scanning a directory. AllDirEntry
methods may perform a system call, butis_dir()
andis_file()
usually only require a system call for symbolic links;DirEntry.stat()
always requires a system call on Unix but only requires one for symbolic links on Windows.
You can access the name of the object via DirEntry.name
which is then equivalent to the output of os.listdir
for path, dirs, files in os.walk('.'):
print path, dirs, files
del dirs[:] # go only one level deep
The suggestion to use listdir
is a good one. The direct answer to your question in Python 2 is root, dirs, files = os.walk(dir_name).next()
.
The equivalent Python 3 syntax is root, dirs, files = next(os.walk(dir_name))
The same idea with listdir
, but shorter:
[f for f in os.listdir(root_dir) if os.path.isfile(os.path.join(root_dir, f))]
Why not simply use a range
and os.walk
combined with the zip
? Is not the best solution, but would work too.
For example like this:
# your part before
for count, (root, dirs, files) in zip(range(0, 1), os.walk(dir_name)):
# logic stuff
# your later part
Works for me on python 3.
Also: A break
is simpler too btw. (Look at the answer from @Pieter)
This is how I solved it
if recursive:
items = os.walk(target_directory)
else:
items = [next(os.walk(target_directory))]
...
import os
def listFiles(self, dir_name):
names = []
for root, directory, files in os.walk(dir_name):
if root == dir_name:
for name in files:
names.append(name)
return names
If you have more complex requirements than just the top directory (eg ignore VCS dirs etc), you can also modify the list of directories to prevent os.walk recursing through them.
ie:
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
dirs[:] = [d for d in dirs if is_good(d)]
for f in files:
do_stuff()
Note - be careful to mutate the list, rather than just rebind it. Obviously os.walk doesn't know about the external rebinding.
create a list of excludes, use fnmatch to skip the directory structure and do the process
excludes= ['a\*\b', 'c\d\e']
for root, directories, files in os.walk('Start_Folder'):
if not any(fnmatch.fnmatch(nf_root, pattern) for pattern in excludes):
for root, directories, files in os.walk(nf_root):
....
do the process
....
same as for 'includes':
if **any**(fnmatch.fnmatch(nf_root, pattern) for pattern in **includes**):
import os
def listFiles(self, dir_name):
names = []
for root, directory, files in os.walk(dir_name):
if root == dir_name:
for name in files:
names.append(name)
return names
Don't use os.walk.
Example:
import os
root = "C:\\"
for item in os.listdir(root):
if os.path.isfile(os.path.join(root, item)):
print item
If you have more complex requirements than just the top directory (eg ignore VCS dirs etc), you can also modify the list of directories to prevent os.walk recursing through them.
ie:
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
dirs[:] = [d for d in dirs if is_good(d)]
for f in files:
do_stuff()
Note - be careful to mutate the list, rather than just rebind it. Obviously os.walk doesn't know about the external rebinding.
The same idea with listdir
, but shorter:
[f for f in os.listdir(root_dir) if os.path.isfile(os.path.join(root_dir, f))]
In Python 3, I was able to do this:
import os
dir = "/path/to/files/"
#List all files immediately under this folder:
print ( next( os.walk(dir) )[2] )
#List all folders immediately under this folder:
print ( next( os.walk(dir) )[1] )
This is a nice python example
def walk_with_depth(root_path, depth):
if depth < 0:
for root, dirs, files in os.walk(root_path):
yield [root, dirs[:], files]
return
elif depth == 0:
return
base_depth = root_path.rstrip(os.path.sep).count(os.path.sep)
for root, dirs, files in os.walk(root_path):
yield [root, dirs[:], files]
cur_depth = root.count(os.path.sep)
if base_depth + depth <= cur_depth:
del dirs[:]
create a list of excludes, use fnmatch to skip the directory structure and do the process
excludes= ['a\*\b', 'c\d\e']
for root, directories, files in os.walk('Start_Folder'):
if not any(fnmatch.fnmatch(nf_root, pattern) for pattern in excludes):
for root, directories, files in os.walk(nf_root):
....
do the process
....
same as for 'includes':
if **any**(fnmatch.fnmatch(nf_root, pattern) for pattern in **includes**):
The suggestion to use listdir
is a good one. The direct answer to your question in Python 2 is root, dirs, files = os.walk(dir_name).next()
.
The equivalent Python 3 syntax is root, dirs, files = next(os.walk(dir_name))
You could also do the following:
for path, subdirs, files in os.walk(dir_name):
for name in files:
if path == ".": #this will filter the files in the current directory
#code here
The suggestion to use listdir
is a good one. The direct answer to your question in Python 2 is root, dirs, files = os.walk(dir_name).next()
.
The equivalent Python 3 syntax is root, dirs, files = next(os.walk(dir_name))
for path, dirs, files in os.walk('.'):
print path, dirs, files
del dirs[:] # go only one level deep
There is a catch when using listdir. The os.path.isdir(identifier) must be an absolute path. To pick subdirectories you do:
for dirname in os.listdir(rootdir):
if os.path.isdir(os.path.join(rootdir, dirname)):
print("I got a subdirectory: %s" % dirname)
The alternative is to change to the directory to do the testing without the os.path.join().
You can use this snippet
for root, dirs, files in os.walk(directory):
if level > 0:
# do some stuff
else:
break
level-=1
This is how I solved it
if recursive:
items = os.walk(target_directory)
else:
items = [next(os.walk(target_directory))]
...
You could use os.listdir()
which returns a list of names (for both files and directories) in a given directory. If you need to distinguish between files and directories, call os.stat()
on each name.
Don't use os.walk.
Example:
import os
root = "C:\\"
for item in os.listdir(root):
if os.path.isfile(os.path.join(root, item)):
print item
This is a nice python example
def walk_with_depth(root_path, depth):
if depth < 0:
for root, dirs, files in os.walk(root_path):
yield [root, dirs[:], files]
return
elif depth == 0:
return
base_depth = root_path.rstrip(os.path.sep).count(os.path.sep)
for root, dirs, files in os.walk(root_path):
yield [root, dirs[:], files]
cur_depth = root.count(os.path.sep)
if base_depth + depth <= cur_depth:
del dirs[:]
You could use os.listdir()
which returns a list of names (for both files and directories) in a given directory. If you need to distinguish between files and directories, call os.stat()
on each name.
Felt like throwing my 2 pence in.
baselevel = len(rootdir.split("\\"))
for subdirs, dirs, files in os.walk(rootdir):
curlevel = len(subdirs.split("\\"))
if curlevel <= baselevel + 1:
[do stuff]
Don't use os.walk.
Example:
import os
root = "C:\\"
for item in os.listdir(root):
if os.path.isfile(os.path.join(root, item)):
print item
The suggestion to use listdir
is a good one. The direct answer to your question in Python 2 is root, dirs, files = os.walk(dir_name).next()
.
The equivalent Python 3 syntax is root, dirs, files = next(os.walk(dir_name))
Source: Stackoverflow.com