在python中递归扫描文件并删除空目录

gcxthw6b  于 2023-02-28  发布在  Python
关注(0)|答案(7)|浏览(163)

我有以下结构:

Dir 1
|___Dir 2
   |___file 1
   |___file 2...
Dir 3
|___Dir 4
   |___file 3...

我希望能够递归地找到每个文件,用我自己的方式处理它,处理完成后删除该文件,再继续处理下一个。然后,如果某个目录已经为空,也把它删除,如此逐级向上处理,直到什么都不剩。
只是不知道该怎么做。
这是我的

# Walk the tree; hand each file to process_file() and delete it afterwards.
# NOTE(review): `dir` and `process_file` are not defined in this snippet --
# presumably supplied by the surrounding program; confirm.
for root, dirs, files in os.walk(dir):
    for name in files:
        file_path = os.path.join(root, name)
        process_file(file_path)
        # Reached only if process_file() returned without raising.
        os.remove(file_path)

这样可以正常工作,但我还希望删除子目录——当且仅当它们为空时。

bwitn5fc

bwitn5fc1#

嗯,我想这就行了,得跑了。走了......

def get_files(src_dir):
    """Process and then delete every file found under ``src_dir``.

    The tree rooted at ``src_dir`` is traversed with ``os.walk``. The full
    path of each file is passed to ``process`` and the file is removed once
    that call returns.

    NOTE(review): ``process`` is not defined in this snippet -- presumably
    provided by the surrounding program; confirm.
    """
    for root, _dirs, files in os.walk(src_dir):
        for name in files:
            file_path = os.path.join(root, name)
            process(file_path)
            # Reached only if process() returned without raising.
            os.remove(file_path)

def del_dirs(src_dir):
    """Try to remove every directory below ``src_dir``, deepest first.

    The walk is bottom-up, so children are visited before their parent and
    ``src_dir`` itself comes last; the function stops there without removing
    it. A directory that cannot be removed (for example because it still
    contains files) is left in place and the ``OSError`` is printed.
    """
    for entry in os.walk(src_dir, topdown=False):
        current = entry[0]
        if current == src_dir:
            # The starting directory is kept; nothing is processed after it.
            return
        try:
            os.rmdir(current)
        except OSError as err:
            print(err)

def main():
    """Run get_files() and then del_dirs() on ``src_dir``."""
    # NOTE(review): src_dir is not defined in this snippet -- presumably a
    # module-level variable set elsewhere; confirm.
    get_files(src_dir)
    del_dirs(src_dir)

# Run main() only when this file is executed as a script.
if __name__ == "__main__":
    main()
svgewumm

svgewumm2#

我意识到这篇文章比较老,可能没有必要再添加一个例子,但是乍一看,我认为初学者应该比这里的其他一些文章更容易理解,因为没有连接,它只导入一个模块,并给出了如何使用一些内置函数的好例子[open()& len()]和新的Python3字符串格式str.format。它还展示了在print()函数中使用file = filename将内容填充到文件是多么简单。
此脚本使用 os.walk() 扫描根目录,检查子目录列表和文件列表的长度,并根据结果执行相应的分支。它还会累加计数器,统计已使用目录和空目录的数量,并将这些信息输出到一个文件中。我用 Python 3.4 编写了这个例子,它满足了我的需求。如果有人有改进逻辑的想法,请在评论中提出,这样我们都可以从一个新的角度来解决问题。

import os
# Root of the tree to scan.
root_dir = 'C:\\tempdir\\directory\\directory\\'
# Running totals of empty and non-empty directories seen during the walk.
empty_count = 0
used_count = 0
# Mode 'x' creates the report file and fails if it already exists. The
# with-statement closes the file even if the walk raises part-way through.
with open('C:\\tempdir\\directories.txt', 'x') as outfile:
    for curdir, subdirs, files in os.walk(root_dir):
        if not subdirs and not files:
            # Nothing inside: record the directory and delete it.
            empty_count += 1
            print('Empty directory: {}'.format(curdir), file=outfile)
            os.rmdir(curdir)
        else:
            # A directory holding files, sub-directories, or both is in use.
            used_count += 1
            print('Used directory: {}'.format(curdir), file=outfile)

    # Append the totals to the report.
    print('empty_count: {}\nused_count: {}'.format(empty_count, used_count), file=outfile)
vof42yt1

vof42yt13#

这里是另一个我认为很高效的解决方案。当然,使用os.scandir可以提高效率。
首先,我定义了一个通用的 rec_rmdir 函数(recursive rmdir,即递归 rmdir),它递归地遍历目录树。

  • 该函数首先处理每个文件和每个子目录。
  • 然后它尝试删除当前目录。
  • preserve 标志用于保留根目录。

该算法是一种经典的Depth-first search算法。

import os
import stat

def rec_rmdir(root, callback, preserve=True):
    """Visit the tree under ``root`` depth-first, then try to remove ``root``.

    Every regular file is passed to ``callback`` by full path; every
    sub-directory is handled by a recursive call with ``preserve=False``.
    Entries that are neither are ignored.

    When ``preserve`` is true, ``root`` itself is left alone. Otherwise an
    ``os.rmdir`` is attempted and an ``IOError`` from it is ignored.
    """
    for name in os.listdir(root):
        entry = os.path.join(root, name)
        mode = os.stat(entry).st_mode
        if stat.S_ISDIR(mode):
            rec_rmdir(entry, callback, preserve=False)
        elif stat.S_ISREG(mode):
            callback(entry)

    if preserve:
        return
    try:
        os.rmdir(root)
    except IOError:
        pass

然后,很容易定义一个函数来处理文件并删除它。

def process_file_and_remove(path):
    """Delete the file at ``path`` after a placeholder processing step."""
    # process the file
    # ...
    os.remove(path)

经典用法:

# preserve keeps its default (True), so "/path/to/root" itself is not removed.
rec_rmdir("/path/to/root", process_file_and_remove)
ltqd579y

ltqd579y4#

这段代码只负责删除空目录,并把只含单个文件的目录中的那个文件移到上一级目录。它似乎只回答了问题的一部分,抱歉。
我在最后添加了一个循环,不断重试,直到再也找不到可删除的目录为止。我让函数返回已删除目录的数量。
我遇到的“访问被拒绝”错误是参照这个问题解决的:shutil.rmtree fails on Windows with 'Access is denied'

import os
import shutil

def onerror(func, path, exc_info):
    """
    Error callback for ``shutil.rmtree``.

    When ``path`` is not writable, write permission is added and
    ``func(path)`` is called again. When ``path`` is already writable the
    failure has some other cause, and the exception currently being handled
    is re-raised.

    Usage : ``shutil.rmtree(path, ignore_errors=False, onerror=onerror)``
    """
    import stat

    if os.access(path, os.W_OK):
        # Not a permission problem: propagate the active exception. The bare
        # raise relies on this function being called while an exception is
        # being handled.
        raise

    os.chmod(path, stat.S_IWUSR)
    func(path)

def get_empty_dirs(path):
    """Make one pass over the tree under ``path`` and remove trivial directories.

    A directory with no sub-directories and no files is deleted. A directory
    with no sub-directories and exactly one file has that file moved into its
    parent directory and is then deleted. Errors raised while moving or
    deleting are printed and the pass continues.

    Deletion uses ``shutil.rmtree`` with the ``onerror`` handler that must be
    in scope where this function is defined.

    Returns the number of directories removed during this pass.
    """
    count = 0
    for root, dirs, files in os.walk(path):
        try:
            if not dirs and not files:
                # Completely empty directory.
                print(u"deleting " + root)
                shutil.rmtree(root, ignore_errors=False, onerror=onerror)
                count += 1
            elif not dirs and len(files) == 1:
                # A lone file: pull it up one level, then drop the directory.
                source = os.path.join(root, files[0])
                parent = os.path.dirname(root)
                print(u"moving " + source + u" to " + parent)
                shutil.move(source, parent)
                print(u"deleting " + root)
                shutil.rmtree(root, ignore_errors=False, onerror=onerror)
                count += 1
        except (OSError, shutil.Error) as e:
            # OSError covers access-denied failures while removing (it is the
            # portable base of WindowsError, which is undefined off Windows);
            # shutil.Error is raised when the move destination already exists.
            print(e)
    return count

def get_all_empty_dirs(path):
    """Call get_empty_dirs(path) repeatedly until a pass removes nothing.

    Removing directories can leave their parents empty, so passes are
    repeated until one reports zero removals.

    Returns the total number of directories removed across all passes.
    """
    total_count = 0
    while True:
        count = get_empty_dirs(path)
        total_count += count
        if count < 1:
            # Nothing was removed in this pass, so nothing more can change.
            break
        print(u"retrying till count is 0, currently count is: %d" % count)

    # Single-argument print(...) runs on both Python 2 and Python 3.
    print(u"Total directories removed: %d" % total_count)
    return total_count

# os.getcwdu() exists only on Python 2; fall back to os.getcwd() elsewhere.
count = get_all_empty_dirs(getattr(os, "getcwdu", os.getcwd)())  # current directory
count += get_all_empty_dirs(u"o:\\downloads\\")  # other directory
print(u"Total of all directories removed: %d" % count)
w6lpcovy

w6lpcovy5#

看来我迟到了.不过,这里有另一个解决方案,可以帮助初学者.
导入

import os

from contextlib import suppress

将以下代码放入合适的函数中

# Loop for processing files
# NOTE(review): `dir` and `process_file` are not defined in this snippet --
# presumably supplied by the enclosing function; confirm.
for root, _, files in os.walk(dir):
    for name in files:
        file_path = os.path.join(root, name)

        # Assuming process_file() returns True on success; a file whose
        # processing failed is kept on disk.
        if process_file(file_path):
            os.remove(file_path)

将以下代码放入合适的函数中

# Loop for deleting empty directories
# Walk bottom-up so that children are removed before their parent is tried.
# os.rmdir is used rather than os.removedirs because the latter also deletes
# empty ancestors, which could climb above the starting directory.
for root, _, _ in os.walk(dir, topdown=False):
    # Ignore directory-not-empty errors; nothing can be done about them if we
    # want to retain files that failed to be processed. The deletion is
    # therefore silent.
    with suppress(OSError):
        os.rmdir(root)
csbfibhn

csbfibhn6#

import os

# Top level of tree you wish to delete empty directories from.
currentDir = r'K:\AutoCAD Drafting Projects\USA\TX\Image Archive'

# Number of directories examined so far.
index = 0

for root, dirs, files in os.walk(currentDir):
    # The loop variable is not called `dir`, which would shadow the builtin.
    for dir_name in dirs:
        newDir = os.path.join(root, dir_name)
        index += 1
        print(str(index) + " ---> " + newDir)

        try:
            # Removes newDir if it is empty, then any parents left empty.
            os.removedirs(newDir)
        except OSError:
            # Raised when the leaf directory could not be removed.
            print("Directory not empty and will not be removed")
            print(" ")
        else:
            print("Directory empty! Deleting...")
            print(" ")

很好很简单。关键是在try语句下使用os.removedirs。它已经是递归的了。

s8vozzvw

s8vozzvw7#

你可以通过递归很容易地做到

import os

def rm_empty_dirs(path):
    """Process and delete everything under ``path``, then ``path`` itself.

    For a directory, each entry is handled recursively and a single removal
    of the directory is attempted afterwards; if anything inside survived,
    the directory is left in place. For any other existing path,
    ``do_something(path)`` is called and the file is then deleted. A path
    that does not exist is ignored.

    The operation is best-effort: failures are swallowed so one bad entry
    does not abort the rest of the tree.

    NOTE(review): ``do_something`` is not defined in this snippet --
    presumably provided by the surrounding program; confirm.
    """
    if os.path.isdir(path):
        try:
            entries = os.listdir(path)
        except OSError:
            # Unreadable directory: nothing can be done below it.
            return
        for entry in entries:
            rm_empty_dirs(os.path.join(path, entry))
        try:
            # One attempt only. Retrying by recursing on the same directory
            # would never terminate normally while an entry cannot be removed.
            os.rmdir(path)
        except OSError:
            pass
    elif os.path.exists(path):
        try:
            do_something(path)
            os.remove(path)
        except Exception:
            # Keep the file when processing or deletion fails. Unlike a bare
            # except, this lets KeyboardInterrupt and SystemExit propagate.
            pass

相关问题