使用 Python pathlib 递归或非递归地扫描文件

lf5gs5x2  于 2023-06-25  发布在  Python
关注(0)|答案(1)|浏览(106)

我需要扫描给定文件夹/目录中的文件:如果 recursive 为 True,就递归扫描子目录;否则只生成该目录下的文件(不进入子目录)。我用下面的代码实现了这个功能,但得到的结果是 None

import re
import uuid
from pathlib import Path
from typing import Generator, Tuple, Union, Optional
import time

def scan_files(dir_path: Union[str, Path], filter_regex: Optional[str] = None, recursive: bool = True) -> Generator[Tuple[str, Path], None, None]:
    """
    Yield ``(file_id, path)`` pairs for the entries found in a directory.

    Symbolic links are skipped. Directories are never yielded: they are
    descended into when ``recursive`` is true and ignored otherwise.

    Args:
        dir_path: Directory to scan.
        filter_regex: Optional pattern applied with ``re.match`` (anchored at
            the start of the name, case-insensitive) to each entry's name.
            ``None`` accepts every name.
        recursive: Whether to descend into sub-directories.

    Yields:
        A tuple of the UUIDv5 string (``NAMESPACE_URL`` namespace) computed
        from ``str(path)`` and the path itself.
    """
    path = Path(dir_path)
    for item in path.iterdir():
        if item.is_symlink():
            continue
        if item.is_dir():
            # A directory is only a container here; previously a non-recursive
            # scan fell through and yielded it as if it were a file.
            if recursive:
                yield from scan_files(item, filter_regex=filter_regex, recursive=recursive)
            continue
        if filter_regex is None or re.match(filter_regex, item.name, re.IGNORECASE):
            yield str(uuid.uuid5(uuid.NAMESPACE_URL, str(item))), item

# Poll the input directory forever, printing every (id, path) pair on each pass.
banner = "=" * 100
while True:
    print(banner)
    for file_id, file_path in scan_files(dir_path="/tmp/in/106/", recursive=True):
        print(file_id, file_path)
    print(banner)
    time.sleep(4)  # pause before the next scan

实际输出:scan_files 没有生成任何文件
测试输入文件
创建目录

/tmp/in/106/nested

 add below files 
 /tmp/in/106/1.mov
 /tmp/in/106/2.mov
 /tmp/in/106/nested/1.mov
 /tmp/in/106/nested/2.mov

预期输出:

if recursive:  all files with full path 

   if non recursive: /tmp/in/106/1.mov and /tmp/in/106/2.mov
gj3fmq9x

gj3fmq9x1#

使用Path.glob和递归或非递归模式:

import re

def get_files(path, recursive=False):
    """Yield the ``*.mov`` paths under *path*, skipping symbolic links.

    Args:
        path: Directory to search; anything ``Path`` accepts.
        recursive: When true the ``**/*.mov`` glob pattern is used, so
            matches inside sub-directories are included; otherwise only
            direct children of *path* are matched.

    Yields:
        Each ``Path`` produced by ``Path.glob`` that is not a symlink.
    """
    pattern = '**/*.mov' if recursive else '*.mov'

    for file in Path(path).glob(pattern):
        # The loop variable is `file`; testing the undefined name `item`
        # raised NameError as soon as the first match was produced.
        if file.is_symlink():
            continue

        yield file

# Then filter using a regex
def scan_files(directory, recursive=False, regex=None):
    """Yield ``(file_id, absolute_path)`` for the paths from ``get_files``.

    Args:
        directory: Directory handed to ``get_files``.
        recursive: Forwarded to ``get_files``.
        regex: Optional pattern searched (case-insensitively) in each path's
            name; ``None`` keeps every path.

    Yields:
        A tuple of the UUIDv5 string (``NAMESPACE_URL`` namespace) computed
        from the absolute path string, and that absolute path.
    """
    candidates = get_files(directory, recursive=recursive)

    if regex is not None:
        matcher = re.compile(regex, re.IGNORECASE)
        # Lazily drop paths whose name does not contain a match.
        candidates = filter(lambda entry: matcher.search(entry.name), candidates)

    for candidate in candidates:
        resolved = candidate.absolute()
        file_id = uuid.uuid5(uuid.NAMESPACE_URL, str(resolved))
        yield str(file_id), resolved

编辑

使用一个catch-all正则表达式来更简洁地在一个函数中完成这一点:

def scan_files(directory, recursive=False, regex=None):
    """Yield ``(file_id, absolute_path)`` for ``*.mov`` paths in *directory*.

    Args:
        directory: Directory to glob; anything ``Path`` accepts.
        recursive: When true, the glob pattern is prefixed with ``**/`` so
            sub-directories are searched as well.
        regex: Optional pattern searched (case-insensitively) in each path's
            name. ``None`` falls back to ``'.'``, i.e. any single character.

    Yields:
        A tuple of the UUIDv5 string (``NAMESPACE_URL`` namespace) computed
        from the absolute path string, and that absolute path. Symbolic
        links are skipped.
    """
    name_filter = re.compile('.' if regex is None else regex, re.IGNORECASE)

    glob_pattern = '*.mov'
    if recursive:
        glob_pattern = '**/' + glob_pattern

    for candidate in Path(directory).glob(glob_pattern):
        # Name check first, then the symlink check, as two guard conditions.
        if name_filter.search(candidate.name) and not candidate.is_symlink():
            resolved = candidate.absolute()
            file_id = uuid.uuid5(uuid.NAMESPACE_URL, str(resolved))
            yield str(file_id), resolved

相关问题