前言
最近整理群晖 NAS 上的文件时,发现影音库里有很多重复的资源,于是自己写了一个 Python 脚本来自动处理。
import hashlib
import os

# Only files with these extensions are considered for duplicate removal.
# To cover other file types, add their extension here (with the leading dot,
# because os.path.splitext returns the extension including the dot).
file_type = ['.jpg', '.jpeg', '.png', '.gif', '.psd', '.bmp', '.webp',
             '.mp4', '.mkv', '.avi', '.mov', '.mpeg', '.mpg',
             '.rar', '.zip', '.img']

# Paths collected by the most recent call to remove_reapt_files().
check_files = []


def _file_md5(path, chunk_size=4096):
    """Return the hex MD5 digest of the file at *path*, read in chunks."""
    md5_hash = hashlib.md5()
    # Read-only mode is enough for hashing and also works on files we are
    # not allowed to write to.
    with open(path, "rb") as f:
        for byte_block in iter(lambda: f.read(chunk_size), b""):
            md5_hash.update(byte_block)
    return md5_hash.hexdigest()


def remove_reapt_files(root_path=r'/Users/yuqianjun/Downloads/'):
    """Delete duplicate files (same MD5 content) found under *root_path*.

    The tree is walked recursively. Only regular files whose extension
    (compared case-insensitively) is listed in ``file_type`` are examined.
    When two files have the same MD5 digest, the one with the larger
    ``st_ctime`` is deleted and the other is kept; on a tie the file seen
    later is deleted.

    NOTE: per the Python ``os.stat`` documentation, ``st_ctime`` is the
    metadata-change time on Unix and the creation time on Windows, so on
    Unix "older" does not strictly mean "created first".

    Args:
        root_path: Directory to scan. Defaults to the path that was
            hard-coded in the original script.

    Returns:
        The number of files that were deleted.
    """
    wanted = {ext.lower() for ext in file_type}

    # Reset in place so repeated calls do not accumulate stale paths while
    # the module-level list object stays the same for any external user.
    check_files.clear()
    for root, _dirs, files in os.walk(root_path):
        # Only file names are collected; directories cannot be hashed.
        for name in files:
            print(name)
            if os.path.splitext(name)[1].lower() in wanted:
                check_files.append(os.path.join(root, name))

    files_dict = {}  # MD5 digest -> path of the copy currently kept
    r_index = 0
    print('Fiels Num:%s' % len(check_files))
    for value in check_files:
        try:
            file_md5 = _file_md5(value)
        except OSError as e:
            print('Skip unreadable file %s: %s' % (value, e))
            continue
        print('Check file MD5:%s' % value)

        d_path = files_dict.get(file_md5)
        if d_path is None:
            files_dict[file_md5] = value
            continue

        # The file handle is already closed here, so deleting also works on
        # platforms that refuse to remove an open file.
        try:
            if os.stat(d_path).st_ctime > os.stat(value).st_ctime:
                victim, keeper = d_path, value
            else:
                victim, keeper = value, d_path
            os.remove(victim)
        except OSError as e:
            print('Could not remove duplicate of %s: %s' % (value, e))
            continue

        files_dict[file_md5] = keeper
        print('Delete File:', victim)
        r_index += 1

    print('File Count:%s, Repeat Files Num:%s. All deleted!' % (
        len(check_files), str(r_index)))
    return r_index


if __name__ == '__main__':
    remove_reapt_files()
原文链接:https://blog.csdn.net/2301_81547508/article/details/135359578?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522171836987416800197013756%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fblog.%2522%257D&request_id=171836987416800197013756&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~blog~first_rank_ecpm_v1~times_rank-16-135359578-null-null.nonecase&utm_term=%E7%BB%BF%E8%81%94NAS
© 版权声明
声明📢本站内容均来自互联网,归原创作者所有,如有侵权必删除。
本站文章皆由CC-4.0协议发布,如无来源则为原创,转载请注明出处。
THE END