Identify duplicate .mp3 files by size, then confirm whether they are true duplicates by hashing their content, in Python.
main.py
import os
import hashlib
# Function to find duplicate files by size
def find_duplicate_by_size(directory, extension=".mp3"):
    """Collect pairs of files under *directory* that have identical sizes.

    Equal size is only a cheap pre-filter: the returned pairs are
    *candidates* whose content still has to be compared.

    Args:
        directory: Root directory; it is walked recursively.
        extension: Case-insensitive filename suffix a file must have to be
            considered. Pass ``None`` (or ``""``) to consider every file.

    Returns:
        A list of ``(file_path, earlier_path)`` tuples, one for every pair
        of matching files that share the same size in bytes.
        ``earlier_path`` is the file that was encountered first.
    """
    suffix = extension.lower() if extension else None
    files_by_size = {}  # size in bytes -> paths already seen with that size
    duplicates = []
    for dirpath, dirnames, filenames in os.walk(directory):
        # Sorting makes the traversal, and therefore the output, deterministic.
        dirnames.sort()
        for filename in sorted(filenames):
            if suffix is not None and not filename.lower().endswith(suffix):
                continue
            file_path = os.path.join(dirpath, filename)
            try:
                file_size = os.path.getsize(file_path)
            except OSError:
                # Broken symlink or file removed mid-scan: it cannot be a
                # duplicate candidate, so skip it instead of aborting.
                continue
            same_size = files_by_size.setdefault(file_size, [])
            # Pair with *every* earlier file of this size, not just the
            # first one; otherwise two identical files are missed when an
            # unrelated file of the same size happened to be seen first.
            duplicates.extend((file_path, earlier) for earlier in same_size)
            same_size.append(file_path)
    return duplicates
# Helper: hash one file without loading it into memory all at once
def _sha256_of_file(path, chunk_size=1024 * 1024):
    """Return the SHA-256 hex digest of the file at *path*, read in chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Two-argument iter() keeps calling f.read() until it returns b"".
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


# Function to check duplicate files by content using hashing
def find_duplicate_by_content(duplicates):
    """Keep only the candidate pairs whose file contents are identical.

    Args:
        duplicates: Iterable of ``(file1, file2)`` path pairs, e.g. files
            that were found to have the same size.

    Returns:
        A list of the ``(file1, file2)`` pairs whose SHA-256 digests match,
        in the same order as they appear in *duplicates*.

    Raises:
        OSError: If one of the files cannot be opened or read.
    """
    digests = {}  # path -> hex digest, so each file is hashed at most once
    duplicate_files = []
    for file1, file2 in duplicates:
        for path in (file1, file2):
            if path not in digests:
                digests[path] = _sha256_of_file(path)
        if digests[file1] == digests[file2]:
            duplicate_files.append((file1, file2))
    return duplicate_files
# Steps to find duplicates
if __name__ == "__main__":
    # Placeholder: replace with the folder that should be scanned.
    directory = "path_to_directory"
    # Step 1: cheap pre-filter, pairs of files with the same size.
    duplicates_by_size = find_duplicate_by_size(directory)
    # Step 2: confirm the candidates by comparing content hashes.
    duplicate_files = find_duplicate_by_content(duplicates_by_size)
    print(duplicate_files)
1243 chars
38 lines
This code first finds candidate duplicate .mp3 files by comparing their sizes, and then confirms which candidates are true duplicates by comparing SHA-256 hashes of their content.