
For Reine: this code is the fix. I have tested it and verified that training works:
# Root folder of the dataset; the images sit in per-team sub-folders.
path = "/kaggle/input/datasets/vesuvius13/formula-one-cars/Formula One Cars"

# Collect every image found under all of the team folders.
all_files = get_image_files(path)
print(f"Total files before cleaning: {len(all_files)}")
from PIL import Image
from pathlib import Path
def is_valid_image(fn) -> bool:
    """Return True if the file at *fn* can be opened and fully decoded.

    The check runs in two passes: ``verify()`` first, then an actual
    decode to RGB, because ``verify()`` alone can miss some bad files.

    Args:
        fn: Path of the candidate image file (passed to ``Image.open``).

    Returns:
        True when both passes succeed, False if either one raises.
    """
    try:
        # Pass 1: integrity check without decoding the pixel data.
        with Image.open(fn) as im:
            im.verify()
        # Pass 2: reopen (the image must be reopened after verify()) and
        # force a full load so files that only fail while decoding are caught.
        with Image.open(fn) as im:
            im.convert("RGB").load()
    except Exception:
        # Deliberately broad: this is a best-effort filter, and any failure
        # to open or decode the file means it should be excluded.
        return False
    return True
# Sort every discovered file into a "readable" or "unreadable" bucket.
# A single pass is used so each file is checked only once.
good_files = []
bad_files = []
for fn in all_files:
    if is_valid_image(fn):
        good_files.append(fn)
    else:
        bad_files.append(fn)

# Summary of the cleaning result.
print("Good files:", len(good_files))
print("Bad files:", len(bad_files))
# Show at most the first 30 rejected files for manual inspection.
for fn in bad_files[:30]:
    print(fn)


def extract_brand(fn):
    """Return the brand label derived from the image's parent folder name.

    The name of the folder that directly contains *fn* is taken, the text
    " F1 car" is removed from it, and surrounding whitespace is stripped;
    e.g. a file inside "Racing Point F1 car" yields "Racing Point".

    Args:
        fn: Path of an image file (str or path-like).

    Returns:
        The cleaned folder name as a string.
    """
    folder_name = Path(fn).parent.name
    return folder_name.replace(" F1 car", "").strip()
print(extract_brand("/kaggle/input/datasets/vesuvius13/formula-one-cars/Formula One Cars/Racing Point F1 car/00000090.png"))
You can reach me at ddjapri@ayclogic.com.
All class notes can be found here.