Ok so I think I've found a solution to your problem, I'm not going to say that it is the most efficient solution, but it should work
First, I modified your code a bit: the colours are now sorted so that those found in colors_dict come first, and then by their count. That way, as soon as a colour that is not in colors_dict turns up with a share of less than 1%, the program ignores the rest of the colours, so it doesn't have to iterate through all of them.
Then I added two functions: color_bounds(color, bound) and check_bounds(bounds, color_hex)
color_bounds takes a colour and a range, and returns the upper and lower limits of the colours considered similar to it. For example, with a range of 2 and the colour C738DE, the upper limit is C93AE0 and the lower limit is C536DC (the function returns them as lists of decimal R, G, B values rather than as hex strings).
check_bounds then takes those bounds and checks whether any of the more frequent (already accepted) colours falls within them; if one does, the new colour is not added.
import pandas as pd
from PIL import Image
from collections import Counter
def color_bounds(color, bound):
    """Return the per-channel range of colours treated as similar to *color*.

    Args:
        color: Hexadecimal RGB string without a leading ``#``, two digits
            per channel (for example ``"C738DE"``).
        bound: Amount added to and subtracted from every channel value.

    Returns:
        A tuple ``(upper_bounds, lower_bounds)``.  Each element is a list of
        three integers in R, G, B order.  Upper values are capped at 255 and
        lower values are floored at 0.

    Raises:
        ValueError: If a channel of *color* is not valid hexadecimal.
    """
    channels = int(color[:2], 16), int(color[2:4], 16), int(color[4:], 16)

    # The bounds stay as integers so check_bounds can compare them directly.
    # If hex strings are wanted instead, format each value with
    # "%02X" % value -- check_bounds would then have to convert them back.
    upper_bounds = [min(value + bound, 255) for value in channels]
    lower_bounds = [max(value - bound, 0) for value in channels]
    return (upper_bounds, lower_bounds)
def check_bounds(bounds, colors):
    """Report whether none of *colors* falls inside *bounds*.

    Args:
        bounds: Pair ``(upper_bounds, lower_bounds)`` of three-item integer
            sequences in R, G, B order, as produced by ``color_bounds``.
        colors: Iterable of hexadecimal RGB strings (two digits per channel,
            no leading ``#``) to compare against the bounds.

    Returns:
        ``False`` as soon as one colour has all three channels within the
        inclusive bounds; ``True`` if no colour does (including when
        *colors* is empty).

    Raises:
        ValueError: If a channel of any colour is not valid hexadecimal.
    """
    upper_bounds, lower_bounds = bounds[0], bounds[1]
    for color in colors:
        channels = int(color[:2], 16), int(color[2:4], 16), int(color[4:], 16)
        # A colour only counts as similar when every channel is in range.
        if all(
            lower <= value <= upper
            for value, lower, upper in zip(channels, lower_bounds, upper_bounds)
        ):
            return False
    return True
# Maps upper-case six-digit hex codes to colour names.  Only two sample
# entries are shown; the real table is expected to be much larger.
colors_dict = {"000000": "Black", "FFFFFF": "White"}

# Open the file in a context manager so its handle is released, and work on
# an RGB copy so every pixel is a three-item tuple.  Single-band images
# yield plain ints and RGBA yields four channels, neither of which produces
# the six-digit keys used by colors_dict.
with Image.open("image.jpg") as source_image:
    img = source_image.convert("RGB")
size = w, h = img.size
data = img.load()

# One upper-case hex string per pixel, visited column by column.
colors = []
for x in range(w):
    for y in range(h):
        colors.append("%02X%02X%02X" % data[x, y])
total = w * h

# Output columns, filled in by the ranking loop further down.
color_hex = []
color_count = []
color_percent = []
df = pd.DataFrame()
def key(i):
    """Sort key for a ``(hex_color, count)`` pair.

    Colours listed in ``colors_dict`` rank above colours that are not, and
    within each group the higher count ranks higher.  Intended for use with
    ``sorted(..., reverse=True)``.

    Args:
        i: Two-item sequence ``(hex_color, count)``.

    Returns:
        Tuple ``(is_named, count)``.
    """
    # Compare on membership rather than on the colour name, so named colours
    # are ordered by how often they occur instead of alphabetically.
    return i[0] in colors_dict, i[1]
colors = Counter(colors).items()
for color, count in sorted(colors, key=key, reverse=True):
    # Keep this a float: most colours cover well under 1% of the image.
    # Change the comparison below if colours at exactly 1% should be kept.
    percent = count / total * 100
    if percent <= 1:
        if color in colors_dict:
            # Colours from colors_dict are sorted ahead of all others, so a
            # rare named colour says nothing about the colours that follow.
            continue
        # Unnamed colours arrive in descending count order, so every
        # remaining colour is at or below the threshold as well.
        break

    # Skip colours that are similar to a colour that was already accepted.
    # A larger bound treats more colours as similar (fewer rows in the
    # output); a smaller bound keeps more distinct colours.
    bounds = color_bounds(color, 16)
    if check_bounds(bounds, color_hex):
        color_hex.append(color)
        color_count.append(count)
        color_percent.append(percent)

df['color'] = color_hex
df['count'] = color_count
df['percent'] = color_percent
# Colours without an entry in colors_dict get a missing value here.
df['color_name'] = df['color'].map(colors_dict)
df.to_excel(r'export_dataframe.xlsx',
            index=False, header=True)
print('done')
Given a little more time I could make the code much more efficient, but as of yet I think I've answered your question, please tell me if this was helpful :D
PS: You can adjust the bound passed to color_bounds to include more or fewer colours.
PPS I left in the code for color_bounds to convert the bounds back to hex, if you want to do that it will just require you to add in a function into check_bounds to re-convert it back into decimal rgb values