This notebook explains how to train a deep learning neural network to identify banknotes, using transfer learning, data augmentation and other state-of-the-art techniques. It uses multiple classes per image, so it separates a 10$ bill into the `10` and `$` classes. This is done so there is a higher chance of identifying all currencies of the same type across denominations (e.g. all dollars), and also of learning about numbers across currencies (all 5s).
The model runs on a serverless backend with a static front-end available at iris.brunosan.eu. For documentation on how to deploy the model, check the GitHub repository.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import fastai
from fastai.vision import *
#For CPU only
# Set the fastai default device to the CPU. The value is written through both
# the fully qualified name and the star-imported `defaults`
# (presumably the same object -- TODO confirm).
fastai.torch_core.defaults.device = 'cpu'
defaults.device= 'cpu'
# Working directory for the notebook; the trailing comment keeps an
# alternative dataset location.
path = Path('.') #'/home/jupyter/.fastai/data/banknotes/')
path_imgs=path/'imgs'
# Create the image folder (and any missing parents) if it is not there yet.
path_imgs.mkdir(parents=True, exist_ok=True)
# Bare expression: shown as the notebook cell output.
path_imgs
# Collect the images below path_imgs (recursively) and hold out a random 20%
# of them for validation.
src = (ImageList.from_folder(path_imgs,recurse=True)
.split_by_rand_pct(valid_pct=.2))
src
src.train.items[0]
#single class
# Label = the image's parent folder, relative to path_imgs, as one string.
func=lambda i: str(i.parent.relative_to(path_imgs) )
#multi class
# Label = the components of that relative folder path, one label per
# component. This assignment replaces the single-class version above.
func=lambda i: (i.parent.relative_to(path_imgs).parts )
# Try the label function on the first training item.
func(src.train.items[0])
ll = src.label_from_func(func); ll
#ll = src.label_from_folder(); ll
# Augmentation settings: horizontal and vertical flips, rotation up to 90,
# zoom up to 1.5, lighting and warp magnitudes up to 0.5.
tfms = get_transforms(do_flip=True,flip_vert=True,
max_rotate=90,
max_zoom=1.5,
max_lighting=0.5,
max_warp=0.5)
#so its reproducible
# Seeding is currently disabled, so runs are not reproducible.
#np.random.seed(42)
def get_data(size,bs):
    """Build the normalised DataBunch used for training.

    Both arguments are cast to ``int`` so that computed values such as
    ``256/2`` or ``10/4`` can be passed directly.

    Args:
        size: Image size handed to the transforms.
        bs: Batch size.

    Returns:
        The labelled list ``ll`` transformed with ``tfms`` at ``size``,
        batched with ``bs`` and normalised with ``imagenet_stats``.
    """
    size, bs = int(size), int(bs)
    transformed = ll.transform(tfms, size=size)
    bunch = transformed.databunch(bs=bs)  #for CPU only add ,num_workers=0
    return bunch.normalize(imagenet_stats)
# First training resolution: 256/2 is the float 128.0, which get_data casts
# to int; batch size 20.
size,bs=256/2,20
data=get_data(size,bs)
data.classes
data.show_batch(rows=4, figsize=(12,9))
arch = models.resnet50
# Both metrics are evaluated with a 0.2 threshold.
acc_02 = partial(accuracy_thresh, thresh=0.2)
f_score = partial(fbeta, thresh=0.2)
#multiclass
learn = cnn_learner(data, arch, metrics=[acc_02, f_score])
#single class
#learn = cnn_learner(data, arch, metrics=[accuracy])
We use the LR Finder to pick a good learning rate.
# Run the learning-rate finder, then plot what the recorder captured.
learn.lr_find()
learn.recorder.plot()
Then we can fit the head of our network.
# Learning rate used for the two one-cycle runs below.
lr = 1e-2
# 10 epochs followed by 5 more, each drawing the loss graph while training.
learn.fit_one_cycle(10, slice(lr),callbacks=ShowGraph(learn))
learn.fit_one_cycle(5, slice(lr),callbacks=ShowGraph(learn))
learn.show_results(rows=3)
# Checkpoint after the two runs above.
learn.save('stage-1-rn50')
...And fine-tune the whole model:
# Unfreeze the learner and look for a new learning rate.
learn.unfreeze()
learn.lr_find()
learn.recorder.plot()
lr=1e-4
# The learning-rate slice runs from 1e-5 up to lr/5.
learn.fit_one_cycle(5, slice(1e-5, lr/5),callbacks=ShowGraph(learn))
learn.save('stage-2-rn50')
# Reload the checkpoint that was just written; the trailing ';' hides the
# cell output.
learn.load('stage-2-rn50');
learn.fit_one_cycle(10, slice(1e-5, lr/5),callbacks=ShowGraph(learn))
learn.show_results(rows=3)
learn.save('stage-3-rn50')
# NOTE(review): this last run is not followed by a save in this cell.
learn.fit_one_cycle(10, slice(1e-5, lr/5),callbacks=ShowGraph(learn))
# Release unreferenced Python objects and cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()
# Move to 256 px images. 10/4 is the float 2.5, which get_data truncates to a
# batch size of 2.
size,bs=256,10/4
data=get_data(size,bs)
# Hand the new DataBunch to the learner. Without this assignment every fit
# below keeps using the DataBunch the learner was created with, so the
# "256" stages would never see 256 px images.
learn.data = data
learn.freeze()
learn.lr_find()
learn.recorder.plot()
lr=1e-4
learn.fit_one_cycle(5, slice(lr),callbacks=ShowGraph(learn))
learn.save('stage-1-256-rn50')
learn.fit_one_cycle(5, slice(lr),callbacks=ShowGraph(learn))
# Same checkpoint name as above: the second save overwrites the first.
learn.save('stage-1-256-rn50')
learn.show_results()
# Unfreeze the learner and train with a learning-rate slice from 1e-5 to lr/5.
learn.unfreeze()
learn.fit_one_cycle(5, slice(1e-5, lr/5),callbacks=ShowGraph(learn))
learn.save('stage-2-256-rn50-fbeta9930')
# Reload the checkpoint written on the previous line. The notebook used to
# load 'stage-2-256-rn50', a name that is never saved anywhere in it, so a
# top-to-bottom run failed here.
learn.load('stage-2-256-rn50-fbeta9930');
learn.fit_one_cycle(5, slice(1e-5, lr/5),callbacks=ShowGraph(learn))
learn.show_results()
# Final weights of the multi-class model.
learn.save('multi-class')
# Sanity check: predict the first training image with the trained learner.
imgPath=src.train.items[0]
img = open_image(imgPath)
pred=learn.predict(img)
# pred[2] holds one score per class; format each as a percentage string.
probabilities=[ '%.2f' % float(100*elem) for elem in pred[2] ]
# Pair the scores with the learner's own class list. The module-level
# `classes` list is only defined further down, so using it here raised
# NameError on a top-to-bottom run.
dict(zip(learn.data.classes,probabilities))
# Scratch cell: decode byte-string class names into text.
a=[b'1', b'10', b'100', b'20', b'200', b'5', b'50', b'500', b'euro', b'usd']
[c.decode("utf-8") for c in a]
# Export the learner to 'multi-class.pkl'.
learn.export(file='multi-class.pkl')
# NOTE(review): the export is given a bare file name while the reload below
# looks inside 'models/' -- confirm both resolve to the same file.
path_img = Path('models/')
model_file = 'multi-class.pkl'
# Replace `learn` with the learner loaded from the exported file.
learn = load_learner( path_img , model_file )
# File names of the two artefacts that go into the deployment archive.
model_file_jit = 'multi-class_jit.pth'
classes_file = 'classes.txt'
classes = ['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']

# Trace the model with a dummy 1x3x256x256 batch and store the traced module.
#trace_input = torch.ones(1,3,299,299).cuda()
trace_input = torch.ones(1, 3, 256, 256)
jit_model = torch.jit.trace(learn.model.float(), trace_input)
output_path = str(path_img / model_file_jit)
torch.jit.save(jit_model, output_path)

# export classes text file
save_texts(path_img / classes_file, classes)

# create a tarfile with the exported model and classes text file
tar_file = path_img / 'model.tar.gz'
with tarfile.open(tar_file, 'w:gz') as f:
    f.add(path_img / model_file_jit, arcname=model_file_jit)
    f.add(path_img / classes_file, arcname=classes_file)
import boto3
# Upload the model archive to the 'iris-ai' bucket under
# 'fastai-models/iris/model.tar.gz'.
s3 = boto3.resource('s3')
bucket = 'iris-ai'
s3.meta.client.upload_file(tar_file.as_posix(), bucket, 'fastai-models/iris/model.tar.gz')
#model
# Re-declare the paths and class names so this part can run without the
# training cells above.
path_img = Path('models/')
model_file_jit = 'multi-class_jit.pth'
classes = ['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']
file =str(path_img/f'{model_file_jit}')
# Load the traced model onto the CPU and switch it to eval mode.
model = torch.jit.load(file, map_location=torch.device('cpu')).eval()
from torchvision import transforms
import base64
import mimetypes
def img_to_data(path):
    """Convert a file (specified by a path) into a data URI.

    Args:
        path: Location of the file; it is passed through ``str()`` first, so
            ``pathlib.Path`` objects are accepted.

    Returns:
        A ``data:<mime>;base64,<payload>`` string. The MIME type is guessed
        from the file name and falls back to ``application/octet-stream``
        when it cannot be guessed.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = str(path)
    if not os.path.exists(path):
        # Name the offending path so the failure can be diagnosed.
        raise FileNotFoundError(path)
    mime, _ = mimetypes.guess_type(path)
    if mime is None:
        # Without a fallback the URI would literally start with "data:None;".
        mime = 'application/octet-stream'
    with open(path, 'rb') as fp:
        data = fp.read()
    # b64encode emits a single line, so no newline stripping is needed.
    data64 = base64.b64encode(data).decode("utf-8")
    return u'data:%s;base64,%s' % (mime, data64)
# Preprocessing applied before inference: resize to 256, centre-crop to 224,
# convert to a tensor and normalise each channel with the mean/std below
# (presumably the ImageNet statistics -- TODO confirm).
# NOTE(review): the model was traced with a 256x256 input while this crops to
# 224 -- confirm the difference is intended.
preprocess = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]) ])
def base64_to_bytes(base64String):
    """Decode a base64 string, optionally wrapped in a data URI, into a stream.

    Args:
        base64String: Either a bare base64 payload or a
            ``data:<mime>;base64,<payload>`` URI.

    Returns:
        An ``io.BytesIO`` holding the decoded bytes.
    """
    # Strip the data-URI header for any MIME type, not only image/*. A header
    # left in place would be fed to the base64 decoder and corrupt the result.
    image_data = re.sub(r'^data:[^,]*;base64,', '', base64String)
    image_bytes = io.BytesIO(base64.b64decode(image_data))
    return image_bytes
from IPython.display import Image
import logging
import time
def sort_dict(dic):
    """Return the entries of ``dic`` ordered by descending value.

    Each value is rendered as a percentage string with two decimals
    (for example ``3.5`` becomes ``"3.50%"``).
    """
    ranked = sorted(dic.items(), key=lambda pair: pair[1], reverse=True)
    return {name: "%.2f%%" % score for name, score in ranked}
class MyEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays."""

    def default(self, obj):
        """Turn NumPy values into plain Python; defer the rest to the base class."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # Anything else is unsupported here; the base class raises TypeError.
        return super().default(obj)
def predict(file):
    """Classify one image file with the TorchScript model and return JSON.

    The file is first turned into a base64 data URI and decoded back to
    bytes, then preprocessed and passed to the module-level ``model``.

    Args:
        file: Path of the image to classify.

    Returns:
        A JSON string with these keys:

        * ``probabilities``: classes whose rounded raw output is positive,
          each mapped to ``[output, softmax, softmax]`` as strings.
        * ``predictions``: softmax score per class (times 100, two decimals).
        * ``output``: raw model output per class (times 100, two decimals).
        * ``summary``: names of the classes listed in ``probabilities``.
        * ``others``: classes with a negative rounded output and a non-zero
          rounded softmax score, sorted by descending score.
    """
    dataURI = img_to_data(file)
    img = PIL.Image.open(base64_to_bytes(dataURI))
    img = img.convert('RGB')  # breaks with PNGS, RGBA, ...
    img_tensor = preprocess(img)
    input_object = img_tensor.unsqueeze(0)

    #Server code copy#
    logger.info("Calling prediction on model")
    start_time = time.time()
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        output = model(input_object)
        # NOTE(review): the learner was trained with thresholded metrics, so
        # a per-class sigmoid may be what is wanted here -- confirm before
        # changing; softmax is kept to preserve the current output.
        prediction = F.softmax(output, dim=1)
    inference_seconds = float("%.2f"%(time.time() - start_time))
    logger.info("--- Inference time: %s seconds ---", inference_seconds)

    output_list = ['%.2f' % float(100*elem) for elem in output[0].detach().numpy()]
    # The softmax scores were previously formatted twice with identical
    # results; one list now fills both slots of each per-class triple.
    probabilities = ['%.2f' % float(100*elem) for elem in prediction[0].detach().numpy()]
    per_class = dict(zip(classes, zip(output_list, probabilities, probabilities)))

    response = {}
    response['probabilities'] = {k: v for k, v in per_class.items() if float(v[0]) > 0}
    response['predictions'] = probabilities
    response['output'] = output_list
    response['summary'] = list(response['probabilities'].keys())
    response['others'] = sort_dict({k: float(v[2]) for k, v in per_class.items()
                                    if float(v[0]) < 0 and float(v[2]) > 0})
    logger.info('Predicted class is %s', response['summary'])
    return json.dumps(response, cls=MyEncoder)
from random import shuffle
# Send INFO-level messages from predict() to the root logger.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
path_imgs = Path("./imgs/")
#path_imgs = Path("./docs/assets")
src = (ImageList.from_folder(path_imgs,recurse=True))
# Shuffle in place so each run shows a different sample.
shuffle(src.items)
# Slice instead of indexing 0..2 so a folder with fewer than three images
# does not raise IndexError.
for img in src.items[:3]:
    # Pass the path explicitly as a file name rather than as raw data.
    display(Image(filename=str(img)))
    results = predict(img)
    print(classes)
    print(results)
# predict() returns a JSON string; decode it before reading single fields:
# results = json.loads(results)
# print(results['output'])
# print(results['predictions'])
# print(results['probabilities'])
# print(results['summary'])
# print(" ---> or: ",results['others'])