Multi-label prediction with Banknotes¶

This notebook explains how to train a deep learning neural net to identify banknotes, using transfer learning, data augmentation and other state-of-the-art techniques. It uses multi-label classification, so a $10 bill is split into two labels: the denomination (`10`) and the currency (`usd`). This gives the model a better chance of identifying all banknotes of the same currency across denominations (e.g. all dollars), and of learning the numbers across currencies (e.g. all 5s).

The model runs on a serverless backend with a static front-end available at iris.brunosan.eu. For documentation on how to deploy the model, check the GitHub repository.

%reload_ext autoreload
%autoreload 2
%matplotlib inline

import fastai
from fastai.vision import *

#For CPU only 
fastai.torch_core.defaults.device = 'cpu'
defaults.device= 'cpu'

Multiclassification¶

path = Path('.') #'/home/jupyter/.fastai/data/banknotes/')
path_imgs=path/'imgs'
path_imgs.mkdir(parents=True, exist_ok=True)
path_imgs

PosixPath('imgs')

src = (ImageList.from_folder(path_imgs,recurse=True)
               .split_by_rand_pct(valid_pct=.2))

src

ItemLists;

Train: ImageList (679 items)
Image (3, 500, 359),Image (3, 201, 500),Image (3, 225, 500),Image (3, 219, 500),Image (3, 281, 500)
Path: imgs;

Valid: ImageList (169 items)
Image (3, 215, 500),Image (3, 375, 500),Image (3, 640, 480),Image (3, 262, 500),Image (3, 480, 640)
Path: imgs;

Test: None

src.train.items[0]

PosixPath('imgs/usd/50/IMG_20190730_232610.jpg')

# Label function: derives the label(s) of an image from its folder path
# relative to path_imgs. Both variants are kept as a toggle; only the LAST
# assignment to `func` is in effect.
#single class: the whole relative folder path as one string label
# (e.g. 'usd/50' for imgs/usd/50/<file>.jpg)
func=lambda i: str(i.parent.relative_to(path_imgs) )
#multi class: one label per folder level, i.e. the tuple of path parts
# (e.g. ('usd', '50') for imgs/usd/50/<file>.jpg)
func=lambda i: (i.parent.relative_to(path_imgs).parts )
# Sanity check: show the labels produced for the first training image.
func(src.train.items[0])

('euro', '5')

ll = src.label_from_func(func); ll
#ll = src.label_from_folder(); ll

LabelLists;

Train: LabelList (211 items)
x: ImageList
Image (3, 428, 500),Image (3, 255, 500),Image (3, 255, 500),Image (3, 500, 305),Image (3, 500, 435)
y: MultiCategoryList
euro;5,euro;5,euro;5,euro;5,euro;5
Path: /home/jupyter/.fastai/data/banknotes/imgs;

Valid: LabelList (52 items)
x: ImageList
Image (3, 278, 500),Image (3, 267, 500),Image (3, 217, 500),Image (3, 400, 500),Image (3, 333, 500)
y: MultiCategoryList
euro;200,euro;200,usd;100,usd;5,usd;10
Path: /home/jupyter/.fastai/data/banknotes/imgs;

Test: None

tfms = get_transforms(do_flip=True,flip_vert=True, 
                      max_rotate=90, 
                      max_zoom=1.5, 
                      max_lighting=0.5, 
                      max_warp=0.5)

#so its reproducible
#np.random.seed(42)

def get_data(size,bs):
    """Build the normalized DataBunch for a given image size and batch size.

    Uses the module-level labelled list `ll` and the augmentation
    transforms `tfms`, then normalizes with `imagenet_stats`.

    Args:
        size: Image size passed to the transforms; cast with int(), so a
            float such as 256/2 is accepted.
        bs: Batch size; also cast with int().

    Returns:
        The object returned by `.normalize(imagenet_stats)` on the DataBunch.
    """
    img_size, batch_size = int(size), int(bs)
    transformed = ll.transform(tfms, size=img_size)
    #for CPU only add ,num_workers=0 to the databunch() call
    bunch = transformed.databunch(bs=batch_size)
    return bunch.normalize(imagenet_stats)
size,bs=256/2,20
data=get_data(size,bs)

data.classes

['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']

data.show_batch(rows=4, figsize=(12,9))

arch = models.resnet50

acc_02 = partial(accuracy_thresh, thresh=0.2)
f_score = partial(fbeta, thresh=0.2)
#multiclass
learn = cnn_learner(data, arch, metrics=[acc_02, f_score])
#single class
#learn = cnn_learner(data, arch, metrics=[accuracy])

We use the LR Finder to pick a good learning rate.

learn.lr_find()
learn.recorder.plot()

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

Then we can fit the head of our network.

lr = 1e-2

learn.fit_one_cycle(10, slice(lr),callbacks=ShowGraph(learn))

learn.fit_one_cycle(5, slice(lr),callbacks=ShowGraph(learn))

learn.show_results(rows=3)

learn.save('stage-1-rn50')

...And fine-tune the whole model:

learn.unfreeze()

learn.lr_find()
learn.recorder.plot()

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

lr=1e-4
learn.fit_one_cycle(5, slice(1e-5, lr/5),callbacks=ShowGraph(learn))

learn.save('stage-2-rn50')

learn.load('stage-2-rn50');

learn.fit_one_cycle(10, slice(1e-5, lr/5),callbacks=ShowGraph(learn))

learn.show_results(rows=3)

learn.save('stage-3-rn50')

learn.fit_one_cycle(10, slice(1e-5, lr/5),callbacks=ShowGraph(learn))

gc.collect()
torch.cuda.empty_cache()

# Progressive resizing: rebuild the data at 256px with a smaller batch size
# (get_data casts bs with int(), so 10/4 becomes 2).
size,bs=256,10/4
data=get_data(size,bs)
# Attach the new DataBunch to the learner. Rebinding the name `data` alone
# does not affect `learn`, which would otherwise keep training on the
# DataBunch it was created with.
learn.data=data

learn.freeze()

learn.lr_find()
learn.recorder.plot()

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

lr=1e-4

learn.fit_one_cycle(5, slice(lr),callbacks=ShowGraph(learn))

learn.save('stage-1-256-rn50')

learn.fit_one_cycle(5, slice(lr),callbacks=ShowGraph(learn))

learn.save('stage-1-256-rn50')

learn.show_results()

learn.unfreeze()

learn.fit_one_cycle(5, slice(1e-5, lr/5),callbacks=ShowGraph(learn))

learn.save('stage-2-256-rn50-fbeta9930')

# Reload the checkpoint that was just saved (same pattern as the
# 'stage-2-rn50' save/load earlier). The previous name 'stage-2-256-rn50'
# is never written by any cell in this notebook.
learn.load('stage-2-256-rn50-fbeta9930');

learn.fit_one_cycle(5, slice(1e-5, lr/5),callbacks=ShowGraph(learn))

learn.show_results()

learn.save('multi-class')

Inference example¶

# Run the trained learner on one training image and show a score per class.
imgPath=src.train.items[0]
img = open_image(imgPath)
pred=learn.predict(img)
# pred[2] is iterated as one score per class; each is shown as a percentage
# string with two decimals.
probabilities=[ '%.2f' % float(100*elem) for elem in pred[2] ]
# Use data.classes here: the plain `classes` list is only defined further
# down in the notebook, so referencing it at this point raises NameError on
# a top-to-bottom run.
dict(zip(data.classes,probabilities))

{'1': '0.26',
 '10': '0.03',
 '100': '0.00',
 '20': '0.01',
 '200': '0.00',
 '5': '0.03',
 '50': '99.99',
 '500': '0.00',
 'euro': '0.00',
 'usd': '100.00'}

a=[b'1', b'10', b'100', b'20', b'200', b'5', b'50', b'500', b'euro', b'usd']
[c.decode("utf-8") for c in a]

['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']

learn.export(file='multi-class.pkl')

Convert to PyTorch model in TorchScript format¶

path_img = Path('models/')
model_file = 'multi-class.pkl'
learn = load_learner( path_img , model_file )

/Users/brunosan/anaconda3/envs/fastai/lib/python3.7/site-packages/torch/serialization.py:454: SourceChangeWarning: source code of class 'torchvision.models.resnet.Bottleneck' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.
  warnings.warn(msg, SourceChangeWarning)

# Export the loaded learner's model as a TorchScript file, write the class
# names next to it, and bundle both into models/model.tar.gz.
model_file_jit = 'multi-class_jit.pth'
classes_file='classes.txt'
# NOTE(review): must stay in the same order as data.classes used for training.
classes = ['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']

#trace_input = torch.ones(1,3,299,299).cuda()
trace_input = torch.ones(1,3,256,256)
# Tracing records the operations executed for this example input, so the
# model is switched to eval mode first: the exported graph must reflect
# inference behaviour, not training behaviour.
jit_model = torch.jit.trace(learn.model.float().eval(), trace_input)
output_path = str(path_img/model_file_jit)
torch.jit.save(jit_model, output_path)
# export classes text file
save_texts(path_img/classes_file, classes)
tar_file=path_img/'model.tar.gz'

# create a tarfile with the exported model and classes text file
with tarfile.open(tar_file, 'w:gz') as f:
    f.add(path_img/model_file_jit, arcname=model_file_jit)
    f.add(path_img/classes_file, arcname=classes_file)

Upload to the S3 bucket¶

import boto3
s3 = boto3.resource('s3')
bucket = 'iris-ai'
s3.meta.client.upload_file(tar_file.as_posix(), bucket, 'fastai-models/iris/model.tar.gz')

Test inference¶

#model
path_img = Path('models/')
model_file_jit = 'multi-class_jit.pth'
classes = ['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']
file =str(path_img/f'{model_file_jit}')
model = torch.jit.load(file, map_location=torch.device('cpu')).eval()

from torchvision import transforms
import base64
import mimetypes


def img_to_data(path):
    """Convert a file (specified by a path) into a base64 data URI.

    Args:
        path: Location of the file to encode; converted with str(), so both
            strings and path objects are accepted.

    Returns:
        A string of the form "data:<mime>;base64,<payload>". <mime> is
        guessed from the file name and falls back to
        "application/octet-stream" when it cannot be guessed. <payload> is
        the file content, base64-encoded without line breaks.

    Raises:
        FileNotFoundError: If no file exists at `path`.
    """
    path = str(path)
    mime, _ = mimetypes.guess_type(path)
    if mime is None:
        # Avoid emitting the invalid header "data:None;base64,..."
        mime = 'application/octet-stream'
    # open() raises FileNotFoundError itself (with the file name attached),
    # so no separate existence check is needed.
    with open(path, 'rb') as fp:
        data = fp.read()
    # b64encode yields the same payload as encodebytes with its line breaks
    # removed.
    data64 = base64.b64encode(data).decode('utf-8')
    return f'data:{mime};base64,{data64}'

# Preprocessing applied to a PIL image before it is fed to the TorchScript
# model: resize, center-crop, convert to a tensor, then normalize per channel.
# NOTE(review): the mean/std values look like the ImageNet statistics
# (training normalized with `imagenet_stats`) — confirm they are identical.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_pipeline_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
preprocess = transforms.Compose(_pipeline_steps)

def base64_to_bytes(base64String):
    """Decode a base64 string or base64 data URI into an in-memory byte stream.

    Args:
        base64String: Either a bare base64 payload or a data URI of the form
            "data:<mime>[;params];base64,<payload>". Any MIME type is
            accepted, not only image/*.

    Returns:
        An io.BytesIO positioned at the start of the decoded bytes.
    """
    # Strip an optional data-URI header; only the part up to the first
    # ";base64," at the very start of the string is removed.
    image_data = re.sub(r'^data:[^,]*;base64,', '', base64String, count=1)
    return io.BytesIO(base64.b64decode(image_data))

from IPython.display import Image
import logging
import time


def sort_dict(dic):
    """Return a new dict ordered by value, largest first, with the values
    formatted as percentage strings with two decimals (e.g. 11.48 -> "11.48%").
    """
    ranked = sorted(dic.items(), key=lambda pair: pair[1], reverse=True)
    return {name: "%.2f%%" % score for name, score in ranked}

class MyEncoder(json.JSONEncoder):
    """JSON encoder that also serializes NumPy scalars and arrays."""

    def default(self, obj):
        """Convert NumPy values to plain Python; defer everything else.

        NumPy integers become int, NumPy floats become float and arrays
        become (nested) lists. Any other type is passed to the base class.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

def predict(file):
    """Run the TorchScript model on one image file and return the result as JSON.

    Mirrors the server code path: the file is turned into a data URI, decoded
    back into bytes, opened with PIL, run through `preprocess` and passed to
    the module-level `model`. Uses the module-level `classes` and `logger`.

    Args:
        file: Path of the image file to classify.

    Returns:
        A JSON string with the keys:
          "probabilities": classes whose raw output is > 0, each mapped to a
              triple of formatted strings (output, softmax %, softmax %);
          "predictions": softmax percentage per class, in `classes` order;
          "output": raw model output times 100 per class, in `classes` order;
          "summary": the class names present in "probabilities";
          "others": classes with raw output < 0 and a non-zero formatted
              softmax percentage, sorted from highest to lowest.
    """
    dataURI= img_to_data(file)
    img = PIL.Image.open(base64_to_bytes(dataURI))
    # Force 3 channels so that PNG / RGBA / ... inputs do not break preprocessing
    img = img.convert('RGB')
    img_tensor = preprocess(img)
    input_object = img_tensor.unsqueeze(0)  # add a leading dimension of size 1

    #Server code copy#
    logger.info("Calling prediction on model")
    start_time = time.time()
    # Inference only: do not record operations for autograd.
    with torch.no_grad():
        output = model(input_object)
    inference_seconds = float("%.2f"%(time.time() - start_time))
    logger.info("--- Inference time: %s seconds ---", inference_seconds)
    output_list = [ '%.2f' % float(100*elem) for elem in output[0].detach().numpy()]
    # NOTE(review): the learner was trained on multi-label targets
    # (MultiCategoryList, accuracy_thresh metric); softmax makes the classes
    # compete with each other, so these percentages are relative rather than
    # independent per-class scores — confirm this matches the server's intent.
    prediction = F.softmax(output, dim=1)
    prediction_list = [ '%.2f' % float(100*elem) for elem in prediction[0].detach().numpy()]
    # Same values as prediction_list; kept as a separate list because the
    # response exposes them both inside the triples and under "predictions".
    probabilities = list(prediction_list)
    response = {}
    temp = list(zip(output_list,prediction_list,probabilities))
    temp2 = dict(zip(classes,temp))
    #print(temp,temp2)
    # A class counts as detected when its raw output is positive.
    response['probabilities'] = {k: v for k, v in temp2.items() if float(v[0]) >0}
    response['predictions'] = probabilities
    response['output'] = output_list
    response['summary'] = list(response['probabilities'].keys())
    response['others'] = sort_dict({k: float(v[2]) for k, v in temp2.items() if float(v[0]) <0 and float(v[2]) >0})
    logger.info('Predicted class is %s', response['summary'])
    return json.dumps(response, cls=MyEncoder)

from random import shuffle

logger = logging.getLogger()
logger.setLevel(logging.INFO)

path_imgs = Path("./imgs/")
#path_imgs = Path("./docs/assets")
src = (ImageList.from_folder(path_imgs,recurse=True))
shuffle(src.items)

# Smoke test: display the first three (shuffled) images and print the class
# list followed by the JSON string returned by predict() for each of them.
for i in range(3):
    img = src.items[i]
    display(Image(img))
    results = predict(img)
    print(classes)
    print(results)
    # `results` is a JSON string; the per-field prints below are disabled:
    # print(results['output'])
    # print(results['predictions'])
    # print(results['probabilities'])
    # print(results['summary'])
    # print("   ---> or: ",results['others'])

['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']
{"probabilities": {"20": ["892.83", "21.96", "21.96"], "euro": ["1019.65", "78.04", "78.04"]}, "predictions": ["0.00", "0.00", "0.00", "21.96", "0.00", "0.00", "0.00", "0.00", "78.04", "0.00"], "output": ["-948.66", "-703.11", "-958.62", "892.83", "-775.01", "-873.82", "-968.14", "-811.25", "1019.65", "-1014.17"], "summary": ["20", "euro"], "others": {}}

['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']
{"probabilities": {"20": ["83.02", "47.78", "47.78"], "euro": ["63.91", "39.47", "39.47"]}, "predictions": ["0.34", "0.06", "0.06", "47.78", "0.03", "0.31", "0.19", "0.29", "39.47", "11.48"], "output": ["-410.92", "-577.95", "-590.61", "83.02", "-656.61", "-419.84", "-471.82", "-429.03", "63.91", "-59.61"], "summary": ["20", "euro"], "others": {"usd": "11.48%", "1": "0.34%", "5": "0.31%", "500": "0.29%", "50": "0.19%", "10": "0.06%", "100": "0.06%", "200": "0.03%"}}

['1', '10', '100', '20', '200', '5', '50', '500', 'euro', 'usd']
{"probabilities": {"usd": ["499.44", "99.57", "99.57"]}, "predictions": ["0.01", "0.00", "0.09", "0.01", "0.00", "0.30", "0.00", "0.00", "0.00", "99.57"], "output": ["-415.96", "-601.36", "-199.24", "-400.79", "-735.19", "-80.84", "-507.52", "-560.94", "-508.41", "499.44"], "summary": ["usd"], "others": {"5": "0.30%", "100": "0.09%", "1": "0.01%", "20": "0.01%"}}

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.773087	0.656208	0.271154	0.579044	00:02
1	0.676508	0.453052	0.526923	0.646792	00:02
2	0.553611	0.254357	0.807692	0.741027	00:02
3	0.457661	0.225245	0.884615	0.779478	00:02
4	0.396879	0.240159	0.855769	0.754953	00:02
5	0.356476	0.201313	0.886538	0.794969	00:02
6	0.319123	0.169044	0.909615	0.843629	00:02
7	0.291177	0.159696	0.909615	0.843046	00:02
8	0.267934	0.166017	0.896154	0.820270	00:02
9	0.246331	0.164613	0.903846	0.831099	00:02

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.168200	0.162801	0.896154	0.835130	00:02
1	0.173512	0.145427	0.915385	0.859460	00:02
2	0.172781	0.154199	0.911538	0.843629	00:02
3	0.170041	0.141247	0.932692	0.878885	00:02
4	0.165794	0.144410	0.928846	0.863151	00:02

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.139368	0.147208	0.930769	0.871892	00:02
1	0.137202	0.140020	0.934615	0.880633	00:02
2	0.143186	0.142131	0.930769	0.872038	00:02
3	0.152684	0.140593	0.925000	0.861548	00:02
4	0.153283	0.138080	0.932692	0.872766	00:02

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.033061	0.039044	0.976923	0.972902	00:04
1	0.046422	0.034355	0.982692	0.978147	00:04
2	0.048970	0.036748	0.982692	0.971348	00:04
3	0.047315	0.033696	0.980769	0.964161	00:04
4	0.046004	0.029788	0.984615	0.973096	00:04
5	0.048292	0.036230	0.978846	0.980769	00:04
6	0.046813	0.025255	0.982692	0.977467	00:04
7	0.042664	0.025334	0.984615	0.979895	00:04
8	0.039299	0.026015	0.982692	0.978147	00:04
9	0.036796	0.026130	0.978846	0.968531	00:04

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.043927	0.024567	0.984615	0.973776	00:04
1	0.036700	0.025245	0.986539	0.974845	00:04
2	0.033197	0.027836	0.990385	0.985140	00:04
3	0.032516	0.031615	0.984615	0.979895	00:04
4	0.030709	0.051836	0.978846	0.961733	00:04
5	0.028689	0.050236	0.982692	0.972028	00:04
6	0.029450	0.036953	0.982692	0.978147	00:04
7	0.029097	0.027021	0.982692	0.971348	00:04
8	0.027411	0.023903	0.984615	0.979895	00:04
9	0.024911	0.024797	0.984615	0.979895	00:04

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.027847	0.025546	0.986538	0.974845	00:03
1	0.026122	0.023918	0.988461	0.983392	00:03
2	0.025983	0.024420	0.984615	0.973096	00:03
3	0.028380	0.024073	0.988461	0.976593	00:03
4	0.027009	0.023663	0.988461	0.983392	00:03

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.026562	0.022852	0.988461	0.983392	00:03
1	0.022301	0.025324	0.984615	0.973096	00:03
2	0.020922	0.024292	0.984615	0.973096	00:04
3	0.023071	0.022963	0.986538	0.981643	00:04
4	0.023296	0.022652	0.988461	0.983392	00:04

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.017608	0.020679	0.988461	0.983392	00:04
1	0.026293	0.021941	0.988461	0.983392	00:04
2	0.028482	0.019248	0.988461	0.983392	00:04
3	0.024211	0.019550	0.988461	0.983392	00:04
4	0.024755	0.020520	0.988461	0.983392	00:04

epoch	train_loss	valid_loss	accuracy_thresh	fbeta	time
0	0.017794	0.014843	0.992308	0.993007	00:04
1	0.015237	0.014199	0.992308	0.993007	00:04
2	0.016013	0.015322	0.994231	0.994755	00:04
3	0.015068	0.014562	0.992308	0.993007	00:04
4	0.015075	0.014409	0.994231	0.994755	00:04