3 # Any copyright is dedicated to the Public Domain.
4 # https://creativecommons.org/publicdomain/zero/1.0/
6 # Written by Francois Fleuret <francois@fleuret.org>
# Benchmark script: measures the sustained matrix-multiplication throughput
# (in flops) of the available device for several floating-point dtypes.

# Imported here so the script is self-contained; a repeated import is
# harmless if the file already imports these modules above.
import time

import torch

if torch.cuda.is_available():
    device = torch.device('cuda')
    # CUDA kernels are queued asynchronously, so the queue has to be drained
    # before reading the clock or the timings would be meaningless.
    sync = torch.cuda.synchronize
else:
    device = torch.device('cpu')

    def sync():
        """Do nothing: there is no CUDA queue to drain on the CPU path."""


# Wall-clock budget, in seconds, spent on each dtype.
# NOTE(review): the original assignment was not visible in the reviewed
# excerpt; 30 is an assumed value -- confirm.
max_duration = 30

# Operands are (d1 x d2) and (d2 x d3) matrices.
d1, d2, d3 = 2048, 2048, 2048


def scale_to_si(value):
    """Scale *value* down by powers of 1000 and return ``(scaled, prefix)``.

    The prefix is one of '', 'K', 'M', 'G', 'T', 'P'.  Values too large for
    the biggest prefix stay expressed in 'P' instead of being divided once
    more than the prefix accounts for.
    """
    prefixes = ['', 'K', 'M', 'G', 'T', 'P']
    index = 0
    while value >= 1e3 and index < len(prefixes) - 1:
        value /= 1e3
        index += 1
    return value, prefixes[index]


def measure_matmul_flops(dtype, duration_budget, dims=None):
    """Return the measured matmul throughput, in flop/s, for *dtype*.

    Random matrices are multiplied repeatedly on ``device`` until
    *duration_budget* seconds have elapsed.  *dims* is an optional
    ``(d1, d2, d3)`` triple; it defaults to the module-level sizes.

    Whatever exception torch raises for a dtype/device combination it cannot
    handle propagates to the caller.
    """
    n1, n2, n3 = (d1, d2, d3) if dims is None else dims

    a = torch.rand(n1, n2, device=device, dtype=dtype)
    b = torch.rand(n2, n3, device=device, dtype=dtype)
    nb_runs = 0

    sync()  # make sure the operands are materialized before timing starts
    start_time = time.perf_counter()
    while time.perf_counter() - start_time < duration_budget:
        torch.matmul(a, b)
        nb_runs += 1
    sync()  # wait for every queued product before stopping the clock
    duration = time.perf_counter() - start_time

    nb_flop = float(nb_runs * n1 * n2 * n3 * 2)  # 1 multiply-and-add is 2 ops
    return nb_flop / duration


def main():
    """Benchmark each dtype in turn and print one result line per dtype."""
    for t in [torch.float32, torch.float16]:
        try:
            speed = measure_matmul_flops(t, max_duration)
        except RuntimeError:
            # Only torch's "unsupported" failures are reported as such; a
            # catch-all would also swallow KeyboardInterrupt and real bugs.
            print(f'{t} is not available on {device}')
            continue

        speed, u = scale_to_si(speed)
        print(f'{speed:.02f} {u}flops with {t} on {device}')


if __name__ == '__main__':
    main()