parser.add_argument("--deterministic_synthesis", action="store_true", default=False)
 
-parser.add_argument("--reverse_cleanup", action="store_true", default=False)
+# BooleanOptionalAction keeps the flag usable now that it defaults to True:
+# with store_true, default=True would make --reverse_cleanup a no-op, with no
+# way to turn the cleanup off. It can now be disabled via --no-reverse_cleanup.
+parser.add_argument("--reverse_cleanup", action=argparse.BooleanOptionalAction, default=True)
 
 parser.add_argument("--validation_forward_only", action="store_true", default=False)
 
 
             nb_test_samples += input.size(0)
 
-        main_test_accuracy = quizz_machine.produce_results(
+        model.main_test_accuracy = quizz_machine.produce_results(
             n_epoch=n_epoch,
             model=model,
             result_dir=args.result_dir,
 
         log_string(f"test_perplexity {n_epoch} {test_perplexity}")
 
-    model.main_test_accuracy = main_test_accuracy
-
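For context, a hypothetical sketch of why the accuracy is written onto the model object at the point it is computed: each model in the ensemble keeps its own figure, and the gating test further down reads the weakest one.

```python
# Hypothetical stand-in for the GPT models; only the attribute matters here.
class StubModel:
    def __init__(self):
        self.main_test_accuracy = 0.0  # overwritten after every evaluation

models = [StubModel(), StubModel()]
models[0].main_test_accuracy = 0.97
models[1].main_test_accuracy = 0.91

# mirrors the later check against args.accuracy_to_make_c_quizzes
assert min(m.main_test_accuracy for m in models) == 0.91
```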
 
 ######################################################################
 
 ):
     recorded = []
 
-    sum_logits, sum_nb_c_quizzes = 0, 0
-
     nb_to_create = nb_for_train + nb_for_test
 
     # ------------------------------------------------------------
             reverse_cleanup=args.reverse_cleanup,
         )
 
-        sum_logits += c_quizzes.size(0) * ave_seq_logproba
-        sum_nb_c_quizzes += c_quizzes.size(0)
-
         nb_correct = quizz_machine.compute_correctness(
             c_quizzes, models, both_directions=not args.validation_forward_only
         )
             else ""
         )
 
-        quizz_machine.problem.save_quizzes(
-            valid_c_quizzes(recorded, criteria=lambda nb_correct: nb_correct == n)[:72],
-            args.result_dir,
-            f"culture_c_quiz_{n_epoch:04d}_N{n}{s}",
-        )
+        q = valid_c_quizzes(recorded, criteria=lambda nb_correct: nb_correct == n)[:72]
 
-    return sum_logits / sum_nb_c_quizzes
+        if q.size(0) > 0:
+            quizz_machine.problem.save_quizzes(
+                q,
+                args.result_dir,
+                f"culture_c_quiz_{n_epoch:04d}_N{n}{s}",
+            )
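The guard matters because an epoch can produce no quizzes for a given `n`. A sketch with a hypothetical stand-in for `valid_c_quizzes` (the real one lives elsewhere in the file):

```python
# Hypothetical stand-in: select the recorded quizzes whose correctness count
# satisfies the criterion, concatenated over batches.
import torch

def valid_c_quizzes(recorded, criteria):
    return torch.cat([q[criteria(nb)] for q, nb in recorded], dim=0)

recorded = [(torch.zeros(4, 100, dtype=torch.int64), torch.tensor([0, 1, 2, 0]))]

# no quiz was solved by exactly 3 models: the selection is empty, and
# save_quizzes would be asked to render zero quizzes without the guard
q = valid_c_quizzes(recorded, criteria=lambda nb_correct: nb_correct == 3)
assert q.size(0) == 0
```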
 
 
 ######################################################################
     cta = " ".join([f"{float(m.main_test_accuracy):.04f}" for m in models])
     log_string(f"current_test_accuracies {cta}")
 
-    # replace a fraction of the w_quizzes with a fresh ones
+    # replace a fraction of the w_quizzes with fresh ones
     quizz_machine.renew_w_quizzes(args.nb_train_samples // args.nb_gpts)
 
     if min([m.main_test_accuracy for m in models]) >= args.accuracy_to_make_c_quizzes:
 
 
     ######################################################################
 
+    def generate_prompts_and_answers(self, nb):
+        frame_sequences = self.generate_frame_sequences(nb)
+        # split along the sequence (time) dimension, dim 1, not the batch dim
+        prompts = frame_sequences[:, : frame_sequences.size(1) // 2].flatten(1)
+        answers = frame_sequences[:, frame_sequences.size(1) // 2 :].flatten(1)
+        return prompts, answers
+
     def generate_token_sequences(self, nb):
         frame_sequences = self.generate_frame_sequences(nb)
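A hypothetical usage sketch for the new `generate_prompts_and_answers` (stub shapes, not the real problem class), illustrating why the split index must come from `size(1)`:

```python
# Stub: frames of shape (nb, T, H*W); the real generate_frame_sequences is
# defined elsewhere in the class.
import torch

class StubProblem:
    def generate_frame_sequences(self, nb, T=8, frame_size=25):
        return torch.randint(10, (nb, T, frame_size))

    def generate_prompts_and_answers(self, nb):
        frame_sequences = self.generate_frame_sequences(nb)
        # halve the time dimension: first half is the prompt, second the answer
        prompts = frame_sequences[:, : frame_sequences.size(1) // 2].flatten(1)
        answers = frame_sequences[:, frame_sequences.size(1) // 2 :].flatten(1)
        return prompts, answers

prompts, answers = StubProblem().generate_prompts_and_answers(4)
assert prompts.shape == (4, 4 * 25) and answers.shape == (4, 4 * 25)
```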