def reward(learning_representation, target_representation, j, *, left=None, right=None):
    """Score how well position ``j`` and its two neighbours match the target.

    The score starts at -1 and becomes 1 when the element at ``j`` is equal
    in both representations.  One further point is added for each neighbour
    position (``left`` and ``right``) whose elements are equal.  The result
    therefore lies between -1 and 3.

    NOTE(review): the original one-line pseudocode used ``left`` and
    ``right`` without defining them.  Defaulting them to ``j - 1`` and
    ``j + 1`` is an assumption -- confirm against the surrounding text, or
    pass the intended indices explicitly.
    NOTE(review): the collapsed original does not show whether the neighbour
    checks were nested under the centre check; they are treated here as
    three independent checks -- confirm.

    Args:
        learning_representation: Indexable sequence being evaluated.
        target_representation: Indexable sequence to compare against.
        j: Index of the centre position.  It is indexed directly, so an
            out-of-range value raises ``IndexError``.
        left: Index of the left neighbour; defaults to ``j - 1``.
        right: Index of the right neighbour; defaults to ``j + 1``.

    Returns:
        The integer score for position ``j``.
    """
    if left is None:
        left = j - 1
    if right is None:
        right = j + 1

    # A neighbour only counts when it exists in both sequences.  Without this
    # guard, j == 0 would silently compare the *last* elements (Python's
    # negative indexing) and the last position would raise IndexError.
    limit = min(len(learning_representation), len(target_representation))

    score = 1 if learning_representation[j] == target_representation[j] else -1
    for neighbour in (left, right):
        if (
            0 <= neighbour < limit
            and learning_representation[neighbour] == target_representation[neighbour]
        ):
            score += 1
    return score