def reward(learning_representation, target_representation, j: int) -> int:
    """Score how well the learned representation matches the target around *j*.

    The score starts at -1. A match at position ``j`` sets it to 1, and each
    matching neighbour (left and right of ``j``) adds 1 more, so the result
    lies in the range -1..3.

    Args:
        learning_representation: Indexable sequence being evaluated.
        target_representation: Indexable sequence holding the desired values.
        j: Position to score; expected to be a valid non-negative index
            into both sequences.

    Returns:
        The integer reward for position ``j``.

    Raises:
        IndexError: If ``j`` itself is outside either sequence.
    """
    # NOTE(review): the original text never defined `left` / `right`; they are
    # assumed here to be the immediately adjacent positions -- confirm.
    left = j - 1
    right = j + 1

    # Neighbours are only comparable when they exist in both sequences.
    limit = min(len(learning_representation), len(target_representation))

    def _neighbour_matches(k):
        # Explicit bounds check: a bare index of -1 would silently wrap
        # around to the last element, and limit would raise IndexError.
        # NOTE(review): out-of-range neighbours earn no bonus -- confirm
        # this is the intended boundary policy (vs. wrap-around).
        return (
            0 <= k < limit
            and learning_representation[k] == target_representation[k]
        )

    score = -1
    if learning_representation[j] == target_representation[j]:
        score = 1

    # NOTE(review): the original had no indentation; each `if` is read as
    # guarding only the single statement that follows it, so the neighbour
    # bonuses apply whether or not position j itself matched -- confirm.
    if _neighbour_matches(left):
        score += 1
    if _neighbour_matches(right):
        score += 1

    return score