def reward(learning_representation, target_representation, j):
    """Score how well the learned representation matches the target around ``j``.

    The score is twice the Tanimoto similarity of the two representations,
    each indexed with ``[left, j, right]``, plus a bonus of 1 when the
    elements at index ``j`` compare equal.

    Args:
        learning_representation: Indexable representation being evaluated.
        target_representation: Indexable reference representation.
        j: Index of the position being scored.

    Returns:
        ``2.0 * similarity``, increased by 1 if
        ``learning_representation[j] == target_representation[j]``.
    """
    # NOTE(review): ``left``, ``right`` and ``TanimotoSimilarity`` are not
    # defined in this snippet; they must be in scope where this function
    # lives. Presumably ``[left, j, right]`` selects the neighbourhood of
    # position ``j`` -- TODO confirm against the representation type.
    score = TanimotoSimilarity(
        learning_representation[left, j, right],
        target_representation[left, j, right],
    )

    # The neighbourhood similarity is weighted double relative to the
    # exact-match bonus added below.
    score = 2.0 * score

    if learning_representation[j] == target_representation[j]:
        score += 1

    return score