def reward(learning_representation, target_representation, j):
    """Return the reward for position ``j`` of a learned representation.

    The reward is twice the Tanimoto similarity between the leading parts
    of the two representations, plus a bonus of 1 when the elements at
    index ``j`` are equal.

    Args:
        learning_representation: Indexable, sliceable representation being
            scored.
        target_representation: Reference representation to compare against;
            indexed and sliced the same way.
        j: Index of the position being scored.

    Returns:
        ``2.0 * similarity``, plus 1 if the elements at index ``j`` match.
    """
    # NOTE(review): the original pseudocode wrote ``[0 to j]``; this is read
    # as including index ``j`` (hence ``j + 1`` as the slice end). If the
    # intent was Python-style ``[0:j]`` (excluding ``j``), drop the ``+ 1``.
    # NOTE(review): ``TanimotoSimilarity`` is not defined in this snippet;
    # presumably it accepts the sliced representations directly -- confirm.
    similarity = TanimotoSimilarity(
        learning_representation[0 : j + 1],
        target_representation[0 : j + 1],
    )

    # The prefix similarity is weighted twice as heavily as the exact-match
    # bonus for the current position.
    score = 2.0 * similarity

    if learning_representation[j] == target_representation[j]:
        score += 1

    return score