# Scores learning_representation against target_representation at index j.
#
# Inputs:
#   learning_representation - indexable representation being scored
#   target_representation   - indexable representation it is compared against
#   j                       - index at which the comparison is made
# Output:
#   2.0 * TanimotoSimilarity of the two "0 to j" prefixes, plus 1 when the
#   two representations hold equal elements at index j.
#
# NOTE(review): this listing had been flattened onto a single line, so the
# statement boundaries are reconstructed. It is assumed that only
# `score += 1` is guarded by the `if` and that `return` is unconditional
# -- confirm against the original listing.
# NOTE(review): whether the range "0 to j" includes index j is not stated
# here -- confirm before translating this into executable code.
reward(learning_representation, target_representation, j):
    # Similarity of the two prefixes.
    score = TanimotoSimilarity(learning_representation[0 to j],
                               target_representation[0 to j])
    score = 2.0*score
    # Add 1 when the elements at index j are equal.
    if learning_representation[j] == target_representation[j]:
        score += 1
    return score