reward(learning_representation, target_representation, j): score = TanimotoSimilarity(learning_representation[left, j, right], target_representation[left, j, right]); score = 2.0 * score; if learning_representation[j] == target_representation[j]: score += 1; end if; return score |