Dataset

XSum

Threshold Values

Threshold = 0.0001

Threshold = 0.11

Threshold = 1.2

Metrics

Accuracy

Precision

Recall

Accuracy

Precision

Recall

Accuracy

Precision

Recall

Attacks

Models

LiRA Candidate

Mistral 7B

99.2

100

98.4

98

100

96

53

100

6

LLaMA 7B

100

100

100

100

100

100

50.8

100

1.6

LLaMA 13B

100

100

100

100

100

100

54.4

100

8

Mixtral 8x7B

100

100

100

95.7

100

91.4

50.3

100

0.6

LiRA Base

Mistral 7B

100

100

100

49.7

0

0

50

0

0

LLaMA 7B

100

100

100

100

100

100

50.6

100

1.2

LLaMA 13B

100

100

100

100

100

100

51.5

100

3

Mixtral 8x7B

100

100

100

94

100

89

50

100

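100 [sic: inconsistent with the 50 accuracy and 100 precision reported for this row; by the pattern of the other rows (accuracy ≈ 50 + recall/2 when precision is 100) recall should be ≈ 0 — verify against the original table]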