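Dataset: AGNews

Metrics (Accuracy, Precision, Recall) at each of the three threshold values.

| Attacks | Models | Accuracy (Threshold = 0.0001) | Precision (Threshold = 0.0001) | Recall (Threshold = 0.0001) | Accuracy (Threshold = 0.11) | Precision (Threshold = 0.11) | Recall (Threshold = 0.11) | Accuracy (Threshold = 1.2) | Precision (Threshold = 1.2) | Recall (Threshold = 1.2) |
|---|---|---|---|---|---|---|---|---|---|---|
| LiRA Candidate | Mistral 7B | 60 | 55.5 | 100 | 60.1 | 55.6 | 100 | 57.1 | 54.1 | 92.6 |
| LiRA Candidate | LLaMA 7B | 98.9 | 97.8 | 100 | 99.67 | 98.03 | 100 | 78.4 | 100 | 56.8 |
| LiRA Candidate | LLaMA 13B | 100 | 100 | 100 | 100 | 100 | 100 | 54.4 | 100 | 8.8 |
| LiRA Candidate | Mixtral 8x7B | 100 | 100 | 100 | 100 | 100 | 100 | 55.8 | 100 | 11.6 |
| LiRA Base | Mistral 7B | 55.8 | 50 | 100 | 49.6 | 16 | 55.8 | 50.2 | 75 | 0.6 |
| LiRA Base | LLaMA 7B | 50 | 50 | 100 | 100 | 100 | 100 | 100 | 12.8 | 100 |
| LiRA Base | LLaMA 13B | 100 | 100 | 100 | 100 | 100 | 100 | 65.1 | 100 | 30.2 |
| LiRA Base | Mixtral 8x7B | 100 | 100 | 100 | 99.9 | 100 | 100 | 54.2 | 100 | 10 |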