Profile of Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm

Filename	/usr/local/lib/perl5/site_perl/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm
Statements	Executed 29 statements in 1.95ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	44µs	246µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::new
1	1	1	38µs	38µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@54
1	1	1	37µs	184µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::set_config
1	1	1	26µs	195µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@55
1	1	1	23µs	36µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@56
1	1	1	22µs	60µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@57
1	1	1	21µs	28µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@58
1	1	1	21µs	87µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@59
1	1	1	20µs	85µs	Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@61
0	0	0	0s	0s	Mail::SpamAssassin::Plugin::AutoLearnThreshold::autolearn_discriminator

Call graph for these subroutines as a Graphviz dot language file.

Line	Statements	Time on line	Calls	Time in subs	Code
1					# <@LICENSE>
2					# Licensed to the Apache Software Foundation (ASF) under one or more
3					# contributor license agreements. See the NOTICE file distributed with
4					# this work for additional information regarding copyright ownership.
5					# The ASF licenses this file to you under the Apache License, Version 2.0
6					# (the "License"); you may not use this file except in compliance with
7					# the License. You may obtain a copy of the License at:
8					#
9					# http://www.apache.org/licenses/LICENSE-2.0
10					#
11					# Unless required by applicable law or agreed to in writing, software
12					# distributed under the License is distributed on an "AS IS" BASIS,
13					# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14					# See the License for the specific language governing permissions and
15					# limitations under the License.
16					# </@LICENSE>
17
18					=head1 NAME
19
20					Mail::SpamAssassin::Plugin::AutoLearnThreshold - threshold-based discriminator for Bayes auto-learning
21
22					=head1 SYNOPSIS
23
24					loadplugin Mail::SpamAssassin::Plugin::AutoLearnThreshold
25
26					=head1 DESCRIPTION
27
28					This plugin implements the threshold-based auto-learning discriminator
29					for SpamAssassin's Bayes subsystem. Auto-learning is a mechanism
30					whereby high-scoring mails (or low-scoring mails, for non-spam) are fed
31					into its learning systems without user intervention, during scanning.
32
33					Note that certain tests are ignored when determining whether a message
34					should be trained upon:
35
36					=over 4
37
38					=item * rules with tflags set to 'learn' (the Bayesian rules)
39
40					=item * rules with tflags set to 'userconf' (user configuration)
41
42					=item * rules with tflags set to 'noautolearn'
43
44					=back
45
46					Also note that auto-learning occurs using scores from either scoreset 0
47					or 1, depending on what scoreset is used during message check. It is
48					likely that the message check and auto-learn scores will be different.
49
50					=cut
51
52					package Mail::SpamAssassin::Plugin::AutoLearnThreshold;
53
54	2	62µs	1	38µs	# spent 38µs within Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@54 which was called: # once (38µs+0s) by Mail::SpamAssassin::PluginHandler::load_plugin at line 54 use Mail::SpamAssassin::Plugin; # spent 38µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@54
55	2	59µs	2	365µs	# spent 195µs (26+169) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@55 which was called: # once (26µs+169µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 55 use Mail::SpamAssassin::Logger; # spent 195µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@55 # spent 169µs making 1 call to Exporter::import
56	2	58µs	2	49µs	# spent 36µs (23+13) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@56 which was called: # once (23µs+13µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 56 use strict; # spent 36µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@56 # spent 13µs making 1 call to strict::import
57	2	57µs	2	99µs	# spent 60µs (22+38) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@57 which was called: # once (22µs+38µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 57 use warnings; # spent 60µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@57 # spent 38µs making 1 call to warnings::import
58	2	57µs	2	35µs	# spent 28µs (21+7) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@58 which was called: # once (21µs+7µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 58 use bytes; # spent 28µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@58 # spent 7µs making 1 call to bytes::import
59	2	60µs	2	154µs	# spent 87µs (21+67) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@59 which was called: # once (21µs+67µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 59 use re 'taint'; # spent 87µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@59 # spent 67µs making 1 call to re::import
60
61	2	1.50ms	2	150µs	# spent 85µs (20+65) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@61 which was called: # once (20µs+65µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 61 use vars qw(@ISA); # spent 85µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::BEGIN@61 # spent 65µs making 1 call to vars::import
62	1	15µs			@ISA = qw(Mail::SpamAssassin::Plugin);
63
64					# spent 246µs (44+202) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::new which was called: # once (44µs+202µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 1 of (eval 73)[Mail/SpamAssassin/PluginHandler.pm:129] sub new {
					# Constructor. Takes the invocant and the main SpamAssassin object,
					# builds the plugin through the parent class constructor, registers
					# this plugin's configuration settings, and returns the new object.
65	1	2µs			my $class = shift;
66	1	2µs			my $mailsaobject = shift;
67
					# Accept either a package name or an existing object as the invocant.
68	1	2µs			$class = ref($class) || $class;
69	1	10µs	1	19µs	my $self = $class->SUPER::new($mailsaobject); # spent 19µs making 1 call to Mail::SpamAssassin::Plugin::new
70	1	2µs			bless ($self, $class);
71
					# Register the settings declared in set_config on the object's conf.
72	1	8µs	1	184µs	$self->set_config($mailsaobject->{conf}); # spent 184µs making 1 call to Mail::SpamAssassin::Plugin::AutoLearnThreshold::set_config
73
74	1	10µs			return $self;
75					}
76
77					# spent 184µs (37+147) within Mail::SpamAssassin::Plugin::AutoLearnThreshold::set_config which was called: # once (37µs+147µs) by Mail::SpamAssassin::Plugin::AutoLearnThreshold::new at line 72 sub set_config {
					# Collects the three auto-learn settings (two numeric thresholds and one
					# boolean) with their defaults and hands them to the configuration
					# parser found in $conf->{parser} via register_commands().
78	1	2µs			my($self, $conf) = @_;
79	1	2µs			my @cmds;
80
81					=head1 USER OPTIONS
82
83					The following configuration settings are used to control auto-learning:
84
85					=over 4
86
87					=item bayes_auto_learn_threshold_nonspam n.nn (default: 0.1)
88
89					The score threshold below which a mail has to score, to be fed into
90					SpamAssassin's learning systems automatically as a non-spam message.
91
92					=cut
93
94	1	6µs			push (@cmds, {
95					setting => 'bayes_auto_learn_threshold_nonspam',
96					default => 0.1,
97					type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
98					});
99
100					=item bayes_auto_learn_threshold_spam n.nn (default: 12.0)
101
102					The score threshold above which a mail has to score, to be fed into
103					SpamAssassin's learning systems automatically as a spam message.
104
105					Note: SpamAssassin requires at least 3 points from the header, and 3
106					points from the body to auto-learn as spam. Therefore, the minimum
107					working value for this option is 6.
108
109					If the test option autolearn_force is set, the minimum value will
110					remain at 6 points but there is no requirement that the points come
111					from body and header rules. This option is useful for autolearning
112					with rules that are considered to be extremely safe indicators of
113					the spaminess of a message.
114
115					=cut
116
117	1	4µs			push (@cmds, {
118					setting => 'bayes_auto_learn_threshold_spam',
119					default => 12.0,
120					type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
121					});
122
123					=item bayes_auto_learn_on_error (0 | 1) (default: 0)
124
125					With C<bayes_auto_learn_on_error> off, autolearning will be performed
126					even if the bayes classifier already agrees with the new classification (i.e.
127					yielded BAYES_00 for what we are now trying to teach it as ham, or yielded
128					BAYES_99 for spam). This is a traditional setting, the default was chosen
129					to retain backward compatibility.
130
131					With C<bayes_auto_learn_on_error> turned on, autolearning will be performed
132					only when a bayes classifier had a different opinion from what the autolearner
133					is now trying to teach it (i.e. it made an error in judgement). This strategy
134					may or may not produce better future classifications, but usually works
135					very well, while also preventing unnecessary overlearning and slowing down
136					database growth.
137
138					=cut
139
140	1	6µs			push (@cmds, {
141					setting => 'bayes_auto_learn_on_error',
142					default => 0,
143					type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL
144					});
145
					# Hand the collected command descriptions to the configuration parser.
146	1	16µs	1	147µs	$conf->{parser}->register_commands(\@cmds); # spent 147µs making 1 call to Mail::SpamAssassin::Conf::Parser::register_commands
147					}
148
149					sub autolearn_discriminator {
					# Decides whether the scanned message should be auto-learned.
					# $params->{permsgstatus} is the per-message scan object; its {conf}
					# supplies the thresholds and the learn-on-error flag.
					# Returns nothing (bare return) when the message must not be learned,
					# otherwise an array reference:
					#   [$isspam, $force_autolearn, $force_autolearn_names]
150					my ($self, $params) = @_;
151
152					my $scan = $params->{permsgstatus};
153					my $conf = $scan->{conf};
154
155					# Figure out min/max for autolearning.
156					# Default to specified auto_learn_threshold settings
157					my $min = $conf->{bayes_auto_learn_threshold_nonspam};
158					my $max = $conf->{bayes_auto_learn_threshold_spam};
159
160					# Find out what score we should consider this message to have ...
161					my $score = $scan->get_autolearn_points();
162					my $body_only_points = $scan->get_body_only_points();
163					my $head_only_points = $scan->get_head_only_points();
164					my $learned_points = $scan->get_learned_points();
165
166					# find out if any of the tests added an autolearn_force status
167					my $force_autolearn = $scan->get_autolearn_force_status();
168					my $force_autolearn_names = $scan->get_autolearn_force_names();
169
170					dbg("learn: auto-learn? ham=$min, spam=$max, ".
171					"body-points=".$body_only_points.", ".
172					"head-points=".$head_only_points.", ".
173					"learned-points=".$learned_points);
174
					# Below $min: ham candidate; at or above $max: spam candidate;
					# anything in between is not learned at all.
175					my $isspam;
176					if ($score < $min) {
177					$isspam = 0;
178					} elsif ($score >= $max) {
179					$isspam = 1;
180					} else {
181					dbg("learn: auto-learn? no: inside auto-learn thresholds, not considered ham or spam");
182					return;
183					}
184
					# Limits on the learned-points total beyond which the learner is
					# treated as contradicting the candidate classification.
185					my $learner_said_ham_points = -1.0;
186					my $learner_said_spam_points = 1.0;
187
188					if ($isspam) {
189					my $required_body_points = 3;
190					my $required_head_points = 3;
191
192					#Set a lower threshold of "just has to be spam" if autolearn_force was set on a rule
193					if ($force_autolearn) {
194					$required_body_points = -99;
195					$required_head_points = -99;
196					dbg("learn: auto-learn: autolearn_force flagged for a rule. Removing seperate body and head point threshold. Body Only Points: $body_only_points ($required_body_points req'd) / Head Only Points: $head_only_points ($required_head_points req'd)");
197					dbg("learn: auto-learn: autolearn_force flagged because of rule(s): $force_autolearn_names");
198					} else {
199					dbg("learn: auto-learn: autolearn_force not flagged for a rule. Body Only Points: $body_only_points ($required_body_points req'd) / Head Only Points: $head_only_points ($required_head_points req'd)");
200					}
201
202					if ($body_only_points < $required_body_points) {
203					dbg("learn: auto-learn? no: scored as spam but too few body points (".
204					$body_only_points." < ".$required_body_points.")");
205					return;
206					}
207					if ($head_only_points < $required_head_points) {
208					dbg("learn: auto-learn? no: scored as spam but too few head points (".
209					$head_only_points." < ".$required_head_points.")");
210					return;
211					}
212					if ($learned_points < $learner_said_ham_points) {
213					dbg("learn: auto-learn? no: scored as spam but learner indicated ham (".
214					$learned_points." < ".$learner_said_ham_points.")");
215					return;
216					}
217
					# The scan's own verdict must agree with the spam candidate.
218					if (!$scan->is_spam()) {
219					dbg("learn: auto-learn? no: scored as ham but autolearn wanted spam");
220					return;
221					}
222
223					} else {
224					if ($learned_points > $learner_said_spam_points) {
225					dbg("learn: auto-learn? no: scored as ham but learner indicated spam (".
226					$learned_points." > ".$learner_said_spam_points.")");
227					return;
228					}
229
					# The scan's own verdict must agree with the ham candidate.
230					if ($scan->is_spam()) {
231					dbg("learn: auto-learn? no: scored as spam but autolearn wanted ham");
232					return;
233					}
234					}
235
236					if ($conf->{bayes_auto_learn_on_error}) {
237					# learn-on-error strategy chosen:
238					# only allow learning if the autolearning classifier was unsure or
239					# had a different opinion from what we are trying to make it learn
240					#
241					my $tests = $scan->get_tag('TESTS');
242					if (defined $tests && $tests ne 'none') {
					# Build a lookup of the rule names listed in the comma-separated TESTS tag.
243					my %t = map { ($_,1) } split(/,/, $tests);
244					if ($isspam && $t{'BAYES_99'} || !$isspam && $t{'BAYES_00'}) {
245					dbg("learn: auto-learn? no: learn-on-error, %s, already classified ".
246					"as such", $isspam ? 'spam' : 'ham');
247					return;
248					}
249					}
250					}
251
252					dbg("learn: auto-learn? yes, ".($isspam?"spam ($score > $max)":"ham ($score < $min)")." autolearn_force=".($force_autolearn?"yes":"no"));
253
254					#Return an array reference because call_plugins only carries one return value
255					return [$isspam, $force_autolearn, $force_autolearn_names];
256					}
257
258	1	8µs			1;
259
260					=back
261
262					=cut