← Index
NYTProf Performance Profile   « line view »
For /usr/local/bin/sa-learn
  Run on Sun Nov 5 02:36:06 2017
Reported on Sun Nov 5 02:56:22 2017

Filename/usr/local/lib/perl5/site_perl/Mail/SpamAssassin/Plugin/HeaderEval.pm
StatementsExecuted 63 statements in 14.4ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
111454µs1.28msMail::SpamAssassin::Plugin::HeaderEval::::newMail::SpamAssassin::Plugin::HeaderEval::new
11167µs75µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@20Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@20
11139µs122µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@23Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@23
11132µs246µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@28Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@28
11132µs122µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@31Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@31
11131µs245µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@602Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@602
11131µs225µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@604Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@604
11130µs171µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@24Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@24
11129µs212µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@603Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@603
11124µs1.07msMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@29Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@29
11122µs36µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@22Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@22
11121µs58µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@21Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@21
11119µs19µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@26Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@26
11112µs12µsMail::SpamAssassin::Plugin::HeaderEval::::BEGIN@27Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@27
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::_check_date_diffMail::SpamAssassin::Plugin::HeaderEval::_check_date_diff
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::_check_date_receivedMail::SpamAssassin::Plugin::HeaderEval::_check_date_received
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::_check_for_forged_hotmail_received_headersMail::SpamAssassin::Plugin::HeaderEval::_check_for_forged_hotmail_received_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::_check_recipientsMail::SpamAssassin::Plugin::HeaderEval::_check_recipients
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::_get_date_header_timeMail::SpamAssassin::Plugin::HeaderEval::_get_date_header_time
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::_get_received_header_timesMail::SpamAssassin::Plugin::HeaderEval::_get_received_header_times
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_equal_from_domainsMail::SpamAssassin::Plugin::HeaderEval::check_equal_from_domains
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_fake_aol_relay_in_rcvdMail::SpamAssassin::Plugin::HeaderEval::check_for_fake_aol_relay_in_rcvd
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_faraway_charset_in_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_faraway_charset_in_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_forged_eudoramail_received_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_forged_eudoramail_received_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_forged_gw05_received_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_forged_gw05_received_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_forged_hotmail_received_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_forged_hotmail_received_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_forged_juno_received_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_forged_juno_received_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_forged_yahoo_received_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_forged_yahoo_received_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_matching_env_and_hdr_fromMail::SpamAssassin::Plugin::HeaderEval::check_for_matching_env_and_hdr_from
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_missing_to_headerMail::SpamAssassin::Plugin::HeaderEval::check_for_missing_to_header
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_msn_groups_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_msn_groups_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_no_hotmail_received_headersMail::SpamAssassin::Plugin::HeaderEval::check_for_no_hotmail_received_headers
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_shifted_dateMail::SpamAssassin::Plugin::HeaderEval::check_for_shifted_date
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_to_in_subjectMail::SpamAssassin::Plugin::HeaderEval::check_for_to_in_subject
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_for_unique_subject_idMail::SpamAssassin::Plugin::HeaderEval::check_for_unique_subject_id
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_header_count_rangeMail::SpamAssassin::Plugin::HeaderEval::check_header_count_range
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_illegal_charsMail::SpamAssassin::Plugin::HeaderEval::check_illegal_chars
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_messageid_not_usableMail::SpamAssassin::Plugin::HeaderEval::check_messageid_not_usable
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_outlook_message_idMail::SpamAssassin::Plugin::HeaderEval::check_outlook_message_id
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_ratware_envelope_fromMail::SpamAssassin::Plugin::HeaderEval::check_ratware_envelope_from
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_ratware_name_idMail::SpamAssassin::Plugin::HeaderEval::check_ratware_name_id
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::check_unresolved_templateMail::SpamAssassin::Plugin::HeaderEval::check_unresolved_template
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::compile_now_startMail::SpamAssassin::Plugin::HeaderEval::compile_now_start
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::dbg2Mail::SpamAssassin::Plugin::HeaderEval::dbg2
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::gated_through_received_hdr_removerMail::SpamAssassin::Plugin::HeaderEval::gated_through_received_hdr_remover
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::received_within_monthsMail::SpamAssassin::Plugin::HeaderEval::received_within_months
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::similar_recipientsMail::SpamAssassin::Plugin::HeaderEval::similar_recipients
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::sorted_recipientsMail::SpamAssassin::Plugin::HeaderEval::sorted_recipients
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::subject_is_all_capsMail::SpamAssassin::Plugin::HeaderEval::subject_is_all_caps
0000s0sMail::SpamAssassin::Plugin::HeaderEval::::word_is_in_dictionaryMail::SpamAssassin::Plugin::HeaderEval::word_is_in_dictionary
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# <@LICENSE>
2# Licensed to the Apache Software Foundation (ASF) under one or more
3# contributor license agreements. See the NOTICE file distributed with
4# this work for additional information regarding copyright ownership.
5# The ASF licenses this file to you under the Apache License, Version 2.0
6# (the "License"); you may not use this file except in compliance with
7# the License. You may obtain a copy of the License at:
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16# </@LICENSE>
17
18package Mail::SpamAssassin::Plugin::HeaderEval;
19
20271µs284µs
# spent 75µs (67+8) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@20 which was called: # once (67µs+8µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 20
use strict;
# spent 75µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@20 # spent 8µs making 1 call to strict::import
21263µs295µs
# spent 58µs (21+37) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@21 which was called: # once (21µs+37µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 21
use warnings;
# spent 58µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@21 # spent 37µs making 1 call to warnings::import
22291µs251µs
# spent 36µs (22+15) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@22 which was called: # once (22µs+15µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 22
use bytes;
# spent 36µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@22 # spent 15µs making 1 call to bytes::import
23293µs2205µs
# spent 122µs (39+83) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@23 which was called: # once (39µs+83µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 23
use re 'taint';
# spent 122µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@23 # spent 83µs making 1 call to re::import
24278µs2312µs
# spent 171µs (30+141) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@24 which was called: # once (30µs+141µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 24
use Errno qw(EBADF);
# spent 171µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@24 # spent 141µs making 1 call to Exporter::import
25
26264µs119µs
# spent 19µs within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@26 which was called: # once (19µs+0s) by Mail::SpamAssassin::PluginHandler::load_plugin at line 26
use Mail::SpamAssassin::Plugin;
27269µs112µs
# spent 12µs within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@27 which was called: # once (12µs+0s) by Mail::SpamAssassin::PluginHandler::load_plugin at line 27
use Mail::SpamAssassin::Locales;
28282µs2459µs
# spent 246µs (32+214) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@28 which was called: # once (32µs+214µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 28
use Mail::SpamAssassin::Logger;
# spent 246µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@28 # spent 214µs making 1 call to Exporter::import
29288µs22.12ms
# spent 1.07ms (24µs+1.05) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@29 which was called: # once (24µs+1.05ms) by Mail::SpamAssassin::PluginHandler::load_plugin at line 29
use Mail::SpamAssassin::Constants qw(:sa :ip);
# spent 1.07ms making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@29 # spent 1.05ms making 1 call to Exporter::import
30
3126.76ms2211µs
# spent 122µs (32+90) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@31 which was called: # once (32µs+90µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 31
use vars qw(@ISA);
# spent 122µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@31 # spent 90µs making 1 call to vars::import
32122µs@ISA = qw(Mail::SpamAssassin::Plugin);
33
34# constructor: register the eval rule
35
# spent 1.28ms (454µs+825µs) within Mail::SpamAssassin::Plugin::HeaderEval::new which was called: # once (454µs+825µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 1 of (eval 97)[Mail/SpamAssassin/PluginHandler.pm:129]
sub new {
3612µs my $class = shift;
3712µs my $mailsaobject = shift;
38
39 # some boilerplate...
4012µs $class = ref($class) || $class;
41118µs122µs my $self = $class->SUPER::new($mailsaobject);
# spent 22µs making 1 call to Mail::SpamAssassin::Plugin::new
4212µs bless ($self, $class);
43
44 # the important bit!
45110µs140µs $self->register_eval_rule("check_for_fake_aol_relay_in_rcvd");
# spent 40µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
4616µs129µs $self->register_eval_rule("check_for_faraway_charset_in_headers");
# spent 29µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
4716µs127µs $self->register_eval_rule("check_for_unique_subject_id");
# spent 27µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
48112µs127µs $self->register_eval_rule("check_illegal_chars");
# spent 27µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
49112µs131µs $self->register_eval_rule("check_for_forged_hotmail_received_headers");
# spent 31µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5017µs119µs $self->register_eval_rule("check_for_no_hotmail_received_headers");
# spent 19µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5116µs130µs $self->register_eval_rule("check_for_msn_groups_headers");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5216µs137µs $self->register_eval_rule("check_for_forged_eudoramail_received_headers");
# spent 37µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5316µs131µs $self->register_eval_rule("check_for_forged_yahoo_received_headers");
# spent 31µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
54113µs127µs $self->register_eval_rule("check_for_forged_juno_received_headers");
# spent 27µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5516µs138µs $self->register_eval_rule("check_for_matching_env_and_hdr_from");
# spent 38µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
56113µs131µs $self->register_eval_rule("sorted_recipients");
# spent 31µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5716µs128µs $self->register_eval_rule("similar_recipients");
# spent 28µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
58113µs130µs $self->register_eval_rule("check_for_missing_to_header");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
59112µs124µs $self->register_eval_rule("check_for_forged_gw05_received_headers");
# spent 24µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
60112µs128µs $self->register_eval_rule("check_for_shifted_date");
# spent 28µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
61113µs124µs $self->register_eval_rule("subject_is_all_caps");
# spent 24µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
6216µs130µs $self->register_eval_rule("check_for_to_in_subject");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
63110µs128µs $self->register_eval_rule("check_outlook_message_id");
# spent 28µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
6417µs124µs $self->register_eval_rule("check_messageid_not_usable");
# spent 24µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
65112µs133µs $self->register_eval_rule("check_header_count_range");
# spent 33µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
66112µs127µs $self->register_eval_rule("check_unresolved_template");
# spent 27µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
6716µs129µs $self->register_eval_rule("check_ratware_name_id");
# spent 29µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
68112µs134µs $self->register_eval_rule("check_ratware_envelope_from");
# spent 34µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
6916µs130µs $self->register_eval_rule("gated_through_received_hdr_remover");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
7016µs134µs $self->register_eval_rule("received_within_months");
# spent 34µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
7116µs133µs $self->register_eval_rule("check_equal_from_domains");
# spent 33µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
72
73115µs return $self;
74}
75
76# load triplets.txt into memory
77sub compile_now_start {
78 my ($self) = @_;
79
80 $self->word_is_in_dictionary("aba");
81}
82
83sub check_for_fake_aol_relay_in_rcvd {
84 my ($self, $pms) = @_;
85 local ($_);
86
87 $_ = $pms->get('Received');
88 s/\s/ /gs;
89
90 # this is the hostname format used by AOL for their relays. Spammers love
91 # forging it. Don't make it more specific to match aol.com only, though --
92 # there's another set of spammers who generate fake hostnames to go with
93 # it!
94 if (/ rly-[a-z][a-z]\d\d\./i) {
95 return 0 if /\/AOL-\d+\.\d+\.\d+\)/; # via AOL mail relay
96 return 0 if /ESMTP id (?:RELAY|MAILRELAY|MAILIN)/; # AOLish
97 return 1;
98 }
99
100# spam: Received: from unknown (HELO mta05bw.bigpond.com) (80.71.176.130) by
101# rly-xw01.mx.aol.com with QMQP; Sat, 15 Jun 2002 23:37:16 -0000
102
103# non: Received: from rly-xj02.mx.aol.com (rly-xj02.mail.aol.com [172.20.116.39]) by
104# omr-r05.mx.aol.com (v83.35) with ESMTP id RELAYIN7-0501132011; Wed, 01
105# May 2002 13:20:11 -0400
106
107# non: Received: from logs-tr.proxy.aol.com (logs-tr.proxy.aol.com [152.163.201.132])
108# by rly-ip01.mx.aol.com (8.8.8/8.8.8/AOL-5.0.0)
109# with ESMTP id NAA08955 for <sapient-alumni@yahoogroups.com>;
110# Thu, 4 Apr 2002 13:11:20 -0500 (EST)
111
112 return 0;
113}
114
115sub check_for_faraway_charset_in_headers {
116 my ($self, $pms) = @_;
117 my $hdr;
118
119 my @locales = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});
120
121 return 0 if grep { $_ eq "all" } @locales;
122
123 for my $h (qw(From Subject)) {
124 my @hdrs = $pms->get("$h:raw"); # ??? get() returns a scalar ???
125 if ($#hdrs >= 0) {
126 $hdr = join(" ", @hdrs);
127 } else {
128 $hdr = '';
129 }
130 while ($hdr =~ /=\?(.+?)\?.\?.*?\?=/g) {
131 Mail::SpamAssassin::Locales::is_charset_ok_for_locales($1, @locales)
132 or return 1;
133 }
134 }
135 0;
136}
137
138sub check_for_unique_subject_id {
139 my ($self, $pms) = @_;
140 local ($_);
141 $_ = lc $pms->get('Subject');
142 study; # study is a no-op since perl 5.16.0, eliminating related bugs
143
144 my $id = 0;
145 if (/[-_\.\s]{7,}([-a-z0-9]{4,})$/
146 || /\s{10,}(?:\S\s)?(\S+)$/
147 || /\s{3,}[-:\#\(\[]+([-a-z0-9]{4,})[\]\)]+$/
148 || /\s{3,}[:\#\(\[]*([a-f0-9]{4,})[\]\)]*$/
149 || /\s{3,}[-:\#]([a-z0-9]{5,})$/
150 || /[\s._]{3,}([^0\s._]\d{3,})$/
151 || /[\s._]{3,}\[(\S+)\]$/
152
153 # (7217vPhZ0-478TLdy5829qicU9-0@26) and similar
154 || /\(([-\w]{7,}\@\d+)\)$/
155
156 # Seven or more digits at the end of a subject is almost certainly a id
157 || /\b(\d{7,})\s*$/
158
159 # stuff at end of line after "!" or "?" is usually an id
160 || /[!\?]\s*(\d{4,}|\w+(-\w+)+)\s*$/
161
162 # 9095IPZK7-095wsvp8715rJgY8-286-28 and similar
163 # excluding 'Re:', etc and the first word
164 || /(?:\w{2,3}:\s)?\w+\s+(\w{7,}-\w{7,}(-\w+)*)\s*$/
165
166 # #30D7 and similar
167 || /\s#\s*([a-f0-9]{4,})\s*$/
168 )
169 {
170 $id = $1;
171 # exempt online purchases
172 if ($id =~ /\d{5,}/
173 && /(?:item|invoice|order|number|confirmation).{1,6}\Q$id\E\s*$/)
174 {
175 $id = 0;
176 }
177
178 # for the "foo-bar-baz" case, otherwise it won't
179 # be found in the dict:
180 $id =~ s/-//;
181 }
182
183 return ($id && !$self->word_is_in_dictionary($id));
184}
185
186# word_is_in_dictionary()
187#
188# See if the word looks like an English word, by checking if each triplet
189# of letters it contains is one that can be found in the English language.
190# Does not include triplets only found in proper names, or in the Latin
191# and Greek terms that might be found in a larger dictionary
192
19312µsmy %triplets;
19412µsmy $triplets_loaded = 0;
195
196sub word_is_in_dictionary {
197 my ($self, $word) = @_;
198 local ($_);
199 local $/ = "\n"; # Ensure $/ is set appropriately
200
201 # $word =~ tr/A-Z/a-z/; # already done by this stage
202 $word =~ s/^\s+//;
203 $word =~ s/\s+$//;
204
205 # If it contains a digit, dash, etc, it's not a valid word.
206 # Don't reject words like "can't" and "I'll"
207 return 0 if ($word =~ /[^a-z\']/);
208
209 # handle a few common "blah blah blah (comment)" styles
210 return 1 if ($word eq "ot"); # off-topic
211 return 1 if ($word =~ /(?:linux|nix|bsd)/); # not in most dicts
212 return 1 if ($word =~ /(?:whew|phew|attn|tha?nx)/); # not in most dicts
213
214 my $word_len = length($word);
215
216 # Unique IDs probably aren't going to be only one or two letters long
217 return 1 if ($word_len < 3);
218
219 if (!$triplets_loaded) {
220 # take a copy to avoid modifying the real one
221 my @default_triplets_path = @Mail::SpamAssassin::default_rules_path;
222 s{$}{/triplets.txt} for @default_triplets_path;
223 my $filename = $self->{main}->first_existing_path (@default_triplets_path);
224
225 if (!defined $filename) {
226 dbg("eval: failed to locate the triplets.txt file");
227 return 1;
228 }
229
230 local *TRIPLETS;
231 if (!open (TRIPLETS, "<$filename")) {
232 dbg("eval: failed to open '$filename', cannot check dictionary: $!");
233 return 1;
234 }
235 for($!=0; <TRIPLETS>; $!=0) {
236 chomp;
237 $triplets{$_} = 1;
238 }
239 defined $_ || $!==0 or
240 $!==EBADF ? dbg("eval: error reading from $filename: $!")
241 : die "error reading from $filename: $!";
242 close(TRIPLETS) or die "error closing $filename: $!";
243
244 $triplets_loaded = 1;
245 } # if (!$triplets_loaded)
246
247
248 my $i;
249
250 for ($i = 0; $i < ($word_len - 2); $i++) {
251 my $triplet = substr($word, $i, 3);
252 if (!$triplets{$triplet}) {
253 dbg("eval: unique ID: letter triplet '$triplet' from word '$word' not valid");
254 return 0;
255 }
256 } # for ($i = 0; $i < ($word_len - 2); $i++)
257
258 # All letter triplets in word were found to be valid
259 return 1;
260}
261
262# look for 8-bit and other illegal characters that should be MIME
263# encoded, these might want to exempt languages that do not use
264# Latin-based alphabets, but only if the user wants it that way
265sub check_illegal_chars {
266 my ($self, $pms, $header, $ratio, $count) = @_;
267
268 $header .= ":raw" unless ($header eq "ALL" || $header =~ /:raw$/);
269 my $str = $pms->get($header);
270 return 0 if !defined $str || $str eq '';
271
272 # avoid overlap between tests
273 if ($header eq "ALL") {
274 # fix continuation lines, then remove Subject and From
275 $str =~ s/\n[ \t]+/ /gs;
276 $str =~ s/^(?:Subject|From):.*$//gmi;
277 }
278
279 # count illegal substrings (RFC 2045)
280 # (non-ASCII + C0 controls except TAB, NL, CR)
281 my $illegal = $str =~ tr/\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff//;
282
283 # minor exemptions for Subject
284 if ($illegal > 0 && lc $header eq "subject:raw") {
285 # only exempt a single cent sign, pound sign, or registered sign
286 my $exempt = $str =~ tr/\xa2\xa3\xae//;
287 $illegal-- if $exempt == 1;
288 }
289
290 return 0 if $str eq '';
291 return (($illegal / length($str)) >= $ratio && $illegal >= $count);
292}
293
294# ezmlm has a very bad habit of removing Received: headers! bad ezmlm.
295#
296sub gated_through_received_hdr_remover {
297 my ($self, $pms) = @_;
298
299 my $txt = $pms->get("Mailing-List",undef);
300 if (defined $txt && $txt =~ /^contact \S+\@\S+\; run by ezmlm$/) {
301 my $dlto = $pms->get("Delivered-To");
302 my $rcvd = $pms->get("Received");
303
304 # ensure we have other indicative headers too
305 if ($dlto =~ /^mailing list \S+\@\S+/ &&
306 $rcvd =~ /qmail \d+ invoked (?:from network|by .{3,20})\); \d+ ... \d+/)
307 {
308 return 1;
309 }
310 }
311
312 my $rcvd = $pms->get("Received",undef);
313 if (!defined $rcvd) {
314 # we have no Received headers! These tests cannot run in that case
315 return 1;
316 }
317
318 # MSN groups removes Received lines. thanks MSN
319 if ($rcvd =~ /from groups\.msn\.com \(\S+\.msn\.com /) {
320 return 1;
321 }
322
323 return 0;
324}
325
326# FORGED_HOTMAIL_RCVD
327sub _check_for_forged_hotmail_received_headers {
328 my ($self, $pms) = @_;
329
330 if (defined $pms->{hotmail_addr_but_no_hotmail_received}) { return; }
331
332 $pms->{hotmail_addr_with_forged_hotmail_received} = 0;
333 $pms->{hotmail_addr_but_no_hotmail_received} = 0;
334
335 my $rcvd = $pms->get('Received');
336 $rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
337
338 return if ($rcvd =~
339 /from mail pickup service by hotmail\.com with Microsoft SMTPSVC;/);
340
341 # Microsoft passes Hotmail mail directly to MSN Group servers.
342 return if $self->check_for_msn_groups_headers($pms);
343
344 my $ip = $pms->get('X-Originating-Ip',undef);
345 my $IP_ADDRESS = IP_ADDRESS;
346
347 if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
348
349 # Hotmail formats its received headers like this:
350 # Received: from hotmail.com (f135.law8.hotmail.com [216.33.241.135])
351 # spammers do not ;)
352
353 if ($self->gated_through_received_hdr_remover($pms)) { return; }
354
355 if ($rcvd =~ /from (?:\S*\.)?hotmail.com \(\S+\.hotmail(?:\.msn)?\.com[ \)]/ && $ip)
356 { return; }
357 if ($rcvd =~ /from \S*\.hotmail.com \(\[$IP_ADDRESS\][ \):]/ && $ip)
358 { return; }
359 if ($rcvd =~ /from \S+ by \S+\.hotmail(?:\.msn)?\.com with HTTP\;/ && $ip)
360 { return; }
361 if ($rcvd =~ /from \[66\.218.\S+\] by \S+\.yahoo\.com/ && $ip)
362 { return; }
363
364 if ($rcvd =~ /(?:from |HELO |helo=)\S*hotmail\.com\b/) {
365 # HELO'd as hotmail.com, despite not being hotmail
366 $pms->{hotmail_addr_with_forged_hotmail_received} = 1;
367 } else {
368 # check to see if From claimed to be @hotmail.com
369 my $from = $pms->get('From:addr');
370 if ($from !~ /\bhotmail\.com$/i) { return; }
371 $pms->{hotmail_addr_but_no_hotmail_received} = 1;
372 }
373}
374
375# FORGED_HOTMAIL_RCVD
376sub check_for_forged_hotmail_received_headers {
377 my ($self, $pms) = @_;
378 $self->_check_for_forged_hotmail_received_headers($pms);
379 return $pms->{hotmail_addr_with_forged_hotmail_received};
380}
381
382# SEMIFORGED_HOTMAIL_RCVD
383sub check_for_no_hotmail_received_headers {
384 my ($self, $pms) = @_;
385 $self->_check_for_forged_hotmail_received_headers($pms);
386 return $pms->{hotmail_addr_but_no_hotmail_received};
387}
388
389# MSN_GROUPS
390sub check_for_msn_groups_headers {
391 my ($self, $pms) = @_;
392
393 my $to = $pms->get('To');
394 return 0 unless $to =~ /<(\S+)\@groups\.msn\.com>/i;
395 my $listname = $1;
396
397 # from Theo Van Dinter, see bug 591
398 # Updated by DOS, based on messages from Bob Menschel, bug 4301
399
400 return 0 unless $pms->get('Received') =~
401 /from mail pickup service by ((?:p\d\d\.)groups\.msn\.com)\b/;
402 my $server = $1;
403
404 if ($listname =~ /^notifications$/) {
405 return 0 unless $pms->get('Message-Id') =~ /^<\S+\@$server>/;
406 } else {
407 return 0 unless $pms->get('Message-Id') =~ /^<$listname-\S+\@groups\.msn\.com>/;
408 return 0 unless $pms->get('EnvelopeFrom:addr') =~ /$listname-bounce\@groups\.msn\.com/;
409 }
410 return 1;
411
412# MSN Groups
413# Return-path: <ListName-bounce@groups.msn.com>
414# Received: from groups.msn.com (tk2dcpuba02.msn.com [65.54.195.210]) by
415# dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id g72K35v10457 for
416# <zzzzzzzzzzzz@jmason.org>; Fri, 2 Aug 2002 21:03:05 +0100
417# Received: from mail pickup service by groups.msn.com with Microsoft
418# SMTPSVC; Fri, 2 Aug 2002 13:01:30 -0700
419# Message-id: <ListName-1392@groups.msn.com>
420# X-loop: notifications@groups.msn.com
421# Reply-to: "List Full Name" <ListName@groups.msn.com>
422# To: "List Full Name" <ListName@groups.msn.com>
423
424# Return-path: <ListName-bounce@groups.msn.com>
425# Received: from p04.groups.msn.com ([65.54.195.216]) etc...
426# Received: from mail pickup service by p04.groups.msn.com with Microsoft SMTPSVC;
427# Thu, 5 May 2005 20:30:37 -0700
428# X-Originating-Ip: 207.68.170.30
429# From: =?iso-8859-1?B?IqSj4/D9pEbzeN9s9vLw6qQiIA==?=<zzzzzzzz@hotmail.com>
430# To: "Managers of List Name" <notifications@groups.msn.com>
431# Subject: =?iso-8859-1?Q?APPROVAL_NEEDED:_=A4=A3=E3=F0=FD=A4F=F3x=DFl?=
432# =?iso-8859-1?Q?=F6=F2=F0=EA=A4_applied_to_join_List_Name=2C?=
433# =?iso-8859-1?Q?_an_MSN_Group?=
434# Date: Thu, 5 May 2005 20:30:37 -0700
435# MIME-Version: 1.0
436# Content-Type: multipart/alternative;
437# boundary="----=_NextPart_000_333944_01C551B1.4BBA02B0"
438# X-MimeOLE: Produced By Microsoft MimeOLE V5.50.4927.1200
439# Message-ID: <TK2DCPUBA042cv0aGlt00020aa3@p04.groups.msn.com>
440
441# Return-path: <ListName-bounce@groups.msn.com>
442# Received: from [65.54.208.83] (helo=p05.groups.msn.com) etc...
443# Received: from mail pickup service by p05.groups.msn.com with Microsoft SMTPSVC;
444# Fri, 6 May 2005 14:59:25 -0700
445# X-Originating-Ip: 207.68.170.30
446# Message-Id: <ListName-101@groups.msn.com>
447# Reply-To: "List Name" <ListName@groups.msn.com>
448# From: "whoever" <zzzzzzzzzz@hotmail.com>
449# To: "List Name" <ListName@groups.msn.com>
450# Subject: whatever
451# Date: Fri, 6 May 2005 14:59:25 -0700
452
453}
454
455###########################################################################
456
457sub check_for_forged_eudoramail_received_headers {
458 my ($self, $pms) = @_;
459
460 my $from = $pms->get('From:addr');
461 if ($from !~ /\beudoramail\.com$/i) { return 0; }
462
463 my $rcvd = $pms->get('Received');
464 $rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
465
466 my $ip = $pms->get('X-Sender-Ip',undef);
467 my $IP_ADDRESS = IP_ADDRESS;
468 if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
469
470 # Eudoramail formats its received headers like this:
471 # Received: from Unknown/Local ([?.?.?.?]) by shared1-mail.whowhere.com;
472 # Thu Nov 29 13:44:25 2001
473 # Message-Id: <JGDHDEHPPJECDAAA@shared1-mail.whowhere.com>
474 # Organization: QUALCOMM Eudora Web-Mail (http://www.eudoramail.com:80)
475 # X-Sender-Ip: 192.175.21.146
476 # X-Mailer: MailCity Service
477
478 if ($self->gated_through_received_hdr_remover($pms)) { return 0; }
479
480 if ($rcvd =~ /by \S*whowhere.com\;/ && $ip) { return 0; }
481
482 return 1;
483}
484
485###########################################################################
486
487sub check_for_forged_yahoo_received_headers {
488 my ($self, $pms) = @_;
489
490 my $from = $pms->get('From:addr');
491 if ($from !~ /\byahoo\.com$/i) { return 0; }
492
493 my $rcvd = $pms->get('Received');
494
495 if ($pms->get("Resent-From") ne '' && $pms->get("Resent-To") ne '') {
496 my $xrcvd = $pms->get("X-Received");
497 $rcvd = $xrcvd if $xrcvd ne '';
498 }
499 $rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp
500
501 # not sure about this
502 #if ($rcvd !~ /from \S*yahoo\.com/) { return 0; }
503
504 if ($self->gated_through_received_hdr_remover($pms)) { return 0; }
505
506 # bug 3740: ignore bounces from Yahoo!. only honoured if the
507 # correct rDNS shows up in the trusted relay list, or first untrusted relay
508 #
509 # bug 4528: [ ip=68.142.202.54 rdns=mta122.mail.mud.yahoo.com
510 # helo=mta122.mail.mud.yahoo.com by=eclectic.kluge.net ident=
511 # envfrom= intl=0 id=49F2EAF13B auth= ]
512 #
513 if ($pms->{relays_trusted_str} =~ / rdns=\S+\.yahoo\.com /
514 || $pms->{relays_untrusted_str} =~ /^[^\]]+ rdns=\S+\.yahoo\.com /)
515 { return 0; }
516
517 if ($rcvd =~ /by web\S+\.mail\S*\.yahoo\.com via HTTP/) { return 0; }
518 if ($rcvd =~ /by smtp\S+\.yahoo\.com with SMTP/) { return 0; }
519 my $IP_ADDRESS = IP_ADDRESS;
520 if ($rcvd =~
521 /from \[$IP_ADDRESS\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/) {
522 return 0;
523 }
524
525 # used in "forward this news item to a friend" links. There's no better
526 # received hdrs to match on, unfortunately. I'm not sure if the next test is
527 # still useful, as a result.
528 #
529 # search for msgid <20020929140301.451A92940A9@xent.com>, subject "Yahoo!
530 # News Story - Top Stories", date Sep 29 2002 on
531 # <http://xent.com/pipermail/fork/> for an example.
532 #
533 if ($rcvd =~ /\bmailer\d+\.bulk\.scd\.yahoo\.com\b/
534 && $from =~ /\@reply\.yahoo\.com$/i) { return 0; }
535
536 if ($rcvd =~ /by \w+\.\w+\.yahoo\.com \(\d+\.\d+\.\d+\/\d+\.\d+\.\d+\)(?: with ESMTP)? id \w+/) {
537 # possibly sent from "mail this story to a friend"
538 return 0;
539 }
540
541 return 1;
542}
543
544sub check_for_forged_juno_received_headers {
545 my ($self, $pms) = @_;
546
547 my $from = $pms->get('From:addr');
548 if ($from !~ /\bjuno\.com$/i) { return 0; }
549
550 if ($self->gated_through_received_hdr_remover($pms)) { return 0; }
551
552 my $xorig = $pms->get('X-Originating-IP');
553 my $xmailer = $pms->get('X-Mailer');
554 my $rcvd = $pms->get('Received');
555 my $IP_ADDRESS = IP_ADDRESS;
556
557 if ($xorig ne '') {
558 # New style Juno has no X-Originating-IP header, and other changes
559 if($rcvd !~ /from.*\b(?:juno|untd)\.com.*[\[\(]$IP_ADDRESS[\]\)].*by/
560 && $rcvd !~ / cookie\.(?:juno|untd)\.com /) { return 1; }
561 if($xmailer !~ /Juno /) { return 1; }
562 } else {
563 if($rcvd =~ /from.*\bmail\.com.*\[$IP_ADDRESS\].*by/) {
564 if($xmailer !~ /\bmail\.com/) { return 1; }
565 } elsif($rcvd =~ /from (webmail\S+\.untd\.com) \(\1 \[$IP_ADDRESS\]\) by/) {
566 if($xmailer !~ /^Webmail Version \d/) { return 1; }
567 } else {
568 return 1;
569 }
570 if($xorig !~ /$IP_ADDRESS/) { return 1; }
571 }
572
573 return 0;
574}
575
576sub check_for_matching_env_and_hdr_from {
577 my ($self, $pms) =@_;
578 # two blank headers match so don't bother checking
579 return (lc $pms->get('EnvelopeFrom:addr') eq lc $pms->get('From:addr'));
580}
581
582sub sorted_recipients {
583 my ($self, $pms) = @_;
584
585 if (!exists $pms->{tocc_sorted}) {
586 $self->_check_recipients($pms);
587 }
588 return $pms->{tocc_sorted};
589}
590
591sub similar_recipients {
592 my ($self, $pms, $min, $max) = @_;
593
594 if (!exists $pms->{tocc_similar}) {
595 $self->_check_recipients($pms);
596 }
597 return (($min eq 'undef' || $pms->{tocc_similar} >= $min) &&
598 ($max eq 'undef' || $pms->{tocc_similar} < $max));
599}
600
601# best experimentally derived values
602275µs2460µs
# spent 245µs (31+214) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@602 which was called: # once (31µs+214µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 602
use constant TOCC_SORTED_COUNT => 7;
# spent 245µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@602 # spent 214µs making 1 call to constant::import
603273µs2396µs
# spent 212µs (29+184) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@603 which was called: # once (29µs+184µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 603
use constant TOCC_SIMILAR_COUNT => 5;
# spent 212µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@603 # spent 184µs making 1 call to constant::import
60426.43ms2420µs
# spent 225µs (31+195) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@604 which was called: # once (31µs+195µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 604
use constant TOCC_SIMILAR_LENGTH => 2;
# spent 225µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@604 # spent 195µs making 1 call to constant::import
605
606sub _check_recipients {
607 my ($self, $pms) = @_;
608
609 my @inputs;
610
611 # ToCc: pseudo-header works best, but sometimes Bcc: is better
612 for ('ToCc', 'Bcc') {
613 my $to = $pms->get($_); # get recipients
614 $to =~ s/\(.*?\)//g; # strip out the (comments)
615 push(@inputs, ($to =~ m/([\w.=-]+\@\w+(?:[\w.-]+\.)+\w+)/g));
616 last if scalar(@inputs) >= TOCC_SIMILAR_COUNT;
617 }
618
619 # remove duplicate addresses only when they appear next to each other
620 my @address;
621 my $previous = '';
622 while (my $current = shift @inputs) {
623 push(@address, ($previous = $current)) if lc($current) ne lc($previous);
624 last if @address == 256;
625 }
626
627 # ideas that had both poor S/O ratios and poor hit rates:
628 # - testing for reverse sorted recipient lists
629 # - testing To: and Cc: headers separately
630 $pms->{tocc_sorted} = (scalar(@address) >= TOCC_SORTED_COUNT &&
631 join(',', @address) eq (join(',', sort @address)));
632
633 # a good S/O ratio and hit rate is achieved by comparing 2-byte
634 # substrings and requiring 5 or more addresses
635 $pms->{tocc_similar} = 0;
636 if (scalar (@address) >= TOCC_SIMILAR_COUNT) {
637 my @user = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @address;
638 my @fqhn = map { m/\@(.*)/ } @address;
639 my @host = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @fqhn;
640 my $hits = 0;
641 my $combinations = 0;
642 for (my $i = 0; $i <= $#address; $i++) {
643 for (my $j = $i+1; $j <= $#address; $j++) {
644 $hits++ if $user[$i] eq $user[$j];
645 $hits++ if $host[$i] eq $host[$j] && $fqhn[$i] ne $fqhn[$j];
646 $combinations++;
647 }
648 }
649 $pms->{tocc_similar} = $hits / $combinations;
650 }
651}
652
653sub check_for_missing_to_header {
654 my ($self, $pms) = @_;
655
656 my $hdr = $pms->get('To');
657 $hdr = $pms->get('Apparently-To') if $hdr eq '';
658 return 1 if $hdr eq '';
659
660 return 0;
661}
662
663sub check_for_forged_gw05_received_headers {
664 my ($self, $pms) = @_;
665 local ($_);
666
667 my $rcv = $pms->get('Received');
668
669 # e.g.
670 # Received: from mail3.icytundra.com by gw05 with ESMTP; Thu, 21 Jun 2001 02:28:32 -0400
671 my ($h1, $h2) = ($rcv =~
672 m/\nfrom\s(\S+)\sby\s(\S+)\swith\sESMTP\;\s+\S\S\S,\s+\d+\s+\S\S\S\s+
673 \d{4}\s+\d\d:\d\d:\d\d\s+[-+]*\d{4}\n$/xs);
674
675 if (defined ($h1) && defined ($h2) && $h2 !~ /\./) {
676 return 1;
677 }
678
679 0;
680}
681
682###########################################################################
683
684sub check_for_shifted_date {
685 my ($self, $pms, $min, $max) = @_;
686
687 if (!exists $pms->{date_diff}) {
688 $self->_check_date_diff($pms);
689 }
690 return (($min eq 'undef' || $pms->{date_diff} >= (3600 * $min)) &&
691 ($max eq 'undef' || $pms->{date_diff} < (3600 * $max)));
692}
693
694# filters out some false positives in old corpus mail - Allen
695sub received_within_months {
696 my ($self,$pms,$min,$max) = @_;
697
698 if (!exists($pms->{date_received})) {
699 $self->_check_date_received($pms);
700 }
701 my $diff = time() - $pms->{date_received};
702
703 # 365.2425 * 24 * 60 * 60 = 31556952 = seconds in year (including leap)
704
705 if (((! defined($min)) || ($min eq 'undef') ||
706 ($diff >= (31556952 * ($min/12)))) &&
707 ((! defined($max)) || ($max eq 'undef') ||
708 ($diff < (31556952 * ($max/12))))) {
709 return 1;
710 } else {
711 return 0;
712 }
713}
714
715sub _get_date_header_time {
716 my ($self, $pms) = @_;
717
718 my $time;
719 # a Resent-Date: header takes precedence over any Date: header
720 DATE: for my $header ('Resent-Date', 'Date') {
721 my @dates = $pms->{msg}->get_header($header);
722 for my $date (@dates) {
723 if (defined($date) && length($date)) {
724 chomp($date);
725 $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
726 }
727 last DATE if defined($time);
728 }
729 }
730 if (defined($time)) {
731 $pms->{date_header_time} = $time;
732 }
733 else {
734 $pms->{date_header_time} = undef;
735 }
736}
737
738sub _get_received_header_times {
739 my ($self, $pms) = @_;
740
741 $pms->{received_header_times} = [ () ];
742 $pms->{received_fetchmail_time} = undef;
743
744 my (@received);
745 my $received = $pms->get('Received');
746 if ($received ne '') {
747 @received = grep {$_ =~ m/\S/} (split(/\n/,$received));
748 }
749 # if we have no Received: headers, chances are we're archived mail
750 # with a limited set of headers
751 if (!scalar(@received)) {
752 return;
753 }
754
755 # handle fetchmail headers
756 my (@local);
757 if (($received[0] =~
758 m/\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/) ||
759 ($received[0] =~ m/qmail \d+ invoked by uid \d+/)) {
760 push @local, (shift @received);
761 }
762 if (scalar(@received) &&
763 ($received[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
764 push @local, (shift @received);
765 }
766 elsif (scalar(@local)) {
767 unshift @received, (shift @local);
768 }
769
770 if (scalar(@local)) {
771 my (@fetchmail_times);
772 foreach my $rcvd (@local) {
773 if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
774 my $date = $1;
775 dbg2("eval: trying Received fetchmail header date for real time: $date");
776 my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
777 if (defined($time) && (time() >= $time)) {
778 dbg2("eval: time_t from date=$time, rcvd=$date");
779 push @fetchmail_times, $time;
780 }
781 }
782 }
783 if (scalar(@fetchmail_times) > 1) {
784 $pms->{received_fetchmail_time} =
785 (sort {$b <=> $a} (@fetchmail_times))[0];
786 } elsif (scalar(@fetchmail_times)) {
787 $pms->{received_fetchmail_time} = $fetchmail_times[0];
788 }
789 }
790
791 my (@header_times);
792 foreach my $rcvd (@received) {
793 if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
794 my $date = $1;
795 dbg2("eval: trying Received header date for real time: $date");
796 my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
797 if (defined($time)) {
798 dbg2("eval: time_t from date=$time, rcvd=$date");
799 push @header_times, $time;
800 }
801 }
802 }
803
804 if (scalar(@header_times)) {
805 $pms->{received_header_times} = [ @header_times ];
806 } else {
807 dbg("eval: no dates found in Received headers");
808 }
809}
810
811sub _check_date_received {
812 my ($self, $pms) = @_;
813
814 my (@dates_poss);
815
816 $pms->{date_received} = 0;
817
818 if (!exists($pms->{date_header_time})) {
819 $self->_get_date_header_time($pms);
820 }
821
822 if (defined($pms->{date_header_time})) {
823 push @dates_poss, $pms->{date_header_time};
824 }
825
826 if (!exists($pms->{received_header_times})) {
827 $self->_get_received_header_times($pms);
828 }
829 my (@received_header_times) = @{ $pms->{received_header_times} };
830 if (scalar(@received_header_times)) {
831 push @dates_poss, $received_header_times[0];
832 }
833 if (defined($pms->{received_fetchmail_time})) {
834 push @dates_poss, $pms->{received_fetchmail_time};
835 }
836
837 if (defined($pms->{date_header_time}) && scalar(@received_header_times)) {
838 if (!exists($pms->{date_diff})) {
839 $self->_check_date_diff($pms);
840 }
841 push @dates_poss, $pms->{date_header_time} - $pms->{date_diff};
842 }
843
844 if (scalar(@dates_poss)) { # use median
845 $pms->{date_received} = (sort {$b <=> $a}
846 (@dates_poss))[int($#dates_poss/2)];
847 dbg("eval: date chosen from message: " .
848 scalar(localtime($pms->{date_received})));
849 } else {
850 dbg("eval: no dates found in message");
851 }
852}
853
854sub _check_date_diff {
855 my ($self, $pms) = @_;
856
857 $pms->{date_diff} = 0;
858
859 if (!exists($pms->{date_header_time})) {
860 $self->_get_date_header_time($pms);
861 }
862
863 if (!defined($pms->{date_header_time})) {
864 return; # already have tests for this
865 }
866
867 if (!exists($pms->{received_header_times})) {
868 $self->_get_received_header_times($pms);
869 }
870 my (@header_times) = @{ $pms->{received_header_times} };
871
872 if (!scalar(@header_times)) {
873 return; # archived mail?
874 }
875
876 my (@diffs) = map {$pms->{date_header_time} - $_} (@header_times);
877
878 # if the last Received: header has no difference, then we choose to
879 # exclude it
880 if ($#diffs > 0 && $diffs[$#diffs] == 0) {
881 pop(@diffs);
882 }
883
884 # use the date with the smallest absolute difference
885 # (experimentally, this results in the fewest false positives)
886 @diffs = sort { abs($a) <=> abs($b) } @diffs;
887 $pms->{date_diff} = $diffs[0];
888}
889
890
891sub subject_is_all_caps {
892 my ($self, $pms) = @_;
893 my $subject = $pms->get('Subject');
894
895 $subject =~ s/^\s+//;
896 $subject =~ s/\s+$//;
897 $subject =~ s/^(?:(?:Re|Fwd|Fw|Aw|Antwort|Sv):\s*)+//i; # Bug 6805
898 return 0 if $subject !~ /\s/; # don't match one word subjects
899 return 0 if (length $subject < 10); # don't match short subjects
900 $subject =~ s/[^a-zA-Z]//g; # only look at letters
901
902 # now, check to see if the subject is encoded using a non-ASCII charset.
903 # If so, punt on this test to avoid FPs. We just list the known charsets
904 # this test will FP on, here.
905 my $subjraw = $pms->get('Subject:raw');
906 my $CLTFAC = Mail::SpamAssassin::Constants::CHARSETS_LIKELY_TO_FP_AS_CAPS;
907 if ($subjraw =~ /=\?${CLTFAC}\?/i) {
908 return 0;
909 }
910
911 return length($subject) && ($subject eq uc($subject));
912}
913
914sub check_for_to_in_subject {
915 my ($self, $pms, $test) = @_;
916
917 my $full_to = $pms->get('To:addr');
918 return 0 unless $full_to ne '';
919
920 my $subject = $pms->get('Subject');
921
922 if ($test eq "address") {
923 return $subject =~ /\b\Q$full_to\E\b/i; # "user@domain.com"
924 }
925 elsif ($test eq "user") {
926 my $to = $full_to;
927 $to =~ s/\@.*//;
928 my $subj = $subject;
929 $subj =~ s/^\s+//;
930 $subj =~ s/\s+$//;
931
932 return $subject =~ /^(?:
933 (?:re|fw):\s*(?:\w+\s+)?\Q$to\E$
934 |(?-i:\Q$to\E)\s*[,:;!?-](?:$|\s)
935 |\Q$to\E$
936 |,\s*\Q$to\E[,:;!?-]$
937 )/ix;
938 }
939 return 0;
940}
941
942sub check_outlook_message_id {
943 my ($self, $pms) = @_;
944 local ($_);
945
946 my $id = $pms->get('MESSAGEID');
947 return 0 if $id !~ /^<[0-9a-f]{4}([0-9a-f]{8})\$[0-9a-f]{8}\$[0-9a-f]{8}\@/;
948
949 my $timetoken = hex($1);
950 my $x = 0.0023283064365387;
951 my $y = 27111902.8329849;
952
953 my $fudge = 250;
954
955 $_ = $pms->get('Date');
956 $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0;
957 my $expected = int (($_ * $x) + $y);
958 my $diff = $timetoken - $expected;
959 return 0 if (abs($diff) < $fudge);
960
961 $_ = $pms->get('Received');
962 /(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+).*?$/;
963 $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0;
964 $expected = int(($_ * $x) + $y);
965 $diff = $timetoken - $expected;
966
967 return (abs($diff) >= $fudge);
968}
969
970sub check_messageid_not_usable {
971 my ($self, $pms) = @_;
972 local ($_);
973
974 # Lyris eats message-ids. also some ezmlm, I think :(
975 $_ = $pms->get("List-Unsubscribe");
976 return 1 if (/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/i);
977
978 # ezmlm again
979 if($self->gated_through_received_hdr_remover($pms)) { return 1; }
980
981 # Allen notes this as 'Wacky sendmail version?'
982 $_ = $pms->get("Received");
983 return 1 if /\/CWT\/DCE\)/;
984
985 # Apr 2 2003 jm: iPlanet rewrites lots of stuff, including Message-IDs
986 return 1 if /iPlanet Messaging Server/;
987
988 return 0;
989}
990
991# Return true if the count of $hdr headers are within the given range
992sub check_header_count_range {
993 my ($self, $pms, $hdr, $min, $max) = @_;
994 my %uniq;
995 my @hdrs = grep(!$uniq{$_}++, $pms->{msg}->get_header ($hdr));
996 return (scalar @hdrs >= $min && scalar @hdrs <= $max);
997}
998
999sub check_unresolved_template {
1000 my ($self, $pms) = @_;
1001
1002 my $all = $pms->get('ALL'); # cached access
1003 $all =~ s/\n[ \t]+/ /gs; # fix continuation lines
1004
1005 for my $header (split(/\n/, $all)) {
1006 # slightly faster to test in this order
1007 if ($header =~ /%[A-Z][A-Z_-]/ &&
1008 $header !~ /^(?:X-VMS-To|X-UIDL|X-Face|To|Cc|From|Subject|References|In-Reply-To|(?:X-|Resent-|X-Original-)?Message-Id):/i)
1009 {
1010 return 1;
1011 }
1012 }
1013 return 0;
1014}
1015
1016sub check_ratware_name_id {
1017 my ($self, $pms) = @_;
1018
1019 my $mid = $pms->get('MESSAGEID');
1020 my $from = $pms->get('From');
1021 if ($mid =~ m/<[A-Z]{28}\.([^>]+?)>/) {
1022 if ($from =~ m/\"[^\"]+\"\s*<\Q$1\E>/) {
1023 return 1;
1024 }
1025 }
1026 return 0;
1027}
1028
1029sub check_ratware_envelope_from {
1030 my ($self, $pms) = @_;
1031
1032 my $to = $pms->get('To:addr');
1033 my $from = $pms->get('EnvelopeFrom:addr');
1034
1035 return 0 if $from eq '' || $to eq '';
1036 return 0 if $from =~ /^SRS\d=/;
1037
1038 if ($to =~ /^([^@]+)@(.+)$/) {
1039 my($user,$dom) = ($1,$2);
1040 $dom = $self->{main}->{registryboundaries}->trim_domain($dom);
1041 return unless
1042 ($self->{main}->{registryboundaries}->is_domain_valid($dom));
1043
1044 return 1 if ($from =~ /\b\Q$dom\E.\Q$user\E@/i);
1045 }
1046
1047 return 0;
1048}
1049
1050# ADDED FROM BUG 6487
1051sub check_equal_from_domains {
1052 my ($self, $pms) = @_;
1053
1054 my $from = $pms->get('From:addr');
1055 my $envfrom = $pms->get('EnvelopeFrom:addr');
1056
1057 local $1;
1058 my $fromdomain = '';
1059 #Revised regexp from 6487 comment 3
1060 $fromdomain = $1 if $from =~ /\@([^@]*)\z/;
1061 $fromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/;
1062 return 0 if $fromdomain eq '';
1063
1064 my $envfromdomain = '';
1065 $envfromdomain = $1 if $envfrom =~ /\@([^@]*)\z/;
1066 $envfromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/;
1067 return 0 if $envfromdomain eq '';
1068
1069 dbg("eval: From 2nd level domain: $fromdomain, EnvelopeFrom 2nd level domain: $envfromdomain");
1070
1071 return 1 if lc($fromdomain) ne lc($envfromdomain);
1072
1073 return 0;
1074}
1075
1076
1077###########################################################################
1078
1079# support eval-test verbose debugs using "-Deval"
1080sub dbg2 {
1081 if (would_log('dbg', 'eval') == 2) {
1082 dbg(@_);
1083 }
1084}
1085
1086118µs1;