Filename | /usr/local/lib/perl5/site_perl/Mail/SpamAssassin/Plugin/HeaderEval.pm |
Statements | Executed 63 statements in 13.9ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 414µs | 1.23ms | new | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 57µs | 69µs | BEGIN@20 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 38µs | 43µs | BEGIN@22 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 36µs | 119µs | BEGIN@31 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 34µs | 114µs | BEGIN@23 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 32µs | 146µs | BEGIN@24 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 31µs | 234µs | BEGIN@603 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 30µs | 243µs | BEGIN@28 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 30µs | 226µs | BEGIN@602 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 29µs | 1.02ms | BEGIN@29 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 28µs | 210µs | BEGIN@604 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 22µs | 64µs | BEGIN@21 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 15µs | 15µs | BEGIN@26 | Mail::SpamAssassin::Plugin::HeaderEval::
1 | 1 | 1 | 11µs | 11µs | BEGIN@27 | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | _check_date_diff | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | _check_date_received | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | _check_for_forged_hotmail_received_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | _check_recipients | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | _get_date_header_time | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | _get_received_header_times | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_equal_from_domains | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_fake_aol_relay_in_rcvd | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_faraway_charset_in_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_forged_eudoramail_received_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_forged_gw05_received_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_forged_hotmail_received_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_forged_juno_received_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_forged_yahoo_received_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_matching_env_and_hdr_from | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_missing_to_header | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_msn_groups_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_no_hotmail_received_headers | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_shifted_date | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_to_in_subject | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_for_unique_subject_id | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_header_count_range | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_illegal_chars | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_messageid_not_usable | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_outlook_message_id | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_ratware_envelope_from | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_ratware_name_id | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | check_unresolved_template | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | compile_now_start | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | dbg2 | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | gated_through_received_hdr_remover | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | received_within_months | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | similar_recipients | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | sorted_recipients | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | subject_is_all_caps | Mail::SpamAssassin::Plugin::HeaderEval::
0 | 0 | 0 | 0s | 0s | word_is_in_dictionary | Mail::SpamAssassin::Plugin::HeaderEval::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # <@LICENSE> | ||||
2 | # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
3 | # contributor license agreements. See the NOTICE file distributed with | ||||
4 | # this work for additional information regarding copyright ownership. | ||||
5 | # The ASF licenses this file to you under the Apache License, Version 2.0 | ||||
6 | # (the "License"); you may not use this file except in compliance with | ||||
7 | # the License. You may obtain a copy of the License at: | ||||
8 | # | ||||
9 | # http://www.apache.org/licenses/LICENSE-2.0 | ||||
10 | # | ||||
11 | # Unless required by applicable law or agreed to in writing, software | ||||
12 | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
14 | # See the License for the specific language governing permissions and | ||||
15 | # limitations under the License. | ||||
16 | # </@LICENSE> | ||||
17 | |||||
18 | package Mail::SpamAssassin::Plugin::HeaderEval; | ||||
19 | |||||
20 | 2 | 71µs | 2 | 81µs | # spent 69µs (57+12) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@20 which was called:
# once (57µs+12µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 20 # spent 69µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@20
# spent 12µs making 1 call to strict::import |
21 | 2 | 78µs | 2 | 106µs | # spent 64µs (22+42) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@21 which was called:
# once (22µs+42µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 21 # spent 64µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@21
# spent 42µs making 1 call to warnings::import |
22 | 2 | 78µs | 2 | 48µs | # spent 43µs (38+5) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@22 which was called:
# once (38µs+5µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 22 # spent 43µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@22
# spent 5µs making 1 call to bytes::import |
23 | 2 | 68µs | 2 | 195µs | # spent 114µs (34+80) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@23 which was called:
# once (34µs+80µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 23 # spent 114µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@23
# spent 80µs making 1 call to re::import |
24 | 2 | 75µs | 2 | 260µs | # spent 146µs (32+114) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@24 which was called:
# once (32µs+114µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 24 # spent 146µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@24
# spent 114µs making 1 call to Exporter::import |
25 | |||||
26 | 2 | 69µs | 1 | 15µs | # spent 15µs within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@26 which was called:
# once (15µs+0s) by Mail::SpamAssassin::PluginHandler::load_plugin at line 26 # spent 15µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@26 |
27 | 2 | 77µs | 1 | 11µs | # spent 11µs within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@27 which was called:
# once (11µs+0s) by Mail::SpamAssassin::PluginHandler::load_plugin at line 27 # spent 11µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@27 |
28 | 2 | 80µs | 2 | 456µs | # spent 243µs (30+213) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@28 which was called:
# once (30µs+213µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 28 # spent 243µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@28
# spent 213µs making 1 call to Exporter::import |
29 | 2 | 80µs | 2 | 2.02ms | # spent 1.02ms (29µs+993µs) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@29 which was called:
# once (29µs+993µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 29 # spent 1.02ms making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@29
# spent 993µs making 1 call to Exporter::import |
30 | |||||
31 | 2 | 6.57ms | 2 | 202µs | # spent 119µs (36+83) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@31 which was called:
# once (36µs+83µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 31 # spent 119µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@31
# spent 83µs making 1 call to vars::import |
32 | 1 | 22µs | @ISA = qw(Mail::SpamAssassin::Plugin); | ||
33 | |||||
34 | # constructor: register the eval rule | ||||
35 | # spent 1.23ms (414µs+816µs) within Mail::SpamAssassin::Plugin::HeaderEval::new which was called:
# once (414µs+816µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 1 of (eval 97)[Mail/SpamAssassin/PluginHandler.pm:129] | ||||
36 | 1 | 2µs | my $class = shift; | ||
37 | 1 | 2µs | my $mailsaobject = shift; | ||
38 | |||||
39 | # some boilerplate... | ||||
40 | 1 | 2µs | $class = ref($class) || $class; | ||
41 | 1 | 12µs | 1 | 20µs | my $self = $class->SUPER::new($mailsaobject); # spent 20µs making 1 call to Mail::SpamAssassin::Plugin::new |
42 | 1 | 2µs | bless ($self, $class); | ||
43 | |||||
44 | # the important bit! | ||||
45 | 1 | 16µs | 1 | 37µs | $self->register_eval_rule("check_for_fake_aol_relay_in_rcvd"); # spent 37µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
46 | 1 | 6µs | 1 | 24µs | $self->register_eval_rule("check_for_faraway_charset_in_headers"); # spent 24µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
47 | 1 | 6µs | 1 | 49µs | $self->register_eval_rule("check_for_unique_subject_id"); # spent 49µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
48 | 1 | 6µs | 1 | 24µs | $self->register_eval_rule("check_illegal_chars"); # spent 24µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
49 | 1 | 6µs | 1 | 18µs | $self->register_eval_rule("check_for_forged_hotmail_received_headers"); # spent 18µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
50 | 1 | 6µs | 1 | 27µs | $self->register_eval_rule("check_for_no_hotmail_received_headers"); # spent 27µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
51 | 1 | 6µs | 1 | 28µs | $self->register_eval_rule("check_for_msn_groups_headers"); # spent 28µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
52 | 1 | 6µs | 1 | 31µs | $self->register_eval_rule("check_for_forged_eudoramail_received_headers"); # spent 31µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
53 | 1 | 6µs | 1 | 28µs | $self->register_eval_rule("check_for_forged_yahoo_received_headers"); # spent 28µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
54 | 1 | 6µs | 1 | 21µs | $self->register_eval_rule("check_for_forged_juno_received_headers"); # spent 21µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
55 | 1 | 6µs | 1 | 30µs | $self->register_eval_rule("check_for_matching_env_and_hdr_from"); # spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
56 | 1 | 6µs | 1 | 36µs | $self->register_eval_rule("sorted_recipients"); # spent 36µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
57 | 1 | 6µs | 1 | 23µs | $self->register_eval_rule("similar_recipients"); # spent 23µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
58 | 1 | 8µs | 1 | 32µs | $self->register_eval_rule("check_for_missing_to_header"); # spent 32µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
59 | 1 | 6µs | 1 | 35µs | $self->register_eval_rule("check_for_forged_gw05_received_headers"); # spent 35µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
60 | 1 | 6µs | 1 | 32µs | $self->register_eval_rule("check_for_shifted_date"); # spent 32µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
61 | 1 | 12µs | 1 | 37µs | $self->register_eval_rule("subject_is_all_caps"); # spent 37µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
62 | 1 | 6µs | 1 | 38µs | $self->register_eval_rule("check_for_to_in_subject"); # spent 38µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
63 | 1 | 13µs | 1 | 25µs | $self->register_eval_rule("check_outlook_message_id"); # spent 25µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
64 | 1 | 6µs | 1 | 28µs | $self->register_eval_rule("check_messageid_not_usable"); # spent 28µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
65 | 1 | 6µs | 1 | 36µs | $self->register_eval_rule("check_header_count_range"); # spent 36µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
66 | 1 | 6µs | 1 | 18µs | $self->register_eval_rule("check_unresolved_template"); # spent 18µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
67 | 1 | 8µs | 1 | 27µs | $self->register_eval_rule("check_ratware_name_id"); # spent 27µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
68 | 1 | 8µs | 1 | 33µs | $self->register_eval_rule("check_ratware_envelope_from"); # spent 33µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
69 | 1 | 6µs | 1 | 18µs | $self->register_eval_rule("gated_through_received_hdr_remover"); # spent 18µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
70 | 1 | 6µs | 1 | 30µs | $self->register_eval_rule("received_within_months"); # spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
71 | 1 | 8µs | 1 | 30µs | $self->register_eval_rule("check_equal_from_domains"); # spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule |
72 | |||||
73 | 1 | 12µs | return $self; | ||
74 | } | ||||
75 | |||||
76 | # load triplets.txt into memory | ||||
77 | sub compile_now_start { | ||||
78 | my ($self) = @_; | ||||
79 | |||||
80 | $self->word_is_in_dictionary("aba"); | ||||
81 | } | ||||
82 | |||||
83 | sub check_for_fake_aol_relay_in_rcvd { | ||||
84 | my ($self, $pms) = @_; | ||||
85 | local ($_); | ||||
86 | |||||
87 | $_ = $pms->get('Received'); | ||||
88 | s/\s/ /gs; | ||||
89 | |||||
90 | # this is the hostname format used by AOL for their relays. Spammers love | ||||
91 | # forging it. Don't make it more specific to match aol.com only, though -- | ||||
92 | # there's another set of spammers who generate fake hostnames to go with | ||||
93 | # it! | ||||
94 | if (/ rly-[a-z][a-z]\d\d\./i) { | ||||
95 | return 0 if /\/AOL-\d+\.\d+\.\d+\)/; # via AOL mail relay | ||||
96 | return 0 if /ESMTP id (?:RELAY|MAILRELAY|MAILIN)/; # AOLish | ||||
97 | return 1; | ||||
98 | } | ||||
99 | |||||
100 | # spam: Received: from unknown (HELO mta05bw.bigpond.com) (80.71.176.130) by | ||||
101 | # rly-xw01.mx.aol.com with QMQP; Sat, 15 Jun 2002 23:37:16 -0000 | ||||
102 | |||||
103 | # non: Received: from rly-xj02.mx.aol.com (rly-xj02.mail.aol.com [172.20.116.39]) by | ||||
104 | # omr-r05.mx.aol.com (v83.35) with ESMTP id RELAYIN7-0501132011; Wed, 01 | ||||
105 | # May 2002 13:20:11 -0400 | ||||
106 | |||||
107 | # non: Received: from logs-tr.proxy.aol.com (logs-tr.proxy.aol.com [152.163.201.132]) | ||||
108 | # by rly-ip01.mx.aol.com (8.8.8/8.8.8/AOL-5.0.0) | ||||
109 | # with ESMTP id NAA08955 for <sapient-alumni@yahoogroups.com>; | ||||
110 | # Thu, 4 Apr 2002 13:11:20 -0500 (EST) | ||||
111 | |||||
112 | return 0; | ||||
113 | } | ||||
114 | |||||
115 | sub check_for_faraway_charset_in_headers { | ||||
116 | my ($self, $pms) = @_; | ||||
117 | my $hdr; | ||||
118 | |||||
119 | my @locales = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales}); | ||||
120 | |||||
121 | return 0 if grep { $_ eq "all" } @locales; | ||||
122 | |||||
123 | for my $h (qw(From Subject)) { | ||||
124 | my @hdrs = $pms->get("$h:raw"); # ??? get() returns a scalar ??? | ||||
125 | if ($#hdrs >= 0) { | ||||
126 | $hdr = join(" ", @hdrs); | ||||
127 | } else { | ||||
128 | $hdr = ''; | ||||
129 | } | ||||
130 | while ($hdr =~ /=\?(.+?)\?.\?.*?\?=/g) { | ||||
131 | Mail::SpamAssassin::Locales::is_charset_ok_for_locales($1, @locales) | ||||
132 | or return 1; | ||||
133 | } | ||||
134 | } | ||||
135 | 0; | ||||
136 | } | ||||
137 | |||||
138 | sub check_for_unique_subject_id { | ||||
139 | my ($self, $pms) = @_; | ||||
140 | local ($_); | ||||
141 | $_ = lc $pms->get('Subject'); | ||||
142 | study; # study is a no-op since perl 5.16.0, eliminating related bugs | ||||
143 | |||||
144 | my $id = 0; | ||||
145 | if (/[-_\.\s]{7,}([-a-z0-9]{4,})$/ | ||||
146 | || /\s{10,}(?:\S\s)?(\S+)$/ | ||||
147 | || /\s{3,}[-:\#\(\[]+([-a-z0-9]{4,})[\]\)]+$/ | ||||
148 | || /\s{3,}[:\#\(\[]*([a-f0-9]{4,})[\]\)]*$/ | ||||
149 | || /\s{3,}[-:\#]([a-z0-9]{5,})$/ | ||||
150 | || /[\s._]{3,}([^0\s._]\d{3,})$/ | ||||
151 | || /[\s._]{3,}\[(\S+)\]$/ | ||||
152 | |||||
153 | # (7217vPhZ0-478TLdy5829qicU9-0@26) and similar | ||||
154 | || /\(([-\w]{7,}\@\d+)\)$/ | ||||
155 | |||||
156 | # Seven or more digits at the end of a subject is almost certainly a id | ||||
157 | || /\b(\d{7,})\s*$/ | ||||
158 | |||||
159 | # stuff at end of line after "!" or "?" is usually an id | ||||
160 | || /[!\?]\s*(\d{4,}|\w+(-\w+)+)\s*$/ | ||||
161 | |||||
162 | # 9095IPZK7-095wsvp8715rJgY8-286-28 and similar | ||||
163 | # excluding 'Re:', etc and the first word | ||||
164 | || /(?:\w{2,3}:\s)?\w+\s+(\w{7,}-\w{7,}(-\w+)*)\s*$/ | ||||
165 | |||||
166 | # #30D7 and similar | ||||
167 | || /\s#\s*([a-f0-9]{4,})\s*$/ | ||||
168 | ) | ||||
169 | { | ||||
170 | $id = $1; | ||||
171 | # exempt online purchases | ||||
172 | if ($id =~ /\d{5,}/ | ||||
173 | && /(?:item|invoice|order|number|confirmation).{1,6}\Q$id\E\s*$/) | ||||
174 | { | ||||
175 | $id = 0; | ||||
176 | } | ||||
177 | |||||
178 | # for the "foo-bar-baz" case, otherwise it won't | ||||
179 | # be found in the dict: | ||||
180 | $id =~ s/-//; | ||||
181 | } | ||||
182 | |||||
183 | return ($id && !$self->word_is_in_dictionary($id)); | ||||
184 | } | ||||
185 | |||||
186 | # word_is_in_dictionary() | ||||
187 | # | ||||
188 | # See if the word looks like an English word, by checking if each triplet | ||||
189 | # of letters it contains is one that can be found in the English language. | ||||
190 | # Does not include triplets only found in proper names, or in the Latin | ||||
191 | # and Greek terms that might be found in a larger dictionary | ||||
192 | |||||
193 | 1 | 2µs | my %triplets; | ||
194 | 1 | 2µs | my $triplets_loaded = 0; | ||
195 | |||||
196 | sub word_is_in_dictionary { | ||||
197 | my ($self, $word) = @_; | ||||
198 | local ($_); | ||||
199 | local $/ = "\n"; # Ensure $/ is set appropriately | ||||
200 | |||||
201 | # $word =~ tr/A-Z/a-z/; # already done by this stage | ||||
202 | $word =~ s/^\s+//; | ||||
203 | $word =~ s/\s+$//; | ||||
204 | |||||
205 | # If it contains a digit, dash, etc, it's not a valid word. | ||||
206 | # Don't reject words like "can't" and "I'll" | ||||
207 | return 0 if ($word =~ /[^a-z\']/); | ||||
208 | |||||
209 | # handle a few common "blah blah blah (comment)" styles | ||||
210 | return 1 if ($word eq "ot"); # off-topic | ||||
211 | return 1 if ($word =~ /(?:linux|nix|bsd)/); # not in most dicts | ||||
212 | return 1 if ($word =~ /(?:whew|phew|attn|tha?nx)/); # not in most dicts | ||||
213 | |||||
214 | my $word_len = length($word); | ||||
215 | |||||
216 | # Unique IDs probably aren't going to be only one or two letters long | ||||
217 | return 1 if ($word_len < 3); | ||||
218 | |||||
219 | if (!$triplets_loaded) { | ||||
220 | # take a copy to avoid modifying the real one | ||||
221 | my @default_triplets_path = @Mail::SpamAssassin::default_rules_path; | ||||
222 | s{$}{/triplets.txt} for @default_triplets_path; | ||||
223 | my $filename = $self->{main}->first_existing_path (@default_triplets_path); | ||||
224 | |||||
225 | if (!defined $filename) { | ||||
226 | dbg("eval: failed to locate the triplets.txt file"); | ||||
227 | return 1; | ||||
228 | } | ||||
229 | |||||
230 | local *TRIPLETS; | ||||
231 | if (!open (TRIPLETS, "<$filename")) { | ||||
232 | dbg("eval: failed to open '$filename', cannot check dictionary: $!"); | ||||
233 | return 1; | ||||
234 | } | ||||
235 | for($!=0; <TRIPLETS>; $!=0) { | ||||
236 | chomp; | ||||
237 | $triplets{$_} = 1; | ||||
238 | } | ||||
239 | defined $_ || $!==0 or | ||||
240 | $!==EBADF ? dbg("eval: error reading from $filename: $!") | ||||
241 | : die "error reading from $filename: $!"; | ||||
242 | close(TRIPLETS) or die "error closing $filename: $!"; | ||||
243 | |||||
244 | $triplets_loaded = 1; | ||||
245 | } # if (!$triplets_loaded) | ||||
246 | |||||
247 | |||||
248 | my $i; | ||||
249 | |||||
250 | for ($i = 0; $i < ($word_len - 2); $i++) { | ||||
251 | my $triplet = substr($word, $i, 3); | ||||
252 | if (!$triplets{$triplet}) { | ||||
253 | dbg("eval: unique ID: letter triplet '$triplet' from word '$word' not valid"); | ||||
254 | return 0; | ||||
255 | } | ||||
256 | } # for ($i = 0; $i < ($word_len - 2); $i++) | ||||
257 | |||||
258 | # All letter triplets in word were found to be valid | ||||
259 | return 1; | ||||
260 | } | ||||
261 | |||||
262 | # look for 8-bit and other illegal characters that should be MIME | ||||
263 | # encoded, these might want to exempt languages that do not use | ||||
264 | # Latin-based alphabets, but only if the user wants it that way | ||||
265 | sub check_illegal_chars { | ||||
266 | my ($self, $pms, $header, $ratio, $count) = @_; | ||||
267 | |||||
268 | $header .= ":raw" unless ($header eq "ALL" || $header =~ /:raw$/); | ||||
269 | my $str = $pms->get($header); | ||||
270 | return 0 if !defined $str || $str eq ''; | ||||
271 | |||||
272 | # avoid overlap between tests | ||||
273 | if ($header eq "ALL") { | ||||
274 | # fix continuation lines, then remove Subject and From | ||||
275 | $str =~ s/\n[ \t]+/ /gs; | ||||
276 | $str =~ s/^(?:Subject|From):.*$//gmi; | ||||
277 | } | ||||
278 | |||||
279 | # count illegal substrings (RFC 2045) | ||||
280 | # (non-ASCII + C0 controls except TAB, NL, CR) | ||||
281 | my $illegal = $str =~ tr/\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff//; | ||||
282 | |||||
283 | # minor exemptions for Subject | ||||
284 | if ($illegal > 0 && lc $header eq "subject:raw") { | ||||
285 | # only exempt a single cent sign, pound sign, or registered sign | ||||
286 | my $exempt = $str =~ tr/\xa2\xa3\xae//; | ||||
287 | $illegal-- if $exempt == 1; | ||||
288 | } | ||||
289 | |||||
290 | return 0 if $str eq ''; | ||||
291 | return (($illegal / length($str)) >= $ratio && $illegal >= $count); | ||||
292 | } | ||||
293 | |||||
294 | # ezmlm has a very bad habit of removing Received: headers! bad ezmlm. | ||||
295 | # | ||||
296 | sub gated_through_received_hdr_remover { | ||||
297 | my ($self, $pms) = @_; | ||||
298 | |||||
299 | my $txt = $pms->get("Mailing-List",undef); | ||||
300 | if (defined $txt && $txt =~ /^contact \S+\@\S+\; run by ezmlm$/) { | ||||
301 | my $dlto = $pms->get("Delivered-To"); | ||||
302 | my $rcvd = $pms->get("Received"); | ||||
303 | |||||
304 | # ensure we have other indicative headers too | ||||
305 | if ($dlto =~ /^mailing list \S+\@\S+/ && | ||||
306 | $rcvd =~ /qmail \d+ invoked (?:from network|by .{3,20})\); \d+ ... \d+/) | ||||
307 | { | ||||
308 | return 1; | ||||
309 | } | ||||
310 | } | ||||
311 | |||||
312 | my $rcvd = $pms->get("Received",undef); | ||||
313 | if (!defined $rcvd) { | ||||
314 | # we have no Received headers! These tests cannot run in that case | ||||
315 | return 1; | ||||
316 | } | ||||
317 | |||||
318 | # MSN groups removes Received lines. thanks MSN | ||||
319 | if ($rcvd =~ /from groups\.msn\.com \(\S+\.msn\.com /) { | ||||
320 | return 1; | ||||
321 | } | ||||
322 | |||||
323 | return 0; | ||||
324 | } | ||||
325 | |||||
326 | # FORGED_HOTMAIL_RCVD | ||||
327 | sub _check_for_forged_hotmail_received_headers { | ||||
328 | my ($self, $pms) = @_; | ||||
329 | |||||
330 | if (defined $pms->{hotmail_addr_but_no_hotmail_received}) { return; } | ||||
331 | |||||
332 | $pms->{hotmail_addr_with_forged_hotmail_received} = 0; | ||||
333 | $pms->{hotmail_addr_but_no_hotmail_received} = 0; | ||||
334 | |||||
335 | my $rcvd = $pms->get('Received'); | ||||
336 | $rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp | ||||
337 | |||||
338 | return if ($rcvd =~ | ||||
339 | /from mail pickup service by hotmail\.com with Microsoft SMTPSVC;/); | ||||
340 | |||||
341 | # Microsoft passes Hotmail mail directly to MSN Group servers. | ||||
342 | return if $self->check_for_msn_groups_headers($pms); | ||||
343 | |||||
344 | my $ip = $pms->get('X-Originating-Ip',undef); | ||||
345 | my $IP_ADDRESS = IP_ADDRESS; | ||||
346 | |||||
347 | if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; } | ||||
348 | |||||
349 | # Hotmail formats its received headers like this: | ||||
350 | # Received: from hotmail.com (f135.law8.hotmail.com [216.33.241.135]) | ||||
351 | # spammers do not ;) | ||||
352 | |||||
353 | if ($self->gated_through_received_hdr_remover($pms)) { return; } | ||||
354 | |||||
355 | if ($rcvd =~ /from (?:\S*\.)?hotmail.com \(\S+\.hotmail(?:\.msn)?\.com[ \)]/ && $ip) | ||||
356 | { return; } | ||||
357 | if ($rcvd =~ /from \S*\.hotmail.com \(\[$IP_ADDRESS\][ \):]/ && $ip) | ||||
358 | { return; } | ||||
359 | if ($rcvd =~ /from \S+ by \S+\.hotmail(?:\.msn)?\.com with HTTP\;/ && $ip) | ||||
360 | { return; } | ||||
361 | if ($rcvd =~ /from \[66\.218.\S+\] by \S+\.yahoo\.com/ && $ip) | ||||
362 | { return; } | ||||
363 | |||||
364 | if ($rcvd =~ /(?:from |HELO |helo=)\S*hotmail\.com\b/) { | ||||
365 | # HELO'd as hotmail.com, despite not being hotmail | ||||
366 | $pms->{hotmail_addr_with_forged_hotmail_received} = 1; | ||||
367 | } else { | ||||
368 | # check to see if From claimed to be @hotmail.com | ||||
369 | my $from = $pms->get('From:addr'); | ||||
370 | if ($from !~ /\bhotmail\.com$/i) { return; } | ||||
371 | $pms->{hotmail_addr_but_no_hotmail_received} = 1; | ||||
372 | } | ||||
373 | } | ||||
374 | |||||
375 | # FORGED_HOTMAIL_RCVD | ||||
376 | sub check_for_forged_hotmail_received_headers { | ||||
377 | my ($self, $pms) = @_; | ||||
378 | $self->_check_for_forged_hotmail_received_headers($pms); | ||||
379 | return $pms->{hotmail_addr_with_forged_hotmail_received}; | ||||
380 | } | ||||
381 | |||||
382 | # SEMIFORGED_HOTMAIL_RCVD | ||||
383 | sub check_for_no_hotmail_received_headers { | ||||
384 | my ($self, $pms) = @_; | ||||
385 | $self->_check_for_forged_hotmail_received_headers($pms); | ||||
386 | return $pms->{hotmail_addr_but_no_hotmail_received}; | ||||
387 | } | ||||
388 | |||||
389 | # MSN_GROUPS | ||||
390 | sub check_for_msn_groups_headers { | ||||
391 | my ($self, $pms) = @_; | ||||
392 | |||||
393 | my $to = $pms->get('To'); | ||||
394 | return 0 unless $to =~ /<(\S+)\@groups\.msn\.com>/i; | ||||
395 | my $listname = $1; | ||||
396 | |||||
397 | # from Theo Van Dinter, see bug 591 | ||||
398 | # Updated by DOS, based on messages from Bob Menschel, bug 4301 | ||||
399 | |||||
400 | return 0 unless $pms->get('Received') =~ | ||||
401 | /from mail pickup service by ((?:p\d\d\.)groups\.msn\.com)\b/; | ||||
402 | my $server = $1; | ||||
403 | |||||
404 | if ($listname =~ /^notifications$/) { | ||||
405 | return 0 unless $pms->get('Message-Id') =~ /^<\S+\@$server>/; | ||||
406 | } else { | ||||
407 | return 0 unless $pms->get('Message-Id') =~ /^<$listname-\S+\@groups\.msn\.com>/; | ||||
408 | return 0 unless $pms->get('EnvelopeFrom:addr') =~ /$listname-bounce\@groups\.msn\.com/; | ||||
409 | } | ||||
410 | return 1; | ||||
411 | |||||
412 | # MSN Groups | ||||
413 | # Return-path: <ListName-bounce@groups.msn.com> | ||||
414 | # Received: from groups.msn.com (tk2dcpuba02.msn.com [65.54.195.210]) by | ||||
415 | # dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id g72K35v10457 for | ||||
416 | # <zzzzzzzzzzzz@jmason.org>; Fri, 2 Aug 2002 21:03:05 +0100 | ||||
417 | # Received: from mail pickup service by groups.msn.com with Microsoft | ||||
418 | # SMTPSVC; Fri, 2 Aug 2002 13:01:30 -0700 | ||||
419 | # Message-id: <ListName-1392@groups.msn.com> | ||||
420 | # X-loop: notifications@groups.msn.com | ||||
421 | # Reply-to: "List Full Name" <ListName@groups.msn.com> | ||||
422 | # To: "List Full Name" <ListName@groups.msn.com> | ||||
423 | |||||
424 | # Return-path: <ListName-bounce@groups.msn.com> | ||||
425 | # Received: from p04.groups.msn.com ([65.54.195.216]) etc... | ||||
426 | # Received: from mail pickup service by p04.groups.msn.com with Microsoft SMTPSVC; | ||||
427 | # Thu, 5 May 2005 20:30:37 -0700 | ||||
428 | # X-Originating-Ip: 207.68.170.30 | ||||
429 | # From: =?iso-8859-1?B?IqSj4/D9pEbzeN9s9vLw6qQiIA==?=<zzzzzzzz@hotmail.com> | ||||
430 | # To: "Managers of List Name" <notifications@groups.msn.com> | ||||
431 | # Subject: =?iso-8859-1?Q?APPROVAL_NEEDED:_=A4=A3=E3=F0=FD=A4F=F3x=DFl?= | ||||
432 | # =?iso-8859-1?Q?=F6=F2=F0=EA=A4_applied_to_join_List_Name=2C?= | ||||
433 | # =?iso-8859-1?Q?_an_MSN_Group?= | ||||
434 | # Date: Thu, 5 May 2005 20:30:37 -0700 | ||||
435 | # MIME-Version: 1.0 | ||||
436 | # Content-Type: multipart/alternative; | ||||
437 | # boundary="----=_NextPart_000_333944_01C551B1.4BBA02B0" | ||||
438 | # X-MimeOLE: Produced By Microsoft MimeOLE V5.50.4927.1200 | ||||
439 | # Message-ID: <TK2DCPUBA042cv0aGlt00020aa3@p04.groups.msn.com> | ||||
440 | |||||
441 | # Return-path: <ListName-bounce@groups.msn.com> | ||||
442 | # Received: from [65.54.208.83] (helo=p05.groups.msn.com) etc... | ||||
443 | # Received: from mail pickup service by p05.groups.msn.com with Microsoft SMTPSVC; | ||||
444 | # Fri, 6 May 2005 14:59:25 -0700 | ||||
445 | # X-Originating-Ip: 207.68.170.30 | ||||
446 | # Message-Id: <ListName-101@groups.msn.com> | ||||
447 | # Reply-To: "List Name" <ListName@groups.msn.com> | ||||
448 | # From: "whoever" <zzzzzzzzzz@hotmail.com> | ||||
449 | # To: "List Name" <ListName@groups.msn.com> | ||||
450 | # Subject: whatever | ||||
451 | # Date: Fri, 6 May 2005 14:59:25 -0700 | ||||
452 | |||||
453 | } | ||||
454 | |||||
455 | ########################################################################### | ||||
456 | |||||
457 | sub check_for_forged_eudoramail_received_headers { | ||||
458 | my ($self, $pms) = @_; | ||||
459 | |||||
460 | my $from = $pms->get('From:addr'); | ||||
461 | if ($from !~ /\beudoramail\.com$/i) { return 0; } | ||||
462 | |||||
463 | my $rcvd = $pms->get('Received'); | ||||
464 | $rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp | ||||
465 | |||||
466 | my $ip = $pms->get('X-Sender-Ip',undef); | ||||
467 | my $IP_ADDRESS = IP_ADDRESS; | ||||
468 | if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; } | ||||
469 | |||||
470 | # Eudoramail formats its received headers like this: | ||||
471 | # Received: from Unknown/Local ([?.?.?.?]) by shared1-mail.whowhere.com; | ||||
472 | # Thu Nov 29 13:44:25 2001 | ||||
473 | # Message-Id: <JGDHDEHPPJECDAAA@shared1-mail.whowhere.com> | ||||
474 | # Organization: QUALCOMM Eudora Web-Mail (http://www.eudoramail.com:80) | ||||
475 | # X-Sender-Ip: 192.175.21.146 | ||||
476 | # X-Mailer: MailCity Service | ||||
477 | |||||
478 | if ($self->gated_through_received_hdr_remover($pms)) { return 0; } | ||||
479 | |||||
480 | if ($rcvd =~ /by \S*whowhere.com\;/ && $ip) { return 0; } | ||||
481 | |||||
482 | return 1; | ||||
483 | } | ||||
484 | |||||
485 | ########################################################################### | ||||
486 | |||||
487 | sub check_for_forged_yahoo_received_headers { | ||||
488 | my ($self, $pms) = @_; | ||||
489 | |||||
490 | my $from = $pms->get('From:addr'); | ||||
491 | if ($from !~ /\byahoo\.com$/i) { return 0; } | ||||
492 | |||||
493 | my $rcvd = $pms->get('Received'); | ||||
494 | |||||
495 | if ($pms->get("Resent-From") ne '' && $pms->get("Resent-To") ne '') { | ||||
496 | my $xrcvd = $pms->get("X-Received"); | ||||
497 | $rcvd = $xrcvd if $xrcvd ne ''; | ||||
498 | } | ||||
499 | $rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp | ||||
500 | |||||
501 | # not sure about this | ||||
502 | #if ($rcvd !~ /from \S*yahoo\.com/) { return 0; } | ||||
503 | |||||
504 | if ($self->gated_through_received_hdr_remover($pms)) { return 0; } | ||||
505 | |||||
506 | # bug 3740: ignore bounces from Yahoo!. only honoured if the | ||||
507 | # correct rDNS shows up in the trusted relay list, or first untrusted relay | ||||
508 | # | ||||
509 | # bug 4528: [ ip=68.142.202.54 rdns=mta122.mail.mud.yahoo.com | ||||
510 | # helo=mta122.mail.mud.yahoo.com by=eclectic.kluge.net ident= | ||||
511 | # envfrom= intl=0 id=49F2EAF13B auth= ] | ||||
512 | # | ||||
513 | if ($pms->{relays_trusted_str} =~ / rdns=\S+\.yahoo\.com / | ||||
514 | || $pms->{relays_untrusted_str} =~ /^[^\]]+ rdns=\S+\.yahoo\.com /) | ||||
515 | { return 0; } | ||||
516 | |||||
517 | if ($rcvd =~ /by web\S+\.mail\S*\.yahoo\.com via HTTP/) { return 0; } | ||||
518 | if ($rcvd =~ /by smtp\S+\.yahoo\.com with SMTP/) { return 0; } | ||||
519 | my $IP_ADDRESS = IP_ADDRESS; | ||||
520 | if ($rcvd =~ | ||||
521 | /from \[$IP_ADDRESS\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/) { | ||||
522 | return 0; | ||||
523 | } | ||||
524 | |||||
525 | # used in "forward this news item to a friend" links. There's no better | ||||
526 | # received hdrs to match on, unfortunately. I'm not sure if the next test is | ||||
527 | # still useful, as a result. | ||||
528 | # | ||||
529 | # search for msgid <20020929140301.451A92940A9@xent.com>, subject "Yahoo! | ||||
530 | # News Story - Top Stories", date Sep 29 2002 on | ||||
531 | # <http://xent.com/pipermail/fork/> for an example. | ||||
532 | # | ||||
533 | if ($rcvd =~ /\bmailer\d+\.bulk\.scd\.yahoo\.com\b/ | ||||
534 | && $from =~ /\@reply\.yahoo\.com$/i) { return 0; } | ||||
535 | |||||
536 | if ($rcvd =~ /by \w+\.\w+\.yahoo\.com \(\d+\.\d+\.\d+\/\d+\.\d+\.\d+\)(?: with ESMTP)? id \w+/) { | ||||
537 | # possibly sent from "mail this story to a friend" | ||||
538 | return 0; | ||||
539 | } | ||||
540 | |||||
541 | return 1; | ||||
542 | } | ||||
543 | |||||
544 | sub check_for_forged_juno_received_headers { | ||||
545 | my ($self, $pms) = @_; | ||||
546 | |||||
547 | my $from = $pms->get('From:addr'); | ||||
548 | if ($from !~ /\bjuno\.com$/i) { return 0; } | ||||
549 | |||||
550 | if ($self->gated_through_received_hdr_remover($pms)) { return 0; } | ||||
551 | |||||
552 | my $xorig = $pms->get('X-Originating-IP'); | ||||
553 | my $xmailer = $pms->get('X-Mailer'); | ||||
554 | my $rcvd = $pms->get('Received'); | ||||
555 | my $IP_ADDRESS = IP_ADDRESS; | ||||
556 | |||||
557 | if ($xorig ne '') { | ||||
558 | # New style Juno has no X-Originating-IP header, and other changes | ||||
559 | if($rcvd !~ /from.*\b(?:juno|untd)\.com.*[\[\(]$IP_ADDRESS[\]\)].*by/ | ||||
560 | && $rcvd !~ / cookie\.(?:juno|untd)\.com /) { return 1; } | ||||
561 | if($xmailer !~ /Juno /) { return 1; } | ||||
562 | } else { | ||||
563 | if($rcvd =~ /from.*\bmail\.com.*\[$IP_ADDRESS\].*by/) { | ||||
564 | if($xmailer !~ /\bmail\.com/) { return 1; } | ||||
565 | } elsif($rcvd =~ /from (webmail\S+\.untd\.com) \(\1 \[$IP_ADDRESS\]\) by/) { | ||||
566 | if($xmailer !~ /^Webmail Version \d/) { return 1; } | ||||
567 | } else { | ||||
568 | return 1; | ||||
569 | } | ||||
570 | if($xorig !~ /$IP_ADDRESS/) { return 1; } | ||||
571 | } | ||||
572 | |||||
573 | return 0; | ||||
574 | } | ||||
575 | |||||
576 | sub check_for_matching_env_and_hdr_from { | ||||
577 | my ($self, $pms) =@_; | ||||
578 | # two blank headers match so don't bother checking | ||||
579 | return (lc $pms->get('EnvelopeFrom:addr') eq lc $pms->get('From:addr')); | ||||
580 | } | ||||
581 | |||||
582 | sub sorted_recipients { | ||||
583 | my ($self, $pms) = @_; | ||||
584 | |||||
585 | if (!exists $pms->{tocc_sorted}) { | ||||
586 | $self->_check_recipients($pms); | ||||
587 | } | ||||
588 | return $pms->{tocc_sorted}; | ||||
589 | } | ||||
590 | |||||
591 | sub similar_recipients { | ||||
592 | my ($self, $pms, $min, $max) = @_; | ||||
593 | |||||
594 | if (!exists $pms->{tocc_similar}) { | ||||
595 | $self->_check_recipients($pms); | ||||
596 | } | ||||
597 | return (($min eq 'undef' || $pms->{tocc_similar} >= $min) && | ||||
598 | ($max eq 'undef' || $pms->{tocc_similar} < $max)); | ||||
599 | } | ||||
600 | |||||
601 | # best experimentally derived values | ||||
602 | 2 | 72µs | 2 | 421µs | # spent 226µs (30+196) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@602 which was called:
# once (30µs+196µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 602 # spent 226µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@602
# spent 196µs making 1 call to constant::import |
603 | 2 | 69µs | 2 | 437µs | # spent 234µs (31+203) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@603 which was called:
# once (31µs+203µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 603 # spent 234µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@603
# spent 203µs making 1 call to constant::import |
604 | 2 | 6.29ms | 2 | 391µs | # spent 210µs (28+182) within Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@604 which was called:
# once (28µs+182µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 604 # spent 210µs making 1 call to Mail::SpamAssassin::Plugin::HeaderEval::BEGIN@604
# spent 182µs making 1 call to constant::import |
605 | |||||
606 | sub _check_recipients { | ||||
607 | my ($self, $pms) = @_; | ||||
608 | |||||
609 | my @inputs; | ||||
610 | |||||
611 | # ToCc: pseudo-header works best, but sometimes Bcc: is better | ||||
612 | for ('ToCc', 'Bcc') { | ||||
613 | my $to = $pms->get($_); # get recipients | ||||
614 | $to =~ s/\(.*?\)//g; # strip out the (comments) | ||||
615 | push(@inputs, ($to =~ m/([\w.=-]+\@\w+(?:[\w.-]+\.)+\w+)/g)); | ||||
616 | last if scalar(@inputs) >= TOCC_SIMILAR_COUNT; | ||||
617 | } | ||||
618 | |||||
619 | # remove duplicate addresses only when they appear next to each other | ||||
620 | my @address; | ||||
621 | my $previous = ''; | ||||
622 | while (my $current = shift @inputs) { | ||||
623 | push(@address, ($previous = $current)) if lc($current) ne lc($previous); | ||||
624 | last if @address == 256; | ||||
625 | } | ||||
626 | |||||
627 | # ideas that had both poor S/O ratios and poor hit rates: | ||||
628 | # - testing for reverse sorted recipient lists | ||||
629 | # - testing To: and Cc: headers separately | ||||
630 | $pms->{tocc_sorted} = (scalar(@address) >= TOCC_SORTED_COUNT && | ||||
631 | join(',', @address) eq (join(',', sort @address))); | ||||
632 | |||||
633 | # a good S/O ratio and hit rate is achieved by comparing 2-byte | ||||
634 | # substrings and requiring 5 or more addresses | ||||
635 | $pms->{tocc_similar} = 0; | ||||
636 | if (scalar (@address) >= TOCC_SIMILAR_COUNT) { | ||||
637 | my @user = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @address; | ||||
638 | my @fqhn = map { m/\@(.*)/ } @address; | ||||
639 | my @host = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @fqhn; | ||||
640 | my $hits = 0; | ||||
641 | my $combinations = 0; | ||||
642 | for (my $i = 0; $i <= $#address; $i++) { | ||||
643 | for (my $j = $i+1; $j <= $#address; $j++) { | ||||
644 | $hits++ if $user[$i] eq $user[$j]; | ||||
645 | $hits++ if $host[$i] eq $host[$j] && $fqhn[$i] ne $fqhn[$j]; | ||||
646 | $combinations++; | ||||
647 | } | ||||
648 | } | ||||
649 | $pms->{tocc_similar} = $hits / $combinations; | ||||
650 | } | ||||
651 | } | ||||
652 | |||||
653 | sub check_for_missing_to_header { | ||||
654 | my ($self, $pms) = @_; | ||||
655 | |||||
656 | my $hdr = $pms->get('To'); | ||||
657 | $hdr = $pms->get('Apparently-To') if $hdr eq ''; | ||||
658 | return 1 if $hdr eq ''; | ||||
659 | |||||
660 | return 0; | ||||
661 | } | ||||
662 | |||||
663 | sub check_for_forged_gw05_received_headers { | ||||
664 | my ($self, $pms) = @_; | ||||
665 | local ($_); | ||||
666 | |||||
667 | my $rcv = $pms->get('Received'); | ||||
668 | |||||
669 | # e.g. | ||||
670 | # Received: from mail3.icytundra.com by gw05 with ESMTP; Thu, 21 Jun 2001 02:28:32 -0400 | ||||
671 | my ($h1, $h2) = ($rcv =~ | ||||
672 | m/\nfrom\s(\S+)\sby\s(\S+)\swith\sESMTP\;\s+\S\S\S,\s+\d+\s+\S\S\S\s+ | ||||
673 | \d{4}\s+\d\d:\d\d:\d\d\s+[-+]*\d{4}\n$/xs); | ||||
674 | |||||
675 | if (defined ($h1) && defined ($h2) && $h2 !~ /\./) { | ||||
676 | return 1; | ||||
677 | } | ||||
678 | |||||
679 | 0; | ||||
680 | } | ||||
681 | |||||
682 | ########################################################################### | ||||
683 | |||||
684 | sub check_for_shifted_date { | ||||
685 | my ($self, $pms, $min, $max) = @_; | ||||
686 | |||||
687 | if (!exists $pms->{date_diff}) { | ||||
688 | $self->_check_date_diff($pms); | ||||
689 | } | ||||
690 | return (($min eq 'undef' || $pms->{date_diff} >= (3600 * $min)) && | ||||
691 | ($max eq 'undef' || $pms->{date_diff} < (3600 * $max))); | ||||
692 | } | ||||
693 | |||||
694 | # filters out some false positives in old corpus mail - Allen | ||||
695 | sub received_within_months { | ||||
696 | my ($self,$pms,$min,$max) = @_; | ||||
697 | |||||
698 | if (!exists($pms->{date_received})) { | ||||
699 | $self->_check_date_received($pms); | ||||
700 | } | ||||
701 | my $diff = time() - $pms->{date_received}; | ||||
702 | |||||
703 | # 365.2425 * 24 * 60 * 60 = 31556952 = seconds in year (including leap) | ||||
704 | |||||
705 | if (((! defined($min)) || ($min eq 'undef') || | ||||
706 | ($diff >= (31556952 * ($min/12)))) && | ||||
707 | ((! defined($max)) || ($max eq 'undef') || | ||||
708 | ($diff < (31556952 * ($max/12))))) { | ||||
709 | return 1; | ||||
710 | } else { | ||||
711 | return 0; | ||||
712 | } | ||||
713 | } | ||||
714 | |||||
715 | sub _get_date_header_time { | ||||
716 | my ($self, $pms) = @_; | ||||
717 | |||||
718 | my $time; | ||||
719 | # a Resent-Date: header takes precedence over any Date: header | ||||
720 | DATE: for my $header ('Resent-Date', 'Date') { | ||||
721 | my @dates = $pms->{msg}->get_header($header); | ||||
722 | for my $date (@dates) { | ||||
723 | if (defined($date) && length($date)) { | ||||
724 | chomp($date); | ||||
725 | $time = Mail::SpamAssassin::Util::parse_rfc822_date($date); | ||||
726 | } | ||||
727 | last DATE if defined($time); | ||||
728 | } | ||||
729 | } | ||||
730 | if (defined($time)) { | ||||
731 | $pms->{date_header_time} = $time; | ||||
732 | } | ||||
733 | else { | ||||
734 | $pms->{date_header_time} = undef; | ||||
735 | } | ||||
736 | } | ||||
737 | |||||
738 | sub _get_received_header_times { | ||||
739 | my ($self, $pms) = @_; | ||||
740 | |||||
741 | $pms->{received_header_times} = [ () ]; | ||||
742 | $pms->{received_fetchmail_time} = undef; | ||||
743 | |||||
744 | my (@received); | ||||
745 | my $received = $pms->get('Received'); | ||||
746 | if ($received ne '') { | ||||
747 | @received = grep {$_ =~ m/\S/} (split(/\n/,$received)); | ||||
748 | } | ||||
749 | # if we have no Received: headers, chances are we're archived mail | ||||
750 | # with a limited set of headers | ||||
751 | if (!scalar(@received)) { | ||||
752 | return; | ||||
753 | } | ||||
754 | |||||
755 | # handle fetchmail headers | ||||
756 | my (@local); | ||||
757 | if (($received[0] =~ | ||||
758 | m/\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/) || | ||||
759 | ($received[0] =~ m/qmail \d+ invoked by uid \d+/)) { | ||||
760 | push @local, (shift @received); | ||||
761 | } | ||||
762 | if (scalar(@received) && | ||||
763 | ($received[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) { | ||||
764 | push @local, (shift @received); | ||||
765 | } | ||||
766 | elsif (scalar(@local)) { | ||||
767 | unshift @received, (shift @local); | ||||
768 | } | ||||
769 | |||||
770 | if (scalar(@local)) { | ||||
771 | my (@fetchmail_times); | ||||
772 | foreach my $rcvd (@local) { | ||||
773 | if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { | ||||
774 | my $date = $1; | ||||
775 | dbg2("eval: trying Received fetchmail header date for real time: $date"); | ||||
776 | my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date); | ||||
777 | if (defined($time) && (time() >= $time)) { | ||||
778 | dbg2("eval: time_t from date=$time, rcvd=$date"); | ||||
779 | push @fetchmail_times, $time; | ||||
780 | } | ||||
781 | } | ||||
782 | } | ||||
783 | if (scalar(@fetchmail_times) > 1) { | ||||
784 | $pms->{received_fetchmail_time} = | ||||
785 | (sort {$b <=> $a} (@fetchmail_times))[0]; | ||||
786 | } elsif (scalar(@fetchmail_times)) { | ||||
787 | $pms->{received_fetchmail_time} = $fetchmail_times[0]; | ||||
788 | } | ||||
789 | } | ||||
790 | |||||
791 | my (@header_times); | ||||
792 | foreach my $rcvd (@received) { | ||||
793 | if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { | ||||
794 | my $date = $1; | ||||
795 | dbg2("eval: trying Received header date for real time: $date"); | ||||
796 | my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date); | ||||
797 | if (defined($time)) { | ||||
798 | dbg2("eval: time_t from date=$time, rcvd=$date"); | ||||
799 | push @header_times, $time; | ||||
800 | } | ||||
801 | } | ||||
802 | } | ||||
803 | |||||
804 | if (scalar(@header_times)) { | ||||
805 | $pms->{received_header_times} = [ @header_times ]; | ||||
806 | } else { | ||||
807 | dbg("eval: no dates found in Received headers"); | ||||
808 | } | ||||
809 | } | ||||
810 | |||||
811 | sub _check_date_received { | ||||
812 | my ($self, $pms) = @_; | ||||
813 | |||||
814 | my (@dates_poss); | ||||
815 | |||||
816 | $pms->{date_received} = 0; | ||||
817 | |||||
818 | if (!exists($pms->{date_header_time})) { | ||||
819 | $self->_get_date_header_time($pms); | ||||
820 | } | ||||
821 | |||||
822 | if (defined($pms->{date_header_time})) { | ||||
823 | push @dates_poss, $pms->{date_header_time}; | ||||
824 | } | ||||
825 | |||||
826 | if (!exists($pms->{received_header_times})) { | ||||
827 | $self->_get_received_header_times($pms); | ||||
828 | } | ||||
829 | my (@received_header_times) = @{ $pms->{received_header_times} }; | ||||
830 | if (scalar(@received_header_times)) { | ||||
831 | push @dates_poss, $received_header_times[0]; | ||||
832 | } | ||||
833 | if (defined($pms->{received_fetchmail_time})) { | ||||
834 | push @dates_poss, $pms->{received_fetchmail_time}; | ||||
835 | } | ||||
836 | |||||
837 | if (defined($pms->{date_header_time}) && scalar(@received_header_times)) { | ||||
838 | if (!exists($pms->{date_diff})) { | ||||
839 | $self->_check_date_diff($pms); | ||||
840 | } | ||||
841 | push @dates_poss, $pms->{date_header_time} - $pms->{date_diff}; | ||||
842 | } | ||||
843 | |||||
844 | if (scalar(@dates_poss)) { # use median | ||||
845 | $pms->{date_received} = (sort {$b <=> $a} | ||||
846 | (@dates_poss))[int($#dates_poss/2)]; | ||||
847 | dbg("eval: date chosen from message: " . | ||||
848 | scalar(localtime($pms->{date_received}))); | ||||
849 | } else { | ||||
850 | dbg("eval: no dates found in message"); | ||||
851 | } | ||||
852 | } | ||||
853 | |||||
854 | sub _check_date_diff { | ||||
855 | my ($self, $pms) = @_; | ||||
856 | |||||
857 | $pms->{date_diff} = 0; | ||||
858 | |||||
859 | if (!exists($pms->{date_header_time})) { | ||||
860 | $self->_get_date_header_time($pms); | ||||
861 | } | ||||
862 | |||||
863 | if (!defined($pms->{date_header_time})) { | ||||
864 | return; # already have tests for this | ||||
865 | } | ||||
866 | |||||
867 | if (!exists($pms->{received_header_times})) { | ||||
868 | $self->_get_received_header_times($pms); | ||||
869 | } | ||||
870 | my (@header_times) = @{ $pms->{received_header_times} }; | ||||
871 | |||||
872 | if (!scalar(@header_times)) { | ||||
873 | return; # archived mail? | ||||
874 | } | ||||
875 | |||||
876 | my (@diffs) = map {$pms->{date_header_time} - $_} (@header_times); | ||||
877 | |||||
878 | # if the last Received: header has no difference, then we choose to | ||||
879 | # exclude it | ||||
880 | if ($#diffs > 0 && $diffs[$#diffs] == 0) { | ||||
881 | pop(@diffs); | ||||
882 | } | ||||
883 | |||||
884 | # use the date with the smallest absolute difference | ||||
885 | # (experimentally, this results in the fewest false positives) | ||||
886 | @diffs = sort { abs($a) <=> abs($b) } @diffs; | ||||
887 | $pms->{date_diff} = $diffs[0]; | ||||
888 | } | ||||
889 | |||||
890 | |||||
891 | sub subject_is_all_caps { | ||||
892 | my ($self, $pms) = @_; | ||||
893 | my $subject = $pms->get('Subject'); | ||||
894 | |||||
895 | $subject =~ s/^\s+//; | ||||
896 | $subject =~ s/\s+$//; | ||||
897 | $subject =~ s/^(?:(?:Re|Fwd|Fw|Aw|Antwort|Sv):\s*)+//i; # Bug 6805 | ||||
898 | return 0 if $subject !~ /\s/; # don't match one word subjects | ||||
899 | return 0 if (length $subject < 10); # don't match short subjects | ||||
900 | $subject =~ s/[^a-zA-Z]//g; # only look at letters | ||||
901 | |||||
902 | # now, check to see if the subject is encoded using a non-ASCII charset. | ||||
903 | # If so, punt on this test to avoid FPs. We just list the known charsets | ||||
904 | # this test will FP on, here. | ||||
905 | my $subjraw = $pms->get('Subject:raw'); | ||||
906 | my $CLTFAC = Mail::SpamAssassin::Constants::CHARSETS_LIKELY_TO_FP_AS_CAPS; | ||||
907 | if ($subjraw =~ /=\?${CLTFAC}\?/i) { | ||||
908 | return 0; | ||||
909 | } | ||||
910 | |||||
911 | return length($subject) && ($subject eq uc($subject)); | ||||
912 | } | ||||
913 | |||||
914 | sub check_for_to_in_subject { | ||||
915 | my ($self, $pms, $test) = @_; | ||||
916 | |||||
917 | my $full_to = $pms->get('To:addr'); | ||||
918 | return 0 unless $full_to ne ''; | ||||
919 | |||||
920 | my $subject = $pms->get('Subject'); | ||||
921 | |||||
922 | if ($test eq "address") { | ||||
923 | return $subject =~ /\b\Q$full_to\E\b/i; # "user@domain.com" | ||||
924 | } | ||||
925 | elsif ($test eq "user") { | ||||
926 | my $to = $full_to; | ||||
927 | $to =~ s/\@.*//; | ||||
928 | my $subj = $subject; | ||||
929 | $subj =~ s/^\s+//; | ||||
930 | $subj =~ s/\s+$//; | ||||
931 | |||||
932 | return $subject =~ /^(?: | ||||
933 | (?:re|fw):\s*(?:\w+\s+)?\Q$to\E$ | ||||
934 | |(?-i:\Q$to\E)\s*[,:;!?-](?:$|\s) | ||||
935 | |\Q$to\E$ | ||||
936 | |,\s*\Q$to\E[,:;!?-]$ | ||||
937 | )/ix; | ||||
938 | } | ||||
939 | return 0; | ||||
940 | } | ||||
941 | |||||
942 | sub check_outlook_message_id { | ||||
943 | my ($self, $pms) = @_; | ||||
944 | local ($_); | ||||
945 | |||||
946 | my $id = $pms->get('MESSAGEID'); | ||||
947 | return 0 if $id !~ /^<[0-9a-f]{4}([0-9a-f]{8})\$[0-9a-f]{8}\$[0-9a-f]{8}\@/; | ||||
948 | |||||
949 | my $timetoken = hex($1); | ||||
950 | my $x = 0.0023283064365387; | ||||
951 | my $y = 27111902.8329849; | ||||
952 | |||||
953 | my $fudge = 250; | ||||
954 | |||||
955 | $_ = $pms->get('Date'); | ||||
956 | $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0; | ||||
957 | my $expected = int (($_ * $x) + $y); | ||||
958 | my $diff = $timetoken - $expected; | ||||
959 | return 0 if (abs($diff) < $fudge); | ||||
960 | |||||
961 | $_ = $pms->get('Received'); | ||||
962 | /(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+).*?$/; | ||||
963 | $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0; | ||||
964 | $expected = int(($_ * $x) + $y); | ||||
965 | $diff = $timetoken - $expected; | ||||
966 | |||||
967 | return (abs($diff) >= $fudge); | ||||
968 | } | ||||
969 | |||||
970 | sub check_messageid_not_usable { | ||||
971 | my ($self, $pms) = @_; | ||||
972 | local ($_); | ||||
973 | |||||
974 | # Lyris eats message-ids. also some ezmlm, I think :( | ||||
975 | $_ = $pms->get("List-Unsubscribe"); | ||||
976 | return 1 if (/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/i); | ||||
977 | |||||
978 | # ezmlm again | ||||
979 | if($self->gated_through_received_hdr_remover($pms)) { return 1; } | ||||
980 | |||||
981 | # Allen notes this as 'Wacky sendmail version?' | ||||
982 | $_ = $pms->get("Received"); | ||||
983 | return 1 if /\/CWT\/DCE\)/; | ||||
984 | |||||
985 | # Apr 2 2003 jm: iPlanet rewrites lots of stuff, including Message-IDs | ||||
986 | return 1 if /iPlanet Messaging Server/; | ||||
987 | |||||
988 | return 0; | ||||
989 | } | ||||
990 | |||||
991 | # Return true if the count of $hdr headers are within the given range | ||||
992 | sub check_header_count_range { | ||||
993 | my ($self, $pms, $hdr, $min, $max) = @_; | ||||
994 | my %uniq; | ||||
995 | my @hdrs = grep(!$uniq{$_}++, $pms->{msg}->get_header ($hdr)); | ||||
996 | return (scalar @hdrs >= $min && scalar @hdrs <= $max); | ||||
997 | } | ||||
998 | |||||
999 | sub check_unresolved_template { | ||||
1000 | my ($self, $pms) = @_; | ||||
1001 | |||||
1002 | my $all = $pms->get('ALL'); # cached access | ||||
1003 | $all =~ s/\n[ \t]+/ /gs; # fix continuation lines | ||||
1004 | |||||
1005 | for my $header (split(/\n/, $all)) { | ||||
1006 | # slightly faster to test in this order | ||||
1007 | if ($header =~ /%[A-Z][A-Z_-]/ && | ||||
1008 | $header !~ /^(?:X-VMS-To|X-UIDL|X-Face|To|Cc|From|Subject|References|In-Reply-To|(?:X-|Resent-|X-Original-)?Message-Id):/i) | ||||
1009 | { | ||||
1010 | return 1; | ||||
1011 | } | ||||
1012 | } | ||||
1013 | return 0; | ||||
1014 | } | ||||
1015 | |||||
1016 | sub check_ratware_name_id { | ||||
1017 | my ($self, $pms) = @_; | ||||
1018 | |||||
1019 | my $mid = $pms->get('MESSAGEID'); | ||||
1020 | my $from = $pms->get('From'); | ||||
1021 | if ($mid =~ m/<[A-Z]{28}\.([^>]+?)>/) { | ||||
1022 | if ($from =~ m/\"[^\"]+\"\s*<\Q$1\E>/) { | ||||
1023 | return 1; | ||||
1024 | } | ||||
1025 | } | ||||
1026 | return 0; | ||||
1027 | } | ||||
1028 | |||||
1029 | sub check_ratware_envelope_from { | ||||
1030 | my ($self, $pms) = @_; | ||||
1031 | |||||
1032 | my $to = $pms->get('To:addr'); | ||||
1033 | my $from = $pms->get('EnvelopeFrom:addr'); | ||||
1034 | |||||
1035 | return 0 if $from eq '' || $to eq ''; | ||||
1036 | return 0 if $from =~ /^SRS\d=/; | ||||
1037 | |||||
1038 | if ($to =~ /^([^@]+)@(.+)$/) { | ||||
1039 | my($user,$dom) = ($1,$2); | ||||
1040 | $dom = $self->{main}->{registryboundaries}->trim_domain($dom); | ||||
1041 | return unless | ||||
1042 | ($self->{main}->{registryboundaries}->is_domain_valid($dom)); | ||||
1043 | |||||
1044 | return 1 if ($from =~ /\b\Q$dom\E.\Q$user\E@/i); | ||||
1045 | } | ||||
1046 | |||||
1047 | return 0; | ||||
1048 | } | ||||
1049 | |||||
1050 | # ADDED FROM BUG 6487 | ||||
1051 | sub check_equal_from_domains { | ||||
1052 | my ($self, $pms) = @_; | ||||
1053 | |||||
1054 | my $from = $pms->get('From:addr'); | ||||
1055 | my $envfrom = $pms->get('EnvelopeFrom:addr'); | ||||
1056 | |||||
1057 | local $1; | ||||
1058 | my $fromdomain = ''; | ||||
1059 | #Revised regexp from 6487 comment 3 | ||||
1060 | $fromdomain = $1 if $from =~ /\@([^@]*)\z/; | ||||
1061 | $fromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/; | ||||
1062 | return 0 if $fromdomain eq ''; | ||||
1063 | |||||
1064 | my $envfromdomain = ''; | ||||
1065 | $envfromdomain = $1 if $envfrom =~ /\@([^@]*)\z/; | ||||
1066 | $envfromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/; | ||||
1067 | return 0 if $envfromdomain eq ''; | ||||
1068 | |||||
1069 | dbg("eval: From 2nd level domain: $fromdomain, EnvelopeFrom 2nd level domain: $envfromdomain"); | ||||
1070 | |||||
1071 | return 1 if lc($fromdomain) ne lc($envfromdomain); | ||||
1072 | |||||
1073 | return 0; | ||||
1074 | } | ||||
1075 | |||||
1076 | |||||
1077 | ########################################################################### | ||||
1078 | |||||
1079 | # support eval-test verbose debugs using "-Deval" | ||||
1080 | sub dbg2 { | ||||
1081 | if (would_log('dbg', 'eval') == 2) { | ||||
1082 | dbg(@_); | ||||
1083 | } | ||||
1084 | } | ||||
1085 | |||||
1086 | 1 | 24µs | 1; |