← Index
NYTProf Performance Profile   « line view »
For /usr/local/bin/sa-learn
  Run on Tue Nov 7 05:38:10 2017
Reported on Tue Nov 7 06:16:05 2017

Filename/usr/local/lib/perl5/site_perl/Mail/SpamAssassin/Plugin/MIMEEval.pm
StatementsExecuted 43 statements in 7.22ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
111189µs559µsMail::SpamAssassin::Plugin::MIMEEval::::newMail::SpamAssassin::Plugin::MIMEEval::new
11146µs58µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@20Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@20
11132µs150µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@28Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@28
21129µs29µsMail::SpamAssassin::Plugin::MIMEEval::::has_check_abundant_unicode_ratioMail::SpamAssassin::Plugin::MIMEEval::has_check_abundant_unicode_ratio
11127µs32µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@22Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@22
11125µs211µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@29Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@29
11125µs87µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@23Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@23
11125µs799µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@27Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@27
11124µs92µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@31Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@31
11123µs72µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@21Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@21
11118µs18µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@26Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@26
11115µs15µsMail::SpamAssassin::Plugin::MIMEEval::::BEGIN@25Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@25
11110µs10µsMail::SpamAssassin::Plugin::MIMEEval::::has_check_for_ascii_text_illegalMail::SpamAssassin::Plugin::MIMEEval::has_check_for_ascii_text_illegal
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::_check_attachmentsMail::SpamAssassin::Plugin::MIMEEval::_check_attachments
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::_check_base64_lengthMail::SpamAssassin::Plugin::MIMEEval::_check_base64_length
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::_check_mime_headerMail::SpamAssassin::Plugin::MIMEEval::_check_mime_header
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::are_more_high_bits_setMail::SpamAssassin::Plugin::MIMEEval::are_more_high_bits_set
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::body_charset_is_likely_to_fpMail::SpamAssassin::Plugin::MIMEEval::body_charset_is_likely_to_fp
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_abundant_unicode_ratioMail::SpamAssassin::Plugin::MIMEEval::check_abundant_unicode_ratio
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_base64_lengthMail::SpamAssassin::Plugin::MIMEEval::check_base64_length
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_for_ascii_text_illegalMail::SpamAssassin::Plugin::MIMEEval::check_for_ascii_text_illegal
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_for_faraway_charsetMail::SpamAssassin::Plugin::MIMEEval::check_for_faraway_charset
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_for_mimeMail::SpamAssassin::Plugin::MIMEEval::check_for_mime
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_for_mime_htmlMail::SpamAssassin::Plugin::MIMEEval::check_for_mime_html
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_for_mime_html_onlyMail::SpamAssassin::Plugin::MIMEEval::check_for_mime_html_only
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_for_uppercaseMail::SpamAssassin::Plugin::MIMEEval::check_for_uppercase
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_ma_non_textMail::SpamAssassin::Plugin::MIMEEval::check_ma_non_text
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_mime_multipart_ratioMail::SpamAssassin::Plugin::MIMEEval::check_mime_multipart_ratio
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_msg_parse_flagsMail::SpamAssassin::Plugin::MIMEEval::check_msg_parse_flags
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::check_qp_ratioMail::SpamAssassin::Plugin::MIMEEval::check_qp_ratio
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::get_charset_from_ct_lineMail::SpamAssassin::Plugin::MIMEEval::get_charset_from_ct_line
0000s0sMail::SpamAssassin::Plugin::MIMEEval::::has_check_qp_ratioMail::SpamAssassin::Plugin::MIMEEval::has_check_qp_ratio
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# <@LICENSE>
2# Licensed to the Apache Software Foundation (ASF) under one or more
3# contributor license agreements. See the NOTICE file distributed with
4# this work for additional information regarding copyright ownership.
5# The ASF licenses this file to you under the Apache License, Version 2.0
6# (the "License"); you may not use this file except in compliance with
7# the License. You may obtain a copy of the License at:
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16# </@LICENSE>
17
18package Mail::SpamAssassin::Plugin::MIMEEval;
19
20278µs270µs
# spent 58µs (46+12) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@20 which was called: # once (46µs+12µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 20
use strict;
# spent 58µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@20 # spent 12µs making 1 call to strict::import
21275µs2121µs
# spent 72µs (23+49) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@21 which was called: # once (23µs+49µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 21
use warnings;
# spent 72µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@21 # spent 49µs making 1 call to warnings::import
22272µs238µs
# spent 32µs (27+6) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@22 which was called: # once (27µs+6µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 22
use bytes;
# spent 32µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@22 # spent 6µs making 1 call to bytes::import
23270µs2149µs
# spent 87µs (25+62) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@23 which was called: # once (25µs+62µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 23
use re 'taint';
# spent 87µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@23 # spent 62µs making 1 call to re::import
24
25256µs115µs
# spent 15µs within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@25 which was called: # once (15µs+0s) by Mail::SpamAssassin::PluginHandler::load_plugin at line 25
use Mail::SpamAssassin::Plugin;
# spent 15µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@25
26263µs118µs
# spent 18µs within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@26 which was called: # once (18µs+0s) by Mail::SpamAssassin::PluginHandler::load_plugin at line 26
use Mail::SpamAssassin::Locales;
# spent 18µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@26
27297µs21.57ms
# spent 799µs (25+774) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@27 which was called: # once (25µs+774µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 27
use Mail::SpamAssassin::Constants qw(:sa CHARSETS_LIKELY_TO_FP_AS_CAPS);
# spent 799µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@27 # spent 774µs making 1 call to Exporter::import
28278µs2270µs
# spent 150µs (32+119) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@28 which was called: # once (32µs+119µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 28
use Mail::SpamAssassin::Util qw(untaint_var);
# spent 150µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@28 # spent 119µs making 1 call to Exporter::import
29278µs2397µs
# spent 211µs (25+186) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@29 which was called: # once (25µs+186µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 29
use Mail::SpamAssassin::Logger;
# spent 211µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@29 # spent 186µs making 1 call to Exporter::import
30
3126.37ms2160µs
# spent 92µs (24+68) within Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@31 which was called: # once (24µs+68µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 31
use vars qw(@ISA);
# spent 92µs making 1 call to Mail::SpamAssassin::Plugin::MIMEEval::BEGIN@31 # spent 68µs making 1 call to vars::import
32115µs@ISA = qw(Mail::SpamAssassin::Plugin);
33
34# constructor: register the eval rule
35
# spent 559µs (189+370) within Mail::SpamAssassin::Plugin::MIMEEval::new which was called: # once (189µs+370µs) by Mail::SpamAssassin::PluginHandler::load_plugin at line 1 of (eval 99)[Mail/SpamAssassin/PluginHandler.pm:129]
sub new {
3612µs my $class = shift;
3712µs my $mailsaobject = shift;
38
39 # some boilerplate...
4012µs $class = ref($class) || $class;
41113µs126µs my $self = $class->SUPER::new($mailsaobject);
# spent 26µs making 1 call to Mail::SpamAssassin::Plugin::new
4212µs bless ($self, $class);
43
44 # the important bit!
45118µs132µs $self->register_eval_rule("check_for_mime");
# spent 32µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
4616µs137µs $self->register_eval_rule("check_for_mime_html");
# spent 37µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
4716µs131µs $self->register_eval_rule("check_for_mime_html_only");
# spent 31µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
4816µs119µs $self->register_eval_rule("check_mime_multipart_ratio");
# spent 19µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
4916µs127µs $self->register_eval_rule("check_msg_parse_flags");
# spent 27µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5016µs130µs $self->register_eval_rule("check_for_ascii_text_illegal");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5116µs121µs $self->register_eval_rule("check_abundant_unicode_ratio");
# spent 21µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5215µs130µs $self->register_eval_rule("check_for_faraway_charset");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5316µs130µs $self->register_eval_rule("check_for_uppercase");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5416µs124µs $self->register_eval_rule("check_ma_non_text");
# spent 24µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5516µs130µs $self->register_eval_rule("check_base64_length");
# spent 30µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
5615µs131µs $self->register_eval_rule("check_qp_ratio");
# spent 31µs making 1 call to Mail::SpamAssassin::Plugin::register_eval_rule
57
58124µs return $self;
59}
60
61###########################################################################
62
63sub are_more_high_bits_set {
64 my ($self, $str) = @_;
65
66 # TODO: I suspect a tr// trick may be faster here
67 my $numhis = () = ($str =~ /[\200-\377]/g);
68 my $numlos = length($str) - $numhis;
69
70 ($numlos <= $numhis && $numhis > 3);
71}
72=over 4
73
74=item has_check_for_ascii_text_illegal
75
76Adds capability check for "if can()" for check_for_ascii_text_illegal
77
78=cut
79
80111µs
# spent 10µs within Mail::SpamAssassin::Plugin::MIMEEval::has_check_for_ascii_text_illegal which was called: # once (10µs+0s) by Mail::SpamAssassin::Conf::Parser::cond_clause_can_or_has at line 595 of Mail/SpamAssassin/Conf/Parser.pm
sub has_check_for_ascii_text_illegal { 1 }
81
82=item check_for_ascii_text_illegal
83
84If a MIME part claims to be text/plain or text/plain;charset=us-ascii and the Content-Transfer-Encoding is 7bit (either explicitly or by default), then we should enforce the actual text being only TAB, NL, SPACE through TILDE, i.e. all 7bit characters excluding NO-WS-CTL (per RFC-2822).
85
86All mainstream MTA's get this right.
87
88=cut
89
90sub check_for_ascii_text_illegal {
91 my ($self, $pms) = @_;
92
93 $self->_check_attachments($pms) unless exists $pms->{mime_ascii_text_illegal};
94 return ($pms->{mime_ascii_text_illegal} > 0);
95}
96
97=item has_check_abundant_unicode_ratio
98
99Adds capability check for "if can()" for check_abundant_unicode_ratio
100
101=cut
102
103216µs
# spent 29µs within Mail::SpamAssassin::Plugin::MIMEEval::has_check_abundant_unicode_ratio which was called 2 times, avg 14µs/call: # 2 times (29µs+0s) by Mail::SpamAssassin::Conf::Parser::cond_clause_can_or_has at line 595 of Mail/SpamAssassin/Conf/Parser.pm, avg 14µs/call
sub has_check_abundant_unicode_ratio { 1 }
104
105=item check_abundant_unicode_ratio
106
107A MIME part claiming to be text/plain and containing Unicode characters must be encoded as quoted-printable or base64, or use UTF data coding (typically with 8bit encoding). Any message in 7bit or 8bit encoding containing (HTML) Unicode entities will not render them as Unicode, but literally.
108
109Thus a few such sequences might occur on a mailing list of developers discussing such characters, but a message with a high density of such characters is likely spam.
110
111=cut
112
113sub check_abundant_unicode_ratio {
114 my ($self, $pms, undef, $ratio) = @_;
115
116 # validate ratio?
117 return 0 unless ($ratio =~ /^\d{0,3}\.\d{1,3}$/);
118
119 $self->_check_attachments($pms) unless exists $pms->{mime_text_unicode_ratio};
120 return ($pms->{mime_text_unicode_ratio} >= $ratio);
121}
122
123sub check_for_faraway_charset {
124 my ($self, $pms, $body) = @_;
125
126 my $type = $pms->get('Content-Type',undef);
127
128 my @locales = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});
129
130 return 0 if grep { $_ eq "all" } @locales;
131
132 $type = get_charset_from_ct_line($type) if defined $type;
133
134 if (defined $type &&
135 !Mail::SpamAssassin::Locales::is_charset_ok_for_locales
136 ($type, @locales))
137 {
138 # sanity check. Some charsets (e.g. koi8-r) include the ASCII
139 # 7-bit charset as well, so make sure we actually have a high
140 # number of 8-bit chars in the body text first.
141
142 $body = join("\n", @$body);
143 if ($self->are_more_high_bits_set ($body)) {
144 return 1;
145 }
146 }
147
148 0;
149}
150
151sub check_for_mime {
152 my ($self, $pms, undef, $test) = @_;
153
154 $self->_check_attachments($pms) unless exists $pms->{$test};
155 return $pms->{$test};
156}
157
158# any text/html MIME part
159sub check_for_mime_html {
160 my ($self, $pms) = @_;
161
162 my $ctype = $pms->get('Content-Type');
163 return 1 if $ctype =~ m{^text/html}i;
164
165 $self->_check_attachments($pms) unless exists $pms->{mime_body_html_count};
166 return ($pms->{mime_body_html_count} > 0);
167}
168
169# HTML without some other type of MIME text part
170sub check_for_mime_html_only {
171 my ($self, $pms) = @_;
172
173 my $ctype = $pms->get('Content-Type');
174 return 1 if $ctype =~ m{^text/html}i;
175
176 $self->_check_attachments($pms) unless exists $pms->{mime_body_html_count};
177 return ($pms->{mime_body_html_count} > 0 &&
178 $pms->{mime_body_text_count} == 0);
179}
180
181sub check_mime_multipart_ratio {
182 my ($self, $pms, undef, $min, $max) = @_;
183
184 $self->_check_attachments($pms) unless exists $pms->{mime_multipart_alternative};
185
186 return ($pms->{mime_multipart_ratio} >= $min &&
187 $pms->{mime_multipart_ratio} < $max);
188}
189
190sub _check_mime_header {
191 my ($self, $pms, $ctype, $cte, $cd, $charset, $name) = @_;
192
193 $charset ||= '';
194
195 if ($ctype eq 'text/html') {
196 $pms->{mime_body_html_count}++;
197 }
198 elsif ($ctype =~ m@^text@i) {
199 $pms->{mime_body_text_count}++;
200 }
201
202 if ($cte =~ /base64/) {
203 $pms->{mime_base64_count}++;
204 }
205 elsif ($cte =~ /quoted-printable/) {
206 $pms->{mime_qp_count}++;
207 }
208
209 if ($cd && $cd =~ /attachment/) {
210 $pms->{mime_attachment}++;
211 }
212
213 if ($ctype =~ /^text/ &&
214 $cte =~ /base64/ &&
215 (!$charset || $charset =~ /(?:us-ascii|ansi_x3\.4-1968|iso-ir-6|ansi_x3\.4-1986|iso_646\.irv:1991|ascii|iso646-us|us|ibm367|cp367|csascii)/) &&
216 !($cd && $cd =~ /^(?:attachment|inline)/))
217 {
218 $pms->{mime_base64_encoded_text} = 1;
219 }
220
221 if ($charset =~ /iso-\S+-\S+\b/i &&
222 $charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
223 {
224 $pms->{mime_bad_iso_charset} = 1;
225 }
226
227 # MIME_BASE64_LATIN: now a zero-hitter
228 # if (!$name &&
229 # $cte =~ /base64/ &&
230 # $charset =~ /\b(?:us-ascii|iso-8859-(?:[12349]|1[0345])|windows-(?:125[0247]))\b/)
231 # {
232 # $pms->{mime_base64_latin} = 1;
233 # }
234
235 # MIME_QP_NO_CHARSET: now a zero-hitter
236 # if ($cte =~ /quoted-printable/ && $cd =~ /inline/ && !$charset) {
237 # $pms->{mime_qp_inline_no_charset} = 1;
238 # }
239
240 # MIME_HTML_NO_CHARSET: now a zero-hitter
241 # if ($ctype eq 'text/html' &&
242 # !(defined($charset) && $charset) &&
243 # !($cd && $cd =~ /^(?:attachment|inline)/))
244 # {
245 # $pms->{mime_html_no_charset} = 1;
246 # }
247
248 if ($charset =~ /[a-z]/i) {
249 if (defined $pms->{mime_html_charsets}) {
250 $pms->{mime_html_charsets} .= " ".$charset;
251 } else {
252 $pms->{mime_html_charsets} = $charset;
253 }
254
255 if (! $pms->{mime_faraway_charset}) {
256 my @l = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});
257
258 if (!(grep { $_ eq "all" } @l) &&
259 !Mail::SpamAssassin::Locales::is_charset_ok_for_locales($charset, @l))
260 {
261 $pms->{mime_faraway_charset} = 1;
262 }
263 }
264 }
265}
266
267sub _check_attachments {
268 my ($self, $pms) = @_;
269
270 # MIME status
271 my $where = -1; # -1 = start, 0 = nowhere, 1 = header, 2 = body
272 my $qp_bytes = 0; # total bytes in QP regions
273 my $qp_count = 0; # QP-encoded bytes in QP regions
274 my @part_bytes; # MIME part total bytes
275 my @part_type; # MIME part types
276
277 my $normal_chars = 0; # MIME text bytes that aren't encoded
278 my $unicode_chars = 0; # MIME text bytes that are unicode entities
279
280 # MIME header information
281 my $part = -1; # MIME part index
282
283 # indicate the scan has taken place
284 $pms->{mime_checked_attachments} = 1;
285
286 # results
287# $pms->{mime_base64_blanks} = 0; # expensive to determine, no longer avail
288 $pms->{mime_base64_count} = 0;
289 $pms->{mime_base64_encoded_text} = 0;
290 # $pms->{mime_base64_illegal} = 0;
291 # $pms->{mime_base64_latin} = 0;
292 $pms->{mime_body_html_count} = 0;
293 $pms->{mime_body_text_count} = 0;
294 $pms->{mime_faraway_charset} = 0;
295 # $pms->{mime_html_no_charset} = 0;
296 $pms->{mime_missing_boundary} = 0;
297 $pms->{mime_multipart_alternative} = 0;
298 $pms->{mime_multipart_ratio} = 1.0;
299 $pms->{mime_qp_count} = 0;
300 # $pms->{mime_qp_illegal} = 0;
301 # $pms->{mime_qp_inline_no_charset} = 0;
302 $pms->{mime_qp_long_line} = 0;
303 $pms->{mime_qp_ratio} = 0;
304 $pms->{mime_ascii_text_illegal} = 0;
305 $pms->{mime_text_unicode_ratio} = 0;
306
307 # Get all parts ...
308 foreach my $p ($pms->{msg}->find_parts(qr/./)) {
309 # message headers
310 my ($ctype, $boundary, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));
311
312 if ($ctype eq 'multipart/alternative') {
313 $pms->{mime_multipart_alternative} = 1;
314 }
315
316 my $cte = $p->get_header('Content-Transfer-Encoding') || '';
317 chomp($cte = defined($cte) ? lc $cte : "");
318
319 my $cd = $p->get_header('Content-Disposition') || '';
320 chomp($cd = defined($cd) ? lc $cd : "");
321
322 $charset = lc $charset if ($charset);
323 $name = lc $name if ($name);
324
325 $self->_check_mime_header($pms, $ctype, $cte, $cd, $charset, $name);
326
327 # If we're not in a leaf node in the tree, there will be no raw
328 # section, so skip it.
329 if (! $p->is_leaf()) {
330 next;
331 }
332
333 $part++;
334 $part_type[$part] = $ctype;
335 $part_bytes[$part] = 0 if $cd !~ /attachment/;
336
337 my $cte_is_base64 = $cte =~ /base64/i;
338 my $previous = '';
339 foreach (@{$p->raw()}) {
340
341 # if ($cte_is_base64) {
342 # if ($previous =~ /^\s*$/ && /^\s*$/) { # expensive, avoid!
343 # $pms->{mime_base64_blanks} = 1; # never used, don't bother
344 # }
345 # # MIME_BASE64_ILLEGAL: now a zero-hitter
346 # # if (m@[^A-Za-z0-9+/=\n]@ || /=[^=\s]/) {
347 # # $pms->{mime_base64_illegal} = 1;
348 # # }
349 # }
350
351 # if ($pms->{mime_html_no_charset} && $ctype eq 'text/html' && defined $charset) {
352 # $pms->{mime_html_no_charset} = 0;
353 # }
354 if ($pms->{mime_multipart_alternative} && $cd !~ /attachment/ &&
355 ($ctype eq 'text/plain' || $ctype eq 'text/html')) {
356 $part_bytes[$part] += length;
357 }
358
359 if ($where != 1 && $cte eq "quoted-printable" && ! /^SPAM: /) {
360 # RFC 5322: Each line SHOULD be no more than 78 characters,
361 # excluding the CRLF.
362 # RFC 2045: The Quoted-Printable encoding REQUIRES that
363 # encoded lines be no more than 76 characters long.
364 # Bug 5491: 6% of email classified as HAM by SA triggered the
365 # MIME_QP_LONG_LINE rule. Apple Mail can generate a QP-line
366 # that is 2 chars too long. Same goes for Outlook Web Access.
367 # lines include one trailing \n character
368 # if (length > 76+1) { # conforms to RFC 5322 and RFC 2045
369 if (length > 78+1) { # conforms to RFC 5322 only, not RFC 2045
370 $pms->{mime_qp_long_line} = 1;
371 }
372 $qp_bytes += length;
373
374 # MIME_QP_DEFICIENT: zero-hitter now
375
376 # check for illegal substrings (RFC 2045), hexadecimal values 7F-FF and
377 # control characters other than TAB, or CR and LF as parts of CRLF pairs
378 # if (!$pms->{mime_qp_illegal} && /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/)
379 # {
380 # $pms->{mime_qp_illegal} = 1;
381 # }
382
383 # count excessive QP bytes
384 if (index($_, '=') != -1) {
385 # whoever wrote this next line is an evil hacker -- jm
386 my $qp = () = m/=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[0-9A-E])/g;
387 if ($qp) {
388 $qp_count += $qp;
389 # tabs and spaces at end of encoded line are okay. Also, multiple
390 # whitespace at the end of a line are OK, like ">=20=20=20=20=20=20".
391 my ($trailing) = m/((?:=09|=20)+)\s*$/g;
392 if ($trailing) {
393 $qp_count -= (length($trailing) / 3);
394 }
395 }
396 }
397 }
398
399 # if our charset is ASCII, this should only contain 7-bit characters
400 # except NUL or a free-standing CR. anything else is a violation of
401 # the definition of charset="us-ascii".
402 if ($ctype eq 'text/plain' && (!defined $charset || $charset eq 'us-ascii')) {
403 # no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
404 if (m/[\x00\x0d\x80-\xff]+/) {
405 if (would_log('dbg', 'eval')) {
406 my $str = $_;
407 $str =~ s/([\x00\x0d\x80-\xff]+)/'<' . unpack('H*', $1) . '>'/eg;
408 dbg("check: ascii_text_illegal: matches " . $str . "\n");
409 }
410 $pms->{mime_ascii_text_illegal}++;
411 }
412 }
413
414 # if we're text/plain, we should never see unicode escapes in this
415 # format, especially not for 7bit or 8bit.
416 if ($ctype eq 'text/plain' && ($cte eq '' || $cte eq '7bit' || $cte eq '8bit')) {
417 my ($text, $subs) = $_;
418
419 $subs = $text =~ s/&#x[0-9A-F]{4};//g;
420 $normal_chars += length($text);
421 $unicode_chars += $subs;
422
423 if ($subs && would_log('dbg', 'eval')) {
424 my $str = $_;
425 $str = substr($str, 0, 512) . '...' if (length($str) > 512);
426 dbg("check: abundant_unicode: " . $str . " (" . $subs . ")\n");
427 }
428 }
429
430 $previous = $_;
431 }
432 }
433
434 if ($qp_bytes) {
435 $pms->{mime_qp_ratio} = $qp_count / $qp_bytes;
436 $pms->{mime_qp_count} = $qp_count;
437 $pms->{mime_qp_bytes} = $qp_bytes;
438 }
439
440 if ($normal_chars) {
441 $pms->{mime_text_unicode_ratio} = $unicode_chars / $normal_chars;
442 }
443
444 if ($pms->{mime_multipart_alternative}) {
445 my $text;
446 my $html;
447 # bug 4207: we want the size of the last parts
448 for (my $i = $part; $i >= 0; $i--) {
449 next if !defined $part_bytes[$i];
450 if (!defined($html) && $part_type[$i] eq 'text/html') {
451 $html = $part_bytes[$i];
452 }
453 elsif (!defined($text) && $part_type[$i] eq 'text/plain') {
454 $text = $part_bytes[$i];
455 }
456 last if (defined($html) && defined($text));
457 }
458 if (defined($text) && defined($html) && $html > 0) {
459 $pms->{mime_multipart_ratio} = ($text / $html);
460 }
461 }
462
463 # Look to see if any multipart boundaries are not "balanced"
464 foreach my $val (values %{$pms->{msg}->{mime_boundary_state}}) {
465 if ($val != 0) {
466 $pms->{mime_missing_boundary} = 1;
467 last;
468 }
469 }
470}
471
472=item has_check_qp_ratio
473
474Adds capability check for "if can()" for check_qp_ratio
475
476=cut
477sub has_check_qp_ratio { 1 }
478
479=item check_qp_ratio
480
481Takes a min ratio to use in eval to see if there is an spamminess to the ratio of
482quoted printable to total bytes in an email.
483
484=back
485
486=cut
487sub check_qp_ratio {
488 my ($self, $pms, undef, $min) = @_;
489
490 $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
491
492 my $qp_ratio = $pms->{mime_qp_ratio};
493
494 dbg("eval: qp_ratio - %s - check for min of %s", $qp_ratio, $min);
495
496 return (defined $qp_ratio && $qp_ratio >= $min) ? 1 : 0;
497}
498
499
500sub check_msg_parse_flags {
501 my($self, $pms, $type, $type2) = @_;
502 $type = $type2 if ref($type);
503 return defined $pms->{msg}->{$type};
504}
505
506sub check_for_uppercase {
507 my ($self, $pms, $body, $min, $max) = @_;
508 local ($_);
509
510 if (exists $pms->{uppercase}) {
511 return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
512 }
513
514 if ($self->body_charset_is_likely_to_fp($pms)) {
515 $pms->{uppercase} = 0; return 0;
516 }
517
518 # Dec 20 2002 jm: trade off some speed for low memory footprint, by
519 # iterating over the array computing sums, instead of joining the
520 # array into a giant string and working from that.
521
522 my $len = 0;
523 my $lower = 0;
524 my $upper = 0;
525 foreach (@{$body}) {
526 # examine lines in the body that have an intermediate space
527 next unless /\S\s+\S/;
528 # strip out lingering base64 (currently possible for forwarded messages)
529 next if /^(?:[A-Za-z0-9+\/=]{60,76} ){2}/;
530
531 my $line = $_; # copy so we don't muck up the original
532
533 # remove shift-JIS charset codes
534 $line =~ s/\x1b\$B.*\x1b\(B//gs;
535
536 $len += length($line);
537
538 # count numerals as lower case, otherwise 'date|mail' is spam
539 $lower += ($line =~ tr/a-z0-9//d);
540 $upper += ($line =~ tr/A-Z//);
541 }
542
543 # report only on mails above a minimum size; otherwise one
544 # or two acronyms can throw it off
545 if ($len < 200) {
546 $pms->{uppercase} = 0;
547 return 0;
548 }
549 if (($upper + $lower) == 0) {
550 $pms->{uppercase} = 0;
551 } else {
552 $pms->{uppercase} = ($upper / ($upper + $lower)) * 100;
553 }
554
555 return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
556}
557
558sub body_charset_is_likely_to_fp {
559 my ($self, $pms) = @_;
560
561 # check for charsets where this test will FP -- iso-2022-jp, gb2312,
562 # koi8-r etc.
563 #
564 $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
565 my @charsets;
566 my $type = $pms->get('Content-Type',undef);
567 $type = get_charset_from_ct_line($type) if defined $type;
568 push (@charsets, $type) if defined $type;
569 if (defined $pms->{mime_html_charsets}) {
570 push (@charsets, split(' ', $pms->{mime_html_charsets}));
571 }
572
573 my $CHARSETS_LIKELY_TO_FP_AS_CAPS = CHARSETS_LIKELY_TO_FP_AS_CAPS;
574 foreach my $charset (@charsets) {
575 if ($charset =~ /^${CHARSETS_LIKELY_TO_FP_AS_CAPS}$/) {
576 return 1;
577 }
578 }
579 return 0;
580}
581
582sub get_charset_from_ct_line {
583 my $type = shift;
584 if (!defined $type) { return; }
585 if ($type =~ /charset="([^"]+)"/i) { return $1; }
586 if ($type =~ /charset='([^']+)'/i) { return $1; }
587 if ($type =~ /charset=(\S+)/i) { return $1; }
588 return;
589}
590
591# came up on the users@ list, look for multipart/alternative parts which
592# include non-text parts -- skip certain types which occur normally in ham
593sub check_ma_non_text {
594 my($self, $pms) = @_;
595
596 foreach my $map ($pms->{msg}->find_parts(qr@^multipart/alternative$@i)) {
597 foreach my $p ($map->find_parts(qr/./, 1, 0)) {
598 next if (lc $p->{'type'} eq 'multipart/related');
599 next if (lc $p->{'type'} eq 'application/rtf');
600 next if ($p->{'type'} =~ m@^text/@i);
601 return 1;
602 }
603 }
604
605 return 0;
606}
607
608sub check_base64_length {
609 my $self = shift;
610 my $pms = shift;
611 shift; # body array, unnecessary
612 my $min = shift;
613 my $max = shift;
614
615 if (!defined $pms->{base64_length}) {
616 $pms->{base64_length} = $self->_check_base64_length($pms->{msg});
617 }
618
619 return 0 if (defined $max && $pms->{base64_length} > $max);
620 return $pms->{base64_length} >= $min;
621}
622
623sub _check_base64_length {
624 my $self = shift;
625 my $msg = shift;
626
627 my $result = 0;
628
629 foreach my $p ($msg->find_parts(qr@.@, 1)) {
630 my $ctype=
631 Mail::SpamAssassin::Util::parse_content_type($p->get_header('content-type'));
632
633 # FPs from Google Calendar invites, etc.
634 # perhaps just limit to test, and image?
635 next if ($ctype eq 'application/ics');
636
637 my $cte = lc($p->get_header('content-transfer-encoding') || '');
638 next if ($cte !~ /^base64$/);
639 foreach my $l ( @{$p->raw()} ) {
640 $result = length $l if length $l > $result;
641 }
642 }
643
644 return $result;
645}
646
64719µs1;