Filename | /usr/local/lib/perl5/site_perl/Mail/SpamAssassin/PerMsgStatus.pm |
Statements | Executed 304519 statements in 3.59s |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
468 | 1 | 1 | 850ms | 4.87s | _get_parsed_uri_list | Mail::SpamAssassin::PerMsgStatus::
3024 | 2 | 2 | 588ms | 11.0s | get_uri_detail_list | Mail::SpamAssassin::PerMsgStatus::
2594 | 1 | 1 | 273ms | 707ms | _get (recurses: max depth 1, inclusive time 175ms) | Mail::SpamAssassin::PerMsgStatus::
3383 | 14 | 2 | 120ms | 777ms | get (recurses: max depth 1, inclusive time 225ms) | Mail::SpamAssassin::PerMsgStatus::
234 | 1 | 1 | 114ms | 5.70s | get_uri_list | Mail::SpamAssassin::PerMsgStatus::
468 | 2 | 2 | 109ms | 138ms | new | Mail::SpamAssassin::PerMsgStatus::
234 | 1 | 1 | 97.6ms | 44.9s | extract_message_metadata | Mail::SpamAssassin::PerMsgStatus::
3028 | 14 | 4 | 80.4ms | 185ms | set_tag | Mail::SpamAssassin::PerMsgStatus::
3028 | 1 | 1 | 64.7ms | 105ms | tag_is_ready | Mail::SpamAssassin::PerMsgStatus::
468 | 1 | 1 | 53.7ms | 70.5ms | _tbirdurire | Mail::SpamAssassin::PerMsgStatus::
321 | 1 | 1 | 48.1ms | 239ms | all_from_addrs | Mail::SpamAssassin::PerMsgStatus::
234 | 1 | 1 | 42.5ms | 422ms | get_envelope_from | Mail::SpamAssassin::PerMsgStatus::
234 | 1 | 1 | 26.6ms | 46.7ms | finish | Mail::SpamAssassin::PerMsgStatus::
702 | 2 | 1 | 21.6ms | 25.9s | get_decoded_stripped_body_text_array | Mail::SpamAssassin::PerMsgStatus::
234 | 1 | 1 | 17.4ms | 19.0ms | action_depends_on_tags | Mail::SpamAssassin::PerMsgStatus::
321 | 1 | 1 | 12.5ms | 12.5ms | get_tag | Mail::SpamAssassin::PerMsgStatus::
313 | 2 | 2 | 8.50ms | 11.4ms | DESTROY | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 6.49ms | 7.65ms | BEGIN@60 | Mail::SpamAssassin::PerMsgStatus::
234 | 1 | 1 | 3.82ms | 3.82ms | report_unsatisfied_actions | Mail::SpamAssassin::PerMsgStatus::
313 | 1 | 1 | 2.92ms | 2.92ms | delete_fulltext_tmpfile | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 2.66ms | 3.78ms | BEGIN@63 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 441µs | 3.61ms | all_to_addrs | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 115µs | 115µs | BEGIN@82 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 48µs | 57µs | BEGIN@52 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 39µs | 244µs | BEGIN@64 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 37µs | 156µs | BEGIN@56 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 31µs | 106µs | BEGIN@54 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 30µs | 202µs | BEGIN@62 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 30µs | 110µs | BEGIN@80 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 25µs | 56µs | BEGIN@53 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 23µs | 815µs | BEGIN@59 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 22µs | 232µs | BEGIN@66 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 21µs | 398µs | BEGIN@57 | Mail::SpamAssassin::PerMsgStatus::
1 | 1 | 1 | 16µs | 16µs | BEGIN@61 | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:103] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:108] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:112] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:122] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:128] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:133] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:138] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:144] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:150] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:156] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:161] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:167] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:169] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:178] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:183] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:188] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:194] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:200] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:213] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:218] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:223] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:230] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:235] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:240] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:245] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:251] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:338] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:439] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:88] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:93] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | __ANON__[:98] | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _fixup_report_line_endings | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _get_added_headers | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _get_autolearn_points | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _get_tag_value_for_required_score | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _get_tag_value_for_score | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _get_tag_value_for_yesno | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _handle_hit | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _process_header | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _replace_tags | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _test_log_line | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | _wrap_desc | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | all_from_addrs_domains | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | check | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | check_timed | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | clear_test_state | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | create_fulltext_tmpfile | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | ensure_rules_are_complete | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | finish_tests | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_all_hdrs_in_rcvd_index_range | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_autolearn_force_names | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_autolearn_force_status | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_autolearn_points | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_autolearn_status | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_body_only_points | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_content_preview | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_current_eval_rule_name | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_decoded_body_text_array | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_head_only_points | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_hits | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_learned_points | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_message | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_names_of_subtests_hit | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_names_of_tests_hit | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_names_of_tests_hit_with_scores | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_names_of_tests_hit_with_scores_hash | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_report | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_required_hits | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_required_score | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_score | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_spamd_result_log_items | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | get_tag_raw | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | got_hit | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | handle_eval_rule_errors | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | is_spam | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | learn | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | learn_timed | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | qp_encode_header | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | register_plugin_eval_glue | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | rewrite_mail | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | rewrite_no_report_safe | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | rewrite_report_safe | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | sa_die | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | set_spamd_result_item | Mail::SpamAssassin::PerMsgStatus::
0 | 0 | 0 | 0s | 0s | test_log | Mail::SpamAssassin::PerMsgStatus::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # <@LICENSE> | ||||
2 | # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
3 | # contributor license agreements. See the NOTICE file distributed with | ||||
4 | # this work for additional information regarding copyright ownership. | ||||
5 | # The ASF licenses this file to you under the Apache License, Version 2.0 | ||||
6 | # (the "License"); you may not use this file except in compliance with | ||||
7 | # the License. You may obtain a copy of the License at: | ||||
8 | # | ||||
9 | # http://www.apache.org/licenses/LICENSE-2.0 | ||||
10 | # | ||||
11 | # Unless required by applicable law or agreed to in writing, software | ||||
12 | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
14 | # See the License for the specific language governing permissions and | ||||
15 | # limitations under the License. | ||||
16 | # </@LICENSE> | ||||
17 | |||||
18 | =head1 NAME | ||||
19 | |||||
20 | Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam) | ||||
21 | |||||
22 | =head1 SYNOPSIS | ||||
23 | |||||
24 | my $spamtest = new Mail::SpamAssassin ({ | ||||
25 | 'rules_filename' => '/etc/spamassassin.rules', | ||||
26 | 'userprefs_filename' => $ENV{HOME}.'/.spamassassin/user_prefs' | ||||
27 | }); | ||||
28 | my $mail = $spamtest->parse(); | ||||
29 | |||||
30 | my $status = $spamtest->check ($mail); | ||||
31 | |||||
32 | my $rewritten_mail; | ||||
33 | if ($status->is_spam()) { | ||||
34 | $rewritten_mail = $status->rewrite_mail (); | ||||
35 | } | ||||
36 | ... | ||||
37 | |||||
38 | |||||
39 | =head1 DESCRIPTION | ||||
40 | |||||
41 | The Mail::SpamAssassin C<check()> method returns an object of this | ||||
42 | class. This object encapsulates all the per-message state. | ||||
43 | |||||
44 | =head1 METHODS | ||||
45 | |||||
46 | =over 4 | ||||
47 | |||||
48 | =cut | ||||
49 | |||||
50 | package Mail::SpamAssassin::PerMsgStatus; | ||||
51 | |||||
52 | 2 | 63µs | 2 | 66µs | # spent 57µs (48+9) within Mail::SpamAssassin::PerMsgStatus::BEGIN@52 which was called:
# once (48µs+9µs) by Mail::SpamAssassin::BEGIN@74 at line 52 # spent 57µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@52
# spent 9µs making 1 call to strict::import |
53 | 2 | 69µs | 2 | 88µs | # spent 56µs (25+31) within Mail::SpamAssassin::PerMsgStatus::BEGIN@53 which was called:
# once (25µs+31µs) by Mail::SpamAssassin::BEGIN@74 at line 53 # spent 56µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@53
# spent 31µs making 1 call to warnings::import |
54 | 2 | 84µs | 2 | 182µs | # spent 106µs (31+75) within Mail::SpamAssassin::PerMsgStatus::BEGIN@54 which was called:
# once (31µs+75µs) by Mail::SpamAssassin::BEGIN@74 at line 54 # spent 106µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@54
# spent 75µs making 1 call to re::import |
55 | |||||
56 | 2 | 72µs | 2 | 276µs | # spent 156µs (37+120) within Mail::SpamAssassin::PerMsgStatus::BEGIN@56 which was called:
# once (37µs+120µs) by Mail::SpamAssassin::BEGIN@74 at line 56 # spent 156µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@56
# spent 120µs making 1 call to Exporter::import |
57 | 2 | 76µs | 2 | 775µs | # spent 398µs (21+377) within Mail::SpamAssassin::PerMsgStatus::BEGIN@57 which was called:
# once (21µs+377µs) by Mail::SpamAssassin::BEGIN@74 at line 57 # spent 398µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@57
# spent 377µs making 1 call to Time::HiRes::import |
58 | |||||
59 | 2 | 77µs | 2 | 1.61ms | # spent 815µs (23+792) within Mail::SpamAssassin::PerMsgStatus::BEGIN@59 which was called:
# once (23µs+792µs) by Mail::SpamAssassin::BEGIN@74 at line 59 # spent 815µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@59
# spent 792µs making 1 call to Exporter::import |
60 | 2 | 334µs | 1 | 7.65ms | # spent 7.65ms (6.49+1.16) within Mail::SpamAssassin::PerMsgStatus::BEGIN@60 which was called:
# once (6.49ms+1.16ms) by Mail::SpamAssassin::BEGIN@74 at line 60 # spent 7.65ms making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@60 |
61 | 2 | 66µs | 1 | 16µs | # spent 16µs within Mail::SpamAssassin::PerMsgStatus::BEGIN@61 which was called:
# once (16µs+0s) by Mail::SpamAssassin::BEGIN@74 at line 61 # spent 16µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@61 |
62 | 2 | 65µs | 2 | 374µs | # spent 202µs (30+172) within Mail::SpamAssassin::PerMsgStatus::BEGIN@62 which was called:
# once (30µs+172µs) by Mail::SpamAssassin::BEGIN@74 at line 62 # spent 202µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@62
# spent 172µs making 1 call to Exporter::import |
63 | 2 | 387µs | 1 | 3.78ms | # spent 3.78ms (2.66+1.12) within Mail::SpamAssassin::PerMsgStatus::BEGIN@63 which was called:
# once (2.66ms+1.12ms) by Mail::SpamAssassin::BEGIN@74 at line 63 # spent 3.78ms making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@63 |
64 | 2 | 79µs | 2 | 449µs | # spent 244µs (39+205) within Mail::SpamAssassin::PerMsgStatus::BEGIN@64 which was called:
# once (39µs+205µs) by Mail::SpamAssassin::BEGIN@74 at line 64 # spent 244µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@64
# spent 205µs making 1 call to Exporter::import |
65 | |||||
66 | 1 | 2µs | # spent 232µs (22+210) within Mail::SpamAssassin::PerMsgStatus::BEGIN@66 which was called:
# once (22µs+210µs) by Mail::SpamAssassin::BEGIN@74 at line 68 | ||
67 | @ISA @TEMPORARY_METHODS %TEMPORARY_EVAL_GLUE_METHODS | ||||
68 | 1 | 121µs | 2 | 442µs | }; # spent 232µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@66
# spent 210µs making 1 call to vars::import |
69 | |||||
70 | 1 | 17µs | @ISA = qw(); | ||
71 | |||||
72 | # methods defined by the compiled ruleset; deleted in finish_tests() | ||||
73 | 1 | 2µs | @TEMPORARY_METHODS = (); | ||
74 | |||||
75 | # methods defined by register_plugin_eval_glue(); deleted in finish_tests() | ||||
76 | 1 | 4µs | %TEMPORARY_EVAL_GLUE_METHODS = (); | ||
77 | |||||
78 | ########################################################################### | ||||
79 | |||||
80 | 2 | 2.16ms | 2 | 190µs | # spent 110µs (30+80) within Mail::SpamAssassin::PerMsgStatus::BEGIN@80 which was called:
# once (30µs+80µs) by Mail::SpamAssassin::BEGIN@74 at line 80 # spent 110µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@80
# spent 80µs making 1 call to vars::import |
81 | |||||
82 | # spent 115µs within Mail::SpamAssassin::PerMsgStatus::BEGIN@82 which was called:
# once (115µs+0s) by Mail::SpamAssassin::BEGIN@74 at line 254 | ||||
83 | %common_tags = ( | ||||
84 | |||||
85 | YESNO => sub { | ||||
86 | my $pms = shift; | ||||
87 | $pms->_get_tag_value_for_yesno(@_); | ||||
88 | }, | ||||
89 | |||||
90 | YESNOCAPS => sub { | ||||
91 | my $pms = shift; | ||||
92 | uc $pms->_get_tag_value_for_yesno(@_); | ||||
93 | }, | ||||
94 | |||||
95 | SCORE => sub { | ||||
96 | my $pms = shift; | ||||
97 | $pms->_get_tag_value_for_score(@_); | ||||
98 | }, | ||||
99 | |||||
100 | HITS => sub { | ||||
101 | my $pms = shift; | ||||
102 | $pms->_get_tag_value_for_score(@_); | ||||
103 | }, | ||||
104 | |||||
105 | REQD => sub { | ||||
106 | my $pms = shift; | ||||
107 | $pms->_get_tag_value_for_required_score(@_); | ||||
108 | }, | ||||
109 | |||||
110 | VERSION => \&Mail::SpamAssassin::Version, | ||||
111 | |||||
112 | SUBVERSION => sub { $Mail::SpamAssassin::SUB_VERSION }, | ||||
113 | |||||
114 | RULESVERSION => sub { | ||||
115 | my $pms = shift; | ||||
116 | my $conf = $pms->{conf}; | ||||
117 | my @fnames; | ||||
118 | @fnames = | ||||
119 | keys %{$conf->{update_version}} if $conf->{update_version}; | ||||
120 | @fnames = sort @fnames if @fnames > 1; | ||||
121 | join(',', map($conf->{update_version}{$_}, @fnames)); | ||||
122 | }, | ||||
123 | |||||
124 | HOSTNAME => sub { | ||||
125 | my $pms = shift; | ||||
126 | $pms->{conf}->{report_hostname} || | ||||
127 | Mail::SpamAssassin::Util::fq_hostname(); | ||||
128 | }, | ||||
129 | |||||
130 | REMOTEHOSTNAME => sub { | ||||
131 | my $pms = shift; | ||||
132 | $pms->{tag_data}->{'REMOTEHOSTNAME'} || "localhost"; | ||||
133 | }, | ||||
134 | |||||
135 | REMOTEHOSTADDR => sub { | ||||
136 | my $pms = shift; | ||||
137 | $pms->{tag_data}->{'REMOTEHOSTADDR'} || "127.0.0.1"; | ||||
138 | }, | ||||
139 | |||||
140 | LASTEXTERNALIP => sub { | ||||
141 | my $pms = shift; | ||||
142 | my $lasthop = $pms->{relays_external}->[0]; | ||||
143 | $lasthop ? $lasthop->{ip} : ''; | ||||
144 | }, | ||||
145 | |||||
146 | LASTEXTERNALRDNS => sub { | ||||
147 | my $pms = shift; | ||||
148 | my $lasthop = $pms->{relays_external}->[0]; | ||||
149 | $lasthop ? $lasthop->{rdns} : ''; | ||||
150 | }, | ||||
151 | |||||
152 | LASTEXTERNALHELO => sub { | ||||
153 | my $pms = shift; | ||||
154 | my $lasthop = $pms->{relays_external}->[0]; | ||||
155 | $lasthop ? $lasthop->{helo} : ''; | ||||
156 | }, | ||||
157 | |||||
158 | CONTACTADDRESS => sub { | ||||
159 | my $pms = shift; | ||||
160 | $pms->{conf}->{report_contact}; | ||||
161 | }, | ||||
162 | |||||
163 | BAYES => sub { | ||||
164 | my $pms = shift; | ||||
165 | defined $pms->{bayes_score} ? sprintf("%3.4f", $pms->{bayes_score}) | ||||
166 | : "0.5"; | ||||
167 | }, | ||||
168 | |||||
169 | DATE => sub { Mail::SpamAssassin::Util::time_to_rfc822_date() }, | ||||
170 | |||||
171 | STARS => sub { | ||||
172 | my $pms = shift; | ||||
173 | my $arg = (shift || "*"); | ||||
174 | my $length = int($pms->{score}); | ||||
175 | $length = 50 if $length > 50; | ||||
176 | # avoid a perl 5.21 warning: "Negative repeat count does nothing" | ||||
177 | $length > 0 ? $arg x $length : ''; | ||||
178 | }, | ||||
179 | |||||
180 | AUTOLEARN => sub { | ||||
181 | my $pms = shift; | ||||
182 | $pms->get_autolearn_status(); | ||||
183 | }, | ||||
184 | |||||
185 | AUTOLEARNSCORE => sub { | ||||
186 | my $pms = shift; | ||||
187 | $pms->get_autolearn_points(); | ||||
188 | }, | ||||
189 | |||||
190 | TESTS => sub { | ||||
191 | my $pms = shift; | ||||
192 | my $arg = (shift || ','); | ||||
193 | join($arg, sort(@{$pms->{test_names_hit}})) || "none"; | ||||
194 | }, | ||||
195 | |||||
196 | SUBTESTS => sub { | ||||
197 | my $pms = shift; | ||||
198 | my $arg = (shift || ','); | ||||
199 | join($arg, sort(@{$pms->{subtest_names_hit}})) || "none"; | ||||
200 | }, | ||||
201 | |||||
202 | TESTSSCORES => sub { | ||||
203 | my $pms = shift; | ||||
204 | my $arg = (shift || ","); | ||||
205 | my $line = ''; | ||||
206 | foreach my $test (sort @{$pms->{test_names_hit}}) { | ||||
207 | my $score = $pms->{conf}->{scores}->{$test}; | ||||
208 | $score = '0' if !defined $score; | ||||
209 | $line .= $arg if $line ne ''; | ||||
210 | $line .= $test . "=" . $score; | ||||
211 | } | ||||
212 | $line ne '' ? $line : 'none'; | ||||
213 | }, | ||||
214 | |||||
215 | PREVIEW => sub { | ||||
216 | my $pms = shift; | ||||
217 | $pms->get_content_preview(); | ||||
218 | }, | ||||
219 | |||||
220 | REPORT => sub { | ||||
221 | my $pms = shift; | ||||
222 | "\n" . ($pms->{tag_data}->{REPORT} || ""); | ||||
223 | }, | ||||
224 | |||||
225 | HEADER => sub { | ||||
226 | my $pms = shift; | ||||
227 | my $hdr = shift; | ||||
228 | return if !$hdr; | ||||
229 | $pms->get($hdr,undef); | ||||
230 | }, | ||||
231 | |||||
232 | TIMING => sub { | ||||
233 | my $pms = shift; | ||||
234 | $pms->{main}->timer_report(); | ||||
235 | }, | ||||
236 | |||||
237 | ADDEDHEADERHAM => sub { | ||||
238 | my $pms = shift; | ||||
239 | $pms->_get_added_headers('headers_ham'); | ||||
240 | }, | ||||
241 | |||||
242 | ADDEDHEADERSPAM => sub { | ||||
243 | my $pms = shift; | ||||
244 | $pms->_get_added_headers('headers_spam'); | ||||
245 | }, | ||||
246 | |||||
247 | ADDEDHEADER => sub { | ||||
248 | my $pms = shift; | ||||
249 | $pms->_get_added_headers( | ||||
250 | $pms->{is_spam} ? 'headers_spam' : 'headers_ham'); | ||||
251 | }, | ||||
252 | |||||
253 | 1 | 116µs | ); | ||
254 | 1 | 24.4ms | 1 | 115µs | } # spent 115µs making 1 call to Mail::SpamAssassin::PerMsgStatus::BEGIN@82 |
255 | |||||
256 | # spent 138ms (109+29.2) within Mail::SpamAssassin::PerMsgStatus::new which was called 468 times, avg 294µs/call:
# 234 times (54.0ms+15.1ms) by Mail::SpamAssassin::Plugin::Bayes::get_body_from_msg at line 1024 of Mail/SpamAssassin/Plugin/Bayes.pm, avg 295µs/call
# 234 times (54.7ms+14.1ms) by Mail::SpamAssassin::Plugin::TxRep::learn_message at line 1773 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 294µs/call | ||||
257 | 468 | 1.20ms | my $class = shift; | ||
258 | 468 | 1.24ms | $class = ref($class) || $class; | ||
259 | 468 | 1.17ms | my ($main, $msg, $opts) = @_; | ||
260 | |||||
261 | my $self = { | ||||
262 | 'main' => $main, | ||||
263 | 'msg' => $msg, | ||||
264 | 'score' => 0, | ||||
265 | 'test_log_msgs' => { }, | ||||
266 | 'test_names_hit' => [ ], | ||||
267 | 'subtest_names_hit' => [ ], | ||||
268 | 'spamd_result_log_items' => [ ], | ||||
269 | 'tests_already_hit' => { }, | ||||
270 | 'c' => { }, | ||||
271 | 'tag_data' => { }, | ||||
272 | 'rule_errors' => 0, | ||||
273 | 'disable_auto_learning' => 0, | ||||
274 | 'auto_learn_status' => undef, | ||||
275 | 'auto_learn_force_status' => undef, | ||||
276 | 'conf' => $main->{conf}, | ||||
277 | 'async' => Mail::SpamAssassin::AsyncLoop->new($main), | ||||
278 | 'master_deadline' => $msg->{master_deadline}, # dflt inherited from msg | ||||
279 | 468 | 17.6ms | 468 | 15.4ms | 'deadline_exceeded' => 0, # time limit exceeded, skipping further tests # spent 15.4ms making 468 calls to Mail::SpamAssassin::AsyncLoop::new, avg 33µs/call |
280 | }; | ||||
281 | #$self->{main}->{use_rule_subs} = 1; | ||||
282 | |||||
283 | dbg("check: pms new, time limit in %.3f s", | ||||
284 | 468 | 10.1ms | 936 | 6.21ms | $self->{master_deadline} - time) if $self->{master_deadline}; # spent 3.14ms making 468 calls to Mail::SpamAssassin::Logger::dbg, avg 7µs/call
# spent 3.07ms making 468 calls to Time::HiRes::time, avg 7µs/call |
285 | |||||
286 | 468 | 973µs | if (defined $opts && $opts->{disable_auto_learning}) { | ||
287 | $self->{disable_auto_learning} = 1; | ||||
288 | } | ||||
289 | |||||
290 | # used with "mass-check --loghits" | ||||
291 | 468 | 1.44ms | if ($self->{main}->{save_pattern_hits}) { | ||
292 | $self->{save_pattern_hits} = 1; | ||||
293 | $self->{pattern_hits} = { }; | ||||
294 | } | ||||
295 | |||||
296 | 468 | 1.20ms | delete $self->{should_log_rule_hits}; | ||
297 | 468 | 3.82ms | 468 | 7.58ms | my $dbgcache = would_log('dbg', 'rules'); # spent 7.58ms making 468 calls to Mail::SpamAssassin::Logger::would_log, avg 16µs/call |
298 | 468 | 1.29ms | if ($dbgcache || $self->{save_pattern_hits}) { | ||
299 | $self->{should_log_rule_hits} = 1; | ||||
300 | } | ||||
301 | |||||
302 | # known valid tags that might not get their entry in pms->{tag_data} | ||||
303 | # in some circumstances | ||||
304 | 468 | 1.11ms | my $tag_data_ref = $self->{tag_data}; | ||
305 | 1872 | 11.9ms | foreach (qw(SUMMARY REPORT RBL)) { $tag_data_ref->{$_} = '' } | ||
306 | 468 | 3.20ms | foreach (qw(AWL AWLMEAN AWLCOUNT AWLPRESCORE | ||
307 | DCCB DCCR DCCREP PYZOR DKIMIDENTITY DKIMDOMAIN | ||||
308 | BAYESTC BAYESTCLEARNED BAYESTCSPAMMY BAYESTCHAMMY | ||||
309 | HAMMYTOKENS SPAMMYTOKENS TOKENSUMMARY)) { | ||||
310 | 7956 | 46.3ms | $tag_data_ref->{$_} = undef; # exist, but undefined | ||
311 | } | ||||
312 | |||||
313 | 468 | 1.13ms | bless ($self, $class); | ||
314 | 468 | 16.6ms | $self; | ||
315 | } | ||||
316 | |||||
317 | # spent 11.4ms (8.50+2.92) within Mail::SpamAssassin::PerMsgStatus::DESTROY which was called 313 times, avg 36µs/call:
# 234 times (6.41ms+2.11ms) by Mail::SpamAssassin::Plugin::Bayes::get_body_from_msg at line 1035 of Mail/SpamAssassin/Plugin/Bayes.pm, avg 36µs/call
# 79 times (2.09ms+812µs) by Mail::SpamAssassin::Plugin::TxRep::learn_message at line 1784 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 37µs/call | ||||
318 | 313 | 770µs | my ($self) = shift; | ||
319 | 313 | 649µs | local $@; | ||
320 | 626 | 16.5ms | 313 | 2.92ms | eval { $self->delete_fulltext_tmpfile() }; # Bug 5808 # spent 2.92ms making 313 calls to Mail::SpamAssassin::PerMsgStatus::delete_fulltext_tmpfile, avg 9µs/call |
321 | } | ||||
322 | |||||
323 | ########################################################################### | ||||
324 | |||||
325 | =item $status->check () | ||||
326 | |||||
327 | Runs the SpamAssassin rules against the message pointed to by the object. | ||||
328 | |||||
329 | =cut | ||||
330 | |||||
331 | sub check { | ||||
332 | my ($self) = shift; | ||||
333 | my $master_deadline = $self->{master_deadline}; | ||||
334 | if (!$master_deadline) { | ||||
335 | $self->check_timed(@_); | ||||
336 | } else { | ||||
337 | my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline }); | ||||
338 | my $err = $t->run(sub { $self->check_timed(@_) }); | ||||
339 | if (time > $master_deadline && !$self->{deadline_exceeded}) { | ||||
340 | info("check: exceeded time limit in pms check"); | ||||
341 | $self->{deadline_exceeded} = 1; | ||||
342 | } | ||||
343 | } | ||||
344 | } | ||||
345 | |||||
346 | sub check_timed { | ||||
347 | my ($self) = @_; | ||||
348 | local ($_); | ||||
349 | |||||
350 | $self->{learned_points} = 0; | ||||
351 | $self->{body_only_points} = 0; | ||||
352 | $self->{head_only_points} = 0; | ||||
353 | $self->{score} = 0; | ||||
354 | |||||
355 | # clear NetSet cache before every check to prevent it growing too large | ||||
356 | foreach my $nset_name (qw(internal_networks trusted_networks msa_networks)) { | ||||
357 | my $netset = $self->{conf}->{$nset_name}; | ||||
358 | $netset->ditch_cache() if $netset; | ||||
359 | } | ||||
360 | |||||
361 | $self->{main}->call_plugins ("check_start", { permsgstatus => $self }); | ||||
362 | |||||
363 | # in order of slowness; fastest first, slowest last. | ||||
364 | # we do ALL the tests, even if a spam triggers lots of them early on. | ||||
365 | # this lets us see ludicrously spammish mails (score: 40) etc., which | ||||
366 | # we can then immediately submit to spamblocking services. | ||||
367 | # | ||||
368 | # TODO: change this to do whitelist/blacklists first? probably a plan | ||||
369 | # NOTE: definitely need AWL stuff last, for regression-to-mean of score | ||||
370 | |||||
371 | # TVD: we may want to do more than just clearing out the headers, but ... | ||||
372 | $self->{msg}->delete_header('X-Spam-.*'); | ||||
373 | |||||
374 | # Resident Mail::SpamAssassin code will possibly never change score | ||||
375 | # sets, even if bayes becomes available. So we should do a quick check | ||||
376 | # to see if we should go from {0,1} to {2,3}. We of course don't need | ||||
377 | # to do this switch if we're already using bayes ... ;) | ||||
378 | my $set = $self->{conf}->get_score_set(); | ||||
379 | if (($set & 2) == 0 && $self->{main}->{bayes_scanner} && $self->{main}->{bayes_scanner}->is_scan_available()) { | ||||
380 | dbg("check: scoreset $set but bayes is available, switching scoresets"); | ||||
381 | $self->{conf}->set_score_set ($set|2); | ||||
382 | } | ||||
383 | |||||
384 | # The primary check functionality occurs via a plugin call. For more | ||||
385 | # information, please see: Mail::SpamAssassin::Plugin::Check | ||||
386 | if (!$self->{main}->call_plugins ("check_main", { permsgstatus => $self })) | ||||
387 | { | ||||
388 | # did anything happen? if not, this is fatal | ||||
389 | if (!$self->{main}->have_plugin("check_main")) { | ||||
390 | die "check: no loaded plugin implements 'check_main': cannot scan!\n". | ||||
391 | "Check that the necessary '.pre' files are in the config directory.\n". | ||||
392 | "At a minimum, v320.pre loads the Check plugin which is required.\n"; | ||||
393 | } | ||||
394 | } | ||||
395 | |||||
396 | # delete temporary storage and memory allocation used during checking | ||||
397 | $self->delete_fulltext_tmpfile(); | ||||
398 | |||||
399 | # now that we've finished checking the mail, clear out this cache | ||||
400 | # to avoid unforeseen side-effects. | ||||
401 | $self->{c} = { }; | ||||
402 | |||||
403 | # Round the score to 3 decimal places to avoid rounding issues | ||||
404 | # We assume required_score to be properly rounded already. | ||||
405 | # add 0 to force it back to numeric representation instead of string. | ||||
406 | $self->{score} = (sprintf "%0.3f", $self->{score}) + 0; | ||||
407 | |||||
408 | dbg("check: is spam? score=".$self->{score}. | ||||
409 | " required=".$self->{conf}->{required_score}); | ||||
410 | dbg("check: tests=".$self->get_names_of_tests_hit()); | ||||
411 | dbg("check: subtests=".$self->get_names_of_subtests_hit()); | ||||
412 | $self->{is_spam} = $self->is_spam(); | ||||
413 | |||||
414 | $self->{main}->{resolver}->bgabort(); | ||||
415 | $self->{main}->call_plugins ("check_end", { permsgstatus => $self }); | ||||
416 | |||||
417 | 1; | ||||
418 | } | ||||
419 | |||||
420 | ########################################################################### | ||||
421 | |||||
422 | =item $status->learn() | ||||
423 | |||||
424 | After a mail message has been checked, this method can be called. If the score | ||||
425 | is outside a certain range around the threshold, ie. if the message is judged | ||||
426 | more-or-less definitely spam or definitely non-spam, it will be fed into | ||||
427 | SpamAssassin's learning systems (currently the naive Bayesian classifier), | ||||
428 | so that future similar mails will be caught. | ||||
429 | |||||
430 | =cut | ||||
431 | |||||
432 | sub learn { | ||||
433 | my ($self) = shift; | ||||
434 | my $master_deadline = $self->{master_deadline}; | ||||
435 | if (!$master_deadline) { | ||||
436 | $self->learn_timed(@_); | ||||
437 | } else { | ||||
438 | my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline }); | ||||
439 | my $err = $t->run(sub { $self->learn_timed(@_) }); | ||||
440 | if (time > $master_deadline && !$self->{deadline_exceeded}) { | ||||
441 | info("learn: exceeded time limit in pms learn"); | ||||
442 | $self->{deadline_exceeded} = 1; | ||||
443 | } | ||||
444 | } | ||||
445 | } | ||||
446 | |||||
447 | sub learn_timed { | ||||
448 | my ($self) = @_; | ||||
449 | |||||
450 | if (!$self->{conf}->{bayes_auto_learn} || | ||||
451 | !$self->{conf}->{use_bayes} || | ||||
452 | $self->{disable_auto_learning}) | ||||
453 | { | ||||
454 | $self->{auto_learn_status} = "disabled"; | ||||
455 | return; | ||||
456 | } | ||||
457 | |||||
458 | my ($isspam, $force_autolearn, $force_autolearn_names, $arrayref); | ||||
459 | $arrayref = $self->{main}->call_plugins ("autolearn_discriminator", { | ||||
460 | permsgstatus => $self | ||||
461 | }); | ||||
462 | |||||
463 | $isspam = $arrayref->[0]; | ||||
464 | $force_autolearn = $arrayref->[1]; | ||||
465 | $force_autolearn_names = $arrayref->[2]; | ||||
466 | |||||
467 | #AUTOLEARN_FORCE FLAG INFORMATION | ||||
468 | if (defined $force_autolearn and $force_autolearn > 0) { | ||||
469 | $self->{auto_learn_force_status} = "yes"; | ||||
470 | if (defined $force_autolearn_names) { | ||||
471 | $self->{auto_learn_force_status} .= " ($force_autolearn_names)"; | ||||
472 | } | ||||
473 | } else { | ||||
474 | $self->{auto_learn_force_status} = "no"; | ||||
475 | } | ||||
476 | |||||
477 | if (!defined $isspam) { | ||||
478 | $self->{auto_learn_status} = 'no'; | ||||
479 | return; | ||||
480 | } | ||||
481 | |||||
482 | |||||
483 | my $timer = $self->{main}->time_method("learn"); | ||||
484 | |||||
485 | $self->{main}->call_plugins ("autolearn", { | ||||
486 | permsgstatus => $self, | ||||
487 | isspam => $isspam | ||||
488 | }); | ||||
489 | |||||
490 | # bug 3704: temporarily override learn's ability to re-learn a message | ||||
491 | my $orig_learner = $self->{main}->init_learner({ "no_relearn" => 1 }); | ||||
492 | |||||
493 | my $eval_stat; | ||||
494 | eval { | ||||
495 | my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0); | ||||
496 | if ($learnstatus->did_learn()) { | ||||
497 | $self->{auto_learn_status} = $isspam ? "spam" : "ham"; | ||||
498 | } | ||||
499 | # This must wait until the did_learn call. | ||||
500 | $learnstatus->finish(); | ||||
501 | $self->{main}->finish_learner(); # for now | ||||
502 | |||||
503 | if (exists $self->{main}->{bayes_scanner}) { | ||||
504 | $self->{main}->{bayes_scanner}->force_close(); | ||||
505 | } | ||||
506 | 1; | ||||
507 | } or do { | ||||
508 | $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; | ||||
509 | }; | ||||
510 | |||||
511 | # reset learner options to their original values | ||||
512 | $self->{main}->init_learner($orig_learner); | ||||
513 | |||||
514 | if (defined $eval_stat) { | ||||
515 | dbg("learn: auto-learning failed: $eval_stat"); | ||||
516 | $self->{auto_learn_status} = "failed"; | ||||
517 | } | ||||
518 | } | ||||
519 | |||||
520 | =item $score = $status->get_autolearn_points() | ||||
521 | |||||
522 | Return the message's score as computed for auto-learning. Certain tests are | ||||
523 | ignored: | ||||
524 | |||||
525 | - rules with tflags set to 'learn' (the Bayesian rules) | ||||
526 | |||||
527 | - rules with tflags set to 'userconf' (user white/black-listing rules, etc) | ||||
528 | |||||
529 | - rules with tflags set to 'noautolearn' | ||||
530 | |||||
531 | Also note that auto-learning occurs using scores from either scoreset 0 or 1, | ||||
532 | depending on what scoreset is used during message check. It is likely that the | ||||
533 | message check and auto-learn scores will be different. | ||||
534 | |||||
535 | =cut | ||||
536 | |||||
537 | sub get_autolearn_points { | ||||
538 | my ($self) = @_; | ||||
539 | $self->_get_autolearn_points(); | ||||
540 | return $self->{autolearn_points}; | ||||
541 | } | ||||
542 | |||||
543 | =item $score = $status->get_head_only_points() | ||||
544 | |||||
545 | Return the message's score as computed for auto-learning, ignoring | ||||
546 | all rules except for header-based ones. | ||||
547 | |||||
548 | =cut | ||||
549 | |||||
550 | sub get_head_only_points { | ||||
551 | my ($self) = @_; | ||||
552 | $self->_get_autolearn_points(); | ||||
553 | return $self->{head_only_points}; | ||||
554 | } | ||||
555 | |||||
556 | =item $score = $status->get_learned_points() | ||||
557 | |||||
558 | Return the message's score as computed for auto-learning, ignoring | ||||
559 | all rules except for learning-based ones. | ||||
560 | |||||
561 | =cut | ||||
562 | |||||
563 | sub get_learned_points { | ||||
564 | my ($self) = @_; | ||||
565 | $self->_get_autolearn_points(); | ||||
566 | return $self->{learned_points}; | ||||
567 | } | ||||
568 | |||||
569 | =item $score = $status->get_body_only_points() | ||||
570 | |||||
571 | Return the message's score as computed for auto-learning, ignoring | ||||
572 | all rules except for body-based ones. | ||||
573 | |||||
574 | =cut | ||||
575 | |||||
576 | sub get_body_only_points { | ||||
577 | my ($self) = @_; | ||||
578 | $self->_get_autolearn_points(); | ||||
579 | return $self->{body_only_points}; | ||||
580 | } | ||||
581 | |||||
582 | =item $score = $status->get_autolearn_force_status() | ||||
583 | |||||
584 | Return whether a message's score included any rules that are flagged as | ||||
585 | autolearn_force. | ||||
586 | |||||
587 | =cut | ||||
588 | |||||
589 | sub get_autolearn_force_status { | ||||
590 | my ($self) = @_; | ||||
591 | $self->_get_autolearn_points(); | ||||
592 | return $self->{autolearn_force}; | ||||
593 | } | ||||
594 | |||||
595 | =item $rule_names = $status->get_autolearn_force_names() | ||||
596 | |||||
597 | Return a list of comma separated list of rule names if a message's | ||||
598 | score included any rules that are flagged as autolearn_force. | ||||
599 | |||||
600 | =cut | ||||
601 | |||||
602 | sub get_autolearn_force_names { | ||||
603 | my ($self) = @_; | ||||
604 | my ($names); | ||||
605 | |||||
606 | $self->_get_autolearn_points(); | ||||
607 | $names = $self->{autolearn_force_names}; | ||||
608 | |||||
609 | if (defined $names) { | ||||
610 | #remove trailing comma | ||||
611 | $names =~ s/,$//; | ||||
612 | } else { | ||||
613 | $names = ""; | ||||
614 | } | ||||
615 | |||||
616 | return $names; | ||||
617 | } | ||||
618 | |||||
619 | sub _get_autolearn_points { | ||||
620 | my ($self) = @_; | ||||
621 | |||||
622 | return if (exists $self->{autolearn_points}); | ||||
623 | # ensure it only gets computed once, even if we return early | ||||
624 | $self->{autolearn_points} = 0; | ||||
625 | |||||
626 | # This function needs to use use sum($score[scoreset % 2]) not just {score}. | ||||
627 | # otherwise we shift what we autolearn on and it gets really wierd. - tvd | ||||
628 | my $orig_scoreset = $self->{conf}->get_score_set(); | ||||
629 | my $new_scoreset = $orig_scoreset; | ||||
630 | my $scores = $self->{conf}->{scores}; | ||||
631 | |||||
632 | if (($orig_scoreset & 2) == 0) { # we don't need to recompute | ||||
633 | dbg("learn: auto-learn: currently using scoreset $orig_scoreset"); | ||||
634 | } | ||||
635 | else { | ||||
636 | $new_scoreset = $orig_scoreset & ~2; | ||||
637 | dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset"); | ||||
638 | $scores = $self->{conf}->{scoreset}->[$new_scoreset]; | ||||
639 | } | ||||
640 | |||||
641 | my $tflags = $self->{conf}->{tflags}; | ||||
642 | my $points = 0; | ||||
643 | |||||
644 | # Just in case this function is called multiple times, clear out the | ||||
645 | # previous calculated values | ||||
646 | $self->{learned_points} = 0; | ||||
647 | $self->{body_only_points} = 0; | ||||
648 | $self->{head_only_points} = 0; | ||||
649 | $self->{autolearn_force} = 0; | ||||
650 | |||||
651 | foreach my $test (@{$self->{test_names_hit}}) { | ||||
652 | # According to the documentation, noautolearn, userconf, and learn | ||||
653 | # rules are ignored for autolearning. | ||||
654 | if (exists $tflags->{$test}) { | ||||
655 | next if $tflags->{$test} =~ /\bnoautolearn\b/; | ||||
656 | next if $tflags->{$test} =~ /\buserconf\b/; | ||||
657 | |||||
658 | # Keep track of the learn points for an additional autolearn check. | ||||
659 | # Use the original scoreset since it'll be 0 in sets 0 and 1. | ||||
660 | if ($tflags->{$test} =~ /\blearn\b/) { | ||||
661 | # we're guaranteed that the score will be defined | ||||
662 | $self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test}; | ||||
663 | next; | ||||
664 | } | ||||
665 | |||||
666 | #IF ANY RULES ARE AUTOLEARN FORCE, SET THAT FLAG | ||||
667 | if ($tflags->{$test} =~ /\bautolearn_force\b/) { | ||||
668 | $self->{autolearn_force}++; | ||||
669 | #ADD RULE NAME TO LIST | ||||
670 | $self->{autolearn_force_names}.="$test,"; | ||||
671 | } | ||||
672 | } | ||||
673 | |||||
674 | # ignore tests with 0 score (or undefined) in this scoreset | ||||
675 | next if !$scores->{$test}; | ||||
676 | |||||
677 | # Go ahead and add points to the proper locations | ||||
678 | # Changed logic because in testing, I was getting both head and body. Bug 5503 | ||||
679 | if ($self->{conf}->maybe_header_only ($test)) { | ||||
680 | $self->{head_only_points} += $scores->{$test}; | ||||
681 | dbg("learn: auto-learn: adding head_only points $scores->{$test}"); | ||||
682 | } elsif ($self->{conf}->maybe_body_only ($test)) { | ||||
683 | $self->{body_only_points} += $scores->{$test}; | ||||
684 | dbg("learn: auto-learn: adding body_only points $scores->{$test}"); | ||||
685 | } else { | ||||
686 | dbg("learn: auto-learn: not considered head or body scores: $scores->{$test}"); | ||||
687 | } | ||||
688 | |||||
689 | $points += $scores->{$test}; | ||||
690 | } | ||||
691 | |||||
692 | # Figure out the final value we'll use for autolearning | ||||
693 | $points = (sprintf "%0.3f", $points) + 0; | ||||
694 | dbg("learn: auto-learn: message score: ".$self->{score}.", computed score for autolearn: $points"); | ||||
695 | |||||
696 | $self->{autolearn_points} = $points; | ||||
697 | } | ||||
698 | |||||
699 | ########################################################################### | ||||
700 | |||||
701 | =item $isspam = $status->is_spam () | ||||
702 | |||||
703 | After a mail message has been checked, this method can be called. It will | ||||
704 | return 1 for mail determined likely to be spam, 0 if it does not seem | ||||
705 | spam-like. | ||||
706 | |||||
707 | =cut | ||||
708 | |||||
709 | sub is_spam { | ||||
710 | my ($self) = @_; | ||||
711 | # changed to test this so sub-tests can ask "is_spam" during a run | ||||
712 | return ($self->{score} >= $self->{conf}->{required_score}); | ||||
713 | } | ||||
714 | |||||
715 | ########################################################################### | ||||
716 | |||||
717 | =item $list = $status->get_names_of_tests_hit () | ||||
718 | |||||
719 | After a mail message has been checked, this method can be called. It will | ||||
720 | return a comma-separated string, listing all the symbolic test names | ||||
721 | of the tests which were triggered by the mail. | ||||
722 | |||||
723 | =cut | ||||
724 | |||||
725 | sub get_names_of_tests_hit { | ||||
726 | my ($self) = @_; | ||||
727 | |||||
728 | return join(',', sort(@{$self->{test_names_hit}})); | ||||
729 | } | ||||
730 | |||||
731 | =item $list = $status->get_names_of_tests_hit_with_scores_hash () | ||||
732 | |||||
733 | After a mail message has been checked, this method can be called. It will | ||||
734 | return a pointer to a hash for rule & score pairs for all the symbolic | ||||
735 | test names and individual scores of the tests which were triggered by the mail. | ||||
736 | |||||
737 | =cut | ||||
738 | sub get_names_of_tests_hit_with_scores_hash { | ||||
739 | my ($self) = @_; | ||||
740 | |||||
741 | my ($line, %testsscores); | ||||
742 | |||||
743 | #BASED ON CODE FOR TESTSSCORES TAG - KAM 2014-04-24 | ||||
744 | foreach my $test (@{$self->{test_names_hit}}) { | ||||
745 | my $score = $self->{conf}->{scores}->{$test}; | ||||
746 | $score = '0' if !defined $score; | ||||
747 | |||||
748 | $testsscores{$test} = $score; | ||||
749 | } | ||||
750 | |||||
751 | return \%testsscores; | ||||
752 | } | ||||
753 | |||||
754 | =item $list = $status->get_names_of_tests_hit_with_scores () | ||||
755 | |||||
756 | After a mail message has been checked, this method can be called. It will | ||||
757 | return a comma-separated string of rule=score pairs for all the symbolic | ||||
758 | test names and individual scores of the tests which were triggered by the mail. | ||||
759 | |||||
760 | =cut | ||||
761 | sub get_names_of_tests_hit_with_scores { | ||||
762 | my ($self) = @_; | ||||
763 | |||||
764 | my ($line, %testsscores); | ||||
765 | |||||
766 | #BASED ON CODE FOR TESTSSCORES TAG - KAM 2014-04-24 | ||||
767 | foreach my $test (sort @{$self->{test_names_hit}}) { | ||||
768 | my $score = $self->{conf}->{scores}->{$test}; | ||||
769 | $score = '0' if !defined $score; | ||||
770 | $line .= ',' if $line ne ''; | ||||
771 | $line .= $test . '=' . $score; | ||||
772 | } | ||||
773 | |||||
774 | $line ||= 'none'; | ||||
775 | |||||
776 | return $line; | ||||
777 | } | ||||
778 | |||||
779 | |||||
780 | ########################################################################### | ||||
781 | |||||
782 | =item $list = $status->get_names_of_subtests_hit () | ||||
783 | |||||
784 | After a mail message has been checked, this method can be called. It will | ||||
785 | return a comma-separated string, listing all the symbolic test names of the | ||||
786 | meta-rule sub-tests which were triggered by the mail. Sub-tests are the | ||||
787 | normally-hidden rules, which score 0 and have names beginning with two | ||||
788 | underscores, used in meta rules. | ||||
789 | |||||
790 | =cut | ||||
791 | |||||
792 | sub get_names_of_subtests_hit { | ||||
793 | my ($self) = @_; | ||||
794 | |||||
795 | return join(',', sort(@{$self->{subtest_names_hit}})); | ||||
796 | } | ||||
797 | |||||
798 | ########################################################################### | ||||
799 | |||||
800 | =item $num = $status->get_score () | ||||
801 | |||||
802 | After a mail message has been checked, this method can be called. It will | ||||
803 | return the message's score. | ||||
804 | |||||
805 | =cut | ||||
806 | |||||
807 | sub get_score { | ||||
808 | my ($self) = @_; | ||||
809 | return $self->{score}; | ||||
810 | } | ||||
811 | |||||
812 | # left as backward compatibility | ||||
813 | sub get_hits { | ||||
814 | my ($self) = @_; | ||||
815 | return $self->{score}; | ||||
816 | } | ||||
817 | |||||
818 | ########################################################################### | ||||
819 | |||||
820 | =item $num = $status->get_required_score () | ||||
821 | |||||
822 | After a mail message has been checked, this method can be called. It will | ||||
823 | return the score required for a mail to be considered spam. | ||||
824 | |||||
825 | =cut | ||||
826 | |||||
827 | sub get_required_score { | ||||
828 | my ($self) = @_; | ||||
829 | return $self->{conf}->{required_score}; | ||||
830 | } | ||||
831 | |||||
832 | # left as backward compatibility | ||||
833 | sub get_required_hits { | ||||
834 | my ($self) = @_; | ||||
835 | return $self->{conf}->{required_score}; | ||||
836 | } | ||||
837 | |||||
838 | ########################################################################### | ||||
839 | |||||
840 | =item $num = $status->get_autolearn_status () | ||||
841 | |||||
842 | After a mail message has been checked, this method can be called. It will | ||||
843 | return one of the following strings depending on whether the mail was | ||||
844 | auto-learned or not: "ham", "no", "spam", "disabled", "failed", "unavailable". | ||||
845 | |||||
846 | It also returns is flagged with auto_learn_force, it will also include the status | ||||
847 | and the rules hit. For example: "autolearn_force=yes (AUTOLEARNTEST_BODY)" | ||||
848 | |||||
849 | =cut | ||||
850 | |||||
851 | sub get_autolearn_status { | ||||
852 | my ($self) = @_; | ||||
853 | my ($status) = $self->{auto_learn_status} || "unavailable"; | ||||
854 | |||||
855 | if (defined $self->{auto_learn_force_status}) { | ||||
856 | $status .= " autolearn_force=".$self->{auto_learn_force_status}; | ||||
857 | } | ||||
858 | |||||
859 | return $status; | ||||
860 | } | ||||
861 | |||||
862 | ########################################################################### | ||||
863 | |||||
864 | =item $report = $status->get_report () | ||||
865 | |||||
866 | Deliver a "spam report" on the checked mail message. This contains details of | ||||
867 | how many spam detection rules it triggered. | ||||
868 | |||||
869 | The report is returned as a multi-line string, with the lines separated by | ||||
870 | C<\n> characters. | ||||
871 | |||||
872 | =cut | ||||
873 | |||||
874 | sub get_report { | ||||
875 | my ($self) = @_; | ||||
876 | |||||
877 | if (!exists $self->{'report'}) { | ||||
878 | my $report; | ||||
879 | |||||
880 | my $timer = $self->{main}->time_method("get_report"); | ||||
881 | $report = $self->{conf}->{report_template}; | ||||
882 | $report ||= '(no report template found)'; | ||||
883 | |||||
884 | $report = $self->_replace_tags($report); | ||||
885 | |||||
886 | $report =~ s/\n*$/\n\n/s; | ||||
887 | $self->{report} = $report; | ||||
888 | } | ||||
889 | |||||
890 | return $self->{report}; | ||||
891 | } | ||||
892 | |||||
893 | ########################################################################### | ||||
894 | |||||
895 | =item $preview = $status->get_content_preview () | ||||
896 | |||||
897 | Give a "preview" of the content. | ||||
898 | |||||
899 | This is returned as a multi-line string, with the lines separated by C<\n> | ||||
900 | characters, containing a fully-decoded, safe, plain-text sample of the first | ||||
901 | few lines of the message body. | ||||
902 | |||||
903 | =cut | ||||
904 | |||||
905 | sub get_content_preview { | ||||
906 | my ($self) = @_; | ||||
907 | |||||
908 | my $str = ''; | ||||
909 | my $ary = $self->get_decoded_stripped_body_text_array(); | ||||
910 | shift @{$ary}; # drop the subject line | ||||
911 | |||||
912 | my $numlines = 3; | ||||
913 | while (length ($str) < 200 && @{$ary} && $numlines-- > 0) { | ||||
914 | $str .= shift @{$ary}; | ||||
915 | } | ||||
916 | undef $ary; | ||||
917 | |||||
918 | # in case the last line was huge, trim it back to around 200 chars | ||||
919 | local $1; | ||||
920 | $str =~ s/^(.{200}).+$/$1 [...]/gm; | ||||
921 | chomp ($str); $str .= "\n"; | ||||
922 | |||||
923 | # now, some tidy-ups that make things look a bit prettier | ||||
924 | $str =~ s/-----Original Message-----.*$//gm; | ||||
925 | $str =~ s/This is a multi-part message in MIME format\.//gs; | ||||
926 | $str =~ s/[-_*.]{10,}//gs; | ||||
927 | $str =~ s/\s+/ /gs; | ||||
928 | |||||
929 | # add "Content preview:" ourselves, so that the text aligns | ||||
930 | # correctly with the template -- then trim it off. We don't | ||||
931 | # have to get this *exactly* right, but it's nicer if we | ||||
932 | # make a bit of an effort ;) | ||||
933 | $str = Mail::SpamAssassin::Util::wrap($str, " ", "Content preview: ", 75, 1); | ||||
934 | $str =~ s/^Content preview:\s+//gs; | ||||
935 | |||||
936 | return $str; | ||||
937 | } | ||||
938 | |||||
939 | ########################################################################### | ||||
940 | |||||
941 | =item $msg = $status->get_message() | ||||
942 | |||||
943 | Return the object representing the message being scanned. | ||||
944 | |||||
945 | =cut | ||||
946 | |||||
947 | sub get_message { | ||||
948 | my ($self) = @_; | ||||
949 | return $self->{msg}; | ||||
950 | } | ||||
951 | |||||
952 | ########################################################################### | ||||
953 | |||||
954 | =item $status->rewrite_mail () | ||||
955 | |||||
956 | Rewrite the mail message. This will at minimum add headers, and at | ||||
957 | maximum MIME-encapsulate the message text, to reflect its spam or not-spam | ||||
958 | status. The function will return a scalar of the rewritten message. | ||||
959 | |||||
960 | The actual modifications depend on the configuration (see | ||||
961 | C<Mail::SpamAssassin::Conf> for more information). | ||||
962 | |||||
963 | The possible modifications are as follows: | ||||
964 | |||||
965 | =over 4 | ||||
966 | |||||
967 | =item To:, From: and Subject: modification on spam mails | ||||
968 | |||||
969 | Depending on the configuration, the To: and From: lines can have a | ||||
970 | user-defined RFC 2822 comment appended for spam mail. The subject line | ||||
971 | may have a user-defined string prepended to it for spam mail. | ||||
972 | |||||
973 | =item X-Spam-* headers for all mails | ||||
974 | |||||
975 | Depending on the configuration, zero or more headers with names | ||||
976 | beginning with C<X-Spam-> will be added to mail depending on whether | ||||
977 | it is spam or ham. | ||||
978 | |||||
979 | =item spam message with report_safe | ||||
980 | |||||
981 | If report_safe is set to true (1), then spam messages are encapsulated | ||||
982 | into their own message/rfc822 MIME attachment without any modifications | ||||
983 | being made. | ||||
984 | |||||
985 | If report_safe is set to false (0), then the message will only have the | ||||
986 | above headers added/modified. | ||||
987 | |||||
988 | =back | ||||
989 | |||||
990 | =cut | ||||
991 | |||||
992 | sub rewrite_mail { | ||||
993 | my ($self) = @_; | ||||
994 | |||||
995 | my $timer = $self->{main}->time_method("rewrite_mail"); | ||||
996 | my $msg = $self->{msg}->get_mbox_separator() || ''; | ||||
997 | |||||
998 | if ($self->{is_spam} && $self->{conf}->{report_safe}) { | ||||
999 | $msg .= $self->rewrite_report_safe(); | ||||
1000 | } | ||||
1001 | else { | ||||
1002 | $msg .= $self->rewrite_no_report_safe(); | ||||
1003 | } | ||||
1004 | |||||
1005 | return $msg; | ||||
1006 | } | ||||
1007 | |||||
1008 | # Make the line endings in the passed string reference appropriate | ||||
1009 | # for the original mail. Callers must note bug 5250: don't rewrite | ||||
1010 | # the message body, since that will corrupt 8bit attachments/MIME parts. | ||||
1011 | # | ||||
1012 | sub _fixup_report_line_endings { | ||||
1013 | my ($self, $strref) = @_; | ||||
1014 | if ($self->{msg}->{line_ending} ne "\n") { | ||||
1015 | $$strref =~ s/\r?\n/$self->{msg}->{line_ending}/gs; | ||||
1016 | } | ||||
1017 | } | ||||
1018 | |||||
1019 | sub _get_added_headers { | ||||
1020 | my ($self, $which) = @_; | ||||
1021 | my $str = ''; | ||||
1022 | # use string appends to put this back together -- I finally benchmarked it. | ||||
1023 | # join() is 56% of the speed of just using string appends. ;) | ||||
1024 | foreach my $hf_ref (@{$self->{conf}->{$which}}) { | ||||
1025 | my($hfname, $hfbody) = @$hf_ref; | ||||
1026 | my $line = $self->_process_header($hfname,$hfbody); | ||||
1027 | $line = $self->qp_encode_header($line); | ||||
1028 | $str .= "X-Spam-$hfname: $line\n"; | ||||
1029 | } | ||||
1030 | return $str; | ||||
1031 | }; | ||||
1032 | |||||
1033 | # rewrite the message in report_safe mode | ||||
1034 | # should not be called directly, use rewrite_mail instead | ||||
1035 | # | ||||
1036 | sub rewrite_report_safe { | ||||
1037 | my ($self) = @_; | ||||
1038 | |||||
1039 | # This is the original message. We do not want to make any modifications so | ||||
1040 | # we may recover it if necessary. It will be put into the new message as a | ||||
1041 | # message/rfc822 MIME part. | ||||
1042 | my $original = $self->{msg}->get_pristine(); | ||||
1043 | |||||
1044 | # This is the new message. | ||||
1045 | my $newmsg = ''; | ||||
1046 | |||||
1047 | # the report charset | ||||
1048 | my $report_charset = "; charset=iso-8859-1"; | ||||
1049 | if ($self->{conf}->{report_charset}) { | ||||
1050 | $report_charset = "; charset=" . $self->{conf}->{report_charset}; | ||||
1051 | } | ||||
1052 | |||||
1053 | # the SpamAssassin report | ||||
1054 | my $report = $self->get_report(); | ||||
1055 | |||||
1056 | # If there are any wide characters, need to MIME-encode in UTF-8 | ||||
1057 | # TODO: If $report_charset is something other than iso-8859-1/us-ascii, then | ||||
1058 | # we could try converting to that charset if possible | ||||
1059 | unless ($] < 5.008 || utf8::downgrade($report, 1)) { | ||||
1060 | $report_charset = "; charset=utf-8"; | ||||
1061 | utf8::encode($report); | ||||
1062 | } | ||||
1063 | |||||
1064 | # get original headers, "pristine" if we can do it | ||||
1065 | my $from = $self->{msg}->get_pristine_header("From"); | ||||
1066 | my $to = $self->{msg}->get_pristine_header("To"); | ||||
1067 | my $cc = $self->{msg}->get_pristine_header("Cc"); | ||||
1068 | my $subject = $self->{msg}->get_pristine_header("Subject"); | ||||
1069 | my $msgid = $self->{msg}->get_pristine_header('Message-Id'); | ||||
1070 | my $date = $self->{msg}->get_pristine_header("Date"); | ||||
1071 | |||||
1072 | # It'd be nice to do this with a foreach loop, but with only three | ||||
1073 | # possibilities right now, it's easier not to... | ||||
1074 | |||||
1075 | if (defined $self->{conf}->{rewrite_header}->{Subject}) { | ||||
1076 | $subject = "\n" if !defined $subject; | ||||
1077 | my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{Subject}); | ||||
1078 | $tag =~ s/\n/ /gs; # strip tag's newlines | ||||
1079 | $subject =~ s/^(?:\Q${tag}\E )?/${tag} /g; # For some reason the tag may already be there!? | ||||
1080 | } | ||||
1081 | |||||
1082 | if (defined $self->{conf}->{rewrite_header}->{To}) { | ||||
1083 | $to = "\n" if !defined $to; | ||||
1084 | my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{To}); | ||||
1085 | $tag =~ s/\n/ /gs; # strip tag's newlines | ||||
1086 | $to =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/; | ||||
1087 | } | ||||
1088 | |||||
1089 | if (defined $self->{conf}->{rewrite_header}->{From}) { | ||||
1090 | $from = "\n" if !defined $from; | ||||
1091 | my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{From}); | ||||
1092 | $tag =~ s/\n+//gs; # strip tag's newlines | ||||
1093 | $from =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/; | ||||
1094 | } | ||||
1095 | |||||
1096 | # add report headers to message | ||||
1097 | $newmsg .= "From: $from" if defined $from; | ||||
1098 | $newmsg .= "To: $to" if defined $to; | ||||
1099 | $newmsg .= "Cc: $cc" if defined $cc; | ||||
1100 | $newmsg .= "Subject: $subject" if defined $subject; | ||||
1101 | $newmsg .= "Date: $date" if defined $date; | ||||
1102 | $newmsg .= "Message-Id: $msgid" if defined $msgid; | ||||
1103 | $newmsg .= $self->_get_added_headers('headers_spam'); | ||||
1104 | |||||
1105 | if (defined $self->{conf}->{report_safe_copy_headers}) { | ||||
1106 | my %already_added = map { $_ => 1 } qw/from to cc subject date message-id/; | ||||
1107 | |||||
1108 | foreach my $hdr (@{$self->{conf}->{report_safe_copy_headers}}) { | ||||
1109 | next if exists $already_added{lc $hdr}; | ||||
1110 | my @hdrtext = $self->{msg}->get_pristine_header($hdr); | ||||
1111 | $already_added{lc $hdr}++; | ||||
1112 | |||||
1113 | if (lc $hdr eq "received") { # add Received at the top ... | ||||
1114 | my $rhdr = ""; | ||||
1115 | foreach (@hdrtext) { | ||||
1116 | $rhdr .= "$hdr: $_"; | ||||
1117 | } | ||||
1118 | $newmsg = "$rhdr$newmsg"; | ||||
1119 | } | ||||
1120 | else { | ||||
1121 | foreach (@hdrtext) { | ||||
1122 | $newmsg .= "$hdr: $_"; | ||||
1123 | } | ||||
1124 | } | ||||
1125 | } | ||||
1126 | } | ||||
1127 | |||||
1128 | # jm: add a SpamAssassin Received header to note markup time etc. | ||||
1129 | # emulates the fetchmail style. | ||||
1130 | # tvd: do this after report_safe_copy_headers so Received will be done correctly | ||||
1131 | $newmsg = "Received: from localhost by " . | ||||
1132 | Mail::SpamAssassin::Util::fq_hostname() . "\n" . | ||||
1133 | "\twith SpamAssassin (version " . | ||||
1134 | Mail::SpamAssassin::Version() . ");\n" . | ||||
1135 | "\t" . Mail::SpamAssassin::Util::time_to_rfc822_date() . "\n" . | ||||
1136 | $newmsg; | ||||
1137 | |||||
1138 | # MIME boundary | ||||
1139 | my $boundary = "----------=_" . sprintf("%08X.%08X",time,int(rand(2 ** 32))); | ||||
1140 | |||||
1141 | # ensure it's unique, so we can't be attacked this way | ||||
1142 | while ($original =~ /^\Q${boundary}\E(?:--)?$/m) { | ||||
1143 | $boundary .= "/".sprintf("%08X",int(rand(2 ** 32))); | ||||
1144 | } | ||||
1145 | |||||
1146 | # determine whether Content-Disposition should be "attachment" or "inline" | ||||
1147 | my $disposition; | ||||
1148 | my $ct = $self->{msg}->get_header("Content-Type"); | ||||
1149 | if (defined $ct && $ct ne '' && $ct !~ m{text/plain}i) { | ||||
1150 | $disposition = "attachment"; | ||||
1151 | $report .= $self->_replace_tags($self->{conf}->{unsafe_report_template}); | ||||
1152 | # if we wanted to defang the attachment, this would be the place | ||||
1153 | } | ||||
1154 | else { | ||||
1155 | $disposition = "inline"; | ||||
1156 | } | ||||
1157 | |||||
1158 | my $type = "message/rfc822"; | ||||
1159 | $type = "text/plain" if $self->{conf}->{report_safe} > 1; | ||||
1160 | |||||
1161 | my $description = $self->{conf}->{'encapsulated_content_description'}; | ||||
1162 | |||||
1163 | # Note: the message should end in blank line since mbox format wants | ||||
1164 | # blank line at end and messages may be concatenated! In addition, the | ||||
1165 | # x-spam-type parameter is fixed since we will use it later to recognize | ||||
1166 | # original messages that can be extracted. | ||||
1167 | $newmsg .= <<"EOM"; | ||||
1168 | MIME-Version: 1.0 | ||||
1169 | Content-Type: multipart/mixed; boundary="$boundary" | ||||
1170 | |||||
1171 | This is a multi-part message in MIME format. | ||||
1172 | |||||
1173 | --$boundary | ||||
1174 | Content-Type: text/plain$report_charset | ||||
1175 | Content-Disposition: inline | ||||
1176 | Content-Transfer-Encoding: 8bit | ||||
1177 | |||||
1178 | $report | ||||
1179 | |||||
1180 | --$boundary | ||||
1181 | Content-Type: $type; x-spam-type=original | ||||
1182 | Content-Description: $description | ||||
1183 | Content-Disposition: $disposition | ||||
1184 | Content-Transfer-Encoding: 8bit | ||||
1185 | |||||
1186 | EOM | ||||
1187 | |||||
1188 | my $newmsgtrailer = "\n--$boundary--\n\n"; | ||||
1189 | |||||
1190 | # now fix line endings in both headers, report_safe body parts, | ||||
1191 | # and new MIME boundaries and structure | ||||
1192 | $self->_fixup_report_line_endings(\$newmsg); | ||||
1193 | $self->_fixup_report_line_endings(\$newmsgtrailer); | ||||
1194 | $newmsg .= $original.$newmsgtrailer; | ||||
1195 | |||||
1196 | return $newmsg; | ||||
1197 | } | ||||
1198 | |||||
1199 | # rewrite the message in non-report_safe mode (just headers) | ||||
1200 | # should not be called directly, use rewrite_mail instead | ||||
1201 | # | ||||
1202 | sub rewrite_no_report_safe { | ||||
1203 | my ($self) = @_; | ||||
1204 | |||||
1205 | # put the pristine headers into an array | ||||
1206 | # skip the X-Spam- headers, but allow the X-Spam-Prev headers to remain. | ||||
1207 | # since there may be a missing header/body | ||||
1208 | # | ||||
1209 | my @pristine_headers = split(/^/m, $self->{msg}->get_pristine_header()); | ||||
1210 | for (my $line = 0; $line <= $#pristine_headers; $line++) { | ||||
1211 | next unless ($pristine_headers[$line] =~ /^X-Spam-(?!Prev-)/i); | ||||
1212 | splice @pristine_headers, $line, 1 while ($pristine_headers[$line] =~ /^(?:X-Spam-(?!Prev-)|[ \t])/i); | ||||
1213 | $line--; | ||||
1214 | } | ||||
1215 | my $separator = ''; | ||||
1216 | if (@pristine_headers && $pristine_headers[$#pristine_headers] =~ /^\s*$/) { | ||||
1217 | $separator = pop @pristine_headers; | ||||
1218 | } | ||||
1219 | |||||
1220 | my $addition = 'headers_ham'; | ||||
1221 | |||||
1222 | if($self->{is_spam}) | ||||
1223 | { | ||||
1224 | # special-case: Subject lines. ensure one exists, if we're | ||||
1225 | # supposed to mark it up. | ||||
1226 | my $created_subject = 0; | ||||
1227 | my $subject = $self->{msg}->get_pristine_header('Subject'); | ||||
1228 | if (!defined($subject) && $self->{is_spam} | ||||
1229 | && exists $self->{conf}->{rewrite_header}->{'Subject'}) | ||||
1230 | { | ||||
1231 | push(@pristine_headers, "Subject: \n"); | ||||
1232 | $created_subject = 1; | ||||
1233 | } | ||||
1234 | |||||
1235 | # Deal with header rewriting | ||||
1236 | foreach (@pristine_headers) { | ||||
1237 | # if we're not going to do a rewrite, skip this header! | ||||
1238 | next if (!/^(From|Subject|To):/i); | ||||
1239 | my $hdr = ucfirst(lc($1)); | ||||
1240 | next if (!defined $self->{conf}->{rewrite_header}->{$hdr}); | ||||
1241 | |||||
1242 | # pop the original version onto the end of the header array | ||||
1243 | if ($created_subject) { | ||||
1244 | push(@pristine_headers, "X-Spam-Prev-Subject: (nonexistent)\n"); | ||||
1245 | } else { | ||||
1246 | push(@pristine_headers, "X-Spam-Prev-$_"); | ||||
1247 | } | ||||
1248 | |||||
1249 | # Figure out the rewrite piece | ||||
1250 | my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{$hdr}); | ||||
1251 | $tag =~ s/\n/ /gs; | ||||
1252 | |||||
1253 | # The tag should be a comment for this header ... | ||||
1254 | $tag = "($tag)" if ($hdr =~ /^(?:From|To)$/); | ||||
1255 | |||||
1256 | local $1; | ||||
1257 | s/^([^:]+:)[ \t]*(?:\Q${tag}\E )?/$1 ${tag} /i; | ||||
1258 | } | ||||
1259 | |||||
1260 | $addition = 'headers_spam'; | ||||
1261 | } | ||||
1262 | |||||
1263 | # Break the pristine header set into two blocks; $new_hdrs_pre is the stuff | ||||
1264 | # that we want to ensure comes before any SpamAssassin markup headers, | ||||
1265 | # like the Return-Path header (see bug 3409). | ||||
1266 | # | ||||
1267 | # all the rest of the message headers (as left in @pristine_headers), is | ||||
1268 | # to be placed after the SpamAssassin markup hdrs. Once one of those headers | ||||
1269 | # is seen, all further headers go into that set; it's assumed that it's an | ||||
1270 | # old copy of the header, or attempted spoofing, if it crops up halfway | ||||
1271 | # through the headers. | ||||
1272 | |||||
1273 | my $new_hdrs_pre = ''; | ||||
1274 | if (@pristine_headers && $pristine_headers[0] =~ /^Return-Path:/i) { | ||||
1275 | $new_hdrs_pre .= shift(@pristine_headers); | ||||
1276 | while (@pristine_headers && $pristine_headers[0] =~ /^[ \t]/) { | ||||
1277 | $new_hdrs_pre .= shift(@pristine_headers); | ||||
1278 | } | ||||
1279 | } | ||||
1280 | $new_hdrs_pre .= $self->_get_added_headers($addition); | ||||
1281 | |||||
1282 | # fix up line endings appropriately | ||||
1283 | my $newmsg = $new_hdrs_pre . join('',@pristine_headers) . $separator; | ||||
1284 | $self->_fixup_report_line_endings(\$newmsg); | ||||
1285 | |||||
1286 | return $newmsg.$self->{msg}->get_pristine_body(); | ||||
1287 | } | ||||
1288 | |||||
1289 | sub qp_encode_header { | ||||
1290 | my ($self, $text) = @_; | ||||
1291 | |||||
1292 | # do nothing unless there's an 8-bit char | ||||
1293 | return $text unless ($text =~ /[\x80-\xff]/); | ||||
1294 | |||||
1295 | my $cs = 'ISO-8859-1'; | ||||
1296 | if ($self->{report_charset}) { | ||||
1297 | $cs = $self->{report_charset}; | ||||
1298 | } | ||||
1299 | |||||
1300 | my @hexchars = split('', '0123456789abcdef'); | ||||
1301 | my $ord; | ||||
1302 | local $1; | ||||
1303 | $text =~ s{([\x80-\xff])}{ | ||||
1304 | $ord = ord $1; | ||||
1305 | '='.$hexchars[($ord & 0xf0) >> 4].$hexchars[$ord & 0x0f] | ||||
1306 | }ges; | ||||
1307 | |||||
1308 | $text = '=?'.$cs.'?Q?'.$text.'?='; | ||||
1309 | |||||
1310 | dbg("markup: encoding header in $cs: $text"); | ||||
1311 | return $text; | ||||
1312 | } | ||||
1313 | |||||
1314 | sub _process_header { | ||||
1315 | my ($self, $hdr_name, $hdr_data) = @_; | ||||
1316 | |||||
1317 | $hdr_data = $self->_replace_tags($hdr_data); | ||||
1318 | $hdr_data =~ s/(?:\r?\n)+$//; # make sure there are no trailing newlines ... | ||||
1319 | |||||
1320 | if ($self->{conf}->{fold_headers}) { | ||||
1321 | if ($hdr_data =~ /\n/) { | ||||
1322 | $hdr_data =~ s/\s*\n\s*/\n\t/g; | ||||
1323 | return $hdr_data; | ||||
1324 | } | ||||
1325 | else { | ||||
1326 | # use '!!' instead of ': ' so it doesn't wrap on the space | ||||
1327 | my $hdr = "X-Spam-$hdr_name!!$hdr_data"; | ||||
1328 | $hdr = Mail::SpamAssassin::Util::wrap($hdr, "\t", "", 79, 0, '(?<=[\s,])'); | ||||
1329 | $hdr =~ s/^\t\n//gm; | ||||
1330 | return (split (/!!/, $hdr, 2))[1]; # just return the data part | ||||
1331 | } | ||||
1332 | } | ||||
1333 | else { | ||||
1334 | $hdr_data =~ s/\n/ /g; # Can't have newlines in headers, unless folded | ||||
1335 | return $hdr_data; | ||||
1336 | } | ||||
1337 | } | ||||
1338 | |||||
1339 | sub _replace_tags { | ||||
1340 | my $self = shift; | ||||
1341 | my $text = shift; | ||||
1342 | |||||
1343 | # default to leaving the original string in place, if we cannot find | ||||
1344 | # a tag for it (bug 4793) | ||||
1345 | local($1,$2,$3); | ||||
1346 | $text =~ s{(_(\w+?)(?:\((.*?)\))?_)}{ | ||||
1347 | my $full = $1; | ||||
1348 | my $tag = $2; | ||||
1349 | my $result; | ||||
1350 | if ($tag =~ /^ADDEDHEADER(?:HAM|SPAM|)\z/) { | ||||
1351 | # Bug 6278: break infinite recursion through _get_added_headers and | ||||
1352 | # _get_tag on an attempt to use such tag in add_header template | ||||
1353 | } else { | ||||
1354 | $result = $self->get_tag_raw($tag,$3); | ||||
1355 | $result = join(' ',@$result) if ref $result eq 'ARRAY'; | ||||
1356 | } | ||||
1357 | defined $result ? $result : $full; | ||||
1358 | }ge; | ||||
1359 | |||||
1360 | return $text; | ||||
1361 | } | ||||
1362 | |||||
1363 | ########################################################################### | ||||
1364 | |||||
1365 | # public API for plugins | ||||
1366 | |||||
1367 | =item $status->action_depends_on_tags($tags, $code, @args) | ||||
1368 | |||||
1369 | Enqueue the supplied subroutine reference C<$code>, to become runnable when | ||||
1370 | all the specified tags become available. The C<$tags> may be a simple | ||||
1371 | scalar - a tag name, or a listref of tag names. The subroutine C<&$code> | ||||
1372 | when called will be passed a C<permessagestatus> object as its first argument, | ||||
1373 | followed by the supplied (optional) list C<@args> . | ||||
1374 | |||||
1375 | =cut | ||||
1376 | |||||
1377 | # spent 19.0ms (17.4+1.59) within Mail::SpamAssassin::PerMsgStatus::action_depends_on_tags which was called 234 times, avg 81µs/call:
# 234 times (17.4ms+1.59ms) by Mail::SpamAssassin::Plugin::AskDNS::extract_metadata at line 398 of Mail/SpamAssassin/Plugin/AskDNS.pm, avg 81µs/call | ||||
1378 | 234 | 779µs | my($self, $tags, $code, @args) = @_; | ||
1379 | |||||
1380 | 234 | 810µs | ref $code eq 'CODE' | ||
1381 | or die "action_depends_on_tags: argument must be a subroutine ref"; | ||||
1382 | |||||
1383 | # tag names on which the given action depends | ||||
1384 | 234 | 1.06ms | my @dep_tags = !ref $tags ? uc $tags : map(uc($_),@$tags); | ||
1385 | |||||
1386 | # @{$self->{tagrun_subs}} list of all submitted subroutines | ||||
1387 | # @{$self->{tagrun_actions}{$tag}} bitmask of action indices blocked by tag | ||||
1388 | # $self->{tagrun_tagscnt}[$action_ind] count of tags still pending | ||||
1389 | |||||
1390 | # store action details, obtain its index | ||||
1391 | 468 | 2.38ms | push(@{$self->{tagrun_subs}}, [$code,@args]); | ||
1392 | 468 | 1.62ms | my $action_ind = $#{$self->{tagrun_subs}}; | ||
1393 | |||||
1394 | # list dependency tag names which are not already satistied | ||||
1395 | my @blocking_tags = | ||||
1396 | 234 | 1.41ms | grep(!defined $self->{tag_data}{$_} || $self->{tag_data}{$_} eq '', | ||
1397 | @dep_tags); | ||||
1398 | |||||
1399 | 234 | 1.01ms | $self->{tagrun_tagscnt}[$action_ind] = scalar @blocking_tags; | ||
1400 | 234 | 2.07ms | $self->{tagrun_actions}{$_}[$action_ind] = 1 for @blocking_tags; | ||
1401 | |||||
1402 | 234 | 2.46ms | if (@blocking_tags) { | ||
1403 | 234 | 2.01ms | 234 | 1.59ms | dbg("check: tagrun - action %s blocking on tags %s", # spent 1.59ms making 234 calls to Mail::SpamAssassin::Logger::dbg, avg 7µs/call |
1404 | $action_ind, join(', ',@blocking_tags)); | ||||
1405 | } else { | ||||
1406 | dbg("check: tagrun - tag %s was ready, action %s runnable immediately: %s", | ||||
1407 | join(', ',@dep_tags), $action_ind, join(', ',$code,@args)); | ||||
1408 | &$code($self, @args); | ||||
1409 | } | ||||
1410 | } | ||||
1411 | |||||
1412 | # tag_is_ready() will be called by set_tag(), indicating that a given | ||||
1413 | # tag just received its value, possibly unblocking an action routine | ||||
1414 | # as declared by action_depends_on_tags(). | ||||
1415 | # | ||||
1416 | # Well-behaving plugins should call set_tag() once when a tag is fully | ||||
1417 | # assembled and ready. Multiple calls to set the same tag value are handled | ||||
1418 | # gracefully, but may result in premature activation of a pending action. | ||||
1419 | # Setting tag values by plugins should not be done directly but only through | ||||
1420 | # the public API set_tag(), otherwise a pending action release may be missed. | ||||
1421 | # | ||||
1422 | # spent 105ms (64.7+39.8) within Mail::SpamAssassin::PerMsgStatus::tag_is_ready which was called 3028 times, avg 35µs/call:
# 3028 times (64.7ms+39.8ms) by Mail::SpamAssassin::PerMsgStatus::set_tag at line 1504, avg 35µs/call | ||||
1423 | 3028 | 6.22ms | my($self, $tag) = @_; | ||
1424 | 3028 | 6.83ms | $tag = uc $tag; | ||
1425 | |||||
1426 | 3028 | 21.0ms | 3028 | 39.8ms | if (would_log('dbg', 'check')) { # spent 39.8ms making 3028 calls to Mail::SpamAssassin::Logger::would_log, avg 13µs/call |
1427 | my $tag_val = $self->{tag_data}{$tag}; | ||||
1428 | dbg("check: tagrun - tag %s is now ready, value: %s", | ||||
1429 | $tag, !defined $tag_val ? '<UNDEF>' | ||||
1430 | : ref $tag_val ne 'ARRAY' ? $tag_val | ||||
1431 | : 'ARY:[' . join(',',@$tag_val) . ']' ); | ||||
1432 | } | ||||
1433 | 3028 | 33.1ms | if (ref $self->{tagrun_actions}{$tag}) { # any action blocking on this tag? | ||
1434 | my $action_ind = 0; | ||||
1435 | foreach my $action_pending (@{$self->{tagrun_actions}{$tag}}) { | ||||
1436 | if ($action_pending) { | ||||
1437 | $self->{tagrun_actions}{$tag}[$action_ind] = 0; | ||||
1438 | if ($self->{tagrun_tagscnt}[$action_ind] <= 0) { | ||||
1439 | # should not happen, warn and ignore | ||||
1440 | warn "tagrun error: count for $action_ind is ". | ||||
1441 | $self->{tagrun_tagscnt}[$action_ind]."\n"; | ||||
1442 | } elsif (! --($self->{tagrun_tagscnt}[$action_ind])) { | ||||
1443 | my($code,@args) = @{$self->{tagrun_subs}[$action_ind]}; | ||||
1444 | dbg("check: tagrun - tag %s unblocking the action %s: %s", | ||||
1445 | $tag, $action_ind, join(', ',$code,@args)); | ||||
1446 | &$code($self, @args); | ||||
1447 | } | ||||
1448 | } | ||||
1449 | $action_ind++; | ||||
1450 | } | ||||
1451 | } | ||||
1452 | } | ||||
1453 | |||||
1454 | # debugging aid: show actions that are still pending, waiting for their | ||||
1455 | # tags to receive a value | ||||
1456 | # | ||||
1457 | # spent 3.82ms within Mail::SpamAssassin::PerMsgStatus::report_unsatisfied_actions which was called 234 times, avg 16µs/call:
# 234 times (3.82ms+0s) by Mail::SpamAssassin::PerMsgStatus::finish at line 1663, avg 16µs/call | ||||
1458 | 234 | 521µs | my($self) = @_; | ||
1459 | 234 | 475µs | my @tags; | ||
1460 | 234 | 723µs | @tags = keys %{$self->{tagrun_actions}} if ref $self->{tagrun_actions}; | ||
1461 | 234 | 14.6ms | for my $tag (@tags) { | ||
1462 | my @pending_actions = grep($self->{tagrun_actions}{$tag}[$_], | ||||
1463 | (0 .. $#{$self->{tagrun_actions}{$tag}})); | ||||
1464 | dbg("check: tagrun - tag %s is still blocking action %s", | ||||
1465 | $tag, join(', ', @pending_actions)) if @pending_actions; | ||||
1466 | } | ||||
1467 | } | ||||
1468 | |||||
1469 | =item $status->set_tag($tagname, $value) | ||||
1470 | |||||
1471 | Set a template tag, as used in C<add_header>, report templates, etc. | ||||
1472 | This API is intended for use by plugins. Tag names will be converted | ||||
1473 | to an all-uppercase representation internally. | ||||
1474 | |||||
1475 | C<$value> can be a simple scalar (string or number), or a reference to an | ||||
1476 | array, in which case the public method get_tag will join array elements | ||||
1477 | using a space as a separator, returning a single string for backward | ||||
1478 | compatibility. | ||||
1479 | |||||
1480 | C<$value> can also be a subroutine reference, which will be evaluated | ||||
1481 | each time the template is expanded. The first argument passed by get_tag | ||||
1482 | to a called subroutine will be a PerMsgStatus object (this module's object), | ||||
1483 | followed by optional arguments provided a caller to get_tag. | ||||
1484 | |||||
1485 | Note that perl supports closures, which means that variables set in the | ||||
1486 | caller's scope can be accessed inside this C<sub>. For example: | ||||
1487 | |||||
1488 | my $text = "hello world!"; | ||||
1489 | $status->set_tag("FOO", sub { | ||||
1490 | my $pms = shift; | ||||
1491 | return $text; | ||||
1492 | }); | ||||
1493 | |||||
1494 | See C<Mail::SpamAssassin::Conf>'s C<TEMPLATE TAGS> section for more details | ||||
1495 | on how template tags are used. | ||||
1496 | |||||
1497 | C<undef> will be returned if a tag by that name has not been defined. | ||||
1498 | |||||
1499 | =cut | ||||
1500 | |||||
1501 | # spent 185ms (80.4+105) within Mail::SpamAssassin::PerMsgStatus::set_tag which was called 3028 times, avg 61µs/call:
# 466 times (13.6ms+17.4ms) by Mail::SpamAssassin::Message::Metadata::extract at line 100 of Mail/SpamAssassin/Message/Metadata.pm, avg 67µs/call
# 234 times (6.77ms+8.57ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1726, avg 66µs/call
# 234 times (6.00ms+7.58ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1737, avg 58µs/call
# 234 times (5.77ms+7.75ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1741, avg 58µs/call
# 234 times (5.89ms+7.17ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1739, avg 56µs/call
# 234 times (5.92ms+7.04ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1738, avg 55µs/call
# 234 times (5.53ms+7.14ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1740, avg 54µs/call
# 233 times (6.99ms+8.38ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1733, avg 66µs/call
# 174 times (5.22ms+7.25ms) by Mail::SpamAssassin::Plugin::TxRep::check_reputation at line 1413 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 72µs/call
# 155 times (3.78ms+6.41ms) by Mail::SpamAssassin::Plugin::URIDNSBL::parsed_metadata at line 487 of Mail/SpamAssassin/Plugin/URIDNSBL.pm, avg 66µs/call
# 155 times (3.64ms+5.09ms) by Mail::SpamAssassin::Plugin::URIDNSBL::parsed_metadata at line 490 of Mail/SpamAssassin/Plugin/URIDNSBL.pm, avg 56µs/call
# 147 times (4.60ms+5.89ms) by Mail::SpamAssassin::Plugin::TxRep::check_reputation at line 1417 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 71µs/call
# 147 times (3.79ms+4.42ms) by Mail::SpamAssassin::Plugin::TxRep::check_reputation at line 1422 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 56µs/call
# 147 times (2.91ms+4.44ms) by Mail::SpamAssassin::Plugin::TxRep::check_reputation at line 1421 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 50µs/call | ||||
1502 | 3028 | 13.8ms | my($self,$tag,$val) = @_; | ||
1503 | 3028 | 20.5ms | $self->{tag_data}->{uc $tag} = $val; | ||
1504 | 3028 | 47.7ms | 3028 | 105ms | $self->tag_is_ready($tag); # spent 105ms making 3028 calls to Mail::SpamAssassin::PerMsgStatus::tag_is_ready, avg 35µs/call |
1505 | } | ||||
1506 | |||||
1507 | # public API for plugins | ||||
1508 | |||||
1509 | =item $string = $status->get_tag($tagname) | ||||
1510 | |||||
1511 | Get the current value of a template tag, as used in C<add_header>, report | ||||
1512 | templates, etc. This API is intended for use by plugins. Tag names will be | ||||
1513 | converted to an all-uppercase representation internally. See | ||||
1514 | C<Mail::SpamAssassin::Conf>'s C<TEMPLATE TAGS> section for more details on | ||||
1515 | tags. | ||||
1516 | |||||
1517 | C<undef> will be returned if a tag by that name has not been defined. | ||||
1518 | |||||
1519 | =cut | ||||
1520 | |||||
1521 | # spent 12.5ms within Mail::SpamAssassin::PerMsgStatus::get_tag which was called 321 times, avg 39µs/call:
# 321 times (12.5ms+0s) by Mail::SpamAssassin::Plugin::TxRep::check_senders_reputation at line 1307 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 39µs/call | ||||
1522 | 321 | 1.24ms | my($self, $tag, @args) = @_; | ||
1523 | |||||
1524 | 321 | 722µs | return if !defined $tag; | ||
1525 | 321 | 1.13ms | $tag = uc $tag; | ||
1526 | 321 | 675µs | my $data; | ||
1527 | 321 | 2.71ms | if (exists $common_tags{$tag}) { | ||
1528 | # tag data from traditional pre-defined tag subroutines | ||||
1529 | $data = $common_tags{$tag}; | ||||
1530 | $data = $data->($self,@args) if ref $data eq 'CODE'; | ||||
1531 | $data = join(' ',@$data) if ref $data eq 'ARRAY'; | ||||
1532 | $data = "" if !defined $data; | ||||
1533 | } elsif (exists $self->{tag_data}->{$tag}) { | ||||
1534 | # tag data comes from $self->{tag_data}->{TAG}, typically from plugins | ||||
1535 | 321 | 1.01ms | $data = $self->{tag_data}->{$tag}; | ||
1536 | 321 | 1.12ms | $data = $data->($self,@args) if ref $data eq 'CODE'; | ||
1537 | 321 | 759µs | $data = join(' ',@$data) if ref $data eq 'ARRAY'; | ||
1538 | 321 | 847µs | $data = "" if !defined $data; | ||
1539 | } | ||||
1540 | 321 | 2.62ms | return $data; | ||
1541 | } | ||||
1542 | |||||
1543 | =item $string = $status->get_tag_raw($tagname, @args) | ||||
1544 | |||||
1545 | Similar to C<get_tag>, but keeps a tag name unchanged (does not uppercase it), | ||||
1546 | and does not convert arrayref tag values into a single string. | ||||
1547 | |||||
1548 | =cut | ||||
1549 | |||||
1550 | sub get_tag_raw { | ||||
1551 | my($self, $tag, @args) = @_; | ||||
1552 | |||||
1553 | return if !defined $tag; | ||||
1554 | my $data; | ||||
1555 | if (exists $common_tags{$tag}) { | ||||
1556 | # tag data from traditional pre-defined tag subroutines | ||||
1557 | $data = $common_tags{$tag}; | ||||
1558 | $data = $data->($self,@args) if ref $data eq 'CODE'; | ||||
1559 | $data = "" if !defined $data; | ||||
1560 | } elsif (exists $self->{tag_data}->{$tag}) { | ||||
1561 | # tag data comes from $self->{tag_data}->{TAG}, typically from plugins | ||||
1562 | $data = $self->{tag_data}->{$tag}; | ||||
1563 | $data = $data->($self,@args) if ref $data eq 'CODE'; | ||||
1564 | $data = "" if !defined $data; | ||||
1565 | } | ||||
1566 | return $data; | ||||
1567 | } | ||||
1568 | |||||
1569 | ########################################################################### | ||||
1570 | |||||
1571 | # public API for plugins | ||||
1572 | |||||
1573 | =item $status->set_spamd_result_item($subref) | ||||
1574 | |||||
1575 | Set an entry for the spamd result log line. C<$subref> should be a code | ||||
1576 | reference for a subroutine which will return a string in C<'name=VALUE'> | ||||
1577 | format, similar to the other entries in the spamd result line: | ||||
1578 | |||||
1579 | Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL, | ||||
1580 | DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN, | ||||
1581 | TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME, | ||||
1582 | TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm, | ||||
1583 | uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1, | ||||
1584 | rport=33153,mid=<9PS291LhupY>,autolearn=spam | ||||
1585 | |||||
1586 | C<name> and C<VALUE> must not contain C<=> or C<,> characters, as it | ||||
1587 | is important that these log lines are easy to parse. | ||||
1588 | |||||
1589 | The code reference will be called by spamd after the message has been scanned, | ||||
1590 | and the C<PerMsgStatus::check()> method has returned. | ||||
1591 | |||||
1592 | =cut | ||||
1593 | |||||
1594 | sub set_spamd_result_item { | ||||
1595 | my ($self, $ref) = @_; | ||||
1596 | push @{$self->{spamd_result_log_items}}, $ref; | ||||
1597 | } | ||||
1598 | |||||
1599 | # called by spamd | ||||
1600 | sub get_spamd_result_log_items { | ||||
1601 | my ($self) = @_; | ||||
1602 | my @ret; | ||||
1603 | foreach my $ref (@{$self->{spamd_result_log_items}}) { | ||||
1604 | push @ret, &$ref; | ||||
1605 | } | ||||
1606 | return @ret; | ||||
1607 | } | ||||
1608 | |||||
1609 | ########################################################################### | ||||
1610 | |||||
1611 | sub _get_tag_value_for_yesno { | ||||
1612 | my($self, $arg) = @_; | ||||
1613 | my($arg_spam, $arg_ham); | ||||
1614 | ($arg_spam, $arg_ham) = split(/,/, $arg, 2) if defined $arg; | ||||
1615 | return $self->{is_spam} ? (defined $arg_spam ? $arg_spam : 'Yes') | ||||
1616 | : (defined $arg_ham ? $arg_ham : 'No'); | ||||
1617 | } | ||||
1618 | |||||
1619 | sub _get_tag_value_for_score { | ||||
1620 | my ($self, $pad) = @_; | ||||
1621 | |||||
1622 | my $score = sprintf("%2.1f", $self->{score}); | ||||
1623 | my $rscore = $self->_get_tag_value_for_required_score(); | ||||
1624 | |||||
1625 | #Change due to bug 6419 to use Util function for consistency with spamd | ||||
1626 | #and PerMessageStatus | ||||
1627 | $score = Mail::SpamAssassin::Util::get_tag_value_for_score($score, $rscore, $self->{is_spam}); | ||||
1628 | |||||
1629 | #$pad IS PROVIDED BY THE _SCORE(PAD)_ tag | ||||
1630 | if (defined $pad && $pad =~ /^(0+| +)$/) { | ||||
1631 | my $count = length($1) + 3 - length($score); | ||||
1632 | $score = (substr($pad, 0, $count) . $score) if $count > 0; | ||||
1633 | } | ||||
1634 | return $score; | ||||
1635 | |||||
1636 | } | ||||
1637 | |||||
1638 | sub _get_tag_value_for_required_score { | ||||
1639 | my $self = shift; | ||||
1640 | return sprintf("%2.1f", $self->{conf}->{required_score}); | ||||
1641 | } | ||||
1642 | |||||
1643 | |||||
1644 | ########################################################################### | ||||
1645 | |||||
1646 | =item $status->finish () | ||||
1647 | |||||
1648 | Indicate that this C<$status> object is finished with, and can be destroyed. | ||||
1649 | |||||
1650 | If you are using SpamAssassin in a persistent environment, or checking many | ||||
1651 | mail messages from one C<Mail::SpamAssassin> factory, this method should be | ||||
1652 | called to ensure Perl's garbage collection will clean up old status objects. | ||||
1653 | |||||
1654 | =cut | ||||
1655 | |||||
1656 | # spent 46.7ms (26.6+20.1) within Mail::SpamAssassin::PerMsgStatus::finish which was called 234 times, avg 200µs/call:
# 234 times (26.6ms+20.1ms) by Mail::SpamAssassin::Plugin::Bayes::get_body_from_msg at line 1027 of Mail/SpamAssassin/Plugin/Bayes.pm, avg 200µs/call | ||||
1657 | 234 | 552µs | my ($self) = @_; | ||
1658 | |||||
1659 | 234 | 3.25ms | 234 | 0s | $self->{main}->call_plugins ("per_msg_finish", { # spent 16.3ms making 234 calls to Mail::SpamAssassin::call_plugins, avg 69µs/call, recursion: max depth 1, sum of overlapping time 16.3ms |
1660 | permsgstatus => $self | ||||
1661 | }); | ||||
1662 | |||||
1663 | 234 | 2.09ms | 234 | 3.82ms | $self->report_unsatisfied_actions; # spent 3.82ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::report_unsatisfied_actions, avg 16µs/call |
1664 | |||||
1665 | # Delete out all of the members of $self. This will remove any direct | ||||
1666 | # circular references and let the memory get reclaimed while also being more | ||||
1667 | # efficient than a foreach() loop over the keys. | ||||
1668 | 468 | 8.16ms | %{$self} = (); | ||
1669 | } | ||||
1670 | |||||
1671 | sub finish_tests { | ||||
1672 | my ($conf) = @_; | ||||
1673 | foreach my $method (@TEMPORARY_METHODS) { | ||||
1674 | if (defined &{$method}) { | ||||
1675 | undef &{$method}; | ||||
1676 | } | ||||
1677 | } | ||||
1678 | @TEMPORARY_METHODS = (); # clear for next time | ||||
1679 | %TEMPORARY_EVAL_GLUE_METHODS = (); | ||||
1680 | } | ||||
1681 | |||||
1682 | |||||
1683 | =item $name = $status->get_current_eval_rule_name() | ||||
1684 | |||||
1685 | Return the name of the currently-running eval rule. C<undef> is | ||||
1686 | returned if no eval rule is currently being run. Useful for plugins | ||||
1687 | to determine the current rule name while inside an eval test function | ||||
1688 | call. | ||||
1689 | |||||
1690 | =cut | ||||
1691 | |||||
1692 | sub get_current_eval_rule_name { | ||||
1693 | my ($self) = @_; | ||||
1694 | return $self->{current_rule_name}; | ||||
1695 | } | ||||
1696 | |||||
1697 | ########################################################################### | ||||
1698 | |||||
1699 | # spent 44.9s (97.6ms+44.8) within Mail::SpamAssassin::PerMsgStatus::extract_message_metadata which was called 234 times, avg 192ms/call:
# 234 times (97.6ms+44.8s) by Mail::SpamAssassin::Plugin::TxRep::learn_message at line 1775 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 192ms/call | ||||
1700 | 234 | 592µs | my ($self) = @_; | ||
1701 | |||||
1702 | 234 | 2.03ms | 234 | 2.05ms | my $timer = $self->{main}->time_method("extract_message_metadata"); # spent 2.05ms making 234 calls to Mail::SpamAssassin::time_method, avg 9µs/call |
1703 | 234 | 2.90ms | 234 | 3.81s | $self->{msg}->extract_message_metadata($self); # spent 3.81s making 234 calls to Mail::SpamAssassin::Message::extract_message_metadata, avg 16.3ms/call |
1704 | |||||
1705 | 234 | 1.58ms | foreach my $item (qw( | ||
1706 | relays_trusted relays_trusted_str num_relays_trusted | ||||
1707 | relays_untrusted relays_untrusted_str num_relays_untrusted | ||||
1708 | relays_internal relays_internal_str num_relays_internal | ||||
1709 | relays_external relays_external_str num_relays_external | ||||
1710 | num_relays_unparseable last_trusted_relay_index | ||||
1711 | last_internal_relay_index | ||||
1712 | )) | ||||
1713 | { | ||||
1714 | 3510 | 24.6ms | $self->{$item} = $self->{msg}->{metadata}->{$item}; | ||
1715 | } | ||||
1716 | |||||
1717 | # TODO: International domain names (UTF-8) must be converted to | ||||
1718 | # ASCII-compatible encoding (ACE) for the purpose of setting the | ||||
1719 | # SENDERDOMAIN and AUTHORDOMAIN tags (and probably for other uses too). | ||||
1720 | # (explicitly required for DMARC, draft-kucherawy-dmarc-base sect. 5.6.1) | ||||
1721 | # | ||||
1722 | 468 | 1.81ms | { local $1; | ||
1723 | 234 | 2.77ms | 234 | 484ms | my $addr = $self->get('EnvelopeFrom:addr', undef); # spent 484ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 2.07ms/call |
1724 | # collect a FQDN, ignoring potential trailing WSP | ||||
1725 | 234 | 5.55ms | 234 | 2.20ms | if (defined $addr && $addr =~ /\@([^@. \t]+\.[^@ \t]+?)[ \t]*\z/s) { # spent 2.20ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 9µs/call |
1726 | 234 | 2.61ms | 234 | 15.3ms | $self->set_tag('SENDERDOMAIN', lc $1); # spent 15.3ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::set_tag, avg 66µs/call |
1727 | } | ||||
1728 | # TODO: the get ':addr' only returns the first address; this should be | ||||
1729 | # augmented to be able to return all addresses in a header field, multiple | ||||
1730 | # addresses in a From header field are allowed according to RFC 5322 | ||||
1731 | 234 | 1.99ms | 234 | 92.5ms | $addr = $self->get('From:addr', undef); # spent 92.5ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 395µs/call |
1732 | 234 | 5.40ms | 234 | 1.76ms | if (defined $addr && $addr =~ /\@([^@. \t]+\.[^@ \t]+?)[ \t]*\z/s) { # spent 1.76ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 8µs/call |
1733 | 233 | 2.37ms | 233 | 15.4ms | $self->set_tag('AUTHORDOMAIN', lc $1); # spent 15.4ms making 233 calls to Mail::SpamAssassin::PerMsgStatus::set_tag, avg 66µs/call |
1734 | } | ||||
1735 | } | ||||
1736 | |||||
1737 | 234 | 1.81ms | 234 | 13.6ms | $self->set_tag('RELAYSTRUSTED', $self->{relays_trusted_str}); # spent 13.6ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::set_tag, avg 58µs/call |
1738 | 234 | 1.76ms | 234 | 13.0ms | $self->set_tag('RELAYSUNTRUSTED', $self->{relays_untrusted_str}); # spent 13.0ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::set_tag, avg 55µs/call |
1739 | 234 | 1.71ms | 234 | 13.1ms | $self->set_tag('RELAYSINTERNAL', $self->{relays_internal_str}); # spent 13.1ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::set_tag, avg 56µs/call |
1740 | 234 | 1.69ms | 234 | 12.7ms | $self->set_tag('RELAYSEXTERNAL', $self->{relays_external_str}); # spent 12.7ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::set_tag, avg 54µs/call |
1741 | 234 | 2.88ms | 468 | 16.2ms | $self->set_tag('LANGUAGES', $self->{msg}->get_metadata("X-Languages")); # spent 13.5ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::set_tag, avg 58µs/call
# spent 2.71ms making 234 calls to Mail::SpamAssassin::Message::get_metadata, avg 12µs/call |
1742 | |||||
1743 | # This should happen before we get called, but just in case. | ||||
1744 | 234 | 1.47ms | if (!defined $self->{msg}->{metadata}->{html}) { | ||
1745 | 234 | 2.04ms | 234 | 25.8s | $self->get_decoded_stripped_body_text_array(); # spent 25.8s making 234 calls to Mail::SpamAssassin::PerMsgStatus::get_decoded_stripped_body_text_array, avg 110ms/call |
1746 | } | ||||
1747 | 234 | 1.24ms | $self->{html} = $self->{msg}->{metadata}->{html}; | ||
1748 | |||||
1749 | # allow plugins to add more metadata, read the stuff that's there, etc. | ||||
1750 | 234 | 5.24ms | 234 | 0s | $self->{main}->call_plugins ("parsed_metadata", { permsgstatus => $self }); # spent 14.5s making 234 calls to Mail::SpamAssassin::call_plugins, avg 62.0ms/call, recursion: max depth 1, sum of overlapping time 14.5s |
1751 | } | ||||
1752 | |||||
1753 | ########################################################################### | ||||
1754 | |||||
1755 | =item $status->get_decoded_body_text_array () | ||||
1756 | |||||
1757 | Returns the message body, with B<base64> or B<quoted-printable> encodings | ||||
1758 | decoded, and non-text parts or non-inline attachments stripped. | ||||
1759 | |||||
1760 | It is returned as an array of strings, with each string representing | ||||
1761 | one newline-separated line of the body. | ||||
1762 | |||||
1763 | =cut | ||||
1764 | |||||
1765 | sub get_decoded_body_text_array { | ||||
1766 | return $_[0]->{msg}->get_decoded_body_text_array(); | ||||
1767 | } | ||||
1768 | |||||
1769 | =item $status->get_decoded_stripped_body_text_array () | ||||
1770 | |||||
1771 | Returns the message body, decoded (as described in | ||||
1772 | get_decoded_body_text_array()), with HTML rendered, and with whitespace | ||||
1773 | normalized. | ||||
1774 | |||||
1775 | It will always render text/html, and will use a heuristic to determine if other | ||||
1776 | text/* parts should be considered text/html. | ||||
1777 | |||||
1778 | It is returned as an array of strings, with each string representing one | ||||
1779 | 'paragraph'. Paragraphs, in plain-text mails, are double-newline-separated | ||||
1780 | blocks of multi-line text. | ||||
1781 | |||||
1782 | =cut | ||||
1783 | |||||
1784 | # spent 25.9s (21.6ms+25.8) within Mail::SpamAssassin::PerMsgStatus::get_decoded_stripped_body_text_array which was called 702 times, avg 36.8ms/call:
# 468 times (6.74ms+14.9ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2360, avg 46µs/call
# 234 times (14.9ms+25.8s) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1745, avg 110ms/call | ||||
1785 | 702 | 11.5ms | 702 | 25.8s | return $_[0]->{msg}->get_rendered_body_text_array(); # spent 25.8s making 702 calls to Mail::SpamAssassin::Message::get_rendered_body_text_array, avg 36.8ms/call |
1786 | } | ||||
1787 | |||||
1788 | ########################################################################### | ||||
1789 | |||||
1790 | =item $status->get (header_name [, default_value]) | ||||
1791 | |||||
1792 | Returns a message header, pseudo-header, real name or address. | ||||
1793 | C<header_name> is the name of a mail header, such as 'Subject', 'To', | ||||
1794 | etc. If C<default_value> is given, it will be used if the requested | ||||
1795 | C<header_name> does not exist. | ||||
1796 | |||||
1797 | Appending C<:raw> to the header name will inhibit decoding of quoted-printable | ||||
1798 | or base-64 encoded strings. | ||||
1799 | |||||
1800 | Appending a modifier C<:addr> to a header field name will cause everything | ||||
1801 | except the first email address to be removed from the header field. It is | ||||
1802 | mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with | ||||
1803 | their 'Resent-*' counterparts, and the 'Return-Path'. For example, all of | ||||
1804 | the following will result in "example@foo": | ||||
1805 | |||||
1806 | =over 4 | ||||
1807 | |||||
1808 | =item example@foo | ||||
1809 | |||||
1810 | =item example@foo (Foo Blah) | ||||
1811 | |||||
1812 | =item example@foo, example@bar | ||||
1813 | |||||
1814 | =item display: example@foo (Foo Blah), example@bar ; | ||||
1815 | |||||
1816 | =item Foo Blah <example@foo> | ||||
1817 | |||||
1818 | =item "Foo Blah" <example@foo> | ||||
1819 | |||||
1820 | =item "'Foo Blah'" <example@foo> | ||||
1821 | |||||
1822 | =back | ||||
1823 | |||||
1824 | Appending a modifier C<:name> to a header field name will cause everything | ||||
1825 | except the first display name to be removed from the header field. It is | ||||
1826 | mainly applicable to header fields containing a single mail address: 'From', | ||||
1827 | 'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts. | ||||
1828 | For example, all of the following will result in "Foo Blah". One level of | ||||
1829 | single quotes is stripped too, as it is often seen. | ||||
1830 | |||||
1831 | =over 4 | ||||
1832 | |||||
1833 | =item example@foo (Foo Blah) | ||||
1834 | |||||
1835 | =item example@foo (Foo Blah), example@bar | ||||
1836 | |||||
1837 | =item display: example@foo (Foo Blah), example@bar ; | ||||
1838 | |||||
1839 | =item Foo Blah <example@foo> | ||||
1840 | |||||
1841 | =item "Foo Blah" <example@foo> | ||||
1842 | |||||
1843 | =item "'Foo Blah'" <example@foo> | ||||
1844 | |||||
1845 | =back | ||||
1846 | |||||
1847 | There are several special pseudo-headers that can be specified: | ||||
1848 | |||||
1849 | =over 4 | ||||
1850 | |||||
1851 | =item C<ALL> can be used to mean the text of all the message's headers. | ||||
1852 | |||||
1853 | =item C<ALL-TRUSTED> can be used to mean the text of all the message's headers | ||||
1854 | that could only have been added by trusted relays. | ||||
1855 | |||||
1856 | =item C<ALL-INTERNAL> can be used to mean the text of all the message's headers | ||||
1857 | that could only have been added by internal relays. | ||||
1858 | |||||
1859 | =item C<ALL-UNTRUSTED> can be used to mean the text of all the message's | ||||
1860 | headers that may have been added by untrusted relays. To make this | ||||
1861 | pseudo-header more useful for header rules the 'Received' header that was added | ||||
1862 | by the last trusted relay is included, even though it can be trusted. | ||||
1863 | |||||
1864 | =item C<ALL-EXTERNAL> can be used to mean the text of all the message's headers | ||||
1865 | that may have been added by external relays. Like C<ALL-UNTRUSTED> the | ||||
1866 | 'Received' header added by the last internal relay is included. | ||||
1867 | |||||
1868 | =item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc' | ||||
1869 | headers. | ||||
1870 | |||||
1871 | =item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP | ||||
1872 | transaction that delivered this message, if this data has been made available | ||||
1873 | by the SMTP server. | ||||
1874 | |||||
1875 | =item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message; | ||||
1876 | some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id' | ||||
1877 | or 'X-Message-Id', then uses its own one in the 'Message-Id' header. The value | ||||
1878 | returned for this symbol is the text from all 3 headers, separated by newlines. | ||||
1879 | |||||
1880 | =item C<X-Spam-Relays-Untrusted> is the generated metadata of untrusted relays | ||||
1881 | the message has passed through | ||||
1882 | |||||
1883 | =item C<X-Spam-Relays-Trusted> is the generated metadata of trusted relays | ||||
1884 | the message has passed through | ||||
1885 | |||||
1886 | =back | ||||
1887 | |||||
1888 | =cut | ||||
1889 | |||||
1890 | # only uses two arguments, ignores $defval | ||||
1891 | # spent 707ms (273+435) within Mail::SpamAssassin::PerMsgStatus::_get which was called 2594 times, avg 273µs/call:
# 2594 times (273ms+435ms) by Mail::SpamAssassin::PerMsgStatus::get at line 2070, avg 273µs/call | ||||
1892 | 2594 | 5.86ms | my ($self, $request) = @_; | ||
1893 | |||||
1894 | 2594 | 4.25ms | my $result; | ||
1895 | 2594 | 4.66ms | my $getaddr = 0; | ||
1896 | 2594 | 4.76ms | my $getname = 0; | ||
1897 | 2594 | 4.49ms | my $getraw = 0; | ||
1898 | |||||
1899 | # special queries - process and strip modifiers | ||||
1900 | 2594 | 10.3ms | if (index($request,':') >= 0) { # triage | ||
1901 | 1404 | 7.40ms | local $1; | ||
1902 | 1404 | 29.3ms | 1404 | 11.2ms | while ($request =~ s/:([^:]*)//) { # spent 11.2ms making 1404 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 8µs/call |
1903 | 1638 | 25.7ms | 1404 | 3.01ms | if ($1 eq 'raw') { $getraw = 1 } # spent 3.01ms making 1404 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 2µs/call |
1904 | 1170 | 2.88ms | elsif ($1 eq 'addr') { $getaddr = $getraw = 1 } | ||
1905 | elsif ($1 eq 'name') { $getname = 1 } | ||||
1906 | } | ||||
1907 | } | ||||
1908 | 2594 | 8.93ms | my $request_lc = lc $request; | ||
1909 | |||||
1910 | # ALL: entire pristine or semi-raw headers | ||||
1911 | 2594 | 22.2ms | if ($request eq 'ALL') { | ||
1912 | $result = $getraw ? $self->{msg}->get_pristine_header() | ||||
1913 | 234 | 2.48ms | 234 | 3.62ms | : $self->{msg}->get_all_headers(1); # spent 3.62ms making 234 calls to Mail::SpamAssassin::Message::get_pristine_header, avg 15µs/call |
1914 | } | ||||
1915 | # ALL-TRUSTED: entire trusted raw headers | ||||
1916 | elsif ($request eq 'ALL-TRUSTED') { | ||||
1917 | # '+1' since we added the received header even though it's not considered | ||||
1918 | # trusted, so we know that those headers can be trusted too | ||||
1919 | return $self->get_all_hdrs_in_rcvd_index_range( | ||||
1920 | undef, $self->{last_trusted_relay_index}+1); | ||||
1921 | } | ||||
1922 | # ALL-INTERNAL: entire internal raw headers | ||||
1923 | elsif ($request eq 'ALL-INTERNAL') { | ||||
1924 | # '+1' for the same reason as in ALL-TRUSTED above | ||||
1925 | return $self->get_all_hdrs_in_rcvd_index_range( | ||||
1926 | undef, $self->{last_internal_relay_index}+1); | ||||
1927 | } | ||||
1928 | # ALL-UNTRUSTED: entire untrusted raw headers | ||||
1929 | elsif ($request eq 'ALL-UNTRUSTED') { | ||||
1930 | # '+1' for the same reason as in ALL-TRUSTED above | ||||
1931 | return $self->get_all_hdrs_in_rcvd_index_range( | ||||
1932 | $self->{last_trusted_relay_index}+1, undef); | ||||
1933 | } | ||||
1934 | # ALL-EXTERNAL: entire external raw headers | ||||
1935 | elsif ($request eq 'ALL-EXTERNAL') { | ||||
1936 | # '+1' for the same reason as in ALL-TRUSTED above | ||||
1937 | return $self->get_all_hdrs_in_rcvd_index_range( | ||||
1938 | $self->{last_internal_relay_index}+1, undef); | ||||
1939 | } | ||||
1940 | # EnvelopeFrom: the SMTP MAIL FROM: address | ||||
1941 | elsif ($request_lc eq "\LEnvelopeFrom") { | ||||
1942 | 234 | 2.41ms | 234 | 422ms | $result = $self->get_envelope_from(); # spent 422ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get_envelope_from, avg 1.80ms/call |
1943 | } | ||||
1944 | # untrusted relays list, as string | ||||
1945 | elsif ($request_lc eq "\LX-Spam-Relays-Untrusted") { | ||||
1946 | $result = $self->{relays_untrusted_str}; | ||||
1947 | } | ||||
1948 | # trusted relays list, as string | ||||
1949 | elsif ($request_lc eq "\LX-Spam-Relays-Trusted") { | ||||
1950 | $result = $self->{relays_trusted_str}; | ||||
1951 | } | ||||
1952 | # external relays list, as string | ||||
1953 | elsif ($request_lc eq "\LX-Spam-Relays-External") { | ||||
1954 | $result = $self->{relays_external_str}; | ||||
1955 | } | ||||
1956 | # internal relays list, as string | ||||
1957 | elsif ($request_lc eq "\LX-Spam-Relays-Internal") { | ||||
1958 | $result = $self->{relays_internal_str}; | ||||
1959 | } | ||||
1960 | # ToCc: the combined recipients list | ||||
1961 | elsif ($request_lc eq "\LToCc") { | ||||
1962 | $result = join("\n", $self->{msg}->get_header('To', $getraw)); | ||||
1963 | if ($result ne '') { | ||||
1964 | chomp $result; | ||||
1965 | $result .= ", " if $result =~ /\S/; | ||||
1966 | } | ||||
1967 | $result .= join("\n", $self->{msg}->get_header('Cc', $getraw)); | ||||
1968 | $result = undef if $result eq ''; | ||||
1969 | } | ||||
1970 | # MESSAGEID: handle lists which move the real message-id to another | ||||
1971 | # header for resending. | ||||
1972 | elsif ($request eq 'MESSAGEID') { | ||||
1973 | $result = join("\n", grep { defined($_) && $_ ne '' } | ||||
1974 | $self->{msg}->get_header('X-Message-Id', $getraw), | ||||
1975 | $self->{msg}->get_header('Resent-Message-Id', $getraw), | ||||
1976 | $self->{msg}->get_header('X-Original-Message-ID', $getraw), | ||||
1977 | $self->{msg}->get_header('Message-Id', $getraw)); | ||||
1978 | } | ||||
1979 | # a conventional header | ||||
1980 | else { | ||||
1981 | my @results = $getraw ? $self->{msg}->raw_header($request) | ||||
1982 | 2126 | 20.8ms | 2126 | 129ms | : $self->{msg}->get_header($request); # spent 92.4ms making 1190 calls to Mail::SpamAssassin::Message::Node::get_header, avg 78µs/call
# spent 36.3ms making 936 calls to Mail::SpamAssassin::Message::Node::raw_header, avg 39µs/call |
1983 | # dbg("message: get(%s) = %s", $request, join(", ",@results)); | ||||
1984 | 2126 | 10.4ms | if (@results) { | ||
1985 | 484 | 3.39ms | $result = join('', @results); | ||
1986 | } else { # metadata | ||||
1987 | 1642 | 12.5ms | 1642 | 21.2ms | $result = $self->{msg}->get_metadata($request); # spent 21.2ms making 1642 calls to Mail::SpamAssassin::Message::get_metadata, avg 13µs/call |
1988 | } | ||||
1989 | } | ||||
1990 | |||||
1991 | # special queries | ||||
1992 | 2594 | 9.63ms | if (defined $result && ($getaddr || $getname)) { | ||
1993 | 468 | 1.42ms | local $1; | ||
1994 | 468 | 4.76ms | 468 | 1.34ms | $result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;' # spent 1.34ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 3µs/call |
1995 | 468 | 14.8ms | 468 | 4.64ms | $result =~ s/\s+/ /g; # reduce whitespace # spent 4.64ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 10µs/call |
1996 | 468 | 5.09ms | 468 | 1.97ms | $result =~ s/^\s+//; # leading whitespace # spent 1.97ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 4µs/call |
1997 | 468 | 5.27ms | 468 | 2.26ms | $result =~ s/\s+$//; # trailing whitespace # spent 2.26ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 5µs/call |
1998 | |||||
1999 | 468 | 2.35ms | if ($getaddr) { | ||
2000 | # Get the email address out of the header | ||||
2001 | # All of these should result in "jm@foo": | ||||
2002 | # jm@foo | ||||
2003 | # jm@foo (Foo Blah) | ||||
2004 | # jm@foo, jm@bar | ||||
2005 | # display: jm@foo (Foo Blah), jm@bar ; | ||||
2006 | # Foo Blah <jm@foo> | ||||
2007 | # "Foo Blah" <jm@foo> | ||||
2008 | # "'Foo Blah'" <jm@foo> | ||||
2009 | # | ||||
2010 | # strip out the (comments) | ||||
2011 | 468 | 4.00ms | 468 | 1.19ms | $result =~ s/\s*\(.*?\)//g; # spent 1.19ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 3µs/call |
2012 | # strip out the "quoted text", unless it's the only thing in the string | ||||
2013 | 468 | 5.11ms | 468 | 1.17ms | if ($result !~ /^".*"$/) { # spent 1.17ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 3µs/call |
2014 | 468 | 5.28ms | 468 | 2.37ms | $result =~ s/(?<!<)"[^"]*"(?!\@)//g; #" emacs # spent 2.37ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 5µs/call |
2015 | } | ||||
2016 | # Foo Blah <jm@xxx> or <jm@xxx> | ||||
2017 | 468 | 1.41ms | local $1; | ||
2018 | 468 | 7.05ms | 468 | 4.19ms | $result =~ s/^[^"<]*?<(.*?)>.*$/$1/; # spent 4.19ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 9µs/call |
2019 | # multiple addresses on one line? remove all but first | ||||
2020 | 468 | 5.11ms | 468 | 1.34ms | $result =~ s/,.*$//; # spent 1.34ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 3µs/call |
2021 | } | ||||
2022 | elsif ($getname) { | ||||
2023 | # Get the display name out of the header | ||||
2024 | # All of these should result in "Foo Blah": | ||||
2025 | # | ||||
2026 | # jm@foo (Foo Blah) | ||||
2027 | # (Foo Blah) jm@foo | ||||
2028 | # jm@foo (Foo Blah), jm@bar | ||||
2029 | # display: jm@foo (Foo Blah), jm@bar ; | ||||
2030 | # Foo Blah <jm@foo> | ||||
2031 | # "Foo Blah" <jm@foo> | ||||
2032 | # "'Foo Blah'" <jm@foo> | ||||
2033 | # | ||||
2034 | local $1; | ||||
2035 | # does not handle mailbox-list or address-list well, to be improved | ||||
2036 | if ($result =~ /^ \s* (.*?) \s* < [^<>]* >/sx) { | ||||
2037 | $result = $1; # display-name, RFC 5322 | ||||
2038 | # name-addr = [display-name] angle-addr | ||||
2039 | # display-name = phrase | ||||
2040 | # phrase = 1*word / obs-phrase | ||||
2041 | # word = atom / quoted-string | ||||
2042 | # obs-phrase = word *(word / "." / CFWS) | ||||
2043 | $result =~ s{ " ( (?: [^"\\] | \\. )* ) " } | ||||
2044 | { my $s=$1; $s=~s{\\(.)}{$1}gs; $s }gsxe; | ||||
2045 | } elsif ($result =~ /^ [^(,]*? \( (.*?) \) /sx) { # legacy form | ||||
2046 | # nested comments are not handled, to be improved | ||||
2047 | $result = $1; | ||||
2048 | } else { # no display name | ||||
2049 | $result = ''; | ||||
2050 | } | ||||
2051 | $result =~ s/^ \s* ' \s* (.*?) \s* ' \s* \z/$1/sx; | ||||
2052 | } | ||||
2053 | } | ||||
2054 | 2594 | 33.6ms | return $result; | ||
2055 | } | ||||
2056 | |||||
2057 | # optimized for speed | ||||
2058 | # $_[0] is self | ||||
2059 | # $_[1] is request | ||||
2060 | # $_[2] is defval | ||||
2061 | # spent 777ms (120+657) within Mail::SpamAssassin::PerMsgStatus::get which was called 3383 times, avg 230µs/call:
# 1170 times (35.0ms+107ms) by Mail::SpamAssassin::PerMsgStatus::all_from_addrs at line 3042, avg 121µs/call
# 321 times (8.15ms+0s) by Mail::SpamAssassin::Plugin::TxRep::check_senders_reputation at line 1244 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 25µs/call
# 234 times (8.32ms+476ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1723, avg 2.07ms/call
# 234 times (9.37ms+83.1ms) by Mail::SpamAssassin::PerMsgStatus::extract_message_metadata at line 1731, avg 395µs/call
# 234 times (8.38ms+39.0ms) by Mail::SpamAssassin::PerMsgStatus::all_from_addrs at line 3028, avg 202µs/call
# 234 times (9.23ms+-9.23ms) by Mail::SpamAssassin::PerMsgStatus::get_envelope_from at line 2855, avg 0s/call
# 234 times (14.9ms+-14.9ms) by Mail::SpamAssassin::PerMsgStatus::get_envelope_from at line 2887, avg 0s/call
# 234 times (10.6ms+-10.6ms) by Mail::SpamAssassin::PerMsgStatus::get_envelope_from at line 2884, avg 0s/call
# 234 times (8.52ms+-8.52ms) by Mail::SpamAssassin::PerMsgStatus::get_envelope_from at line 2867, avg 0s/call
# 234 times (6.64ms+-6.64ms) by Mail::SpamAssassin::PerMsgStatus::get_envelope_from at line 2846, avg 0s/call
# 12 times (407µs+1.76ms) by Mail::SpamAssassin::PerMsgStatus::all_to_addrs at line 3124, avg 180µs/call
# 5 times (199µs+-199µs) by Mail::SpamAssassin::PerMsgStatus::get_envelope_from at line 2847, avg 0s/call
# 2 times (86µs+317µs) by Mail::SpamAssassin::PerMsgStatus::all_to_addrs at line 3104, avg 202µs/call
# once (45µs+194µs) by Mail::SpamAssassin::PerMsgStatus::all_to_addrs at line 3113 | ||||
2062 | 3383 | 7.58ms | my $cache = $_[0]->{c}; | ||
2063 | 3383 | 5.57ms | my $found; | ||
2064 | 3383 | 22.8ms | if (exists $cache->{$_[1]}) { | ||
2065 | # return cache entry if it is known | ||||
2066 | # (measured hit/attempts rate on a production mailer is about 47%) | ||||
2067 | 789 | 3.49ms | $found = $cache->{$_[1]}; | ||
2068 | } else { | ||||
2069 | # fill in a cache entry | ||||
2070 | 2594 | 20.7ms | 2594 | 707ms | $found = _get(@_); # spent 883ms making 2594 calls to Mail::SpamAssassin::PerMsgStatus::_get, avg 340µs/call, recursion: max depth 1, sum of overlapping time 175ms |
2071 | 2594 | 22.3ms | $cache->{$_[1]} = $found; | ||
2072 | } | ||||
2073 | # if the requested header wasn't found, we should return a default value | ||||
2074 | # as specified by the caller: if defval argument is present it represents | ||||
2075 | # a default value even if undef; if defval argument is absent a default | ||||
2076 | # value is an empty string for upwards compatibility | ||||
2077 | 3383 | 50.7ms | return (defined $found ? $found : @_ > 2 ? $_[2] : ''); | ||
2078 | } | ||||
2079 | |||||
2080 | ########################################################################### | ||||
2081 | |||||
2082 | # uri parsing from plain text: | ||||
2083 | # The goals are to find URIs in plain text spam that are intended to be clicked on or copy/pasted, but | ||||
2084 | # ignore random strings that might look like URIs, for example in uuencoded files, and to ignore | ||||
2085 | # URIs that spammers might seed in spam in ways not visible or clickable to add work to spam filters. | ||||
2086 | # When we extract a domain and look it up in an RBL, an FP on deciding that the text is a URI is not much | ||||
2087 | # of a problem, as the only cost is an extra RBL lookup. The same FP is worse if the URI is used in matching rule | ||||
2088 | # because it could lead to a rule FP, as in bug 5780 with WIERD_PORT matching random uuencoded strings. | ||||
2089 | # The principles of the following code are 1) if ThunderBird or Outlook Express would linkify a string, | ||||
2090 | # then we should attempt to parse it as a URI; 2) Where TBird and OE parse differently, choose to do what is most | ||||
2091 | # likely to find a domain for the RBL tests; 3) If it begins with a scheme or www\d*\. or ftp\. assume that | ||||
2092 | # it is a URI; 4) If it does not then require that the start of the string looks like a FQDN with a valid TLD; | ||||
2093 | # 5) Reject strings that after parsing, URLDecoding, and redirection processing don't have a valid TLD | ||||
2094 | # | ||||
2095 | # We get the entire URI that would be linkified before dealing with it, in order to do the right thing | ||||
2096 | # with URI-encodings and redirecting URIs. | ||||
2097 | # | ||||
2098 | # The delimiters for start of a URI in TBird are @(`{|[\"'<>,\s in OE they are ("<\s | ||||
2099 | # | ||||
2100 | # Tbird allows .,?';-! in a URI but ignores [.,?';-!]* at the end. | ||||
2101 | # TBird's end delimiters are )`{}|[]"<>\s but ) is only an end delmiter if there is no ( in the URI | ||||
2102 | # OE only uses space as a delimiter, but ignores [~!@#^&*()_+`-={}|[]:";'<>?,.]* at the end. | ||||
2103 | # | ||||
2104 | # Both TBird and OE decide that a URI is an email address when there is '@' character embedded in it. | ||||
2105 | # TBird has some additional restrictions on email URIs: They cannot contain non-ASCII characters and their end | ||||
2106 | # delimiters include ( and ' | ||||
2107 | # | ||||
2108 | # bug 4522: ISO2022 format mail, most commonly Japanese SHIFT-JIS, inserts a three character escape sequence ESC ( . | ||||
2109 | |||||
2110 | # spent 70.5ms (53.7+16.9) within Mail::SpamAssassin::PerMsgStatus::_tbirdurire which was called 468 times, avg 151µs/call:
# 468 times (53.7ms+16.9ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2365, avg 151µs/call | ||||
2111 | 468 | 997µs | my ($self) = @_; | ||
2112 | |||||
2113 | # Cached? | ||||
2114 | 468 | 1.19ms | return $self->{tbirdurire} if $self->{tbirdurire}; | ||
2115 | |||||
2116 | # a hybrid of tbird and oe's version of uri parsing | ||||
2117 | 468 | 1.24ms | my $tbirdstartdelim = '><"\'`,{[(|\s' . "\x1b"; # The \x1b as per bug 4522 | ||
2118 | 468 | 1.09ms | my $iso2022shift = "\x1b" . '\(.'; # bug 4522 | ||
2119 | 468 | 1.08ms | my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b"; # The \x1b as per bug 4522 | ||
2120 | 468 | 1.14ms | my $nonASCII = '\x80-\xff'; | ||
2121 | |||||
2122 | # bug 7100: we allow a comma to delimit the end of an email address because it will never appear in a domain name, and | ||||
2123 | # it's a common thing to find in text | ||||
2124 | 468 | 1.77ms | my $tbirdenddelimemail = $tbirdenddelim . ',(\'' . $nonASCII; # tbird ignores non-ASCII mail addresses for now, until RFC changes | ||
2125 | 468 | 1.36ms | my $tbirdenddelimplusat = $tbirdenddelimemail . '@'; | ||
2126 | |||||
2127 | # valid TLDs | ||||
2128 | 468 | 2.00ms | my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re}; | ||
2129 | |||||
2130 | # knownscheme regexp looks for either a https?: or ftp: scheme, or www\d*\. or ftp\. prefix, i.e., likely to start a URL | ||||
2131 | # schemeless regexp looks for a valid TLD at the end of what may be a FQDN, followed by optional ., optional :portnum, optional /rest_of_uri | ||||
2132 | 468 | 26.0ms | 936 | 6.05ms | my $urischemeless = qr/[a-z\d][a-z\d._-]{0,251}\.${tldsRE}\.?(?::\d{1,5})?(?:\/[^$tbirdenddelim]{1,251})?/io; # spent 3.47ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:regcomp, avg 7µs/call
# spent 2.59ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:qr, avg 6µs/call |
2133 | 468 | 8.22ms | 936 | 2.71ms | my $uriknownscheme = qr/(?:(?:(?:(?:https?)|(?:ftp)):(?:\/\/)?)|(?:(?:www\d{0,2}|ftp)\.))[^$tbirdenddelim]{1,251}/io; # spent 1.74ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:qr, avg 4µs/call
# spent 965µs making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:regcomp, avg 2µs/call |
2134 | 468 | 8.39ms | 936 | 2.85ms | my $urimailscheme = qr/(?:mailto:)?[^$tbirdenddelimplusat]{1,251}@[^$tbirdenddelimemail]{1,251}/io; # spent 1.81ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:qr, avg 4µs/call
# spent 1.03ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:regcomp, avg 2µs/call |
2135 | |||||
2136 | 468 | 11.8ms | 936 | 5.27ms | $self->{tbirdurire} = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim])) # spent 3.47ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:regcomp, avg 7µs/call
# spent 1.79ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::CORE:qr, avg 4µs/call |
2137 | (?:(?:($uriknownscheme)(?=(?:[$tbirdenddelim]|\z))) | | ||||
2138 | (?:($urimailscheme)(?=(?:[$tbirdenddelimemail]|\z))) | | ||||
2139 | (?:\b($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/xo; | ||||
2140 | |||||
2141 | 468 | 5.87ms | return $self->{tbirdurire}; | ||
2142 | } | ||||
2143 | |||||
2144 | =item $status->get_uri_list () | ||||
2145 | |||||
2146 | Returns an array of all unique URIs found in the message. It takes | ||||
2147 | a combination of the URIs found in the rendered (decoded and HTML | ||||
2148 | stripped) body and the URIs found when parsing the HTML in the message. | ||||
2149 | Will also set $status->{uri_list} (the array as returned by this function). | ||||
2150 | |||||
2151 | The returned array will include the "raw" URI as well as | ||||
2152 | "slightly cooked" versions. For example, the single URI | ||||
2153 | 'http://%77w%77.example.com/' will get turned into: | ||||
2154 | ( 'http://%77w%77.example.com/', 'http://www.example.com/' ) | ||||
2155 | |||||
2156 | =cut | ||||
2157 | |||||
2158 | # spent 5.70s (114ms+5.58) within Mail::SpamAssassin::PerMsgStatus::get_uri_list which was called 234 times, avg 24.3ms/call:
# 234 times (114ms+5.58s) by Mail::SpamAssassin::Plugin::Bayes::_get_msgdata_from_permsgstatus at line 1050 of Mail/SpamAssassin/Plugin/Bayes.pm, avg 24.3ms/call | ||||
2159 | 234 | 551µs | my ($self) = @_; | ||
2160 | |||||
2161 | # use cached answer if available | ||||
2162 | 234 | 643µs | if (defined $self->{uri_list}) { | ||
2163 | return @{$self->{uri_list}}; | ||||
2164 | } | ||||
2165 | |||||
2166 | 234 | 449µs | my @uris; | ||
2167 | # $self->{redirect_num} = 0; | ||||
2168 | |||||
2169 | # get URIs from HTML parsing | ||||
2170 | 3024 | 40.3ms | 2790 | 5.58s | while(my($uri, $info) = each %{ $self->get_uri_detail_list() }) { # spent 5.58s making 2790 calls to Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list, avg 2.00ms/call |
2171 | 2556 | 10.4ms | if ($info->{cleaned}) { | ||
2172 | 5112 | 19.1ms | foreach (@{$info->{cleaned}}) { | ||
2173 | 2708 | 13.5ms | push(@uris, $_); | ||
2174 | |||||
2175 | # count redirection attempts and log it | ||||
2176 | # if (my @http = m{\b(https?:/{0,2})}gi) { | ||||
2177 | # $self->{redirect_num} = $#http if ($#http > $self->{redirect_num}); | ||||
2178 | # } | ||||
2179 | } | ||||
2180 | } | ||||
2181 | } | ||||
2182 | |||||
2183 | 234 | 763µs | $self->{uri_list} = \@uris; | ||
2184 | # $self->set_tag('URILIST', @uris == 1 ? $uris[0] : \@uris) if @uris; | ||||
2185 | |||||
2186 | 234 | 2.55ms | return @uris; | ||
2187 | } | ||||
2188 | |||||
2189 | =item $status->get_uri_detail_list () | ||||
2190 | |||||
2191 | Returns a hash reference of all unique URIs found in the message and | ||||
2192 | various data about where the URIs were found in the message. It takes a | ||||
2193 | combination of the URIs found in the rendered (decoded and HTML stripped) | ||||
2194 | body and the URIs found when parsing the HTML in the message. Will also | ||||
2195 | set $status->{uri_detail_list} (the hash reference as returned by this | ||||
2196 | function). This function will also set $status->{uri_domain_count} (count of | ||||
2197 | unique domains). | ||||
2198 | |||||
2199 | The hash format looks something like this: | ||||
2200 | |||||
2201 | raw_uri => { | ||||
2202 | types => { a => 1, img => 1, parsed => 1 }, | ||||
2203 | cleaned => [ canonicalized_uri ], | ||||
2204 | anchor_text => [ "click here", "no click here" ], | ||||
2205 | domains => { domain1 => 1, domain2 => 1 }, | ||||
2206 | } | ||||
2207 | |||||
2208 | C<raw_uri> is whatever the URI was in the message itself | ||||
2209 | (http://spamassassin.apache%2Eorg/). | ||||
2210 | |||||
2211 | C<types> is a hash of the HTML tags (lowercase) which referenced | ||||
2212 | the raw_uri. I<parsed> is a faked type which specifies that the | ||||
2213 | raw_uri was seen in the rendered text. | ||||
2214 | |||||
2215 | C<cleaned> is an array of the raw and canonicalized version of the raw_uri | ||||
2216 | (http://spamassassin.apache%2Eorg/, http://spamassassin.apache.org/). | ||||
2217 | |||||
2218 | C<anchor_text> is an array of the anchor text (text between <a> and | ||||
2219 | </a>), if any, which linked to the URI. | ||||
2220 | |||||
2221 | C<domains> is a hash of the domains found in the canonicalized URIs. | ||||
2222 | |||||
2223 | C<hosts> is a hash of unstripped hostnames found in the canonicalized URIs | ||||
2224 | as hash keys, with their domain part stored as a value of each hash entry. | ||||
2225 | |||||
2226 | =cut | ||||
2227 | |||||
2228 | # spent 11.0s (588ms+10.5) within Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list which was called 3024 times, avg 3.65ms/call:
# 2790 times (303ms+5.28s) by Mail::SpamAssassin::PerMsgStatus::get_uri_list at line 2170, avg 2.00ms/call
# 234 times (285ms+5.17s) by Mail::SpamAssassin::Plugin::URIDNSBL::parsed_metadata at line 406 of Mail/SpamAssassin/Plugin/URIDNSBL.pm, avg 23.3ms/call | ||||
2229 | 3024 | 5.43ms | my ($self) = @_; | ||
2230 | |||||
2231 | # use cached answer if available | ||||
2232 | 3024 | 6.77ms | if (defined $self->{uri_detail_list}) { | ||
2233 | 2556 | 46.6ms | return $self->{uri_detail_list}; | ||
2234 | } | ||||
2235 | |||||
2236 | 468 | 4.40ms | 468 | 4.41ms | my $timer = $self->{main}->time_method("get_uri_detail_list"); # spent 4.41ms making 468 calls to Mail::SpamAssassin::time_method, avg 9µs/call |
2237 | |||||
2238 | 468 | 1.41ms | $self->{uri_domain_count} = 0; | ||
2239 | |||||
2240 | # do this so we're sure metadata->html is setup | ||||
2241 | 4814 | 37.5ms | 468 | 4.87s | my %parsed = map { $_ => 'parsed' } $self->_get_parsed_uri_list(); # spent 4.87s making 468 calls to Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list, avg 10.4ms/call |
2242 | |||||
2243 | |||||
2244 | # This parses of DKIM for URIs disagrees with documentation and bug 6700 votes to disable | ||||
2245 | # this functionality | ||||
2246 | # 2013-01-07 | ||||
2247 | # This functionality is re-enabled as a configuration option disabled by | ||||
2248 | # default (bug 7087) | ||||
2249 | # 2014-10-06 | ||||
2250 | |||||
2251 | # Look for the domain in DK/DKIM headers | ||||
2252 | 468 | 1.62ms | if ( $self->{conf}->{parse_dkim_uris} ) { | ||
2253 | my $dk = join(" ", grep {defined} ( $self->get('DomainKey-Signature',undef), | ||||
2254 | $self->get('DKIM-Signature',undef) )); | ||||
2255 | while ($dk =~ /\bd\s*=\s*([^;]+)/g) { | ||||
2256 | my $dom = $1; | ||||
2257 | $dom =~ s/\s+//g; | ||||
2258 | $parsed{$dom} = 'domainkeys'; | ||||
2259 | } | ||||
2260 | } | ||||
2261 | |||||
2262 | # get URIs from HTML parsing | ||||
2263 | # use the metadata version since $self->{html} may not be setup | ||||
2264 | 468 | 2.58ms | my $detail = $self->{msg}->{metadata}->{html}->{uri_detail} || { }; | ||
2265 | 468 | 1.42ms | $self->{'uri_truncated'} = 1 if $self->{msg}->{metadata}->{html}->{uri_truncated}; | ||
2266 | |||||
2267 | # don't keep dereferencing ... | ||||
2268 | 468 | 1.37ms | my $redirector_patterns = $self->{conf}->{redirector_patterns}; | ||
2269 | |||||
2270 | # canonicalize the HTML parsed URIs | ||||
2271 | 5147 | 68.5ms | while(my($uri, $info) = each %{ $detail }) { | ||
2272 | 4211 | 32.5ms | 4211 | 3.30s | my @tmp = uri_list_canonicalize($redirector_patterns, $uri); # spent 3.30s making 4211 calls to Mail::SpamAssassin::Util::uri_list_canonicalize, avg 783µs/call |
2273 | 4211 | 30.1ms | $info->{cleaned} = \@tmp; | ||
2274 | |||||
2275 | 4211 | 14.5ms | foreach (@tmp) { | ||
2276 | 4467 | 35.5ms | 4467 | 1.38s | my($domain,$host) = $self->{main}->{registryboundaries}->uri_to_domain($_); # spent 1.38s making 4467 calls to Mail::SpamAssassin::RegistryBoundaries::uri_to_domain, avg 309µs/call |
2277 | 4467 | 54.5ms | if (defined $host && $host ne '' && !$info->{hosts}->{$host}) { | ||
2278 | # unstripped full host name as a key, and its domain part as a value | ||||
2279 | 1784 | 8.48ms | $info->{hosts}->{$host} = $domain; | ||
2280 | 1784 | 9.34ms | if (defined $domain && $domain ne '' && !$info->{domains}->{$domain}) { | ||
2281 | 1784 | 8.43ms | $info->{domains}->{$domain} = 1; # stripped to domain boundary | ||
2282 | 1784 | 3.28ms | $self->{uri_domain_count}++; | ||
2283 | } | ||||
2284 | } | ||||
2285 | } | ||||
2286 | |||||
2287 | 4211 | 44.9ms | 4211 | 47.7ms | if (would_log('dbg', 'uri') == 2) { # spent 47.7ms making 4211 calls to Mail::SpamAssassin::Logger::would_log, avg 11µs/call |
2288 | dbg("uri: html uri found, $uri"); | ||||
2289 | foreach my $nuri (@tmp) { | ||||
2290 | dbg("uri: cleaned html uri, $nuri"); | ||||
2291 | } | ||||
2292 | if ($info->{hosts} && $info->{domains}) { | ||||
2293 | for my $host (keys %{$info->{hosts}}) { | ||||
2294 | dbg("uri: html host %s, domain %s", $host, $info->{hosts}->{$host}); | ||||
2295 | } | ||||
2296 | } | ||||
2297 | } | ||||
2298 | } | ||||
2299 | |||||
2300 | # canonicalize the text parsed URIs | ||||
2301 | 468 | 16.1ms | while (my($uri, $type) = each %parsed) { | ||
2302 | 1718 | 13.6ms | $detail->{$uri}->{types}->{$type} = 1; | ||
2303 | 1718 | 3.31ms | my $info = $detail->{$uri}; | ||
2304 | |||||
2305 | 1718 | 2.85ms | my @uris; | ||
2306 | |||||
2307 | 1718 | 5.07ms | if (!exists $info->{cleaned}) { | ||
2308 | 901 | 3.44ms | if ($type eq 'parsed') { | ||
2309 | 901 | 8.34ms | 901 | 531ms | @uris = uri_list_canonicalize($redirector_patterns, $uri); # spent 531ms making 901 calls to Mail::SpamAssassin::Util::uri_list_canonicalize, avg 590µs/call |
2310 | } | ||||
2311 | else { | ||||
2312 | @uris = ( $uri ); | ||||
2313 | } | ||||
2314 | 901 | 2.37ms | $info->{cleaned} = \@uris; | ||
2315 | |||||
2316 | 901 | 3.43ms | foreach (@uris) { | ||
2317 | 949 | 7.82ms | 949 | 307ms | my($domain,$host) = $self->{main}->{registryboundaries}->uri_to_domain($_); # spent 307ms making 949 calls to Mail::SpamAssassin::RegistryBoundaries::uri_to_domain, avg 323µs/call |
2318 | 949 | 11.3ms | if (defined $host && $host ne '' && !$info->{hosts}->{$host}) { | ||
2319 | # unstripped full host name as a key, and its domain part as a value | ||||
2320 | 905 | 4.23ms | $info->{hosts}->{$host} = $domain; | ||
2321 | 905 | 4.23ms | if (defined $domain && $domain ne '' && !$info->{domains}->{$domain}){ | ||
2322 | 905 | 4.36ms | $info->{domains}->{$domain} = 1; | ||
2323 | 905 | 1.75ms | $self->{uri_domain_count}++; | ||
2324 | } | ||||
2325 | } | ||||
2326 | } | ||||
2327 | } | ||||
2328 | |||||
2329 | 1718 | 14.9ms | 1718 | 19.6ms | if (would_log('dbg', 'uri') == 2) { # spent 19.6ms making 1718 calls to Mail::SpamAssassin::Logger::would_log, avg 11µs/call |
2330 | dbg("uri: parsed uri found of type $type, $uri"); | ||||
2331 | foreach my $nuri (@uris) { | ||||
2332 | dbg("uri: cleaned parsed uri, $nuri"); | ||||
2333 | } | ||||
2334 | if ($info->{hosts} && $info->{domains}) { | ||||
2335 | for my $host (keys %{$info->{hosts}}) { | ||||
2336 | dbg("uri: parsed host %s, domain %s", $host, $info->{hosts}->{$host}); | ||||
2337 | } | ||||
2338 | } | ||||
2339 | } | ||||
2340 | } | ||||
2341 | |||||
2342 | # setup the cache | ||||
2343 | 468 | 1.63ms | $self->{uri_detail_list} = $detail; | ||
2344 | |||||
2345 | 468 | 4.65ms | return $detail; | ||
2346 | } | ||||
2347 | |||||
2348 | # spent 4.87s (850ms+4.02) within Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list which was called 468 times, avg 10.4ms/call:
# 468 times (850ms+4.02s) by Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list at line 2241, avg 10.4ms/call | ||||
2349 | 468 | 1000µs | my ($self) = @_; | ||
2350 | |||||
2351 | # use cached answer if available | ||||
2352 | 468 | 2.33ms | unless (defined $self->{parsed_uri_list}) { | ||
2353 | # TVD: we used to use decoded_body which is fine, except then we'll | ||||
2354 | # try parsing URLs out of HTML, which is what the HTML code is going | ||||
2355 | # to do (note: we know the HTML parsing occurs, because we call for the | ||||
2356 | # rendered text which does HTML parsing...) trying to get URLs out of | ||||
2357 | # HTML w/out parsing causes issues, so let's not do it. | ||||
2358 | # also, if we allow $textary to be passed in, we need to invalidate | ||||
2359 | # the cache first. fyi. | ||||
2360 | 468 | 3.94ms | 468 | 21.7ms | my $textary = $self->get_decoded_stripped_body_text_array(); # spent 21.7ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::get_decoded_stripped_body_text_array, avg 46µs/call |
2361 | 468 | 1.87ms | my $redirector_patterns = $self->{conf}->{redirector_patterns}; | ||
2362 | |||||
2363 | 468 | 1.10ms | my ($rulename, $pat, @uris); | ||
2364 | my $text; | ||||
2365 | 468 | 4.23ms | 468 | 70.5ms | my $tbirdurire = $self->_tbirdurire; # spent 70.5ms making 468 calls to Mail::SpamAssassin::PerMsgStatus::_tbirdurire, avg 151µs/call |
2366 | |||||
2367 | 468 | 3.86ms | for my $entry (@$textary) { | ||
2368 | |||||
2369 | # a workaround for [perl #69973] bug: | ||||
2370 | # Invalid and tainted utf-8 char crashes perl 5.10.1 in regexp evaluation | ||||
2371 | # Bug 6225, regexp and string should both be utf8, or none of them; | ||||
2372 | # untainting string also seems to avoid the crash | ||||
2373 | # | ||||
2374 | # Bug 6225: untaint the string in an attempt to work around a perl crash | ||||
2375 | 8956 | 59.4ms | 8956 | 337ms | local $_ = untaint_var($entry); # spent 337ms making 8956 calls to Mail::SpamAssassin::Util::untaint_var, avg 38µs/call |
2376 | |||||
2377 | 8956 | 32.1ms | local($1,$2,$3); | ||
2378 | 8956 | 1.13s | 18676 | 852ms | while (/$tbirdurire/igo) { # spent 836ms making 9338 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 90µs/call
# spent 15.5ms making 9338 calls to Mail::SpamAssassin::PerMsgStatus::CORE:regcomp, avg 2µs/call |
2379 | 2294 | 9.09ms | my $rawuri = $1||$2||$3; | ||
2380 | 2294 | 41.6ms | 2294 | 6.21ms | $rawuri =~ s/(^[^(]*)\).*$/$1/; # as per ThunderBird, ) is an end delimiter if there is no ( preceeding it # spent 6.21ms making 2294 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 3µs/call |
2381 | 2294 | 59.4ms | 2294 | 40.2ms | $rawuri =~ s/[-~!@#^&*()_+=:;\'?,.]*$//; # remove trailing string of punctuations that TBird ignores # spent 40.2ms making 2294 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 18µs/call |
2382 | # skip if there is '..' in the hostname portion of the URI, something we can't catch in the general URI regexp | ||||
2383 | 2294 | 21.2ms | 2294 | 5.51ms | next if $rawuri =~ /^(?:(?:https?|ftp|mailto):(?:\/\/)?)?[a-z\d.-]*\.\./i; # spent 5.51ms making 2294 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 2µs/call |
2384 | |||||
2385 | # If it's a hostname that was just sitting out in the | ||||
2386 | # open, without a protocol, and not inside of an HTML tag, | ||||
2387 | # the we should add the proper protocol in front, rather | ||||
2388 | # than using the base URI. | ||||
2389 | 2276 | 4.49ms | my $uri = $rawuri; | ||
2390 | 2276 | 4.11ms | my $rblonly; | ||
2391 | 2276 | 29.5ms | 2276 | 8.85ms | if ($uri !~ /^(?:https?|ftp|mailto|javascript|file):/i) { # spent 8.85ms making 2276 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 4µs/call |
2392 | 608 | 28.2ms | 1742 | 4.78ms | if ($uri =~ /^ftp\./i) { # spent 4.78ms making 1742 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 3µs/call |
2393 | $uri = "ftp://$uri"; | ||||
2394 | } | ||||
2395 | elsif ($uri =~ /^www\d{0,2}\./i) { | ||||
2396 | 82 | 341µs | $uri = "http://$uri"; | ||
2397 | } | ||||
2398 | elsif ($uri =~ /\@/) { | ||||
2399 | 328 | 1.44ms | $uri = "mailto:$uri"; | ||
2400 | } | ||||
2401 | else { | ||||
2402 | # some spammers are using unschemed URIs to escape filters | ||||
2403 | 198 | 398µs | $rblonly = 1; # flag that this is a URI that MUAs don't linkify so only use for RBLs | ||
2404 | 198 | 842µs | $uri = "http://$uri"; | ||
2405 | } | ||||
2406 | } | ||||
2407 | |||||
2408 | 2276 | 22.6ms | 2276 | 6.36ms | if ($uri =~ /^mailto:/i) { # spent 6.36ms making 2276 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 3µs/call |
2409 | # skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters | ||||
2410 | 360 | 15.8ms | 360 | 843µs | $uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/); # spent 843µs making 360 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 2µs/call |
2411 | 360 | 4.31ms | 360 | 1.72ms | next if ($uri !~ /^[^@]+@[^@]+$/); # spent 1.72ms making 360 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 5µs/call |
2412 | 360 | 3.39ms | 360 | 115ms | my $domuri = $self->{main}->{registryboundaries}->uri_to_domain($uri); # spent 115ms making 360 calls to Mail::SpamAssassin::RegistryBoundaries::uri_to_domain, avg 319µs/call |
2413 | 360 | 633µs | next unless $domuri; | ||
2414 | 358 | 1.61ms | push (@uris, $rawuri); | ||
2415 | 358 | 1.42ms | push (@uris, $uri) unless ($rawuri eq $uri); | ||
2416 | } | ||||
2417 | |||||
2418 | 2274 | 36.9ms | 2274 | 8.55ms | next unless ($uri =~/^(?:https?|ftp):/i); # at this point only valid if one or the other of these # spent 8.55ms making 2274 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 4µs/call |
2419 | |||||
2420 | 1916 | 15.8ms | 1916 | 1.78s | my @tmp = uri_list_canonicalize($redirector_patterns, $uri); # spent 1.78s making 1916 calls to Mail::SpamAssassin::Util::uri_list_canonicalize, avg 927µs/call |
2421 | 1916 | 3.49ms | my $goodurifound = 0; | ||
2422 | 1916 | 7.04ms | foreach my $cleanuri (@tmp) { | ||
2423 | 1954 | 17.9ms | 1954 | 661ms | my $domain = $self->{main}->{registryboundaries}->uri_to_domain($cleanuri); # spent 661ms making 1954 calls to Mail::SpamAssassin::RegistryBoundaries::uri_to_domain, avg 338µs/call |
2424 | 1954 | 13.0ms | if ($domain) { | ||
2425 | # bug 5780: Stop after domain to avoid FP, but do that after all deobfuscation of urlencoding and redirection | ||||
2426 | 1948 | 3.75ms | if ($rblonly) { | ||
2427 | 206 | 1.16ms | local $1; | ||
2428 | 206 | 4.17ms | 206 | 2.08ms | $cleanuri =~ s/^(https?:\/\/[^:\/]+).*$/$1/i; # spent 2.08ms making 206 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 10µs/call |
2429 | } | ||||
2430 | 1948 | 5.57ms | push (@uris, $cleanuri); | ||
2431 | 1948 | 3.56ms | $goodurifound = 1; | ||
2432 | } | ||||
2433 | } | ||||
2434 | 1916 | 3.13ms | next unless $goodurifound; | ||
2435 | 1912 | 142ms | 3824 | 102ms | push @uris, $rawuri unless $rblonly; # spent 98.3ms making 1912 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 51µs/call
# spent 3.52ms making 1912 calls to Mail::SpamAssassin::PerMsgStatus::CORE:regcomp, avg 2µs/call |
2436 | } | ||||
2437 | } | ||||
2438 | |||||
2439 | # Make sure all the URIs are nice and short | ||||
2440 | 468 | 2.48ms | foreach my $uri ( @uris ) { | ||
2441 | 4346 | 17.4ms | if (length $uri > MAX_URI_LENGTH) { | ||
2442 | $self->{'uri_truncated'} = 1; | ||||
2443 | $uri = substr $uri, 0, MAX_URI_LENGTH; | ||||
2444 | } | ||||
2445 | } | ||||
2446 | |||||
2447 | # setup the cache and return | ||||
2448 | 468 | 2.23ms | $self->{parsed_uri_list} = \@uris; | ||
2449 | } | ||||
2450 | |||||
2451 | 936 | 6.63ms | return @{$self->{parsed_uri_list}}; | ||
2452 | } | ||||
2453 | |||||
2454 | ########################################################################### | ||||
2455 | |||||
2456 | sub ensure_rules_are_complete { | ||||
2457 | my $self = shift; | ||||
2458 | my $metarule = shift; | ||||
2459 | # @_ is now the list of rules | ||||
2460 | |||||
2461 | foreach my $r (@_) { | ||||
2462 | # dbg("rules: meta rule depends on net rule $r"); | ||||
2463 | next if ($self->is_rule_complete($r)); | ||||
2464 | |||||
2465 | dbg("rules: meta rule $metarule depends on pending rule $r, blocking"); | ||||
2466 | my $timer = $self->{main}->time_method("wait_for_pending_rules"); | ||||
2467 | |||||
2468 | my $start = time; | ||||
2469 | $self->harvest_until_rule_completes($r); | ||||
2470 | my $elapsed = time - $start; | ||||
2471 | |||||
2472 | if (!$self->is_rule_complete($r)) { | ||||
2473 | dbg("rules: rule $r is still not complete; exited early?"); | ||||
2474 | } | ||||
2475 | elsif ($elapsed > 0) { | ||||
2476 | info("rules: $r took $elapsed seconds to complete, for $metarule"); | ||||
2477 | } | ||||
2478 | } | ||||
2479 | } | ||||
2480 | |||||
2481 | ########################################################################### | ||||
2482 | |||||
2483 | # use a separate sub here, for brevity | ||||
2484 | # called out of generated eval | ||||
2485 | sub handle_eval_rule_errors { | ||||
2486 | my ($self, $rulename) = @_; | ||||
2487 | warn "rules: failed to run $rulename test, skipping:\n\t($@)\n"; | ||||
2488 | $self->{rule_errors}++; | ||||
2489 | } | ||||
2490 | |||||
2491 | sub register_plugin_eval_glue { | ||||
2492 | my ($self, $function) = @_; | ||||
2493 | |||||
2494 | if (!$function) { | ||||
2495 | warn "rules: empty function name"; | ||||
2496 | return; | ||||
2497 | } | ||||
2498 | |||||
2499 | # only need to call this once per fn (globally) | ||||
2500 | return if exists $TEMPORARY_EVAL_GLUE_METHODS{$function}; | ||||
2501 | $TEMPORARY_EVAL_GLUE_METHODS{$function} = undef; | ||||
2502 | |||||
2503 | # return if it's not an eval_plugin function | ||||
2504 | return if (!exists $self->{conf}->{eval_plugins}->{$function}); | ||||
2505 | |||||
2506 | # return if it's been registered already | ||||
2507 | return if ($self->can ($function) && | ||||
2508 | defined &{'Mail::SpamAssassin::PerMsgStatus::'.$function}); | ||||
2509 | |||||
2510 | my $evalstr = <<"ENDOFEVAL"; | ||||
2511 | { | ||||
2512 | package Mail::SpamAssassin::PerMsgStatus; | ||||
2513 | |||||
2514 | sub $function { | ||||
2515 | my (\$self) = shift; | ||||
2516 | my \$plugin = \$self->{conf}->{eval_plugins}->{$function}; | ||||
2517 | return \$plugin->$function (\$self, \@_); | ||||
2518 | } | ||||
2519 | |||||
2520 | 1; | ||||
2521 | } | ||||
2522 | ENDOFEVAL | ||||
2523 | eval $evalstr . '; 1' ## no critic | ||||
2524 | or do { | ||||
2525 | my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; | ||||
2526 | warn "rules: failed to run header tests, skipping some: $eval_stat\n"; | ||||
2527 | $self->{rule_errors}++; | ||||
2528 | }; | ||||
2529 | |||||
2530 | # ensure this method is deleted if finish_tests() is called | ||||
2531 | push (@TEMPORARY_METHODS, $function); | ||||
2532 | } | ||||
2533 | |||||
2534 | ########################################################################### | ||||
2535 | |||||
2536 | # note: only eval tests should store state in $self->{test_log_msgs}; | ||||
2537 | # pattern tests do not. | ||||
2538 | # | ||||
2539 | # the clearing of the test state is now inlined as: | ||||
2540 | # | ||||
2541 | # %{$self->{test_log_msgs}} = (); # clear test state | ||||
2542 | # | ||||
2543 | # except for this public API for plugin use: | ||||
2544 | |||||
2545 | =item $status->clear_test_state() | ||||
2546 | |||||
2547 | Clear test state, including test log messages from C<$status-E<gt>test_log()>. | ||||
2548 | |||||
2549 | =cut | ||||
2550 | |||||
2551 | sub clear_test_state { | ||||
2552 | my ($self) = @_; | ||||
2553 | %{$self->{test_log_msgs}} = (); | ||||
2554 | } | ||||
2555 | |||||
2556 | # internal API, called only by get_hit() | ||||
2557 | # TODO: refactor and merge this into that function | ||||
2558 | sub _handle_hit { | ||||
2559 | my ($self, $rule, $score, $area, $ruletype, $desc) = @_; | ||||
2560 | |||||
2561 | $self->{main}->call_plugins ("hit_rule", { | ||||
2562 | permsgstatus => $self, | ||||
2563 | rulename => $rule, | ||||
2564 | ruletype => $ruletype, | ||||
2565 | score => $score | ||||
2566 | }); | ||||
2567 | |||||
2568 | # ignore meta-match sub-rules. | ||||
2569 | if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; } | ||||
2570 | |||||
2571 | # this should not happen; warn about it | ||||
2572 | if (!defined $score) { | ||||
2573 | warn "rules: score undef for rule '$rule' in '$area' '$desc'"; | ||||
2574 | return; | ||||
2575 | } | ||||
2576 | |||||
2577 | # this should not happen; warn about NaN (bug 3364) | ||||
2578 | if ($score != $score) { | ||||
2579 | warn "rules: score '$score' for rule '$rule' in '$area' '$desc'"; | ||||
2580 | return; | ||||
2581 | } | ||||
2582 | |||||
2583 | # Add the rule hit to the score | ||||
2584 | $self->{score} += $score; | ||||
2585 | |||||
2586 | push(@{$self->{test_names_hit}}, $rule); | ||||
2587 | $area ||= ''; | ||||
2588 | |||||
2589 | if ($score >= 10 || $score <= -10) { | ||||
2590 | $score = sprintf("%4.0f", $score); | ||||
2591 | } | ||||
2592 | else { | ||||
2593 | $score = sprintf("%4.1f", $score); | ||||
2594 | } | ||||
2595 | |||||
2596 | # save both summaries | ||||
2597 | # TODO: this is slower than necessary, if we only need one | ||||
2598 | $self->{tag_data}->{REPORT} .= sprintf ("* %s %s %s%s\n%s", | ||||
2599 | $score, $rule, $area, | ||||
2600 | $self->_wrap_desc($desc, | ||||
2601 | 4+length($rule)+length($score)+length($area), "* "), | ||||
2602 | ($self->{test_log_msgs}->{TERSE} ? | ||||
2603 | "* " . $self->{test_log_msgs}->{TERSE} : '')); | ||||
2604 | |||||
2605 | $self->{tag_data}->{SUMMARY} .= sprintf ("%s %-22s %s%s\n%s", | ||||
2606 | $score, $rule, $area, | ||||
2607 | $self->_wrap_desc($desc, | ||||
2608 | 3+length($rule)+length($score)+length($area), " " x 28), | ||||
2609 | ($self->{test_log_msgs}->{LONG} || '')); | ||||
2610 | } | ||||
2611 | |||||
2612 | sub _wrap_desc { | ||||
2613 | my ($self, $desc, $firstlinelength, $prefix) = @_; | ||||
2614 | |||||
2615 | my $firstline = " " x $firstlinelength; | ||||
2616 | my $wrapped = Mail::SpamAssassin::Util::wrap($desc, $prefix, $firstline, 75, 0); | ||||
2617 | $wrapped =~ s/^\s+//s; | ||||
2618 | $wrapped; | ||||
2619 | } | ||||
2620 | |||||
2621 | ########################################################################### | ||||
2622 | |||||
2623 | =item $status->got_hit ($rulename, $desc_prepend [, name => value, ...]) | ||||
2624 | |||||
2625 | Register a hit against a rule in the ruleset. | ||||
2626 | |||||
2627 | There are two mandatory arguments. These are C<$rulename>, the name of the rule | ||||
2628 | that fired, and C<$desc_prepend>, which is a short string that will be | ||||
2629 | prepended to the rules C<describe> string in output reports. | ||||
2630 | |||||
2631 | In addition, callers can supplement that with the following optional | ||||
2632 | data: | ||||
2633 | |||||
2634 | =over 4 | ||||
2635 | |||||
2636 | =item score => $num | ||||
2637 | |||||
2638 | Optional: the score to use for the rule hit. If unspecified, | ||||
2639 | the value from the C<Mail::SpamAssassin::Conf> object's C<{scores}> | ||||
2640 | hash will be used (a configured score), and in its absence the | ||||
2641 | C<defscore> option value. | ||||
2642 | |||||
2643 | =item defscore => $num | ||||
2644 | |||||
2645 | Optional: the score to use for the rule hit if neither the | ||||
2646 | option C<score> is provided, nor a configured score value is provided. | ||||
2647 | |||||
2648 | =item value => $num | ||||
2649 | |||||
2650 | Optional: the value to assign to the rule; the default value is C<1>. | ||||
2651 | I<tflags multiple> rules use values of greater than 1 to indicate | ||||
2652 | multiple hits. This value is accessible to meta rules. | ||||
2653 | |||||
2654 | =item ruletype => $type | ||||
2655 | |||||
2656 | Optional, but recommended: the rule type string. This is used in the | ||||
2657 | C<hit_rule> plugin call, called by this method. If unset, I<'unknown'> is | ||||
2658 | used. | ||||
2659 | |||||
2660 | =item tflags => $string | ||||
2661 | |||||
2662 | Optional: a string, i.e. a space-separated list of additional tflags | ||||
2663 | to be appended to an existing list of flags in $self->{conf}->{tflags}, | ||||
2664 | such as: "nice noautolearn multiple". No syntax checks are performed. | ||||
2665 | |||||
2666 | =item description => $string | ||||
2667 | |||||
2668 | Optional: a custom rule description string. This is used in the | ||||
2669 | C<hit_rule> plugin call, called by this method. If unset, the static | ||||
2670 | description is used. | ||||
2671 | |||||
2672 | =back | ||||
2673 | |||||
2674 | Backward compatibility: the two mandatory arguments have been part of this API | ||||
2675 | since SpamAssassin 2.x. The optional I<name=<gt>value> pairs, however, are a | ||||
2676 | new addition in SpamAssassin 3.2.0. | ||||
2677 | |||||
2678 | =cut | ||||
2679 | |||||
2680 | sub got_hit { | ||||
2681 | my ($self, $rule, $area, %params) = @_; | ||||
2682 | |||||
2683 | my $conf_ref = $self->{conf}; | ||||
2684 | |||||
2685 | my $dynamic_score_provided; | ||||
2686 | my $score = $params{score}; | ||||
2687 | if (defined $score) { # overrides any configured scores | ||||
2688 | $dynamic_score_provided = 1; | ||||
2689 | } else { | ||||
2690 | $score = $conf_ref->{scores}->{$rule}; | ||||
2691 | $score = $params{defscore} if !defined $score; | ||||
2692 | } | ||||
2693 | |||||
2694 | # adding a hit does nothing if we don't have a score -- we probably | ||||
2695 | # shouldn't have run it in the first place | ||||
2696 | if (!$score) { | ||||
2697 | %{$self->{test_log_msgs}} = (); | ||||
2698 | return; | ||||
2699 | } | ||||
2700 | |||||
2701 | # ensure that rule values always result in an *increase* | ||||
2702 | # of $self->{tests_already_hit}->{$rule}: | ||||
2703 | my $value = $params{value}; | ||||
2704 | if (!$value || $value <= 0) { $value = 1 } | ||||
2705 | |||||
2706 | my $tflags_ref = $conf_ref->{tflags}; | ||||
2707 | my $tflags_add = $params{tflags}; | ||||
2708 | if (defined $tflags_add && $tflags_add ne '') { | ||||
2709 | $_ = (!defined $_ || $_ eq '') ? $tflags_add : ($_ . ' ' . $tflags_add) | ||||
2710 | for $tflags_ref->{$rule}; | ||||
2711 | }; | ||||
2712 | |||||
2713 | my $already_hit = $self->{tests_already_hit}->{$rule} || 0; | ||||
2714 | # don't count hits multiple times, unless 'tflags multiple' is on | ||||
2715 | if ($already_hit && ($tflags_ref->{$rule}||'') !~ /\bmultiple\b/) { | ||||
2716 | %{$self->{test_log_msgs}} = (); | ||||
2717 | return; | ||||
2718 | } | ||||
2719 | |||||
2720 | $self->{tests_already_hit}->{$rule} = $already_hit + $value; | ||||
2721 | |||||
2722 | # default ruletype, if not specified: | ||||
2723 | $params{ruletype} ||= 'unknown'; | ||||
2724 | |||||
2725 | if ($dynamic_score_provided) { # copy it to static for proper reporting | ||||
2726 | $conf_ref->{scoreset}->[$_]->{$rule} = $score for (0..3); | ||||
2727 | $conf_ref->{scores}->{$rule} = $score; | ||||
2728 | } | ||||
2729 | |||||
2730 | my $rule_descr = $params{description}; | ||||
2731 | if (defined $rule_descr) { | ||||
2732 | $conf_ref->{descriptions}->{$rule} = $rule_descr; # save dynamic descr. | ||||
2733 | } else { | ||||
2734 | $rule_descr = $conf_ref->get_description_for_rule($rule); # static | ||||
2735 | } | ||||
2736 | # Bug 6880 Set Rule Description to something that says no rule | ||||
2737 | #$rule_descr = $rule if !defined $rule_descr || $rule_descr eq ''; | ||||
2738 | $rule_descr = "No description available." if !defined $rule_descr || $rule_descr eq ''; | ||||
2739 | |||||
2740 | $self->_handle_hit($rule, | ||||
2741 | $score, | ||||
2742 | $area, | ||||
2743 | $params{ruletype}, | ||||
2744 | $rule_descr); | ||||
2745 | |||||
2746 | # take care of duplicate rules, too (bug 5206) | ||||
2747 | my $dups = $conf_ref->{duplicate_rules}->{$rule}; | ||||
2748 | if ($dups && @{$dups}) { | ||||
2749 | foreach my $dup (@{$dups}) { | ||||
2750 | $self->got_hit($dup, $area, %params); | ||||
2751 | } | ||||
2752 | } | ||||
2753 | |||||
2754 | %{$self->{test_log_msgs}} = (); # clear test logs | ||||
2755 | return 1; | ||||
2756 | } | ||||
2757 | |||||
2758 | ########################################################################### | ||||
2759 | |||||
2760 | # TODO: this needs API doc | ||||
2761 | sub test_log { | ||||
2762 | my ($self, $msg) = @_; | ||||
2763 | local $1; | ||||
2764 | while ($msg =~ s/^(.{30,48})\s//) { | ||||
2765 | $self->_test_log_line ($1); | ||||
2766 | } | ||||
2767 | $self->_test_log_line ($msg); | ||||
2768 | } | ||||
2769 | |||||
2770 | sub _test_log_line { | ||||
2771 | my ($self, $msg) = @_; | ||||
2772 | |||||
2773 | $self->{test_log_msgs}->{TERSE} .= sprintf ("[%s]\n", $msg); | ||||
2774 | if (length($msg) > 47) { | ||||
2775 | $self->{test_log_msgs}->{LONG} .= sprintf ("%78s\n", "[$msg]"); | ||||
2776 | } else { | ||||
2777 | $self->{test_log_msgs}->{LONG} .= sprintf ("%27s [%s]\n", "", $msg); | ||||
2778 | } | ||||
2779 | } | ||||
2780 | |||||
2781 | ########################################################################### | ||||
2782 | |||||
2783 | # helper for get(). Do not call directly, as get() caches its results | ||||
2784 | # and this does not! | ||||
2785 | # spent 422ms (42.5+379) within Mail::SpamAssassin::PerMsgStatus::get_envelope_from which was called 234 times, avg 1.80ms/call:
# 234 times (42.5ms+379ms) by Mail::SpamAssassin::PerMsgStatus::_get at line 1942, avg 1.80ms/call | ||||
2786 | 234 | 548µs | my ($self) = @_; | ||
2787 | |||||
2788 | # bug 2142: | ||||
2789 | # Get the SMTP MAIL FROM:, aka. the "envelope sender", if our | ||||
2790 | # calling app has helpfully marked up the source message | ||||
2791 | # with it. Various MTAs and calling apps each have their | ||||
2792 | # own idea of what header to use for this! see | ||||
2793 | |||||
2794 | 234 | 475µs | my $envf; | ||
2795 | |||||
2796 | # Rely on the 'envelope-sender-header' header if the user has configured one. | ||||
2797 | # Assume that because they have configured it, their MTA will always add it. | ||||
2798 | # This will prevent us falling through and picking up inappropriate headers. | ||||
2799 | 234 | 968µs | if (defined $self->{conf}->{envelope_sender_header}) { | ||
2800 | # make sure we get the most recent copy - there can be only one EnvelopeSender. | ||||
2801 | $envf = $self->get($self->{conf}->{envelope_sender_header}.":addr",undef); | ||||
2802 | # ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >) | ||||
2803 | goto ok if defined $envf && ($envf =~ /\@/ || $envf eq ''); | ||||
2804 | # Warn them if it's configured, but not there or not usable. | ||||
2805 | if (defined $envf) { | ||||
2806 | chomp $envf; | ||||
2807 | dbg("message: envelope_sender_header '%s: %s' is not an FQDN, ignoring", | ||||
2808 | $self->{conf}->{envelope_sender_header}, $envf); | ||||
2809 | } else { | ||||
2810 | dbg("message: envelope_sender_header '%s' not found in message", | ||||
2811 | $self->{conf}->{envelope_sender_header}); | ||||
2812 | } | ||||
2813 | # Couldn't get envelope-sender using the configured header. | ||||
2814 | return; | ||||
2815 | } | ||||
2816 | |||||
2817 | # User hasn't given us a header to trust, so try to guess the sender. | ||||
2818 | |||||
2819 | # use the "envelope-sender" string found in the Received headers, | ||||
2820 | # if possible... use the last untrusted header, in case there's | ||||
2821 | # trusted headers. | ||||
2822 | 234 | 617µs | my $lasthop = $self->{relays_untrusted}->[0]; | ||
2823 | 234 | 522µs | if (!defined $lasthop) { | ||
2824 | # no untrusted headers? in that case, the message is ALL_TRUSTED. | ||||
2825 | # use the first trusted header (ie. the oldest, originating one). | ||||
2826 | 1 | 2µs | $lasthop = $self->{relays_trusted}->[-1]; | ||
2827 | } | ||||
2828 | |||||
2829 | 234 | 921µs | if (defined $lasthop) { | ||
2830 | 234 | 774µs | $envf = $lasthop->{envfrom}; | ||
2831 | # TODO FIXME: Received.pm puts both null senders and absence-of-sender | ||||
2832 | # into the relays array as '', so we can't distinguish them :( | ||||
2833 | 234 | 455µs | if ($envf && ($envf =~ /\@/)) { | ||
2834 | goto ok; | ||||
2835 | } | ||||
2836 | } | ||||
2837 | |||||
2838 | # WARNING: a lot of list software adds an X-Sender for the original env-from | ||||
2839 | # (including Yahoo! Groups). Unfortunately, fetchmail will pick it up and | ||||
2840 | # reuse it as the env-from for *its* delivery -- even though the list | ||||
2841 | # software had used a different env-from in the intervening delivery. Hence, | ||||
2842 | # if this header is present, and there's a fetchmail sig in the Received | ||||
2843 | # lines, we cannot trust any Envelope-From headers, since they're likely to | ||||
2844 | # be incorrect fetchmail guesses. | ||||
2845 | |||||
2846 | 234 | 4.01ms | 468 | 572µs | if ($self->get("X-Sender") =~ /\@/) { # spent 572µs making 234 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 2µs/call
# spent 46.5ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 199µs/call, recursion: max depth 1, sum of overlapping time 46.5ms |
2847 | 5 | 57µs | 5 | 0s | my $rcvd = join(' ', $self->get("Received")); # spent 1.16ms making 5 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 233µs/call, recursion: max depth 1, sum of overlapping time 1.16ms |
2848 | 5 | 67µs | 5 | 27µs | if ($rcvd =~ /\(fetchmail/) { # spent 27µs making 5 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 5µs/call |
2849 | dbg("message: X-Sender and fetchmail signatures found, cannot trust envelope-from"); | ||||
2850 | return; | ||||
2851 | } | ||||
2852 | } | ||||
2853 | |||||
2854 | # procmailrc notes this (we now recommend adding it to Received instead) | ||||
2855 | 234 | 1.85ms | 234 | 0s | if ($envf = $self->get("X-Envelope-From")) { # spent 40.6ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 174µs/call, recursion: max depth 1, sum of overlapping time 40.6ms |
2856 | # heuristic: this could have been relayed via a list which then used | ||||
2857 | # a *new* Envelope-from. check | ||||
2858 | if ($self->get("ALL:raw") =~ /^Received:.*^X-Envelope-From:/smi) { | ||||
2859 | dbg("message: X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from"); | ||||
2860 | return; | ||||
2861 | } else { | ||||
2862 | goto ok; | ||||
2863 | } | ||||
2864 | } | ||||
2865 | |||||
2866 | # qmail, new-inject(1) | ||||
2867 | 234 | 1.90ms | 234 | 0s | if ($envf = $self->get("Envelope-Sender")) { # spent 47.8ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 204µs/call, recursion: max depth 1, sum of overlapping time 47.8ms |
2868 | # heuristic: this could have been relayed via a list which then used | ||||
2869 | # a *new* Envelope-from. check | ||||
2870 | if ($self->get("ALL:raw") =~ /^Received:.*^Envelope-Sender:/smi) { | ||||
2871 | dbg("message: Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from"); | ||||
2872 | } else { | ||||
2873 | goto ok; | ||||
2874 | } | ||||
2875 | } | ||||
2876 | |||||
2877 | # Postfix, sendmail, amavisd-new, ... | ||||
2878 | # RFC 2821 requires it: | ||||
2879 | # When the delivery SMTP server makes the "final delivery" of a | ||||
2880 | # message, it inserts a return-path line at the beginning of the mail | ||||
2881 | # data. This use of return-path is required; mail systems MUST support | ||||
2882 | # it. The return-path line preserves the information in the <reverse- | ||||
2883 | # path> from the MAIL command. | ||||
2884 | 234 | 2.05ms | 234 | 0s | if ($envf = $self->get("Return-Path")) { # spent 48.7ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 208µs/call, recursion: max depth 1, sum of overlapping time 48.7ms |
2885 | # heuristic: this could have been relayed via a list which then used | ||||
2886 | # a *new* Envelope-from. check | ||||
2887 | 234 | 151ms | 468 | 147ms | if ($self->get("ALL:raw") =~ /^Received:.*^Return-Path:/smi) { # spent 147ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 628µs/call
# spent 40.5ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 173µs/call, recursion: max depth 1, sum of overlapping time 40.5ms |
2888 | dbg("message: Return-Path header found after 1 or more Received lines, cannot trust envelope-from"); | ||||
2889 | } else { | ||||
2890 | 234 | 1.90ms | goto ok; | ||
2891 | } | ||||
2892 | } | ||||
2893 | |||||
2894 | # give up. | ||||
2895 | return; | ||||
2896 | |||||
2897 | 234 | 3.20ms | 234 | 1.49ms | ok: # spent 1.49ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 6µs/call |
2898 | $envf =~ s/^<*//s; # remove < | ||||
2899 | 234 | 6.44ms | 234 | 4.84ms | $envf =~ s/>*\s*\z//s; # remove >, whitespace, newlines # spent 4.84ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::CORE:subst, avg 21µs/call |
2900 | |||||
2901 | 234 | 2.26ms | return $envf; | ||
2902 | } | ||||
2903 | |||||
2904 | ########################################################################### | ||||
2905 | |||||
2906 | # helper for get(ALL-*). get() caches its results, so don't call this | ||||
2907 | # directly unless you need a range of headers not covered by the ALL-* | ||||
2908 | # psuedo-headers! | ||||
2909 | |||||
2910 | # Get all the headers found between an index range of received headers, the | ||||
2911 | # index doesn't care if we could parse the received headers or not. | ||||
2912 | # Use undef for the $start_rcvd or $end_rcvd numbers to start/end with the | ||||
2913 | # first/last header in the message, otherwise indicate the index number you | ||||
2914 | # want to start/end at. Set $include_start_rcvd or $include_end_rcvd to 0 to | ||||
2915 | # indicate you don't want to include the received header found at the start or | ||||
2916 | # end indexes... basically toggles between [s,e], [s,e), (s,e], (s,e). | ||||
2917 | sub get_all_hdrs_in_rcvd_index_range { | ||||
2918 | my ($self, $start_rcvd, $end_rcvd, $include_start_rcvd, $include_end_rcvd) = @_; | ||||
2919 | |||||
2920 | # prevent bad input causing us to return the first header found | ||||
2921 | return if (defined $end_rcvd && $end_rcvd < 0); | ||||
2922 | |||||
2923 | $include_start_rcvd = 1 unless defined $include_start_rcvd; | ||||
2924 | $include_end_rcvd = 1 unless defined $include_end_rcvd; | ||||
2925 | |||||
2926 | my $cur_rcvd_index = -1; # none found yet | ||||
2927 | my $result = ''; | ||||
2928 | |||||
2929 | foreach my $hdr (split(/^/m, $self->{msg}->get_pristine_header())) { | ||||
2930 | if ($hdr =~ /^Received:/i) { | ||||
2931 | $cur_rcvd_index++; | ||||
2932 | next if (defined $start_rcvd && !$include_start_rcvd && | ||||
2933 | $start_rcvd == $cur_rcvd_index); | ||||
2934 | last if (defined $end_rcvd && !$include_end_rcvd && | ||||
2935 | $end_rcvd == $cur_rcvd_index); | ||||
2936 | } | ||||
2937 | if ((!defined $start_rcvd || $start_rcvd <= $cur_rcvd_index) && | ||||
2938 | (!defined $end_rcvd || $cur_rcvd_index < $end_rcvd)) { | ||||
2939 | $result .= $hdr."\n"; | ||||
2940 | } | ||||
2941 | elsif (defined $end_rcvd && $cur_rcvd_index == $end_rcvd) { | ||||
2942 | $result .= $hdr."\n"; | ||||
2943 | last; | ||||
2944 | } | ||||
2945 | } | ||||
2946 | return ($result eq '' ? undef : $result); | ||||
2947 | } | ||||
2948 | |||||
2949 | ########################################################################### | ||||
2950 | |||||
2951 | sub sa_die { Mail::SpamAssassin::sa_die(@_); } | ||||
2952 | |||||
2953 | ########################################################################### | ||||
2954 | |||||
2955 | =item $status->create_fulltext_tmpfile (fulltext_ref) | ||||
2956 | |||||
2957 | This function creates a temporary file containing the passed scalar | ||||
2958 | reference data (typically the full/pristine text of the message). | ||||
2959 | This is typically used by external programs like pyzor and dccproc, to | ||||
2960 | avoid hangs due to buffering issues. Methods that need this, should | ||||
2961 | call $self->create_fulltext_tmpfile($fulltext) to retrieve the temporary | ||||
2962 | filename; it will be created if it has not already been. | ||||
2963 | |||||
2964 | Note: This can only be called once until $status->delete_fulltext_tmpfile() is | ||||
2965 | called. | ||||
2966 | |||||
2967 | =cut | ||||
2968 | |||||
2969 | sub create_fulltext_tmpfile { | ||||
2970 | my ($self, $fulltext) = @_; | ||||
2971 | |||||
2972 | if (defined $self->{fulltext_tmpfile}) { | ||||
2973 | return $self->{fulltext_tmpfile}; | ||||
2974 | } | ||||
2975 | |||||
2976 | my ($tmpf, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile(); | ||||
2977 | $tmpfh or die "failed to create a temporary file"; | ||||
2978 | |||||
2979 | # PerlIO's buffered print writes in 8 kB chunks - which can be slow. | ||||
2980 | # print $tmpfh $$fulltext or die "error writing to $tmpf: $!"; | ||||
2981 | # | ||||
2982 | # reducing the number of writes and bypassing extra buffering in PerlIO | ||||
2983 | # speeds up writing of larger text by a factor of 2 | ||||
2984 | my $nwrites; | ||||
2985 | for (my $ofs = 0; $ofs < length($$fulltext); $ofs += $nwrites) { | ||||
2986 | $nwrites = $tmpfh->syswrite($$fulltext, length($$fulltext)-$ofs, $ofs); | ||||
2987 | defined $nwrites or die "error writing to $tmpf: $!"; | ||||
2988 | } | ||||
2989 | close $tmpfh or die "error closing $tmpf: $!"; | ||||
2990 | |||||
2991 | $self->{fulltext_tmpfile} = $tmpf; | ||||
2992 | |||||
2993 | dbg("check: create_fulltext_tmpfile, written %d bytes to file %s", | ||||
2994 | length($$fulltext), $tmpf); | ||||
2995 | |||||
2996 | return $self->{fulltext_tmpfile}; | ||||
2997 | } | ||||
2998 | |||||
2999 | =item $status->delete_fulltext_tmpfile () | ||||
3000 | |||||
3001 | Will cleanup after a $status->create_fulltext_tmpfile() call. Deletes the | ||||
3002 | temporary file and uncaches the filename. | ||||
3003 | |||||
3004 | =cut | ||||
3005 | |||||
3006 | # spent 2.92ms within Mail::SpamAssassin::PerMsgStatus::delete_fulltext_tmpfile which was called 313 times, avg 9µs/call:
# 313 times (2.92ms+0s) by Mail::SpamAssassin::PerMsgStatus::DESTROY at line 320, avg 9µs/call | ||||
3007 | 313 | 684µs | my ($self) = @_; | ||
3008 | 313 | 3.19ms | if (defined $self->{fulltext_tmpfile}) { | ||
3009 | if (!unlink $self->{fulltext_tmpfile}) { | ||||
3010 | my $msg = sprintf("cannot unlink %s: %s", $self->{fulltext_tmpfile}, $!); | ||||
3011 | # don't fuss too much if file is missing, perhaps it wasn't even created | ||||
3012 | if ($! == ENOENT) { warn $msg } else { die $msg } | ||||
3013 | } | ||||
3014 | $self->{fulltext_tmpfile} = undef; | ||||
3015 | } | ||||
3016 | } | ||||
3017 | |||||
3018 | ########################################################################### | ||||
3019 | |||||
3020 | # spent 239ms (48.1+191) within Mail::SpamAssassin::PerMsgStatus::all_from_addrs which was called 321 times, avg 746µs/call:
# 321 times (48.1ms+191ms) by Mail::SpamAssassin::Plugin::TxRep::check_senders_reputation at line 1230 of Mail/SpamAssassin/Plugin/TxRep.pm, avg 746µs/call | ||||
3021 | 321 | 712µs | my ($self) = @_; | ||
3022 | |||||
3023 | 495 | 2.25ms | if (exists $self->{all_from_addrs}) { return @{$self->{all_from_addrs}}; } | ||
3024 | |||||
3025 | 234 | 469µs | my @addrs; | ||
3026 | |||||
3027 | # Resent- headers take priority, if present. see bug 672 | ||||
3028 | 234 | 2.34ms | 234 | 47.4ms | my $resent = $self->get('Resent-From',undef); # spent 47.4ms making 234 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 202µs/call |
3029 | 234 | 1.32ms | if (defined $resent && $resent =~ /\S/) { | ||
3030 | @addrs = $self->{main}->find_all_addrs_in_line ($resent); | ||||
3031 | } | ||||
3032 | else { | ||||
3033 | # bug 2292: Used to use find_all_addrs_in_line() with the same | ||||
3034 | # headers, but the would catch addresses in comments which caused | ||||
3035 | # FNs for things like whitelist_from. Since all of these are From | ||||
3036 | # headers, there should only be 1 address in each anyway (not exactly | ||||
3037 | # true, RFC 2822 allows multiple addresses in a From header field), | ||||
3038 | # so use the :addr code... | ||||
3039 | # bug 3366: some addresses come in as 'foo@bar...', which is invalid. | ||||
3040 | # so deal with the multiple periods. | ||||
3041 | ## no critic | ||||
3042 | 2340 | 24.7ms | 1170 | 142ms | @addrs = map { tr/././s; $_ } grep { $_ ne '' } # spent 142ms making 1170 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 121µs/call |
3043 | ($self->get('From:addr'), # std | ||||
3044 | $self->get('Envelope-Sender:addr'), # qmail: new-inject(1) | ||||
3045 | $self->get('Resent-Sender:addr'), # procmailrc manpage | ||||
3046 | $self->get('X-Envelope-From:addr'), # procmailrc manpage | ||||
3047 | $self->get('EnvelopeFrom:addr')); # SMTP envelope | ||||
3048 | # http://www.cs.tut.fi/~jkorpela/headers.html is useful here | ||||
3049 | } | ||||
3050 | |||||
3051 | # Remove duplicate addresses | ||||
3052 | 702 | 5.11ms | my %addrs = map { $_ => 1 } @addrs; | ||
3053 | 234 | 1.18ms | @addrs = keys %addrs; | ||
3054 | |||||
3055 | 234 | 2.35ms | 234 | 1.86ms | dbg("eval: all '*From' addrs: " . join(" ", @addrs)); # spent 1.86ms making 234 calls to Mail::SpamAssassin::Logger::dbg, avg 8µs/call |
3056 | 234 | 870µs | $self->{all_from_addrs} = \@addrs; | ||
3057 | 234 | 15.3ms | return @addrs; | ||
3058 | } | ||||
3059 | |||||
3060 | =item all_from_addrs_domains | ||||
3061 | |||||
3062 | This function returns all the various from addresses in a message using all_from_addrs() | ||||
3063 | and then returns only the domain names. | ||||
3064 | |||||
3065 | =cut | ||||
3066 | |||||
3067 | sub all_from_addrs_domains { | ||||
3068 | my ($self) = @_; | ||||
3069 | |||||
3070 | if (exists $self->{all_from_addrs_domains}) { | ||||
3071 | return @{$self->{all_from_addrs_domains}}; | ||||
3072 | } | ||||
3073 | |||||
3074 | #TEST POINT - my @addrs = ("test.voipquotes2.net","test.voipquotes2.co.uk"); | ||||
3075 | #Start with all the normal from addrs | ||||
3076 | my @addrs = &all_from_addrs($self); | ||||
3077 | |||||
3078 | dbg("eval: all '*From' addrs domains (before): " . join(" ", @addrs)); | ||||
3079 | |||||
3080 | #loop through and limit to just the domain with a dummy address | ||||
3081 | for (my $i = 0; $i < scalar(@addrs); $i++) { | ||||
3082 | $addrs[$i] = 'dummy@'.$self->{main}->{registryboundaries}->uri_to_domain($addrs[$i]); | ||||
3083 | } | ||||
3084 | |||||
3085 | #Remove duplicate domains | ||||
3086 | my %addrs = map { $_ => 1 } @addrs; | ||||
3087 | @addrs = keys %addrs; | ||||
3088 | |||||
3089 | dbg("eval: all '*From' addrs domains (after uri to domain): " . join(" ", @addrs)); | ||||
3090 | |||||
3091 | $self->{all_from_addrs_domains} = \@addrs; | ||||
3092 | |||||
3093 | return @addrs; | ||||
3094 | } | ||||
3095 | |||||
3096 | # spent 3.61ms (441µs+3.17) within Mail::SpamAssassin::PerMsgStatus::all_to_addrs which was called:
# once (441µs+3.17ms) by Mail::SpamAssassin::Plugin::TxRep::check_senders_reputation at line 1299 of Mail/SpamAssassin/Plugin/TxRep.pm | ||||
3097 | 1 | 3µs | my ($self) = @_; | ||
3098 | |||||
3099 | 1 | 3µs | if (exists $self->{all_to_addrs}) { return @{$self->{all_to_addrs}}; } | ||
3100 | |||||
3101 | 1 | 2µs | my @addrs; | ||
3102 | |||||
3103 | # Resent- headers take priority, if present. see bug 672 | ||||
3104 | 1 | 16µs | 2 | 403µs | my $resent = join('', $self->get('Resent-To'), $self->get('Resent-Cc')); # spent 403µs making 2 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 202µs/call |
3105 | 1 | 21µs | 1 | 2µs | if ($resent =~ /\S/) { # spent 2µs making 1 call to Mail::SpamAssassin::PerMsgStatus::CORE:match |
3106 | @addrs = $self->{main}->find_all_addrs_in_line($resent); | ||||
3107 | } else { | ||||
3108 | # OK, a fetchmail trick: try to find the recipient address from | ||||
3109 | # the most recent 3 Received lines. This is required for sendmail, | ||||
3110 | # since it does not add a helpful header like exim, qmail | ||||
3111 | # or Postfix do. | ||||
3112 | # | ||||
3113 | 1 | 8µs | 1 | 239µs | my $rcvd = $self->get('Received'); # spent 239µs making 1 call to Mail::SpamAssassin::PerMsgStatus::get |
3114 | 1 | 16µs | 1 | 8µs | $rcvd =~ s/\n[ \t]+/ /gs; # spent 8µs making 1 call to Mail::SpamAssassin::PerMsgStatus::CORE:subst |
3115 | 1 | 23µs | 1 | 13µs | $rcvd =~ s/\n+/\n/gs; # spent 13µs making 1 call to Mail::SpamAssassin::PerMsgStatus::CORE:subst |
3116 | |||||
3117 | 2 | 12µs | my @rcvdlines = split(/\n/, $rcvd, 4); pop @rcvdlines; # forget last one | ||
3118 | 1 | 2µs | my @rcvdaddrs; | ||
3119 | 1 | 4µs | foreach my $line (@rcvdlines) { | ||
3120 | 6 | 67µs | 3 | 20µs | if ($line =~ / for (\S+\@\S+);/) { push (@rcvdaddrs, $1); } # spent 20µs making 3 calls to Mail::SpamAssassin::PerMsgStatus::CORE:match, avg 7µs/call |
3121 | } | ||||
3122 | |||||
3123 | @addrs = $self->{main}->find_all_addrs_in_line ( | ||||
3124 | 1 | 138µs | 13 | 2.48ms | join('', # spent 2.16ms making 12 calls to Mail::SpamAssassin::PerMsgStatus::get, avg 180µs/call
# spent 313µs making 1 call to Mail::SpamAssassin::find_all_addrs_in_line |
3125 | join(" ", @rcvdaddrs)."\n", | ||||
3126 | $self->get('To'), # std | ||||
3127 | $self->get('Apparently-To'), # sendmail, from envelope | ||||
3128 | $self->get('Delivered-To'), # Postfix, poss qmail | ||||
3129 | $self->get('Envelope-Recipients'), # qmail: new-inject(1) | ||||
3130 | $self->get('Apparently-Resent-To'), # procmailrc manpage | ||||
3131 | $self->get('X-Envelope-To'), # procmailrc manpage | ||||
3132 | $self->get('Envelope-To'), # exim | ||||
3133 | $self->get('X-Delivered-To'), # procmail quick start | ||||
3134 | $self->get('X-Original-To'), # procmail quick start | ||||
3135 | $self->get('X-Rcpt-To'), # procmail quick start | ||||
3136 | $self->get('X-Real-To'), # procmail quick start | ||||
3137 | $self->get('Cc'))); # std | ||||
3138 | # those are taken from various sources; thanks to Nancy McGough, who | ||||
3139 | # noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope> | ||||
3140 | } | ||||
3141 | |||||
3142 | 1 | 13µs | 1 | 8µs | dbg("eval: all '*To' addrs: " . join(" ", @addrs)); # spent 8µs making 1 call to Mail::SpamAssassin::Logger::dbg |
3143 | 1 | 4µs | $self->{all_to_addrs} = \@addrs; | ||
3144 | 1 | 13µs | return @addrs; | ||
3145 | |||||
3146 | # http://www.cs.tut.fi/~jkorpela/headers.html is useful here, also | ||||
3147 | # http://www.exim.org/pipermail/exim-users/Week-of-Mon-20001009/021672.html | ||||
3148 | } | ||||
3149 | |||||
3150 | ########################################################################### | ||||
3151 | |||||
3152 | 1 | 16µs | 1; | ||
3153 | __END__ |