← Index
NYTProf Performance Profile   « line view »
For /usr/local/bin/sa-learn
  Run on Tue Nov 7 05:38:10 2017
Reported on Tue Nov 7 06:16:05 2017

Filename/usr/local/lib/perl5/site_perl/Mail/SpamAssassin/Locales.pm
StatementsExecuted 12 statements in 1.19ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
11151µs66µsMail::SpamAssassin::Locales::::BEGIN@20Mail::SpamAssassin::Locales::BEGIN@20
11125µs30µsMail::SpamAssassin::Locales::::BEGIN@22Mail::SpamAssassin::Locales::BEGIN@22
11123µs109µsMail::SpamAssassin::Locales::::BEGIN@25Mail::SpamAssassin::Locales::BEGIN@25
11123µs85µsMail::SpamAssassin::Locales::::BEGIN@23Mail::SpamAssassin::Locales::BEGIN@23
11119µs41µsMail::SpamAssassin::Locales::::BEGIN@21Mail::SpamAssassin::Locales::BEGIN@21
0000s0sMail::SpamAssassin::Locales::::is_charset_ok_for_localesMail::SpamAssassin::Locales::is_charset_ok_for_locales
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# <@LICENSE>
2# Licensed to the Apache Software Foundation (ASF) under one or more
3# contributor license agreements. See the NOTICE file distributed with
4# this work for additional information regarding copyright ownership.
5# The ASF licenses this file to you under the Apache License, Version 2.0
6# (the "License"); you may not use this file except in compliance with
7# the License. You may obtain a copy of the License at:
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16# </@LICENSE>
17
18package Mail::SpamAssassin::Locales;
19
20280µs281µs
# spent 66µs (51+15) within Mail::SpamAssassin::Locales::BEGIN@20 which was called: # once (51µs+15µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 20
use strict;
# spent 66µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@20 # spent 15µs making 1 call to strict::import
21250µs262µs
# spent 41µs (19+22) within Mail::SpamAssassin::Locales::BEGIN@21 which was called: # once (19µs+22µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 21
use warnings;
# spent 41µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@21 # spent 22µs making 1 call to warnings::import
22279µs234µs
# spent 30µs (25+4) within Mail::SpamAssassin::Locales::BEGIN@22 which was called: # once (25µs+4µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 22
use bytes;
# spent 30µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@22 # spent 4µs making 1 call to bytes::import
232112µs2147µs
# spent 85µs (23+62) within Mail::SpamAssassin::Locales::BEGIN@23 which was called: # once (23µs+62µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 23
use re 'taint';
# spent 85µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@23 # spent 62µs making 1 call to re::import
24
2512µs
# spent 109µs (23+86) within Mail::SpamAssassin::Locales::BEGIN@25 which was called: # once (23µs+86µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 27
use vars qw{
26 %charsets_for_locale
271827µs2195µs};
# spent 109µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@25 # spent 86µs making 1 call to vars::import
28
29###########################################################################
30
31# A mapping of known country codes to frequent charsets used therein.
32# note that the ISO and CP charsets will already have been permitted,
33# so only "unusual" charsets should be listed here.
34#
35# Country codes should be lowercase, charsets uppercase.
36#
37# A good listing is in /usr/share/config/charsets from KDE 2.2.1
38#
39116µs%charsets_for_locale = (
40
41 # Japanese: Peter Evans writes: iso-2022-jp = rfc approved, rfc 1468, created
42 # by Jun Murai in 1993 back when he didnt have white hair! rfc approved.
43 # (rfc 2237) <-- by M$.
44 'ja' => 'EUCJP JISX020119760 JISX020819830 JISX020819900 JISX020819970 '.
45 'JISX021219900 JISX021320001 JISX021320002 SHIFT_JIS SHIFTJIS '.
46 'ISO2022JP SJIS JIS7 JISX0201 JISX0208 JISX0212',
47
48 # Korea
49 'ko' => 'EUCKR KSC56011987',
50
51 # Cyrillic: Andrew Vasilyev notes CP866 is common (bug 2278)
52 'ru' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866',
53 'ka' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866',
54 'tg' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866',
55 'be' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866',
56 'uk' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866',
57 'bg' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866',
58
59 # Thai
60 'th' => 'TIS620',
61
62 # Chinese (simplified and traditional). Peter Evans writes: new government
63 # mandated chinese encoding = gb18030, chinese mail is supposed to be
64 # iso-2022-cn (rfc 1922?)
65 'zh' => 'GB1988 GB2312 GB231219800 GB18030 GBK BIG5HKSCS BIG5 EUCTW ISO2022CN',
66
67 # Chinese Traditional charsets only
68 'zh.big5' => 'BIG5HKSCS BIG5 EUCTW',
69
70 # Chinese Simplified charsets only
71 'zh.gb2312' => 'GB1988 GB2312 GB231219800 GB18030 GBK ISO2022CN',
72);
73
74###########################################################################
75
76sub is_charset_ok_for_locales {
77 my ($cs, @locales) = @_;
78
79 $cs = uc $cs; $cs =~ s/[^A-Z0-9]//g;
80 $cs =~ s/^3D//gs; # broken by quoted-printable
81 $cs =~ s/:.*$//gs; # trim off multiple charsets, just use 1st
82
83 study $cs; # study is a no-op since perl 5.16.0, eliminating related bugs
84 #warn "JMD $cs";
85
86 # always OK (the net speaks mostly roman charsets)
87 return 1 if ($cs eq 'USASCII');
88 return 1 if ($cs =~ /^ISO8859/);
89 return 1 if ($cs =~ /^ISO10646/);
90 return 1 if ($cs =~ /^UTF/);
91 return 1 if ($cs =~ /^UCS/);
92 return 1 if ($cs =~ /^CP125/);
93 return 1 if ($cs =~ /^WINDOWS/); # argh, Windows
94 return 1 if ($cs eq 'IBM852');
95 return 1 if ($cs =~ /^UNICODE11UTF[78]/); # wtf? never heard of it
96 return 1 if ($cs eq 'XUNKNOWN'); # added by sendmail when converting to 8bit
97 return 1 if ($cs eq 'ISO'); # Magellan, sending as 'charset=iso 8859-15'. grr
98
99 foreach my $locale (@locales) {
100 if (!defined($locale) || $locale eq 'C') { $locale = 'en'; }
101 $locale =~ s/^([a-z][a-z]).*$/$1/; # zh_TW... => zh
102
103 my $ok_for_loc = $charsets_for_locale{$locale};
104 next if (!defined $ok_for_loc);
105
106 if ($ok_for_loc =~ /(?:^| )\Q${cs}\E(?:$| )/) {
107 return 1;
108 }
109 }
110
111 return 0;
112}
113
114119µs1;