Filename | /usr/local/lib/perl5/site_perl/Mail/SpamAssassin/Locales.pm |
Statements | Executed 12 statements in 1.03ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 39µs | 47µs | BEGIN@20 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 25µs | 30µs | BEGIN@22 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 20µs | 67µs | BEGIN@23 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 20µs | 105µs | BEGIN@25 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 20µs | 40µs | BEGIN@21 | Mail::SpamAssassin::Locales::
0 | 0 | 0 | 0s | 0s | is_charset_ok_for_locales | Mail::SpamAssassin::Locales::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # <@LICENSE> | ||||
2 | # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
3 | # contributor license agreements. See the NOTICE file distributed with | ||||
4 | # this work for additional information regarding copyright ownership. | ||||
5 | # The ASF licenses this file to you under the Apache License, Version 2.0 | ||||
6 | # (the "License"); you may not use this file except in compliance with | ||||
7 | # the License. You may obtain a copy of the License at: | ||||
8 | # | ||||
9 | # http://www.apache.org/licenses/LICENSE-2.0 | ||||
10 | # | ||||
11 | # Unless required by applicable law or agreed to in writing, software | ||||
12 | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
14 | # See the License for the specific language governing permissions and | ||||
15 | # limitations under the License. | ||||
16 | # </@LICENSE> | ||||
17 | |||||
18 | package Mail::SpamAssassin::Locales; | ||||
19 | |||||
20 | 2 | 53µs | 2 | 54µs | # spent 47µs (39+7) within Mail::SpamAssassin::Locales::BEGIN@20 which was called:
# once (39µs+7µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 20 # spent 47µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@20
# spent 8µs making 1 call to strict::import |
21 | 2 | 55µs | 2 | 61µs | # spent 40µs (20+21) within Mail::SpamAssassin::Locales::BEGIN@21 which was called:
# once (20µs+21µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 21 # spent 40µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@21
# spent 21µs making 1 call to warnings::import |
22 | 2 | 53µs | 2 | 34µs | # spent 30µs (25+5) within Mail::SpamAssassin::Locales::BEGIN@22 which was called:
# once (25µs+5µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 22 # spent 30µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@22
# spent 5µs making 1 call to bytes::import |
23 | 2 | 76µs | 2 | 113µs | # spent 67µs (20+46) within Mail::SpamAssassin::Locales::BEGIN@23 which was called:
# once (20µs+46µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 23 # spent 67µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@23
# spent 46µs making 1 call to re::import |
24 | |||||
25 | 1 | 2µs | # spent 105µs (20+84) within Mail::SpamAssassin::Locales::BEGIN@25 which was called:
# once (20µs+84µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 27 | ||
26 | %charsets_for_locale | ||||
27 | 1 | 768µs | 2 | 189µs | }; # spent 105µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@25
# spent 84µs making 1 call to vars::import |
28 | |||||
29 | ########################################################################### | ||||
30 | |||||
31 | # A mapping of known country codes to frequent charsets used therein. | ||||
32 | # note that the ISO and CP charsets will already have been permitted, | ||||
33 | # so only "unusual" charsets should be listed here. | ||||
34 | # | ||||
35 | # Country codes should be lowercase, charsets uppercase. | ||||
36 | # | ||||
37 | # A good listing is in /usr/share/config/charsets from KDE 2.2.1 | ||||
38 | # | ||||
39 | 1 | 15µs | %charsets_for_locale = ( | ||
40 | |||||
41 | # Japanese: Peter Evans writes: iso-2022-jp = rfc approved, rfc 1468, created | ||||
42 | # by Jun Murai in 1993 back when he didnt have white hair! rfc approved. | ||||
43 | # (rfc 2237) <-- by M$. | ||||
44 | 'ja' => 'EUCJP JISX020119760 JISX020819830 JISX020819900 JISX020819970 '. | ||||
45 | 'JISX021219900 JISX021320001 JISX021320002 SHIFT_JIS SHIFTJIS '. | ||||
46 | 'ISO2022JP SJIS JIS7 JISX0201 JISX0208 JISX0212', | ||||
47 | |||||
48 | # Korea | ||||
49 | 'ko' => 'EUCKR KSC56011987', | ||||
50 | |||||
51 | # Cyrillic: Andrew Vasilyev notes CP866 is common (bug 2278) | ||||
52 | 'ru' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
53 | 'ka' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
54 | 'tg' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
55 | 'be' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
56 | 'uk' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
57 | 'bg' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
58 | |||||
59 | # Thai | ||||
60 | 'th' => 'TIS620', | ||||
61 | |||||
62 | # Chinese (simplified and traditional). Peter Evans writes: new government | ||||
63 | # mandated chinese encoding = gb18030, chinese mail is supposed to be | ||||
64 | # iso-2022-cn (rfc 1922?) | ||||
65 | 'zh' => 'GB1988 GB2312 GB231219800 GB18030 GBK BIG5HKSCS BIG5 EUCTW ISO2022CN', | ||||
66 | |||||
67 | # Chinese Traditional charsets only | ||||
68 | 'zh.big5' => 'BIG5HKSCS BIG5 EUCTW', | ||||
69 | |||||
70 | # Chinese Simplified charsets only | ||||
71 | 'zh.gb2312' => 'GB1988 GB2312 GB231219800 GB18030 GBK ISO2022CN', | ||||
72 | ); | ||||
73 | |||||
74 | ########################################################################### | ||||
75 | |||||
76 | sub is_charset_ok_for_locales { | ||||
77 | my ($cs, @locales) = @_; | ||||
78 | |||||
79 | $cs = uc $cs; $cs =~ s/[^A-Z0-9]//g; | ||||
80 | $cs =~ s/^3D//gs; # broken by quoted-printable | ||||
81 | $cs =~ s/:.*$//gs; # trim off multiple charsets, just use 1st | ||||
82 | |||||
83 | study $cs; # study is a no-op since perl 5.16.0, eliminating related bugs | ||||
84 | #warn "JMD $cs"; | ||||
85 | |||||
86 | # always OK (the net speaks mostly roman charsets) | ||||
87 | return 1 if ($cs eq 'USASCII'); | ||||
88 | return 1 if ($cs =~ /^ISO8859/); | ||||
89 | return 1 if ($cs =~ /^ISO10646/); | ||||
90 | return 1 if ($cs =~ /^UTF/); | ||||
91 | return 1 if ($cs =~ /^UCS/); | ||||
92 | return 1 if ($cs =~ /^CP125/); | ||||
93 | return 1 if ($cs =~ /^WINDOWS/); # argh, Windows | ||||
94 | return 1 if ($cs eq 'IBM852'); | ||||
95 | return 1 if ($cs =~ /^UNICODE11UTF[78]/); # wtf? never heard of it | ||||
96 | return 1 if ($cs eq 'XUNKNOWN'); # added by sendmail when converting to 8bit | ||||
97 | return 1 if ($cs eq 'ISO'); # Magellan, sending as 'charset=iso 8859-15'. grr | ||||
98 | |||||
99 | foreach my $locale (@locales) { | ||||
100 | if (!defined($locale) || $locale eq 'C') { $locale = 'en'; } | ||||
101 | $locale =~ s/^([a-z][a-z]).*$/$1/; # zh_TW... => zh | ||||
102 | |||||
103 | my $ok_for_loc = $charsets_for_locale{$locale}; | ||||
104 | next if (!defined $ok_for_loc); | ||||
105 | |||||
106 | if ($ok_for_loc =~ /(?:^| )\Q${cs}\E(?:$| )/) { | ||||
107 | return 1; | ||||
108 | } | ||||
109 | } | ||||
110 | |||||
111 | return 0; | ||||
112 | } | ||||
113 | |||||
114 | 1 | 11µs | 1; |