Filename | /usr/local/lib/perl5/5.24/Pod/Simple/BlackBox.pm |
Statements | Executed 23 statements in 22.6ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 46µs | 52µs | BEGIN@21 | Pod::Simple::BlackBox::
1 | 1 | 1 | 31µs | 40µs | BEGIN@1282 | Pod::Simple::BlackBox::
1 | 1 | 1 | 27µs | 33µs | BEGIN@22 | Pod::Simple::BlackBox::
1 | 1 | 1 | 20µs | 98µs | BEGIN@24 | Pod::Simple::BlackBox::
1 | 1 | 1 | 12µs | 12µs | BEGIN@27 | Pod::Simple::BlackBox::
1 | 1 | 1 | 10µs | 10µs | BEGIN@23 | Pod::Simple::BlackBox::
1 | 1 | 1 | 7µs | 7µs | CORE:qr (opcode) | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _closers_for_all_curr_open | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _dump_curr_open | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _gen_errata | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _handle_encoding_line | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _handle_encoding_second_level | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Data | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Plain | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Verbatim | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_back | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_begin | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_doc_end | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_end | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_for | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_item | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_over | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_paragraph_buffer | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_pod | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _stringify_lol | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _traverse_treelet_bit | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _treelet_from_formatting_codes | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _verbatim_format | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | parse_line | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | parse_lines | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | pretty | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | reinit | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | stringify_lol | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | text_content_of_treelet | Pod::Simple::BlackBox::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package Pod::Simple::BlackBox; | ||||
2 | # | ||||
3 | # "What's in the box?" "Pain." | ||||
4 | # | ||||
5 | ########################################################################### | ||||
6 | # | ||||
7 | # This is where all the scary things happen: parsing lines into | ||||
8 | # paragraphs; and then into directives, verbatims, and then also | ||||
9 | # turning formatting sequences into treelets. | ||||
10 | # | ||||
11 | # Are you really sure you want to read this code? | ||||
12 | # | ||||
13 | #----------------------------------------------------------------------------- | ||||
14 | # | ||||
15 | # The basic work of this module Pod::Simple::BlackBox is doing the dirty work | ||||
16 | # of parsing Pod into treelets (generally one per non-verbatim paragraph), and | ||||
17 | # to call the proper callbacks on the treelets. | ||||
18 | # | ||||
19 | # Every node in a treelet is a ['name', {attrhash}, ...children...] | ||||
20 | |||||
21 | 2 | 67µs | 2 | 57µs | # spent 52µs (46+5) within Pod::Simple::BlackBox::BEGIN@21 which was called:
# once (46µs+5µs) by Pod::Simple::LinkSection::BEGIN@9 at line 21 # spent 52µs making 1 call to Pod::Simple::BlackBox::BEGIN@21
# spent 5µs making 1 call to integer::import |
22 | 2 | 60µs | 2 | 39µs | # spent 33µs (27+6) within Pod::Simple::BlackBox::BEGIN@22 which was called:
# once (27µs+6µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22 # spent 33µs making 1 call to Pod::Simple::BlackBox::BEGIN@22
# spent 6µs making 1 call to strict::import |
23 | 2 | 82µs | 1 | 10µs | # spent 10µs within Pod::Simple::BlackBox::BEGIN@23 which was called:
# once (10µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 23 # spent 10µs making 1 call to Pod::Simple::BlackBox::BEGIN@23 |
24 | 2 | 138µs | 2 | 175µs | # spent 98µs (20+78) within Pod::Simple::BlackBox::BEGIN@24 which was called:
# once (20µs+78µs) by Pod::Simple::LinkSection::BEGIN@9 at line 24 # spent 98µs making 1 call to Pod::Simple::BlackBox::BEGIN@24
# spent 78µs making 1 call to vars::import |
25 | 1 | 2µs | $VERSION = '3.32'; | ||
26 | #use constant DEBUG => 7; | ||||
27 | # spent 12µs within Pod::Simple::BlackBox::BEGIN@27 which was called:
# once (12µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 30 | ||||
28 | 1 | 2µs | require Pod::Simple; | ||
29 | 1 | 10µs | *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG | ||
30 | 1 | 12.8ms | 1 | 12µs | } # spent 12µs making 1 call to Pod::Simple::BlackBox::BEGIN@27 |
31 | |||||
32 | # Matches a character iff the character will have a different meaning | ||||
33 | # if we choose CP1252 vs UTF-8 if there is no =encoding line. | ||||
34 | # This is broken for early Perls on non-ASCII platforms. | ||||
35 | 1 | 54µs | my $non_ascii_re = eval "qr/[[:^ascii:]]/"; # spent 29µs executing statements in string eval | ||
36 | 1 | 2µs | $non_ascii_re = qr/[\x80-\xFF]/ if ! defined $non_ascii_re; | ||
37 | |||||
38 | 1 | 1µs | my $utf8_bom; | ||
39 | 1 | 24µs | if (($] ge 5.007_003)) { | ||
40 | 1 | 2µs | $utf8_bom = "\x{FEFF}"; | ||
41 | 1 | 26µs | 1 | 8µs | utf8::encode($utf8_bom); # spent 8µs making 1 call to utf8::encode |
42 | } else { | ||||
43 | $utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls. | ||||
44 | } | ||||
45 | |||||
46 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
47 | |||||
48 | sub parse_line { shift->parse_lines(@_) } # alias | ||||
49 | |||||
50 | # - - - Turn back now! Run away! - - - | ||||
51 | |||||
52 | sub parse_lines { # Usage: $parser->parse_lines(@lines) | ||||
53 | # an undef means end-of-stream | ||||
54 | my $self = shift; | ||||
55 | |||||
56 | my $code_handler = $self->{'code_handler'}; | ||||
57 | my $cut_handler = $self->{'cut_handler'}; | ||||
58 | my $wl_handler = $self->{'whiteline_handler'}; | ||||
59 | $self->{'line_count'} ||= 0; | ||||
60 | |||||
61 | my $scratch; | ||||
62 | |||||
63 | DEBUG > 4 and | ||||
64 | print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n"; | ||||
65 | |||||
66 | DEBUG > 5 and | ||||
67 | print STDERR "# About to parse lines: ", | ||||
68 | join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; | ||||
69 | |||||
70 | my $paras = ($self->{'paras'} ||= []); | ||||
71 | # paragraph buffer. Because we need to defer processing of =over | ||||
72 | # directives and verbatim paragraphs. We call _ponder_paragraph_buffer | ||||
73 | # to process this. | ||||
74 | |||||
75 | $self->{'pod_para_count'} ||= 0; | ||||
76 | |||||
77 | my $line; | ||||
78 | foreach my $source_line (@_) { | ||||
79 | if( $self->{'source_dead'} ) { | ||||
80 | DEBUG > 4 and print STDERR "# Source is dead.\n"; | ||||
81 | last; | ||||
82 | } | ||||
83 | |||||
84 | unless( defined $source_line ) { | ||||
85 | DEBUG > 4 and print STDERR "# Undef-line seen.\n"; | ||||
86 | |||||
87 | push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; | ||||
88 | push @$paras, $paras->[-1], $paras->[-1]; | ||||
89 | # So that it definitely fills the buffer. | ||||
90 | $self->{'source_dead'} = 1; | ||||
91 | $self->_ponder_paragraph_buffer; | ||||
92 | next; | ||||
93 | } | ||||
94 | |||||
95 | |||||
96 | if( $self->{'line_count'}++ ) { | ||||
97 | ($line = $source_line) =~ tr/\n\r//d; | ||||
98 | # If we don't have two vars, we'll end up with that there | ||||
99 | # tr/// modding the (potentially read-only) original source line! | ||||
100 | |||||
101 | } else { | ||||
102 | DEBUG > 2 and print STDERR "First line: [$source_line]\n"; | ||||
103 | |||||
104 | if( ($line = $source_line) =~ s/^$utf8_bom//s ) { | ||||
105 | DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; | ||||
106 | $self->_handle_encoding_line( "=encoding utf8" ); | ||||
107 | delete $self->{'_processed_encoding'}; | ||||
108 | $line =~ tr/\n\r//d; | ||||
109 | |||||
110 | } elsif( $line =~ s/^\xFE\xFF//s ) { | ||||
111 | DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
112 | $self->scream( | ||||
113 | $self->{'line_count'}, | ||||
114 | "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
115 | ); | ||||
116 | splice @_; | ||||
117 | push @_, undef; | ||||
118 | next; | ||||
119 | |||||
120 | # TODO: implement somehow? | ||||
121 | |||||
122 | } elsif( $line =~ s/^\xFF\xFE//s ) { | ||||
123 | DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
124 | $self->scream( | ||||
125 | $self->{'line_count'}, | ||||
126 | "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
127 | ); | ||||
128 | splice @_; | ||||
129 | push @_, undef; | ||||
130 | next; | ||||
131 | |||||
132 | # TODO: implement somehow? | ||||
133 | |||||
134 | } else { | ||||
135 | DEBUG > 2 and print STDERR "First line is BOM-less.\n"; | ||||
136 | ($line = $source_line) =~ tr/\n\r//d; | ||||
137 | } | ||||
138 | } | ||||
139 | |||||
140 | if(!$self->{'parse_characters'} && !$self->{'encoding'} | ||||
141 | && ($self->{'in_pod'} || $line =~ /^=/s) | ||||
142 | && $line =~ /$non_ascii_re/ | ||||
143 | ) { | ||||
144 | |||||
145 | my $encoding; | ||||
146 | |||||
147 | # No =encoding line, and we are at the first line in the input that | ||||
148 | # contains a non-ascii byte, that is one whose meaning varies depending | ||||
149 | # on whether the file is encoded in UTF-8 or CP1252, which are the two | ||||
150 | # possibilities permitted by the pod spec. (ASCII is assumed if the | ||||
151 | # file only contains ASCII bytes.) In order to process this line, we | ||||
152 | # need to figure out what encoding we will use for the file. | ||||
153 | # | ||||
154 | # Strictly speaking ISO 8859-1 (Latin 1) refers to the code points | ||||
155 | # 160-255, but it is used here, as it often colloquially is, to refer to | ||||
156 | # the complete set of code points 0-255, including ASCII (0-127), the C1 | ||||
157 | # controls (128-159), and strict Latin 1 (160-255). | ||||
158 | # | ||||
159 | # CP1252 is effectively a superset of Latin 1, because it differs only | ||||
160 | # from colloquial 8859-1 in the C1 controls, which are very unlikely to | ||||
161 | # actually be present in 8859-1 files, so can be used for other purposes | ||||
162 | # without conflict. CP 1252 uses most of them for graphic characters. | ||||
163 | # | ||||
164 | # Note that all ASCII-range bytes represent their corresponding code | ||||
165 | # points in CP1252 and UTF-8. In ASCII platform UTF-8 all other code | ||||
166 | # points require multiple (non-ASCII) bytes to represent. (A separate | ||||
167 | # paragraph for EBCDIC is below.) The multi-byte representation is | ||||
168 | # quite structured. If we find an isolated byte that requires multiple | ||||
169 | # bytes to represent in UTF-8, we know that the encoding is not UTF-8. | ||||
170 | # If we find a sequence of bytes that violates the UTF-8 structure, we | ||||
171 | # also can presume the encoding isn't UTF-8, and hence must be 1252. | ||||
172 | # | ||||
173 | # But there are ambiguous cases where we could guess wrong. If so, the | ||||
174 | # user will end up having to supply an =encoding line. We use all | ||||
175 | # readily available information to improve our chances of guessing | ||||
176 | # right. The odds of something not being UTF-8, but still passing a | ||||
177 | # UTF-8 validity test go down very rapidly with increasing length of the | ||||
178 | # sequence. Therefore we look at all the maximal length non-ascii | ||||
179 | # sequences on the line. If any of the sequences can't be UTF-8, we | ||||
180 | # quit there and choose CP1252. If all could be UTF-8, we guess UTF-8. | ||||
181 | # | ||||
182 | # On EBCDIC platforms, the situation is somewhat different. In | ||||
183 | # UTF-EBCDIC, not only do ASCII-range bytes represent their code points, | ||||
184 | # but so do the bytes that are for the C1 controls. Recall that these | ||||
185 | # correspond to the unused portion of 8859-1 that 1252 mostly takes | ||||
186 | # over. That means that there are fewer code points that are | ||||
187 | # represented by multi-bytes. But, note that the these controls are | ||||
188 | # very unlikely to be in pod text. So if we encounter one of them, it | ||||
189 | # means that it is quite likely CP1252 and not UTF-8. The net result is | ||||
190 | # the same code below is used for both platforms. | ||||
191 | while ($line =~ m/($non_ascii_re+)/g) { | ||||
192 | my $non_ascii_seq = $1; | ||||
193 | |||||
194 | if (length $non_ascii_seq == 1) { | ||||
195 | $encoding = 'CP1252'; | ||||
196 | goto guessed; | ||||
197 | } elsif ($] ge 5.007_003) { | ||||
198 | |||||
199 | # On Perls that have this function, we can see if the sequence is | ||||
200 | # valid UTF-8 or not. | ||||
201 | if (! utf8::decode($non_ascii_seq)) { | ||||
202 | $encoding = 'CP1252'; | ||||
203 | goto guessed; | ||||
204 | } | ||||
205 | } elsif (ord("A") == 65) { # An early Perl, ASCII platform | ||||
206 | |||||
207 | # Without utf8::decode, it's a lot harder to do a rigorous check | ||||
208 | # (though some early releases had a different function that | ||||
209 | # accomplished the same thing). Since these are ancient Perls, not | ||||
210 | # likely to be in use today, we take the easy way out, and look at | ||||
211 | # just the first two bytes of the sequence to see if they are the | ||||
212 | # start of a UTF-8 character. In ASCII UTF-8, continuation bytes | ||||
213 | # must be between 0x80 and 0xBF. Start bytes can range from 0xC2 | ||||
214 | # through 0xFF, but anything above 0xF4 is not Unicode, and hence | ||||
215 | # extremely unlikely to be in a pod. | ||||
216 | if ($non_ascii_seq !~ /^[\xC2-\xF4][\x80-\xBF]/) { | ||||
217 | $encoding = 'CP1252'; | ||||
218 | goto guessed; | ||||
219 | } | ||||
220 | |||||
221 | # We don't bother doing anything special for EBCDIC on early Perls. | ||||
222 | # If there is a solitary variant, CP1252 will be chosen; otherwise | ||||
223 | # UTF-8. | ||||
224 | } | ||||
225 | } # End of loop through all variant sequences on the line | ||||
226 | |||||
227 | # All sequences in the line could be UTF-8. Guess that. | ||||
228 | $encoding = 'UTF-8'; | ||||
229 | |||||
230 | guessed: | ||||
231 | $self->_handle_encoding_line( "=encoding $encoding" ); | ||||
232 | delete $self->{'_processed_encoding'}; | ||||
233 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
234 | |||||
235 | my ($word) = $line =~ /(\S*$non_ascii_re\S*)/; | ||||
236 | |||||
237 | $self->whine( | ||||
238 | $self->{'line_count'}, | ||||
239 | "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding" | ||||
240 | ); | ||||
241 | } | ||||
242 | |||||
243 | DEBUG > 5 and print STDERR "# Parsing line: [$line]\n"; | ||||
244 | |||||
245 | if(!$self->{'in_pod'}) { | ||||
246 | if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s|$)/s) { | ||||
247 | if($1 eq 'cut') { | ||||
248 | $self->scream( | ||||
249 | $self->{'line_count'}, | ||||
250 | "=cut found outside a pod block. Skipping to next block." | ||||
251 | ); | ||||
252 | |||||
253 | ## Before there were errata sections in the world, it was | ||||
254 | ## least-pessimal to abort processing the file. But now we can | ||||
255 | ## just barrel on thru (but still not start a pod block). | ||||
256 | #splice @_; | ||||
257 | #push @_, undef; | ||||
258 | |||||
259 | next; | ||||
260 | } else { | ||||
261 | $self->{'in_pod'} = $self->{'start_of_pod_block'} | ||||
262 | = $self->{'last_was_blank'} = 1; | ||||
263 | # And fall thru to the pod-mode block further down | ||||
264 | } | ||||
265 | } else { | ||||
266 | DEBUG > 5 and print STDERR "# It's a code-line.\n"; | ||||
267 | $code_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
268 | if $code_handler; | ||||
269 | # Note: this may cause code to be processed out of order relative | ||||
270 | # to pods, but in order relative to cuts. | ||||
271 | |||||
272 | # Note also that we haven't yet applied the transcoding to $line | ||||
273 | # by time we call $code_handler! | ||||
274 | |||||
275 | if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { | ||||
276 | # That RE is from perlsyn, section "Plain Old Comments (Not!)", | ||||
277 | #$fname = $2 if defined $2; | ||||
278 | #DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n"; | ||||
279 | DEBUG > 1 and print STDERR "# Setting nextline to $1\n"; | ||||
280 | $self->{'line_count'} = $1 - 1; | ||||
281 | } | ||||
282 | |||||
283 | next; | ||||
284 | } | ||||
285 | } | ||||
286 | |||||
287 | # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||||
288 | # Else we're in pod mode: | ||||
289 | |||||
290 | # Apply any necessary transcoding: | ||||
291 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
292 | |||||
293 | # HERE WE CATCH =encoding EARLY! | ||||
294 | if( $line =~ m/^=encoding\s+\S+\s*$/s ) { | ||||
295 | next if $self->parse_characters; # Ignore this line | ||||
296 | $line = $self->_handle_encoding_line( $line ); | ||||
297 | } | ||||
298 | |||||
299 | if($line =~ m/^=cut/s) { | ||||
300 | # here ends the pod block, and therefore the previous pod para | ||||
301 | DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n"; | ||||
302 | $self->{'in_pod'} = 0; | ||||
303 | # ++$self->{'pod_para_count'}; | ||||
304 | $self->_ponder_paragraph_buffer(); | ||||
305 | # by now it's safe to consider the previous paragraph as done. | ||||
306 | $cut_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
307 | if $cut_handler; | ||||
308 | |||||
309 | # TODO: add to docs: Note: this may cause cuts to be processed out | ||||
310 | # of order relative to pods, but in order relative to code. | ||||
311 | |||||
312 | } elsif($line =~ m/^(\s*)$/s) { # it's a blank line | ||||
313 | if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line | ||||
314 | $wl_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
315 | if $wl_handler; | ||||
316 | } | ||||
317 | |||||
318 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
319 | DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n"; | ||||
320 | push @{$paras->[-1]}, $line; | ||||
321 | } # otherwise it's not interesting | ||||
322 | |||||
323 | if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { | ||||
324 | DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n"; | ||||
325 | } | ||||
326 | |||||
327 | $self->{'last_was_blank'} = 1; | ||||
328 | |||||
329 | } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... | ||||
330 | |||||
331 | if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { | ||||
332 | # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS | ||||
333 | my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; | ||||
334 | # Note that in "=head1 foo", the WS is lost. | ||||
335 | # Example: ['=head1', {'start_line' => 123}, ' foo'] | ||||
336 | |||||
337 | ++$self->{'pod_para_count'}; | ||||
338 | |||||
339 | $self->_ponder_paragraph_buffer(); | ||||
340 | # by now it's safe to consider the previous paragraph as done. | ||||
341 | |||||
342 | push @$paras, $new; # the new incipient paragraph | ||||
343 | DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; | ||||
344 | |||||
345 | } elsif($line =~ m/^\s/s) { | ||||
346 | |||||
347 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
348 | DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n"; | ||||
349 | push @{$paras->[-1]}, $line; | ||||
350 | } else { | ||||
351 | ++$self->{'pod_para_count'}; | ||||
352 | $self->_ponder_paragraph_buffer(); | ||||
353 | # by now it's safe to consider the previous paragraph as done. | ||||
354 | DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n"; | ||||
355 | push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; | ||||
356 | } | ||||
357 | } else { | ||||
358 | ++$self->{'pod_para_count'}; | ||||
359 | $self->_ponder_paragraph_buffer(); | ||||
360 | # by now it's safe to consider the previous paragraph as done. | ||||
361 | push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; | ||||
362 | DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n"; | ||||
363 | } | ||||
364 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
365 | |||||
366 | } else { | ||||
367 | # It's a non-blank line /continuing/ the current para | ||||
368 | if(@$paras) { | ||||
369 | DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n"; | ||||
370 | push @{$paras->[-1]}, $line; | ||||
371 | } else { | ||||
372 | # Unexpected case! | ||||
373 | die "Continuing a paragraph but \@\$paras is empty?"; | ||||
374 | } | ||||
375 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
376 | } | ||||
377 | |||||
378 | } # ends the big while loop | ||||
379 | |||||
380 | DEBUG > 1 and print STDERR (pretty(@$paras), "\n"); | ||||
381 | return $self; | ||||
382 | } | ||||
383 | |||||
384 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
385 | |||||
386 | sub _handle_encoding_line { | ||||
387 | my($self, $line) = @_; | ||||
388 | |||||
389 | return if $self->parse_characters; | ||||
390 | |||||
391 | # The point of this routine is to set $self->{'_transcoder'} as indicated. | ||||
392 | |||||
393 | return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; | ||||
394 | DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n"; | ||||
395 | |||||
396 | my $e = $1; | ||||
397 | my $orig = $e; | ||||
398 | push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; | ||||
399 | |||||
400 | my $enc_error; | ||||
401 | |||||
402 | # Cf. perldoc Encode and perldoc Encode::Supported | ||||
403 | |||||
404 | require Pod::Simple::Transcode; | ||||
405 | |||||
406 | if( $self->{'encoding'} ) { | ||||
407 | my $norm_current = $self->{'encoding'}; | ||||
408 | my $norm_e = $e; | ||||
409 | foreach my $that ($norm_current, $norm_e) { | ||||
410 | $that = lc($that); | ||||
411 | $that =~ s/[-_]//g; | ||||
412 | } | ||||
413 | if($norm_current eq $norm_e) { | ||||
414 | DEBUG > 1 and print STDERR "The '=encoding $orig' line is ", | ||||
415 | "redundant. ($norm_current eq $norm_e). Ignoring.\n"; | ||||
416 | $enc_error = ''; | ||||
417 | # But that doesn't necessarily mean that the earlier one went okay | ||||
418 | } else { | ||||
419 | $enc_error = "Encoding is already set to " . $self->{'encoding'}; | ||||
420 | DEBUG > 1 and print STDERR $enc_error; | ||||
421 | } | ||||
422 | } elsif ( | ||||
423 | # OK, let's turn on the encoding | ||||
424 | do { | ||||
425 | DEBUG > 1 and print STDERR " Setting encoding to $e\n"; | ||||
426 | $self->{'encoding'} = $e; | ||||
427 | 1; | ||||
428 | } | ||||
429 | and $e eq 'HACKRAW' | ||||
430 | ) { | ||||
431 | DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n"; | ||||
432 | |||||
433 | } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { | ||||
434 | |||||
435 | die($enc_error = "WHAT? _transcoder is already set?!") | ||||
436 | if $self->{'_transcoder'}; # should never happen | ||||
437 | require Pod::Simple::Transcode; | ||||
438 | $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); | ||||
439 | eval { | ||||
440 | my @x = ('', "abc", "123"); | ||||
441 | $self->{'_transcoder'}->(@x); | ||||
442 | }; | ||||
443 | $@ && die( $enc_error = | ||||
444 | "Really unexpected error setting up encoding $e: $@\nAborting" | ||||
445 | ); | ||||
446 | $self->{'detected_encoding'} = $e; | ||||
447 | |||||
448 | } else { | ||||
449 | my @supported = Pod::Simple::Transcode::->all_encodings; | ||||
450 | |||||
451 | # Note unsupported, and complain | ||||
452 | DEBUG and print STDERR " Encoding [$e] is unsupported.", | ||||
453 | "\nSupporteds: @supported\n"; | ||||
454 | my $suggestion = ''; | ||||
455 | |||||
456 | # Look for a near match: | ||||
457 | my $norm = lc($e); | ||||
458 | $norm =~ tr[-_][]d; | ||||
459 | my $n; | ||||
460 | foreach my $enc (@supported) { | ||||
461 | $n = lc($enc); | ||||
462 | $n =~ tr[-_][]d; | ||||
463 | next unless $n eq $norm; | ||||
464 | $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; | ||||
465 | last; | ||||
466 | } | ||||
467 | my $encmodver = Pod::Simple::Transcode::->encmodver; | ||||
468 | $enc_error = join '' => | ||||
469 | "This document probably does not appear as it should, because its ", | ||||
470 | "\"=encoding $e\" line calls for an unsupported encoding.", | ||||
471 | $suggestion, " [$encmodver\'s supported encodings are: @supported]" | ||||
472 | ; | ||||
473 | |||||
474 | $self->scream( $self->{'line_count'}, $enc_error ); | ||||
475 | } | ||||
476 | push @{ $self->{'encoding_command_statuses'} }, $enc_error; | ||||
477 | if (defined($self->{'_processed_encoding'})) { | ||||
478 | # Double declaration. | ||||
479 | $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives'); | ||||
480 | } | ||||
481 | $self->{'_processed_encoding'} = $orig; | ||||
482 | |||||
483 | return $line; | ||||
484 | } | ||||
485 | |||||
486 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
487 | |||||
488 | sub _handle_encoding_second_level { | ||||
489 | # By time this is called, the encoding (if well formed) will already | ||||
490 | # have been acted one. | ||||
491 | my($self, $para) = @_; | ||||
492 | my @x = @$para; | ||||
493 | my $content = join ' ', splice @x, 2; | ||||
494 | $content =~ s/^\s+//s; | ||||
495 | $content =~ s/\s+$//s; | ||||
496 | |||||
497 | DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n"; | ||||
498 | |||||
499 | if (defined($self->{'_processed_encoding'})) { | ||||
500 | #if($content ne $self->{'_processed_encoding'}) { | ||||
501 | # Could it happen? | ||||
502 | #} | ||||
503 | delete $self->{'_processed_encoding'}; | ||||
504 | # It's already been handled. Check for errors. | ||||
505 | if(! $self->{'encoding_command_statuses'} ) { | ||||
506 | DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n"; | ||||
507 | } elsif( $self->{'encoding_command_statuses'}[-1] ) { | ||||
508 | $self->whine( $para->[1]{'start_line'}, | ||||
509 | sprintf "Couldn't do %s: %s", | ||||
510 | $self->{'encoding_command_reqs' }[-1], | ||||
511 | $self->{'encoding_command_statuses'}[-1], | ||||
512 | ); | ||||
513 | } else { | ||||
514 | DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n"; | ||||
515 | } | ||||
516 | |||||
517 | } else { | ||||
518 | # Otherwise it's a syntax error | ||||
519 | $self->whine( $para->[1]{'start_line'}, | ||||
520 | "Invalid =encoding syntax: $content" | ||||
521 | ); | ||||
522 | } | ||||
523 | |||||
524 | return; | ||||
525 | } | ||||
526 | |||||
527 | #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` | ||||
528 | |||||
529 | { | ||||
530 | 1 | 2µs | my $m = -321; # magic line number | ||
531 | |||||
532 | sub _gen_errata { | ||||
533 | my $self = $_[0]; | ||||
534 | # Return 0 or more fake-o paragraphs explaining the accumulated | ||||
535 | # errors on this document. | ||||
536 | |||||
537 | return() unless $self->{'errata'} and keys %{$self->{'errata'}}; | ||||
538 | |||||
539 | my @out; | ||||
540 | |||||
541 | foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { | ||||
542 | push @out, | ||||
543 | ['=item', {'start_line' => $m}, "Around line $line:"], | ||||
544 | map( ['~Para', {'start_line' => $m, '~cooked' => 1}, | ||||
545 | #['~Top', {'start_line' => $m}, | ||||
546 | $_ | ||||
547 | #] | ||||
548 | ], | ||||
549 | @{$self->{'errata'}{$line}} | ||||
550 | ) | ||||
551 | ; | ||||
552 | } | ||||
553 | |||||
554 | # TODO: report of unknown entities? unrenderable characters? | ||||
555 | |||||
556 | unshift @out, | ||||
557 | ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], | ||||
558 | ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, | ||||
559 | "Hey! ", | ||||
560 | ['B', {}, | ||||
561 | 'The above document had some coding errors, which are explained below:' | ||||
562 | ] | ||||
563 | ], | ||||
564 | ['=over', {'start_line' => $m, 'errata' => 1}, ''], | ||||
565 | ; | ||||
566 | |||||
567 | push @out, | ||||
568 | ['=back', {'start_line' => $m, 'errata' => 1}, ''], | ||||
569 | ; | ||||
570 | |||||
571 | DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n"; | ||||
572 | |||||
573 | return @out; | ||||
574 | } | ||||
575 | |||||
576 | } | ||||
577 | |||||
578 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
579 | |||||
580 | ############################################################################## | ||||
581 | ## | ||||
582 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
583 | ## | ||||
584 | ## HERE IT BECOMES REALLY SCARY | ||||
585 | ## | ||||
586 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
587 | ## | ||||
588 | ############################################################################## | ||||
589 | |||||
590 | 1 | 4µs | sub _ponder_paragraph_buffer { | ||
591 | |||||
592 | # Para-token types as found in the buffer. | ||||
593 | # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, | ||||
594 | # =over, =back, =item | ||||
595 | # and the null =pod (to be complained about if over one line) | ||||
596 | # | ||||
597 | # "~data" paragraphs are something we generate at this level, depending on | ||||
598 | # a currently open =over region | ||||
599 | |||||
600 | # Events fired: Begin and end for: | ||||
601 | # directivename (like head1 .. head4), item, extend, | ||||
602 | # for (from =begin...=end, =for), | ||||
603 | # over-bullet, over-number, over-text, over-block, | ||||
604 | # item-bullet, item-number, item-text, | ||||
605 | # Document, | ||||
606 | # Data, Para, Verbatim | ||||
607 | # B, C, longdirname (TODO -- wha?), etc. for all directives | ||||
608 | # | ||||
609 | |||||
610 | my $self = $_[0]; | ||||
611 | my $paras; | ||||
612 | return unless @{$paras = $self->{'paras'}}; | ||||
613 | my $curr_open = ($self->{'curr_open'} ||= []); | ||||
614 | |||||
615 | my $scratch; | ||||
616 | |||||
617 | DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n"; | ||||
618 | |||||
619 | # We have something in our buffer. So apparently the document has started. | ||||
620 | unless($self->{'doc_has_started'}) { | ||||
621 | $self->{'doc_has_started'} = 1; | ||||
622 | |||||
623 | my $starting_contentless; | ||||
624 | $starting_contentless = | ||||
625 | ( | ||||
626 | !@$curr_open | ||||
627 | and @$paras and ! grep $_->[0] ne '~end', @$paras | ||||
628 | # i.e., if the paras is all ~ends | ||||
629 | ) | ||||
630 | ; | ||||
631 | DEBUG and print STDERR "# Starting ", | ||||
632 | $starting_contentless ? 'contentless' : 'contentful', | ||||
633 | " document\n" | ||||
634 | ; | ||||
635 | |||||
636 | $self->_handle_element_start( | ||||
637 | ($scratch = 'Document'), | ||||
638 | { | ||||
639 | 'start_line' => $paras->[0][1]{'start_line'}, | ||||
640 | $starting_contentless ? ( 'contentless' => 1 ) : (), | ||||
641 | }, | ||||
642 | ); | ||||
643 | } | ||||
644 | |||||
645 | my($para, $para_type); | ||||
646 | while(@$paras) { | ||||
647 | last if @$paras == 1 and | ||||
648 | ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' | ||||
649 | or $paras->[0][0] eq '=item' ) | ||||
650 | ; | ||||
651 | # Those're the three kinds of paragraphs that require lookahead. | ||||
652 | # Actually, an "=item Foo" inside an <over type=text> region | ||||
653 | # and any =item inside an <over type=block> region (rare) | ||||
654 | # don't require any lookahead, but all others (bullets | ||||
655 | # and numbers) do. | ||||
656 | |||||
657 | # TODO: whinge about many kinds of directives in non-resolving =for regions? | ||||
658 | # TODO: many? like what? =head1 etc? | ||||
659 | |||||
660 | $para = shift @$paras; | ||||
661 | $para_type = $para->[0]; | ||||
662 | |||||
663 | DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (", | ||||
664 | $self->_dump_curr_open(), ")\n"; | ||||
665 | |||||
666 | if($para_type eq '=for') { | ||||
667 | next if $self->_ponder_for($para,$curr_open,$paras); | ||||
668 | |||||
669 | } elsif($para_type eq '=begin') { | ||||
670 | next if $self->_ponder_begin($para,$curr_open,$paras); | ||||
671 | |||||
672 | } elsif($para_type eq '=end') { | ||||
673 | next if $self->_ponder_end($para,$curr_open,$paras); | ||||
674 | |||||
675 | } elsif($para_type eq '~end') { # The virtual end-document signal | ||||
676 | next if $self->_ponder_doc_end($para,$curr_open,$paras); | ||||
677 | } | ||||
678 | |||||
679 | |||||
680 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
681 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
682 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
683 | DEBUG > 1 and | ||||
684 | print STDERR "Skipping $para_type paragraph because in ignore mode.\n"; | ||||
685 | next; | ||||
686 | } | ||||
687 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
688 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
689 | |||||
690 | if($para_type eq '=pod') { | ||||
691 | $self->_ponder_pod($para,$curr_open,$paras); | ||||
692 | |||||
693 | } elsif($para_type eq '=over') { | ||||
694 | next if $self->_ponder_over($para,$curr_open,$paras); | ||||
695 | |||||
696 | } elsif($para_type eq '=back') { | ||||
697 | next if $self->_ponder_back($para,$curr_open,$paras); | ||||
698 | |||||
699 | } else { | ||||
700 | |||||
701 | # All non-magical codes!!! | ||||
702 | |||||
703 | # Here we start using $para_type for our own twisted purposes, to | ||||
704 | # mean how it should get treated, not as what the element name | ||||
705 | # should be. | ||||
706 | |||||
707 | DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n"; | ||||
708 | |||||
709 | my $i; | ||||
710 | |||||
711 | # Enforce some =headN discipline | ||||
712 | if($para_type =~ m/^=head\d$/s | ||||
713 | and ! $self->{'accept_heads_anywhere'} | ||||
714 | and @$curr_open | ||||
715 | and $curr_open->[-1][0] eq '=over' | ||||
716 | ) { | ||||
717 | DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n"; | ||||
718 | $self->whine( | ||||
719 | $para->[1]{'start_line'}, | ||||
720 | "You forgot a '=back' before '$para_type'" | ||||
721 | ); | ||||
722 | unshift @$paras, ['=back', {}, ''], $para; # close the =over | ||||
723 | next; | ||||
724 | } | ||||
725 | |||||
726 | |||||
727 | if($para_type eq '=item') { | ||||
728 | |||||
729 | my $over; | ||||
730 | unless(@$curr_open and | ||||
731 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
732 | $self->whine( | ||||
733 | $para->[1]{'start_line'}, | ||||
734 | "'=item' outside of any '=over'" | ||||
735 | ); | ||||
736 | unshift @$paras, | ||||
737 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
738 | $para | ||||
739 | ; | ||||
740 | next; | ||||
741 | } | ||||
742 | |||||
743 | |||||
744 | my $over_type = $over->[1]{'~type'}; | ||||
745 | |||||
746 | if(!$over_type) { | ||||
747 | # Shouldn't happen1 | ||||
748 | die "Typeless over in stack, starting at line " | ||||
749 | . $over->[1]{'start_line'}; | ||||
750 | |||||
751 | } elsif($over_type eq 'block') { | ||||
752 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
753 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
754 | $self->whine( | ||||
755 | $curr_open->[-1][1]{'start_line'}, | ||||
756 | "You can't have =items (as at line " | ||||
757 | . $para->[1]{'start_line'} | ||||
758 | . ") unless the first thing after the =over is an =item" | ||||
759 | ); | ||||
760 | } | ||||
761 | # Just turn it into a paragraph and reconsider it | ||||
762 | $para->[0] = '~Para'; | ||||
763 | unshift @$paras, $para; | ||||
764 | next; | ||||
765 | |||||
766 | } elsif($over_type eq 'text') { | ||||
767 | my $item_type = $self->_get_item_type($para); | ||||
768 | # That kills the content of the item if it's a number or bullet. | ||||
769 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
770 | |||||
771 | if($item_type eq 'text') { | ||||
772 | # Nothing special needs doing for 'text' | ||||
773 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
774 | $self->whine( | ||||
775 | $para->[1]{'start_line'}, | ||||
776 | "Expected text after =item, not a $item_type" | ||||
777 | ); | ||||
778 | # Undo our clobbering: | ||||
779 | push @$para, $para->[1]{'~orig_content'}; | ||||
780 | delete $para->[1]{'number'}; | ||||
781 | # Only a PROPER item-number element is allowed | ||||
782 | # to have a number attribute. | ||||
783 | } else { | ||||
784 | die "Unhandled item type $item_type"; # should never happen | ||||
785 | } | ||||
786 | |||||
787 | # =item-text thingies don't need any assimilation, it seems. | ||||
788 | |||||
789 | } elsif($over_type eq 'number') { | ||||
790 | my $item_type = $self->_get_item_type($para); | ||||
791 | # That kills the content of the item if it's a number or bullet. | ||||
792 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
793 | |||||
794 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
795 | |||||
796 | if($item_type eq 'bullet') { | ||||
797 | # Hm, it's not numeric. Correct for this. | ||||
798 | $para->[1]{'number'} = $expected_value; | ||||
799 | $self->whine( | ||||
800 | $para->[1]{'start_line'}, | ||||
801 | "Expected '=item $expected_value'" | ||||
802 | ); | ||||
803 | push @$para, $para->[1]{'~orig_content'}; | ||||
804 | # restore the bullet, blocking the assimilation of next para | ||||
805 | |||||
806 | } elsif($item_type eq 'text') { | ||||
807 | # Hm, it's not numeric. Correct for this. | ||||
808 | $para->[1]{'number'} = $expected_value; | ||||
809 | $self->whine( | ||||
810 | $para->[1]{'start_line'}, | ||||
811 | "Expected '=item $expected_value'" | ||||
812 | ); | ||||
813 | # Text content will still be there and will block next ~Para | ||||
814 | |||||
815 | } elsif($item_type ne 'number') { | ||||
816 | die "Unknown item type $item_type"; # should never happen | ||||
817 | |||||
818 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
819 | DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; | ||||
820 | |||||
821 | } else { | ||||
822 | DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, | ||||
823 | " instead of the expected value of $expected_value\n"; | ||||
824 | $self->whine( | ||||
825 | $para->[1]{'start_line'}, | ||||
826 | "You have '=item " . $para->[1]{'number'} . | ||||
827 | "' instead of the expected '=item $expected_value'" | ||||
828 | ); | ||||
829 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
830 | } | ||||
831 | |||||
832 | if(@$para == 2) { | ||||
833 | # For the cases where we /didn't/ push to @$para | ||||
834 | if($paras->[0][0] eq '~Para') { | ||||
835 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
836 | push @$para, splice @{shift @$paras},2; | ||||
837 | } else { | ||||
838 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
839 | push @$para, ''; # Just so it's not contentless | ||||
840 | } | ||||
841 | } | ||||
842 | |||||
843 | |||||
844 | } elsif($over_type eq 'bullet') { | ||||
845 | my $item_type = $self->_get_item_type($para); | ||||
846 | # That kills the content of the item if it's a number or bullet. | ||||
847 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
848 | |||||
849 | if($item_type eq 'bullet') { | ||||
850 | # as expected! | ||||
851 | |||||
852 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
853 | DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
854 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
855 | } | ||||
856 | |||||
857 | } elsif($item_type eq 'number') { | ||||
858 | $self->whine( | ||||
859 | $para->[1]{'start_line'}, | ||||
860 | "Expected '=item *'" | ||||
861 | ); | ||||
862 | push @$para, $para->[1]{'~orig_content'}; | ||||
863 | # and block assimilation of the next paragraph | ||||
864 | delete $para->[1]{'number'}; | ||||
865 | # Only a PROPER item-number element is allowed | ||||
866 | # to have a number attribute. | ||||
867 | } elsif($item_type eq 'text') { | ||||
868 | $self->whine( | ||||
869 | $para->[1]{'start_line'}, | ||||
870 | "Expected '=item *'" | ||||
871 | ); | ||||
872 | # But doesn't need processing. But it'll block assimilation | ||||
873 | # of the next para. | ||||
874 | } else { | ||||
875 | die "Unhandled item type $item_type"; # should never happen | ||||
876 | } | ||||
877 | |||||
878 | if(@$para == 2) { | ||||
879 | # For the cases where we /didn't/ push to @$para | ||||
880 | if($paras->[0][0] eq '~Para') { | ||||
881 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
882 | push @$para, splice @{shift @$paras},2; | ||||
883 | } else { | ||||
884 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
885 | push @$para, ''; # Just so it's not contentless | ||||
886 | } | ||||
887 | } | ||||
888 | |||||
889 | } else { | ||||
890 | die "Unhandled =over type \"$over_type\"?"; | ||||
891 | # Shouldn't happen! | ||||
892 | } | ||||
893 | |||||
894 | $para_type = 'Plain'; | ||||
895 | $para->[0] .= '-' . $over_type; | ||||
896 | # Whew. Now fall thru and process it. | ||||
897 | |||||
898 | |||||
899 | } elsif($para_type eq '=extend') { | ||||
900 | # Well, might as well implement it here. | ||||
901 | $self->_ponder_extend($para); | ||||
902 | next; # and skip | ||||
903 | } elsif($para_type eq '=encoding') { | ||||
904 | # Not actually acted on here, but we catch errors here. | ||||
905 | $self->_handle_encoding_second_level($para); | ||||
906 | next unless $self->keep_encoding_directive; | ||||
907 | $para_type = 'Plain'; | ||||
908 | } elsif($para_type eq '~Verbatim') { | ||||
909 | $para->[0] = 'Verbatim'; | ||||
910 | $para_type = '?Verbatim'; | ||||
911 | } elsif($para_type eq '~Para') { | ||||
912 | $para->[0] = 'Para'; | ||||
913 | $para_type = '?Plain'; | ||||
914 | } elsif($para_type eq 'Data') { | ||||
915 | $para->[0] = 'Data'; | ||||
916 | $para_type = '?Data'; | ||||
917 | } elsif( $para_type =~ s/^=//s | ||||
918 | and defined( $para_type = $self->{'accept_directives'}{$para_type} ) | ||||
919 | ) { | ||||
920 | DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n"; | ||||
921 | } else { | ||||
922 | # An unknown directive! | ||||
923 | DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n", | ||||
924 | $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) | ||||
925 | ; | ||||
926 | $self->whine( | ||||
927 | $para->[1]{'start_line'}, | ||||
928 | "Unknown directive: $para->[0]" | ||||
929 | ); | ||||
930 | |||||
931 | # And maybe treat it as text instead of just letting it go? | ||||
932 | next; | ||||
933 | } | ||||
934 | |||||
935 | if($para_type =~ s/^\?//s) { | ||||
936 | if(! @$curr_open) { # usual case | ||||
937 | DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n"; | ||||
938 | } else { | ||||
939 | my @fors = grep $_->[0] eq '=for', @$curr_open; | ||||
940 | DEBUG > 1 and print STDERR "Containing fors: ", | ||||
941 | join(',', map $_->[1]{'target'}, @fors), "\n"; | ||||
942 | |||||
943 | if(! @fors) { | ||||
944 | DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n"; | ||||
945 | |||||
946 | #} elsif(grep $_->[1]{'~resolve'}, @fors) { | ||||
947 | #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { | ||||
948 | } elsif( $fors[-1][1]{'~resolve'} ) { | ||||
949 | # Look to the immediately containing for | ||||
950 | |||||
951 | if($para_type eq 'Data') { | ||||
952 | DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
953 | $para->[0] = 'Para'; | ||||
954 | $para_type = 'Plain'; | ||||
955 | } else { | ||||
956 | DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
957 | } | ||||
958 | } else { | ||||
959 | DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; | ||||
960 | $para->[0] = $para_type = 'Data'; | ||||
961 | } | ||||
962 | } | ||||
963 | } | ||||
964 | |||||
965 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
966 | if($para_type eq 'Plain') { | ||||
967 | $self->_ponder_Plain($para); | ||||
968 | } elsif($para_type eq 'Verbatim') { | ||||
969 | $self->_ponder_Verbatim($para); | ||||
970 | } elsif($para_type eq 'Data') { | ||||
971 | $self->_ponder_Data($para); | ||||
972 | } else { | ||||
973 | die "\$para type is $para_type -- how did that happen?"; | ||||
974 | # Shouldn't happen. | ||||
975 | } | ||||
976 | |||||
977 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
978 | $para->[0] =~ s/^[~=]//s; | ||||
979 | |||||
980 | DEBUG and print STDERR "\n", pretty($para), "\n"; | ||||
981 | |||||
982 | # traverse the treelet (which might well be just one string scalar) | ||||
983 | $self->{'content_seen'} ||= 1; | ||||
984 | $self->_traverse_treelet_bit(@$para); | ||||
985 | } | ||||
986 | } | ||||
987 | |||||
988 | return; | ||||
989 | } | ||||
990 | |||||
991 | ########################################################################### | ||||
992 | # The sub-ponderers... | ||||
993 | |||||
- - | |||||
996 | sub _ponder_for { | ||||
997 | my ($self,$para,$curr_open,$paras) = @_; | ||||
998 | |||||
999 | # Fake it out as a begin/end | ||||
1000 | my $target; | ||||
1001 | |||||
1002 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
1003 | DEBUG > 1 and print STDERR "Ignoring ignorable =for\n"; | ||||
1004 | return 1; | ||||
1005 | } | ||||
1006 | |||||
1007 | for(my $i = 2; $i < @$para; ++$i) { | ||||
1008 | if($para->[$i] =~ s/^\s*(\S+)\s*//s) { | ||||
1009 | $target = $1; | ||||
1010 | last; | ||||
1011 | } | ||||
1012 | } | ||||
1013 | unless(defined $target) { | ||||
1014 | $self->whine( | ||||
1015 | $para->[1]{'start_line'}, | ||||
1016 | "=for without a target?" | ||||
1017 | ); | ||||
1018 | return 1; | ||||
1019 | } | ||||
1020 | DEBUG > 1 and | ||||
1021 | print STDERR "Faking out a =for $target as a =begin $target / =end $target\n"; | ||||
1022 | |||||
1023 | $para->[0] = 'Data'; | ||||
1024 | |||||
1025 | unshift @$paras, | ||||
1026 | ['=begin', | ||||
1027 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
1028 | $target, | ||||
1029 | ], | ||||
1030 | $para, | ||||
1031 | ['=end', | ||||
1032 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
1033 | $target, | ||||
1034 | ], | ||||
1035 | ; | ||||
1036 | |||||
1037 | return 1; | ||||
1038 | } | ||||
1039 | |||||
1040 | sub _ponder_begin { | ||||
1041 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1042 | my $content = join ' ', splice @$para, 2; | ||||
1043 | $content =~ s/^\s+//s; | ||||
1044 | $content =~ s/\s+$//s; | ||||
1045 | unless(length($content)) { | ||||
1046 | $self->whine( | ||||
1047 | $para->[1]{'start_line'}, | ||||
1048 | "=begin without a target?" | ||||
1049 | ); | ||||
1050 | DEBUG and print STDERR "Ignoring targetless =begin\n"; | ||||
1051 | return 1; | ||||
1052 | } | ||||
1053 | |||||
1054 | my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; | ||||
1055 | $para->[1]{'title'} = $title if ($title); | ||||
1056 | $para->[1]{'target'} = $target; # without any ':' | ||||
1057 | $content = $target; # strip off the title | ||||
1058 | |||||
1059 | $content =~ s/^:!/!:/s; | ||||
1060 | my $neg; # whether this is a negation-match | ||||
1061 | $neg = 1 if $content =~ s/^!//s; | ||||
1062 | my $to_resolve; # whether to process formatting codes | ||||
1063 | $to_resolve = 1 if $content =~ s/^://s; | ||||
1064 | |||||
1065 | my $dont_ignore; # whether this target matches us | ||||
1066 | |||||
1067 | foreach my $target_name ( | ||||
1068 | split(',', $content, -1), | ||||
1069 | $neg ? () : '*' | ||||
1070 | ) { | ||||
1071 | DEBUG > 2 and | ||||
1072 | print STDERR " Considering whether =begin $content matches $target_name\n"; | ||||
1073 | next unless $self->{'accept_targets'}{$target_name}; | ||||
1074 | |||||
1075 | DEBUG > 2 and | ||||
1076 | print STDERR " It DOES match the acceptable target $target_name!\n"; | ||||
1077 | $to_resolve = 1 | ||||
1078 | if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; | ||||
1079 | $dont_ignore = 1; | ||||
1080 | $para->[1]{'target_matching'} = $target_name; | ||||
1081 | last; # stop looking at other target names | ||||
1082 | } | ||||
1083 | |||||
1084 | if($neg) { | ||||
1085 | if( $dont_ignore ) { | ||||
1086 | $dont_ignore = ''; | ||||
1087 | delete $para->[1]{'target_matching'}; | ||||
1088 | DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n"; | ||||
1089 | } else { | ||||
1090 | $dont_ignore = 1; | ||||
1091 | $para->[1]{'target_matching'} = '!'; | ||||
1092 | DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n"; | ||||
1093 | } | ||||
1094 | } | ||||
1095 | |||||
1096 | $para->[0] = '=for'; # Just what we happen to call these, internally | ||||
1097 | $para->[1]{'~really'} ||= '=begin'; | ||||
1098 | $para->[1]{'~ignore'} = (! $dont_ignore) || 0; | ||||
1099 | $para->[1]{'~resolve'} = $to_resolve || 0; | ||||
1100 | |||||
1101 | DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '', | ||||
1102 | "ignore contents of this region\n"; | ||||
1103 | DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ", | ||||
1104 | ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; | ||||
1105 | DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n"; | ||||
1106 | |||||
1107 | push @$curr_open, $para; | ||||
1108 | if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
1109 | DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n"; | ||||
1110 | } else { | ||||
1111 | $self->{'content_seen'} ||= 1; | ||||
1112 | $self->_handle_element_start((my $scratch='for'), $para->[1]); | ||||
1113 | } | ||||
1114 | |||||
1115 | return 1; | ||||
1116 | } | ||||
1117 | |||||
1118 | sub _ponder_end { | ||||
1119 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1120 | my $content = join ' ', splice @$para, 2; | ||||
1121 | $content =~ s/^\s+//s; | ||||
1122 | $content =~ s/\s+$//s; | ||||
1123 | DEBUG and print STDERR "Ogling '=end $content' directive\n"; | ||||
1124 | |||||
1125 | unless(length($content)) { | ||||
1126 | $self->whine( | ||||
1127 | $para->[1]{'start_line'}, | ||||
1128 | "'=end' without a target?" . ( | ||||
1129 | ( @$curr_open and $curr_open->[-1][0] eq '=for' ) | ||||
1130 | ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) | ||||
1131 | : '' | ||||
1132 | ) | ||||
1133 | ); | ||||
1134 | DEBUG and print STDERR "Ignoring targetless =end\n"; | ||||
1135 | return 1; | ||||
1136 | } | ||||
1137 | |||||
1138 | unless($content =~ m/^\S+$/) { # i.e., unless it's one word | ||||
1139 | $self->whine( | ||||
1140 | $para->[1]{'start_line'}, | ||||
1141 | "'=end $content' is invalid. (Stack: " | ||||
1142 | . $self->_dump_curr_open() . ')' | ||||
1143 | ); | ||||
1144 | DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; | ||||
1145 | return 1; | ||||
1146 | } | ||||
1147 | |||||
1148 | unless(@$curr_open and $curr_open->[-1][0] eq '=for') { | ||||
1149 | $self->whine( | ||||
1150 | $para->[1]{'start_line'}, | ||||
1151 | "=end $content without matching =begin. (Stack: " | ||||
1152 | . $self->_dump_curr_open() . ')' | ||||
1153 | ); | ||||
1154 | DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; | ||||
1155 | return 1; | ||||
1156 | } | ||||
1157 | |||||
1158 | unless($content eq $curr_open->[-1][1]{'target'}) { | ||||
1159 | $self->whine( | ||||
1160 | $para->[1]{'start_line'}, | ||||
1161 | "=end $content doesn't match =begin " | ||||
1162 | . $curr_open->[-1][1]{'target'} | ||||
1163 | . ". (Stack: " | ||||
1164 | . $self->_dump_curr_open() . ')' | ||||
1165 | ); | ||||
1166 | DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; | ||||
1167 | return 1; | ||||
1168 | } | ||||
1169 | |||||
1170 | # Else it's okay to close... | ||||
1171 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
1172 | DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n"; | ||||
1173 | # And that may be because of this to-be-closed =for region, or some | ||||
1174 | # other one, but it doesn't matter. | ||||
1175 | } else { | ||||
1176 | $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; | ||||
1177 | # what's that for? | ||||
1178 | |||||
1179 | $self->{'content_seen'} ||= 1; | ||||
1180 | $self->_handle_element_end( my $scratch = 'for', $para->[1]); | ||||
1181 | } | ||||
1182 | DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; | ||||
1183 | pop @$curr_open; | ||||
1184 | |||||
1185 | return 1; | ||||
1186 | } | ||||
1187 | |||||
1188 | sub _ponder_doc_end { | ||||
1189 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1190 | if(@$curr_open) { # Deal with things left open | ||||
1191 | DEBUG and print STDERR "Stack is nonempty at end-document: (", | ||||
1192 | $self->_dump_curr_open(), ")\n"; | ||||
1193 | |||||
1194 | DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n"; | ||||
1195 | unshift @$paras, $self->_closers_for_all_curr_open; | ||||
1196 | # Make sure there is exactly one ~end in the parastack, at the end: | ||||
1197 | @$paras = grep $_->[0] ne '~end', @$paras; | ||||
1198 | push @$paras, $para, $para; | ||||
1199 | # We need two -- once for the next cycle where we | ||||
1200 | # generate errata, and then another to be at the end | ||||
1201 | # when that loop back around to process the errata. | ||||
1202 | return 1; | ||||
1203 | |||||
1204 | } else { | ||||
1205 | DEBUG and print STDERR "Okay, stack is empty now.\n"; | ||||
1206 | } | ||||
1207 | |||||
1208 | # Try generating errata section, if applicable | ||||
1209 | unless($self->{'~tried_gen_errata'}) { | ||||
1210 | $self->{'~tried_gen_errata'} = 1; | ||||
1211 | my @extras = $self->_gen_errata(); | ||||
1212 | if(@extras) { | ||||
1213 | unshift @$paras, @extras; | ||||
1214 | DEBUG and print STDERR "Generated errata... relooping...\n"; | ||||
1215 | return 1; # I.e., loop around again to process these fake-o paragraphs | ||||
1216 | } | ||||
1217 | } | ||||
1218 | |||||
1219 | splice @$paras; # Well, that's that for this paragraph buffer. | ||||
1220 | DEBUG and print STDERR "Throwing end-document event.\n"; | ||||
1221 | |||||
1222 | $self->_handle_element_end( my $scratch = 'Document' ); | ||||
1223 | return 1; # Hasta la byebye | ||||
1224 | } | ||||
1225 | |||||
1226 | sub _ponder_pod { | ||||
1227 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1228 | $self->whine( | ||||
1229 | $para->[1]{'start_line'}, | ||||
1230 | "=pod directives shouldn't be over one line long! Ignoring all " | ||||
1231 | . (@$para - 2) . " lines of content" | ||||
1232 | ) if @$para > 3; | ||||
1233 | |||||
1234 | # Content ignored unless 'pod_handler' is set | ||||
1235 | if (my $pod_handler = $self->{'pod_handler'}) { | ||||
1236 | my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; | ||||
1237 | $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output | ||||
1238 | $pod_handler->($line, $line_num, $self); | ||||
1239 | } | ||||
1240 | |||||
1241 | # The surrounding methods set content_seen, so let us remain consistent. | ||||
1242 | # I do not know why it was not here before -- should it not be here? | ||||
1243 | # $self->{'content_seen'} ||= 1; | ||||
1244 | |||||
1245 | return; | ||||
1246 | } | ||||
1247 | |||||
1248 | sub _ponder_over { | ||||
1249 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1250 | return 1 unless @$paras; | ||||
1251 | my $list_type; | ||||
1252 | |||||
1253 | if($paras->[0][0] eq '=item') { # most common case | ||||
1254 | $list_type = $self->_get_initial_item_type($paras->[0]); | ||||
1255 | |||||
1256 | } elsif($paras->[0][0] eq '=back') { | ||||
1257 | # Ignore empty lists by default | ||||
1258 | if ($self->{'parse_empty_lists'}) { | ||||
1259 | $list_type = 'empty'; | ||||
1260 | } else { | ||||
1261 | shift @$paras; | ||||
1262 | return 1; | ||||
1263 | } | ||||
1264 | } elsif($paras->[0][0] eq '~end') { | ||||
1265 | $self->whine( | ||||
1266 | $para->[1]{'start_line'}, | ||||
1267 | "=over is the last thing in the document?!" | ||||
1268 | ); | ||||
1269 | return 1; # But feh, ignore it. | ||||
1270 | } else { | ||||
1271 | $list_type = 'block'; | ||||
1272 | } | ||||
1273 | $para->[1]{'~type'} = $list_type; | ||||
1274 | push @$curr_open, $para; | ||||
1275 | # yes, we reuse the paragraph as a stack item | ||||
1276 | |||||
1277 | my $content = join ' ', splice @$para, 2; | ||||
1278 | my $overness; | ||||
1279 | if($content =~ m/^\s*$/s) { | ||||
1280 | $para->[1]{'indent'} = 4; | ||||
1281 | } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { | ||||
1282 | 2 | 9.29ms | 2 | 49µs | # spent 40µs (31+9) within Pod::Simple::BlackBox::BEGIN@1282 which was called:
# once (31µs+9µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1282 # spent 40µs making 1 call to Pod::Simple::BlackBox::BEGIN@1282
# spent 9µs making 1 call to integer::unimport |
1283 | $para->[1]{'indent'} = $1; | ||||
1284 | if($1 == 0) { | ||||
1285 | $self->whine( | ||||
1286 | $para->[1]{'start_line'}, | ||||
1287 | "Can't have a 0 in =over $content" | ||||
1288 | ); | ||||
1289 | $para->[1]{'indent'} = 4; | ||||
1290 | } | ||||
1291 | } else { | ||||
1292 | $self->whine( | ||||
1293 | $para->[1]{'start_line'}, | ||||
1294 | "=over should be: '=over' or '=over positive_number'" | ||||
1295 | ); | ||||
1296 | $para->[1]{'indent'} = 4; | ||||
1297 | } | ||||
1298 | DEBUG > 1 and print STDERR "=over found of type $list_type\n"; | ||||
1299 | |||||
1300 | $self->{'content_seen'} ||= 1; | ||||
1301 | $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); | ||||
1302 | |||||
1303 | return; | ||||
1304 | } | ||||
1305 | |||||
1306 | sub _ponder_back { | ||||
1307 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1308 | # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? | ||||
1309 | |||||
1310 | my $content = join ' ', splice @$para, 2; | ||||
1311 | if($content =~ m/\S/) { | ||||
1312 | $self->whine( | ||||
1313 | $para->[1]{'start_line'}, | ||||
1314 | "=back doesn't take any parameters, but you said =back $content" | ||||
1315 | ); | ||||
1316 | } | ||||
1317 | |||||
1318 | if(@$curr_open and $curr_open->[-1][0] eq '=over') { | ||||
1319 | DEBUG > 1 and print STDERR "=back happily closes matching =over\n"; | ||||
1320 | # Expected case: we're closing the most recently opened thing | ||||
1321 | #my $over = pop @$curr_open; | ||||
1322 | $self->{'content_seen'} ||= 1; | ||||
1323 | $self->_handle_element_end( my $scratch = | ||||
1324 | 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] | ||||
1325 | ); | ||||
1326 | } else { | ||||
1327 | DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (", | ||||
1328 | join(', ', map $_->[0], @$curr_open), ").\n"; | ||||
1329 | $self->whine( | ||||
1330 | $para->[1]{'start_line'}, | ||||
1331 | '=back without =over' | ||||
1332 | ); | ||||
1333 | return 1; # and ignore it | ||||
1334 | } | ||||
1335 | } | ||||
1336 | |||||
1337 | sub _ponder_item { | ||||
1338 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1339 | my $over; | ||||
1340 | unless(@$curr_open and | ||||
1341 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
1342 | $self->whine( | ||||
1343 | $para->[1]{'start_line'}, | ||||
1344 | "'=item' outside of any '=over'" | ||||
1345 | ); | ||||
1346 | unshift @$paras, | ||||
1347 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
1348 | $para | ||||
1349 | ; | ||||
1350 | return 1; | ||||
1351 | } | ||||
1352 | |||||
1353 | |||||
1354 | my $over_type = $over->[1]{'~type'}; | ||||
1355 | |||||
1356 | if(!$over_type) { | ||||
1357 | # Shouldn't happen1 | ||||
1358 | die "Typeless over in stack, starting at line " | ||||
1359 | . $over->[1]{'start_line'}; | ||||
1360 | |||||
1361 | } elsif($over_type eq 'block') { | ||||
1362 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
1363 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
1364 | $self->whine( | ||||
1365 | $curr_open->[-1][1]{'start_line'}, | ||||
1366 | "You can't have =items (as at line " | ||||
1367 | . $para->[1]{'start_line'} | ||||
1368 | . ") unless the first thing after the =over is an =item" | ||||
1369 | ); | ||||
1370 | } | ||||
1371 | # Just turn it into a paragraph and reconsider it | ||||
1372 | $para->[0] = '~Para'; | ||||
1373 | unshift @$paras, $para; | ||||
1374 | return 1; | ||||
1375 | |||||
1376 | } elsif($over_type eq 'text') { | ||||
1377 | my $item_type = $self->_get_item_type($para); | ||||
1378 | # That kills the content of the item if it's a number or bullet. | ||||
1379 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1380 | |||||
1381 | if($item_type eq 'text') { | ||||
1382 | # Nothing special needs doing for 'text' | ||||
1383 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
1384 | $self->whine( | ||||
1385 | $para->[1]{'start_line'}, | ||||
1386 | "Expected text after =item, not a $item_type" | ||||
1387 | ); | ||||
1388 | # Undo our clobbering: | ||||
1389 | push @$para, $para->[1]{'~orig_content'}; | ||||
1390 | delete $para->[1]{'number'}; | ||||
1391 | # Only a PROPER item-number element is allowed | ||||
1392 | # to have a number attribute. | ||||
1393 | } else { | ||||
1394 | die "Unhandled item type $item_type"; # should never happen | ||||
1395 | } | ||||
1396 | |||||
1397 | # =item-text thingies don't need any assimilation, it seems. | ||||
1398 | |||||
1399 | } elsif($over_type eq 'number') { | ||||
1400 | my $item_type = $self->_get_item_type($para); | ||||
1401 | # That kills the content of the item if it's a number or bullet. | ||||
1402 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1403 | |||||
1404 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
1405 | |||||
1406 | if($item_type eq 'bullet') { | ||||
1407 | # Hm, it's not numeric. Correct for this. | ||||
1408 | $para->[1]{'number'} = $expected_value; | ||||
1409 | $self->whine( | ||||
1410 | $para->[1]{'start_line'}, | ||||
1411 | "Expected '=item $expected_value'" | ||||
1412 | ); | ||||
1413 | push @$para, $para->[1]{'~orig_content'}; | ||||
1414 | # restore the bullet, blocking the assimilation of next para | ||||
1415 | |||||
1416 | } elsif($item_type eq 'text') { | ||||
1417 | # Hm, it's not numeric. Correct for this. | ||||
1418 | $para->[1]{'number'} = $expected_value; | ||||
1419 | $self->whine( | ||||
1420 | $para->[1]{'start_line'}, | ||||
1421 | "Expected '=item $expected_value'" | ||||
1422 | ); | ||||
1423 | # Text content will still be there and will block next ~Para | ||||
1424 | |||||
1425 | } elsif($item_type ne 'number') { | ||||
1426 | die "Unknown item type $item_type"; # should never happen | ||||
1427 | |||||
1428 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
1429 | DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; | ||||
1430 | |||||
1431 | } else { | ||||
1432 | DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, | ||||
1433 | " instead of the expected value of $expected_value\n"; | ||||
1434 | $self->whine( | ||||
1435 | $para->[1]{'start_line'}, | ||||
1436 | "You have '=item " . $para->[1]{'number'} . | ||||
1437 | "' instead of the expected '=item $expected_value'" | ||||
1438 | ); | ||||
1439 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
1440 | } | ||||
1441 | |||||
1442 | if(@$para == 2) { | ||||
1443 | # For the cases where we /didn't/ push to @$para | ||||
1444 | if($paras->[0][0] eq '~Para') { | ||||
1445 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
1446 | push @$para, splice @{shift @$paras},2; | ||||
1447 | } else { | ||||
1448 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
1449 | push @$para, ''; # Just so it's not contentless | ||||
1450 | } | ||||
1451 | } | ||||
1452 | |||||
1453 | |||||
1454 | } elsif($over_type eq 'bullet') { | ||||
1455 | my $item_type = $self->_get_item_type($para); | ||||
1456 | # That kills the content of the item if it's a number or bullet. | ||||
1457 | DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1458 | |||||
1459 | if($item_type eq 'bullet') { | ||||
1460 | # as expected! | ||||
1461 | |||||
1462 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
1463 | DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
1464 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
1465 | } | ||||
1466 | |||||
1467 | } elsif($item_type eq 'number') { | ||||
1468 | $self->whine( | ||||
1469 | $para->[1]{'start_line'}, | ||||
1470 | "Expected '=item *'" | ||||
1471 | ); | ||||
1472 | push @$para, $para->[1]{'~orig_content'}; | ||||
1473 | # and block assimilation of the next paragraph | ||||
1474 | delete $para->[1]{'number'}; | ||||
1475 | # Only a PROPER item-number element is allowed | ||||
1476 | # to have a number attribute. | ||||
1477 | } elsif($item_type eq 'text') { | ||||
1478 | $self->whine( | ||||
1479 | $para->[1]{'start_line'}, | ||||
1480 | "Expected '=item *'" | ||||
1481 | ); | ||||
1482 | # But doesn't need processing. But it'll block assimilation | ||||
1483 | # of the next para. | ||||
1484 | } else { | ||||
1485 | die "Unhandled item type $item_type"; # should never happen | ||||
1486 | } | ||||
1487 | |||||
1488 | if(@$para == 2) { | ||||
1489 | # For the cases where we /didn't/ push to @$para | ||||
1490 | if($paras->[0][0] eq '~Para') { | ||||
1491 | DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; | ||||
1492 | push @$para, splice @{shift @$paras},2; | ||||
1493 | } else { | ||||
1494 | DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
1495 | push @$para, ''; # Just so it's not contentless | ||||
1496 | } | ||||
1497 | } | ||||
1498 | |||||
1499 | } else { | ||||
1500 | die "Unhandled =over type \"$over_type\"?"; | ||||
1501 | # Shouldn't happen! | ||||
1502 | } | ||||
1503 | $para->[0] .= '-' . $over_type; | ||||
1504 | |||||
1505 | return; | ||||
1506 | } | ||||
1507 | |||||
1508 | sub _ponder_Plain { | ||||
1509 | my ($self,$para) = @_; | ||||
1510 | DEBUG and print STDERR " giving plain treatment...\n"; | ||||
1511 | unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) | ||||
1512 | or $para->[1]{'~cooked'} | ||||
1513 | ) { | ||||
1514 | push @$para, | ||||
1515 | @{$self->_make_treelet( | ||||
1516 | join("\n", splice(@$para, 2)), | ||||
1517 | $para->[1]{'start_line'} | ||||
1518 | )}; | ||||
1519 | } | ||||
1520 | # Empty paragraphs don't need a treelet for any reason I can see. | ||||
1521 | # And precooked paragraphs already have a treelet. | ||||
1522 | return; | ||||
1523 | } | ||||
1524 | |||||
1525 | sub _ponder_Verbatim { | ||||
1526 | my ($self,$para) = @_; | ||||
1527 | DEBUG and print STDERR " giving verbatim treatment...\n"; | ||||
1528 | |||||
1529 | $para->[1]{'xml:space'} = 'preserve'; | ||||
1530 | |||||
1531 | my $indent = $self->strip_verbatim_indent; | ||||
1532 | if ($indent && ref $indent eq 'CODE') { | ||||
1533 | my @shifted = (shift @{$para}, shift @{$para}); | ||||
1534 | $indent = $indent->($para); | ||||
1535 | unshift @{$para}, @shifted; | ||||
1536 | } | ||||
1537 | |||||
1538 | for(my $i = 2; $i < @$para; $i++) { | ||||
1539 | foreach my $line ($para->[$i]) { # just for aliasing | ||||
1540 | # Strip indentation. | ||||
1541 | $line =~ s/^\Q$indent// if $indent | ||||
1542 | && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); | ||||
1543 | while( $line =~ | ||||
1544 | # Sort of adapted from Text::Tabs -- yes, it's hardwired in that | ||||
1545 | # tabs are at every EIGHTH column. For portability, it has to be | ||||
1546 | # one setting everywhere, and 8th wins. | ||||
1547 | s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e | ||||
1548 | ) {} | ||||
1549 | |||||
1550 | # TODO: whinge about (or otherwise treat) unindented or overlong lines | ||||
1551 | |||||
1552 | } | ||||
1553 | } | ||||
1554 | |||||
1555 | # Now the VerbatimFormatted hoodoo... | ||||
1556 | if( $self->{'accept_codes'} and | ||||
1557 | $self->{'accept_codes'}{'VerbatimFormatted'} | ||||
1558 | ) { | ||||
1559 | while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } | ||||
1560 | # Kill any number of terminal newlines | ||||
1561 | $self->_verbatim_format($para); | ||||
1562 | } elsif ($self->{'codes_in_verbatim'}) { | ||||
1563 | push @$para, | ||||
1564 | @{$self->_make_treelet( | ||||
1565 | join("\n", splice(@$para, 2)), | ||||
1566 | $para->[1]{'start_line'}, $para->[1]{'xml:space'} | ||||
1567 | )}; | ||||
1568 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
1569 | } else { | ||||
1570 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
1571 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
1572 | } | ||||
1573 | return; | ||||
1574 | } | ||||
1575 | |||||
1576 | sub _ponder_Data { | ||||
1577 | my ($self,$para) = @_; | ||||
1578 | DEBUG and print STDERR " giving data treatment...\n"; | ||||
1579 | $para->[1]{'xml:space'} = 'preserve'; | ||||
1580 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
1581 | return; | ||||
1582 | } | ||||
1583 | |||||
- - | |||||
1587 | ########################################################################### | ||||
1588 | |||||
1589 | sub _traverse_treelet_bit { # for use only by the routine above | ||||
1590 | my($self, $name) = splice @_,0,2; | ||||
1591 | |||||
1592 | my $scratch; | ||||
1593 | $self->_handle_element_start(($scratch=$name), shift @_); | ||||
1594 | |||||
1595 | while (@_) { | ||||
1596 | my $x = shift; | ||||
1597 | if (ref($x)) { | ||||
1598 | &_traverse_treelet_bit($self, @$x); | ||||
1599 | } else { | ||||
1600 | $x .= shift while @_ && !ref($_[0]); | ||||
1601 | $self->_handle_text($x); | ||||
1602 | } | ||||
1603 | } | ||||
1604 | |||||
1605 | $self->_handle_element_end($scratch=$name); | ||||
1606 | return; | ||||
1607 | } | ||||
1608 | |||||
1609 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1610 | |||||
1611 | sub _closers_for_all_curr_open { | ||||
1612 | my $self = $_[0]; | ||||
1613 | my @closers; | ||||
1614 | foreach my $still_open (@{ $self->{'curr_open'} || return }) { | ||||
1615 | my @copy = @$still_open; | ||||
1616 | $copy[1] = {%{ $copy[1] }}; | ||||
1617 | #$copy[1]{'start_line'} = -1; | ||||
1618 | if($copy[0] eq '=for') { | ||||
1619 | $copy[0] = '=end'; | ||||
1620 | } elsif($copy[0] eq '=over') { | ||||
1621 | $self->whine( | ||||
1622 | $still_open->[1]{start_line} , | ||||
1623 | "=over without closing =back" | ||||
1624 | ); | ||||
1625 | |||||
1626 | $copy[0] = '=back'; | ||||
1627 | } else { | ||||
1628 | die "I don't know how to auto-close an open $copy[0] region"; | ||||
1629 | } | ||||
1630 | |||||
1631 | unless( @copy > 2 ) { | ||||
1632 | push @copy, $copy[1]{'target'}; | ||||
1633 | $copy[-1] = '' unless defined $copy[-1]; | ||||
1634 | # since =over's don't have targets | ||||
1635 | } | ||||
1636 | |||||
1637 | $copy[1]{'fake-closer'} = 1; | ||||
1638 | |||||
1639 | DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n"; | ||||
1640 | unshift @closers, \@copy; | ||||
1641 | } | ||||
1642 | return @closers; | ||||
1643 | } | ||||
1644 | |||||
1645 | #-------------------------------------------------------------------------- | ||||
1646 | |||||
1647 | sub _verbatim_format { | ||||
1648 | my($it, $p) = @_; | ||||
1649 | |||||
1650 | my $formatting; | ||||
1651 | |||||
1652 | for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines | ||||
1653 | DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n"; | ||||
1654 | $p->[$i] .= "\n"; | ||||
1655 | # Unlike with simple Verbatim blocks, we don't end up just doing | ||||
1656 | # a join("\n", ...) on the contents, so we have to append a | ||||
1657 | # newline to ever line, and then nix the last one later. | ||||
1658 | } | ||||
1659 | |||||
1660 | if( DEBUG > 4 ) { | ||||
1661 | print STDERR "<<\n"; | ||||
1662 | for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines | ||||
1663 | print STDERR "_verbatim_format $i: $p->[$i]"; | ||||
1664 | } | ||||
1665 | print STDERR ">>\n"; | ||||
1666 | } | ||||
1667 | |||||
1668 | for(my $i = $#$p; $i > 2; $i--) { | ||||
1669 | # work backwards over the lines, except the first (#2) | ||||
1670 | |||||
1671 | #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s | ||||
1672 | # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; | ||||
1673 | # look at a formatty line preceding a nonformatty one | ||||
1674 | DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n"; | ||||
1675 | if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { | ||||
1676 | DEBUG > 5 and print STDERR " It's a formatty line. ", | ||||
1677 | "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; | ||||
1678 | |||||
1679 | if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { | ||||
1680 | DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n"; | ||||
1681 | next; | ||||
1682 | } else { | ||||
1683 | DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n"; | ||||
1684 | } | ||||
1685 | } else { | ||||
1686 | DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n"; | ||||
1687 | next; | ||||
1688 | } | ||||
1689 | |||||
1690 | # A formatty line has to have #: in the first two columns, and uses | ||||
1691 | # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. | ||||
1692 | # Example: | ||||
1693 | # What do you want? i like pie. [or whatever] | ||||
1694 | # #:^^^^^^^^^^^^^^^^^ ///////////// | ||||
1695 | |||||
1696 | |||||
1697 | DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; | ||||
1698 | |||||
1699 | $formatting = ' ' . $1; | ||||
1700 | $formatting =~ s/\s+$//s; # nix trailing whitespace | ||||
1701 | unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op | ||||
1702 | splice @$p,$i,1; # remove this line | ||||
1703 | $i--; # don't consider next line | ||||
1704 | next; | ||||
1705 | } | ||||
1706 | |||||
1707 | if( length($formatting) >= length($p->[$i-1]) ) { | ||||
1708 | $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; | ||||
1709 | } else { | ||||
1710 | $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); | ||||
1711 | } | ||||
1712 | # Make $formatting and the previous line be exactly the same length, | ||||
1713 | # with $formatting having a " " as the last character. | ||||
1714 | |||||
1715 | DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n"; | ||||
1716 | |||||
1717 | |||||
1718 | my @new_line; | ||||
1719 | while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { | ||||
1720 | #print STDERR "Format matches $1\n"; | ||||
1721 | |||||
1722 | if($2) { | ||||
1723 | #print STDERR "SKIPPING <$2>\n"; | ||||
1724 | push @new_line, | ||||
1725 | substr($p->[$i-1], pos($formatting)-length($1), length($1)); | ||||
1726 | } else { | ||||
1727 | #print STDERR "SNARING $+\n"; | ||||
1728 | push @new_line, [ | ||||
1729 | ( | ||||
1730 | $3 ? 'VerbatimB' : | ||||
1731 | $4 ? 'VerbatimI' : | ||||
1732 | $5 ? 'VerbatimBI' : die("Should never get called") | ||||
1733 | ), {}, | ||||
1734 | substr($p->[$i-1], pos($formatting)-length($1), length($1)) | ||||
1735 | ]; | ||||
1736 | #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; | ||||
1737 | } | ||||
1738 | } | ||||
1739 | my @nixed = | ||||
1740 | splice @$p, $i-1, 2, @new_line; # replace myself and the next line | ||||
1741 | DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n"; | ||||
1742 | |||||
1743 | DEBUG > 6 and print STDERR "New version of the above line is these tokens (", | ||||
1744 | scalar(@new_line), "):", | ||||
1745 | map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; | ||||
1746 | $i--; # So the next line we scrutinize is the line before the one | ||||
1747 | # that we just went and formatted | ||||
1748 | } | ||||
1749 | |||||
1750 | $p->[0] = 'VerbatimFormatted'; | ||||
1751 | |||||
1752 | # Collapse adjacent text nodes, just for kicks. | ||||
1753 | for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last | ||||
1754 | if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { | ||||
1755 | DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; | ||||
1756 | $p->[$i] .= splice @$p, $i+1, 1; # merge | ||||
1757 | --$i; # and back up | ||||
1758 | } | ||||
1759 | } | ||||
1760 | |||||
1761 | # Now look for the last text token, and remove the terminal newline | ||||
1762 | for( my $i = $#$p; $i >= 2; $i-- ) { | ||||
1763 | # work backwards over the tokens, even the first | ||||
1764 | if( !ref($p->[$i]) ) { | ||||
1765 | if($p->[$i] =~ s/\n$//s) { | ||||
1766 | DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; | ||||
1767 | } else { | ||||
1768 | DEBUG > 5 and print STDERR | ||||
1769 | "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; | ||||
1770 | } | ||||
1771 | last; # we only want the next one | ||||
1772 | } | ||||
1773 | } | ||||
1774 | |||||
1775 | return; | ||||
1776 | } | ||||
1777 | |||||
1778 | |||||
1779 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1780 | |||||
1781 | |||||
1782 | sub _treelet_from_formatting_codes { | ||||
1783 | # Given a paragraph, returns a treelet. Full of scary tokenizing code. | ||||
1784 | # Like [ '~Top', {'start_line' => $start_line}, | ||||
1785 | # "I like ", | ||||
1786 | # [ 'B', {}, "pie" ], | ||||
1787 | # "!" | ||||
1788 | # ] | ||||
1789 | |||||
1790 | my($self, $para, $start_line, $preserve_space) = @_; | ||||
1791 | |||||
1792 | my $treelet = ['~Top', {'start_line' => $start_line},]; | ||||
1793 | |||||
1794 | unless ($preserve_space || $self->{'preserve_whitespace'}) { | ||||
1795 | $para =~ s/\s+/ /g; # collapse and trim all whitespace first. | ||||
1796 | $para =~ s/ $//; | ||||
1797 | $para =~ s/^ //; | ||||
1798 | } | ||||
1799 | |||||
1800 | # Only apparent problem the above code is that N<< >> turns into | ||||
1801 | # N<< >>. But then, word wrapping does that too! So don't do that! | ||||
1802 | |||||
1803 | my @stack; | ||||
1804 | my @lineage = ($treelet); | ||||
1805 | my $raw = ''; # raw content of L<> fcode before splitting/processing | ||||
1806 | # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed | ||||
1807 | # into just 1 ' '. Is this the regex's doing or 'raw's? | ||||
1808 | my $inL = 0; | ||||
1809 | |||||
1810 | DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n"; | ||||
1811 | |||||
1812 | # Here begins our frightening tokenizer RE. The following regex matches | ||||
1813 | # text in four main parts: | ||||
1814 | # | ||||
1815 | # * Start-codes. The first alternative matches C< or C<<, the latter | ||||
1816 | # followed by some whitespace. $1 will hold the entire start code | ||||
1817 | # (including any space following a multiple-angle-bracket delimiter), | ||||
1818 | # and $2 will hold only the additional brackets past the first in a | ||||
1819 | # multiple-bracket delimiter. length($2) + 1 will be the number of | ||||
1820 | # closing brackets we have to find. | ||||
1821 | # | ||||
1822 | # * Closing brackets. Match some amount of whitespace followed by | ||||
1823 | # multiple close brackets. The logic to see if this closes anything | ||||
1824 | # is down below. Note that in order to parse C<< >> correctly, we | ||||
1825 | # have to use look-behind (?<=\s\s), since the match of the starting | ||||
1826 | # code will have consumed the whitespace. | ||||
1827 | # | ||||
1828 | # * A single closing bracket, to close a simple code like C<>. | ||||
1829 | # | ||||
1830 | # * Something that isn't a start or end code. We have to be careful | ||||
1831 | # about accepting whitespace, since perlpodspec says that any whitespace | ||||
1832 | # before a multiple-bracket closing delimiter should be ignored. | ||||
1833 | # | ||||
1834 | while($para =~ | ||||
1835 | m/\G | ||||
1836 | (?: | ||||
1837 | # Match starting codes, including the whitespace following a | ||||
1838 | # multiple-delimiter start code. $1 gets the whole start code and | ||||
1839 | # $2 gets all but one of the <s in the multiple-bracket case. | ||||
1840 | ([A-Z]<(?:(<+)\s+)?) | ||||
1841 | | | ||||
1842 | # Match multiple-bracket end codes. $3 gets the whitespace that | ||||
1843 | # should be discarded before an end bracket but kept in other cases | ||||
1844 | # and $4 gets the end brackets themselves. | ||||
1845 | (\s+|(?<=\s\s))(>{2,}) | ||||
1846 | | | ||||
1847 | (\s?>) # $5: simple end-codes | ||||
1848 | | | ||||
1849 | ( # $6: stuff containing no start-codes or end-codes | ||||
1850 | (?: | ||||
1851 | [^A-Z\s>] | ||||
1852 | | | ||||
1853 | (?: | ||||
1854 | [A-Z](?!<) | ||||
1855 | ) | ||||
1856 | | | ||||
1857 | # whitespace is ok, but we don't want to eat the whitespace before | ||||
1858 | # a multiple-bracket end code. | ||||
1859 | # NOTE: we may still have problems with e.g. S<< >> | ||||
1860 | (?: | ||||
1861 | \s(?!\s*>{2,}) | ||||
1862 | ) | ||||
1863 | )+ | ||||
1864 | ) | ||||
1865 | ) | ||||
1866 | /xgo | ||||
1867 | ) { | ||||
1868 | DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n"; | ||||
1869 | if(defined $1) { | ||||
1870 | if(defined $2) { | ||||
1871 | DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n"; | ||||
1872 | push @stack, length($2) + 1; | ||||
1873 | # length of the necessary complex end-code string | ||||
1874 | } else { | ||||
1875 | DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n"; | ||||
1876 | push @stack, 0; # signal that we're looking for simple | ||||
1877 | } | ||||
1878 | push @lineage, [ substr($1,0,1), {}, ]; # new node object | ||||
1879 | push @{ $lineage[-2] }, $lineage[-1]; | ||||
1880 | if ('L' eq substr($1,0,1)) { | ||||
1881 | $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator | ||||
1882 | $inL = 1; | ||||
1883 | } else { | ||||
1884 | $raw .= $1 if $inL; | ||||
1885 | } | ||||
1886 | |||||
1887 | } elsif(defined $4) { | ||||
1888 | DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n"; | ||||
1889 | # This is where it gets messy... | ||||
1890 | if(! @stack) { | ||||
1891 | # We saw " >>>>" but needed nothing. This is ALL just stuff then. | ||||
1892 | DEBUG > 4 and print STDERR " But it's really just stuff.\n"; | ||||
1893 | push @{ $lineage[-1] }, $3, $4; | ||||
1894 | next; | ||||
1895 | } elsif(!$stack[-1]) { | ||||
1896 | # We saw " >>>>" but needed only ">". Back pos up. | ||||
1897 | DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n"; | ||||
1898 | push @{ $lineage[-1] }, $3; # That was a for-real space, too. | ||||
1899 | pos($para) = pos($para) - length($4) + 1; | ||||
1900 | } elsif($stack[-1] == length($4)) { | ||||
1901 | # We found " >>>>", and it was exactly what we needed. Commonest case. | ||||
1902 | DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n"; | ||||
1903 | } elsif($stack[-1] < length($4)) { | ||||
1904 | # We saw " >>>>" but needed only " >>". Back pos up. | ||||
1905 | DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n"; | ||||
1906 | pos($para) = pos($para) - length($4) + $stack[-1]; | ||||
1907 | } else { | ||||
1908 | # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! | ||||
1909 | DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n"; | ||||
1910 | push @{ $lineage[-1] }, $3, $4; | ||||
1911 | next; | ||||
1912 | } | ||||
1913 | #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; | ||||
1914 | |||||
1915 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
1916 | # Keep the element from being childless | ||||
1917 | |||||
1918 | pop @stack; | ||||
1919 | pop @lineage; | ||||
1920 | |||||
1921 | unless (@stack) { # not in an L if there are no open fcodes | ||||
1922 | $inL = 0; | ||||
1923 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
1924 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
1925 | } | ||||
1926 | } | ||||
1927 | $raw .= $3.$4 if $inL; | ||||
1928 | |||||
1929 | } elsif(defined $5) { | ||||
1930 | DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n"; | ||||
1931 | |||||
1932 | if(@stack and ! $stack[-1]) { | ||||
1933 | # We're indeed expecting a simple end-code | ||||
1934 | DEBUG > 4 and print STDERR " It's indeed an end-code.\n"; | ||||
1935 | |||||
1936 | if(length($5) == 2) { # There was a space there: " >" | ||||
1937 | push @{ $lineage[-1] }, ' '; | ||||
1938 | } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element | ||||
1939 | push @{ $lineage[-1] }, ''; # keep it from being really childless | ||||
1940 | } | ||||
1941 | |||||
1942 | pop @stack; | ||||
1943 | pop @lineage; | ||||
1944 | } else { | ||||
1945 | DEBUG > 4 and print STDERR " It's just stuff.\n"; | ||||
1946 | push @{ $lineage[-1] }, $5; | ||||
1947 | } | ||||
1948 | |||||
1949 | unless (@stack) { # not in an L if there are no open fcodes | ||||
1950 | $inL = 0; | ||||
1951 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
1952 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
1953 | } | ||||
1954 | } | ||||
1955 | $raw .= $5 if $inL; | ||||
1956 | |||||
1957 | } elsif(defined $6) { | ||||
1958 | DEBUG > 3 and print STDERR "Found stuff \"$6\"\n"; | ||||
1959 | push @{ $lineage[-1] }, $6; | ||||
1960 | $raw .= $6 if $inL; | ||||
1961 | # XXX does not capture multiplace whitespaces -- 'raw' ends up with | ||||
1962 | # at most 1 leading/trailing whitespace, why not all of it? | ||||
1963 | |||||
1964 | } else { | ||||
1965 | # should never ever ever ever happen | ||||
1966 | DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n"; | ||||
1967 | die "SPORK 512512!"; | ||||
1968 | } | ||||
1969 | } | ||||
1970 | |||||
1971 | if(@stack) { # Uhoh, some sequences weren't closed. | ||||
1972 | my $x= "..."; | ||||
1973 | while(@stack) { | ||||
1974 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
1975 | # Hmmmmm! | ||||
1976 | |||||
1977 | my $code = (pop @lineage)->[0]; | ||||
1978 | my $ender_length = pop @stack; | ||||
1979 | if($ender_length) { | ||||
1980 | --$ender_length; | ||||
1981 | $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); | ||||
1982 | } else { | ||||
1983 | $x = $code . "<$x>"; | ||||
1984 | } | ||||
1985 | } | ||||
1986 | DEBUG > 1 and print STDERR "Unterminated $x sequence\n"; | ||||
1987 | $self->whine($start_line, | ||||
1988 | "Unterminated $x sequence", | ||||
1989 | ); | ||||
1990 | } | ||||
1991 | |||||
1992 | return $treelet; | ||||
1993 | } | ||||
1994 | |||||
1995 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1996 | |||||
1997 | sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) | ||||
1998 | return stringify_lol($_[1]); | ||||
1999 | } | ||||
2000 | |||||
2001 | sub stringify_lol { # function: stringify_lol($lol) | ||||
2002 | my $string_form = ''; | ||||
2003 | _stringify_lol( $_[0] => \$string_form ); | ||||
2004 | return $string_form; | ||||
2005 | } | ||||
2006 | |||||
2007 | sub _stringify_lol { # the real recursor | ||||
2008 | my($lol, $to) = @_; | ||||
2009 | for(my $i = 2; $i < @$lol; ++$i) { | ||||
2010 | if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { | ||||
2011 | _stringify_lol( $lol->[$i], $to); # recurse! | ||||
2012 | } else { | ||||
2013 | $$to .= $lol->[$i]; | ||||
2014 | } | ||||
2015 | } | ||||
2016 | return; | ||||
2017 | } | ||||
2018 | |||||
2019 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
2020 | |||||
2021 | sub _dump_curr_open { # return a string representation of the stack | ||||
2022 | my $curr_open = $_[0]{'curr_open'}; | ||||
2023 | |||||
2024 | return '[empty]' unless @$curr_open; | ||||
2025 | return join '; ', | ||||
2026 | map {; | ||||
2027 | ($_->[0] eq '=for') | ||||
2028 | ? ( ($_->[1]{'~really'} || '=over') | ||||
2029 | . ' ' . $_->[1]{'target'}) | ||||
2030 | : $_->[0] | ||||
2031 | } | ||||
2032 | @$curr_open | ||||
2033 | ; | ||||
2034 | } | ||||
2035 | |||||
2036 | ########################################################################### | ||||
2037 | 1 | 19µs | my %pretty_form = ( | ||
2038 | "\a" => '\a', # ding! | ||||
2039 | "\b" => '\b', # BS | ||||
2040 | "\e" => '\e', # ESC | ||||
2041 | "\f" => '\f', # FF | ||||
2042 | "\t" => '\t', # tab | ||||
2043 | "\cm" => '\cm', | ||||
2044 | "\cj" => '\cj', | ||||
2045 | "\n" => '\n', # probably overrides one of either \cm or \cj | ||||
2046 | '"' => '\"', | ||||
2047 | '\\' => '\\\\', | ||||
2048 | '$' => '\\$', | ||||
2049 | '@' => '\\@', | ||||
2050 | '%' => '\\%', | ||||
2051 | '#' => '\\#', | ||||
2052 | ); | ||||
2053 | |||||
2054 | sub pretty { # adopted from Class::Classless | ||||
2055 | # Not the most brilliant routine, but passable. | ||||
2056 | # Don't give it a cyclic data structure! | ||||
2057 | my @stuff = @_; # copy | ||||
2058 | my $x; | ||||
2059 | my $out = | ||||
2060 | # join ",\n" . | ||||
2061 | join ", ", | ||||
2062 | map {; | ||||
2063 | if(!defined($_)) { | ||||
2064 | "undef"; | ||||
2065 | } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { | ||||
2066 | $x = "[ " . pretty(@$_) . " ]" ; | ||||
2067 | $x; | ||||
2068 | } elsif(ref($_) eq 'SCALAR') { | ||||
2069 | $x = "\\" . pretty($$_) ; | ||||
2070 | $x; | ||||
2071 | } elsif(ref($_) eq 'HASH') { | ||||
2072 | my $hr = $_; | ||||
2073 | $x = "{" . join(", ", | ||||
2074 | map(pretty($_) . '=>' . pretty($hr->{$_}), | ||||
2075 | sort keys %$hr ) ) . "}" ; | ||||
2076 | $x; | ||||
2077 | } elsif(!length($_)) { q{''} # empty string | ||||
2078 | } elsif( | ||||
2079 | $_ eq '0' # very common case | ||||
2080 | or( | ||||
2081 | m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s | ||||
2082 | and $_ ne '-0' # the strange case that RE lets thru | ||||
2083 | ) | ||||
2084 | ) { $_; | ||||
2085 | } else { | ||||
2086 | # Yes, explicitly name every character desired. There are shorcuts one | ||||
2087 | # could make, but I (Karl Williamson) was afraid that some Perl | ||||
2088 | # releases would have bugs in some of them. For example [A-Z] works | ||||
2089 | # even on EBCDIC platforms to match exactly the 26 uppercase English | ||||
2090 | # letters, but I don't know if it has always worked without bugs. It | ||||
2091 | # seemed safest just to list the characters. | ||||
2092 | # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> | ||||
2093 | s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])> | ||||
2094 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
2095 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
2096 | qq{"$_"}; | ||||
2097 | } | ||||
2098 | } @stuff; | ||||
2099 | # $out =~ s/\n */ /g if length($out) < 75; | ||||
2100 | return $out; | ||||
2101 | } | ||||
2102 | |||||
2103 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
2104 | |||||
2105 | # A rather unsubtle method of blowing away all the state information | ||||
2106 | # from a parser object so it can be reused. Provided as a utility for | ||||
2107 | # backward compatibility in Pod::Man, etc. but not recommended for | ||||
2108 | # general use. | ||||
2109 | |||||
2110 | sub reinit { | ||||
2111 | my $self = shift; | ||||
2112 | foreach (qw(source_dead source_filename doc_has_started | ||||
2113 | start_of_pod_block content_seen last_was_blank paras curr_open | ||||
2114 | line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen | ||||
2115 | Title)) { | ||||
2116 | |||||
2117 | delete $self->{$_}; | ||||
2118 | } | ||||
2119 | } | ||||
2120 | |||||
2121 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
2122 | 1 | 23µs | 1; | ||
2123 | |||||
# spent 7µs within Pod::Simple::BlackBox::CORE:qr which was called:
# once (7µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 1 of (eval 4)[Pod/Simple/BlackBox.pm:35] |