Symlink notifyd.py to osd_server.py for backward compatibility
[grml-scripts.git] / usr_bin / code2color
1 #!/usr/bin/env perl
2
3 #use strict;
4 use Getopt::Long;
5 use POSIX;
6
7 my $VERSION = "0.1";
8 my $vernr = "0.9.1"; # this is the underlying version of Peter Palfrader's script
9 ##########################################################################
10 #
11 # very early check whether this routine is called from less -R or less -r
12 # or if the variable LESS contains -R or -r
13 # return if not (escape sequences would not be interpreted as colors)
14 # on systems with process info in /proc this could be coded in lesspipe.sh
15 #
16 ##########################################################################
# Early pager check.  When lesspipe.sh invokes this script it passes the
# parent PID as the first argument.  Unless the LESS environment variable
# already requests raw control characters (-r / -R), the command line of the
# parent (or, if the parent is not less, of the grandparent) is inspected and
# the script exits with status 1 when less would not interpret the escape
# sequences as colors.
if ( defined $ARGV[0] and $ARGV[0] =~ /\A\d+\z/ and $ARGV[1] ) {
  # we were called from lesspipe.sh with first arg = PPID
  # (\A...\z: the value is interpolated into a shell command below, so
  #  nothing but digits may get through)
  my $PPID = shift;
  my $less_env   = defined $ENV{LESS} ? $ENV{LESS} : '';
  my $is_less    = qr/\bless\s+/;
  my $less_raw   = qr/less\s+-\w*r\w*\b/i;
  # if env variable LESS contains -r or -R we are sure that colors get displayed
  if ( $less_env !~ /-\w*r\w*\b/i ) {
    # check if less is called with -r or -R (highly OS dependent)
    # tested only for Linux, Solaris, IRIX, True64, MacOS X, FreeBSD and AIX !!!
    my $psargs   = '-oppid= -oargs=';
    my $procvers = '';
    if ( $^O eq 'darwin' || $^O =~ /bsd$/ ) {
      $psargs = '-oppid -ocommand';
    } elsif ( $^O eq 'hpux' ) {
      $procvers = "0.36";
      $psargs = '-f';
    }
    # Prefer Proc::ProcessTable when it is installed; fall back to ps(1).
    eval "use Proc::ProcessTable $procvers";
    if ( $@ ) {
      my $p = `ps -p $PPID $psargs`;
      $p = '' unless defined $p;
      exit 1 if $p =~ $is_less and $p !~ $less_raw;
      if ( $p !~ $is_less ) {
        # parent is not less: take the parent's PPID from the ps output
        # and look one level further up
        if ( $p =~ /\d+\s+(\d+)/ ) {
          $PPID = $1;
        } elsif ( $p =~ /(\d+)/ ) {
          $PPID = $1;
        }
        my $p2 = `ps -p $PPID $psargs`;
        $p2 = '' unless defined $p2;
        exit 1 if $p2 !~ $less_raw;
      }
    } else {
      my $pt    = Proc::ProcessTable->new;
      my @procs = @{ $pt->table };
      # Look the parent up once; the PID is not changed while scanning, so
      # the outcome does not depend on the order of the process table.
      my ($parent) = grep { $_->pid == $PPID } @procs;
      if ( $parent ) {
        my $cmd = $parent->cmndline;
        $cmd = '' unless defined $cmd;
        exit 1 if $cmd =~ $is_less and $cmd !~ $less_raw;
        if ( $cmd !~ $is_less ) {
          my $grand_pid = $parent->ppid;
          my ($grand) = grep { $_->pid == $grand_pid } @procs;
          if ( $grand ) {
            my $grand_cmd = $grand->cmndline;
            $grand_cmd = '' unless defined $grand_cmd;
            exit 1 if $grand_cmd !~ $less_raw;
          }
        }
      }
    }
  }
}
62
63 ########################################################################
64 #                                                                      #
65 # Code2HTML                                                            #
66 # ---------                                                            #
67 #                                                                      #
68 # Code2Html, peter AT palfrader.org                                    #
69 #                                                                      #
70 # $Date: 2002/01/12 21:17:02 $
71 # $Revision: 1.13 $
72 # $Id: code2html,v 1.13 2002/01/12 21:17:02 weaselp Exp $
73 #                                                                      #
74 # AUTHOR                                                               #
75 #        Peter  Palfrader. Written in 1999, 2000, 2001, 2002.          #
76 #        A lot of other people. See CREDITS file.                      #
77 #                                                                      #
78 # DESCRIPTION                                                          #
79 #        code2html is a  perlscript  which  converts  a  program       #
80 #        source  code  to syntax highlighted HTML by applying a set    #
81 #        of   regular   expressions   depending   on  the  language    #
82 #        the source code is written.                                   #
83 #                                                                      #
84 #        see the man-page for details,                                 #
85 #                                                                      #
86 ########################################################################
87
88 #added 2/2001 bdk
# Defaults applied by parse_passed_params when the caller gives no value.
my $LINE_NUMBER_DEFAULT  = "none";       # 'none', 'normal', 'linked'
my $REPLACE_TAB_DEFAULT  = "8";
my $DEFAULT_OUTPUTFORMAT = 'xterm';

# Number of leading characters of the input that get_input_file tests
# against the per-language regexes when guessing the language mode.
my $LANG_TEST_LENGTH = 1024;

# Shared state filled in by main(): the entity pattern and its replacement
# table, the "definitions already loaded" flag, and the loaded definitions.
my ($ENTITIES, %ENTITIES);
my $STYLE_AND_LANGUAGE_FLAG;
my (%STYLESHEET, %LANGUAGE);

# Without any command line argument there is nothing to do but show the banner.
@ARGV or Usage();
101 # =======================================================================
102 # == subroutines ========================================================
103 # =======================================================================
104
# Print a one-line version banner (program name, version, modification date
# of the script file, underlying Code2Html version) to STDOUT and exit.
# The option summary below is assigned to a local variable but never printed.
sub Usage {
  my $prog = $0;
  $prog =~ s!.*/!!;                       # strip the directory part
  my $mtime = (stat($0))[9];
  my $time  = strftime("%F", localtime($mtime));
  print "$prog V$VERSION $time based on Code2Html version $vernr (peter\@palfrader.org)\n";
  my $origtext = <<EOF;
Usage: $prog [options] [input_file [output_file]]

Convert a program source to syntax highlighted HTML,
or any other format for wich rules are defined.

-l, --language-mode   set language mode
    --fallback LANG   fallback language mode
-v, --verbose         prints progress information to STDER
-n, --linenumbers     print out the source code with line numbers
-P, --prefix          optional prefix to use for linenumber anchors
-N, --linknumbers     linenumbers will link to themselves
-t, --replace-tabs[=TABSTOP-WIDTH]
                      replace <tabs> with spaces
-L, --language-file=LANGUAGE-FILE
                      specify an alternate file for definitions
-m, --modes           print all available modes
-h, --help            print this message
-V, --version         print version
-c, --content-type    prints a Content-Type header
-o, --output-format   selects the output-format
-H, --no-header       don't use the template
    --template=FILE   override template
-T, --title           set title

-w, --linewidth       max characters per line
-b, --linebreakprefix prefix of the new lines

see the man-page code2html for further help
EOF
  exit;
}
143
144 ####
145 #### main
146 ####
147
# Convert one input to highlighted output.
#
# in     : \%params as built by parse_passed_params.  Keys read here:
#          langfile, verbose, outputformat, template, noheader, title,
#          langmode, alt_langmode, outfile.
# returns: the converted text when no 'outfile' is given;
#          an error message string when the output format, the template or
#          the output file is unusable;
#          0 when get_input_file does not return a reference;
#          otherwise the (true) result of the final print/close.
sub main {
    my %params = %{shift()};
    my $html;   # end result

    # undefine the input record separator so everything gets loaded in one turn
    local $/ = undef;  # don't propagate this change outside this package.

    # Only set %STYLESHEET and %LANGUAGE if they haven't been
    # already set in a previous call ( if, say, we're running
    # in a persistent environment under mod_perl)
    # or if the langfile is passed in explicitly.
    if ( $params{'langfile'} or ! $STYLE_AND_LANGUAGE_FLAG ) {
      $STYLE_AND_LANGUAGE_FLAG = 1;  # now they will be defined.

      print STDERR "getting patterns...\n"  if ($params{'verbose'});
      # building up the database
      # newer entries overwrite old ones
      my @CONFIG_FILES;
      push @CONFIG_FILES, "/etc/code2html.config";
      push @CONFIG_FILES,
        $ENV{'HOME'}."/.code2html.config"   if $ENV{'HOME'};
      push @CONFIG_FILES,
        split(/:/,$ENV{'CODE2HTML_CONFIG'}) if $ENV{'CODE2HTML_CONFIG'};
      push @CONFIG_FILES,
        split(/:/,$params{'langfile'})      if $params{'langfile'};

      %STYLESHEET = %{ &get_default_stylesheet } ;
      %LANGUAGE   = %{ &get_default_database   } ;

      for (@CONFIG_FILES) {
        next unless -r $_;
        # Read the file in-process instead of shelling out to cat(1); the
        # path comes from the environment / command line and must not be
        # interpreted by a shell.  $/ is undef here, so one read slurps it.
        my $config_source;
        if ( open(my $config_fh, '<', $_) ) {
          $config_source = <$config_fh>;
          close($config_fh);
        }
        else {
          warn "couldn't read $_: $!";
          next;
        }
        next unless defined $config_source;
        # if I use `do $_` instead of scalar eval...
        #  %LANGUAGE is not exported and imported correctly
        # (read: at all) (PP)
        # NOTE(review): this evaluates arbitrary Perl from the config file.
        unless (scalar eval $config_source) {
          warn "couldn't parse $_: $@" if $@;
        }
      }
    }

    # set outputformat
    #   When called as a package, "die" is impolite. Changed to "return".
    return "Outputformat $params{'outputformat'} not defined"
      unless defined $STYLESHEET{$params{'outputformat'}};

    my %STYLE = % { $STYLESHEET{$params{'outputformat'}} };

    # load alternate template if given
    if ( defined $params{'template'} && $params{'template'} ne ""
         && ! $params{'noheader'} ) {
      open (my $template_fh, '<', $params{'template'}) or
        return ("Could not open template file $params{'template'}: $!");
      $STYLE{'template'} = <$template_fh>;
      close ($template_fh);
    }

    # set up the global ENTITIES variables ( the scalar and the hash )
    # from the STYLE definition
    $ENTITIES =     $ { $STYLE{'entities'} }{'listofchars'};
    %ENTITIES = % { $ { $STYLE{'entities'} }{'replace_by' } };

    # modify the header and footer so that the template variables
    # are set correctly

    unless ($STYLE{'template'} =~ /^(.*)%%code%%(.*)$/s) {
      return "template does not contain a %%code%% variable";
    }

    $STYLE{'header'} = $1;
    $STYLE{'footer'} = $2;
    $STYLE{'header'} =~ s/%%title%%/$params{'title'}/g;
    $STYLE{'footer'} =~ s/%%title%%/$params{'title'}/g;
    $STYLE{'header'} =~ s/%%version%%/$vernr/g;
    $STYLE{'footer'} =~ s/%%version%%/$vernr/g;

    # load the input file and set params{'langmode'}
    # if it is not already. this is done by probing a
    # set of rules defined in %LANGUAGE
    # (langmode / alt_langmode are passed as hash elements on purpose:
    #  get_input_file writes the detected values back through @_)
    my $code_ref;
    print STDERR "loading input file...\n"    if ($params{'verbose'});
    $code_ref = &get_input_file(\%params,
                                \%LANGUAGE,
                                $params{'langmode'},
                                $params{'alt_langmode'});

    return 0 if ! ref $code_ref;
    # select the rules for our language.
    my $language_rules_ref =
      $LANGUAGE{ lc($params{'langmode'}) }->{'patterns'};

    print STDERR "applying stylesheet...\n"      if ($params{'verbose'});
    # Apply the Stylesheets
    # set 'starttag' and 'endtag' for every rule according to
    # its 'style' value the tags are defined in the stylesheet
    &apply_stylesheets_to_rules( $language_rules_ref, \%STYLE );

    print STDERR "getting headers ...\n"  if ($params{'verbose'});
    $html = &put_headers(\%params, \%STYLE);

    my $snippetlist_ref = [] ;
    print STDERR "creating snippet-list...\n"    if $params{'verbose'};
    &create_snippetlist( $language_rules_ref,
                         $$code_ref, $snippetlist_ref, \%STYLE);

    print STDERR "getting html converted code ...\n"  if $params{'verbose'};
    $html .= &put_output(\%params, $snippetlist_ref, \%STYLE);

    # Collapse redundant escape sequences: a reset directly followed by
    # another attribute, and repeated resets with no escape in between.
    $html =~ s/\e\[0m(\e\[\d\d?m)/$1/g;
    1 while $html =~ s/(\e\[0m[^\e]+)\e\[0m/$1/g;

    # Output $html code.
    if ( $params{outfile} ) {
      if ( $params{outfile} eq '-') {
        print $html;
      }
      else {
        open(my $out_fh, '>', $params{outfile}) or
          return( " Couldn't open output file " . $params{outfile} . ": $!");

        print {$out_fh} $html;
        # buffered write errors only surface when the handle is closed
        close($out_fh) or
          return( " Couldn't write output file " . $params{outfile} . ": $!");
      }
    }
    else {
      return $html;
    }

  }
288
289
290 ####
291 #### parse_passed_params
292 ####   replaces parse_params for package version of program,
293 ####   constructing %RESULT hash from options passed by calling routine.
# Build the parameter hash for main() from the caller's arguments.
#
# in     : either a single scalar (taken as the text to convert, i.e. the
#          'input' key) or a list of key => value pairs overriding the
#          defaults below.
# returns: reference to the resulting hash.  'title' falls back to the
#          input file name and then to 'Code2HTML'; a 'linenumbers' value
#          other than exactly none/normal/linked is replaced by 'normal'.
sub parse_passed_params {
  my @overrides = ( @_ == 1 ) ? ( input => $_[0] ) : @_;

  my %RESULT = (

        input               =>  '',             # text to convert

        infile              =>  '',             # filename to get text from
        outfile             =>  '',             # file to write html to

        langmode            =>  '',             # language (perl,java,html,...)
        alt_langmode        =>  'html',         # language to use if can't tell
        langfile            =>  '',             # more definitions of languages

        line_number_prefix  =>  '',
        linenumbers         =>  $LINE_NUMBER_DEFAULT,
        outputformat        =>  $DEFAULT_OUTPUTFORMAT,
        replacetabs         =>  $REPLACE_TAB_DEFAULT,

        title               =>  '',
        noheader            =>  '',             # 1 => don't print template
        content_type        =>  '',
        content_encoding    =>  '',
        template            =>  '',             # more template definitions

        verbose             =>  '',
        what_to_do          =>  'normal',

        @overrides ,                            # any input key=>value pairs
                                                # will override the defaults
                                                # given above.
               );
  $RESULT{title} = $RESULT{infile} if $RESULT{infile} && !$RESULT{title};
  $RESULT{title} = 'Code2HTML' unless $RESULT{title};
  # The alternation must be grouped: without the group the anchors bind to
  # the first and last alternative only and e.g. "abnormal" is accepted.
  if ( $RESULT{linenumbers} and
       $RESULT{linenumbers} !~ m/\A(?:none|normal|linked)\z/ ) {
    $RESULT{linenumbers} = 'normal';
  }
  return \%RESULT;
}
336
337
338 ###########################################################################
339 ######################## checkTabulator ###################################
340 ##########################################################################
# Expand every tab in one line to spaces up to the next tab stop.
#
# in     : $line    - a single line of text (no embedded newline expected)
#          $TABSTOP - distance between tab stops
# returns: the line with tabs replaced; the line is returned unchanged when
#          $TABSTOP is not a positive number (a zero width would otherwise
#          die in the modulus below).
sub checkTabulator
{
    my ($line, $TABSTOP) = @_;

    return $line unless $TABSTOP && $TABSTOP > 0;

    while ((my $at = index($line, "\t")) != -1)
      {
          # $cnt is always between 1 and $TABSTOP
          my $cnt = $TABSTOP - ($at % $TABSTOP);
          # replace exactly the tab found by index(), no second scan needed
          substr($line, $at, 1, ' ' x $cnt);
      }

    return $line;
}
354
355 ##########################################################################
356 ####################### get_input_file ###################################
357 ##########################################################################
# Load the text to convert and, if necessary, guess its language mode.
#
# in     : \%params   (keys read: input, infile, replacetabs)
# in     : \%LANGUAGE (per language: 'filename' and 'regex' patterns)
# in/out : $langmode      - written back through @_ on success
# in/out : $alt_langmode  - written back through @_ on success
# returns: reference to the loaded text, or an error message string when
#          the input file cannot be opened or the language cannot be guessed.
# Side effects: prints a hint line to STDOUT on success; when guessing
#          fails the raw text is printed to STDOUT unless $str is set.
sub get_input_file
  {
      my %PARAMS       = %{$_[0]};
      my %LANGUAGE     = %{$_[1]};
      my $langmode     = $_[2];
      my $alt_langmode = $_[3];
      my $code;

      if ( $PARAMS{'input'} )
        {
            $code = $PARAMS{'input'};
            $code =~ s/\r//g;
        }
      else
        {
            my $infile = defined $PARAMS{'infile'} ? $PARAMS{'infile'} : '';
            my $in_fh;
            if ($infile eq '-') {
                $in_fh = \*STDIN;
            } else {
                # three-argument open: the name is taken literally, so a
                # file called "foo|" or ">bar" cannot run a command or
                # clobber a file
                open($in_fh, '<', $infile)
                  or return("While opening '$PARAMS{'infile'}' for input: ".$!."\n");
            }
            local $/ = undef;
            $code = <$in_fh>;
            close($in_fh);
            $code = '' unless defined $code;
            # $opt_i is a package variable set outside this sub; when set it
            # replaces the file name used for language detection below.
            $PARAMS{'infile'} = $opt_i || $PARAMS{'infile'};
        }

      if ($PARAMS{'replacetabs'} != 0)
        {
            # note: split drops trailing empty fields, so trailing newlines
            # of the input are not preserved
            $code = join (
                          "\n",
                          map { checkTabulator($_, $PARAMS{'replacetabs'}) }
                            split(/\n/, $code)
                         );
        }

      if ( not $langmode )
        {
            my $test_code = substr($code, 0, $LANG_TEST_LENGTH);

            $langmode = '';

            # sorted, so that the same input always selects the same mode
            # when more than one language matches
            for my $candidate (sort keys %LANGUAGE)
              {
                  my $filename_re = $LANGUAGE{$candidate}->{'filename'};
                  my $content_re  = $LANGUAGE{$candidate}->{'regex'};
                  my $by_name    = defined $filename_re && $filename_re ne ''
                                   && $PARAMS{'infile'} =~ m/$filename_re/;
                  my $by_content = defined $content_re && $content_re ne ''
                                   && $test_code =~ m/$content_re/;
                  if ( $by_name || $by_content )
                    {
                        $langmode = $candidate;
                        last;
                    }
              }

            if ($langmode eq '')
              {
                  # NOTE(review): the fallback branch is taken when NO
                  # fallback mode is set, which contradicts the warning
                  # text; with a fallback mode the text is passed through
                  # unhighlighted.  Looks deliberate for use as a pager
                  # filter - confirm before changing.
                  if ( not $alt_langmode )
                    {
                      warn("Guessing language mode failed. " .
                           "Using fallback mode: '$alt_langmode'\n");
                      $langmode = $alt_langmode;
                      $alt_langmode = '';
                    }
                  else
                    {
                        print $code unless $str;
                        return("Guessing language mode failed.\n")
                    }
              }
        }

      # hand the (possibly guessed) modes back to the caller
      $_[2] = $langmode;
      $_[3] = $alt_langmode;
      print "==> append : to filename to switch off syntax highlighting\n";
      return \$code;
  }
452
453
454 ###########################################################################
455 ####################### put_headers #######################################
456 ###########################################################################
# Build the leading part of the output: optional HTTP-style header lines
# followed by the template header.
#
# in     : \%params (keys read: content_type, content_encoding, encoding,
#          noheader)
# in     : \%STYLE  (keys read: 'content-type', 'header')
# returns: the header text (empty string when nothing is to be emitted)
sub put_headers
{
      my %PARAMS    = %{shift()};
      my $STYLE_REF = shift();
      my $html      = '';

      if ( $PARAMS{'content_type'} ) {
        $html .= "Content-Type: $$STYLE_REF{'content-type'}\n";
        if ( $PARAMS{'content_encoding'} ) {
          # The header value used to be read from 'encoding' only, a key
          # the parameter defaults do not define, so the line came out
          # empty.  Keep honouring 'encoding' when a caller sets it and
          # fall back to the 'content_encoding' value otherwise.
          my $encoding = ( defined $PARAMS{'encoding'} && $PARAMS{'encoding'} ne '' )
                       ? $PARAMS{'encoding'}
                       : $PARAMS{'content_encoding'};
          $html .= "Content-Encoding: $encoding\n";
        }
        $html .= "\n";
      }
      $html .= $$STYLE_REF{'header'} unless $PARAMS{'noheader'};

      return $html;
}
474
475 ############################################################################
476 ####################### apply_stylesheets_to_rules #########################
477 ############################################################################
# Attach the stylesheet's start/stop tags to every rule.
#
# in/out : $regexps_ref - array of rule hashes; each gets 'starttag' and
#          'endtag' set from the stylesheet entry named by its 'style'
# in     : $style_ref   - stylesheet hash with a 'tags' table
# Rules with a 'childregex' list are processed recursively.  A warning is
# issued for a style that the stylesheet does not define.
sub apply_stylesheets_to_rules
  {
      my ( $regexps_ref, $style_ref ) = @_;

      foreach my $rule ( @$regexps_ref ) {
          my $style_name = $rule->{style};

          unless ( defined $style_ref->{'tags'}{$style_name} ) {
              warn ("Style '".$rule->{style}."' not defined in stylesheet.\n");
          }

          $rule->{'starttag'} = $style_ref->{'tags'}{$style_name}{'start'};
          $rule->{'endtag'}   = $style_ref->{'tags'}{$style_name}{'stop'};

          if ( $rule->{childregex} ) {
              apply_stylesheets_to_rules( $rule->{childregex}, $style_ref );
          }
      }
  }
489
490 ###########################################################################
491 ####################### create_snippetlist ################################
492 ###########################################################################
# Split $code into highlighted snippets.
#
# in     : $regexps_ref     - array of rule hashes (keys read: regex,
#          starttag, endtag, childregex)
# in     : $code            - the text to process
# in/out : $snippetlist_ref - array that receives, in order, plain text
#          pieces (with entities replaced), start tags, matched text and
#          end tags
# in     : $style_ref       - only passed on to recursive calls
#
# At every position the rule whose next match starts first wins (ties go to
# the rule listed first).  The text before the match is pushed as plain
# text, the match itself is pushed between the rule's tags - recursively
# processed with the rule's child rules when it has any.
sub create_snippetlist
  {
    my ( $regexps_ref, $code, $snippetlist_ref, $style_ref ) = @_ ;
    my $length = length( $code );

    ## An array of regular expression structures, each of which is an
    ## array.  @res is kept sorted by starting position of the RExen and
    ## then by the position of the regex in the language file.  This allows
    ## us to just evaluate $res[0], and to hand write fast code that typically
    ## handles 90% of the cases without resorting to the _big_ guns.
    ##
    ## FWIW, I pronounce '@res' REEZE, as in the plural of '$re'.
    ##
    ## Layout of one entry:
    ##   [0] regex  [1] start tag  [2] end tag  [3] child rules
    ##   [4] start of current match  [5] end of current match
    ##   [6] order of declaration
    ##
    my @res ;

    my $pos ;

    for ( @$regexps_ref ) {
        pos( $code ) = 0 ;
#++$m ;
        next unless $code =~ m/($_->{regex})/gms ;

        $pos = pos( $code ) ;
        $res[@res] = [
                      $_->{regex},
                      $_->{starttag},
                      $_->{endtag},
                      $_->{childregex},
                      $pos - length( $1 ),
                      $pos,
                      scalar( @res ),
                     ] ;
    }

    ## 90% of all child regexes end up with 0 or 1 regex that needs to be
    ## worried about. Trimming out the 0's speeds things up a bit and
    ## makes the below loop simpler, since there's always at least
    ## 1 regexp.  It doesn't speed things up much by itself: the percentage
    ## of times this fires is really small.  But it does simplify the loop
    ## below and speed it up.
    unless ( @res ) {
        $code =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
        push @$snippetlist_ref, $code ;
        return ;
    }

    @res = sort { $a->[4] <=> $b->[4] || $a->[6] <=> $b->[6] } @res ;

    ## Add a dummy at the end, which makes the logic below simpler / faster.
    $res[@res] = [
                  undef,
                  undef,
                  undef,
                  undef,
                  $length,
                  $length,
                  scalar( @res ),
                 ] ;

    ## These are declared here for (minor) speed improvement.
    my $re ;
    my $match_spos ;
    my $match_pos ;
    my $re_spos ;
    my $re_pos ;
    my $re_num ;
    my $prefix ;
    my $snippet ;
    my $rest ;
    my $i ;
    my $l ;

    my @changed_res ;
    my $j ;

    $pos = 0 ;
MAIN:
    while ( $pos < $length ) {
        $re = $res[0] ;

        $match_spos = $re->[4] ;
        $match_pos  = $re->[5] ;

        if ( $match_spos > $pos ) {
            $prefix  = substr( $code, $pos, $match_spos - $pos ) ;
            $prefix  =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
            push @$snippetlist_ref, $prefix ;
        }

        if ( $match_pos > $match_spos ) {
            $snippet = substr( $code, $match_spos, $match_pos - $match_spos ) ;
            ## A rule without a child list is treated like one with an
            ## empty list (apply_stylesheets_to_rules treats the list as
            ## optional, too); never dereference an undefined value.
            if ( $re->[3] && @{ $re->[3] } ) {
                push @$snippetlist_ref, $re->[1] ;
                create_snippetlist( $re->[3], $snippet, $snippetlist_ref, $style_ref ) ;
                push @$snippetlist_ref, $re->[2] ;
            }
            else {
                $snippet =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
                push @$snippetlist_ref, $re->[1], $snippet, $re->[2];
            }
        }

        $pos = $match_pos ;

        ##
        ## Hand coded optimizations.  Luckily, the cases that arise most often
        ## are the easiest to tune.
        ##

# =pod

        if ( $res[1]->[4] >= $pos ) {
            ## Only first regex needs to be moved, 2nd and later are still valid.
            ## This is often 90% of the cases for Perl or C (others not tested,
            ## just uncomment the $n, $o, and $p lines and try it yourself).
#++$n{1} ;
#++$m ;
            pos( $code ) = $pos ;
            unless ( $code =~ m/($re->[0])/gms ) {
#++$o{'0'} ;
                if ( @res == 2 ) {
                    ## If the only regexp left is the dummy, we're done.
                    $rest = substr( $code, $pos ) ;
                    $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
                    push @$snippetlist_ref, $rest ;
                    last ;
                }
                shift @res ;
            }
            else {
                $re->[5] = $re_pos  = pos( $code ) ;
                $re->[4] = $re_spos = $re_pos - length( $1 ) ;

                ## Walk down the array looking for $re's new home.
                ## The first few loop iterations are unrolled and done manually
                ## for speed, which handles 85 to 90% of the cases where only
                ## $re needs to be moved.
                ##
                ## Here's where that dummy regexp at the end of the array comes
                ## in handy: we don't need to worry about array size here, since
                ## it will always be after $re no matter what.  The unrolled
                ## loop stuff is outdented to make the conditionals fit on one
                ## 80 char line.
                ## Element 4 in @{$res[x]} is the start position of the match.
                ## Element 6 is the order in which it was declared in the lang file.
                $re_num = $re->[6] ;
                if ( ( $re_spos <=> $res[1]->[4] || $re_num <=> $res[1]->[6] ) <= 0 ) {
#++$o{'1'} ;
                    next
                }
                $res[0] = $res[1] ;

#++$o{'2'} ;
                if ( ( $re_spos <=> $res[2]->[4] || $re_num <=> $res[2]->[6] ) <= 0 ) {
                    $res[1] = $re ;
                    next ;
                }
                $res[1] = $res[2] ;

                if ( ( $re_spos <=> $res[3]->[4] || $re_num <=> $res[3]->[6] ) <= 0 ) {
#++$o{'3'} ;
                    $res[2] = $re ;
                    next ;
                }
                $res[2] = $res[3] ;

                if ( ( $re_spos <=> $res[4]->[4] || $re_num <=> $res[4]->[6] ) <= 0 ) {
#++$o{'3'} ;
                    $res[3] = $re ;
                    next ;
                }
                $res[3] = $res[4] ;

                if ( ( $re_spos <=> $res[5]->[4] || $re_num <=> $res[5]->[6] ) <= 0 ) {
#++$o{'4'} ;
                    $res[4] = $re ;
                    next ;
                }
                $res[4] = $res[5] ;

#++$o{'ugh'} ;
                $i = 6 ;
                $l = $#res ;
                for ( ; $i < $l ; ++$i ) {
                    last
                      if (
                          ( $re_spos <=> $res[$i]->[4] || $re_num <=> $res[$i]->[6] )
                          <= 0
                         ) ;
                    $res[$i-1] = $res[$i] ;
                }
#++$p{sprintf( "%2d", $i )} ;
                $res[$i-1] = $re ;
            }

            next ;
        }

# =cut

        ##
        ## End optimizations.  You can comment them all out and this net
        ## does all the work, just more slowly.  If you do that, then
        ## you also need to comment out the code below that deals with
        ## the second entry in @res.
        ##

#my $ni = 0 ;
        ## First re always needs to be tweaked
#++$m ;
#++$ni ;
        pos( $code ) = $pos ;
        unless ( $code =~ m/($re->[0])/gms ) {
            if ( @res == 2 ) {
                ## If the only regexp left is the dummy, we're done.
                $rest = substr( $code, $pos ) ;
                $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
                push @$snippetlist_ref, $rest ;
                last ;
            }
            shift @res ;
            @changed_res = () ;
            $i = 0 ;
        }
        else {
            $re->[5] = $re_pos  = pos( $code ) ;
            $re->[4] = $re_pos - length( $1 ) ;
            @changed_res = ( $re ) ;
            $i = 1 ;
        }

        ## If the optimizations above are in, the second one always
        ## needs to be tweaked, too.
        $re = $res[$i] ;
#++$m ;
#++$ni ;
        pos( $code ) = $pos ;
        unless ( $code =~ m/($re->[0])/gms ) {
            if ( @res == 2 ) {
                ## If the only regexp left is the dummy, we're done.
                $rest = substr( $code, $pos ) ;
                $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
                push @$snippetlist_ref, $rest ;
                last ;
            }
            shift @res ;
        }
        else {
            $re->[5] = $re_pos  = pos( $code ) ;
            $re->[4] = $re_spos = $re_pos - length( $1 ) ;
            if ( @changed_res &&
                 ( $changed_res[0]->[4] <=> $re_spos ||
                   $changed_res[0]->[6] <=> $re->[6]
                 ) > 0
               ) {
                unshift @changed_res, $re ;
            }
            else {
                $changed_res[$i] = $re ;
            }
            ++$i ;
        }

        for ( ; ; ++$i ) {
            local $_ = $res[$i] ;
#++$m ;
            last if $_->[4] >= $pos ;
#++$ni ;
#++$m ;
            pos( $code ) = $pos ;
            unless ( $code =~ m/($_->[0])/gms ) {
                if ( @res <= 2 ) {
                    $rest = substr( $code, $pos ) ;
                    $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
                    push @$snippetlist_ref, $rest ;
                    last MAIN ;
                }
                ## If this regex is no longer needed, remove it by not pushing it
                ## on to @changed_res.  This means we need one less slot in @res.
                shift @res ;
                redo ;
            }

            $_->[5] = $re_pos  = pos( $code ) ;
            $_->[4] = $re_spos = $re_pos - length( $1 ) ;

            ## Insertion sort in to @changed_res
            $re_num = $_->[6] ;
            for ( $j = $#changed_res ; $j > -1 ; --$j ) {
                last
                  if (
                      ( $changed_res[$j]->[4] <=> $re_spos ||
                        $changed_res[$j]->[6] <=> $re_num
                      ) < 0
                     ) ;
                $changed_res[$j+1] = $changed_res[$j] ;
            }
            $changed_res[$j+1] = $_ ;
        }

        ## Merge sort @changed_res and @res in to @res
        $j = 0 ;
        $l = $#res ;
        for ( @changed_res ) {
            while (
                   $i < $l &&
                   ( $_->[4] <=> $res[$i]->[4] || $_->[6] <=> $res[$i]->[6] ) > 0
                  ) {
                $res[$j++] = $res[$i++] ;
            }
            $res[$j++] = $_ ;
        }
# =cut
    }
}
817
818
819 ##########################################################################
820 ####################### put_output #######################################
821 ##########################################################################
# Assemble the body of the output.
#
# in     : $params          - parameter hash ref (keys read:
#          line_number_prefix, linenumbers, noheader)
# in     : $snippetlist_ref - array ref of text pieces to concatenate
# in     : $STYLE_REF       - stylesheet hash ref; its 'linenumbers' table
#          maps a line-number mode to a code ref, 'footer' holds the text
#          appended unless 'noheader' is set
# returns: the formatted text
sub put_output {
    my ( $params, $snippetlist_ref, $STYLE_REF ) = @_ ;

    # anchor prefix handed to the formatter: "<prefix>_" or empty
    my $prefix = $params->{'line_number_prefix'}
               ? $params->{'line_number_prefix'}.'_'
               : '';

    # the formatter for the requested line-number mode gets the whole text
    my $formatter = $STYLE_REF->{'linenumbers'}{ $params->{'linenumbers'} };
    my $text      = join( '', @$snippetlist_ref );
    my $result    = $formatter->( $text, $prefix );

    unless ( $params->{noheader} ) {
        $result .= $STYLE_REF->{'footer'};
    }

    return $result;
}
841
842
843 ############################################################################
844 ####################### get_default_stylesheet #############################
845 ############################################################################
##
## get_default_stylesheet()
##
## Builds the built-in output styles and returns a reference to a hash with
## these keys:
##
##   'xterm'       ANSI colour escapes for terminal output
##   'html-light'  HTML; body tag sets a white background and black text
##   'html'        the very same hash ref as 'html-light'
##   'html-nobg'   'html-light' with a bare <body> tag (no body colours)
##   'html-dark'   black background and its own tag colours
##
## Each style is a hash ref with:
##   'template'      page skeleton containing %%code%% (the HTML ones also
##                   contain %%title%% and %%version%%)
##   'content-type'  content type string
##   'entities'      HTML styles only: 'listofchars' (a regex) and the
##                   'replace_by' map for those characters
##   'linenumbers'   mode name ('none', 'normal', 'linked') => code ref called
##                   as ( $joined_text, $prefix ), returning the final text
##   'tags'          tag name => { 'start' => ..., 'stop' => ... }
##
## 'html-nobg' and 'html-dark' are shallow copies of 'html-light', so they
## share its 'entities' and 'linenumbers' structures ('html-nobg' also shares
## its 'tags'); 'html-dark' gets a 'tags' table of its own.
##
sub get_default_stylesheet
{
    my %STYLESHEET;

    # ------------------------------------------------------------------
    # ANSI colour sequences.  The literals are the fallback for systems
    # without Term::ANSIColor; when the module loads, its color() is asked
    # instead.  The string eval is kept so that the module's default
    # exports are still imported into the current package as before.
    # ------------------------------------------------------------------
    my %ansi = (
        'bold'    => "\e[1m",
        'reset'   => "\e[0m",
        'red'     => "\e[31m",
        'green'   => "\e[32m",
        'yellow'  => "\e[33m",
        'blue'    => "\e[34m",
        'magenta' => "\e[35m",
        'cyan'    => "\e[36m",
    );
    eval "use Term::ANSIColor";
    unless ($@) {
        $ansi{$_} = color($_) for keys %ansi;
    }
    my ( $bold, $reset, $red, $green, $yellow, $blue, $magenta, $cyan ) =
      @ansi{qw(bold reset red green yellow blue magenta cyan)};

    # ------------------------------------------------------------------
    # Helpers shared by all styles.
    # ------------------------------------------------------------------

    # Line number mode that leaves the text untouched.
    my $passthrough = sub { return $_[0] };

    # Splits $text into lines and joins whatever $render returns for each.
    # $render is called as ( $line_number, $width, $line ); $width is the
    # number of digits in the line count, used to right-align the numbers.
    my $number_lines = sub {
        my ( $text, $render ) = @_;
        my @lines = split /\n/, $text;
        my $width = length scalar @lines;
        my $nr    = 0;
        return join '', map { $render->( ++$nr, $width, $_ ) } @lines;
    };

    # Turns a list of  name => [ start, stop ]  pairs into a fresh tags
    # table, so no two tables built here share their inner hashes.
    my $make_tags = sub {
        my %pair_for = @_;
        my %tags;
        for my $name ( keys %pair_for ) {
            $tags{$name} = {
                'start' => $pair_for{$name}[0],
                'stop'  => $pair_for{$name}[1],
            };
        }
        return \%tags;
    };

    # The three HTML templates differ only in their <body> tag.  The
    # trailing blanks after "highlighted by" and "<font" are part of the
    # template text; the final '' supplies the closing newline.
    my $html_template = sub {
        my ($body_tag) = @_;
        return join "\n",
          '<html>',
          '<head>',
          '  <title>%%title%%</title>',
          '</head>',
          $body_tag,
          '<pre>',
          '%%code%%',
          '</pre>',
          '<p align=right><small><font color=gray>syntax highlighted by ',
          '<a href="http://www.palfrader.org/code2html"><font ',
          'color=gray>Code2HTML</font></a>, v. %%version%%</font></small></p>',
          '</body>',
          '</html>',
          '';
    };

    # ------------------------------------------------------------------
    # xterm: every tag is closed with the reset sequence.
    # ------------------------------------------------------------------
    my %xterm_start = (
        'comment'                 => $blue,
        'doc comment'             => "$bold$blue",
        'string'                  => $red,
        'esc string'              => $magenta,
        'character'               => $reset,
        'esc character'           => $magenta,
        'numeric'                 => $red,
        'identifier'              => $cyan,
        'predefined identifier'   => $cyan,
        'type'                    => $cyan,
        'predefined type'         => $green,
        'reserved word'           => $yellow,
        'library function'        => $reset,
        'include'                 => $green,
        'preprocessor'            => $green,
        'braces'                  => $reset,
        'symbol'                  => $green,
        'function header'         => "$bold$red",
        'function header name'    => "$bold$cyan",
        'function header args'    => $cyan,
        'regex'                   => $magenta,
        'text'                    => $red,

        # HTML
        'entity'                  => $green,

        # MAKEFILE
        'assignment'              => $green,
        'dependency line'         => $cyan,
        'dependency target'       => $blue,
        'dependency continuation' => $magenta,
        'continuation'            => $magenta,
        'macro'                   => $red,
        'int macro'               => $red,
        'esc $$$'                 => $yellow,
        'separator'               => $green,
        'line spec'               => $cyan,
        'deletion'                => $red,
        'insertion'               => $blue,
        'modification'            => $magenta,
    );

    $STYLESHEET{'xterm'} = {
        'template'     => '%%code%%',
        # NOTE(review): 'text/html' looks copied from the HTML styles; left
        # as is because the consumer of this value is not visible here.
        'content-type' => 'text/html',
        'linenumbers'  => {
            'none'   => $passthrough,
            # Right-aligned number, one blank, then the line.
            'normal' => sub {
                my ($text) = @_;
                return $number_lines->( $text, sub {
                    my ( $nr, $width, $line ) = @_;
                    return sprintf "%*u %s\n", $width, $nr, $line;
                } );
            },
            # Linked numbers are not defined for terminal output.
            'linked' => $passthrough,
        },
        'tags' => $make_tags->(
            map { ( $_, [ $xterm_start{$_}, $reset ] ) } keys %xterm_start
        ),
    };

    # ------------------------------------------------------------------
    # html-light
    # ------------------------------------------------------------------
    my @light_tags = (
        'comment'                 => [ '<font color="#444444">',         '</font>' ],
        'doc comment'             => [ '<font color="#444444"><i>',      '</i></font>' ],
        'string'                  => [ '<font color="#008000">',         '</font>' ],
        'esc string'              => [ '<font color="#77dd77">',         '</font>' ],
        'character'               => [ '<font color="#008000">',         '</font>' ],
        'esc character'           => [ '<font color="#77dd77">',         '</font>' ],
        'numeric'                 => [ '<font color="#FF0000">',         '</font>' ],

        'identifier'              => [ '<font color="#2040a0">',         '</font>' ],
        'predefined identifier'   => [ '<font color="#2040a0"><strong>', '</strong></font>' ],

        'type'                    => [ '<font color="#2040a0"><strong>', '</strong></font>' ],
        'predefined type'         => [ '<font color="#2040a0"><strong>', '</strong></font>' ],

        'reserved word'           => [ '<strong>',                       '</strong>' ],
        'library function'        => [ '<font color="a52a2a"><strong>',  '</strong></font>' ],

        'include'                 => [ '<font color="0000ff"><strong>',  '</strong></font>' ],
        'preprocessor'            => [ '<font color="0000ff"><strong>',  '</strong></font>' ],

        'braces'                  => [ '<font color="4444FF"><strong>',  '</strong></font>' ],
        'symbol'                  => [ '<font color="4444FF">',          '</font>' ],

        'function header'         => [ '<strong>',                       '</strong>' ],
        'function header name'    => [ '<font color="ff0000">',          '</font>' ],
        'function header args'    => [ '<font color="2040a0">',          '</font>' ],

        'regex'                   => [ '<font color="b000d0">',          '</font>' ],

        'text'                    => [ '<i>',                            '</i>' ],

        # HTML
        'entity'                  => [ '<font color="ff0000">',          '</font>' ],

        # MAKEFILE
        'assignment'              => [ '<font color="2040a0">',          '</font>' ],
        'dependency line'         => [ '<font color="8b2252">',          '</font>' ],
        'dependency target'       => [ '<strong>',                       '</strong>' ],
        'dependency continuation' => [ '<font color="000000"><strong>',  '</strong></font>' ],
        'continuation'            => [ '<strong>',                       '</strong>' ],
        'macro'                   => [ '<font color="2040a0">',          '</font>' ],
        'int macro'               => [ '<font color="4080ff">',          '</font>' ],
        'esc $$$'                 => [ '<font color="444444">',          '</font>' ],
    );

    $STYLESHEET{'html-light'} = {
        'template'     => $html_template->('<body bgcolor="#ffffff" text="#000000">'),
        'content-type' => 'text/html',
        'entities'     => {
            'listofchars' => '[<>&"]',    # a regex actually
            'replace_by'  => {
                '&' => '&amp;',
                '<' => '&lt;',
                '>' => '&gt;',
                '"' => '&quot;',
            },
        },
        'linenumbers' => {
            'none' => $passthrough,

            # First argument: the joined snippet list.  Second argument: an
            # optional prefix for the anchor names, needed when more than
            # one block of a file is highlighted (patch mode); may be empty.
            # The prefix is passed to sprintf as data, so a '%' inside it
            # cannot disturb the format.
            'normal' => sub {
                my ( $text, $prefix ) = @_;
                $prefix = '' unless defined $prefix;
                return $number_lines->( $text, sub {
                    my ( $nr, $width, $line ) = @_;
                    return sprintf qq{<a name="%sline%u">%*u</a> %s\n},
                      $prefix, $nr, $width, $nr, $line;
                } );
            },

            # Same as 'normal', but every number links to its own anchor.
            'linked' => sub {
                my ( $text, $prefix ) = @_;
                $prefix = '' unless defined $prefix;
                return $number_lines->( $text, sub {
                    my ( $nr, $width, $line ) = @_;
                    return sprintf qq{<a name="%sline%u" href="#%sline%u">%*u</a> %s\n},
                      $prefix, $nr, $prefix, $nr, $width, $nr, $line;
                } );
            },
        },
        'tags' => $make_tags->(@light_tags),
    };

    # html-light is also called html (same hash, not a copy).
    $STYLESHEET{'html'} = $STYLESHEET{'html-light'};

    # html-nobg ("no body colours") is html-light with a body tag that sets
    # neither a background nor a text colour.
    $STYLESHEET{'html-nobg'} = {
        %{ $STYLESHEET{'html-light'} },
        'template' => $html_template->('<body>'),
    };

    # html-dark is html-light with different body colours and its own tag
    # table: the light entries, with the ones below replaced.
    my @dark_overrides = (
        'comment'      => [ '<font color="#909000">',           '</font>' ],
        'doc comment'  => [ '<font color="#909000"><i>',        '</i></font>' ],
        'string'       => [ '<font color="yellow">',            '</font>' ],
        'character'    => [ '<font color="yellow">',            '</font>' ],
        'identifier'   => [ '<font color="#B0B0B0">',           '</font>' ],
        'include'      => [ '<font color="#00FF00">',           '</font>' ],
        'preprocessor' => [ '<font color="#00FF00">',           '</font>' ],
        'braces'       => [ '<font color="darkCyan"><strong>',  '</strong></font>' ],
        'symbol'       => [ '<font color="darkCyan">',          '</font>' ],
    );

    $STYLESHEET{'html-dark'} = {
        %{ $STYLESHEET{'html-light'} },
        'template' => $html_template->(
            '<body bgcolor="#000000"  text="#C0C0C0" vlink="#FFFFFF" alink="#00FF00" link="#FFFFFF">'
        ),
        # Later pairs win, so the overrides replace the light entries.
        'tags' => $make_tags->( @light_tags, @dark_overrides ),
    };

    return \%STYLESHEET;
}
1242
1243
1244
1245 #############################################################################
1246 ####################### get_default_database ################################
1247 #############################################################################
1248 sub get_default_database
1249 {
1250
1251 my %LANGUAGE;
1252
1253 # written by PP
# 'plain': both detection regexes are empty strings and the list of
# highlighting patterns is empty.
$LANGUAGE{'plain'} = { 'patterns' => [], 'regex' => '', 'filename' => '' };
1259  
1260  
1261
1262
1263
1264
1265 # taken from nedit
1266 # modified by PP
# Highlighting table for Ada.
#   filename : matches names ending in .a, .ad, .ada, .ads or .adb
#              (case-insensitive).
#   regex    : empty.
#   patterns : one record per rule with 'name', 'regex', 'style' and a
#              (here always empty) 'childregex' list.
# NOTE(review): the order of 'patterns' is presumably significant to the
# highlighting engine -- confirm before reordering.
$LANGUAGE{'ada'}        = {
    'filename'   => '(?i)\\.a(d[asb]?)?$',
    'regex'      => '',
    'patterns'   => [
        {
            'name'       => 'Comments',
            'regex'      => '--.*?$',
            'style'      => 'comment',
            'childregex' => [],
        },
        {
            'name'       => 'String Literals',
            'regex'      => '".*?("|$)',
            'style'      => 'string',
            'childregex' => []
        },
        {
            'name'       => 'Character Literals',
            'regex'      => '\'.\'',
            'style'      => 'character',
            'childregex' => []
        },
        {
            'name'       => 'Ada Attributes',
            'regex'      => '\'[a-zA-Z][a-zA-Z_]+\\b',
            'style'      => 'reserved word',
            'childregex' => []
        },
        {
            'name'       => 'Numeric Literals',
            'regex'      => '(((2|8|10|16)#[_0-9a-fA-F]*#)|[0-9.]+)',
            'style'      => 'numeric',
            'childregex' => []
        },
        {
            'name'       => 'Withs Pragmas Use',
            'regex'      => '\\b(?i)((with|pragma|use)[ \\t\\n\\f\\r]+[a-zA-Z0-9_.]+;)+\\b',
            'style'      => 'include',
            'childregex' => []
        },
        {
            'name'       => 'Predefined Types',
            'regex'      => '\\b(?i)(boolean|character|count|duration|float|integer|long_float|long_integer|priority|short_float|short_integer|string)\\b',
            'style'      => 'predefined type',
            'childregex' => []
        },
        {
            'name'       => 'Predefined Subtypes',
            # The alternation is grouped so that both \b anchors apply to
            # every word.  Ungrouped, only 'field' was anchored on the left
            # and only 'priority' on the right, so e.g. 'natural' matched
            # inside longer identifiers.
            'regex'      => '\\b(?i)(field|natural|number_base|positive|priority)\\b',
            'style'      => 'predefined type',
            'childregex' => []
        },
        {
            'name'       => 'Reserved Words',
            'regex'      => '\\b(?i)(abort|abs|accept|access|and|array|at|begin|body|case|constant|declare|delay|delta|digits|do|else|elsif|end|entry|exception|exit|for|function|generic|goto|if|in|is|limited|loop|mod|new|not|null|of|or|others|out|package|pragma|private|procedure|raise|range|record|rem|renames|return|reverse|select|separate|subtype|task|terminate|then|type|use|when|while|with|xor)\\b',
            'style'      => 'reserved word',
            'childregex' => []
        },
        {
            'name'       => 'Ada 95 Only',
            'regex'      => '\\b(?i)(abstract|tagged|all|protected|aliased|requeue|until)\\b',
            'style'      => 'reserved word',
            'childregex' => []
        },
        {
            'name'       => 'Identifiers',
            'regex'      => '\\b[a-zA-Z][a-zA-Z0-9_]*\\b',
            'style'      => 'identifier',
            'childregex' => []
        },
        {
            'name'       => 'Dot All',
            'regex'      => '(?i)\\.all\\b',
            'style'      => 'predefined identifier',
            'childregex' => []
        }
    ]
};
# 'ada95' is an alias: it shares the very same table reference as 'ada'.
$LANGUAGE{'ada95'}      = $LANGUAGE{'ada'};
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361 # written by JA
# Highlighting table for awk.
#   filename : matches names ending in .awk (case-insensitive).
#   regex    : matches a '#!' line that names an awk interpreter, with no
#              whitespace between the '!' and 'awk'.
#   patterns : one record per rule; 'childregex' holds nested rules.
# NOTE(review): the m/q/qq, s///, tr/y and '<<' rules look as if they were
# carried over from a Perl table; they are kept unchanged here.
$LANGUAGE{'awk'}       =  {
    'filename'   => '(?i)\\.awk$',
    'regex'      => '^\\s*#\\s*![^\\s]*awk',
    'patterns'   => [
        {
            'name'       => 'comment',
            'regex'      => '#.*?$',
            'style'      => 'comment',
            'childregex' => []
        },
        {
            # Double-quoted string.
            'name'       => 'string',
            'regex'      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
#           'regex'      => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
            'style'      => 'string',
            'childregex' => [
                {
                    'name'       => 'esc character',
                    'regex'      => '\\\\.',
                    'style'      => 'esc character',
                    'childregex' => []
                }
            ]
        },
        {
            # Single-quoted string.
            'name'       => 'string',
            'regex'      => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
#           'regex'      => '\'\'|\'\\\\\\\\\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\'',
            'style'      => 'string',
            'childregex' => [
                {
                    'name'       => 'esc character',
                    'regex'      => '\\\\.',
                    'style'      => 'esc character',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'function header',
            # awk definitions read "function name(params) {", so the name is
            # normally followed by '('.  The rule used to accept only '{' or
            # a newline after the name and therefore never matched such a
            # definition; '(' is now accepted as well (the old forms still
            # match).
            'regex'      => 'function[\\t ]+([a-zA-Z0-9_]+)[\\t \\n]*(\\(|\\{|\\n)',
            'style'      => 'function header',
            'childregex' => [
                {
                    'name'       => 'function coloring',
                    'regex'      => '[\\t ]([a-zA-Z0-9_]+)',
                    'style'      => 'function header name',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'regex matching I 1',
            'regex'      => '(\\b| )?(/)(\\\\/|[^/\\n])*(/[gimesox]*)',
            'style'      => 'regex',
            'childregex' => []
        },
        {
            'name'       => 'regex matching I 2',
            'regex'      => '(?:\\b| )(?:(?:m|q|qq)([!"#$%&\'*+-/]))(\\\\\\2|[^\\2\\n])*(\\2[gimesox]*)',
            'style'      => 'regex',
            'childregex' => []
        },
        {
            'name'       => 'regex matching II',
            'regex'      => '(?:\\b| )?(?:s([!"#$%&\'*+-/]))(?:\\\\\\2|[^\\2\\n])*?(\\2)[^(\\2)\\n]*?(\\2[gimesox]*)',
            'style'      => 'regex',
            'childregex' => []
        },
        {
            'name'       => 'translate',
            'regex'      => '(?:\\b| )(?:(?:tr|y)([^\w\s]))(?:\\\\\\2|[^\\2\\n])*?(\\2)[^(\\2)\\n]*?(\\2[gimesox]*)',
            'style'      => 'regex',
            'childregex' => []
        },
        {
            # BEGIN/END and built-in variables.
            'name'       => 'keywords',
            'regex'      => '\\b(BEGIN|END|ARGC|ARGIND|ARGV|CONVFMT|ENVIRON|ERRNO|FIELDWIDTHS|FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORS|RS|RT|RSTART|RLENGTH|SUBSEP)\\b',
            'style'      => 'reserved word',
            'childregex' => []
        },
        {
            # Statement keywords; 'else' and 'return' were missing from the
            # list and have been added.
            'name'       => 'keywords 2',
            'regex'      => '\\b(if|else|while|do|for|in|break|continue|delete|exit|next|nextfile|function|return)\\b',
            'style'      => 'reserved word',
            'childregex' => []
        },
        {
            'name'       => 'library fns',
            'regex'      => '\\b(close|getline|print|printf|system|fflush|atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|length|split|sprintf|sub|substr|tolower|toupper|systime|strftime)\\b',
            'style'      => 'library function',
            'childregex' => []
        },
        {
            'name'       => 'braces and parens',
            'regex'      => '[\\[\\]\\{\\}\\(\\)]',
            'style'      => 'braces',
            'childregex' => []
        },
        {
            'name'       => '<< stuff',
            'regex'      => '<<\'([^\\n]*)\';.*?^\\2$',
            'style'      => 'text',
            'childregex' => []
        },
        {
            'name'       => '<< stuff',
            'regex'      => '<<([^\\n]*).*?^\\2$',
            'style'      => 'text',
            'childregex' => []
        }
    ]
};
1475  
1476  
1477  
1478  
1479  
1480  
1481  
1482  
1483  
1484  
1485  
1486  
1487  
1488  
1489  
1490 # taken from nedit
1491 # modified by PP
# Highlighting table for C: 'filename' matches names ending in .c or .h and
# the 'regex' entry is empty.  The records are assembled with small local
# builders; every call yields a fresh hash, so no record is shared between
# two places in the table.
$LANGUAGE{'c'}          = do {
    # One rule record; any extra arguments become its child rules.
    my $rule = sub {
        my ($name, $regex, $style, @children) = @_;
        return {
            'name'       => $name,
            'regex'      => $regex,
            'style'      => $style,
            'childregex' => [ @children ],
        };
    };

    # Backslash plus the character after it, nested inside literals.
    my $escape = sub {
        return $rule->('esc character', '\\\\.', 'esc character');
    };

    # Double-quoted string literal carrying its own escape child rule.
    my $string = sub {
        return $rule->(
            'string',
            '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
            'string',
            $escape->(),
        );
    };

    +{
        'filename' => '\\.[ch]$',
        'regex'    => '',
        'patterns' => [
            $rule->('doc comment', '/\\*\\*.*?\\*/', 'doc comment'),
            $rule->('comment',     '/\\*.*?\\*/',    'comment'),
            $string->(),

            # A whole '#' line, with strings, <files> and comments nested.
            $rule->(
                'preprocessor line',
                '^[ \\t]*#.*?$',
                'preprocessor',
                $string->(),
                $rule->('<files>', '<.*?>',              'string'),
                $rule->('comment', '[^/]/\\*.*?\\*/',    'comment'),
            ),

            $rule->('character constant', '\'(\\\\)?.\'', 'character', $escape->()),

            $rule->(
                'numeric constant',
                '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
                'numeric',
            ),

            $rule->(
                'storage keyword',
                '\\b(const|extern|auto|register|static|unsigned|signed|volatile|char|double|float|int|long|short|void|typedef|struct|union|enum)\\b',
                'reserved word',
            ),

            $rule->(
                'keyword',
                '\\b(return|goto|if|else|case|default|switch|break|continue|while|do|for|sizeof)\\b',
                'reserved word',
            ),

            $rule->('braces',      '[\\{\\}]',                                  'braces'),
            $rule->('symbols',     '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',      'symbol'),
            $rule->('identifiers', '([a-zA-Z_][a-zA-Z_0-9]*)',                  'identifier'),
        ],
    };
};
1606  
1607  
1608  
1609  
1610  
1611  
1612  
1613  
1614  
1615  
1616  
1617  
1618  
1619  
1620  
1621 # taken from nedit
1622 # modified by PP
1623 $LANGUAGE{'c++'}        = {
1624                             'filename'   => '\\.(c(c|pp|xx)|h(h|pp|xx)|C(C|PP|XX)?|H(H|PP|XX)?|i)$',
1625                             'regex'      => '',
1626                             'patterns'   => [
1627                                               {
1628                                                 'name'       => 'doc comment',
1629                                                 'regex'      => '/\\*\\*.*?\\*/',
1630                                                 'style'      => 'doc comment',
1631                                                 'childregex' => []
1632                                               },
1633                                               {
1634                                                 'name'       => 'comment',
1635                                                 'regex'      => '/\\*.*?\\*/',
1636                                                 'style'      => 'comment',
1637                                                 'childregex' => []
1638                                               },
1639                                               {
1640                                                 'name'       => 'cplus comment',
1641                                                 'regex'      => '//.*?$',
1642                                                 'style'      => 'comment',
1643                                                 'childregex' => []
1644                                               },
1645                                               {
1646                                                 'name'       => 'string',
1647                                                 'regex'      => '""|"\\\\\\\\"|".*?([^\\\\](\\\\\\\\)*)"',
1648 #                                                'regex'      => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1649                                                 'style'      => 'string',
1650                                                 'childregex' => [
1651                                                                   {
1652                                                                     'name'       => 'esc character',
1653                                                                     'regex'      => '\\\\.',
1654                                                                     'style'      => 'esc character',
1655                                                                     'childregex' => []
1656                                                                   }
1657                                                                 ]
1658                                               },
1659                                               {
1660                                                 'name'       => 'preprocessor line',
1661                                                 'regex'      => '^[ \\t]*#.*?$',
1662                                                 'style'      => 'preprocessor',
1663                                                 'childregex' => [
1664                                                                   {
1665                                                                     'name'       => 'string',
1666                                                                     'regex'      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
1667 #                                                                    'regex'      => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1668                                                                     'style'      => 'string',
1669                                                                     'childregex' => [
1670                                                                                       {
1671                                                                                         'name'       => 'esc character',
1672                                                                                         'regex'      => '\\\\.',
1673                                                                                         'style'      => 'esc character',
1674                                                                                         'childregex' => []
1675                                                                                       }
1676                                                                                     ]
1677                                                                   },
1678                                                                   {
1679                                                                     'name'       => '<files>',
1680                                                                     'regex'      => '<.*?>',
1681                                                                     'style'      => 'string',
1682                                                                     'childregex' => []
1683                                                                   },
1684                                                                   {
1685                                                                     'name'       => 'comment',
1686                                                                     'regex'      => '[^/]/\\*.*?\\*/',
1687                                                                     'style'      => 'comment',
1688                                                                     'childregex' => []
1689                                                                   },
1690                                                                   {
1691                                                                     'name'       => 'cplus comment',
1692                                                                     'regex'      => '//.*?$',
1693                                                                     'style'      => 'comment',
1694                                                                     'childregex' => []
1695                                                                   }
1696                                                                 ]
1697                                               },
1698                                               {
1699                                                 'name'       => 'character constant',
1700                                                 'regex'      => '\'(\\\\)?.\'',
1701                                                 'style'      => 'character',
1702                                                 'childregex' => [
1703                                                                   {
1704                                                                     'name'       => 'esc character',
1705                                                                     'regex'      => '\\\\.',
1706                                                                     'style'      => 'esc character',
1707                                                                     'childregex' => []
1708                                                                   }
1709                                                                 ]
1710                                               },
1711                                               {
1712                                                 'name'       => 'numeric constant',
1713                                                 'regex'      => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
1714                                                 'style'      => 'numeric',
1715                                                 'childregex' => []
1716                                               },
1717                                               {
1718                                                 'name'       => 'storage keyword',
1719                                                 'regex'      => '\\b(class|typename|typeid|template|friend|virtual|inline|explicit|operator|overload|public|private|protected|const|extern|auto|register|static|mutable|unsigned|signed|volatile|char|double|float|int|long|short|bool|wchar_t|void|typedef|struct|union|enum)\\b',
1720                                                 'style'      => 'reserved word',
1721                                                 'childregex' => [],
1722                                               },
1723                                               {
1724                                                 'name'       => 'keyword',
1725                                                 'regex'      => '\\b(new|delete|this|return|goto|if|else|case|default|switch|break|continue|while|do|for|catch|throw|sizeof|true|false|namespace|using|dynamic_cast|static_cast|reinterpret_cast)\\b',
1726                                                 'style'      => 'reserved word',
1727                                                 'childregex' => []
1728                                               },
1729                                               {
1730                                                 'name'       => 'braces',
1731                                                 'regex'      => '[\\{\\}]',
1732                                                 'style'      => 'braces',
1733                                                 'childregex' => []
1734                                               },
1735                                               {
1736                                                 'name'       => 'symbols',
1737                                                 'regex'      => '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',
1738                                                 'style'      => 'symbol',
1739                                                 'childregex' => []
1740                                               },
1741                                               {
1742                                                 'name'       => 'identifiers',
1743                                                 'regex'      => '([a-zA-Z_][a-zA-Z_0-9]*)',
1744                                                 'style'      => 'identifier',
1745                                                 'childregex' => []
1746                                               }
1747                                             ]
1748                           };
# 'cc', 'cpp' and 'cxx' are aliases: each key receives the very same value
# that is stored under 'c++' (the value is shared, not duplicated).
$LANGUAGE{$_} = $LANGUAGE{'c++'} for qw(cc cpp cxx);
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
# written by VRS
#
# gpasm assembler sources.  The 'filename' pattern matches names ending in
# .asm or .inc, case-insensitively.  There is a single top-level pattern,
# 'args', whose regex is '^.*$'; its children pick out ';' comments, labels
# at the start of the text, an indented leading word, and double-quoted
# strings (which in turn highlight backslash escapes).
$LANGUAGE{'gpasm'} = {
    filename => '(?i)\\.(asm|inc)$',
    regex    => '',
    patterns => [
        {
            name       => 'args',
            regex      => '^.*$',
            style      => 'symbol',
            childregex => [
                # everything from a semicolon to the end
                { name => 'comment', regex => ';.*?$', style => 'comment', childregex => [] },

                # identifier in the first column, optionally followed by ':'
                {
                    name       => 'labels',
                    regex      => '^[A-Za-z_][A-Za-z_0-9]*:?',
                    style      => 'identifier',
                    childregex => [],
                },

                # identifier preceded by blanks/tabs (name spelled as in the
                # original table)
                {
                    name       => 'menonics',
                    regex      => '^[ \t]+[A-Za-z_][A-Za-z_0-9]*',
                    style      => 'reserved word',
                    childregex => [],
                },

                # "", a quoted string whose closing quote is not escaped, or "\\"
                {
                    name       => 'string',
                    regex      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
                    style      => 'string',
                    childregex => [
                        { name => 'esc character', regex => '\\\\.', style => 'esc character', childregex => [] },
                    ],
                },
            ],
        },
    ],
};
1810
1811
1812
1813
1814
1815
1816
1817
# written by JA
#
# groff sources.  The 'filename' pattern matches names ending in .groff.
# The only pattern is 'comment': a backslash followed by a double quote,
# then everything up to the end.
$LANGUAGE{'groff'} = {
    filename => '\\.groff$',
    regex    => '',
    patterns => [
        { name => 'comment', regex => '\\\\".*?$', style => 'comment', childregex => [] },
    ],
};
1831  
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843  
1844  
1845  
# taken from nedit
# modified by PP
#
# HTML.  The 'filename' pattern matches names ending in .htm, .html, .mhtml
# or .php, case-insensitively.  Three top-level patterns: <!-- --> comments,
# '&...' entities, and tags.  Inside a tag the children highlight quoted
# strings, the angle brackets, and remaining runs of characters that are
# neither quotes nor spaces.
$LANGUAGE{'html'} = {
    filename => '(?i)\\.(html?|mhtml|php)$',
    regex    => '',
    patterns => [
        { name => 'comment', regex => '<!--.*?-->',            style => 'comment', childregex => [] },
        { name => 'entity',  regex => '\\&[-.a-zA-Z0-9#]*;?', style => 'entity',  childregex => [] },
        {
            name       => 'tag',
            regex      => '<(/|!)?[-.a-zA-Z0-9]*.*?>',
            style      => 'predefined identifier',
            childregex => [
                { name => 'double quote string', regex => '".*?"',         style => 'string',     childregex => [] },
                { name => 'single quote string', regex => '\'.*?\'',       style => 'string',     childregex => [] },
                { name => 'brackets',            regex => '[<>]',          style => 'braces',     childregex => [] },
                { name => 'attribute',           regex => '[^\'" ]+(?=.)', style => 'identifier', childregex => [] },
            ],
        },
    ],
};
1897
1898  
1899  
# Added May 17, 2002, Jim M.
#
# XML and related formats.  The 'filename' pattern matches names ending in
# .xml, .xps, .xsl, .axp or .ppd, case-insensitively.  The extension group
# is mandatory: the earlier form had a trailing '?' after the group, which
# made the extension optional and so also matched any name that merely
# ended in a dot.
#
# The patterns mirror the html entry: <!-- --> comments, '&...' entities,
# and tags whose children highlight quoted strings, the angle brackets, and
# remaining runs of characters that are neither quotes nor spaces.
$LANGUAGE{'xml'} = {
    filename => '(?i)\\.(xml|xps|xsl|axp|ppd)$',
    regex    => '',
    patterns => [
        { name => 'comment', regex => '<!--.*?-->',            style => 'comment', childregex => [] },
        { name => 'entity',  regex => '\\&[-.a-zA-Z0-9#]*;?', style => 'entity',  childregex => [] },
        {
            name       => 'tag',
            regex      => '<(/|!)?[-.a-zA-Z0-9]*.*?>',
            style      => 'predefined identifier',
            childregex => [
                { name => 'double quote string', regex => '".*?"',         style => 'string',     childregex => [] },
                { name => 'single quote string', regex => '\'.*?\'',       style => 'string',     childregex => [] },
                { name => 'brackets',            regex => '[<>]',          style => 'braces',     childregex => [] },
                { name => 'attribute',           regex => '[^\'" ]+(?=.)', style => 'identifier', childregex => [] },
            ],
        },
    ],
};
1950  
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963  
1964  
# taken from nedit
# modified by PP
#
# Java.  The 'filename' pattern matches names ending in .java.  Patterns are
# listed in this order: doc comments, block comments, // comments, double-
# and single-quoted literals, numeric constants, import/package lines,
# storage keywords, other keywords, braces/parens/brackets, identifiers and
# operator symbols.
$LANGUAGE{'java'} = {
    filename => '\\.java$',
    regex    => '',
    patterns => [
        { name => 'doc comment',   regex => '/\\*\\*.*?\\*/', style => 'doc comment', childregex => [] },
        { name => 'comment',       regex => '/\\*.*?\\*/',     style => 'comment',     childregex => [] },
        { name => 'cplus comment', regex => '//.*?$',          style => 'comment',     childregex => [] },

        # "", a quoted string whose closing quote is not escaped, or "\\".
        # Alternative regex that was left disabled in the original table:
        #   '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"'
        {
            name       => 'string',
            regex      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
            style      => 'string',
            childregex => [
                { name => 'esc character', regex => '\\\\.', style => 'esc character', childregex => [] },
            ],
        },

        # Same shape as 'string' but with single quotes and no children.
        # Alternative regex that was left disabled in the original table:
        #   '\'\'|\'\\\\\\\\\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\''
        {
            name       => 'single quoted',
            regex      => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
            style      => 'string',
            childregex => [],
        },

        # hex, integer or decimal literal, optional exponent and suffix
        {
            name       => 'numeric constant',
            regex      => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
            style      => 'numeric',
            childregex => [],
        },

        # from the word 'import' or 'package' to the end
        {
            name       => 'include',
            regex      => '\\b(import|package)\\b.*?$',
            style      => 'include',
            childregex => [
                { name => 'esc character', regex => '\\\\(.|\\n)',      style => 'esc character', childregex => [] },
                { name => 'comment',       regex => '[^/]/\\*.*?\\*/', style => 'comment',       childregex => [] },
            ],
        },

        {
            name       => 'storage keyword',
            regex      => '\\b(abstract|boolean|byte|char|class|double|extends|final|float|int|interface|long|native|private|protected|public|short|static|transient|synchronized|void|volatile|implements)\\b',
            style      => 'reserved word',
            childregex => [],
        },
        {
            name       => 'keyword',
            regex      => '\\b(break|case|catch|continue|default|do|else|false|finally|for|if|instanceof|new|null|return|super|switch|this|throw|throws|true|try|while)\\b',
            style      => 'reserved word',
            childregex => [],
        },

        { name => 'braces and parens', regex => '[\\{\\}\\(\\)\\[\\]]',          style => 'braces',     childregex => [] },
        { name => 'Identifiers',       regex => '\\b[a-zA-Z_][a-zA-Z0-9_]*\\b',  style => 'identifier', childregex => [] },
        { name => 'symbols',           regex => '([\\*\\-\\+=:;%&\\|<>!])',      style => 'symbol',     childregex => [] },
    ],
};
2067  
2068  
2069  
2070  
2071  
2072  
2073  
2074  
2075  
2076  
2077  
2078  
2079  
2080  
2081 # taken from nedit
2082 # modified by PP
2083 $LANGUAGE{'javascript'} = {
2084                             'filename'   => '(?i)\\.js$',
2085                             'regex'      => '',
2086                             'patterns'   => [
2087                                               {
2088                                                 'name'       => 'comment',
2089                                                 'regex'      => '/\\*.*?\\*/',
2090                                                 'style'      => 'comment',
2091                                                 'childregex' => []
2092                                               },
2093                                               {
2094                                                 'name'       => 'cplus comment',
2095                                                 'regex'      => '//.*?$',
2096                                                 'style'      => 'comment',
2097                                                 'childregex' => []
2098                                               },
2099                                               {
2100                                                 'name'       => 'numeric constant',
2101                                                 'regex'      => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
2102                                                 'style'      => 'numeric',
2103                                                 'childregex' => []
2104                                               },
2105                                               {
2106                                                 'name'       => 'events',
2107                                                 'regex'      => '\\b(onAbort|onBlur|onClick|onChange|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onResize|onSelect|onSubmit|onUnload)\\b',
2108                                                 'style'      => 'reserved word',
2109                                                 'childregex' => []
2110                                               },
2111                                               {
2112                                                 'name'       => 'braces',
2113                                                 'regex'      => '[\\{\\}]',
2114                                                 'style'      => 'braces',
2115                                                 'childregex' => []
2116                                               },
2117                                               {
2118                                                 'name'       => 'statements',
2119                                                 'regex'      => '\\b(break|continue|else|for|if|in|new|return|this|typeof|var|while|with)\\b',
2120                                                 'style'      => 'reserved word',
2121                                                 'childregex' => []
2122                                               },
2123                                               {
2124                                                 'name'       => 'function',
2125                                                 'regex'      => 'function[\\t ]+([a-zA-Z0-9_]+)[\\t \\(]+.*?[\\n{]',
2126                                                 'style'      => 'function header',
2127                                                 'childregex' => [
2128                                                                   {
2129                                                                     'name'       => 'function args',
2130                                                                     'regex'      => '\\(.*?\\)',
2131                                                                     'style'      => 'function header args',
2132                                                                     'childregex' => []
2133                                                                   },
2134                                                                   {
2135                                                                     'name'       => 'function name',
2136                                                                     'regex'      => '[\\t ][a-zA-Z0-9_]+',
2137                                                                     'style'      => 'function header name',
2138                                                                     'childregex' => []
2139                                                                   }
2140                                                                 ]
2141                                               },  
2142                                               {
2143                                                 'name'       => 'built in object type',
2144                                                 'regex'      => '\\b(anchor|Applet|Area|Array|button|checkbox|Date|document|elements|FileUpload|form|frame|Function|hidden|history|Image|link|location|Math|navigator|Option|password|Plugin|radio|reset|select|string|submit|text|textarea|window)\\b',
2145                                                 'style'      => 'predefined type',
2146                                                 'childregex' => []
2147                                               },
2148                                               {
2149                                                 'name'       => 'string',
2150                                                 'regex'      => '".*?("|$)',
2151                                                 'style'      => 'string',
2152                                                 'childregex' => [
2153                                                                   {
2154                                                                     'name'       => 'colors',
2155                                                                     'regex'      => '(aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|#008000|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen|#[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9])',
2156                                                                     'style'      => 'identifier',
2157                                                                     'childregex' => []
2158                                                                   }
2159                                                                 ]
2160                                               },
2161                                               {
2162                                                 'name'       => 'string',
2163                                                 'regex'      => '\'.*?(\'|$)',
2164                                                 'style'      => 'string',
2165                                                 'childregex' => [
2166                                                                   {
2167                                                                     'name'       => 'colors',
2168                                                                     'regex'      => '(aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|#008000|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen|#[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9])',
2169                                                                     'style'      => 'identifier',
2170                                                                     'childregex' => [],
2171                                                                   }
2172                                                                 ]
2173                                               },
2174                                               {
2175                                                 'name'       => 'event capturing',
2176                                                 'regex'      => '\\b(captureEvents|releaseEvents|routeEvent|handleEvent)\\b.*?(\\)|$)',
2177                                                 'style'      => 'reserved word',
2178                                                 'childregex' => []
2179                                               },
2180                                               {
2181                                                 'name'       => 'predefined methods',
2182                                                 'regex'      => '\\b(abs|acos|alert|anchor|asin|atan|atan2|back|big|blink|blur|bold|ceil|charAt|clear|clearTimeout|click|close|confirm|cos|escape|eval|exp|fixed|floor|focus|fontcolor|fontsize|forward|getDate|getDay|getHours|getMinutes|getMonth|getSeconds|getTime|getTimezoneOffset|getYear|go|indexOf|isNaN|italics|javaEnabled|join|lastIndexOf|link|log|max|min|open|parse|parseFloat|parseInt|pow|prompt|random|reload|replace|reset|reverse|round|scroll|select|setDate|setHours|setMinutes|setMonth|setSeconds|setTimeout|setTime|setYear|sin|small|sort|split|sqrt|strike|sub|submit|substring|sup|taint|tan|toGMTString|toLocaleString|toLowerCase|toString|toUpperCase|unescape|untaint|UTC|write|writeln)\\b',
2183                                                 'style'      => 'library function',
2184                                                 'childregex' => []
2185                                               },
2186                                               {
2187                                                 'name'       => 'properties',
2188                                                 'regex'      => '\\b(action|alinkColor|anchors|appCodeName|appName|appVersion|bgColor|border|checked|complete|cookie|defaultChecked|defaultSelected|defaultStatus|defaultValue|description|E|elements|enabledPlugin|encoding|fgColor|filename|forms|frames|hash|height|host|hostname|href|hspace|index|lastModified|length|linkColor|links|LN2|LN10|LOG2E|LOG10E|lowsrc|method|name|opener|options|parent|pathname|PI|port|protocol|prototype|referrer|search|selected|selectedIndex|self|SQRT1_2|SQRT2|src|status|target|text|title|top|type|URL|userAgent|value|vlinkColor|vspace|width|window)\\b',
2189                                                 'style'      => 'predefined identifier',
2190                                                 'childregex' => []
2191                                               },
2192                                               {
2193                                                 'name'       => 'operators',
2194                                                 'regex'      => '([=;->/&|])',
2195                                                 'style'      => 'symbol',
2196                                                 'childregex' => []
2197                                               }
2198                                             ]
2199                           };
# 'js' is a second key for the javascript definition.  Only the reference is
# copied, so both keys always point at one and the same rule set.
$LANGUAGE{js}           = $LANGUAGE{javascript};
2201
2202
2203
2204
2205
2206
2207
2208
# written by Andreas Krennmair
# extremely incomplete
#
# Highlighting definition for Lisp.  Keys, as in the other %LANGUAGE entries:
#   filename - regex for the file name; covers *.lsp, *.lisp and *.l
#   regex    - second language-level regex, left empty here (presumably it
#              is tried against the file content -- confirm in the code that
#              selects the language, which is outside this section)
#   patterns - list of rules; each rule has a regex, the name of a style and
#              a list of nested child rules (none are used for Lisp)

$LANGUAGE{'lisp'}       = {
                            'filename' => '\\.(lsp|lisp|l)$',
                            'regex' => '',
                            'patterns' => [
                               {
                                 'name'       => 'parens',
                                 'regex'      => '[()]',
                                 'style'      => 'braces',
                                 'childregex' => []
                               },
                               {
                                 # from ';' up to the end of the line
                                 'name'       => 'comment',
                                 'regex'      => ';.*?$',
                                 'style'      => 'comment',
                                 'childregex' => []
                               },
                               {
                                 # closes at the next '"' or at the end of the
                                 # line; backslash-escaped quotes are not handled
                                 'name'       => 'string',
                                 'regex'      => '".*?("|$)',
                                 'style'      => 'string',
                                 'childregex' => []
                               },
                               {
                                 # No blank inside the alternation: the former
                                 # 'defun ' matched only when a literal space
                                 # followed and pulled that space into the match,
                                 # so "defun" before a tab or a newline was missed.
                                 # 'xyz' is kept from the original; it looks like
                                 # a placeholder -- TODO confirm and replace by
                                 # real keywords.
                                 'name'       => 'keywords',
                                 'regex'      => '\\b(defun|xyz)\\b',
                                 'style'      => 'reserved word',
                                 'childregex' => []
                               },
                               {
                                 # either "#(<digits> <digits>)" or a bare run
                                 # of digits
                                 'name'       => 'numeric constant',
                                 'regex'      => '(#\([0-9]+ [0-9]+\)|[0-9]+)',
                                 'style'      => 'numeric',
                                 'childregex' => []
                               },
                               {
                                 # letters and hyphens only; digits are not part
                                 # of an identifier for this rule
                                 'name'       => 'identifiers',
                                 'regex'      => '([-a-zA-Z]+)',
                                 'style'      => 'identifier',
                                 'childregex' => []
                               }
                            ]
                          };
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
# written by JA
#
# Highlighting definition for m4 macro files, selected by the file name
# suffix ".m4" (the language-level 'regex' is empty).  The rules carry no
# 'name' key; each has a regex, a style name and a list of child rules.
$LANGUAGE{'m4'}         = {
                            'filename'   => '\\.m4$',
                            'regex'      => '',
                            'patterns' => [
                                            {
                                              # "dnl" comment up to the end of the line.
                                              # \b on both sides: only the standalone word
                                              # starts a comment.  Without it any text that
                                              # merely contains the letters "dnl" (such as
                                              # "sendnl") was taken as a comment start.
                                              'regex'      => '\\bdnl\\b.*?$',
                                              'style'      => 'doc comment',
                                              'childregex' => []
                                            },
                                            {
                                              # '#' comment up to the end of the line
                                              'regex'      => '#.*?$',
                                              'style'      => 'comment',
                                              'childregex' => []
                                            },
                                            {
                                              # builtin macro names, whole words only
                                              'regex'      => '\\b(define|undefine|defn|pushdef|popdef|indir|builtin|changequote|changecom|changeword|m4wrap|m4exit|include|sinclude|divert|undivert|divnum|cleardiv|shift|dumpdef|traceon|traceoff|debugfile|debugmode|len|index|regexp|substr|translit|patsubst|format|incr|decr|syscmd|esyscmd|sysval|maketemp|errprint)\\b',
                                              'style'      => 'reserved word',
                                              'childregex' => []
                                            },
                                            {
                                              # conditionals / loops, whole words only
                                              'regex'      => '\\b(ifdef|ifelse|loops)\\b',
                                              'style'      => 'reserved word',
                                              'childregex' => [
                                                                {
                                                                  # '$' followed by an optional name, number,
                                                                  # {...} group or single punctuation char.
                                                                  # NOTE(review): the parent regex matches just
                                                                  # one of three words, none of which contains
                                                                  # '$'.  If child rules are applied to the text
                                                                  # matched by the parent (as the nesting
                                                                  # suggests -- confirm), this rule never fires.
                                                                  'regex'      => '[$]\\$?({[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
                                                                  'style'      => 'identifier',
                                                                  'childregex' => []
                                                                }
                                                              ]
                                            }
                                          ]
                          };
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
# taken from nedit
# modified by PP
#
# Highlighting definition for makefiles.  It is selected by a file name that
# contains "Makefile" or "makefile" (the filename regex is not anchored); the
# language-level 'regex' is empty.  Each rule has a name, a regex, a style
# name and a list of child rules.  The same definition is also stored under
# the key 'makefile' right after the hash.
$LANGUAGE{'make'}       = {
                            'filename'   => '[Mm]akefile.*',
                            'regex'      => '',
                            'patterns'   => [
                                              {
                                                # '#' up to the end of the line
                                                'name'       => 'Comment',
                                                'regex'      => '#.*?$',
                                                'style'      => 'comment',
                                                'childregex' => []
                                              },
                                              {
                                                # Variable name at line start followed by an
                                                # assignment operator.  Besides '=', '+=' and
                                                # ':=' the operators '?=', '!=', '::=' and
                                                # ':::=' are accepted as well; the original
                                                # missed those.
                                                'name'       => 'Assignment',
                                                'regex'      => '^( *| [ \\t]*)[A-Za-z0-9_+]*[ \\t]*(\\+|\\?|!|:{1,3})?=',
                                                'style'      => 'assignment',
                                                'childregex' => []
                                              },
                                              {
                                                # Target list up to ':' or '::'.  The list may
                                                # continue over a line break only via
                                                # backslash-newline -- the same sequence the
                                                # child rule 'Dependency Continuation' matches.
                                                # The original allowed a bare newline here, so
                                                # under multi-line matching a match could start
                                                # on an unrelated earlier line and run down to
                                                # the next colon.
                                                'name'       => 'Dependency Line',
                                                'regex'      => '^ *([A-Za-z0-9./$(){} _%+-]|\\\\\\n)*::?',
                                                'style'      => 'dependency line',
                                                'childregex' => [
                                                                  {
                                                                    'name'       => 'Dependency Target',
                                                                    'regex'      => '[A-Za-z0-9./$(){} _%+-]+',
                                                                    'style'      => 'dependency target',
                                                                    'childregex' => []
                                                                  },
                                                                  {
                                                                    # literal backslash followed by a newline
                                                                    'name'       => 'Dependency Continuation',
                                                                    'regex'      => '\\\\\\n',
                                                                    'style'      => 'dependency continuation',
                                                                    'childregex' => []
                                                                  },
                                                                  {
                                                                    'name'       => 'comment',
                                                                    'regex'      => '#.*?$',
                                                                    'style'      => 'comment',
                                                                    'childregex' => []
                                                                  },
                                                                  {
                                                                    # $X, $(...) or ${...}
                                                                    'name'       => 'macro',
                                                                    'regex'      => '\\$([A-Za-z0-9_]|\\([^)]*\\)|{[^}]*})',
                                                                    'style'      => 'macro',
                                                                    'childregex' => []
                                                                  },
                                                                  {
                                                                    # $<, $@, $*, $?, $% and $$@
                                                                    'name'       => 'int macro',
                                                                    'regex'      => '\\$([<@*?%]|\\$@)',
                                                                    'style'      => 'int macro',
                                                                    'childregex' => []
                                                                  }
                                                                ]
                                              },
                                              {
                                                # backslash at the end of a line
                                                'name'       => 'Continuation',
                                                'regex'      => '\\\\$',
                                                'style'      => 'continuation',
                                                'childregex' => []
                                              },
                                              {
                                                # $X, $(...) or ${...}
                                                'name'       => 'Macro',
                                                'regex'      => '\\$([A-Za-z0-9_]|\\([^)]*\\)|{[^}]*})',
                                                'style'      => 'macro',
                                                'childregex' => []
                                              },
                                              {
                                                # $<, $@, $*, $?, $% and $$@
                                                'name'       => 'Internal Macro',
                                                'regex'      => '\\$([<@*?%]|\\$@)',
                                                'style'      => 'int macro',
                                                'childregex' => []
                                              },
                                              {
                                                # two dollar signs in a row
                                                'name'       => 'Escaped $$$',
                                                'regex'      => '\\$\\$',
                                                'style'      => 'esc $$$',
                                                'childregex' => []
                                              },
                                              {
                                                # 'include' at line start; the variants
                                                # '-include' and 'sinclude' are accepted too.
                                                # The group is non-capturing so that no capture
                                                # numbering changes.
                                                'name'       => 'Include',
                                                'regex'      => '^(?:-|s)?include[ \\t]',
                                                'style'      => 'include',
                                                'childregex' => []
                                              }
                                            ]
                          };
# 'makefile' shares the very same definition (reference copy, not a clone).
$LANGUAGE{'makefile'} = $LANGUAGE{'make'};
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
# taken from nedit
# modified by PP
#
# Highlighting definition for Pascal.  It is selected by the file name
# suffixes ".p" and ".pas" in any letter case; the language-level 'regex' is
# empty.  Each rule has a regex, a style name and a list of child rules; all
# top-level rules also carry a name.  The same definition is stored under the
# key 'pascal' right after the hash.
$LANGUAGE{'pas'}        = {
                            'filename'   => '(?i)\\.p(as)?$',
                            'regex'      => '',
                            'patterns'   => [
                                              {
                                                'name'       => 'comment1 (*    *)',
                                                'regex'      => '\\(\\*.*?\\*\\)',
                                                'style'      => 'comment',
                                                'childregex' => []
                                              },
                                              {
                                                'name'       => 'comment2 {    }',
                                                'regex'      => '\\{.*?\\}',
                                                'style'      => 'comment',
                                                'childregex' => []
                                              },
                                              {
                                                # closes at the next quote or at the end of
                                                # the line
                                                'name'       => 'string',
                                                'regex'      => '\'.*?(\'|$)',
                                                'style'      => 'string',
                                                'childregex' => []
                                              },
                                              {
                                                # line whose first non-blank character is '#';
                                                # both comment forms are recognised inside it
                                                'name'       => 'preprocessor line',
                                                'regex'      => '^[ \\t]*#.*?$',
                                                'style'      => 'preprocessor',
                                                'childregex' => [
                                                                  {
                                                                    'name'       => 'comment1 (*    *)',
                                                                    'regex'      => '\\(\\*.*?\\*\\)',
                                                                    'style'      => 'comment',
                                                                    'childregex' => []
                                                                  },
                                                                  {
                                                                    'name'       => 'comment2 {    }',
                                                                    'regex'      => '\\{.*?\\}',
                                                                    'style'      => 'comment',
                                                                    'childregex' => []
                                                                  }
                                                                ]
                                              },
                                              {
                                                # exactly one character between quotes.
                                                # NOTE(review): the 'string' rule above matches
                                                # the same text; whether this rule can ever win
                                                # depends on how the engine breaks ties --
                                                # confirm.
                                                'name'       => 'character constant',
                                                'regex'      => '\'.\'',
                                                'style'      => 'character',
                                                'childregex' => []
                                              },
                                              {
                                                'name'       => 'numeric constant',
                                                'regex'      => '\\b((0(x|X)[0-9a-fA-F]*)|[0-9.]+((e|E)(\\+|-)?)?[0-9]*)(L|l|UL|ul|u|U|F|f)?\\b',
                                                'style'      => 'numeric',
                                                'childregex' => []
                                              },
                                              {
                                                # (?i): the word list is matched in any case
                                                'name'       => 'storage and ops',
                                                'regex'      => '\\b(?i)(and|array|const|div|export|file|function|import|in|label|mod|module|nil|not|only|or|packed|pow|pragma|procedure|program|protected|qualified|record|restricted|set|type|var)\\b',
                                                'style'      => 'reserved word',
                                                'childregex' => []
                                              },
                                              {
                                                # (?i): the word list is matched in any case
                                                'name'       => 'keywords',
                                                'regex'      => '\\b(?i)(begin|case|do|downto|else|end|for|goto|if|of|otherwise|repeat|then|to|until|while|with)\\b',
                                                'style'      => 'reserved word',
                                                'childregex' => []
                                              },
                                              {
                                                # Operator characters, plus a single '/' that
                                                # has no other '/' next to it.  The neighbours
                                                # are checked with lookbehind/lookahead instead
                                                # of being matched: the original '[^/]/[^/]'
                                                # swallowed the characters on both sides of the
                                                # slash and could not match a slash at the very
                                                # start or end of the text.
                                                # The label is misspelled in the original and
                                                # kept verbatim in case anything keys on it.
                                                'name'       => 'sumbols',
                                                'regex'      => '([\\*\\-\\+=:;<>\\(\\)\\[\\]!]|(?<!/)/(?!/))',
                                                'style'      => 'symbol',
                                                'childregex' => []
                                              },
                                              {
                                                # name that may contain '.' and '^' in the
                                                # middle; those characters get the symbol style
                                                # through the child rule
                                                'name'       => 'identifiers',
                                                'regex'      => '([a-zA-Z_][a-zA-Z_0-9.^]*[a-zA-Z_0-9]|[a-zA-Z_][a-zA-Z_0-9]*)',
                                                'style'      => 'identifier',
                                                'childregex' => [
                                                                  {
                                                                    'regex'      => '(\\.|\\^)+',
                                                                    'style'      => 'symbol',
                                                                    'childregex' => []
                                                                  }
                                                                ]
                                              }
                                            ],
                          };
# 'pascal' shares the very same definition (reference copy, not a clone).
$LANGUAGE{'pascal'}     = $LANGUAGE{'pas'};
2503
2504  
2505  
2506  
2507  
2508  
2509  
2510  
2511  
2512  
2513  
2514  
2515  
2516  
2517  
2518 # taken from nedit
2519 # modified by PP
2520 # modified by BS
2521 # modified by JD
2522 # modified by JP
2523 $LANGUAGE{'perl'}       = {
2524                             'filename'   => '(?i)\\.p([lm5]|od)$',
2525                             'regex'      => '^\\s*#\\s*!([^\\s]*\\b|.*env\\s+)perl',
2526                             'patterns'   => [
2527                                               {
2528                                                 'name'       => 'comment',
2529                                                 'regex'      => '(?:#.*?(?:\r?\n\s*)+)+',
2530                                                 'style'      => 'comment',
2531                                                 'childregex' => []
2532                                               },
2533                                               {
2534                                                 'name'       => 'variables',
2535                                                 'regex'      => '[\\$@%]\\$?(?:{[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
2536                                                 'style'      => 'identifier',
2537                                                 'childregex' => []
2538                                               },
2539                                               {
2540                                                 'name'       => '"" string',
2541                                                 'regex'      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
2542 #                                                'regex'      => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
2543                                                 'style'      => 'string',
2544                                                 'childregex' => [
2545                                                                   {
2546                                                                     'name'       => 'esc character',
2547                                                                     'regex'      => '\\\\.',
2548                                                                     'style'      => 'esc character',
2549                                                                     'childregex' => []
2550                                                                   },
2551                                                                   {
2552                                                                     'name'       => 'variables',
2553                                                                     'regex'      => '[\\$@%]\\$?(?:{[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
2554                                                                     'style'      => 'identifier',
2555                                                                     'childregex' => []
2556                                                                   }
2557                                                                 ]
2558                                               },
2559                                               {
2560                                                 'name'       => '\'\' string',
2561                                                 'regex'      => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
2562 #                                                'regex'      => '\'\'|\'\\\\\\\\\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\'',
2563                                                 'style'      => 'string',
2564                                                 'childregex' => [
2565                                                                   {
2566                                                                     'name'       => 'esc character',
2567                                                                     'regex'      => '\\\\.',
2568                                                                     'style'      => 'esc character',
2569                                                                     'childregex' => []
2570                                                                   }
2571                                                                 ]
2572                                               },
2573                                               {
2574                                                 'name'       => 'more strings - q// qw//',
2575                                                 'regex'      => '(?:\\b| )(?:q|qw)([^\w\s])(?:\\\\\\2|[^\\2\\n])*\\2',
2576                                                 'style'      => 'string',
2577                                                 'childregex' => [
2578                                                                   {
2579                                                                     'name'       => 'esc character',
2580                                                                     'regex'      => '\\\\.',
2581                                                                     'style'      => 'esc character',
2582                                                                     'childregex' => []
2583                                                                   }
2584                                                                 ]
2585                                               },
2586                                               {
2587                                                 'name'       => 'more strings - qq// qx//',
2588                                                 'regex'      => '(?:\\b| )(?:qq|qx)([^\w\s])(?:\\\\\\2|[^\\2\\n])*\\2',
2589                                                 'style'      => 'string',
2590                                                 'childregex' => [
2591                                                                   {
2592                                                                     'name'       => 'esc character',
2593                                                                     'regex'      => '\\\\.',
2594                                                                     'style'      => 'esc character',
2595                                                                     'childregex' => []
2596                                                                   },
2597                                                                   {
2598                                                                     'name'       => 'variables',
2599                                                                     'regex'      => '[\\$@%]\\$?(?:{[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
2600                                                                     'style'      => 'identifier',
2601                                                                     'childregex' => []
2602                                                                   }
2603                                                                 ]
2604                                               },
2605                                               {
2606                                                 'name'       => 'subroutine header',
2607                                                 'regex'      => 'sub[\\t ]+(?:[a-zA-Z0-9_]+)[\\t \\n]*(?:\\{|\\(|\\n)',
2608                                                 'style'      => 'function header',
2609                                                 'childregex' => [
2610                                                                   {
2611                                                                     'name'       => 'subroutine header coloring',
2612                                                                     'regex'      => '[\\t ][a-zA-Z0-9_]+',
2613                                                                     'style'      => 'function header name',
2614                                                                     'childregex' => []
2615                                                                   }
2616                                                                 ]
2617                                               },
2618                                               {
2619                                                 'name'       => 'regex matching I',
2620                                                 'regex'      => '(?:\\b| )?(?:/(?:\\\\/|[^/\\n])*(?:/[gimesox]*)|s([^\w\s])(?:\\\\\\2|[^\\2\\n])*?(\\2)[^(\\2)\\n]*?(\\2[gimesox]*))',
2621                                                 'style'      => 'regex',
2622                                                 'childregex' => []
2623                                               },
2624                                               {
2625                                                 'name'       => 'regex matching II',
2626                                                 'regex'      => '(?:\\b| )(?:m|qq?|tr|y)([^\w\s])(?:\\\\\\2|[^\\2\\n])*(?:\\2[gimesox]*)',
2627                                                 'style'      => 'regex',
2628                                                 'childregex' => []
2629                                               },
2630                                               {
2631                                                 'name'       => 'keywords',
2632                                                 'regex'      => '\\b(my|local|new|if|until|while|elsif|else|eval|unless|for|foreach|continue|exit|die|last|goto|next|redo|return|local|exec|do|use|require|package|eval|BEGIN|END|eq|ne|not|\\|\\||\\&\\&|and|or)\\b',
2633                                                 'style'      => 'reserved word',
2634                                                 'childregex' => []
2635                                               },
2636                                               {
2637                                                 'name'       => 'library functions',
2638                                                 'regex'      => '\\b(?:a(?:bs|ccept|larm|tan2)|b(?:ind|inmode|less)|c(?:aller|hdir|hmod|homp|hop|hr|hroot|hown|losedir|lose|onnect|os|rypt)|d(?:bmclose|bmopen|efined|elete|ie|ump)|e(?:ach|nd(?:grent|hostent|netent|protoent|pwent|servent)|of|xec|xists|xp)|f(?:ctnl|ileno|lock|ork|ormat|ormline)|g(?:et(?:c|grent|grgid|grnam|hostbyaddr|hostbyname|hostent|login|netbyaddr|netbyname|netent|peername|pgrp|ppid|priority|protobyname|protobynumber|protoent|pwent|pwnam|pwuid|servbyname|servbyport|servent|sockname|sockopt)|lob|mtime|rep)|hex|i(?:mport|ndex|nt|octl)|join|keys|kill|l(?:cfirst|c|ength|ink|isten|og|ocaltime|stat)|m(?:ap|kdir|sgctl|sgget|sgrcv)|no|o(?:ct|pendir|pen|rd)|p(?:ack|ipe|op|os|rintf|rint|ush)|quotemeta|r(?:and|eaddir|ead|eadlink|ecv|ef|ename|eset|everse|ewinddir|index|mdir)|s(?:calar|eekdir|eek|elect|emctl|emget|emop|end|et(?:grent|hostent|netent|pgrp|priority|protoent|pwent|sockopt)|hift|hmctl|hmget|hmread|hmwrite|hutdown|in|leep|ocket|ocketpair|ort|plice|plit|printf|qrt|rand|tat|tudy|ubstr|ymlink|yscall|ysopen|ysread|ystem|yswrite)|t(?:elldir|ell|ie|ied|ime|imes|runcate)|u(?:c|cfirst|mask|ndef|nlink|npack|nshift|ntie|time)|values|vec|w(?:ait|aitpid|antarray|arn|rite)|qw|-[rwxoRWXOezsfdlpSbctugkTBMAC])\\b',
2639                                                 'style'      => 'library function',
2640                                                 'childregex' => []
2641                                               },
2642                                               {
2643                                                 'name'       => 'braces, parens and brakets',
2644                                                 'regex'      => '[\\[\\]\\{\\}\\(\\)]',
2645                                                 'style'      => 'braces',
2646                                                 'childregex' => []
2647                                               },
2648                                               {
2649                                                 'name'       => '<< stuff',
2650                                                 'regex'      => '<<(?:("|\')([^\\n]*)\\2|\\w*).*?^\\3$',
2651                                                 'style'      => 'text',
2652                                                 'childregex' => []
2653                                               },
2654                                               {
2655                                                 'name'       => 'POD',
2656                                                 'regex'      => '^=.*?^(?:=cut|\\Z)',
2657                                                 'style'      => 'doc comment',
2658                                                 'childregex' => []
2659                                               }
2660                                             ]
2661                           };
2662  
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
# Thanks to Matt Giwer <jull43 AT ij.net>
#
# Highlighting rules for POV-Ray scene files.
#
#   filename  - regex tested against the file name (here: *.pov, any case).
#   regex     - left empty for this language.
#               NOTE(review): presumably a content-based detection regex,
#               as in the other language tables -- confirm against the
#               language-detection code.
#   patterns  - list of rules; each rule has a 'name', a 'regex' that
#               selects a span, the 'style' to apply to it, and a
#               'childregex' list of rules of the same shape that are
#               listed for use inside the matched span.
#
# All regexes are single-quoted Perl strings, so every backslash that must
# reach the regex engine is doubled.
$LANGUAGE{'pov'} = {
    'filename' => '(?i)\\.pov$',
    'regex'    => '',
    'patterns' => [
        {
            'name'       => 'doc comment',
            'regex'      => '/\\*\\*.*?\\*/',
            'style'      => 'doc comment',
            'childregex' => []
        },
        {
            'name'       => 'comment',
            'regex'      => '/\\*.*?\\*/',
            'style'      => 'comment',
            'childregex' => []
        },
        {
            'name'       => 'cplus comment',
            'regex'      => '//.*?$',
            'style'      => 'comment',
            'childregex' => []
        },
        {
            'name'       => 'string',
            # Empty string, or a string whose closing quote is preceded by
            # a non-backslash plus an even number of backslashes, or the
            # string consisting of exactly one escaped backslash.
            'regex'      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
#           'regex'      => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
            'style'      => 'string',
            'childregex' => [
                {
                    'name'       => 'esc character',
                    'regex'      => '\\\\.',
                    'style'      => 'esc character',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'preprocessor line',
            'regex'      => '^[ \\t]*#.*?$',
            'style'      => 'preprocessor',
            'childregex' => [
                {
                    'name'       => 'string',
                    'regex'      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
#                   'regex'      => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
                    'style'      => 'string',
                    'childregex' => [
                        {
                            'name'       => 'esc character',
                            'regex'      => '\\\\.',
                            'style'      => 'esc character',
                            'childregex' => []
                        }
                    ]
                },
                {
                    'name'       => '<files>',
                    'regex'      => '<.*?>',
                    'style'      => 'string',
                    'childregex' => []
                },
                {
                    'name'       => 'comment',
                    # The leading [^/] consumes one character before the
                    # comment opener, so "//*" is not taken as a comment.
                    'regex'      => '[^/]/\\*.*?\\*/',
                    'style'      => 'comment',
                    'childregex' => []
                },
                {
                    'name'       => 'cplus comment',
                    'regex'      => '//.*?$',
                    'style'      => 'comment',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'character constant',
            'regex'      => '\'(\\\\)?.\'',
            'style'      => 'character',
            'childregex' => [
                {
                    'name'       => 'esc character',
                    'regex'      => '\\\\.',
                    'style'      => 'esc character',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'numeric constant',
            'regex'      => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
            'style'      => 'numeric',
            'childregex' => []
        },
        {
            'name'       => 'keyword',
            # The alternation is assembled from a word list instead of one
            # enormous literal: a hard line wrap inside the literal used to
            # split "quartic" into "quarti" + newline + "c", so that
            # keyword could never match.  The resulting string is
            # '\b(abs|absorption|...|z)\b', in the original order.
            'regex'      => '\\b(' . join( '|', qw(
                abs absorption acos acosh adaptive adc_bailout agate agate_turb
                all alpha ambient ambient_light angle aperture append arc_angle
                area_light array asc asin asinh assumed_gamma atan atan2 atanh
                average background bezier_spline bicubic_patch black_hole blob
                blue blur_samples bounded_by box boxed bozo break brick
                brick_size brightness brilliance bumps bump_map bump_size camera
                case caustics ceil checker chr clipped_by clock clock_delta
                color color_map colour colour_map component composite concat
                cone confidence conic_sweep control0 control1 cos cosh count
                crackle crand cube cubic cubic_spline cubic_wave cylinder
                cylindrical debug declare default defined degrees density
                density_file density_map dents difference diffuse dimensions
                dimension_size direction disc distance distance_maximum div
                eccentricity else emission end error error_bound exp extinction
                fade_distance fade_power falloff falloff_angle false fclose
                file_exists filter finish fisheye flatness flip floor
                focal_point fog fog_alt fog_offset fog_type fopen frequency gif
                global_settings gradient granite gray_threshold green
                height_field hexagon hf_gray_16 hierarchy hollow hypercomplex
                if ifdef iff ifndef image_map include int interior interpolate
                intersection intervals inverse ior irid irid_wavelength jitter
                julia_fractal lambda lathe leopard light_source linear_spline
                linear_sweep local location log looks_like look_at
                low_error_factor macro mandel map_type marble material
                material_map matrix max max_intersections max_iteration
                max_trace_level media media_attenuation media_interaction merge
                mesh metallic min minimum_reuse mod mortar nearest_count no
                normal normal_map no_shadow number_of_waves object octaves off
                offset omega omnimax on once onion open orthographic panoramic
                perspective pgm phase phong phong_size pi pigment pigment_map
                planar plane png point_at poly polygon poly_wave pot pow ppm
                precision prism pwr quadratic_spline quadric quartic quaternion
                quick_color quick_colour quilted radial radians radiosity
                radius rainbow ramp_wave rand range ratio read reciprocal
                recursion_limit red reflection reflection_exponent refraction
                render repeat rgb rgbf rgbft rgbt right ripples rotate
                roughness samples scale scallop_wave scattering seed shadowless
                sin sine_wave sinh sky sky_sphere slice slope_map smooth
                smooth_triangle sor specular sphere spherical spiral1 spiral2
                spotlight spotted sqr sqrt statistics str strcmp strength
                strlen strlwr strupr sturm substr superellipsoid switch sys t
                tan tanh text texture texture_map tga thickness threshold
                tightness tile2 tiles torus track transform translate transmit
                triangle triangle_wave true ttf turbulence turb_depth type u
                ultra_wide_angle undef union up use_color use_colour use_index
                u_steps v val variance vaxis_rotate vcross vdot version vlength
                vnormalize vrotate v_steps warning warp water_level waves while
                width wood wrinkles write x y yes z
            ) ) . ')\\b',
            'style'      => 'reserved word',
            'childregex' => []
        },
        {
            'name'       => 'braces',
            'regex'      => '[\\{\\}]',
            'style'      => 'braces',
            'childregex' => []
        },
        {
            'name'       => 'symbols',
            'regex'      => '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',
            'style'      => 'symbol',
            'childregex' => []
        },
        {
            'name'       => 'identifiers',
            'regex'      => '([a-zA-Z_][a-zA-Z_0-9]*)',
            'style'      => 'identifier',
            'childregex' => []
        }
    ]
};

# 'povray' is an alias: both keys refer to the very same rule table.
$LANGUAGE{'povray'} = $LANGUAGE{'pov'};
2799    
2800  
2801
2802
# by Tom Good
#
# Highlighting rules for Python sources.
#
#   filename  - regex tested against the file name (here: *.py, any case).
#   regex     - pattern of the form "#!...python".
#               NOTE(review): presumably matched against the file content
#               to detect a python interpreter line -- confirm against the
#               language-detection code.
#   patterns  - list of rules; each rule has a 'name', a 'regex' that
#               selects a span, the 'style' to apply to it, and a
#               'childregex' list of rules of the same shape that are
#               listed for use inside the matched span.
#
# All regexes are single-quoted Perl strings, so every backslash that must
# reach the regex engine is doubled.
$LANGUAGE{'python'} = {
    'filename' => '(?i)\\.py$',
    'regex'    => '^\\s*#\\s*![^\\s]*python',
    'patterns' => [
        {
            'name'       => 'python comment',
            'regex'      => '#.*?$',
            'style'      => 'comment',
            'childregex' => []
        },
        {
            'name'       => 'single quote string',
            'regex'      => '\'.*?\'',
            'style'      => 'string',
            'childregex' => []
        },
        {
            'name'       => 'string',
            # Exactly one 'regex' key.  This entry used to list three; in a
            # Perl hash the last duplicate silently wins, and that was the
            # older form kept below as a comment, which runs past the
            # closing quote of a string ending in an escaped backslash
            # ("a\\").  The active regex accepts a closing quote preceded
            # by a non-backslash plus an even number of backslashes.
            'regex'      => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
#           'regex'      => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
            'style'      => 'string',
            'childregex' => [
                {
                    'name'       => 'esc character',
                    'regex'      => '\\\\.',
                    'style'      => 'esc character',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'character constant',
            'regex'      => '\'(\\\\)?.\'',
            'style'      => 'character',
            'childregex' => [
                {
                    'name'       => 'esc character',
                    'regex'      => '\\\\.',
                    'style'      => 'esc character',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'numeric constant',
            'regex'      => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
            'style'      => 'numeric',
            'childregex' => []
        },
        {
            'name'       => 'keyword',
            'regex'      => '\\b(and|assert|break|class|continue|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|not|or|pass|print|raise|return|try|while)\\b',
            'style'      => 'reserved word',
            'childregex' => []
        },
        {
            'name'       => 'braces',
            'regex'      => '[\\{\\}]',
            'style'      => 'braces',
            'childregex' => []
        },
        {
            'name'       => 'symbols',
            'regex'      => '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',
            'style'      => 'symbol',
            'childregex' => []
        },
        {
            'name'       => 'identifiers',
            'regex'      => '([a-zA-Z_][a-zA-Z_0-9]*)',
            'style'      => 'identifier',
            'childregex' => []
        },
        {
            'name'       => 'function',
            # "def name(" up to the next newline or "{".
            'regex'      => '[\\t ]*def[\\t ]+([a-zA-Z0-9_]+)[\\t \\(]+.*?[\\n{]',
            'style'      => 'function header',
            'childregex' => [
                {
                    'name'       => 'function args',
                    'regex'      => '\\(.*?\\)',
                    'style'      => 'function header args',
                    'childregex' => []
                },
                {
                    'name'       => 'function name',
                    'regex'      => '[\\t ][a-zA-Z0-9_]+',
                    'style'      => 'function header name',
                    'childregex' => []
                }
            ]
        },
        {
            'name'       => 'library functions',
            # "delattr" was previously misspelled "delatter", so the real
            # builtin was never highlighted.
            'regex'      => '\\b(__import__|abs|apply|buffer|callable|chr|cmp|coerce|compile|complex|delattr|dir|divmod|eval|execfile|filter|float|getattr|globals|hasattr|hash|hex|id|input|int|intern|isinstance|issubclass|len|list|locals|long|map|max|min|oct|open|ord|pow|range|raw_input|reduce|reload|repr|round|setattr|slice|str|tuple|type|unichr|unicode|vars|xrange|zip)\\b',
            'style'      => 'library function',
            'childregex' => []
        },
    ]
};
2906
2907  
2908
2909 # by Joshua Swink <jswink AT pacbell.net>
2910 $LANGUAGE{'ruby'}       = {
2911                             'filename'   => '\\.rb$',
2912                             'regex'      => '^\\s*#\\s*![^\\s]*\\bruby\\b',
2913                             'patterns'   => [
2914                                               {
2915                                                 'name'       => 'comment',
2916                                                 'regex'      => '(?:#.*?(?:\r?\n\s*)+)+',
2917                                                 'style'      => 'comment',
2918                                                 'childregex' => []
2919                                               },
2920                                               {
2921                                                 'name'       => 'predefined variables',
2922                                                 'regex'      => '(?:\\$(?:[!@&`\'+\\d~=/\\\\,;.<>_*\\$?:"]|DEBUG|FILENAME|LOAD_PATH|stdin|stdout|stderr|VERBOSE|-[0adFiIlpv])|\\b(?:TRUE|FALSE|NIL|STDIN|STDOUT|STDERR|ENV|ARGF|ARGV|DATA|RUBY_VERSION|RUBY_RELEASE_DATE|RUBY_PLATFORM)\\b)',
2923                                                 'style'      => 'predefined identifier',
2924                                                 'childregex' => []
2925                                               },
2926                                               {
2927                                                 'name'       => 'variables',
2928                                                 'regex'      => '[\\$@](?:{[^}]*}|[^\\w/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][\\w.]*)?',
2929                                                 'style'      => 'identifier',
2930                                                 'childregex' => []
2931                                               },
2932                                               {
2933                                                 'name'       => '"" string',
2934                                                 'regex'      => '""|"(?:\\\\\\\\)+"|".*?(?:[^\\\\](?:\\\\\\\\)*)"|%[Qwx]?([^\\w\\[\\](){}<>])\\2|%[Qwx]?([^\\w\\[\\](){}<>]).*?(?:[^\\\\](?:\\\\\\\\)*)\\3|%[Qwx]?([^\\w\\[\\](){}<>])\\\\\\\\\\4|%[Qwx]?\\[\\]|%[Qwx]?\\[.*?([^\\\\](\\\\\\\\)*)\\]|%[Qwx]?\\[\\\\\\\\\\]|%[Qwx]?\\{\\}|%[Qwx]?\\{.*?([^\\\\](\\\\\\\\)*)\\}|%[Qwx]?\\{\\\\\\\\\\}|%[Qwx]?\\(\\)|%[Qwx]?\\(.*?([^\\\\](\\\\\\\\)*)\\)|%[Qwx]?\\(\\\\\\\\\\)|%[Qwx]?<>|%[Qwx]?<.*?([^\\\\](\\\\\\\\)*)>|%[Qwx]?<\\\\\\\\>',
2935
2936                                                 'style'      => 'string',
2937                                                 'childregex' => [
2938                                                                   {
2939                                                                     'name'       => 'esc character',
2940                                                                     'regex',     => '\\\\(?:x[\\da-fA-F]{2}|\d\d\d|c.|M-\\\\C-.|M-.|C-.|.)',
2941                                                                     'style'      => 'esc character',
2942                                                                     'childregex' => []
2943                                                                   },
2944                                                                   {
2945                                                                     'name'       => 'string expression',
2946                                                                     'regex'      => '#[\\$\\@][a-zA-Z_][\\w.]*|#\\{[\\$\\@]?[^\\}]*\\}',
2947                                                                     'style'      => 'identifier',
2948                                                                     'childregex' => []
2949                                                                   }
2950                                                                 ]
2951                                               },
2952                                               {
2953                                                 'name'       => '\'\' string',
2954                                                 'regex'      => '\'\'|\'(?:\\\\\\\\)+\'|\'.*?(?:[^\\\\](?:\\\\\\\\)*)\'|%q([^\\w\\[\\](){}<>])\\2|%q([^\\w\\[\\](){}<>]).*?(?:[^\\\\](?:\\\\\\\\)*)\\3|%q([^\\w\\[\\](){}<>])\\\\\\\\\\4|%q\\[\\]|%q\\[.*?([^\\\\](\\\\\\\\)*)\\]|%q\\[\\\\\\\\\\]|%q\\{\\}|%q\\{.*?([^\\\\](\\\\\\\\)*)\\}|%q\\{\\\\\\\\\\}|%q\\(\\)|%q\\(.*?([^\\\\](\\\\\\\\)*)\\)|%q\\(\\\\\\\\\\)|%q<>|%q<.*?([^\\\\](\\\\\\\\)*)>|%q<\\\\\\\\>',
2955                                                 'style'      => 'string',
2956                                                 'childregex' => [
2957                                                                   {
2958                                                                     'name'       => 'esc character',
2959                                                                     'regex'      => '(?:\\\\\'|\\\\\\\\)',
2960                                                                     'style'      => 'esc character',
2961                                                                     'childregex' => []
2962                                                                   }
2963                                                                 ]
2964                                               },
2965                                               {
2966                                                 'name'       => 'subroutine header',
2967                                                 'regex'      => 'def[\\t ]+\\w[\\w.]*(?:\\([^)]*\\))?',
2968                                                 'style'      => 'function header',
2969                                                 'childregex' => [
2970                                                                   {
2971                                                                     'name'       => 'arg list',
2972                                                                     'regex'      => '\\(.*\\)',
2973                                                                     'style'      => 'function header args',
2974                                                                     'childregex' => [
2975                                                                          {
2976                                                                          'name' => 'arg list parens',
2977                                                                          'regex' => '[\\(\\)]',
2978                                                                          'style' => 'symbol',
2979                                                                          'childregex' => []
2980                                                                          }
2981                                                                                     ]
2982                                                                   },
2983                                                                   {
2984                                                                     'name'       => 'subroutine header',
2985                                                                     'regex'      => '[\\t ]\w+',
2986                                                                     'style'      => 'function header name',
2987                                                                     'childregex' => []
2988                                                                   }
2989                                                                 ]
2990                                               },
2991                                               {
2992                                                 'name'       => 'class header',
2993                                                 'regex'      => 'class[\\t ]+\\w+(?:\\s*<\\s*\\w+)?',
2994                                                 'style'      => 'function header',
2995                                                 'childregex' => [
2996                                                                   {
2997                                                                     'name'       => 'class ancestor',
2998                                                                     'regex'      => '<\\s*\\w+',
2999                                                                     'style'      => 'include',
3000                                                                     'childregex' => [
3001                                                                              {
3002                                                                              'name' => 'inheritance doohickey',
3003                                                                              'regex' => '<',
3004                                                                              'style' => 'symbol',
3005                                                                              'childregex' => []
3006                                                                              }
3007                                                                                     ]
3008                                                                   },
3009                                                                   {
3010                                                                     'name'       => 'class main',
3011                                                                     'regex'      => '[\\t ]\\w+',
3012                                                                     'style'      => 'type',
3013                                                                     'childregex' => []
3014                                                                   }
3015                                                                 ]
3016                                               },
3017                                               {
3018                                                 'name'       => 'regex matching 0',
3019                                                 'regex'      => '(?:%r([^\\w\\[\\](){}<>])\\2|%r([^\\w\\[\\](){}<>]).*?(?:[^\\\\](?:\\\\\\\\)*)\\3|%r([^\\w\\[\\](){}<>])\\\\\\\\\\4|%r\\[\\]|%r\\[.*?([^\\\\](\\\\\\\\)*)\\]|%r\\[\\\\\\\\\\]|%r\\{\\}|%r\\{.*?([^\\\\](\\\\\\\\)*)\\}|%r\\{\\\\\\\\\\}|%r\\(\\)|%r\\(.*?([^\\\\](\\\\\\\\)*)\\)|%r\\(\\\\\\\\\\)|%r<>|%r<.*?([^\\\\](\\\\\\\\)*)>|%r<\\\\\\\\>)[ixpno]*',
3020                                                 'style'      => 'regex',
3021                                                 'childregex' => [
3022                                                                   {
3023                                                                     'name'       => 'string expression',
3024                                                                     'regex'      => '#[\\$\\@][a-zA-Z_][\\w.]*|#\\{[\\$\\@]?[a-zA-Z_][^\\}]*\\}',
3025                                                                     'style'      => 'identifier',
3026                                                                     'childregex' => []
3027                                                                   }
3028                                                                 ]
3029                                               },
3030                                               {
3031                                                 'name'       => 'regex matching I',
3032                                                 'regex'      => '(?:\\b| )?(?:/(?:\\\\/|[^/\\n])*(?:/[ixpno]*))',
3033                                                 'style'      => 'regex',
3034                                                 'childregex' => [
3035                                                                   {
3036                                                                     'name'       => 'string expression',
3037                                                                     'regex'      => '#[\\$\\@][a-zA-Z_][\\w.]*|#\\{[\\$\\@]?[a-zA-Z_][^\\}]*\\}',
3038                                                                     'style'      => 'identifier',
3039                                                                     'childregex' => []
3040                                                                   }
3041                                                                 ]
3042                                               },
3043                                               {
3044                                                 'name'       => 'reserved words',
3045                                                 'regex'      => '\\b(BEGIN|class|ensure|nil|self|when|END|def|false|not|super|while|alias|defined|for|or|then|yield|and|do|if|redo|true|begin|else|in|rescue|undef|break|elsif|module|retry|unless|case|end|next|return|until)\\b',
3046                                                 'style'      => 'reserved word',
3047                                                 'childregex' => []
3048                                               },
3049                                               {
3050                                                 'name'       => 'kernel module methods',
3051                                                 'regex',     => '\\b(Array|Float|Integer|String|at_exit|autoload|binding|caller|catch|chop|chomp|chomp!|eval|exec|exit|fail|fork|format|gets|global_variables|gsub|iterator|lambda|load|local_variables|loop|open|p|print|printf|proc|putc|puts|raise|rand|readline|readlines|require|select|sleep|split|sprintf|srand|sub|syscall|system|test|trace_var|trap|untrace_var)\\b',
3052                                                 'style'      => 'library function',
3053                                                 'childregex' => []
3054                                               },
3055                                               {
3056                                                 'name'       => 'braces, parens and brakets',
3057                                                 'regex'      => '[\\[\\]\\{\\}\\(\\)]',
3058                                                 'style'      => 'braces',
3059                                                 'childregex' => []
3060                                               },
3061                                               {
3062                                                 'name'       => '<< stuff',
3063                                                 'regex'      => '<<(?:("|\')([^\\n]*)\\2|\\w*).*?^\\3$',
3064                                                 'style'      => 'text',
3065                                                 'childregex' => []
3066                                               },
3067                                               {
3068                                                 'name'       => 'symbols',
3069                                                 'regex'      => '(?:[:*-+<>=^!,/]+|\.\.+)',
3070                                                 'style'      => 'symbol',
3071                                                 'childregex' => []
3072                                               },
3073                                               {
3074                                                 'name'       => 'numbers',
3075                                                 'regex'      => '\d[\d.]*',
3076                                                 'style'      => 'numeric',
3077                                                 'childregex' => []
3078                                               },
3079                                               {
3080                                                 'name'       => 'embedded documentation',
3081                                                 'regex'      => '^=.*?^(?:=end|\\Z)',
3082                                                 'style'      => 'doc comment',
3083                                                 'childregex' => []
3084                                               }
3085                                             ]
3086                           };
3087
# SQL syntax definition.
# taken from nedit
# modified by PP
# very incomplete: only a small subset of keywords and datatypes is covered.
#
# 'filename' is matched against the input file name (case-insensitively,
# via the inline (?i) flag); the top-level 'regex' is empty for SQL.
# NOTE(review): presumably the top-level 'regex' is the content-based
# language detector, so SQL is only selected by file name or explicit
# langmode -- confirm against the language-guessing code.
#
# Each entry of 'patterns' gives a display name, the regex to look for,
# the style used to render a match, and a 'childregex' list (empty for
# every SQL pattern).
$LANGUAGE{'sql'}        = {
                            'filename'   => '(?i)\\.sql$',
                            'regex'      => '',
                            'patterns'   => [
                                              {
                                                'name'       => 'keywords I',
                                                # Operators and punctuation match on their own;
                                                # the word boundaries apply to the keyword
                                                # alternatives only, so "on"/"or"/"and" are no
                                                # longer picked up at the end of longer words
                                                # such as "position" or "for".
                                                'regex'      => '(?i)(?:,|%|<|>|:=|=|\\(|\\)|\\b(?:select|on|from|order by|desc|where|and|or|not|null|true|false)\\b)',
                                                'style'      => 'reserved word',
                                                'childregex' => []
                                              },
                                              {
                                                # "--" up to the end of the line
                                                'name'       => 'comment I',
                                                'regex'      => '--.*?$',
                                                'style'      => 'comment',
                                                'childregex' => []
                                              },
                                              {
                                                # C-style /* ... */ comment
                                                'name'       => 'comment II',
                                                'regex'      => '/\\*.*?\\*/',
                                                'style'      => 'comment',
                                                'childregex' => []
                                              },
                                              {
                                                # Single-quoted string; a quote preceded by an odd
                                                # number of backslashes does not end the string.
                                                'name'       => 'string',
                                                'regex'      => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
#                                                'regex'      => '(\'\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\')',
                                                'style'      => 'string',
                                                'childregex' => []
                                              },
                                              {
                                                'name'       => 'keywords II',
                                                'regex'      => '(?i)end if;|\\b(create|replace|begin|end|function|return|fetch|open|close|into|is|in|when|others|grant|on|to|exception|show|set|out|pragma|as|package)\\b',
                                                'style'      => 'reserved word',
                                                'childregex' => []
                                              },
                                              {
                                                'name'       => 'keywords III',
                                                'regex'      => '(?i)\\balter\\b',
                                                'style'      => 'reserved word',
                                                'childregex' => []
                                              },
                                              {
                                                'name'       => 'datatypes',
                                                # "blob" was previously misspelled "blol" and
                                                # therefore never matched.
                                                'regex'      => '(?i)\\b(integer|blob|date|numeric|character|varying|varchar|char)\\b',
                                                'style'      => 'predefined type',
                                                'childregex' => []
                                              },
                                              {
                                                'name'       => 'words',
                                                'regex'      => '(?i)\\b(constraint|key|references|primary|table|foreign|add|insert|group by)\\b',
                                                'style'      => 'reserved word',
                                                'childregex' => []
                                              }
                                            ]
                            };
3146
3147    
3148  
3149
# Syntax definition for patch/diff output (enhanced by W. Friebel).
#
# Selected by a file name ending in .patch or .diff; the content-detection
# 'regex' is left empty.  The rules below are listed as
# [ name, regex, style ] and expanded into the usual pattern hashes, each
# with an empty 'childregex' list.  They cover, in this order: file header
# lines, "@@" hunk markers, removed ("-"), added ("+") and changed ("!")
# lines.
$LANGUAGE{'patch'} = do {
    my @rules = (
        [ 'header', '^Index: .*?$|^===== .*?$|^diff .*?$|^--- .*?$|^\+\+\+ .*?$|^\*\*\* .*?$', 'separator' ],
        [ 'hunk',   '^@@ .*?$',  'line spec' ],
        [ 'from',   '^-.*?$',    'deletion' ],
        [ 'to',     '^\+.*?$',   'insertion' ],
        [ 'mod',    '^\!.*?$',   'modification' ],
    );

    my %definition = (
        'filename' => '(?i)\\.patch$|\\.diff$',
        'regex'    => '',
        'patterns' => [
            map {
                my ($rule_name, $rule_regex, $rule_style) = @{$_};
                +{
                    'name'       => $rule_name,
                    'regex'      => $rule_regex,
                    'style'      => $rule_style,
                    'childregex' => [],
                };
            } @rules
        ],
    );

    \%definition;
};
3187
3188
3189
3190 #####
3191 #
3192 # LANGUAGE: shell script
3193 #
3194
# Syntax definition for shell scripts.
#
# 'filename' matches names ending in .sh or .shell; the top-level 'regex'
# matches a "#!" line whose interpreter path ends in sh, bash, ash, zsh or
# ksh.  Each pattern gives a name, the regex to look for, the style used to
# render a match, and a 'childregex' list of patterns applied to the match.
# NOTE(review): the comment pattern relies on ^ and $ matching at line
# boundaries (/m), as the other line-oriented patterns in this file do --
# confirm against the matching engine.
$LANGUAGE{'shellscript'} = {
        'filename' => '\\.(sh|shell)$',
        'regex' => '^\\s*#\\s*![^\\s]*(sh|bash|ash|zsh|ksh)',
        'patterns' => [ {
                'name' => 'comment',
#               'regex' => '^[ \t]*[^$]?\#[^!]?.*?$',
                # "#" at line start or after a space, up to the end of that
                # line, but not "#!".  The former '([^\\!].)*?' consumed
                # characters in pairs, so comments with an odd number of
                # characters after "#" were missed or ran into the next line.
                'regex' => '(^| )#(?:[^!\\n][^\\n]*)?$',
                'style' => 'comment',
                'childregex' => []
        }, {
                # variable assignment: NAME=
                'name' => 'identifier',
                'regex' => '[a-zA-Z_][a-zA-Z0-9_]*=',
                'style' => 'identifier',
                'childregex' => [ {
                        'name' => 'identifier',
                        'regex' => '[a-zA-Z_][a-zA-Z0-9_]*',
                        'style' => 'identifier',
                        'childregex' => []
                } ]
        }, {
                # variable reference: $0-$9, $#, $* or $NAME
                'name' => 'identifier',
                'regex' => '\\$([0-9#\\*]|[a-zA-Z_][a-zA-Z0-9_]*)',
                'style' => 'identifier',
                'childregex' => []
        }, {
                'name' => 'interpreter line',
                'regex' => '^[ \t]*#!.*?$',
                'style' => 'preprocessor',
                'childregex' => []
        }, {
                # double-quoted string; variable references inside it are
                # highlighted through the child pattern
                'name' => 'string',
                'regex' => '""|"(\\\\"|[^\\"])*"',
                'style' => 'string',
                'childregex' => [ {
                        'name' => 'identifier',
                        'regex' => '\\$([0-9#\\*]|[a-zA-Z_][a-zA-Z0-9_]*)',
                        'style' => 'identifier',
                        'childregex' => []
                } ]
        } ]
};

# 'sh' is an alias: both names share the same definition hash.
$LANGUAGE{'sh'} = $LANGUAGE{'shellscript'};
3238 return \%LANGUAGE;
3239
3240 };
3241 use Getopt::Std;
# Script entry point.
# Parse the command line: -i and -l each take a value (Getopt::Std stores
# them in $opt_i and $opt_l).  Exit with status 2 if option parsing fails.
exit 2 unless getopts('i:l:');

# Convert the file named by the first remaining argument, or '-' when none
# is given, using the 'xterm' output format and the language mode from -l.
# The result of main() is kept in the global $str.
# NOTE(review): '-' presumably means stdin/stdout -- confirm in
# parse_passed_params()/main().
$str = main(
    parse_passed_params(
        infile       => $ARGV[0] || '-',
        outfile      => '-',
#       linenumbers  => 1 ,
        langmode     => $opt_l,
        outputformat => 'xterm',
        # many other options
    )
);
3250
3251 1;
3252
3253 __END__
3254
3255 =head1 Code2HTML
3256
3257  Convert source code (c,java,perl,html,...) into formatted html.
3258
3259 =head1 SYNOPSIS
3260
3261   use Code2HTML;
3262   $html = code2html( $sourcecode );
3263   # or
3264   code2html( infile        => 'file.java' , 
3265              outfile       => 'file.html', 
3266              linenumbers   => 1 ,
3267              langmode      => 'perl' ,
3268              # many other options
3269            );
3270
3271 =head1 DESCRIPTION
3272
3273 Code2HTML converts source code into color-coded, formatted html,
3274 either as a simple code2html() function call, or as an Apache handler.
3275
3276 This package is an adaptation of Peter Palfrader's code2html application.
3277
3278 The statement 
3279
3280  use Code2HTML;
3281
3282 exports the function code2html(), which takes the following arguments
3283
3284  $html = code2html(
3285                         input           => $source_code,
3286                         infile          => 'filename.extension',
3287
3288                         outfile         => 'file.html',
3289                         outputformat    => 'html',      # or html-dark, or ...
3290
3291                         langmode        => 'java',      # or perl,html,c,...
3292                         langfile        => 'langFile',  # specify alternative
3293                                                         # syntax definitions
3294
3295                         linenumbers     => 1,           # turn on linenumbers
3296                         linknumbers     => 1,           # linenumber links
3297                         line_number_prefix => '-',      # linenumber anchors
3298                         replacetabs     => 8,           # tabs to spaces 
3299
3300                         noheader        => '',          # don't use template
3301                         template        => 'filename',  # override template
3302
3303                         title           => $title,      # set html page title
3304                         content_type    => 1,           # output httpd header
3305                   );
3306
3307 All input parameters are optional except the source code 
3308 specification, which must be defined by either input or infile keys, or
3309 by passing exactly one argument which will then be taken to be the 
3310 source code.
3311
3312  input          source code to be converted (or set source -infile)
3313
3314  infile         name of file with code  to be converted (or use -input)
3315
3316  langmode       language of source file.  If omitted, code2html
3317                 will try to guess the language from the file extension
3318                 or start of the source code.  Language modes provided are
3319
3320                         ada, ada95, awk, c, c++, cc, cxx, groff, html,
3321                         java, javascript, js, m4, make, makefile, pas,
3322                         pascal, patch, perl, plain, pov, povray, ruby,
3322                         sh, shellscript, sql.
3323
3324  langfile       filename of file with alternative syntax definitions
3325
3326  outfile        name of file to put html in.  If omitted, 
3327                 just return html in $html=code2html(...)
3328
3329  outputformat   style of output html.  Available formats are 
3330                 html (default), html-dark, html-light, html-nobg.
3331
3332  replacetabs    replace tabs in source with given number of spaces
3333
3334  title          set title of output html page
3335
3336  content_type   output a Content-Type httpd header
3337  
3338  linenumbers    print line numbers in source code listing
3339
3340 =head1 AUTHOR
3341
3342 Jim Mahoney (mahoney AT marlboro.edu), Peter Palfrader, and others.
3343
3344 =head1 COPYRIGHT and LICENSE
3345
3346  Copyright (c) 1999, 2000 by Peter Palfrader and others.
3347
3348 Permission is hereby granted, free of charge, to any person obtaining
3349 a copy of this software and associated documentation files (the
3350 ``Software''), to deal in the Software without restriction, including
3351 without limitation the rights to use, copy, modify, merge, publish,
3352 distribute, sublicense, and/or sell copies of the Software, and to
3353 permit persons to whom the Software is furnished to do so, subject to
3354 the following conditions:
3355
3356 The above copyright notice and this permission notice shall be
3357 included in all copies or substantial portions of the Software.
3358
3359 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
3360 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3361 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
3362 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
3363 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
3364 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
3365 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3366
3367 =head1 SEE ALSO
3368
3369  Peter Palfrader's Code2HTML page at http://www.palfrader.org/code2html/
3370