8 my $vernr = "0.9.1"; # this is the underlying version of Peter Palfrader's script
9 ##########################################################################
11 # very early check whether this routine is called from less -R or less -r
12 # or if the variable LESS contains -R or -r
13 # return if not (escape sequences would not be interpreted as colors)
14 # on systems with process info in /proc this could be coded in lesspipe.sh
16 ##########################################################################
# Early guard: when the first argument is numeric and a second argument is
# present, exit with status 1 unless less is known to run with -r/-R (via the
# LESS environment variable or the command line of the less process), since
# otherwise the colour escape sequences would not be rendered.
17 if ( $ARGV[0] =~ /^\d+$/ and $ARGV[1] ) {
18 # we were called from lesspipe.sh with first arg = PPID
20 # if env variable LESS contains -r or -R we are sure that colors get displayed
# NOTE(review): the pattern accepts any dash-prefixed word containing r/R
# (e.g. a long option name), not only -r/-R -- confirm this is intended.
21 if ( $ENV{LESS} !~ /-\w*r\w*\b/i ) {
22 # check if less is called with -r or -R (highly OS dependent)
23 # tested only for Linux, Solaris, IRIX, Tru64, MacOS X, FreeBSD and AIX !!!
24 my $psargs = '-oppid= -oargs='; # default ps output columns: parent pid and full command line
25 if ( $^O eq 'darwin' || $^O =~ /bsd$/ ) {
26 $psargs = '-oppid -ocommand'; # darwin/BSD ps uses different column keywords
27 } elsif ( $^O eq 'hpux' ) {
31 eval "use Proc::ProcessTable $procvers"; # loaded at run time; the ps-free code further down uses it
# $PPID is interpolated into a shell command; presumably it was taken from
# the digits-only $ARGV[0] -- the assignment is not visible here, TODO confirm.
33 my $p = `ps -p $PPID $psargs`;
# the process is less, but without an option cluster containing r/R: no colours
34 exit 1 if $p =~ /\bless\s+/ and $p !~ /less\s+-\w*r\w*\b/is;
# the process is not less itself: take a pid from the ps output and check that process instead
35 if ( $p !~ /\bless\s+/ ) {
36 if ($p =~ /\d+\s+(\d+)/) {
39 $PPID = $1 if $p =~ /(\d+)/;
41 my $p2 = `ps -p $PPID $psargs`;
42 exit 1 if $p2 !~ /less\s+-\w*r\w*\b/is;
# Same checks driven by Proc::ProcessTable entries instead of ps output
# (presumably the branch for platforms where ps is unsuitable -- TODO confirm).
45 my $pt = new Proc::ProcessTable;
47 next unless $_->pid eq $PPID;
49 exit 1 if $p =~ /\bless\s+/ and $p !~ /less\s+-\w*r\w*\b/i;
50 if ( $p !~ /\bless\s+/ ) {
53 next unless $_->pid eq $PPID;
55 exit 1 if $p !~ /less\s+-\w*r\w*\b/i;
63 ########################################################################
68 # Code2Html, peter AT palfrader.org #
70 # $Date: 2002/01/12 21:17:02 $
72 # $Id: code2html,v 1.13 2002/01/12 21:17:02 weaselp Exp $
75 # Peter Palfrader. Written in 1999, 2000, 2001, 2002. #
76 # A lot of other people. See CREDITS file. #
79 # code2html is a perlscript which converts a program #
80 # source code to syntax highlighted HTML by applying a set #
81 # of regular expressions depending on the language #
82 # the source code is written. #
84 # see the man-page for details, #
86 ########################################################################
# File-level defaults; parse_passed_params uses the first, second and fourth
# as default option values.
89 my $LINE_NUMBER_DEFAULT = "none"; # 'none', 'normal', 'linked'
90 my $REPLACE_TAB_DEFAULT = "8"; # default for the 'replacetabs' option
# number of leading characters of the input that get_input_file tests
# against the per-language 'regex' when guessing the language mode
92 my $LANG_TEST_LENGTH = 1024;
94 my $DEFAULT_OUTPUTFORMAT='xterm'; # default for the 'outputformat' option
# set to 1 once %STYLESHEET and %LANGUAGE have been built, so that later
# calls can skip rebuilding them
97 my $STYLE_AND_LANGUAGE_FLAG;
# no command line arguments at all: call Usage()
100 Usage() unless @ARGV;
101 # =======================================================================
102 # == subroutines ========================================================
103 # =======================================================================
# program name: $0 with everything up to the last '/' removed
106 (my $prog = $0) =~ s!.*/!!;
# modification time of the script file ((stat)[9]), formatted with strftime "%F"
107 my $time = strftime("%F", localtime( (stat($0))[9]));
109 $prog V$VERSION $time based on Code2Html version $vernr (peter\@palfrader.org)
111 my $origtext = <<EOF;
112 Usage: $prog [options] [input_file [output_file]]
114 Convert a program source to syntax highlighted HTML,
115 or any other format for wich rules are defined.
117 -l, --language-mode set language mode
118 --fallback LANG fallback language mode
119 -v, --verbose prints progress information to STDER
120 -n, --linenumbers print out the source code with line numbers
121 -P, --prefix optional prefix to use for linenumber anchors
122 -N, --linknumbers linenumbers will link to themselves
123 -t, --replace-tabs[=TABSTOP-WIDTH]
124 replace <tabs> with spaces
125 -L, --language-file=LANGUAGE-FILE
126 specify an alternate file for definitions
127 -m, --modes print all available modes
128 -h, --help print this message
129 -V, --version print version
130 -c, --content-type prints a Content-Type header
131 -o, --output-format selects the output-format
132 -H, --no-header don't use the template
133 --template=FILE override template
134 -T, --title set title
136 -w, --linewidth max characters per line
137 -b, --linebreakprefix prefix of the new lines
139 see the man-page code2html for further help
# Conversion driver.
#   in      : a hash reference of options (copied into %params)
#   effects : (re)builds the globals %STYLESHEET / %LANGUAGE when needed, sets
#             the globals $ENTITIES / %ENTITIES, loads the input, applies the
#             language rules and writes $html to $params{outfile} when given
#   returns : an error message string on the failure paths shown below, or 0
#             when get_input_file did not return a reference
149 my %params = %{shift()};
150 my $html; # end result
152 # undefine the input record separator so everything gets loaded in one turn
153 local $/ = undef; # don't propagate this change outside this package.
155 # Only set %STYLESHEET and %LANGUAGE if they haven't been
156 # already set in a previous call ( if, say, we're running
157 # in a persistent environment under mod_perl)
158 # or if the langfile is passed in explicitly.
159 if ( $params{'langfile'} or ! $STYLE_AND_LANGUAGE_FLAG ) {
160 $STYLE_AND_LANGUAGE_FLAG = 1; # now they will be defined.
162 print STDERR "getting patterns...\n" if ($params{'verbose'});
163 # building up the database
164 # newer entries overwrite old ones
166 push @CONFIG_FILES, "/etc/code2html.config";
168 $ENV{'HOME'}."/.code2html.config" if $ENV{'HOME'};
170 split(/:/,$ENV{'CODE2HTML_CONFIG'}) if $ENV{'CODE2HTML_CONFIG'};
172 split(/:/,$params{'langfile'}) if $params{'langfile'};
174 %STYLESHEET = %{ &get_default_stylesheet } ;
175 %LANGUAGE = %{ &get_default_database } ;
177 for (@CONFIG_FILES) {
179 # if I use `do $_` instead of scalar eval...
180 # %LANGUAGE is not exported and imported correctly
181 # (read: at all) (PP)
# NOTE(review): each config path is interpolated unquoted into a shell
# command (`cat`) and the output is eval'ed as Perl code; paths come from
# the environment and the langfile option -- only trusted values are safe.
182 unless (scalar eval `cat $_`) {
183 warn "couldn't parse $_: $@" if $@;
190 # When called as a package, "die" is impolite. Changed to "return".
191 # die "Outputformat $params{'outputformat'} not defined"
192 # unless defined $STYLESHEET{$params{'outputformat'}};
193 return "Outputformat $params{'outputformat'} not defined"
194 unless defined $STYLESHEET{$params{'outputformat'}};
# %STYLE is a shallow copy of the selected stylesheet
196 my %STYLE = % { $STYLESHEET{$params{'outputformat'}} };
198 # load alternate template if given
199 if (($params{'template'} ne "") && ( ! $params{'noheader'} )) {
200 # open (FILE, $params{'template'}) ||
201 # die ("Could not open template file $params{'template'}: $!");
# NOTE(review): two-argument open on a bareword handle; the whole file is
# read by the single <FILE> below because $/ is undef here.
202 open (FILE, $params{'template'}) ||
203 return ("Could not open template file $params{'template'}: $!");
204 $STYLE{'template'} = <FILE>;
208 # set up the global ENTITIES variables ( the scalar and the hash )
209 # from the STYLE definition
210 $ENTITIES = $ { $STYLE{'entities'} }{'listofchars'};
211 %ENTITIES = % { $ { $STYLE{'entities'} }{'replace_by' } };
213 # modify the header and footer so that the template variables
# the template is split at %%code%%: text before it becomes the header,
# text after it the footer
216 unless ($STYLE{'template'} =~ /^(.*)%%code%%(.*)$/s) {
217 return "template does not contain a %%code%% variable";
220 $STYLE{'header'} = $1;
221 $STYLE{'footer'} = $2;
222 $STYLE{'header'} =~ s/%%title%%/$params{'title'}/g;
223 $STYLE{'footer'} =~ s/%%title%%/$params{'title'}/g;
224 $STYLE{'header'} =~ s/%%version%%/$vernr/g;
225 $STYLE{'footer'} =~ s/%%version%%/$vernr/g;
227 # load the input file and set params{'langmode'}
228 # if it is not already. this is done by probing a
229 # set of rules defined in %LANGUAGE
231 print STDERR "loading input file...\n" if ($params{'verbose'});
232 $code_ref = &get_input_file(\%params,
235 $params{'alt_langmode'});
# get_input_file reports errors as plain strings; any non-reference result
# is turned into 0 here, so the message text is not passed on
237 return 0 if ! ref $code_ref;
238 # select the rules for our language.
239 my $language_rules_ref =
240 $LANGUAGE{ lc($params{'langmode'}) }->{'patterns'};
242 print STDERR "applying stylesheet...\n" if ($params{'verbose'});
243 # Apply the Stylesheets
244 # set 'starttag' and 'endtag' for every rule according to
245 # its 'style' value the tags are defined in the stylesheet
246 &apply_stylesheets_to_rules( $language_rules_ref, \%STYLE );
248 print STDERR "getting headers ...\n" if ($params{'verbose'});
249 $html = &put_headers(\%params, \%STYLE);
251 my $snippetlist_ref = [] ;
252 print STDERR "creating snippet-list...\n" if $params{'verbose'};
253 &create_snippetlist( $language_rules_ref,
254 $$code_ref, $snippetlist_ref, \%STYLE);
256 print STDERR "getting html converted code ...\n" if $params{'verbose'};
257 $html .= &put_output(\%params, $snippetlist_ref, \%STYLE);
260 # print " - debug : \n";
261 # foreach my $key (keys %params) {
262 # print " $key => " . $params{key} . "\n";
264 # return " - debug: done";
# tidy up the escape sequences: drop a reset (ESC[0m) that is immediately
# followed by another one- or two-digit SGR sequence ...
267 $html =~ s/\e\[0m(\e\[\d\d?m)/$1/g;
# ... and repeatedly drop a reset that follows an earlier reset with only
# escape-free text in between; $ii merely counts the passes (no use of it is
# visible here)
268 $ii++ while $html =~ s/(\e\[0m[^\e]+)\e\[0m/$1/g;
271 if ( $params{outfile} ) {
272 if ( $params{outfile} eq '-') {
# NOTE(review): two-argument open with the mode concatenated to the name, and
# the error text appends $! without a separator.
276 open(FILEHANDLE, '>'.$params{outfile}) or
277 return( " Couldn't open output file " . $params{outfile} . "$!");
279 print FILEHANDLE $html;
291 #### parse_passed_params
292 #### replaces parse_params for package version of program,
293 #### constructing %RESULT hash from options passed by calling routine.
# Builds the option hash %RESULT from the arguments passed by the caller:
# the defaults listed below come first and the caller's key => value pairs
# follow them, so the caller's values win. Afterwards the title and the
# linenumbers setting are normalised.
294 sub parse_passed_params {
# single-value calling form: the lone argument is treated as the text to
# convert (presumably guarded by an argument-count check -- TODO confirm)
296 @_ = ( input => $_[0] );
301 input => '', # text to convert
303 infile => '', # filename to get text from
304 outfile => '', # file to write html to
306 langmode => '', # language (perl,java,html,...)
307 alt_langmode => 'html', # language to use if can't tell
308 langfile => '', # more definitions of languages
310 line_number_prefix => '',
311 linenumbers => $LINE_NUMBER_DEFAULT,
312 outputformat => $DEFAULT_OUTPUTFORMAT,
313 replacetabs => $REPLACE_TAB_DEFAULT,
316 noheader => '', # 1 => don't print template
318 content_encoding => '',
319 template => '', # more template definitions
322 what_to_do => 'normal',
324 @_ , # any input key=>value pairs
325 # will override the defaults
# title falls back to the input file name, then to the literal 'Code2HTML'
328 $RESULT{title} = $RESULT{infile} if $RESULT{infile} && !$RESULT{title};
329 $RESULT{title} = 'Code2HTML' unless $RESULT{title};
# Any true value other than exactly none/normal/linked is coerced to
# 'normal'. The alternation is grouped so that ^ and $ apply to all three
# words; ungrouped, the pattern let values such as "abnormal" through.
330 if ( $RESULT{linenumbers} and
331 $RESULT{linenumbers} !~ m/^(?:none|normal|linked)$/ ) {
332 $RESULT{linenumbers} = 'normal';
338 ###########################################################################
339 ######################## checkTabulator ###################################
340 ##########################################################################
# Expands tab characters in $line to spaces so that the text following each
# tab starts at the next multiple of $TABSTOP.
#   $line    : the text to expand (a local copy is modified)
#   $TABSTOP : tab stop width; it is used as a modulus, so it must be non-zero
343 my ($line, $TABSTOP) = @_;
# handle one tab per iteration, always the leftmost remaining one
345 while ((my $at = index($line, "\t")) != -1)
# number of spaces needed to reach the next tab stop from column $at
347 my $cnt = ($TABSTOP - ($at % $TABSTOP));
# plain declaration: "my ... if COND" has undefined behaviour (perlsyn), and
# ' ' x $cnt already yields the empty string for a count of zero or less
348 my $replace_with = ' ' x $cnt;
349 $line =~ s/\t/$replace_with/;
355 ##########################################################################
356 ####################### get_input_file ###################################
357 ##########################################################################
363 # in/out : $langmode;
364 # in/out : $alt_langmode;
365 # returns: input file
# Loads the text to convert and determines the language mode.
#   $_[0] : hash reference of options (copied into %PARAMS, so assignments to
#           %PARAMS below do not reach the caller's hash)
#   $_[1] : hash reference of language definitions (copied into %LANGUAGE)
#   $_[2] : language mode
#   $_[3] : fallback language mode; written back through $_[3] at the end
# The failure paths shown below return a plain error message string.
367 my %PARAMS = %{$_[0]};
368 my %LANGUAGE = %{$_[1]};
369 my $langmode = $_[2];
370 my $alt_langmode = $_[3];
# text passed directly in the 'input' option is used as the code
373 if ( $PARAMS{'input'} )
375 $code = $PARAMS{'input'};
# an input file name of '-' means standard input
380 if ($PARAMS{'infile'} eq '-') {
381 *FILEHANDLE = *STDIN;
# NOTE(review): two-argument open on a bareword handle
383 open(FILEHANDLE, $PARAMS{'infile'})
384 || return("While opening '$PARAMS{'infile'}' for input: ".$!."\n");
387 $code = <FILEHANDLE>;
# $opt_i is not declared in this routine (presumably a global holding the
# original file name -- TODO confirm); it overrides 'infile' when set
389 $PARAMS{'infile'} = $opt_i || $PARAMS{'infile'};
# tab expansion is skipped when 'replacetabs' is 0
392 if ($PARAMS{'replacetabs'} != 0)
397 &checkTabulator($_, $PARAMS{'replacetabs'})
399 my @dummy = split(/\n/, $code)
# only the first $LANG_TEST_LENGTH characters are tested against the
# per-language 'regex' below
407 my $test_code = substr($code, 0, $LANG_TEST_LENGTH);
408 # warn("language mode not given. guessing...\n");
# a language matches when its 'filename' pattern matches the input file name
# or its 'regex' pattern matches the start of the code; empty patterns are ignored
414 if ( (($LANGUAGE{$_}->{'filename'} ne '')
415 && ($PARAMS{'infile'}
416 =~ m/$LANGUAGE{$_}->{filename}/)) ||
417 (($LANGUAGE{$_}->{'regex'} ne '')
418 && ($test_code =~ m/$LANGUAGE{$_}->{regex}/ ))
# NOTE(review): the fallback below is entered only when $alt_langmode is
# false, yet the warning announces that fallback mode; with a fallback set,
# the code is printed unhighlighted instead -- confirm this is deliberate.
428 if ( not $alt_langmode )
430 warn("Guessing language mode failed. " .
431 "Using fallback mode: '$alt_langmode'\n");
432 $langmode = $alt_langmode;
# $str is not declared in this routine (presumably a global flag -- TODO confirm)
437 print $code unless $str;
438 return("Guessing language mode failed.\n")
443 # warn("using '$langmode'\n");
# @_ aliases the caller's arguments, so this updates the caller's variable
448 $_[3] = $alt_langmode;
449 print "==> append : to filename to switch off syntax highlighting\n";
454 ###########################################################################
455 ####################### put_headers #######################################
456 ###########################################################################
# Assembles the text that precedes the converted code.
#   arg 1 : hash reference of options (copied into %PARAMS)
#   arg 2 : reference to the style hash
# The pieces are appended to $html.
460 my %PARAMS = %{shift()};
461 my $STYLE_REF = shift();
# optional HTTP-style header lines, only when 'content_type' is set
463 if ( $PARAMS{'content_type'}) {
464 $html .= "Content-Type: $$STYLE_REF{'content-type'}\n";
# NOTE(review): the condition tests 'content_encoding' but the value printed
# is taken from the key 'encoding' -- confirm which key is intended.
465 if ($PARAMS{'content_encoding'}) {
466 $html .= "Content-Encoding: $PARAMS{'encoding'}\n";
# the template part before %%code%% is added unless 'noheader' is set
470 $html .= $$STYLE_REF{'header'} unless $PARAMS{'noheader'};
475 ############################################################################
476 ####################### apply_stylesheets_to_rules #########################
477 ############################################################################
# Stores, in every rule of @$regexps_ref, the 'start' and 'stop' strings that
# the stylesheet's 'tags' table defines for the rule's 'style', as 'starttag'
# and 'endtag'; recurses into the rule's 'childregex' list when it has one.
#   $regexps_ref : array reference of rule hashes (modified in place)
#   $style_ref   : reference to the style hash
478 sub apply_stylesheets_to_rules
480 my ( $regexps_ref, $style_ref ) = @_;
482 for ( @$regexps_ref ) {
# an unknown style only produces a warning; the two assignments below still run
483 warn ("Style '".$_->{style}."' not defined in stylesheet.\n") unless defined $ { $$style_ref{'tags'} } { $_->{style} };
484 $_->{'starttag'} = $ { $ { $$style_ref{'tags'} } { $_->{style} } } { 'start' };
485 $_->{'endtag'} = $ { $ { $$style_ref{'tags'} } { $_->{style} } } { 'stop' };
486 apply_stylesheets_to_rules( $_->{childregex}, $style_ref ) if $_->{childregex};
490 ###########################################################################
491 ####################### create_snippetlist ################################
492 ###########################################################################
# Splits $code into a flat list of output pieces appended to @$snippetlist_ref.
#   $regexps_ref     : array reference of rules; each rule's 'regex' is
#                      matched against $code
#   $code            : the text to process
#   $snippetlist_ref : array reference receiving the pieces, in order
#   $style_ref       : reference to the style hash, passed on when recursing
# Text between matches is pushed after replacing the characters matched by
# $ENTITIES with their %ENTITIES values. A matched piece is pushed between
# the rule's start and end strings; it is either escaped the same way or
# handed to a recursive call with the rule's child rules.
493 sub create_snippetlist
495 my ( $regexps_ref, $code, $snippetlist_ref, $style_ref ) = @_ ;
496 my $length = length( $code ); # the main loop runs while $pos < $length
498 ## An array of regular expression structures, each of which is an
499 ## array. @res is kept sorted by starting position of the RExen and
500 ## then by the position of the regex in the language file. This allows
501 ## us to just evaluate $res[0], and to hand write fast code that typically
502 ## handles 90% of the cases without resorting to the _big_ guns.
504 ## FWIW, I pronounce '@res' REEZE, as in the plural of '$re'.
510 for ( @$regexps_ref ) {
513 next unless $code =~ m/($_->{regex})/gms ;
515 $pos = pos( $code ) ;
518 # $ { $ { $$style_ref{'tags'} } { $_->{style} } } { 'start' },
519 # $ { $ { $$style_ref{'tags'} } { $_->{style} } } { 'stop' },
521 # $pos - length( $1 ),
536 ## 90% of all child regexes end up with 0 or 1 regex that needs to be
537 ## worried about. Trimming out the 0's speeds things up a bit and
538 ## makes the below loop simpler, since there's always at least
539 ## 1 regexp. It doesn't speed things up much by itself: the percentage
540 ## of times this fires is really small. But it does simplify the loop
541 ## below and speed it up.
543 $code =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
544 push @$snippetlist_ref, $code ;
548 @res = sort { $a->[4] <=> $b->[4] || $a->[6] <=> $b->[6] } @res ;
550 ## Add a dummy at the end, which makes the logic below simpler / faster.
561 ## These are declared here for (minor) speed improvement.
579 while ( $pos < $length ) {
582 $match_spos = $re->[4] ;
583 $match_pos = $re->[5] ;
585 if ( $match_spos > $pos ) {
586 $prefix = substr( $code, $pos, $match_spos - $pos ) ;
587 $prefix =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
588 push @$snippetlist_ref, $prefix ;
591 if ( $match_pos > $match_spos ) {
592 $snippet = substr( $code, $match_spos, $match_pos - $match_spos ) ;
594 push @$snippetlist_ref, $re->[1] ;
595 create_snippetlist( $re->[3], $snippet, $snippetlist_ref, $style_ref ) ;
596 push @$snippetlist_ref, $re->[2] ;
599 $snippet =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
600 push @$snippetlist_ref, $re->[1], $snippet, $re->[2];
607 ## Hand coded optimizations. Luckily, the cases that arise most often
608 ## are the easiest to tune.
613 if ( $res[1]->[4] >= $pos ) {
614 ## Only first regex needs to be moved, 2nd and later are still valid.
615 ## This is often 90% of the cases for Perl or C (others not tested,
616 ## just uncomment the $n, $o, and $p lines and try it yourself).
619 pos( $code ) = $pos ;
620 unless ( $code =~ m/($re->[0])/gms ) {
623 ## If the only regexp left is the dummy, we're done.
624 $rest = substr( $code, $pos ) ;
625 $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
626 push @$snippetlist_ref, $rest ;
632 $re->[5] = $re_pos = pos( $code ) ;
633 $re->[4] = $re_spos = $re_pos - length( $1 ) ;
635 ## Walk down the array looking for $re's new home.
636 ## The first few loop iterations are unrolled and done manually
637 ## for speed, which handles 85 to 90% of the cases where only
638 ## $re needs to be moved.
640 ## Here's where that dummy regexp at the end of the array comes
641 ## in handy: we don't need to worry about array size here, since
642 ## it will always be after $re no matter what. The unrolled
643 ## loop stuff is outdented to make the conditionals fit on one
645 ## Element 4 in @{$res[x]} is the start position of the match.
646 ## Element 6 is the order in which it was declared in the lang file.
648 if ( ( $re_spos <=> $res[1]->[4] || $re_num <=> $res[1]->[6] ) <= 0 ) {
655 if ( ( $re_spos <=> $res[2]->[4] || $re_num <=> $res[2]->[6] ) <= 0 ) {
661 if ( ( $re_spos <=> $res[3]->[4] || $re_num <=> $res[3]->[6] ) <= 0 ) {
668 if ( ( $re_spos <=> $res[4]->[4] || $re_num <=> $res[4]->[6] ) <= 0 ) {
675 if ( ( $re_spos <=> $res[5]->[4] || $re_num <=> $res[5]->[6] ) <= 0 ) {
685 for ( ; $i < $l ; ++$i ) {
688 ( $re_spos <=> $res[$i]->[4] || $re_num <=> $res[$i]->[6] )
691 $res[$i-1] = $res[$i] ;
693 #++$p{sprintf( "%2d", $i )} ;
703 ## End optimizations. You can comment them all out and this net
704 ## does all the work, just more slowly. If you do that, then
705 ## you also need to comment out the code below that deals with
706 ## the second entry in @res.
710 ## First re always needs to be tweaked
713 pos( $code ) = $pos ;
714 unless ( $code =~ m/($re->[0])/gms ) {
716 ## If the only regexp left is the dummy, we're done.
717 $rest = substr( $code, $pos ) ;
718 $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
719 push @$snippetlist_ref, $rest ;
727 $re->[5] = $re_pos = pos( $code ) ;
728 $re->[4] = $re_pos - length( $1 ) ;
729 @changed_res = ( $re ) ;
733 ## If the optimizations above are in, the second one always
734 ## needs to be tweaked, too.
738 pos( $code ) = $pos ;
739 unless ( $code =~ m/($re->[0])/gms ) {
741 ## If the only regexp left is the dummy, we're done.
742 $rest = substr( $code, $pos ) ;
743 $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
744 push @$snippetlist_ref, $rest ;
750 $re->[5] = $re_pos = pos( $code ) ;
751 $re->[4] = $re_spos = $re_pos - length( $1 ) ;
753 ( $changed_res[0]->[4] <=> $re_spos ||
754 $changed_res[0]->[6] <=> $re->[6]
757 unshift @changed_res, $re ;
760 $changed_res[$i] = $re ;
766 local $_ = $res[$i] ;
768 last if $_->[4] >= $pos ;
771 pos( $code ) = $pos ;
772 unless ( $code =~ m/($_->[0])/gms ) {
774 $rest = substr( $code, $pos ) ;
775 $rest =~ s/($ENTITIES)/$ENTITIES{$1}/ge ;
776 push @$snippetlist_ref, $rest ;
779 ## If this regex is no longer needed, remove it by not pushing it
780 ## on to @changed_res. This means we need one less slot in @res.
785 $_->[5] = $re_pos = pos( $code ) ;
786 $_->[4] = $re_spos = $re_pos - length( $1 ) ;
788 ## Insertion sort in to @changed_res
790 for ( $j = $#changed_res ; $j > -1 ; --$j ) {
793 ( $changed_res[$j]->[4] <=> $re_spos ||
794 $changed_res[$j]->[6] <=> $re_num
797 $changed_res[$j+1] = $changed_res[$j] ;
799 $changed_res[$j+1] = $_ ;
802 ## Merge sort @changed_res and @res in to @res
805 for ( @changed_res ) {
808 ( $_->[4] <=> $res[$i]->[4] || $_->[6] <=> $res[$i]->[6] ) > 0
810 $res[$j++] = $res[$i++] ;
819 ##########################################################################
820 ####################### put_output #######################################
821 ##########################################################################
# Produces the final text for the converted code.
#   $params          : hash reference of options
#   $snippetlist_ref : array reference of output pieces, joined without separator
#   $STYLE_REF       : reference to the style hash
823 my ( $params, $snippetlist_ref, $STYLE_REF ) = @_ ;
# the prefix handed to the line number routine is the 'line_number_prefix'
# option followed by '_', when that option is set
828 $prefix = $params->{'line_number_prefix'}.'_'
829 if $params->{'line_number_prefix'};
# the style's 'linenumbers' table maps the 'linenumbers' option value to a
# code reference, which is called with the joined text and the prefix
831 $result = &{ $ { $$STYLE_REF{'linenumbers'}} {$params->{'linenumbers'}}
832 }(join ('', @$snippetlist_ref), $prefix);
834 # print FILEHANDLE $result unless $params->{'dont_print_output'} ;
835 # print FILEHANDLE $$STYLE_REF{'footer'} unless $params->{'noheader'};
# the template part after %%code%% is appended unless 'noheader' is set
837 $result .= $$STYLE_REF{'footer'} unless $params->{noheader};
843 ############################################################################
844 ####################### get_default_stylesheet #############################
845 ############################################################################
# Builds the built-in output styles in %STYLESHEET ('xterm', 'html-light',
# its alias 'html', 'html-nobg' and 'html-dark') and returns a reference to
# that hash.
846 sub get_default_stylesheet
853 ########## different color modes for html.
854 # those are named html-dark, html-nobg and html-light.
855 # html-light is also named html
856 # the only difference between html-light and html-nobg is
857 # that html-light defines a body background and text color.
858 # nobg: the body tag defines no background and no text color.
860 my ($bold, $underline, $reverse, $reset, $red, $green, $yellow, $blue,
862 eval "use Term::ANSIColor"; # loaded at run time; a failure to load does not abort the script
865 $underline = "\e[4m"; # literal escape sequence (presumably the fallback when the module is missing -- TODO confirm)
# escape sequences obtained from color() (presumably the one imported from
# Term::ANSIColor above -- TODO confirm)
875 $bold = color('bold');
876 $underline = color('underline');
877 $reverse = color('reverse');
878 $reset = color('reset');
880 $green = color('green');
881 $yellow = color('yellow');
882 $blue = color('blue');
883 $magenta = color('magenta');
884 $cyan = color('cyan');
886 $STYLESHEET{'xterm'} = { 'template' => '%%code%%',
887 'content-type' => 'text/html',
893 # o as the first parameter is the joined snippetlist
894 # o the second is an optional prefix, needed if more than one block
895 # in a file is highlighted. needed in patch-mode. may be empty
896 # the sub should then return a scalar made up of the joined lines including linenumbers
897 my @lines = split ( /\n/, $_[0] );
899 my $lengthofnr = length(@lines);
900 my $format = qq{%${lengthofnr}u %s\n} ;
901 join ('', map ( {$nr++; sprintf ( $format , $nr, $_ )} @lines));
904 # is not defined for xterm output, therefore do nothing
909 'comment' => { 'start' => $blue,
911 'doc comment' => { 'start' => "$bold$blue",
913 'string' => { 'start' => $red,
915 'esc string' => { 'start' => $magenta,
917 'character' => { 'start' => $reset,
919 'esc character' => { 'start' => $magenta,
921 'numeric' => { 'start' => $red,
923 'identifier' => { 'start' => $cyan,
925 'predefined identifier' => { 'start' => $cyan,
927 'type' => { 'start' => $cyan,
929 'predefined type' => { 'start' => $green,
931 'reserved word' => { 'start' => "$yellow",
933 'library function' => { 'start' => $reset,
935 'include' => { 'start' => $green,
937 'preprocessor' => { 'start' => $green,
939 'braces' => { 'start' => $reset,
941 'symbol' => { 'start' => $green,
943 'function header' => { 'start' => "$bold$red",
945 'function header name' => { 'start' => "$bold$cyan",
947 'function header args' => { 'start' => $cyan,
949 'regex' => { 'start' => $magenta,
951 'text' => { 'start' => $red,
955 'entity' => { 'start' => $green,
959 'assignment' => { 'start' => $green,
961 'dependency line' => { 'start' => $cyan,
963 'dependency target' => { 'start' => $blue,
965 'dependency continuation'=> { 'start' => $magenta,
967 'continuation' => { 'start' => $magenta,
969 'macro' => { 'start' => $red,
971 'int macro' => { 'start' => $red,
973 'esc $$$' => { 'start' => $yellow,
975 'separator' => { 'start' => $green,
977 'line spec' => { 'start' => $cyan,
979 'deletion' => { 'start' => $red,
981 'insertion' => { 'start' => $blue,
983 'modification' => { 'start' => $magenta,
987 $STYLESHEET{'html-light'} = { 'template' =>
990 <title>%%title%%</title>
992 <body bgcolor="#ffffff" text="#000000">
996 <p align=right><small><font color=gray>syntax highlighted by
997 <a href="http://www.palfrader.org/code2html"><font
998 color=gray>Code2HTML</font></a>, v. %%version%%</font></small></p>
1002 'content-type' => 'text/html',
1003 'entities' => { 'listofchars' => '[<>&"]', # a regex actually
1016 # o as the first parameter is the joined snippetlist
1017 # o the second is an optional prefix, needed if more than one block
1018 # in a file is highlighted. needed in patch-mode. may be empty
1019 # the sub should then return a scalar made up of the joined lines including linenumbers
1020 my @lines = split ( /\n/, $_[0] );
1023 my $lengthofnr = length(@lines);
1024 my $format = qq{<a name="$_[1]line%u">%${lengthofnr}u</a> %s\n} ;
1025 join ('', map ( {$nr++; sprintf ( $format , $nr, $nr, $_ )} @lines));
1028 # this should do the same as above only with linenumbers that link to themselves
1029 # If this style does not support this, use the same as above.
1030 my @lines = split ( /\n/, $_[0] );
1033 my $lengthofnr = length(@lines);
1034 my $format = qq{<a name="$_[1]line%u" href="#$_[1]line%u">%$ {lengthofnr}u</a> %s\n};
1035 join ('', map ( {$nr++; sprintf ( $format , $nr, $nr, $nr, $_ )} @lines));
1039 'comment' => { 'start' => '<font color="#444444">',
1040 'stop' => '</font>' },
1041 'doc comment' => { 'start' => '<font color="#444444"><i>',
1042 'stop' => '</i></font>' },
1043 'string' => { 'start' => '<font color="#008000">',
1044 'stop' => '</font>' },
1045 'esc string' => { 'start' => '<font color="#77dd77">',
1046 'stop' => '</font>' },
1047 'character' => { 'start' => '<font color="#008000">',
1048 'stop' => '</font>' },
1049 'esc character' => { 'start' => '<font color="#77dd77">',
1050 'stop' => '</font>' },
1051 'numeric' => { 'start' => '<font color="#FF0000">',
1052 'stop' => '</font>' },
1054 'identifier' => { 'start' => '<font color="#2040a0">',
1055 'stop' => '</font>' },
1056 'predefined identifier' => { 'start' => '<font color="#2040a0"><strong>',
1057 'stop' => '</strong></font>' },
1059 'type' => { 'start' => '<font color="#2040a0"><strong>',
1060 'stop' => '</strong></font>' },
1061 'predefined type' => { 'start' => '<font color="#2040a0"><strong>',
1062 'stop' => '</strong></font>' },
1064 'reserved word' => { 'start' => '<strong>',
1065 'stop' => '</strong>' },
1066 'library function' => { 'start' => '<font color="a52a2a"><strong>',
1067 'stop' => '</strong></font>' },
1069 'include' => { 'start' => '<font color="0000ff"><strong>',
1070 'stop' => '</strong></font>' },
1071 'preprocessor' => { 'start' => '<font color="0000ff"><strong>',
1072 'stop' => '</strong></font>' },
1074 'braces' => { 'start' => '<font color="4444FF"><strong>',
1075 'stop' => '</strong></font>' },
1076 'symbol' => { 'start' => '<font color="4444FF">',
1077 'stop' => '</font>' },
1079 'function header' => { 'start' => '<strong>',
1080 'stop' => '</strong>' },
1081 'function header name' => { 'start' => '<font color="ff0000">',
1082 'stop' => '</font>' },
1083 'function header args' => { 'start' => '<font color="2040a0">',
1084 'stop' => '</font>' },
1086 'regex' => { 'start' => '<font color="b000d0">',
1087 'stop' => '</font>' },
1089 'text' => { 'start' => '<i>',
1093 'entity' => { 'start' => '<font color="ff0000">',
1094 'stop' => '</font>' },
1097 'assignment' => { 'start' => '<font color="2040a0">',
1098 'stop' => '</font>' },
1099 'dependency line' => { 'start' => '<font color="8b2252">',
1100 'stop' => '</font>' },
1101 'dependency target' => { 'start' => '<strong>',
1102 'stop' => '</strong>' },
1103 'dependency continuation'=> { 'start' => '<font color="000000"><strong>',
1104 'stop' => '</strong></font>' },
1105 'continuation' => { 'start' => '<strong>',
1106 'stop' => '</strong>' },
1107 'macro' => { 'start' => '<font color="2040a0">',
1108 'stop' => '</font>' },
1109 'int macro' => { 'start' => '<font color="4080ff">',
1110 'stop' => '</font>' },
1111 'esc $$$' => { 'start' => '<font color="444444">',
1112 'stop' => '</font>' }
1115 # html-light is also called html
1117 $STYLESHEET{'html'} = $STYLESHEET{'html-light'};
1120 # html-nobg is a modification of html-light
1121 # in such a way, that the body tag does not define
1122 # a background and a text color
1123 # (the stylesheet is registered under the key 'html-nobg').
1125 %{$STYLESHEET{'html-nobg'}} = %{$STYLESHEET{'html-light'}};
1126 ${ $STYLESHEET{'html-nobg'}} {'template'} = '<html>
1128 <title>%%title%%</title>
1134 <p align=right><small><font color=gray>syntax highlighted by
1135 <a href="http://www.palfrader.org/code2html"><font
1136 color=gray>Code2HTML</font></a>, v. %%version%%</font></small></p>
1142 # html-dark is a modification of html-light
1143 # in such a way, that the body tag does define
1144 # different colors and that the <font> colors are different.
1146 %{$STYLESHEET{'html-dark'}} = %{$STYLESHEET{'html-light'}};
1147 ${ $STYLESHEET{'html-dark'}} {'template'} = '<html>
1149 <title>%%title%%</title>
1151 <body bgcolor="#000000" text="#C0C0C0" vlink="#FFFFFF" alink="#00FF00" link="#FFFFFF">
1155 <p align=right><small><font color=gray>syntax highlighted by
1156 <a href="http://www.palfrader.org/code2html"><font
1157 color=gray>Code2HTML</font></a>, v. %%version%%</font></small></p>
1161 ${ $STYLESHEET{'html-dark'}} {'tags'} = {
1162 'comment' => { 'start' => '<font color="#909000">',
1163 'stop' => '</font>' },
1164 'doc comment' => { 'start' => '<font color="#909000"><i>',
1165 'stop' => '</i></font>' },
1166 'string' => { 'start' => '<font color="yellow">',
1167 'stop' => '</font>' },
1168 'esc string' => { 'start' => '<font color="#77dd77">',
1169 'stop' => '</font>' },
1170 'character' => { 'start' => '<font color="yellow">',
1171 'stop' => '</font>' },
1172 'esc character' => { 'start' => '<font color="#77dd77">',
1173 'stop' => '</font>' },
1174 'numeric' => { 'start' => '<font color="#FF0000">',
1175 'stop' => '</font>' },
1177 'identifier' => { 'start' => '<font color="#B0B0B0">',
1178 'stop' => '</font>' },
1179 'predefined identifier' => { 'start' => '<font color="#2040a0"><strong>',
1180 'stop' => '</strong></font>' },
1182 'type' => { 'start' => '<font color="#2040a0"><strong>',
1183 'stop' => '</strong></font>' },
1184 'predefined type' => { 'start' => '<font color="#2040a0"><strong>',
1185 'stop' => '</strong></font>' },
1187 'reserved word' => { 'start' => '<strong>',
1188 'stop' => '</strong>' },
1189 'library function' => { 'start' => '<font color="a52a2a"><strong>',
1190 'stop' => '</strong></font>' },
1192 'include' => { 'start' => '<font color="#00FF00">',
1193 'stop' => '</font>' },
1194 'preprocessor' => { 'start' => '<font color="#00FF00">',
1195 'stop' => '</font>' },
1197 'braces' => { 'start' => '<font color="darkCyan"><strong>',
1198 'stop' => '</strong></font>' },
1199 'symbol' => { 'start' => '<font color="darkCyan">',
1200 'stop' => '</font>' },
1202 'function header' => { 'start' => '<strong>',
1203 'stop' => '</strong>' },
1204 'function header name' => { 'start' => '<font color="ff0000">',
1205 'stop' => '</font>' },
1206 'function header args' => { 'start' => '<font color="2040a0">',
1207 'stop' => '</font>' },
1209 'regex' => { 'start' => '<font color="b000d0">',
1210 'stop' => '</font>' },
1212 'text' => { 'start' => '<i>',
1216 'entity' => { 'start' => '<font color="ff0000">',
1217 'stop' => '</font>' },
1220 'assignment' => { 'start' => '<font color="2040a0">',
1221 'stop' => '</font>' },
1222 'dependency line' => { 'start' => '<font color="8b2252">',
1223 'stop' => '</font>' },
1224 'dependency target' => { 'start' => '<strong>',
1225 'stop' => '</strong>' },
1226 'dependency continuation'=> { 'start' => '<font color="000000"><strong>',
1227 'stop' => '</strong></font>' },
1228 'continuation' => { 'start' => '<strong>',
1229 'stop' => '</strong>' },
1230 'macro' => { 'start' => '<font color="2040a0">',
1231 'stop' => '</font>' },
1232 'int macro' => { 'start' => '<font color="4080ff">',
1233 'stop' => '</font>' },
1234 'esc $$$' => { 'start' => '<font color="444444">',
1235 'stop' => '</font>' }
1239 return \%STYLESHEET;
1245 #############################################################################
1246 ####################### get_default_database ################################
1247 #############################################################################
1248 sub get_default_database
1254 $LANGUAGE{'plain'} = {
1267 $LANGUAGE{'ada'} = {
1268 'filename' => '(?i)\\.a(d[asb]?)?$',
1272 'name' => 'Comments',
1273 'regex' => '--.*?$',
1274 'style' => 'comment',
1278 'name' => 'String Literals',
1279 'regex' => '".*?("|$)',
1280 'style' => 'string',
1284 'name' => 'Character Literals',
1286 'style' => 'character',
1290 'name' => 'Ada Attributes',
1291 'regex' => '\'[a-zA-Z][a-zA-Z_]+\\b',
1292 'style' => 'reserved word',
1296 'name' => 'Numeric Literals',
1297 'regex' => '(((2|8|10|16)#[_0-9a-fA-F]*#)|[0-9.]+)',
1298 'style' => 'numeric',
1302 'name' => 'Withs Pragmas Use',
1303 'regex' => '\\b(?i)((with|pragma|use)[ \\t\\n\\f\\r]+[a-zA-Z0-9_.]+;)+\\b',
1304 'style' => 'include',
1308 'name' => 'Predefined Types',
1309 'regex' => '\\b(?i)(boolean|character|count|duration|float|integer|long_float|long_integer|priority|short_float|short_integer|string)\\b',
1310 'style' => 'predefined type',
1314 'name' => 'Predefined Subtypes',
1315 'regex' => '\\b(?i)field|natural|number_base|positive|priority\\b',
1316 'style' => 'predefined type',
1320 'name' => 'Reserved Words',
1321 'regex' => '\\b(?i)(abort|abs|accept|access|and|array|at|begin|body|case|constant|declare|delay|delta|digits|do|else|elsif|end|entry|exception|exit|for|function|generic|goto|if|in|is|limited|loop|mod|new|not|null|of|or|others|out|package|pragma|private|procedure|raise|range|record|rem|renames|return|reverse|select|separate|subtype|task|terminate|then|type|use|when|while|with|xor)\\b',
1322 'style' => 'reserved word',
1326 'name' => 'Ada 95 Only',
1327 'regex' => '\\b(?i)(abstract|tagged|all|protected|aliased|requeue|until)\\b',
1328 'style' => 'reserved word',
1332 'name' => 'Identifiers',
1333 'regex' => '\\b[a-zA-Z][a-zA-Z0-9_]*\\b',
1334 'style' => 'identifier',
1338 'name' => 'Dot All',
1339 'regex' => '(?i)\\.all\\b',
1340 'style' => 'predefined identifier',
1345 $LANGUAGE{'ada95'} = $LANGUAGE{'ada'};
1362 $LANGUAGE{'awk'} = {
1363 'filename' => '(?i)\\.awk$',
1364 'regex' => '^\\s*#\\s*![^\\s]*awk',
1367 'name' => 'comment',
1369 'style' => 'comment',
1374 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
1375 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1376 'style' => 'string',
1379 'name' => 'esc character',
1381 'style' => 'esc character',
1388 'regex' => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
1389 # 'regex' => '\'\'|\'\\\\\\\\\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\'',
1390 'style' => 'string',
1393 'name' => 'esc character',
1395 'style' => 'esc character',
1401 'name' => 'function header',
1402 'regex' => 'function[\\t ]+([a-zA-Z0-9_]+)[\\t \\n]*(\\{|\\n)',
1403 'style' => 'function header',
1406 'name' => 'function coloring',
1407 'regex' => '[\\t ]([a-zA-Z0-9_]+)',
1408 'style' => 'function header name',
1414 'name' => 'regex matching I 1',
1415 'regex' => '(\\b| )?(/)(\\\\/|[^/\\n])*(/[gimesox]*)',
1420 'name' => 'regex matching I 2',
1421 'regex' => '(?:\\b| )(?:(?:m|q|qq)([!"#$%&\'*+-/]))(\\\\\\2|[^\\2\\n])*(\\2[gimesox]*)',
1426 'name' => 'regex matching II',
1427 'regex' => '(?:\\b| )?(?:s([!"#$%&\'*+-/]))(?:\\\\\\2|[^\\2\\n])*?(\\2)[^(\\2)\\n]*?(\\2[gimesox]*)',
1432 'name' => 'translate',
1433 'regex' => '(?:\\b| )(?:(?:tr|y)([^\w\s]))(?:\\\\\\2|[^\\2\\n])*?(\\2)[^(\\2)\\n]*?(\\2[gimesox]*)',
1438 'name' => 'keywords',
1439 'regex' => '\\b(BEGIN|END|ARGC|ARGIND|ARGV|CONVFMT|ENVIRON|ERRNO|FIELDWIDTHS|FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORS|RS|RT|RSTART|RLENGTH|SUBSEP)\\b',
1440 'style' => 'reserved word',
1444 'name' => 'keywords 2',
1445 'regex' => '\\b(if|while|do|for|in|break|continue|delete|exit|next|nextfile|function)\\b',
1446 'style' => 'reserved word',
1450 'name' => 'library fns',
1451 'regex' => '\\b(close|getline|print|printf|system|fflush|atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|length|split|sprintf|sub|substr|tolower|toupper|systime|strftime)\\b',
1452 'style' => 'library function',
1456 'name' => 'braces and parens',
1457 'regex' => '[\\[\\]\\{\\}\\(\\)]',
1458 'style' => 'braces',
1462 'name' => '<< stuff',
1463 'regex' => '<<\'([^\\n]*)\';.*?^\\2$',
1468 'name' => '<< stuff',
1469 'regex' => '<<([^\\n]*).*?^\\2$',
1493 'filename' => '\\.[ch]$',
1497 'name' => 'doc comment',
1498 'regex' => '/\\*\\*.*?\\*/',
1499 'style' => 'doc comment',
1503 'name' => 'comment',
1504 'regex' => '/\\*.*?\\*/',
1505 'style' => 'comment',
1510 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
1511 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1512 'style' => 'string',
1515 'name' => 'esc character',
1517 'style' => 'esc character',
1523 'name' => 'preprocessor line',
1524 'regex' => '^[ \\t]*#.*?$',
1525 'style' => 'preprocessor',
1529 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
1530 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1531 'style' => 'string',
1534 'name' => 'esc character',
1536 'style' => 'esc character',
1542 'name' => '<files>',
1544 'style' => 'string',
1548 'name' => 'comment',
1549 'regex' => '[^/]/\\*.*?\\*/',
1550 'style' => 'comment',
1556 'name' => 'character constant',
1557 'regex' => '\'(\\\\)?.\'',
1558 'style' => 'character',
1561 'name' => 'esc character',
1563 'style' => 'esc character',
1569 'name' => 'numeric constant',
1570 'regex' => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
1571 'style' => 'numeric',
1575 'name' => 'storage keyword',
1576 'regex' => '\\b(const|extern|auto|register|static|unsigned|signed|volatile|char|double|float|int|long|short|void|typedef|struct|union|enum)\\b',
1577 'style' => 'reserved word',
1581 'name' => 'keyword',
1582 'regex' => '\\b(return|goto|if|else|case|default|switch|break|continue|while|do|for|sizeof)\\b',
1583 'style' => 'reserved word',
1588 'regex' => '[\\{\\}]',
1589 'style' => 'braces',
1593 'name' => 'symbols',
1594 'regex' => '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',
1595 'style' => 'symbol',
1599 'name' => 'identifiers',
1600 'regex' => '([a-zA-Z_][a-zA-Z_0-9]*)',
1601 'style' => 'identifier',
1623 $LANGUAGE{'c++'} = {
1624 'filename' => '\\.(c(c|pp|xx)|h(h|pp|xx)|C(C|PP|XX)?|H(H|PP|XX)?|i)$',
1628 'name' => 'doc comment',
1629 'regex' => '/\\*\\*.*?\\*/',
1630 'style' => 'doc comment',
1634 'name' => 'comment',
1635 'regex' => '/\\*.*?\\*/',
1636 'style' => 'comment',
1640 'name' => 'cplus comment',
1641 'regex' => '//.*?$',
1642 'style' => 'comment',
1647 'regex' => '""|"\\\\\\\\"|".*?([^\\\\](\\\\\\\\)*)"',
1648 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1649 'style' => 'string',
1652 'name' => 'esc character',
1654 'style' => 'esc character',
1660 'name' => 'preprocessor line',
1661 'regex' => '^[ \\t]*#.*?$',
1662 'style' => 'preprocessor',
1666 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
1667 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1668 'style' => 'string',
1671 'name' => 'esc character',
1673 'style' => 'esc character',
1679 'name' => '<files>',
1681 'style' => 'string',
1685 'name' => 'comment',
1686 'regex' => '[^/]/\\*.*?\\*/',
1687 'style' => 'comment',
1691 'name' => 'cplus comment',
1692 'regex' => '//.*?$',
1693 'style' => 'comment',
1699 'name' => 'character constant',
1700 'regex' => '\'(\\\\)?.\'',
1701 'style' => 'character',
1704 'name' => 'esc character',
1706 'style' => 'esc character',
1712 'name' => 'numeric constant',
1713 'regex' => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
1714 'style' => 'numeric',
1718 'name' => 'storage keyword',
1719 'regex' => '\\b(class|typename|typeid|template|friend|virtual|inline|explicit|operator|overload|public|private|protected|const|extern|auto|register|static|mutable|unsigned|signed|volatile|char|double|float|int|long|short|bool|wchar_t|void|typedef|struct|union|enum)\\b',
1720 'style' => 'reserved word',
1724 'name' => 'keyword',
1725 'regex' => '\\b(new|delete|this|return|goto|if|else|case|default|switch|break|continue|while|do|for|catch|throw|sizeof|true|false|namespace|using|dynamic_cast|static_cast|reinterpret_cast)\\b',
1726 'style' => 'reserved word',
1731 'regex' => '[\\{\\}]',
1732 'style' => 'braces',
1736 'name' => 'symbols',
1737 'regex' => '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',
1738 'style' => 'symbol',
1742 'name' => 'identifiers',
1743 'regex' => '([a-zA-Z_][a-zA-Z_0-9]*)',
1744 'style' => 'identifier',
1749 $LANGUAGE{'cc'} = $LANGUAGE{'c++'};
1750 $LANGUAGE{'cpp'} = $LANGUAGE{'c++'};
1751 $LANGUAGE{'cxx'} = $LANGUAGE{'c++'};
1763 $LANGUAGE{'gpasm'} = {
1764 'filename' => '(?i)\\.(asm|inc)$',
1770 'style' => 'symbol',
1773 'name' => 'comment',
1775 'style' => 'comment',
1780 'regex' => '^[A-Za-z_][A-Za-z_0-9]*:?',
1781 'style' => 'identifier',
1786 'name' => 'menonics',
1787 'regex' => '^[ \t]+[A-Za-z_][A-Za-z_0-9]*',
1788 'style' => 'reserved word',
1793 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
1794 'style' => 'string',
1797 'name' => 'esc character',
1799 'style' => 'esc character',
1819 $LANGUAGE{'groff'} = {
1820 'filename' => '\\.groff$',
1824 'name' => 'comment',
1825 'regex' => '\\\\".*?$',
1826 'style' => 'comment',
1848 $LANGUAGE{'html'} = {
1849 'filename' => '(?i)\\.(html?|mhtml|php)$',
1853 'name' => 'comment',
1854 'regex' => '<!--.*?-->',
1855 'style' => 'comment',
1860 'regex' => '\\&[-.a-zA-Z0-9#]*;?',
1861 'style' => 'entity',
1866 'regex' => '<(/|!)?[-.a-zA-Z0-9]*.*?>',
1867 'style' => 'predefined identifier',
1870 'name' => 'double quote string',
1872 'style' => 'string',
1876 'name' => 'single quote string',
1877 'regex' => '\'.*?\'',
1878 'style' => 'string',
1882 'name' => 'brackets',
1884 'style' => 'braces',
1888 'name' => 'attribute',
1889 'regex' => '[^\'" ]+(?=.)',
1890 'style' => 'identifier',
1900 # Added May 17, 2002, Jim M.
1901 $LANGUAGE{'xml'} = {
1902 'filename' => '(?i)\\.(xml|xps|xsl|axp|ppd)?$',
1906 'name' => 'comment',
1907 'regex' => '<!--.*?-->',
1908 'style' => 'comment',
1913 'regex' => '\\&[-.a-zA-Z0-9#]*;?',
1914 'style' => 'entity',
1919 'regex' => '<(/|!)?[-.a-zA-Z0-9]*.*?>',
1920 'style' => 'predefined identifier',
1923 'name' => 'double quote string',
1925 'style' => 'string',
1929 'name' => 'single quote string',
1930 'regex' => '\'.*?\'',
1931 'style' => 'string',
1935 'name' => 'brackets',
1937 'style' => 'braces',
1941 'name' => 'attribute',
1942 'regex' => '[^\'" ]+(?=.)',
1943 'style' => 'identifier',
1967 $LANGUAGE{'java'} = {
1968 'filename' => '\\.java$',
1972 'name' => 'doc comment',
1973 'regex' => '/\\*\\*.*?\\*/',
1974 'style' => 'doc comment',
1978 'name' => 'comment',
1979 'regex' => '/\\*.*?\\*/',
1980 'style' => 'comment',
1984 'name' => 'cplus comment',
1985 'regex' => '//.*?$',
1986 'style' => 'comment',
1991 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
1992 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
1993 'style' => 'string',
1996 'name' => 'esc character',
1998 'style' => 'esc character',
2004 'name' => 'single quoted',
2005 'regex' => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
2006 # 'regex' => '\'\'|\'\\\\\\\\\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\'',
2007 'style' => 'string',
2011 'name' => 'numeric constant',
2012 'regex' => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
2013 'style' => 'numeric',
2017 'name' => 'include',
2018 'regex' => '\\b(import|package)\\b.*?$',
2019 'style' => 'include',
2022 'name' => 'esc character',
2023 'regex' => '\\\\(.|\\n)',
2024 'style' => 'esc character',
2028 'name' => 'comment',
2029 'regex' => '[^/]/\\*.*?\\*/',
2030 'style' => 'comment',
2036 'name' => 'storage keyword',
2037 'regex' => '\\b(abstract|boolean|byte|char|class|double|extends|final|float|int|interface|long|native|private|protected|public|short|static|transient|synchronized|void|volatile|implements)\\b',
2038 'style' => 'reserved word',
2042 'name' => 'keyword',
2043 'regex' => '\\b(break|case|catch|continue|default|do|else|false|finally|for|if|instanceof|new|null|return|super|switch|this|throw|throws|true|try|while)\\b',
2044 'style' => 'reserved word',
2048 'name' => 'braces and parens',
2049 'regex' => '[\\{\\}\\(\\)\\[\\]]',
2050 'style' => 'braces',
2054 'name' => 'Identifiers',
2055 'regex' => '\\b[a-zA-Z_][a-zA-Z0-9_]*\\b',
2056 'style' => 'identifier',
2060 'name' => 'symbols',
2061 'regex' => '([\\*\\-\\+=:;%&\\|<>!])',
2062 'style' => 'symbol',
2083 $LANGUAGE{'javascript'} = {
2084 'filename' => '(?i)\\.js$',
2088 'name' => 'comment',
2089 'regex' => '/\\*.*?\\*/',
2090 'style' => 'comment',
2094 'name' => 'cplus comment',
2095 'regex' => '//.*?$',
2096 'style' => 'comment',
2100 'name' => 'numeric constant',
2101 'regex' => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
2102 'style' => 'numeric',
2107 'regex' => '\\b(onAbort|onBlur|onClick|onChange|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onResize|onSelect|onSubmit|onUnload)\\b',
2108 'style' => 'reserved word',
2113 'regex' => '[\\{\\}]',
2114 'style' => 'braces',
2118 'name' => 'statements',
2119 'regex' => '\\b(break|continue|else|for|if|in|new|return|this|typeof|var|while|with)\\b',
2120 'style' => 'reserved word',
2124 'name' => 'function',
2125 'regex' => 'function[\\t ]+([a-zA-Z0-9_]+)[\\t \\(]+.*?[\\n{]',
2126 'style' => 'function header',
2129 'name' => 'function args',
2130 'regex' => '\\(.*?\\)',
2131 'style' => 'function header args',
2135 'name' => 'function name',
2136 'regex' => '[\\t ][a-zA-Z0-9_]+',
2137 'style' => 'function header name',
2143 'name' => 'built in object type',
2144 'regex' => '\\b(anchor|Applet|Area|Array|button|checkbox|Date|document|elements|FileUpload|form|frame|Function|hidden|history|Image|link|location|Math|navigator|Option|password|Plugin|radio|reset|select|string|submit|text|textarea|window)\\b',
2145 'style' => 'predefined type',
2150 'regex' => '".*?("|$)',
2151 'style' => 'string',
2155 'regex' => '(aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|#008000|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen|#[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9])',
2156 'style' => 'identifier',
2163 'regex' => '\'.*?(\'|$)',
2164 'style' => 'string',
2168 'regex' => '(aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|#008000|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen|#[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9])',
2169 'style' => 'identifier',
2175 'name' => 'event capturing',
2176 'regex' => '\\b(captureEvents|releaseEvents|routeEvent|handleEvent)\\b.*?(\\)|$)',
2177 'style' => 'reserved word',
2181 'name' => 'predefined methods',
2182 'regex' => '\\b(abs|acos|alert|anchor|asin|atan|atan2|back|big|blink|blur|bold|ceil|charAt|clear|clearTimeout|click|close|confirm|cos|escape|eval|exp|fixed|floor|focus|fontcolor|fontsize|forward|getDate|getDay|getHours|getMinutes|getMonth|getSeconds|getTime|getTimezoneOffset|getYear|go|indexOf|isNaN|italics|javaEnabled|join|lastIndexOf|link|log|max|min|open|parse|parseFloat|parseInt|pow|prompt|random|reload|replace|reset|reverse|round|scroll|select|setDate|setHours|setMinutes|setMonth|setSeconds|setTimeout|setTime|setYear|sin|small|sort|split|sqrt|strike|sub|submit|substring|sup|taint|tan|toGMTString|toLocaleString|toLowerCase|toString|toUpperCase|unescape|untaint|UTC|write|writeln)\\b',
2183 'style' => 'library function',
2187 'name' => 'properties',
2188 'regex' => '\\b(action|alinkColor|anchors|appCodeName|appName|appVersion|bgColor|border|checked|complete|cookie|defaultChecked|defaultSelected|defaultStatus|defaultValue|description|E|elements|enabledPlugin|encoding|fgColor|filename|forms|frames|hash|height|host|hostname|href|hspace|index|lastModified|length|linkColor|links|LN2|LN10|LOG2E|LOG10E|lowsrc|method|name|opener|options|parent|pathname|PI|port|protocol|prototype|referrer|search|selected|selectedIndex|self|SQRT1_2|SQRT2|src|status|target|text|title|top|type|URL|userAgent|value|vlinkColor|vspace|width|window)\\b',
2189 'style' => 'predefined identifier',
2193 'name' => 'operators',
2194 'regex' => '([=;->/&|])',
2195 'style' => 'symbol',
2200 $LANGUAGE{'js'} = $LANGUAGE{'javascript'};
2209 # written by Andreas Krennmair
2210 # extremely incomplete
2212 $LANGUAGE{'lisp'} = {
2213 'filename' => '\\.(lsp|l)$',
2219 'style' => 'braces',
2223 'name' => 'comment',
2225 'style' => 'comment',
2230 'regex' => '".*?("|$)',
2231 'style' => 'string',
2235 'name' => 'keywords',
2236 'regex' => '\\b(defun |xyz)\\b',
2237 'style' => 'reserved word',
2241 'name' => 'numeric constant',
2242 'regex' => '(#\([0-9]+ [0-9]+\)|[0-9]+)',
2243 'style' => 'numeric',
2247 'name' => 'identifiers',
2248 'regex' => '([-a-zA-Z]+)',
2249 'style' => 'identifier',
2266 'filename' => '\\.m4$',
2270 'regex' => 'dnl.*?$',
2271 'style' => 'doc comment',
2276 'style' => 'comment',
2280 'regex' => '\\b(define|undefine|defn|pushdef|popdef|indir|builtin|changequote|changecom|changeword|m4wrap|m4exit|include|sinclude|divert|undivert|divnum|cleardiv|shift|dumpdef|traceon|traceoff|debugfile|debugmode|len|index|regexp|substr|translit|patsubst|format|incr|decr|syscmd|esyscmd|sysval|maketemp|errprint)\\b',
2281 'style' => 'reserved word',
2285 'regex' => '\\b(ifdef|ifelse|loops)\\b',
2286 'style' => 'reserved word',
2289 'regex' => '[$]\\$?({[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
2290 'style' => 'identifier',
2314 $LANGUAGE{'make'} = {
2315 'filename' => '[Mm]akefile.*',
2319 'name' => 'Comment',
2321 'style' => 'comment',
2325 'name' => 'Assignment',
2326 'regex' => '^( *| [ \\t]*)[A-Za-z0-9_+]*[ \\t]*(\\+|:)?=',
2327 'style' => 'assignment',
2331 'name' => 'Dependency Line',
2332 'regex' => '^ *([A-Za-z0-9./$(){} _%+-]|\\n)*::?',
2333 'style' => 'dependency line',
2336 'name' => 'Dependency Target',
2337 'regex' => '[A-Za-z0-9./$(){} _%+-]+',
2338 'style' => 'dependency target',
2342 'name' => 'Dependency Continuation',
2343 'regex' => '\\\\\\n',
2344 'style' => 'dependency continuation',
2348 'name' => 'comment',
2350 'style' => 'comment',
2355 'regex' => '\\$([A-Za-z0-9_]|\\([^)]*\\)|{[^}]*})',
2360 'name' => 'int macro',
2361 'regex' => '\\$([<@*?%]|\\$@)',
2362 'style' => 'int macro',
2368 'name' => 'Continuation',
2370 'style' => 'continuation',
2375 'regex' => '\\$([A-Za-z0-9_]|\\([^)]*\\)|{[^}]*})',
2380 'name' => 'Internal Macro',
2381 'regex' => '\\$([<@*?%]|\\$@)',
2382 'style' => 'int macro',
2386 'name' => 'Escaped $$$',
2387 'regex' => '\\$\\$',
2388 'style' => 'esc $$$',
2392 'name' => 'Include',
2393 'regex' => '^include[ \\t]',
2394 'style' => 'include',
2399 $LANGUAGE{'makefile'} = $LANGUAGE{'make'};
2417 $LANGUAGE{'pas'} = {
2418 'filename' => '(?i)\\.p(as)?$',
2422 'name' => 'comment1 (* *)',
2423 'regex' => '\\(\\*.*?\\*\\)',
2424 'style' => 'comment',
2428 'name' => 'comment2 { }',
2429 'regex' => '\\{.*?\\}',
2430 'style' => 'comment',
2435 'regex' => '\'.*?(\'|$)',
2436 'style' => 'string',
2440 'name' => 'preprocessor line',
2441 'regex' => '^[ \\t]*#.*?$',
2442 'style' => 'preprocessor',
2445 'name' => 'comment1 (* *)',
2446 'regex' => '\\(\\*.*?\\*\\)',
2447 'style' => 'comment',
2451 'name' => 'comment2 { }',
2452 'regex' => '\\{.*?\\}',
2453 'style' => 'comment',
2459 'name' => 'character constant',
2461 'style' => 'character',
2465 'name' => 'numeric constant',
2466 'regex' => '\\b((0(x|X)[0-9a-fA-F]*)|[0-9.]+((e|E)(\\+|-)?)?[0-9]*)(L|l|UL|ul|u|U|F|f)?\\b',
2467 'style' => 'numeric',
2471 'name' => 'storage and ops',
2472 'regex' => '\\b(?i)(and|array|const|div|export|file|function|import|in|label|mod|module|nil|not|only|or|packed|pow|pragma|procedure|program|protected|qualified|record|restricted|set|type|var)\\b',
2473 'style' => 'reserved word',
2477 'name' => 'keywords',
2478 'regex' => '\\b(?i)(begin|case|do|downto|else|end|for|goto|if|of|otherwise|repeat|then|to|until|while|with)\\b',
2479 'style' => 'reserved word',
2483 'name' => 'sumbols',
2484 'regex' => '([\\*\\-\\+=:;<>\\(\\)\\[\\]!]|[^/]/[^/])',
2485 'style' => 'symbol',
2489 'name' => 'identifiers',
2490 'regex' => '([a-zA-Z_][a-zA-Z_0-9.^]*[a-zA-Z_0-9]|[a-zA-Z_][a-zA-Z_0-9]*)',
2491 'style' => 'identifier',
2494 'regex' => '(\\.|\\^)+',
2495 'style' => 'symbol',
2502 $LANGUAGE{'pascal'} = $LANGUAGE{'pas'};
2523 $LANGUAGE{'perl'} = {
2524 'filename' => '(?i)\\.p([lm5]|od)$',
2525 'regex' => '^\\s*#\\s*!([^\\s]*\\b|.*env\\s+)perl',
2528 'name' => 'comment',
2529 'regex' => '(?:#.*?(?:\r?\n\s*)+)+',
2530 'style' => 'comment',
2534 'name' => 'variables',
2535 'regex' => '[\\$@%]\\$?(?:{[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
2536 'style' => 'identifier',
2540 'name' => '"" string',
2541 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
2542 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
2543 'style' => 'string',
2546 'name' => 'esc character',
2548 'style' => 'esc character',
2552 'name' => 'variables',
2553 'regex' => '[\\$@%]\\$?(?:{[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
2554 'style' => 'identifier',
2560 'name' => '\'\' string',
2561 'regex' => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
2562 # 'regex' => '\'\'|\'\\\\\\\\\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\'',
2563 'style' => 'string',
2566 'name' => 'esc character',
2568 'style' => 'esc character',
2574 'name' => 'more strings - q// qw//',
2575 'regex' => '(?:\\b| )(?:q|qw)([^\w\s])(?:\\\\\\2|[^\\2\\n])*\\2',
2576 'style' => 'string',
2579 'name' => 'esc character',
2581 'style' => 'esc character',
2587 'name' => 'more strings - qq// qx//',
2588 'regex' => '(?:\\b| )(?:qq|qx)([^\w\s])(?:\\\\\\2|[^\\2\\n])*\\2',
2589 'style' => 'string',
2592 'name' => 'esc character',
2594 'style' => 'esc character',
2598 'name' => 'variables',
2599 'regex' => '[\\$@%]\\$?(?:{[^}]*}|[^a-zA-Z0-9_/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)?',
2600 'style' => 'identifier',
2606 'name' => 'subroutine header',
2607 'regex' => 'sub[\\t ]+(?:[a-zA-Z0-9_]+)[\\t \\n]*(?:\\{|\\(|\\n)',
2608 'style' => 'function header',
2611 'name' => 'subroutine header coloring',
2612 'regex' => '[\\t ][a-zA-Z0-9_]+',
2613 'style' => 'function header name',
2619 'name' => 'regex matching I',
2620 'regex' => '(?:\\b| )?(?:/(?:\\\\/|[^/\\n])*(?:/[gimesox]*)|s([^\w\s])(?:\\\\\\2|[^\\2\\n])*?(\\2)[^(\\2)\\n]*?(\\2[gimesox]*))',
2625 'name' => 'regex matching II',
2626 'regex' => '(?:\\b| )(?:m|qq?|tr|y)([^\w\s])(?:\\\\\\2|[^\\2\\n])*(?:\\2[gimesox]*)',
2631 'name' => 'keywords',
2632 'regex' => '\\b(my|local|new|if|until|while|elsif|else|eval|unless|for|foreach|continue|exit|die|last|goto|next|redo|return|local|exec|do|use|require|package|eval|BEGIN|END|eq|ne|not|\\|\\||\\&\\&|and|or)\\b',
2633 'style' => 'reserved word',
2637 'name' => 'library functions',
2638 'regex' => '\\b(?:a(?:bs|ccept|larm|tan2)|b(?:ind|inmode|less)|c(?:aller|hdir|hmod|homp|hop|hr|hroot|hown|losedir|lose|onnect|os|rypt)|d(?:bmclose|bmopen|efined|elete|ie|ump)|e(?:ach|nd(?:grent|hostent|netent|protoent|pwent|servent)|of|xec|xists|xp)|f(?:ctnl|ileno|lock|ork|ormat|ormline)|g(?:et(?:c|grent|grgid|grnam|hostbyaddr|hostbyname|hostent|login|netbyaddr|netbyname|netent|peername|pgrp|ppid|priority|protobyname|protobynumber|protoent|pwent|pwnam|pwuid|servbyname|servbyport|servent|sockname|sockopt)|lob|mtime|rep)|hex|i(?:mport|ndex|nt|octl)|join|keys|kill|l(?:cfirst|c|ength|ink|isten|og|ocaltime|stat)|m(?:ap|kdir|sgctl|sgget|sgrcv)|no|o(?:ct|pendir|pen|rd)|p(?:ack|ipe|op|os|rintf|rint|ush)|quotemeta|r(?:and|eaddir|ead|eadlink|ecv|ef|ename|eset|everse|ewinddir|index|mdir)|s(?:calar|eekdir|eek|elect|emctl|emget|emop|end|et(?:grent|hostent|netent|pgrp|priority|protoent|pwent|sockopt)|hift|hmctl|hmget|hmread|hmwrite|hutdown|in|leep|ocket|ocketpair|ort|plice|plit|printf|qrt|rand|tat|tudy|ubstr|ymlink|yscall|ysopen|ysread|ystem|yswrite)|t(?:elldir|ell|ie|ied|ime|imes|runcate)|u(?:c|cfirst|mask|ndef|nlink|npack|nshift|ntie|time)|values|vec|w(?:ait|aitpid|antarray|arn|rite)|qw|-[rwxoRWXOezsfdlpSbctugkTBMAC])\\b',
2639 'style' => 'library function',
2643 'name' => 'braces, parens and brakets',
2644 'regex' => '[\\[\\]\\{\\}\\(\\)]',
2645 'style' => 'braces',
2649 'name' => '<< stuff',
2650 'regex' => '<<(?:("|\')([^\\n]*)\\2|\\w*).*?^\\3$',
2656 'regex' => '^=.*?^(?:=cut|\\Z)',
2657 'style' => 'doc comment',
2677 # Thanks to Matt Giwer <jull43 AT ij.net>
2678 $LANGUAGE{'pov'} = {
2679 'filename' => '(?i)\\.pov$',
2683 'name' => 'doc comment',
2684 'regex' => '/\\*\\*.*?\\*/',
2685 'style' => 'doc comment',
2689 'name' => 'comment',
2690 'regex' => '/\\*.*?\\*/',
2691 'style' => 'comment',
2695 'name' => 'cplus comment',
2696 'regex' => '//.*?$',
2697 'style' => 'comment',
2702 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
2703 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
2704 'style' => 'string',
2707 'name' => 'esc character',
2709 'style' => 'esc character',
2715 'name' => 'preprocessor line',
2716 'regex' => '^[ \\t]*#.*?$',
2717 'style' => 'preprocessor',
2721 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
2722 # 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
2723 'style' => 'string',
2726 'name' => 'esc character',
2728 'style' => 'esc character',
2734 'name' => '<files>',
2736 'style' => 'string',
2740 'name' => 'comment',
2741 'regex' => '[^/]/\\*.*?\\*/',
2742 'style' => 'comment',
2746 'name' => 'cplus comment',
2747 'regex' => '//.*?$',
2748 'style' => 'comment',
2754 'name' => 'character constant',
2755 'regex' => '\'(\\\\)?.\'',
2756 'style' => 'character',
2759 'name' => 'esc character',
2761 'style' => 'esc character',
2767 'name' => 'numeric constant',
2768 'regex' => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
2769 'style' => 'numeric',
2773 'name' => 'keyword',
2774 'regex' => '\\b(abs|absorption|acos|acosh|adaptive|adc_bailout|agate|agate_turb|all|alpha|ambient|ambient_light|angle|aperture|append|arc_angle|area_light|array|asc|asin|asinh|assumed_gamma|atan|atan2|atanh|average|background|bezier_spline|bicubic_patch|black_hole|blob|blue|blur_samples|bounded_by|box|boxed|bozo|break|brick|brick_size|brightness|brilliance|bumps|bump_map|bump_size|camera|case|caustics|ceil|checker|chr|clipped_by|clock|clock_delta|color|color_map|colour|colour_map|component|composite|concat|cone|confidence|conic_sweep|control0|control1|cos|cosh|count|crackle|crand|cube|cubic|cubic_spline|cubic_wave|cylinder|cylindrical|debug|declare|default|defined|degrees|density|density_file|density_map|dents|difference|diffuse|dimensions|dimension_size|direction|disc|distance|distance_maximum|div|eccentricity|else|emission|end|error|error_bound|exp|extinction|fade_distance|fade_power|falloff|falloff_angle|false|fclose|file_exists|filter|finish|fisheye|flatness|flip|floor|focal_point|fog|fog_alt|fog_offset|fog_type|fopen|frequency|gif|global_settings|gradient|granite|gray_threshold|green|height_field|hexagon|hf_gray_16|hierarchy|hollow|hypercomplex|if|ifdef|iff|ifndef|image_map|include|int|interior|interpolate|intersection|intervals|inverse|ior|irid|irid_wavelength|jitter|julia_fractal|lambda|lathe|leopard|light_source|linear_spline|linear_sweep|local|location|log|looks_like|look_at|low_error_factor|macro|mandel|map_type|marble|material|material_map|matrix|max|max_intersections|max_iteration|max_trace_level|media|media_attenuation|media_interaction|merge|mesh|metallic|min|minimum_reuse|mod|mortar|nearest_count|no|normal|normal_map|no_shadow|number_of_waves|object|octaves|off|offset|omega|omnimax|on|once|onion|open|orthographic|panoramic|perspective|pgm|phase|phong|phong_size|pi|pigment|pigment_map|planar|plane|png|point_at|poly|polygon|poly_wave|pot|pow|ppm|precision|prism|pwr|quadratic_spline|quadric|quartic|quaternion|quick_color|quick_colour|quilted|radial|
radians|radiosity|radius|rainbow|ramp_wave|rand|range|ratio|read|reciprocal|recursion_limit|red|reflection|reflection_exponent|refraction|render|repeat|rgb|rgbf|rgbft|rgbt|right|ripples|rotate|roughness|samples|scale|scallop_wave|scattering|seed|shadowless|sin|sine_wave|sinh|sky|sky_sphere|slice|slope_map|smooth|smooth_triangle|sor|specular|sphere|spherical|spiral1|spiral2|spotlight|spotted|sqr|sqrt|statistics|str|strcmp|strength|strlen|strlwr|strupr|sturm|substr|superellipsoid|switch|sys|t|tan|tanh|text|texture|texture_map|tga|thickness|threshold|tightness|tile2|tiles|torus|track|transform|translate|transmit|triangle|triangle_wave|true|ttf|turbulence|turb_depth|type|u|ultra_wide_angle|undef|union|up|use_color|use_colour|use_index|u_steps|v|val|variance|vaxis_rotate|vcross|vdot|version|vlength|vnormalize|vrotate|v_steps|warning|warp|water_level|waves|while|width|wood|wrinkles|write|x|y|yes|z)\\b',
2775 'style' => 'reserved word',
2780 'regex' => '[\\{\\}]',
2781 'style' => 'braces',
2785 'name' => 'symbols',
2786 'regex' => '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',
2787 'style' => 'symbol',
2791 'name' => 'identifiers',
2792 'regex' => '([a-zA-Z_][a-zA-Z_0-9]*)',
2793 'style' => 'identifier',
2798 $LANGUAGE{'povray'} = $LANGUAGE{'pov'};
2804 $LANGUAGE{'python'} = {
2805 'filename' => '(?i)\\.py$',
2806 'regex' => '^\\s*#\\s*![^\\s]*python',
2809 'name' => 'python comment',
2811 'style' => 'comment',
2815 'name' => 'single quote string',
2816 'regex' => '\'.*?\'',
2817 'style' => 'string',
2823 'regex' => '""|"\\\\\\\\"|".*?([^\\\\](\\\\\\\\)*)"',
2824 'regex' => '""|".*?([^\\\\](\\\\\\\\)*)"|"\\\\\\\\"',
2825 'regex' => '""|"\\\\\\\\"|"[^"\\\\]"|"[^"].*?[^\\\\]"',
2826 'style' => 'string',
2829 'name' => 'esc character',
2831 'style' => 'esc character',
2837 'name' => 'character constant',
2838 'regex' => '\'(\\\\)?.\'',
2839 'style' => 'character',
2842 'name' => 'esc character',
2844 'style' => 'esc character',
2850 'name' => 'numeric constant',
2851 'regex' => '\\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\\.?[0-9]*)|(\\.[0-9]+))((e|E)(\\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f)?\\b',
2852 'style' => 'numeric',
2856 'name' => 'keyword',
2857 'regex' => '\\b(and|assert|break|class|continue|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|not|or|pass|print|raise|return|try|while)\\b',
2858 'style' => 'reserved word',
2863 'regex' => '[\\{\\}]',
2864 'style' => 'braces',
2868 'name' => 'symbols',
2869 'regex' => '([\\*\\-\\+=:;%&\\|<>\\(\\)\\[\\]!])',
2870 'style' => 'symbol',
2874 'name' => 'identifiers',
2875 'regex' => '([a-zA-Z_][a-zA-Z_0-9]*)',
2876 'style' => 'identifier',
2880 'name' => 'function',
2881 'regex' => '[\\t ]*def[\\t ]+([a-zA-Z0-9_]+)[\\t \\(]+.*?[\\n{]',
2882 'style' => 'function header',
2885 'name' => 'function args',
2886 'regex' => '\\(.*?\\)',
2887 'style' => 'function header args',
2891 'name' => 'function name',
2892 'regex' => '[\\t ][a-zA-Z0-9_]+',
2893 'style' => 'function header name',
2899 'name' => 'library functions', # names of Python built-in functions to highlight
2900 'regex' => '\\b(__import__|abs|apply|buffer|callable|chr|cmp|coerce|compile|complex|delattr|dir|divmod|eval|execfile|filter|float|getattr|globals|hasattr|hash|hex|id|input|int|intern|isinstance|issubclass|len|list|locals|long|map|max|min|oct|open|ord|pow|range|raw_input|reduce|reload|repr|round|setattr|slice|str|tuple|type|unichr|unicode|vars|xrange|zip)\\b', # whole-word match only
2901 'style' => 'library function',
2909 # by Joshua Swink <jswink AT pacbell.net>
2910 $LANGUAGE{'ruby'} = {
2911 'filename' => '\\.rb$',
2912 'regex' => '^\\s*#\\s*![^\\s]*\\bruby\\b',
2915 'name' => 'comment',
2916 'regex' => '(?:#.*?(?:\r?\n\s*)+)+',
2917 'style' => 'comment',
2921 'name' => 'predefined variables',
2922 'regex' => '(?:\\$(?:[!@&`\'+\\d~=/\\\\,;.<>_*\\$?:"]|DEBUG|FILENAME|LOAD_PATH|stdin|stdout|stderr|VERBOSE|-[0adFiIlpv])|\\b(?:TRUE|FALSE|NIL|STDIN|STDOUT|STDERR|ENV|ARGF|ARGV|DATA|RUBY_VERSION|RUBY_RELEASE_DATE|RUBY_PLATFORM)\\b)',
2923 'style' => 'predefined identifier',
2927 'name' => 'variables',
2928 'regex' => '[\\$@](?:{[^}]*}|[^\\w/\\t\\n\\.,\\\\[\\\\{\\\\(]|[0-9]+|[a-zA-Z_][\\w.]*)?',
2929 'style' => 'identifier',
2933 'name' => '"" string',
2934 'regex' => '""|"(?:\\\\\\\\)+"|".*?(?:[^\\\\](?:\\\\\\\\)*)"|%[Qwx]?([^\\w\\[\\](){}<>])\\2|%[Qwx]?([^\\w\\[\\](){}<>]).*?(?:[^\\\\](?:\\\\\\\\)*)\\3|%[Qwx]?([^\\w\\[\\](){}<>])\\\\\\\\\\4|%[Qwx]?\\[\\]|%[Qwx]?\\[.*?([^\\\\](\\\\\\\\)*)\\]|%[Qwx]?\\[\\\\\\\\\\]|%[Qwx]?\\{\\}|%[Qwx]?\\{.*?([^\\\\](\\\\\\\\)*)\\}|%[Qwx]?\\{\\\\\\\\\\}|%[Qwx]?\\(\\)|%[Qwx]?\\(.*?([^\\\\](\\\\\\\\)*)\\)|%[Qwx]?\\(\\\\\\\\\\)|%[Qwx]?<>|%[Qwx]?<.*?([^\\\\](\\\\\\\\)*)>|%[Qwx]?<\\\\\\\\>',
2936 'style' => 'string',
2939 'name' => 'esc character',
2940 'regex', => '\\\\(?:x[\\da-fA-F]{2}|\d\d\d|c.|M-\\\\C-.|M-.|C-.|.)',
2941 'style' => 'esc character',
2945 'name' => 'string expression',
2946 'regex' => '#[\\$\\@][a-zA-Z_][\\w.]*|#\\{[\\$\\@]?[^\\}]*\\}',
2947 'style' => 'identifier',
2953 'name' => '\'\' string',
2954 'regex' => '\'\'|\'(?:\\\\\\\\)+\'|\'.*?(?:[^\\\\](?:\\\\\\\\)*)\'|%q([^\\w\\[\\](){}<>])\\2|%q([^\\w\\[\\](){}<>]).*?(?:[^\\\\](?:\\\\\\\\)*)\\3|%q([^\\w\\[\\](){}<>])\\\\\\\\\\4|%q\\[\\]|%q\\[.*?([^\\\\](\\\\\\\\)*)\\]|%q\\[\\\\\\\\\\]|%q\\{\\}|%q\\{.*?([^\\\\](\\\\\\\\)*)\\}|%q\\{\\\\\\\\\\}|%q\\(\\)|%q\\(.*?([^\\\\](\\\\\\\\)*)\\)|%q\\(\\\\\\\\\\)|%q<>|%q<.*?([^\\\\](\\\\\\\\)*)>|%q<\\\\\\\\>',
2955 'style' => 'string',
2958 'name' => 'esc character',
2959 'regex' => '(?:\\\\\'|\\\\\\\\)',
2960 'style' => 'esc character',
2966 'name' => 'subroutine header',
2967 'regex' => 'def[\\t ]+\\w[\\w.]*(?:\\([^)]*\\))?',
2968 'style' => 'function header',
2971 'name' => 'arg list',
2972 'regex' => '\\(.*\\)',
2973 'style' => 'function header args',
2976 'name' => 'arg list parens',
2977 'regex' => '[\\(\\)]',
2978 'style' => 'symbol',
2984 'name' => 'subroutine header',
2985 'regex' => '[\\t ]\w+',
2986 'style' => 'function header name',
2992 'name' => 'class header',
2993 'regex' => 'class[\\t ]+\\w+(?:\\s*<\\s*\\w+)?',
2994 'style' => 'function header',
2997 'name' => 'class ancestor',
2998 'regex' => '<\\s*\\w+',
2999 'style' => 'include',
3002 'name' => 'inheritance doohickey',
3004 'style' => 'symbol',
3010 'name' => 'class main',
3011 'regex' => '[\\t ]\\w+',
3018 'name' => 'regex matching 0',
3019 'regex' => '(?:%r([^\\w\\[\\](){}<>])\\2|%r([^\\w\\[\\](){}<>]).*?(?:[^\\\\](?:\\\\\\\\)*)\\3|%r([^\\w\\[\\](){}<>])\\\\\\\\\\4|%r\\[\\]|%r\\[.*?([^\\\\](\\\\\\\\)*)\\]|%r\\[\\\\\\\\\\]|%r\\{\\}|%r\\{.*?([^\\\\](\\\\\\\\)*)\\}|%r\\{\\\\\\\\\\}|%r\\(\\)|%r\\(.*?([^\\\\](\\\\\\\\)*)\\)|%r\\(\\\\\\\\\\)|%r<>|%r<.*?([^\\\\](\\\\\\\\)*)>|%r<\\\\\\\\>)[ixpno]*',
3023 'name' => 'string expression',
3024 'regex' => '#[\\$\\@][a-zA-Z_][\\w.]*|#\\{[\\$\\@]?[a-zA-Z_][^\\}]*\\}',
3025 'style' => 'identifier',
3031 'name' => 'regex matching I',
3032 'regex' => '(?:\\b| )?(?:/(?:\\\\/|[^/\\n])*(?:/[ixpno]*))',
3036 'name' => 'string expression',
3037 'regex' => '#[\\$\\@][a-zA-Z_][\\w.]*|#\\{[\\$\\@]?[a-zA-Z_][^\\}]*\\}',
3038 'style' => 'identifier',
3044 'name' => 'reserved words',
3045 'regex' => '\\b(BEGIN|class|ensure|nil|self|when|END|def|false|not|super|while|alias|defined|for|or|then|yield|and|do|if|redo|true|begin|else|in|rescue|undef|break|elsif|module|retry|unless|case|end|next|return|until)\\b',
3046 'style' => 'reserved word',
3050 'name' => 'kernel module methods',
3051 'regex', => '\\b(Array|Float|Integer|String|at_exit|autoload|binding|caller|catch|chop|chomp|chomp!|eval|exec|exit|fail|fork|format|gets|global_variables|gsub|iterator|lambda|load|local_variables|loop|open|p|print|printf|proc|putc|puts|raise|rand|readline|readlines|require|select|sleep|split|sprintf|srand|sub|syscall|system|test|trace_var|trap|untrace_var)\\b',
3052 'style' => 'library function',
3056 'name' => 'braces, parens and brakets',
3057 'regex' => '[\\[\\]\\{\\}\\(\\)]',
3058 'style' => 'braces',
3062 'name' => '<< stuff',
3063 'regex' => '<<(?:("|\')([^\\n]*)\\2|\\w*).*?^\\3$',
3068 'name' => 'symbols', # runs of operator characters, or two or more consecutive dots
3069 'regex' => '(?:[:*+<>=^!,/-]+|\.\.+)', # '-' is placed last in the class so it is a literal, not a range operator
3070 'style' => 'symbol',
3074 'name' => 'numbers',
3075 'regex' => '\d[\d.]*',
3076 'style' => 'numeric',
3080 'name' => 'embedded documentation',
3081 'regex' => '^=.*?^(?:=end|\\Z)',
3082 'style' => 'doc comment',
3091 $LANGUAGE{'sql'} = {
3092 'filename' => '(?i)\\.sql$',
3096 'name' => 'keywords I',
3097 'regex' => '(?i)(,|%|<|>|:=|=|\\(|\\)|\\bselect|on|from|order by|desc|where|and|or|not|null|true|false)\\b',
3098 'style' => 'reserved word',
3102 'name' => 'comment I',
3103 'regex' => '--.*?$',
3104 'style' => 'comment',
3108 'name' => 'comment II',
3109 'regex' => '/\\*.*?\\*/',
3110 'style' => 'comment',
3115 'regex' => '\'\'|\'.*?([^\\\\](\\\\\\\\)*)\'|\'\\\\\\\\\'',
3116 # 'regex' => '(\'\'|\'[^\'\\\\]\'|\'[^\'].*?[^\\\\]\')',
3117 'style' => 'string',
3121 'name' => 'keywords II',
3122 'regex' => '(?i)end if;|\\b(create|replace|begin|end|function|return|fetch|open|close|into|is|in|when|others|grant|on|to|exception|show|set|out|pragma|as|package)\\b',
3123 'style' => 'reserved word',
3127 'name' => 'keywords III',
3128 'regex' => '(?i)\\balter\\b',
3129 'style' => 'reserved word',
3133 'name' => 'datatypes', # SQL column type names
3134 'regex' => '(?i)\\b(integer|blob|date|numeric|character|varying|varchar|char)\\b', # case-insensitive, whole words only
3135 'style' => 'predefined type',
3140 'regex' => '(?i)\\b(constraint|key|references|primary|table|foreign|add|insert|group by)\\b',
3141 'style' => 'reserved word',
3150 # enhanced by W. Friebel
3151 $LANGUAGE{'patch'} = {
3152 'filename' => '(?i)\\.patch$|\\.diff$',
3157 'regex' => '^Index: .*?$|^===== .*?$|^diff .*?$|^--- .*?$|^\+\+\+ .*?$|^\*\*\* .*?$',
3158 'style' => 'separator',
3163 'regex' => '^@@ .*?$',
3164 'style' => 'line spec',
3169 'regex' => '^-.*?$',
3170 'style' => 'deletion',
3175 'regex' => '^\+.*?$',
3176 'style' => 'insertion',
3181 'regex' => '^\!.*?$',
3182 'style' => 'modification',
3192 # LANGUAGE: shell script
3195 $LANGUAGE{'shellscript'} = {
3196 'filename' => '\\.(sh|shell)$',
3197 'regex' => '^\\s*#\\s*![^\\s]*(sh|bash|ash|zsh|ksh)',
3199 'name' => 'comment',
3200 # 'regex' => '^[ \t]*[^$]?\#[^!]?.*?$',
3201 'regex' => '(^| )#([^\\!].)*?$',
3202 'style' => 'comment',
3205 'name' => 'identifier',
3206 'regex' => '[a-zA-Z][a-zA-Z0-9_]*=',
3207 'style' => 'identifier',
3209 'name' => 'identifier',
3210 'regex' => '[a-zA-Z][a-zA-Z0-9_]*',
3211 'style' => 'identifier',
3215 'name' => 'identifier',
3216 'regex' => '\\$([0-9#\\*]|[a-zA-Z][a-zA-Z0-9_]*)',
3217 'style' => 'identifier',
3220 'name' => 'interpreter line',
3221 'regex' => '^[ \t]*#!.*?$',
3222 'style' => 'preprocessor',
3226 'regex' => '""|"(\\\\"|[^\\"])*"',
3227 'style' => 'string',
3229 'name' => 'identifier',
3230 'regex' => '\\$([0-9#\\*]|[a-zA-Z][a-zA-Z0-9_]*)',
3231 'style' => 'identifier',
3237 $LANGUAGE{'sh'} = $LANGUAGE{'shellscript'}; # register 'sh' as a second name for the 'shellscript' entry
3242 getopts('i:l:') || exit 2; # parse -i and -l (Getopt::Std-style spec: each takes a value); exit with status 2 if parsing fails
3243 $str = main(parse_passed_params( infile => $ARGV[0] || '-',
3245 # linenumbers => 1 ,
3246 langmode => $opt_l ,
3247 outputformat => 'xterm' ,
3248 # many other options
3257 Convert source code (c,java,perl,html,...) into formatted html.
3262 $html = code2html( $sourcecode );
3264 code2html( infile => 'file.java' ,
3265 outfile => 'file.html',
3267 langmode => 'perl' ,
3268 # many other options
3273 Code2HTML converts source code into color-coded, formatted html,
3274 either as a simple code2html() function call, or as an Apache handler.
3276 This package is an adaptation of Peter Palfrader's code2html application.
3282 exports the function code2html(), which takes the following arguments
3285 input => $source_code,
3286 infile => 'filename.extension',
3288 outfile => 'file.html',
3289 outputformat => 'html', # or html-dark, or ...
3291 langmode => 'java', # or perl,html,c,...
3292 langfile => 'langFile', # specify alternative
3293 # syntax definitions
3295 linenumbers => 1, # turn on linenumbers
3296 linknumbers => 1, # linenumber links
3297 line_number_prefix => '-', # linenumber anchors
3298 replacetabs => 8, # tabs to spaces
3300 noheader => '', # don't use template
3301 template => 'filename', # override template
3303 title => $title, # set html page title
3304 content_type => 1, # output httpd header
3307 All input parameters are optional except the source code
3308 specification, which must be defined by either input or infile keys, or
3309 by passing exactly one argument which will then be taken to be the
3312 input source code to be converted (or set source -infile)
3314 infile name of file with code to be converted (or use -input)
3316 langmode language of source file. If omitted, code2html
3317 will try to guess the language from the file extension
3318 or start of the source code. Language modes provided are
3320 ada, ada95, awk, c, c++, cc, cxx, groff, html,
3321 java, javascript, js, m4, make, makefile, pas, pascal,
3322 patch, perl, plain, pov, povray, python, ruby, sh, shellscript, sql.
3324 langfile filename of file with alternative syntax definitions
3326 outfile name of file to put html in. If omitted,
3327 just return html in $html=code2html(...)
3329 outputformat style of output html. Available formats are
3330 html (default), html-dark, html-light, html-nobg.
3332 replacetabs replace tabs in source with given number of spaces
3334 title set title of output html page
3336 content_type output a Content-Type httpd header
3338 linenumbers print line numbers in source code listing
3342 Jim Mahoney (mahoney AT marlboro.edu), Peter Palfrader, and others.
3344 =head1 COPYRIGHT and LICENSE
3346 Copyright (c) 1999, 2000 by Peter Palfrader and others.
3348 Permission is hereby granted, free of charge, to any person obtaining
3349 a copy of this software and associated documentation files (the
3350 ``Software''), to deal in the Software without restriction, including
3351 without limitation the rights to use, copy, modify, merge, publish,
3352 distribute, sublicense, and/or sell copies of the Software, and to
3353 permit persons to whom the Software is furnished to do so, subject to
3354 the following conditions:
3356 The above copyright notice and this permission notice shall be
3357 included in all copies or substantial portions of the Software.
3359 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
3360 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3361 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
3362 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
3363 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
3364 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
3365 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3369 Peter Palfrader's Code2HTML page at http://www.palfrader.org/code2html/