2 # Filename: bincompare.pl
3 # Purpose: Binary File Similarity Checking
4 # Authors: (C) Copyright 2004 Diomidis Spinellis
5 # Bug-Reports: see http://grml.org/bugs/
6 # License: See notes by author (Diomidis Spinellis).
7 # Latest change: Sam Okt 16 22:54:03 CEST 2004 [mika]
8 ################################################################################
9 # See http://www.dmst.aueb.gr/dds/blog/20040319/index.html
13 # (C) Copyright 2004 Diomidis Spinellis
15 # Permission to use, copy, and distribute this software and its
16 # documentation for any purpose and without fee is hereby granted,
17 # provided that the above copyright notice appear in all copies and that
18 # both that copyright notice and this permission notice appear in
19 # supporting documentation.
21 # THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
22 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
23 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25 # Return the entropy of the file passed as the argument
34 # File information (approximation)
35 $i = `bzip2 -c $file | wc -c`;
36 print STDERR "$0: warning file size exceeds bzip2 block size\n" if ($l > 900 * 1024);
41 # Return the entropy of the two files passed as arguments
45 my($file1, $file2) = @_;
47 my($tmp) = ("/tmp/entropy.$$");
50 open(IN, $file1) || die "read from $file1: $!\n";
52 open(OUT, ">$tmp") || die "write to $tmp: $!\n";
54 open(IN, $file2) || die "read from $file2: $!\n";
59 my($e) = (entropy($tmp));
# Main program: report the entropy of the two files named on the command
# line, individually and combined.
#
# Output (standard output, one line each):
#   - entropy of the first file
#   - entropy of the second file
#   - mean of the two values (the "predicted" combined entropy)
#   - the value returned by entropy2() for the two files together
#
# Dies with a usage message unless exactly two arguments are given.
@ARGV == 2 or die "Usage $0: file1 file2\n";

# The file names are passed as %s arguments instead of being interpolated
# into the format string: a '%' inside a file name would otherwise be
# interpreted by printf as a conversion specifier and corrupt the output.
# Each entropy is computed immediately before its line is printed, so the
# order of computation and output matches the order of the arguments.
my $e0 = entropy($ARGV[0]);
printf("%.3g - Entropy of %s\n", $e0, $ARGV[0]);

my $e1 = entropy($ARGV[1]);
printf("%.3g - Entropy of %s\n", $e1, $ARGV[1]);

printf("%.3g - Combined predicted entropy\n", ($e0 + $e1) / 2);
printf("%.3g - Combined actual entropy\n", entropy2($ARGV[0], $ARGV[1]));