2 # Filename: bincompare.pl
3 # Purpose: Binary File Similarity Checking
4 # Authors: (C) Copyright 2004 Diomidis Spinellis
5 # Bug-Reports: see http://grml.org/bugs/
6 # License: See notes by author (Diomidis Spinellis).
7 ################################################################################
8 # See http://www.dmst.aueb.gr/dds/blog/20040319/index.html
12 # (C) Copyright 2004 Diomidis Spinellis
14 # Permission to use, copy, and distribute this software and its
15 # documentation for any purpose and without fee is hereby granted,
16 # provided that the above copyright notice appear in all copies and that
17 # both that copyright notice and this permission notice appear in
18 # supporting documentation.
20 # THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
22 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
24 # Return the entropy of the file passed as the argument
# NOTE(review): the enclosing sub header, the unpacking of the argument into
# $file, the assignment of $l and the return statement are not visible in
# this excerpt.  The comments below assume $l holds the uncompressed size of
# $file in bytes -- TODO confirm against the full file.
33 # File information (approximation)
# Compress $file to standard output with bzip2 and count the resulting bytes
# with wc; $i receives wc's output, i.e. the compressed size that serves as
# the approximation of the file's information content.
# NOTE(review): $file is interpolated into a shell command line unquoted, so
# a name containing spaces or shell metacharacters will be misinterpreted.
34 $i = `bzip2 -c $file | wc -c`;
# Warn on STDERR when $l is greater than 900 * 1024, the figure the message
# refers to as the bzip2 block size.  This statement only prints a warning;
# it does not abort.
35 print STDERR "$0: warning file size exceeds bzip2 block size\n" if ($l > 900 * 1024);
40 # Return the entropy of the two files passed as arguments
# NOTE(review): the enclosing sub header, the statements that move data from
# IN to OUT, any close/unlink calls and the return statement are not visible
# in this excerpt.  Presumably both inputs are written one after the other
# into the scratch file before it is measured -- TODO confirm.
# Unpack the two file names supplied by the caller.
44 my($file1, $file2) = @_;
# Scratch file path; the process ID ($$) is appended to the name.
# NOTE(review): the name is predictable and lives in /tmp; File::Temp would
# avoid name clashes and symlink attacks.
46 my($tmp) = ("/tmp/entropy.$$");
# Open the first input for reading; die with the OS error text on failure.
# NOTE(review): two-argument open with a bareword handle; mode characters at
# the start or end of the file name would be interpreted by open.
49 open(IN, $file1) || die "read from $file1: $!\n";
# Create (or truncate) the scratch file for writing.
51 open(OUT, ">$tmp") || die "write to $tmp: $!\n";
# Re-use the IN handle for the second input; opening a handle that is
# already open implicitly closes the previous file first.
53 open(IN, $file2) || die "read from $file2: $!\n";
# Measure the scratch file with entropy() and keep the result in $e.
58 my($e) = (entropy($tmp));
# Main program: compare two files by their compression-based entropy.
#
# Expects exactly two file names on the command line and dies with a usage
# message otherwise.  Prints four lines to standard output:
#   - the entropy of the first file,
#   - the entropy of the second file,
#   - the mean of the two (the "predicted" combined entropy),
#   - the value entropy2() returns for the pair (the "actual" combined
#     entropy).
#
# The file names are handed to printf as %s arguments instead of being
# interpolated into the format string, so a name containing '%' is printed
# literally rather than being parsed as a conversion specification.
$#ARGV == 1 || die "Usage $0: file1 file2\n";

# Each entropy() call is made immediately before its own report line so that
# any warning it writes to STDERR keeps its position relative to the output.
my $e0 = entropy($ARGV[0]);
printf("%.3g - Entropy of %s\n", $e0, $ARGV[0]);

my $e1 = entropy($ARGV[1]);
printf("%.3g - Entropy of %s\n", $e1, $ARGV[1]);

printf("%.3g - Combined predicted entropy\n", ($e0 + $e1) / 2);
printf("%.3g - Combined actual entropy\n", entropy2($ARGV[0], $ARGV[1]));