[C++] Measuring entropy of file / partition

[C++] Measuring entropy of file / partition

You can get further information about entropy here.
Here is the code of my program, entropy.cpp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// entropy.cpp
// Author: Wojtek Jamrozy (www.wojtekrj.net)
#include <cstdio>
#include <cstring>
#include <cmath>
// Chunk buffer that main() fills with each fread() call.
unsigned char buffer[524290];
// Occurrence counter for every possible byte value (index = byte value).
long long count[256] = {0};
// Running total of bytes read from the input file.
long long sum = 0;
 
/*
 * Measures the Shannon entropy of the byte distribution of a file.
 *
 * Usage: entropy <file>
 *
 * The file named by argv[1] is read in binary mode in 512 KiB chunks; every
 * byte value is tallied in the global `count` table and the total number of
 * bytes is accumulated in the global `sum`.  The entropy is then printed both
 * as bits per byte and as a percentage of the 8-bit maximum.
 *
 * Returns 0 on success, 1 on a usage error, when the file cannot be opened,
 * or when a read error occurs.
 */
int main(int argc, char* argv[])
{
	if (argc != 2)
	{
		printf("Usage: entropy <file>\n");
		return 1;
	}

	FILE *pFile = fopen(argv[1], "rb");
	if (pFile == NULL)
	{
		perror("Error opening file");
		return 1;
	}

	// fread returns size_t; a return of 0 means end of file or an error.
	size_t rnumber;
	while ((rnumber = fread(buffer, 1, 524288, pFile)) > 0)
	{
		sum += (long long)rnumber;
		for (size_t i = 0; i < rnumber; ++i)
			count[buffer[i]]++;
	}

	// Distinguish a genuine end of file from a failed read before closing,
	// otherwise a truncated sample would silently yield a wrong entropy.
	const int readFailed = ferror(pFile);
	if (readFailed)
		perror("Error reading file");
	fclose(pFile);
	if (readFailed)
		return 1;

	// H = -sum(p * ln p) / ln 256, i.e. entropy normalised to the range [0, 1].
	// Byte values that never occur are skipped (their term is 0 and ln 0 is
	// undefined); this also keeps an empty file from dividing by zero.
	// std::log is used so that the long double overload is selected.
	long double result = 0.0L;
	for (int i = 0; i < 256; ++i)
	{
		if (!count[i]) continue;
		const long double p = (long double)count[i] / (long double)sum;
		result -= p * std::log(p);
	}
	result /= std::log(256.0L);

	printf("Number of samples: %lld\n", sum);
	printf("Entropy %.10Lf bits per byte (%.2LF %%)\n", result*8, result*100.0);
	return 0;
}

For better performance, compile it with the following command:

g++ -O2 -static entropy.cpp -o entropy

Usage:

./entropy file

Leave a Reply

Your email address will not be published. Required fields are marked *