kiddy

Same File

leave a comment »

// Reports whether `path` names a directory.
//
// Uses lstat(), so a symbolic link is examined itself rather than its
// target: a symlink that points at a directory yields false.
// Returns false when lstat() fails (e.g. the path does not exist or is not
// accessible) instead of inspecting an uninitialized stat buffer.
bool isDir(const char* path) {
	struct stat st;
	if (lstat(path, &st) != 0) {
		return false;
	}
	return S_ISDIR(st.st_mode) != 0;
}

// Computes a hash of the contents of the file at `path` by folding every
// byte, in file order, through hashFunc() starting from 0.
//
// Returns 0 when the file cannot be opened or a read error occurs (a message
// is written to cerr in both cases). A file with no bytes also returns 0,
// because hashFunc() is never invoked for it; callers in this file treat 0
// as "no usable hash".
uint64_t getHash(string& path) {
	// Binary mode so the bytes hashed are exactly the bytes on disk, even on
	// platforms where text mode translates line endings.
	FILE *fp = fopen(path.c_str(), "rb");
	if (fp == NULL) {
		cerr << " Can not open the file " << path << endl;
		return 0;
	}
	uint64_t hash_key = 0;
	// Read in chunks; the bytes are still fed to hashFunc() one at a time and
	// in the same order, so the resulting hash matches a byte-by-byte read.
	char buf[4096];
	size_t got;
	while ((got = fread(buf, 1, sizeof(buf), fp)) > 0) {
		for (size_t i = 0; i < got; i++) {
			hash_key = hashFunc(hash_key, buf[i]);
		}
	}
	// fread() returning 0 means either EOF or an error; a hash over a
	// partially-read file must not be reported as the file's hash.
	bool readFailed = ferror(fp) != 0;
	fclose(fp);
	if (readFailed) {
		cerr << " Can not read the file " << path << endl;
		return 0;
	}
	return hash_key;
}

// Recursively walks the directory tree rooted at `path` and records every
// non-directory entry in `hash`, keyed by the value getHash() returns for it.
//
// - "." and ".." are skipped.
// - Entries for which isDir() is true are descended into.
// - Entries whose hash is 0 (getHash()'s failure value) are not recorded.
// - If `path` cannot be opened as a directory the call does nothing.
//
// Files sharing a key are grouped on the hash value alone; no byte-for-byte
// comparison is performed here.
void getSimilarFiles(string& path, unordered_map<uint64_t, vector<string> >& hash) {
	DIR* dirFile = opendir(path.c_str());
	if (dirFile == NULL)	return;
	struct dirent* hFile;
	while ((hFile = readdir(dirFile)) != NULL) {
		if (!strcmp(hFile->d_name, "."))	continue;
		if (!strcmp(hFile->d_name, ".."))	continue;
		string srcPath(path);
		srcPath.append("/");
		srcPath.append(hFile->d_name);
		if (isDir(srcPath.c_str())) {
			getSimilarFiles(srcPath, hash);
		} else {
			uint64_t hashValue = getHash(srcPath);
			if (hashValue) {
				hash[hashValue].push_back(srcPath);
			}
		}
	}
	// Release the directory stream; without this every visited directory
	// leaks a handle, and a large tree can exhaust the open-file limit.
	closedir(dirFile);
}

// Writes to cout every group in `hash` that holds more than one path.
// Each such group is preceded by a banner line and followed by its paths,
// one per line, in the order they are stored. Groups with a single path
// (or none) produce no output.
void printHash(unordered_map<uint64_t, vector<string> >& hash) {
	typedef unordered_map<uint64_t, vector<string> >::iterator GroupIter;
	typedef vector<string>::const_iterator PathIter;

	for (GroupIter group = hash.begin(); group != hash.end(); ++group) {
		const vector<string>& paths = group->second;
		if (paths.size() > 1) {
			cout << "=========the files below are the same" << endl;
			for (PathIter p = paths.begin(); p != paths.end(); ++p) {
				cout << *p << endl;
			}
		}
	}
}

// Entry point: scans the directory tree rooted at `pathInput`, groups the
// files found by content hash via getSimilarFiles(), and prints every group
// containing more than one file via printHash().
void getFileSet(const char* pathInput) {
	string root(pathInput);
	unordered_map<uint64_t, vector<string> > groups;

	getSimilarFiles(root, groups);
	printHash(groups);
}

Written by linzhongzl

October 29, 2014 at 3:25 pm

Posted in Others

Leave a comment