// Reports whether `path` names a directory.
//
// Uses lstat(), so a symbolic link is examined itself rather than its
// target: a symlink pointing at a directory yields false.
//
// Returns false when `path` is NULL or when lstat() fails (e.g. the path
// does not exist or is not accessible), because the stat buffer holds no
// valid data in that case.
bool isDir(const char* path) {
    struct stat st;
    if (path == NULL || lstat(path, &st) != 0) {
        return false;
    }
    return S_ISDIR(st.st_mode) != 0;
}
// Computes a hash over the full contents of the file at `path` by feeding
// every byte, in order, through hashFunc(), starting from 0.
//
// Returns 0 when the file cannot be opened or a read error occurs (a message
// is written to cerr in both cases). An empty file also yields 0, since no
// byte is ever folded into the initial value.
uint64_t getHash(string& path) {
    // Binary mode: the hash must cover the raw bytes, with no newline
    // translation on platforms that distinguish text from binary streams.
    FILE *fp = fopen(path.c_str(), "rb");
    if (fp == NULL) {
        cerr << " Can not open the file " << path << endl;
        return 0;
    }

    uint64_t hash_key = 0;
    char buffer[4096];
    size_t got;
    while ((got = fread(buffer, 1, sizeof buffer, fp)) > 0) {
        for (size_t i = 0; i < got; i++) {
            hash_key = hashFunc(hash_key, buffer[i]);
        }
    }

    // fread() returns a short count on both EOF and error; a partial hash
    // from a failed read must not be reported as the file's hash.
    bool readFailed = ferror(fp) != 0;
    fclose(fp);
    if (readFailed) {
        cerr << " Can not read the file " << path << endl;
        return 0;
    }
    return hash_key;
}
// Recursively walks the directory tree rooted at `path` and records every
// non-directory entry in `hash`, keyed by the value getHash() returns for it.
//
// Entries whose hash is 0 (getHash()'s failure value) are skipped.
// Directories that cannot be opened are silently ignored.
void getSimilarFiles(string& path, unordered_map<uint64_t, vector<string> >& hash) {
    DIR* dirFile = opendir(path.c_str());
    if (dirFile == NULL) return;

    struct dirent* hFile;
    while ((hFile = readdir(dirFile)) != NULL) {
        // Skip the self and parent entries to avoid infinite recursion.
        if (strcmp(hFile->d_name, ".") == 0 || strcmp(hFile->d_name, "..") == 0) {
            continue;
        }

        string srcPath(path);
        srcPath.append("/");
        srcPath.append(hFile->d_name);

        if (isDir(srcPath.c_str())) {
            getSimilarFiles(srcPath, hash);
        } else {
            uint64_t hashValue = getHash(srcPath);
            if (hashValue) {
                hash[hashValue].push_back(srcPath);
            }
        }
    }

    // Release the directory stream; without this every visited directory
    // leaks a descriptor and deep trees eventually make opendir() fail.
    closedir(dirFile);
}
// Writes every group of two or more paths sharing a hash key to cout.
// Each group is preceded by a banner line, followed by one path per line.
// Keys with a single path are not printed.
void printHash(unordered_map<uint64_t, vector<string> >& hash) {
    typedef unordered_map<uint64_t, vector<string> >::iterator GroupIter;
    typedef vector<string>::const_iterator PathIter;

    for (GroupIter group = hash.begin(); group != hash.end(); ++group) {
        const vector<string>& files = group->second;
        if (files.size() > 1) {
            cout << "=========the files below are the same" << endl;
            for (PathIter file = files.begin(); file != files.end(); ++file) {
                cout << *file << endl;
            }
        }
    }
}
// Entry point: scans the directory tree under `pathInput`, groups files by
// content hash, and prints every group that contains more than one file.
void getFileSet(const char* pathInput) {
    string rootDir = pathInput;
    unordered_map<uint64_t, vector<string> > filesByHash;

    getSimilarFiles(rootDir, filesByHash);
    printHash(filesByHash);
}
// (stray web-page text, not part of the program: "Leave a comment")