ewmscp  ..
sparseMap.cpp
Go to the documentation of this file.
1 #include <fcntl.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <sys/mman.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <unistd.h>
8 
9 #include "throwcall.h"
10 #include <Options.h>
11 #include <algorithm>
12 #include <list>
13 #include <set>
14 
/// Check whether a memory region consists entirely of zero bytes.
///
/// The bulk of the region is compared one `long` at a time. Each word is
/// fetched with memcpy, so the buffer does not have to be aligned for `long`.
/// Bytes left over when count is not a multiple of sizeof(long) are checked
/// individually instead of being silently ignored.
///
/// @param buffer start of the memory to inspect (never written to)
/// @param count  number of bytes to inspect
/// @return true if all count bytes are zero (trivially true for count == 0)
bool is_nul(void *buffer, size_t count) {
    const unsigned char *cursor = static_cast<const unsigned char*>(buffer);

    // word-wise pass over the part that holds complete longs
    for (; count >= sizeof(long); count -= sizeof(long), cursor += sizeof(long)) {
        long word;
        memcpy(&word, cursor, sizeof(word));
        if (word != 0) {
            return false;
        }
    }

    // byte-wise pass over the remaining tail (fewer than sizeof(long) bytes)
    for (; count > 0; --count, ++cursor) {
        if (*cursor != 0) {
            return false;
        }
    }

    return true;
}
32 
/// Write a number to a stream in decimal, followed by its hexadecimal form in parentheses.
///
/// The decimal value is right-aligned in a field of 16 characters, the
/// hexadecimal value (with base prefix) in a field of 10 characters.
/// The stream is left in decimal mode; the showbase flag stays set.
///
/// @param ost    stream to write to
/// @param number value to print
template <typename T> void printNumber(std::ostream& ost, T number) {
    // decimal representation
    ost.setf(std::ios_base::dec, std::ios_base::basefield);
    ost.width(16);
    ost << number;

    // hexadecimal representation in parentheses
    ost << " (";
    ost.setf(std::ios_base::hex, std::ios_base::basefield);
    ost.setf(std::ios_base::showbase);
    ost.width(10);
    ost << number;
    ost << " )";

    // hand the stream back in decimal mode
    ost.setf(std::ios_base::dec, std::ios_base::basefield);
}
38 
/// Describe a region as "from <start> to <end> size <end - start>".
///
/// Each of the three numbers is formatted by printNumber().
///
/// @param ost   stream to write to
/// @param start first offset of the region
/// @param end   offset just behind the region
template <typename T1, typename T2> void printRegion(std::ostream& ost, T1 start, T2 end) {
    const auto length = end - start;

    ost << "from ";
    printNumber(ost, start);

    ost << " to ";
    printNumber(ost, end);

    ost << " size ";
    printNumber(ost, length);
}
47 
48 int main(int argc, const char *argv[]) {
49  options::parser parser("map data and hole regions in sparse files");
50  options::single<bool> findZeroes('f', "findZeroes", "look for blocks of zeroes", false);
51  options::single<size_t> defaultBlockSize('b', "blockSize", "block size to search in (o: take from filesystem)", 0);
52  options::single<bool> preserveAtime('p', "preserveAtime", "preserve atime of the file", false);
53  preserveAtime.fRequire(&findZeroes);
54  defaultBlockSize.fRequire(&findZeroes);
55  auto remainingOptions = parser.fParse(argc, argv);
56 
57 
58 
59  for (const auto& file : remainingOptions) {
60  auto fd = throwcall::badval(open(file.c_str(), O_RDONLY), -1, "open ", file);
61  struct stat statbuf;
62  throwcall::good0(fstat(fd, &statbuf), "stat ", file);
63  std::cout << file << " has " << statbuf.st_size << " bytes, " <<
64  statbuf.st_blocks << " blocks, i.e. " << statbuf.st_blocks * 512 << " bytes allocated, " <<
65  (statbuf.st_blocks * 512.) / static_cast<double>(statbuf.st_size) << " fill factor\n";
66 
67  auto blockSize = statbuf.st_blksize;
68  if (defaultBlockSize != 0) {
69  blockSize = defaultBlockSize;
70  }
71 
72  void* blkBuf = nullptr;
73  if (findZeroes) {
74  blkBuf = throwcall::badval(mmap(nullptr, blockSize,
75  PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
76  -1, 0), MAP_FAILED,
77  "mmap block buffer with ", blockSize, " bytes");
78  }
79 
80 
81  std::vector<std::pair<int, std::string>> what({{SEEK_HOLE, "data"}, {SEEK_DATA, "hole"}});
82  for (decltype(statbuf.st_size) pos = 0, toggle = 1, region = 0;
83  pos < statbuf.st_size;
84  toggle ^= 1) {
85  auto next = lseek(fd, pos, what.at(toggle).first);
86  if (next == -1 && errno == ENXIO) { // we found a hole at the end of the file
87  next = statbuf.st_size;
88  } else {
89  throwcall::badval(next, -1,
90  "seek ", file, "for ", what.at(toggle).second,
91  ", from offset ", pos);
92  }
93  if (next != pos) {
94  region++;
95  std::cout << std::setw(6) << region << ": " << what.at(toggle).second << "\t";
96  printRegion(std::cout, pos, next);
97  std::cout << "\n";
98  if (findZeroes && what.at(toggle).first == SEEK_HOLE) {
99  size_t zeroBlocksStart = 0;
100  bool lastBlockWasData = true;
101  for (auto offset = pos; offset < next;) {
102  auto bytesRead = pread(fd, blkBuf, blockSize, offset);
103  if (is_nul(blkBuf, blockSize)) {
104  if (lastBlockWasData) {
105  zeroBlocksStart = offset;
106  }
107  lastBlockWasData = false;
108  } else {
109  if (!lastBlockWasData) {
110  std::cout << "\tzeroes\t";
111  printRegion(std::cout, zeroBlocksStart, offset);
112  std::cout << "\n";
113  }
114  lastBlockWasData = true;
115  }
116  offset += bytesRead;
117  }
118  if (!lastBlockWasData) {
119  std::cout << "\tzeroes";
120  printRegion(std::cout, zeroBlocksStart, next);
121  std::cout << "\n";
122  }
123  }
124  }
125  pos = next;
126  }
127  if (preserveAtime) {
128  struct timespec times[2];
129  memcpy(&times[0], &statbuf.st_atim, sizeof(struct timespec));
130  memcpy(&times[1], &statbuf.st_mtim, sizeof(struct timespec));
131  throwcall::good0(futimens(fd, times), "set tims stamps on ", file, " to old values");
132  }
133 
134  throwcall::good0(close(fd), "close ", file);
135  if (blkBuf != nullptr) {
136  throwcall::good0(munmap(blkBuf, blockSize), "munmap block buffer with ", blockSize, " bytes");
137  }
138  }
139  return EXIT_SUCCESS;
140 }
options::parser
class that contains the parser, i.e. does the option handling
Definition: Options.h:363
main
int main(int argc, const char *argv[])
Definition: sparseMap.cpp:48
options::single< size_t >
printRegion
void printRegion(std::ostream &ost, T1 start, T2 end)
Definition: sparseMap.cpp:39
options::single< bool >
class specialisation for options of type bool
Definition: Options.h:595
throwcall::badval
T badval(T call, t badvalue, const Args &... args)
template function to wrap system calls that return a special bad value on failure
Definition: throwcall.h:54
options::parser::fParse
const std::vector< std::string > & fParse(int argc, const char *argv[])
parse the options on the command line
Definition: Options.cpp:168
is_nul
bool is_nul(void *buffer, size_t count)
Definition: sparseMap.cpp:20
printNumber
void printNumber(std::ostream &ost, T number)
Definition: sparseMap.cpp:33
Options.h
throttle::start
static auto start
Definition: throttle.h:10
options::base::fRequire
virtual void fRequire(const base *aOtherOption)
require aOtherOption when this option is set
Definition: Options.cpp:611
throwcall.h
throwcall::good0
void good0(T call, const Args &... args)
template function to wrap system calls that return 0 on success
Definition: throwcall.h:40