ewmscp  ..
inputHandlerPosixFile.cpp
Go to the documentation of this file.
2 #include "block.h"
3 #include "copyRequestTypes.h"
4 #include "ewmscp.h"
5 #include "timer.h"
6 
7 #include <fcntl.h>
8 #include <errMsgQueue.h>
9 #include <throwcall.h>
10 #include <unistd.h>
11 
12 #include <memory>
13 
14 namespace inputHandler {
15  decltype(posixFile::factory) posixFile::factory("posixFile");
16  decltype(posixFile::preserveAtime) posixFile::preserveAtime('\0', "preserveAtime",
17  "preserve atime of inpout files", false);
18  std::unique_ptr<base::reader> posixFile::newReader(const std::string& aPath,
20  const genericStat& inititalStat) {
21  return std::unique_ptr<base::reader>(new readerPosixFile(aPath,
22  state,
23  inititalStat));
24  }
25 
26 
27  bool posixFile::readLinkTarget(const std::string& path,
28  std::vector<char>& target) {
29  timerInst(readlink);
30  auto linklength = readlink(path.c_str(), target.data(), target.size());
31  if (linklength == -1 && errno == ENOENT) {
32  return false;
33  }
34  throwcall::badval(linklength, -1, "could not read link ", path);
35  if (linklength >= static_cast<ssize_t>(target.size())) {
36  throw std::runtime_error("link size increased after stat for " + path);
37  }
38  target[linklength] = '\0';
39  return true;
40  }
41 
42 
43 
46  const genericStat&inititalStat):
47  posixFileIoCommon(aPath),
48  reader(inititalStat) {
49  {
50  timerInst(open);
51  fd = open(path.c_str(), O_RDONLY);
52  }
53  if (fd == -1) {
54  if (errno == ENOENT
55  || (errno == ENOTDIR && !S_ISDIR(readInitialStat.mode))) {
57  }
58  }
59  throwcall::badval(fd, -1, "can't open ", path, " for reading");
61  {
62  timerInst(posix_fadvise);
63  throwcall::good0(posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL),
64  "can't advise ", path, " as sequential");
65  }
66  {
67  timerInst(posix_fadvise);
68  throwcall::good0(posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE),
69  "can't advise ", path, " as use only once");
70  }
71  }
72 
74  if (fd != -1) {
75  if (isUnwinding()) {
76  timerInst(close);
77  if (close(fd) != 0) {
79  path, "close during unwind ",
80  std::system_category().default_error_condition(errno).message());
81  }
82  return;
83  }
84  if (preserveAtime) {
85  struct timespec times[2];
86  readInitialStat.getAtime(times[0]);
87  times[1].tv_nsec = UTIME_OMIT; // leave mtime unchanged
88  timerInst(futimens);
89  throwcall::good0(futimens(fd, times), "can't set time stamps on ", path, " to old values");
90  }
91  {
92  timerInst(close);
93  throwcall::good0(close(fd), "can't close ", path, " after reading");
94  }
95  }
96  }
97 
98 
99 
103  bool posixFile::readerPosixFile::setupSparseRegions(const std::string& sparseHandling) {
104  if (readInitialStat.size < readInitialStat.blksize) { // cannot be sparse
105  return false;
106  }
107  if (sparseHandling == "auto" &&
108  readInitialStat.size <= readInitialStat.sizeOnDisk) {
109  return false;
110  } else if (sparseHandling == "never") {
111  return false;
112  }
113  auto what = SEEK_HOLE;
114  for (std::remove_const<decltype(readInitialStat.size)>::type pos = 0;
115  pos < readInitialStat.size;
116  what = what == SEEK_HOLE ? SEEK_DATA : SEEK_HOLE) {
117  timerInst(lseek);
118  size_t next;
119  auto result = lseek(fd, pos, what);
120  if (result == -1 && errno == ENXIO) { // we found a hole at the end of the file
121  next = readInitialStat.size;
122  } else {
123  throwcall::badval(result, -1,
124  "seek ", path, " for ", what == SEEK_HOLE ? "hole" : "data",
125  ", from offset ", pos);
126  next = result;
127  }
128  if (next != pos) {
129  regions.emplace_back(pos, next, what == SEEK_DATA);
130  pos = next;
131  }
132  }
133  timerInst(lseek);
134  throwcall::good0(lseek(fd, 0, SEEK_SET), "re-seek to start of ", path);
135  return !regions.empty();
136  }
137 
138 
140  throwcall::badval(lseek(fd, pos, SEEK_SET),
141  -1, "can't seek ", path, " to ", pos);
142  if (! regions.empty()) {
143  while (regions.front().getEnd() < pos) {
144  regions.pop_front();
145  if (regions.empty()) {
146  break;
147  }
148  }
149  }
150  }
152  return regions.empty();
153  }
155  b.clear(totalBytesRead);
156  bool lastblock = false;
157 
158  auto bytesToRead = b.max_size();
159  if (!regions.empty()) {
160  auto region = regions.front();
161  if (region.isHole()) {
162  b.setHoleState(true);
163  b.bump_size(region.size());
164  timerInst(lseek);
165  size_t nextData;
166  auto result = lseek(fd, totalBytesRead, SEEK_DATA);
167  if (result == -1 && errno == ENXIO) { // this is the final hole
168  nextData = region.getEnd();
169  } else {
170  throwcall::badval(result, -1, "seek in ", path, " to data after ", totalBytesRead);
171  nextData = result;
172  }
173  totalBytesRead += region.size(); // hole counts as total size...
174  if (nextData != totalBytesRead) {
175  throw delayAdvisingError(path + " hole moved druring reading, (" +
176  std::to_string(totalBytesRead) +
177  " -> " +
178  std::to_string(nextData) +
179  ")");
180  }
181  regions.pop_front();
182  return regions.empty();
183  } else { // we are somewhere in a data region...
184  if (region.getEnd() < totalBytesRead + bytesToRead) {
185  bytesToRead = region.getEnd() - totalBytesRead; // read only up to end of region
186  }
187  if (region.getEnd() == totalBytesRead + bytesToRead) { // we finish the region in this block
188  regions.pop_front();
189  }
190  }
191  }
192 
193  while (b.size() + blockSize <= bytesToRead) {
195  timerInst(read);
196  auto bytes_read = throwcall::badval(read(fd, b.bufferAt(b.size()), blockSize),
197  -1, "read failed on ", path);
198  readRateLimit.update(bytes_read);
199  if (bytes_read == 0) {
200  lastblock = true;
201  if (totalBytesRead < readInitialStat.size) {
202  throw delayAdvisingError(path + " has shrunk while reading, (" +
203  std::to_string(readInitialStat.size) +
204  " -> " +
205  std::to_string(totalBytesRead) +
206  ")");
207  }
208  break;
209  }
210  totalBytesRead += bytes_read;
211  if (totalBytesRead > readInitialStat.size) {
212  throw delayAdvisingError(path + " has grown while reading, (" +
213  std::to_string(readInitialStat.size) +
214  " -> " +
215  std::to_string(totalBytesRead) +
216  ")");
217  }
218 
219  b.bump_size(bytes_read);
220  }
221 
222  return lastblock;
223  }
224 
225 
226  void posixFile::readerPosixFile::readBlockP(block& b, size_t bytesToRead, off_t offset) {
227  if (!regions.empty()) {
228  throw std::logic_error("parallel read attempted on sparse file");
229  }
230  b.clear(offset);
231  while (b.size() + blockSize <= b.max_size()) {
232  timerInst(pread);
233  auto bytes_read = throwcall::badval(pread(fd, b.bufferAt(b.size()), blockSize, offset + b.size()),
234  -1, "read failed on ", path);
235  if (bytes_read == 0) {
236  break;
237  }
238  b.bump_size(bytes_read);
239  if (b.size() > bytesToRead) {
240  throw delayAdvisingError(path + " has grown while reading");
241  }
242  }
243  if (b.size() < bytesToRead) {
244  throw delayAdvisingError(path + " has shrunk while reading "
245  + std::to_string(bytesToRead)
246  + " "
247  + std::to_string(b.size()));
248  }
249  }
250 
251 
253  struct stat readFinalStatBuf;
254  {
255  timerInst(fstat);
256  throwcall::good0(fstat(fd, &readFinalStatBuf), "can't stat path file ", path);
257  }
258  genericStat readFinalStat(readFinalStatBuf, std::chrono::nanoseconds(1));
259  if (readFinalStat.size != readInitialStat.size) {
260  throw delayAdvisingError("file size has changed (" +
261  std::to_string(readInitialStat.size) +
262  " -> " +
263  std::to_string(readFinalStat.size) +
264  ") during reading on " + path);
265  }
266 
267  if (!readFinalStat.isSameMtimeAs(readInitialStat)) {
268  throw delayAdvisingError("file " + path + " was modified (" +
269  std::to_string(std::chrono::duration_cast<std::chrono::duration<double>>(readFinalStat.getMtime() - readInitialStat.getMtime()).count()) +
270  "s different mtime) during reading");
271  }
272  }
273  posixFile::PosixDirectory::PosixDirectory(const std::string& aPath): Directory(aPath) {
274  timerInst(opendir);
275  dir = throwcall::badval(opendir(path.c_str()), nullptr, "can't open directory ", path);
276  }
278  if (isUnwinding()) {
279  if (closedir(dir) != 0) {
281  "", "close directory during unwind ",
282  std::system_category().default_error_condition(errno).message());
283  }
284  } else {
285  timerInst(closedir);
286  throwcall::good0(closedir(dir), "can't close directory ", path);
287  }
288  }
289  std::unique_ptr<base::Directory::Entry> posixFile::PosixDirectory::getNextEntry(bool ignoreMissing) {
290  static timer::anchor a("readdir");
292  while (auto entry = readdir(dir)) {
293  i.stop();
294  if (entry->d_name[entry->d_name[0] != '.' ? 0 : entry->d_name[1] != '.' ? 1 : 2] == '\0') {
295  continue; // skip . .. and empty strings
296  i.restart();
297  }
298  struct stat statbuf;
299  {
300  timerInst(fstatat);
301  auto result = fstatat(dirfd(dir), entry->d_name, &statbuf, AT_SYMLINK_NOFOLLOW);
302  if (result != 0 && errno == ENOENT && ignoreMissing) {
303  continue;
304  }
305  throwcall::good0(result, "can't stat ", entry->d_name);
306  }
307  auto genStat = std::unique_ptr<const genericStat>(new genericStat(statbuf, std::chrono::nanoseconds(1)));
308  return std::unique_ptr<Entry>(new Entry(entry->d_name, genStat));
309  }
310  return nullptr;
311  }
312  std::unique_ptr<base::Directory> posixFile::getDirectory(const std::string& path) {
313  return std::unique_ptr<Directory>(new PosixDirectory(path));
314  }
315 
316 } //end namespace inputHandler
inputHandler::posixFile::readerPosixFile::seek
void seek(size_t pos) override
like the standard seek, to be used only when appending new data to a file
Definition: inputHandlerPosixFile.cpp:139
block.h
timer::instanceUnscoped::restart
void restart()
Definition: timer.h:133
inputHandler::posixFile::readerPosixFile::readBlock
bool readBlock(block &b) override
read one block from the file
Definition: inputHandlerPosixFile.cpp:154
genericStat::mode
mode_t mode
Definition: genericStat.h:22
delayAdvisingError
class for exceptions that advise for delays. Exceptions of this kind are to be thrown when circumstanc...
Definition: inputHandler.h:22
inputHandler::posixFile::readerPosixFile::readBlockP
void readBlockP(block &b, size_t bytesToRead, off_t offset) override
read one block from the file, starting at offset.
Definition: inputHandlerPosixFile.cpp:226
errMsgQueue.h
block::max_size
size_t max_size() const
Definition: block.h:22
inputHandler::posixFile::PosixDirectory::~PosixDirectory
~PosixDirectory() noexcept(false) override
Definition: inputHandlerPosixFile.cpp:277
inputHandler
Definition: inputHandler.h:29
errMsg::location
class for defining the location of an error message in the source code.
Definition: errMsgQueue.h:14
inputHandler::posixFile::factory
static factoryTemplate< posixFile > factory
Definition: inputHandlerPosixFile.h:16
throwcall::badval
T badval(T call, t badvalue, const Args &... args)
template function to wrap system calls that return a special bad value on failure
Definition: throwcall.h:54
genericStat
generic stat abstraction class. Used to abstract the variants of the stat structure.
Definition: genericStat.h:12
inputHandler::posixFile::PosixDirectory::getNextEntry
std::unique_ptr< Entry > getNextEntry(bool ignoreMissing) override
Definition: inputHandlerPosixFile.cpp:289
copyRequestTypes.h
inputHandler::posixFile::readerPosixFile::readerPosixFile
readerPosixFile(const std::string &aPath, copyRequest::stateType &state, const genericStat &inititalStat)
Definition: inputHandlerPosixFile.cpp:44
inputHandler::posixFile::newReader
std::unique_ptr< reader > newReader(const std::string &aPath, copyRequest::stateType &state, const genericStat &inititalStat) override
get a reader for the file at path
Definition: inputHandlerPosixFile.cpp:18
posixFileIoCommon::fd
int fd
Definition: posixFileCommon.h:21
genericStat::getMtime
void getMtime(struct timespec &spec) const
Definition: genericStat.cpp:65
inputHandler::posixFile::readerPosixFile::~readerPosixFile
~readerPosixFile() override
Definition: inputHandlerPosixFile.cpp:73
copyRequest::stateBitType::vanished
@ vanished
block::bump_size
void bump_size(size_t additionalBytes)
Definition: block.h:33
copyRequest::stateType
Definition: copyRequestTypes.h:66
inputHandlerPosixFile.h
inputHandler::base::reader::readInitialStat
const genericStat & readInitialStat
stat at beginning, needed for unchangedness check
Definition: inputHandler.h:74
genericStat::isSameMtimeAs
bool isSameMtimeAs(const genericStat &that) const
Definition: genericStat.cpp:87
readRateLimit
throttle::watch readRateLimit
errMsg::level::debug
@ debug
genericStat::blksize
size_t blksize
Definition: genericStat.h:18
inputHandler::posixFile::readerPosixFile
Definition: inputHandlerPosixFile.h:44
ioHandle::blockSize
size_t blockSize
in bytes, block size to be used when reading or writing
Definition: ioHandle.h:17
inputHandler::posixFile::preserveAtime
static options::single< bool > preserveAtime
Definition: inputHandlerPosixFile.h:17
genericStat::size
size_t size
Definition: genericStat.h:16
timer::instanceUnscoped::stop
void stop()
Definition: timer.h:107
block::setHoleState
void setHoleState(bool holeState)
Definition: block.h:39
block::bufferAt
void * bufferAt(size_t offset)
only way to access the data in the block
Definition: block.cpp:28
timer.h
throwcall.h
block
data block, used to hold the data that are being copied (or checksummed).
Definition: block.h:7
inputHandler::posixFile::readerPosixFile::checkUnchangedness
void checkUnchangedness() override
check if the file is unchanged by comparing current stat to initial stat
Definition: inputHandlerPosixFile.cpp:252
inputHandler::posixFile::PosixDirectory::dir
DIR * dir
Definition: inputHandlerPosixFile.h:70
timer::instanceUnscoped
Definition: timer.h:95
timer::anchor
Definition: timer.h:22
inputHandler::posixFile::readLinkTarget
bool readLinkTarget(const std::string &path, std::vector< char > &target) override
read link target from a symlink
Definition: inputHandlerPosixFile.cpp:27
throttle::watch::wait
void wait()
Definition: throttle.h:50
errMsg::emit
void emit(level aLogLevel, const location &loc, const std::string &aObject, const std::string &aAction, const Args &... args)
function to create and enqueue a message, this is the only way that messages should be created!
Definition: errMsgQueue.h:148
timerInst
#define timerInst(subfunc)
Definition: timer.h:157
block::size
size_t size() const
Definition: block.h:16
posixFileIoCommon
base class for posixFile reader and writer class with the common stuff like fd, path and xattr handli...
Definition: posixFileCommon.h:17
inputHandler::posixFile::readerPosixFile::setupSparseRegions
bool setupSparseRegions(const std::string &sparseHandling) override
create a region list which maps the holes in the source file
Definition: inputHandlerPosixFile.cpp:103
inputHandler::posixFile::readerPosixFile::parallelizable
bool parallelizable() const override
tell if this handler is capable of parallel IO. Usually not the case
Definition: inputHandlerPosixFile.cpp:151
inputHandler::posixFile::PosixDirectory
Definition: inputHandlerPosixFile.h:69
inputHandler::posixFile::getDirectory
std::unique_ptr< Directory > getDirectory(const std::string &path) override
Definition: inputHandlerPosixFile.cpp:312
inputHandler::posixFile::PosixDirectory::PosixDirectory
PosixDirectory(const std::string &path)
Definition: inputHandlerPosixFile.cpp:273
throwcall::good0
void good0(T call, const Args &... args)
template function to wrap system calls that return 0 on success
Definition: throwcall.h:40
block::clear
void clear(size_t aOffset)
Definition: block.h:28
throttle::watch::update
void update(double units=1.0)
Definition: throttle.h:35
posixFileIoCommon::path
const std::string & path
Definition: posixFileCommon.h:20
inputHandler::base::reader
(abstract) class for reading a file. An instance of this class is used to read data from a file via th...
Definition: inputHandler.h:72
ewmscp.h