Copying files byte-by-byte does work, but is slow and wasteful on modern UNIXes. Modern UNIXes have “copy-on-write” support built-in to the filesystem: a system call makes a new directory entry pointing at the existing bytes on disk, and no file content bytes on disk are touched until one of the copies is modified, at which point only the changed blocks are written to disk. This allows near-instant file copies that use no additional file blocks, regardless of file size. For example, here are some details about how this works in xfs.
On linux, use the FICLONE
ioctl
as coreutils cp now does by default.
#ifdef FICLONE
return ioctl (dest_fd, FICLONE, src_fd);
#else
errno = ENOTSUP;
return -1;
#endif
On macOS, use clonefile(2) for instant copies on APFS volumes. This is what Apple’s cp -c
uses. The docs are not completely clear but it is likely that copyfile(3) with COPYFILE_CLONE
also uses this. Leave a comment if you’d like me to test that.
In case these copy-on-write operations are not supported—whether the OS is too old, the underlying file system does not support it, or because you are copying files between different filesystems—you do need to fall back to trying sendfile, or as a last resort, copying byte-by-byte. But to save everyone a lot of time and disk space, please give FICLONE
and clonefile(2)
a try first.
There is no baked-in equivalent CopyFile function in the APIs. But sendfile can be used to copy a file in kernel mode which is a faster and better solution (for numerous reasons) than opening a file, looping over it to read into a buffer, and writing the output to another file.
Update:
As of Linux kernel version 2.6.33, the limitation requiring the output of sendfile
to be a socket was lifted and the original code would work on both Linux and — however, as of OS X 10.9 Mavericks, sendfile
on OS X now requires the output to be a socket and the code won't work!
The following code snippet should work on the most OS X (as of 10.5), (Free)BSD, and Linux (as of 2.6.33). The implementation is "zero-copy" for all platforms, meaning all of it is done in kernelspace and there is no copying of buffers or data in and out of userspace. Pretty much the best performance you can get.
#include <fcntl.h>
#include <unistd.h>
#if defined(__APPLE__) || defined(__FreeBSD__)
#include <copyfile.h>
#else
#include <sys/sendfile.h>
#endif
int OSCopyFile(const char* source, const char* destination)
{
int input, output;
if ((input = open(source, O_RDONLY)) == -1)
{
return -1;
}
if ((output = creat(destination, 0660)) == -1)
{
close(input);
return -1;
}
//Here we use kernel-space copying for performance reasons
#if defined(__APPLE__) || defined(__FreeBSD__)
//fcopyfile works on FreeBSD and OS X 10.5+
int result = fcopyfile(input, output, 0, COPYFILE_ALL);
#else
//sendfile will work with non-socket output (i.e. regular file) on Linux 2.6.33+
off_t bytesCopied = 0;
struct stat fileinfo = {0};
fstat(input, &fileinfo);
int result = sendfile(output, input, &bytesCopied, fileinfo.st_size);
#endif
close(input);
close(output);
return result;
}
EDIT: Replaced the opening of the destination with the call to creat()
as we want the flag O_TRUNC
to be specified. See comment below.
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#define print_err(format, args...) printf("[%s:%d][error]" format "\n", __func__, __LINE__, ##args)
#define DATA_BUF_SIZE (64 * 1024) //limit to read maximum 64 KB data per time
int32_t get_file_size(const char *fname){
struct stat sbuf;
if (NULL == fname || strlen(fname) < 1){
return 0;
}
if (stat(fname, &sbuf) < 0){
print_err("%s, %s", fname, strerror(errno));
return 0;
}
return sbuf.st_size; /* off_t shall be signed interge types, used for file size */
}
bool copyFile(CHAR *pszPathIn, CHAR *pszPathOut)
{
INT32 fdIn, fdOut;
UINT32 ulFileSize_in = 0;
UINT32 ulFileSize_out = 0;
CHAR *szDataBuf;
if (!pszPathIn || !pszPathOut)
{
print_err(" Invalid param!");
return false;
}
if ((1 > strlen(pszPathIn)) || (1 > strlen(pszPathOut)))
{
print_err(" Invalid param!");
return false;
}
if (0 != access(pszPathIn, F_OK))
{
print_err(" %s, %s!", pszPathIn, strerror(errno));
return false;
}
if (0 > (fdIn = open(pszPathIn, O_RDONLY)))
{
print_err("open(%s, ) failed, %s", pszPathIn, strerror(errno));
return false;
}
if (0 > (fdOut = open(pszPathOut, O_CREAT | O_WRONLY | O_TRUNC, 0777)))
{
print_err("open(%s, ) failed, %s", pszPathOut, strerror(errno));
close(fdIn);
return false;
}
szDataBuf = malloc(DATA_BUF_SIZE);
if (NULL == szDataBuf)
{
print_err("malloc() failed!");
return false;
}
while (1)
{
INT32 slSizeRead = read(fdIn, szDataBuf, sizeof(szDataBuf));
INT32 slSizeWrite;
if (slSizeRead <= 0)
{
break;
}
slSizeWrite = write(fdOut, szDataBuf, slSizeRead);
if (slSizeWrite < 0)
{
print_err("write(, , slSizeRead) failed, %s", slSizeRead, strerror(errno));
break;
}
if (slSizeWrite != slSizeRead) /* verify wheter write all byte data successfully */
{
print_err(" write(, , %d) failed!", slSizeRead);
break;
}
}
close(fdIn);
fsync(fdOut); /* causes all modified data and attributes to be moved to a permanent storage device */
close(fdOut);
ulFileSize_in = get_file_size(pszPathIn);
ulFileSize_out = get_file_size(pszPathOut);
if (ulFileSize_in == ulFileSize_out) /* verify again wheter write all byte data successfully */
{
free(szDataBuf);
return true;
}
free(szDataBuf);
return false;
}
One option is that you could use system()
to execute cp
. This just re-uses the cp(1)
command to do the work. If you only need to make another link to the file, this can be done with link()
or symlink()
.
There is no need to either call non-portable APIs like sendfile
, or shell out to external utilities. The same method that worked back in the 70s still works now:
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
int cp(const char *to, const char *from)
{
int fd_to, fd_from;
char buf[4096];
ssize_t nread;
int saved_errno;
fd_from = open(from, O_RDONLY);
if (fd_from < 0)
return -1;
fd_to = open(to, O_WRONLY | O_CREAT | O_EXCL, 0666);
if (fd_to < 0)
goto out_error;
while (nread = read(fd_from, buf, sizeof buf), nread > 0)
{
char *out_ptr = buf;
ssize_t nwritten;
do {
nwritten = write(fd_to, out_ptr, nread);
if (nwritten >= 0)
{
nread -= nwritten;
out_ptr += nwritten;
}
else if (errno != EINTR)
{
goto out_error;
}
} while (nread > 0);
}
if (nread == 0)
{
if (close(fd_to) < 0)
{
fd_to = -1;
goto out_error;
}
close(fd_from);
/* Success! */
return 0;
}
out_error:
saved_errno = errno;
close(fd_from);
if (fd_to >= 0)
close(fd_to);
errno = saved_errno;
return -1;
}
There is a way to do this, without resorting to the system
call, you need to incorporate a wrapper something like this:
#include <sys/sendfile.h>
#include <fcntl.h>
#include <unistd.h>
/*
** http://www.unixguide.net/unix/programming/2.5.shtml
** About locking mechanism...
*/
int copy_file(const char *source, const char *dest){
int fdSource = open(source, O_RDWR);
/* Caf's comment about race condition... */
if (fdSource > 0){
if (lockf(fdSource, F_LOCK, 0) == -1) return 0; /* FAILURE */
}else return 0; /* FAILURE */
/* Now the fdSource is locked */
int fdDest = open(dest, O_CREAT);
off_t lCount;
struct stat sourceStat;
if (fdSource > 0 && fdDest > 0){
if (!stat(source, &sourceStat)){
int len = sendfile(fdDest, fdSource, &lCount, sourceStat.st_size);
if (len > 0 && len == sourceStat.st_size){
close(fdDest);
close(fdSource);
/* Sanity Check for Lock, if this is locked -1 is returned! */
if (lockf(fdSource, F_TEST, 0) == 0){
if (lockf(fdSource, F_ULOCK, 0) == -1){
/* WHOOPS! WTF! FAILURE TO UNLOCK! */
}else{
return 1; /* Success */
}
}else{
/* WHOOPS! WTF! TEST LOCK IS -1 WTF! */
return 0; /* FAILURE */
}
}
}
}
return 0; /* Failure */
}
The above sample (error checking is omitted!) employs open
, close
and sendfile
.
Edit: As caf has pointed out a race condition can occur between the open
and stat
so I thought I'd make this a bit more robust...Keep in mind that the locking mechanism varies from platform to platform...under Linux, this locking mechanism with lockf
would suffice. If you want to make this portable, use the #ifdef
macros to distinguish between different platforms/compilers...Thanks caf for spotting this...There is a link to a site that yielded "universal locking routines" here.
It's straight forward to use fork/execl to run cp to do the work for you. This has advantages over system in that it is not prone to a Bobby Tables attack and you don't need to sanitize the arguments to the same degree. Further, since system() requires you to cobble together the command argument, you are not likely to have a buffer overflow issue due to sloppy sprintf() checking.
The advantage to calling cp directly instead of writing it is not having to worry about elements of the target path existing in the destination. Doing that in roll-you-own code is error-prone and tedious.
I wrote this example in ANSI C and only stubbed out the barest error handling, other than that it's straight forward code.
void copy(char *source, char *dest)
{
int childExitStatus;
pid_t pid;
int status;
if (!source || !dest) {
/* handle as you wish */
}
pid = fork();
if (pid == 0) { /* child */
execl("/bin/cp", "/bin/cp", source, dest, (char *)0);
}
else if (pid < 0) {
/* error - couldn't start process - you decide how to handle */
}
else {
/* parent - wait for child - this has all error handling, you
* could just call wait() as long as you are only expecting to
* have one child process at a time.
*/
pid_t ws = waitpid( pid, &childExitStatus, WNOHANG);
if (ws == -1)
{ /* error - handle as you wish */
}
if( WIFEXITED(childExitStatus)) /* exit code in childExitStatus */
{
status = WEXITSTATUS(childExitStatus); /* zero is normal exit */
/* handle non-zero as you wish */
}
else if (WIFSIGNALED(childExitStatus)) /* killed */
{
}
else if (WIFSTOPPED(childExitStatus)) /* stopped */
{
}
}
}
sprintf( cmd, "/bin/cp -p \'%s\' \'%s\'", old, new);
system( cmd);
Add some error checks...
Otherwise, open both and loop on read/write, but probably not what you want.
...
UPDATE to address valid security concerns:
Rather than using "system()", do a fork/wait, and call execv() or execl() in the child.
execl( "/bin/cp", "-p", old, new);
Another variant of the copy function using normal POSIX calls and without any loop. Code inspired from the buffer copy variant of the answer of caf.
Warning: Using mmap
can easily fail on 32 bit systems, on 64 bit system the danger is less likely.
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
int cp(const char *to, const char *from)
{
int fd_from = open(from, O_RDONLY);
if(fd_from < 0)
return -1;
struct stat Stat;
if(fstat(fd_from, &Stat)<0)
goto out_error;
void *mem = mmap(NULL, Stat.st_size, PROT_READ, MAP_SHARED, fd_from, 0);
if(mem == MAP_FAILED)
goto out_error;
int fd_to = creat(to, 0666);
if(fd_to < 0)
goto out_error;
ssize_t nwritten = write(fd_to, mem, Stat.st_size);
if(nwritten < Stat.st_size)
goto out_error;
if(close(fd_to) < 0) {
fd_to = -1;
goto out_error;
}
close(fd_from);
/* Success! */
return 0;
}
out_error:;
int saved_errno = errno;
close(fd_from);
if(fd_to >= 0)
close(fd_to);
errno = saved_errno;
return -1;
}
EDIT: Corrected the file creation bug. See comment in http://stackoverflow.com/questions/2180079/how-can-i-copy-a-file-on-unix-using-c/2180157#2180157 answer.
Source: Stackoverflow.com