diff options
| author | Markus Mittendrein <git@maxmitti.tk> | 2019-03-16 03:50:28 +0100 |
|---|---|---|
| committer | Markus Mittendrein <git@maxmitti.tk> | 2019-03-16 03:50:28 +0100 |
| commit | 9103fb5b961817d1a91248632b4cedfec770b198 (patch) | |
| tree | 875e16693f78efd016e0cdb09c5c14486e5b365d /src | |
| parent | 7c2cda2544f22add8114828535a397a2d733361d (diff) | |
| download | cc4group-9103fb5b961817d1a91248632b4cedfec770b198.tar.gz cc4group-9103fb5b961817d1a91248632b4cedfec770b198.zip | |
Add openFd, openFilePointer and openWithReadCallback methods
Diffstat (limited to 'src')
| -rw-r--r-- | src/cc4group.c | 267 | ||||
| -rw-r--r-- | src/cc4group.h | 21 | ||||
| -rw-r--r-- | src/cppc4group.cpp | 20 | ||||
| -rw-r--r-- | src/cppc4group.hpp | 7 |
4 files changed, 297 insertions, 18 deletions
diff --git a/src/cc4group.c b/src/cc4group.c index f3975ee..abf0b12 100644 --- a/src/cc4group.c +++ b/src/cc4group.c @@ -28,6 +28,7 @@ #define SET_ERROR(errorCauser, errorCode, errorFormatter, data) do { this->error.code = errorCode; this->error.formatter.formatter = errorFormatter; this->error.causer = errorCauser; this->error.method = __func__; } while(0) #define SET_MESSAGE_ERROR(message) SET_ERROR(message, 1337, cc4group_messageFormatter, NULL) +#define SET_EOD_ERROR(message) SET_MESSAGE_ERROR("Unexpected end of group data while " message) #define SET_MALFORMED_MESSAGE_ERROR(message) SET_MESSAGE_ERROR("The group file is malformed: " message) #define SET_ERRNO_ERROR(causer) SET_ERROR(causer, errno, cc4group_strerrorFormatter, NULL) #define SET_ZERROR_ERROR(causer, error) SET_ERROR(causer, error, cc4group_zerrorFormatter, (void*)errno) @@ -96,6 +97,28 @@ typedef enum { Reference } CC4Group_MemoryManagement; +static bool cc4group_applyMemoryManagementStart(CC4Group_MemoryManagement const management, const uint8_t** data, size_t size) +{ + if(management == Copy) + { + uint8_t* copy = malloc(size); + if(copy == NULL) + { + return false; + } + memcpy(copy, data, size); + *data = copy; + } + return true; +} + +static void cc4group_applyMemoryManagementEnd(CC4Group_MemoryManagement const management, const uint8_t* data) +{ + if(management == Copy || management == Take) + { + free((void*)data); + } +} static const uint8_t* cc4group_getOnlyEntryData(CC4Group* const this, const C4GroupEntryData* entry); static const C4GroupEntryData* cc4group_getEntryByPath(const CC4Group* const this, const char* const entryPath); @@ -339,8 +362,61 @@ static bool cc4group_buildGroupHierarchy(CC4Group* const this) return ret; } -static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const compressedData, size_t const size) +static bool cc4group_inflateFillOutput(z_stream* const strm, CC4Group_ReadCallback const callback, void* const callbackArg, CC4Group_MemoryManagement const memoryManagement, bool* eof, const uint8_t** const lastData, int* inflateRet) +{ + int ret = Z_OK; + while(strm->avail_out > 0) + { + if(strm->avail_in == 0) + { + if(*eof) + { + *inflateRet = Z_OK; + return false; + } + + if(*lastData != NULL) + { + cc4group_applyMemoryManagementEnd(memoryManagement, *lastData); + *lastData = NULL; + } + + size_t readSize = 0; + *eof = callback(lastData, &readSize, callbackArg); + + if(readSize == 0) + { + continue; + } + + cc4group_applyMemoryManagementStart(memoryManagement, lastData, readSize); + + strm->avail_in = readSize; + strm->next_in = (uint8_t*)*lastData; // I don't know why this must be non-const; anyway a read-only mapped input doesn't segfault. so it seems to be used read-only + } + + ret = inflate(strm, *eof ? Z_FINISH : Z_NO_FLUSH); + if(ret != Z_OK && (ret != Z_STREAM_END || strm->avail_in != 0) && !(ret == Z_BUF_ERROR && strm->avail_out == 0)) + { + break; + } + } + + if(ret == Z_BUF_ERROR && strm->avail_out == 0) + { + return true; + } + + *inflateRet = ret; + return ret == Z_OK || (ret == Z_STREAM_END && strm->avail_in == 0 && eof); +} + +static bool cc4group_uncompressGroup(CC4Group* const this, CC4Group_ReadCallback const callback, void* const callbackArg, int const memoryManagement) { + assert(this); + assert(callback); + assert(callbackArg); + // only if the group is still empty assert(this->root.children == NULL); @@ -352,9 +428,58 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const CC4Group_CleanupJob tmpCleanup; uint8_t* mappedTmpFile = NULL; + + size_t totalReadSize = 0; + + const uint8_t* readData = NULL; + const uint8_t* readDataAfterMagic = NULL; + size_t readSize = 0; + bool eof = false; + + uint8_t magic1, magic2; + + while(!eof && totalReadSize < 2) + { + if(readData != NULL) + { + cc4group_applyMemoryManagementEnd(memoryManagement, readData); + } + readData = NULL; + readSize = 0; + + eof = callback(&readData, &readSize, callbackArg); + + if(readSize == 0) + { + continue; + } + + cc4group_applyMemoryManagementStart(memoryManagement, &readData, readSize); + + size_t newTotalReadSize = totalReadSize + readSize; + if(totalReadSize < 1 && newTotalReadSize > 0) + { + magic1 = readData[0]; + } + + if(totalReadSize < 2 && newTotalReadSize > 1) + { + magic2 = readData[1 - totalReadSize]; + readDataAfterMagic = readData + (2 - totalReadSize); + } + + totalReadSize = newTotalReadSize; + } + + if(eof && totalReadSize < 2) + { + SET_EOD_ERROR("reading the group magic"); + goto ret; + } + static uint8_t magic[] = {0x1f, 0x8b}; - if((compressedData[0] != C4GroupMagic1 && compressedData[0] != magic[0]) || (compressedData[1] != C4GroupMagic2 && compressedData[1] != magic[1])) + if((magic1 != C4GroupMagic1 && magic1 != magic[0]) || (magic2 != C4GroupMagic2 && magic2 != magic[1])) { SET_MESSAGE_ERROR("The file is not a valid group file. Magic bytes don't match."); goto ret; @@ -396,13 +521,22 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const goto ret; } - strm.next_in = (uint8_t*)compressedData + 2; // I don't know why this must be non-const; anyway a read-only mapped input doesn't segfault. so it seems to be used read-only - strm.avail_in = size - 2; + if(totalReadSize > 2) + { + strm.next_in = (uint8_t*)readDataAfterMagic; + strm.avail_in = totalReadSize - 2; + } - ret = inflate(&strm, Z_SYNC_FLUSH); - if(ret != Z_OK && (ret != Z_STREAM_END || strm.avail_in != 0)) + if(!cc4group_inflateFillOutput(&strm, callback, callbackArg, memoryManagement, &eof, &readData, &ret)) { - SET_ZERROR_ERROR("inflate: inflating the group header", ret); + if(eof && ret == Z_BUF_ERROR) + { + SET_EOD_ERROR("inflating group header"); + } + else + { + SET_ZERROR_ERROR("inflate: inflating the group header", ret); + } goto ret; } @@ -412,7 +546,7 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const { // the group is empty // so the stream should have ended also - if(ret != Z_STREAM_END) + if(ret != Z_STREAM_END || !eof || strm.avail_in > 0) { SET_MALFORMED_MESSAGE_ERROR("The group is empty but the gzip stream has not ended yet."); goto ret; @@ -453,10 +587,16 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const strm.next_out = (Bytef*)cores; strm.avail_out = sizeof(C4GroupEntryCore) * header->Entries; - ret = inflate(&strm, Z_SYNC_FLUSH); - if(ret != Z_OK) + if(!cc4group_inflateFillOutput(&strm, callback, callbackArg, memoryManagement, &eof, &readData, &ret)) { - SET_ZERROR_ERROR("inflate: inflating the group entry dictionary", ret); + if(eof && ret == Z_BUF_ERROR) + { + SET_EOD_ERROR("inflating the group entry dictionary"); + } + else + { + SET_ZERROR_ERROR("inflate: inflating the group entry dictionary", ret); + } goto ret; } @@ -475,16 +615,33 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const strm.next_out = data; strm.avail_out = uncompressedSize; - ret = inflate(&strm, Z_SYNC_FLUSH); - if(ret != Z_STREAM_END || strm.avail_in != 0 || strm.avail_out != 0) + if(!cc4group_inflateFillOutput(&strm, callback, callbackArg, memoryManagement, &eof, &readData, &ret)) + { + if(eof && ret == Z_BUF_ERROR) + { + SET_EOD_ERROR("inflating the group contents"); + } + else + { + SET_ZERROR_ERROR("inflate: inflating the group contents", ret); + } + goto ret; + } + + if(strm.avail_in > 0 || ret != Z_STREAM_END) { - SET_ZERROR_ERROR("inflate: inflating group contents", ret); + SET_MALFORMED_MESSAGE_ERROR("The group contents are read completely but more data is left to read"); goto ret; } retData = mappedTmpFile; ret: + if(readData != NULL) + { + cc4group_applyMemoryManagementEnd(memoryManagement, readData); + } + if(header != NULL) { free(header); @@ -514,13 +671,69 @@ ret: return cc4group_buildGroupHierarchy(this); } -static bool cc4group_openMemory(CC4Group* const this, const uint8_t* const compressedData, size_t const size) +typedef struct { + const uint8_t* data; + size_t size; +} CompleteDataReadCallbackArg; + +static bool cc4group_completeDataReadCallback(const uint8_t** const data, size_t* const size, void* callbackArg) +{ + CompleteDataReadCallbackArg* argData = callbackArg; + *data = argData->data; + *size = argData->size; + return true; +} + +static bool cc4group_readFdReadCallback(const uint8_t** const data, size_t* const size, void* callbackArg) +{ +#define CHUNK_SIZE 1000*1000 + uint8_t* readData = malloc(CHUNK_SIZE); + if(data == NULL) + { + return true; + } + + ssize_t count = read(*(int*)callbackArg, readData, CHUNK_SIZE); + + if(count > 0) + { + *data = readData; + *size = count; + return false; + } + return true; +#undef CHUNK_SIZE +} + +static bool cc4group_readFilePointerReadCallback(const uint8_t** const data, size_t* const size, void* callbackArg) +{ +#define CHUNK_SIZE 1000*1000 + uint8_t* readData = malloc(CHUNK_SIZE); + if(data == NULL) + { + return true; + } + + size_t count = fread(readData, 1, CHUNK_SIZE, callbackArg); + + if(count > 0) + { + *data = readData; + *size = count; + return false; + } + return true; +#undef CHUNK_SIZE +} + +static bool cc4group_openMemory(CC4Group* const this, const uint8_t* const compressedData, size_t const size, int const memoryManagement) { assert(this); assert(compressedData); assert(size); - return cc4group_uncompressGroup(this, compressedData, size); + CompleteDataReadCallbackArg data = {.data = compressedData, .size = size}; + return cc4group_uncompressGroup(this, cc4group_completeDataReadCallback, &data, memoryManagement); } static bool cc4group_setSubRoot(CC4Group* const this, const char* const subPath) @@ -618,7 +831,8 @@ static bool cc4group_uncompressGroupFromFile(CC4Group* const this, const char* c goto ret; } - success = cc4group_uncompressGroup(this, mappedFile, size); + CompleteDataReadCallbackArg data = {.data = mappedFile, .size = size}; + success = cc4group_uncompressGroup(this, cc4group_completeDataReadCallback, &data, Reference); ret: if(mappedFile != MAP_FAILED) @@ -826,9 +1040,25 @@ static bool cc4group_create(CC4Group* const this) return true; } +static bool cc4group_openFd(CC4Group* const this, int fd) +{ + return cc4group_uncompressGroup(this, cc4group_readFdReadCallback, &fd, Take); +} + +static bool cc4group_openFilePointer(CC4Group* const this, FILE* file) +{ + return cc4group_uncompressGroup(this, cc4group_readFilePointerReadCallback, file, Take); +} + static bool cc4group_openExisting(CC4Group* const this, const char* const path) { assert(this); + assert(path); + + if(strcmp(path, "-") == 0) + { + return cc4group_openFd(this, STDIN_FILENO); + } return cc4group_uncompressGroupFromFile(this, path); } @@ -1688,6 +1918,9 @@ CC4Group_API cc4group = { .delete = cc4group_delete, .openExisting = cc4group_openExisting, .openMemory = cc4group_openMemory, + .openFd = cc4group_openFd, + .openFilePointer = cc4group_openFilePointer, + .openWithReadCallback = cc4group_uncompressGroup, .save = cc4group_save, .saveOverwrite = cc4group_saveOverwrite, .extractAll = cc4group_extractAll, diff --git a/src/cc4group.h b/src/cc4group.h index 16c684a..a63d995 100644 --- a/src/cc4group.h +++ b/src/cc4group.h @@ -10,6 +10,7 @@ extern "C" { #include <stddef.h> #include <stdint.h> #include <stdbool.h> +#include <stdio.h> typedef struct { const char* fileName; @@ -31,6 +32,11 @@ typedef struct { } CC4Group_CleanupJob; typedef void* (*CC4Group_TmpMemoryStrategy)(CC4Group* const this, const size_t size, CC4Group_CleanupJob* cleanupJob); +// the callback has to store a pointer to the newly read data in data, as well as the amount of read data in size +// the callback must return true if the end of data is reached or any read error happens and false otherwise +// the pointer passed in will be handled as specified with the corresponding MemoryManagement +typedef bool (*CC4Group_ReadCallback)(const uint8_t** const data, size_t* const size, void* const arg); + typedef struct { CC4Group* (*new)(void); bool (*create)(CC4Group* const this); @@ -42,7 +48,20 @@ typedef struct { // opens a group that is stored entirely in memory // only open an in-memory group on a frehsly created group object - bool (*openMemory)(CC4Group* const this, const uint8_t* const groupData, size_t const size); + // see the description of MemoryManagement to know if and when data has to be freed by the caller + // if the lazy mode is not used, the data can be freed immediately after this function returns + bool (*openMemory)(CC4Group* const this, const uint8_t* const groupData, size_t const size, int const memoryManagement); + + // opens a group through a file descriptor + // the file descriptor must have been opened with read access + bool (*openFd)(CC4Group* const this, int fd); + + // opens a group through a FILE* + // the file must have been opened with read access + bool (*openFilePointer)(CC4Group* const this, FILE* fd); + + // opens a group and calls the callback to get the group data + bool (*openWithReadCallback)(CC4Group* const this, CC4Group_ReadCallback const callback, void* const callbackArg, int const memoryManagement); bool (*save)(CC4Group* const this, const char* const path); bool (*saveOverwrite)(CC4Group* const this, const char* const path); diff --git a/src/cppc4group.cpp b/src/cppc4group.cpp index d8bfbb3..2768afd 100644 --- a/src/cppc4group.cpp +++ b/src/cppc4group.cpp @@ -76,6 +76,26 @@ bool CppC4Group::openExisting(const std::string& path) return cc4group.openExisting(p->g, path.c_str()); } +bool CppC4Group::openFd(const int fd) +{ + return cc4group.openFd(p->g, fd); +} + +bool CppC4Group::openFilePointer(FILE* file) +{ + return cc4group.openFilePointer(p->g, file); +} + +bool CppC4Group::openMemory(const void* const data, const size_t size, const MemoryManagement management) +{ + return cc4group.openMemory(p->g, reinterpret_cast<const uint8_t* const>(data), size, convertMemoryManagement(management)); +} + +bool CppC4Group::openWithReadCallback(const ReadCallback callback, void* const callbackArg, const MemoryManagement management) +{ + return cc4group.openWithReadCallback(p->g, reinterpret_cast<CC4Group_ReadCallback>(callback), callbackArg, convertMemoryManagement(management)); +} + bool CppC4Group::save(const std::string& path, const bool overwrite) { return (overwrite ? cc4group.saveOverwrite : cc4group.save)(p->g, path.c_str()); diff --git a/src/cppc4group.hpp b/src/cppc4group.hpp index 67d350d..0787c97 100644 --- a/src/cppc4group.hpp +++ b/src/cppc4group.hpp @@ -3,6 +3,7 @@ #include <memory> #include <vector> #include <optional> +#include <cstdio> class CppC4Group { struct Private; @@ -30,6 +31,8 @@ public: }; public: + using ReadCallback = bool(*)(const void** const data, size_t* const size, void* const arg); + enum TmpMemoryStrategy { Memory, File, @@ -50,6 +53,10 @@ public: void create(); bool openExisting(const std::string& path); + bool openFd(const int fd); + bool openFilePointer(FILE* file); + bool openMemory(const void* const data, const size_t size, const MemoryManagement management = Reference); + bool openWithReadCallback(const ReadCallback callback, void* const callbackArg, const MemoryManagement management = Take); bool save(const std::string& path, const bool overwrite = false); bool extractAll(const std::string& path); |
