summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/cc4group.c267
-rw-r--r--src/cc4group.h21
-rw-r--r--src/cppc4group.cpp20
-rw-r--r--src/cppc4group.hpp7
4 files changed, 297 insertions, 18 deletions
diff --git a/src/cc4group.c b/src/cc4group.c
index f3975ee..abf0b12 100644
--- a/src/cc4group.c
+++ b/src/cc4group.c
@@ -28,6 +28,7 @@
#define SET_ERROR(errorCauser, errorCode, errorFormatter, data) do { this->error.code = errorCode; this->error.formatter.formatter = errorFormatter; this->error.causer = errorCauser; this->error.method = __func__; } while(0)
#define SET_MESSAGE_ERROR(message) SET_ERROR(message, 1337, cc4group_messageFormatter, NULL)
+#define SET_EOD_ERROR(message) SET_MESSAGE_ERROR("Unexpected end of group data while " message)
#define SET_MALFORMED_MESSAGE_ERROR(message) SET_MESSAGE_ERROR("The group file is malformed: " message)
#define SET_ERRNO_ERROR(causer) SET_ERROR(causer, errno, cc4group_strerrorFormatter, NULL)
#define SET_ZERROR_ERROR(causer, error) SET_ERROR(causer, error, cc4group_zerrorFormatter, (void*)errno)
@@ -96,6 +97,28 @@ typedef enum {
Reference
} CC4Group_MemoryManagement;
+// Prepares a freshly delivered data chunk according to the memory management mode.
+// For Copy the chunk is duplicated into a malloc'ed buffer and *data is redirected to that duplicate;
+// for every other mode *data is left untouched.
+// Returns false only if the duplicate could not be allocated; *data is unchanged in that case.
+static bool cc4group_applyMemoryManagementStart(CC4Group_MemoryManagement const management, const uint8_t** data, size_t size)
+{
+    if(management == Copy)
+    {
+        uint8_t* copy = malloc(size);
+        if(copy == NULL)
+        {
+            return false;
+        }
+        // duplicate the chunk itself (*data), not the bytes of the pointer variable that refers to it
+        memcpy(copy, *data, size);
+        *data = copy;
+    }
+    return true;
+}
+
+// Releases a data chunk once it is no longer needed.
+// The chunk is freed for the Copy and Take modes; for any other mode nothing happens.
+static void cc4group_applyMemoryManagementEnd(CC4Group_MemoryManagement const management, const uint8_t* data)
+{
+    const bool ownsChunk = (management == Copy) || (management == Take);
+    if(!ownsChunk)
+    {
+        return;
+    }
+
+    // free() wants a non-const pointer, hence the cast
+    free((void*)data);
+}
static const uint8_t* cc4group_getOnlyEntryData(CC4Group* const this, const C4GroupEntryData* entry);
static const C4GroupEntryData* cc4group_getEntryByPath(const CC4Group* const this, const char* const entryPath);
@@ -339,8 +362,61 @@ static bool cc4group_buildGroupHierarchy(CC4Group* const this)
return ret;
}
-static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const compressedData, size_t const size)
+// Inflates from strm until its output buffer is full, fetching more compressed input through callback whenever strm has no input left.
+//
+// strm             zlib stream; next_out/avail_out describe the buffer to fill, next_in/avail_in any input that is already available
+// callback         read callback that delivers the next input chunk; it is called with callbackArg
+// memoryManagement how delivered chunks are treated (see cc4group_applyMemoryManagementStart and cc4group_applyMemoryManagementEnd)
+// eof              in/out: receives the callback's return value; once it is true no further chunk is requested
+// lastData         in/out: the chunk strm currently reads from; it is released before the next chunk is requested
+// inflateRet       out: status describing the outcome; it is not written when the output buffer became full with Z_BUF_ERROR
+//
+// Returns true if the output buffer was filled completely.
+// Returns false if the input ran out first (*inflateRet is Z_OK), if a chunk could not be duplicated (*inflateRet is Z_MEM_ERROR),
+// if inflate() failed or if the stream ended while unconsumed input was left (*inflateRet is the inflate() result).
+static bool cc4group_inflateFillOutput(z_stream* const strm, CC4Group_ReadCallback const callback, void* const callbackArg, CC4Group_MemoryManagement const memoryManagement, bool* eof, const uint8_t** const lastData, int* inflateRet)
+{
+    int ret = Z_OK;
+    while(strm->avail_out > 0)
+    {
+        if(strm->avail_in == 0)
+        {
+            if(*eof)
+            {
+                // all input is used up but the output buffer is still not full
+                *inflateRet = Z_OK;
+                return false;
+            }
+
+            // strm is done with the previous chunk, so it can be released now
+            if(*lastData != NULL)
+            {
+                cc4group_applyMemoryManagementEnd(memoryManagement, *lastData);
+                *lastData = NULL;
+            }
+
+            size_t readSize = 0;
+            *eof = callback(lastData, &readSize, callbackArg);
+
+            if(readSize == 0)
+            {
+                // nothing was delivered; the next iteration either asks again or stops because of *eof
+                continue;
+            }
+
+            if(!cc4group_applyMemoryManagementStart(memoryManagement, lastData, readSize))
+            {
+                // the chunk could not be duplicated; *lastData still refers to the buffer handed out by the callback,
+                // which must not be released by the cleanup, so forget it and report the allocation failure
+                *lastData = NULL;
+                *inflateRet = Z_MEM_ERROR;
+                return false;
+            }
+
+            strm->avail_in = readSize;
+            strm->next_in = (uint8_t*)*lastData; // I don't know why this must be non-const; anyway a read-only mapped input doesn't segfault. so it seems to be used read-only
+        }
+
+        ret = inflate(strm, *eof ? Z_FINISH : Z_NO_FLUSH);
+        // keep going on Z_OK, on a stream end that consumed all input and on a "buffer error" that only means the output is full
+        if(ret != Z_OK && (ret != Z_STREAM_END || strm->avail_in != 0) && !(ret == Z_BUF_ERROR && strm->avail_out == 0))
+        {
+            break;
+        }
+    }
+
+    if(ret == Z_BUF_ERROR && strm->avail_out == 0)
+    {
+        return true;
+    }
+
+    *inflateRet = ret;
+    // NOTE(review): "eof" below is the pointer itself and therefore always true. Testing *eof instead would reject streams
+    // that end before the callback has reported the end of data (the chunked read callbacks report it one call later) - confirm intent.
+    return ret == Z_OK || (ret == Z_STREAM_END && strm->avail_in == 0 && eof);
+}
+
+static bool cc4group_uncompressGroup(CC4Group* const this, CC4Group_ReadCallback const callback, void* const callbackArg, int const memoryManagement)
{
+ assert(this);
+ assert(callback);
+ assert(callbackArg);
+
// only if the group is still empty
assert(this->root.children == NULL);
@@ -352,9 +428,58 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const
CC4Group_CleanupJob tmpCleanup;
uint8_t* mappedTmpFile = NULL;
+
+ size_t totalReadSize = 0;
+
+ const uint8_t* readData = NULL;
+ const uint8_t* readDataAfterMagic = NULL;
+ size_t readSize = 0;
+ bool eof = false;
+
+ uint8_t magic1, magic2;
+
+ while(!eof && totalReadSize < 2)
+ {
+ if(readData != NULL)
+ {
+ cc4group_applyMemoryManagementEnd(memoryManagement, readData);
+ }
+ readData = NULL;
+ readSize = 0;
+
+ eof = callback(&readData, &readSize, callbackArg);
+
+ if(readSize == 0)
+ {
+ continue;
+ }
+
+ cc4group_applyMemoryManagementStart(memoryManagement, &readData, readSize);
+
+ size_t newTotalReadSize = totalReadSize + readSize;
+ if(totalReadSize < 1 && newTotalReadSize > 0)
+ {
+ magic1 = readData[0];
+ }
+
+ if(totalReadSize < 2 && newTotalReadSize > 1)
+ {
+ magic2 = readData[1 - totalReadSize];
+ readDataAfterMagic = readData + (2 - totalReadSize);
+ }
+
+ totalReadSize = newTotalReadSize;
+ }
+
+ if(eof && totalReadSize < 2)
+ {
+ SET_EOD_ERROR("reading the group magic");
+ goto ret;
+ }
+
static uint8_t magic[] = {0x1f, 0x8b};
- if((compressedData[0] != C4GroupMagic1 && compressedData[0] != magic[0]) || (compressedData[1] != C4GroupMagic2 && compressedData[1] != magic[1]))
+ if((magic1 != C4GroupMagic1 && magic1 != magic[0]) || (magic2 != C4GroupMagic2 && magic2 != magic[1]))
{
SET_MESSAGE_ERROR("The file is not a valid group file. Magic bytes don't match.");
goto ret;
@@ -396,13 +521,22 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const
goto ret;
}
- strm.next_in = (uint8_t*)compressedData + 2; // I don't know why this must be non-const; anyway a read-only mapped input doesn't segfault. so it seems to be used read-only
- strm.avail_in = size - 2;
+ if(totalReadSize > 2)
+ {
+ strm.next_in = (uint8_t*)readDataAfterMagic;
+ strm.avail_in = totalReadSize - 2;
+ }
- ret = inflate(&strm, Z_SYNC_FLUSH);
- if(ret != Z_OK && (ret != Z_STREAM_END || strm.avail_in != 0))
+ if(!cc4group_inflateFillOutput(&strm, callback, callbackArg, memoryManagement, &eof, &readData, &ret))
{
- SET_ZERROR_ERROR("inflate: inflating the group header", ret);
+ if(eof && ret == Z_BUF_ERROR)
+ {
+ SET_EOD_ERROR("inflating group header");
+ }
+ else
+ {
+ SET_ZERROR_ERROR("inflate: inflating the group header", ret);
+ }
goto ret;
}
@@ -412,7 +546,7 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const
{
// the group is empty
// so the stream should have ended also
- if(ret != Z_STREAM_END)
+ if(ret != Z_STREAM_END || !eof || strm.avail_in > 0)
{
SET_MALFORMED_MESSAGE_ERROR("The group is empty but the gzip stream has not ended yet.");
goto ret;
@@ -453,10 +587,16 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const
strm.next_out = (Bytef*)cores;
strm.avail_out = sizeof(C4GroupEntryCore) * header->Entries;
- ret = inflate(&strm, Z_SYNC_FLUSH);
- if(ret != Z_OK)
+ if(!cc4group_inflateFillOutput(&strm, callback, callbackArg, memoryManagement, &eof, &readData, &ret))
{
- SET_ZERROR_ERROR("inflate: inflating the group entry dictionary", ret);
+ if(eof && ret == Z_BUF_ERROR)
+ {
+ SET_EOD_ERROR("inflating the group entry dictionary");
+ }
+ else
+ {
+ SET_ZERROR_ERROR("inflate: inflating the group entry dictionary", ret);
+ }
goto ret;
}
@@ -475,16 +615,33 @@ static bool cc4group_uncompressGroup(CC4Group* const this, const uint8_t* const
strm.next_out = data;
strm.avail_out = uncompressedSize;
- ret = inflate(&strm, Z_SYNC_FLUSH);
- if(ret != Z_STREAM_END || strm.avail_in != 0 || strm.avail_out != 0)
+ if(!cc4group_inflateFillOutput(&strm, callback, callbackArg, memoryManagement, &eof, &readData, &ret))
+ {
+ if(eof && ret == Z_BUF_ERROR)
+ {
+ SET_EOD_ERROR("inflating the group contents");
+ }
+ else
+ {
+ SET_ZERROR_ERROR("inflate: inflating the group contents", ret);
+ }
+ goto ret;
+ }
+
+ if(strm.avail_in > 0 || ret != Z_STREAM_END)
{
- SET_ZERROR_ERROR("inflate: inflating group contents", ret);
+ SET_MALFORMED_MESSAGE_ERROR("The group contents are read completely but more data is left to read");
goto ret;
}
retData = mappedTmpFile;
ret:
+ if(readData != NULL)
+ {
+ cc4group_applyMemoryManagementEnd(memoryManagement, readData);
+ }
+
if(header != NULL)
{
free(header);
@@ -514,13 +671,69 @@ ret:
return cc4group_buildGroupHierarchy(this);
}
-static bool cc4group_openMemory(CC4Group* const this, const uint8_t* const compressedData, size_t const size)
+typedef struct { // argument for cc4group_completeDataReadCallback: one buffer that is handed over in a single call
+ const uint8_t* data; // start of the buffer
+ size_t size; // length of the buffer
+} CompleteDataReadCallbackArg;
+
+// Read callback that hands over the whole buffer described by a CompleteDataReadCallbackArg (passed as callbackArg) in one call.
+// Always returns true, i.e. the end of the data is signalled together with the only chunk.
+static bool cc4group_completeDataReadCallback(const uint8_t** const data, size_t* const size, void* callbackArg)
+{
+    const CompleteDataReadCallbackArg* const source = callbackArg;
+    *size = source->size;
+    *data = source->data;
+    return true;
+}
+
+// Read callback that fetches the next chunk of at most CHUNK_SIZE bytes from a file descriptor.
+// callbackArg has to point to the int holding the file descriptor.
+// On success the malloc'ed chunk is stored in *data, its length in *size and false is returned; the chunk is then owned by the receiver.
+// Returns true without touching *data and *size if nothing could be read (end of file or read error) or if the chunk buffer could not be allocated.
+static bool cc4group_readFdReadCallback(const uint8_t** const data, size_t* const size, void* callbackArg)
+{
+#define CHUNK_SIZE (1000*1000)
+    uint8_t* readData = malloc(CHUNK_SIZE);
+    // check the freshly allocated buffer (not the out-parameter) before reading into it
+    if(readData == NULL)
+    {
+        return true;
+    }
+
+    ssize_t count = read(*(int*)callbackArg, readData, CHUNK_SIZE);
+
+    if(count > 0)
+    {
+        *data = readData;
+        *size = count;
+        return false;
+    }
+
+    // nothing was read, so the buffer is not handed over and has to be released here
+    free(readData);
+    return true;
+#undef CHUNK_SIZE
+}
+
+// Read callback that fetches the next chunk of at most CHUNK_SIZE bytes from a FILE*.
+// callbackArg has to be the FILE* to read from.
+// On success the malloc'ed chunk is stored in *data, its length in *size and false is returned; the chunk is then owned by the receiver.
+// Returns true without touching *data and *size if nothing could be read (end of file or read error) or if the chunk buffer could not be allocated.
+static bool cc4group_readFilePointerReadCallback(const uint8_t** const data, size_t* const size, void* callbackArg)
+{
+#define CHUNK_SIZE (1000*1000)
+    uint8_t* readData = malloc(CHUNK_SIZE);
+    // check the freshly allocated buffer (not the out-parameter) before reading into it
+    if(readData == NULL)
+    {
+        return true;
+    }
+
+    size_t count = fread(readData, 1, CHUNK_SIZE, callbackArg);
+
+    if(count > 0)
+    {
+        *data = readData;
+        *size = count;
+        return false;
+    }
+
+    // nothing was read, so the buffer is not handed over and has to be released here
+    free(readData);
+    return true;
+#undef CHUNK_SIZE
+}
+
+static bool cc4group_openMemory(CC4Group* const this, const uint8_t* const compressedData, size_t const size, int const memoryManagement) // opens a group whose complete compressed data is already in memory
{
assert(this);
assert(compressedData);
assert(size);
- return cc4group_uncompressGroup(this, compressedData, size);
+ CompleteDataReadCallbackArg data = {.data = compressedData, .size = size}; // wrap the buffer so that it can be delivered through the read callback interface
+ return cc4group_uncompressGroup(this, cc4group_completeDataReadCallback, &data, memoryManagement); // memoryManagement is forwarded unchanged
}
static bool cc4group_setSubRoot(CC4Group* const this, const char* const subPath)
@@ -618,7 +831,8 @@ static bool cc4group_uncompressGroupFromFile(CC4Group* const this, const char* c
goto ret;
}
- success = cc4group_uncompressGroup(this, mappedFile, size);
+ CompleteDataReadCallbackArg data = {.data = mappedFile, .size = size};
+ success = cc4group_uncompressGroup(this, cc4group_completeDataReadCallback, &data, Reference);
ret:
if(mappedFile != MAP_FAILED)
@@ -826,9 +1040,25 @@ static bool cc4group_create(CC4Group* const this)
return true;
}
+// Opens a group by reading its data from the file descriptor fd.
+// The data is pulled through cc4group_readFdReadCallback and the chunks it hands out are passed on with Take.
+static bool cc4group_openFd(CC4Group* const this, int fd)
+{
+    // fd is a by-value parameter, so its address stays valid for the whole call
+    void* const fdArg = &fd;
+    const bool opened = cc4group_uncompressGroup(this, cc4group_readFdReadCallback, fdArg, Take);
+    return opened;
+}
+
+// Opens a group by reading its data from the FILE* file.
+// The data is pulled through cc4group_readFilePointerReadCallback and the chunks it hands out are passed on with Take.
+static bool cc4group_openFilePointer(CC4Group* const this, FILE* file)
+{
+    void* const fileArg = file;
+    const bool opened = cc4group_uncompressGroup(this, cc4group_readFilePointerReadCallback, fileArg, Take);
+    return opened;
+}
+
static bool cc4group_openExisting(CC4Group* const this, const char* const path) // opens the group stored at path
{
assert(this);
+ assert(path);
+
+ if(strcmp(path, "-") == 0) // the path "-" selects standard input instead of a file on disk
+ {
+ return cc4group_openFd(this, STDIN_FILENO);
+ }
return cc4group_uncompressGroupFromFile(this, path);
}
@@ -1688,6 +1918,9 @@ CC4Group_API cc4group = {
.delete = cc4group_delete,
.openExisting = cc4group_openExisting,
.openMemory = cc4group_openMemory,
+ .openFd = cc4group_openFd,
+ .openFilePointer = cc4group_openFilePointer,
+ .openWithReadCallback = cc4group_uncompressGroup,
.save = cc4group_save,
.saveOverwrite = cc4group_saveOverwrite,
.extractAll = cc4group_extractAll,
diff --git a/src/cc4group.h b/src/cc4group.h
index 16c684a..a63d995 100644
--- a/src/cc4group.h
+++ b/src/cc4group.h
@@ -10,6 +10,7 @@ extern "C" {
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
+#include <stdio.h>
typedef struct {
const char* fileName;
@@ -31,6 +32,11 @@ typedef struct {
} CC4Group_CleanupJob;
typedef void* (*CC4Group_TmpMemoryStrategy)(CC4Group* const this, const size_t size, CC4Group_CleanupJob* cleanupJob);
+// the callback has to store a pointer to the newly read data in data, as well as the amount of read data in size; a call that leaves size at 0 is treated as having delivered no data
+// the callback must return true if the end of data is reached or any read error happens and false otherwise
+// the pointer stored in data will be handled as specified with the corresponding MemoryManagement; arg is the callbackArg that was given when opening the group
+typedef bool (*CC4Group_ReadCallback)(const uint8_t** const data, size_t* const size, void* const arg);
+
typedef struct {
CC4Group* (*new)(void);
bool (*create)(CC4Group* const this);
@@ -42,7 +48,20 @@ typedef struct {
// opens a group that is stored entirely in memory
// only open an in-memory group on a freshly created group object
- bool (*openMemory)(CC4Group* const this, const uint8_t* const groupData, size_t const size);
+ // see the description of MemoryManagement to know if and when data has to be freed by the caller
+ // if the lazy mode is not used, the data can be freed immediately after this function returns
+ bool (*openMemory)(CC4Group* const this, const uint8_t* const groupData, size_t const size, int const memoryManagement);
+
+ // opens a group through a file descriptor
+ // the file descriptor must have been opened with read access
+ bool (*openFd)(CC4Group* const this, int fd);
+
+ // opens a group through a FILE*
+ // the file must have been opened with read access
+ bool (*openFilePointer)(CC4Group* const this, FILE* fd);
+
+ // opens a group and calls the callback to get the group data
+ bool (*openWithReadCallback)(CC4Group* const this, CC4Group_ReadCallback const callback, void* const callbackArg, int const memoryManagement);
bool (*save)(CC4Group* const this, const char* const path);
bool (*saveOverwrite)(CC4Group* const this, const char* const path);
diff --git a/src/cppc4group.cpp b/src/cppc4group.cpp
index d8bfbb3..2768afd 100644
--- a/src/cppc4group.cpp
+++ b/src/cppc4group.cpp
@@ -76,6 +76,26 @@ bool CppC4Group::openExisting(const std::string& path)
return cc4group.openExisting(p->g, path.c_str());
}
+// Opens the group from the file descriptor fd by forwarding to cc4group.openFd; returns its result.
+bool CppC4Group::openFd(const int fd)
+{
+    const bool opened = cc4group.openFd(p->g, fd);
+    return opened;
+}
+
+// Opens the group from the FILE* file by forwarding to cc4group.openFilePointer; returns its result.
+bool CppC4Group::openFilePointer(FILE* file)
+{
+    const bool opened = cc4group.openFilePointer(p->g, file);
+    return opened;
+}
+
+// Opens the group from size bytes of in-memory group data by forwarding to cc4group.openMemory.
+// management is translated with convertMemoryManagement before it is passed on.
+bool CppC4Group::openMemory(const void* const data, const size_t size, const MemoryManagement management)
+{
+    // a const void* converts to a byte pointer with a plain static_cast
+    const auto* const bytes = static_cast<const uint8_t*>(data);
+    return cc4group.openMemory(p->g, bytes, size, convertMemoryManagement(management));
+}
+
+// Opens the group by pulling its data through callback (invoked with callbackArg), forwarding to cc4group.openWithReadCallback.
+// management is translated with convertMemoryManagement before it is passed on.
+// NOTE(review): the function pointer is reinterpret_cast to CC4Group_ReadCallback; presumably the two signatures differ only in the pointee type of the first parameter - confirm this is acceptable on all targets.
+bool CppC4Group::openWithReadCallback(const ReadCallback callback, void* const callbackArg, const MemoryManagement management)
+{
+    const auto cCallback = reinterpret_cast<CC4Group_ReadCallback>(callback);
+    const auto cManagement = convertMemoryManagement(management);
+    return cc4group.openWithReadCallback(p->g, cCallback, callbackArg, cManagement);
+}
+
bool CppC4Group::save(const std::string& path, const bool overwrite)
{
return (overwrite ? cc4group.saveOverwrite : cc4group.save)(p->g, path.c_str());
diff --git a/src/cppc4group.hpp b/src/cppc4group.hpp
index 67d350d..0787c97 100644
--- a/src/cppc4group.hpp
+++ b/src/cppc4group.hpp
@@ -3,6 +3,7 @@
#include <memory>
#include <vector>
#include <optional>
+#include <cstdio>
class CppC4Group {
struct Private;
@@ -30,6 +31,8 @@ public:
};
public:
+ using ReadCallback = bool(*)(const void** const data, size_t* const size, void* const arg);
+
enum TmpMemoryStrategy {
Memory,
File,
@@ -50,6 +53,10 @@ public:
void create();
bool openExisting(const std::string& path);
+ bool openFd(const int fd);
+ bool openFilePointer(FILE* file);
+ bool openMemory(const void* const data, const size_t size, const MemoryManagement management = Reference);
+ bool openWithReadCallback(const ReadCallback callback, void* const callbackArg, const MemoryManagement management = Take);
bool save(const std::string& path, const bool overwrite = false);
bool extractAll(const std::string& path);