summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Markus Mittendrein <git@maxmitti.tk> 2019-08-31 15:16:07 +0200
committer Markus Mittendrein <git@maxmitti.tk> 2019-08-31 15:16:07 +0200
commit 36f1c4c4e822458170099890cf586dd013e82673 (patch)
tree abc87bd974a7600b459ef7de16708182eb90f293 /src
parent 4ba42d7fe8eb1b10c8b10c545a33c0ec981dcf7e (diff)
download cc4group-36f1c4c4e822458170099890cf586dd013e82673.tar.gz
cc4group-36f1c4c4e822458170099890cf586dd013e82673.zip
Store duplicate data only once if possible (Offset stays in the valid range)
Diffstat (limited to 'src')
-rw-r--r-- src/cc4group.c 114
1 files changed, 102 insertions, 12 deletions
diff --git a/src/cc4group.c b/src/cc4group.c
index b086110..2919224 100644
--- a/src/cc4group.c
+++ b/src/cc4group.c
@@ -73,6 +73,7 @@ typedef struct C4GroupEntryData_t {
size_t absolutePosition;
const char* path;
+ const struct C4GroupEntryData_t* shareDataWith;
} C4GroupEntryData;
LIST_AUTO(C4GroupEntryData, GroupEntryList)
@@ -326,7 +327,7 @@ static bool buildChildren(CC4Group* const this, C4GroupEntryData* const entry, s
return false;
}
- C4GroupEntryData* childEntry = &GroupEntryListAppend(entry->children, (C4GroupEntryData){.core = *core, .data = childData + core->Offset, .memoryManagement = cc4group.MemoryManagement.Reference, .children = NULL, .parent = entry, .absolutePosition = entry->absolutePosition + childDataOffset + core->Offset, .path = NULL})->value;
+ C4GroupEntryData* childEntry = &GroupEntryListAppend(entry->children, (C4GroupEntryData){.core = *core, .data = childData + core->Offset, .memoryManagement = cc4group.MemoryManagement.Reference, .children = NULL, .parent = entry, .absolutePosition = entry->absolutePosition + childDataOffset + core->Offset, .path = NULL, .shareDataWith = NULL})->value;
if(this->readState.position < childEntry->absolutePosition)
{
@@ -1203,6 +1204,7 @@ static void cc4group_init(CC4Group* const this)
this->root.children = NULL;
this->root.core.Directory = true;
this->root.path = NULL;
+ this->root.shareDataWith = NULL;
this->cleanupJobs = CleanUpJobListNew();
@@ -1533,6 +1535,7 @@ static C4GroupEntryData* cc4group_addFileFromDisk(CC4Group* const this, const ch
result->core.Modified = st.st_mtime;
result->core.Size = st.st_size;
result->path = filePath;
+ result->shareDataWith = NULL;
ret:
if(result == NULL || result->path == NULL)
@@ -2251,7 +2254,10 @@ static size_t cc4group_calculateEntrySizes(CC4Group* const this, C4GroupEntryDat
size_t sum = sizeof(C4GroupHeader) + GroupEntryListSize(children) * sizeof(C4GroupEntryCore);
ForeachGroupEntry(children)
{
- sum += cc4group_calculateEntrySizes(this, &entry->value);
+ if(entry->value.shareDataWith == NULL)
+ {
+ sum += cc4group_calculateEntrySizes(this, &entry->value);
+ }
}
return entryData->core.Size = sum;
@@ -2262,7 +2268,7 @@ static size_t cc4group_calculateEntrySizes(CC4Group* const this, C4GroupEntryDat
}
}
-static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* const groupEntry)
+static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* const groupEntry, bool withFileName)
{
if(groupEntry->core.Directory)
{
@@ -2275,13 +2281,13 @@ static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* c
uint32_t crc = 0;
ForeachGroupEntry(children)
{
- cc4group_calculateEntryCRC(this, &entry->value);
+ cc4group_calculateEntryCRC(this, &entry->value, withFileName);
crc ^= entry->value.core.CRC;
}
- groupEntry->core.HasCRC = C4GroupEntryCore_ContentsFileNameCRC;
+ groupEntry->core.HasCRC = withFileName ? C4GroupEntryCore_ContentsFileNameCRC : C4GroupEntryCore_ContentsCRC;
groupEntry->core.CRC = crc;
}
- else if(groupEntry->core.HasCRC != C4GroupEntryCore_ContentsFileNameCRC)
+ else
{
const uint8_t* data = cc4group_getOnlyEntryData(this, groupEntry);
if(data == NULL)
@@ -2289,13 +2295,74 @@ static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* c
return;
}
uint32_t crc = crc32(0, data, groupEntry->core.Size);
- crc = crc32(crc, (uint8_t*)groupEntry->core.FileName, strlen(groupEntry->core.FileName));
+ if(withFileName)
+ {
+ crc = crc32(crc, (uint8_t*)groupEntry->core.FileName, strlen(groupEntry->core.FileName));
+ }
- groupEntry->core.HasCRC = C4GroupEntryCore_ContentsFileNameCRC;
+ groupEntry->core.HasCRC = withFileName ? C4GroupEntryCore_ContentsFileNameCRC : C4GroupEntryCore_ContentsCRC;
groupEntry->core.CRC = crc;
}
}
+// Searches the direct children of parent for an entry whose stored data inEntry can reuse.
+//
+// inEntry:       the entry looking for a data provider; its core.CRC and core.Size must already be calculated
+// parent:        the group entry whose children are examined
+// currentOffset: the offset of parent to inEntry's original parent's header;
+//                currently unused because only direct children are considered (see below)
+//
+// Returns the first suitable child of parent or NULL if there is none.
+//
+// A candidate is only accepted if it
+//  - is not inEntry itself and does not share its data with another entry already,
+//  - is of the same kind (file / directory) as inEntry; a directory's CRC is the XOR of
+//    its children's CRCs and can therefore coincide with the CRC of an unrelated file,
+//  - has the same core.Size and the same core.CRC as inEntry.
+//
+// Entries nested in sub-directories are deliberately not considered: when writing, the Offset of
+// the share target is copied verbatim, but an Offset is relative to the child data of the entry's
+// own parent, so the Offset of a nested entry is meaningless for a child of parent.
+//
+// NOTE(review): equality is still only established through CRC32 and size, the contents are not compared.
+static const C4GroupEntryData* cc4group_findDuplicateEntry(C4GroupEntryData* const inEntry, C4GroupEntryData* const parent, size_t currentOffset)
+{
+	// kept in the signature for the callers; not needed as long as only siblings are matched
+	(void)currentOffset;
+
+	ForeachGroupEntry(parent->children)
+	{
+		const C4GroupEntryData* const candidate = &entry->value;
+		if(candidate == inEntry || candidate->shareDataWith != NULL)
+		{
+			continue;
+		}
+
+		if(candidate->core.CRC == inEntry->core.CRC
+			&& candidate->core.Size == inEntry->core.Size
+			&& !candidate->core.Directory == !inEntry->core.Directory)
+		{
+			return candidate;
+		}
+	}
+
+	return NULL;
+}
+
+// Decides for every direct child of inEntry whether it can reuse the data of another entry
+// and records the decision in the child's shareDataWith (NULL means "stores its own data").
+// Directory children that do not share their data are processed recursively.
+//
+// assumes that everything is loaded already and CRCs and sizes have been calculated before
+static void cc4group_deduplicateEntriesOf(C4GroupEntryData* const inEntry)
+{
+	// discard the sharing decisions of the direct children before searching,
+	// so every one of them is a valid candidate again
+	ForeachGroupEntry(inEntry->children)
+	{
+		entry->value.shareDataWith = NULL;
+	}
+
+	ForeachGroupEntry(inEntry->children)
+	{
+		C4GroupEntryData* const child = &entry->value;
+
+		// the child is still unshared while the search runs; a NULL result leaves it that way
+		child->shareDataWith = cc4group_findDuplicateEntry(child, inEntry, 0);
+
+		// a shared directory is not descended into
+		if(child->shareDataWith == NULL && child->core.Directory)
+		{
+			cc4group_deduplicateEntriesOf(child);
+		}
+	}
+}
+
+// Runs the deduplication for the whole group, starting at its root entry.
+static void cc4group_deduplicateEntries(CC4Group* const this)
+{
+	C4GroupEntryData* const rootEntry = &this->root;
+	cc4group_deduplicateEntriesOf(rootEntry);
+}
+
static bool cc4group_flushBufferedWrite(WriteCallback* const callback)
{
bool success = true;
@@ -2430,20 +2497,38 @@ static bool cc4group_writeEntries(CC4Group* const this, C4GroupEntryData* const
ForeachGroupEntry(children)
{
- entry->value.core.Offset = offset;
+ if(entry->value.shareDataWith == NULL)
+ {
+ entry->value.core.Offset = offset;
+ }
entry->value.core.Packed = 1;
+ if(entry->value.shareDataWith == NULL)
+ {
+ offset += entry->value.core.Size;
+ }
+ }
+
+ ForeachGroupEntry(children)
+ {
+ if(entry->value.shareDataWith != NULL)
+ {
+ entry->value.core.Offset = entry->value.shareDataWith->core.Offset;
+ }
+
if(!cc4group_deflateToCallback(callback, &entry->value.core, sizeof(entry->value.core), Z_NO_FLUSH, Z_OK))
{
SET_MESSAGE_ERROR("Failed writing a group entry core");
return false;
}
-
- offset += entry->value.core.Size;
}
ForeachGroupEntry(children)
{
+ if(entry->value.shareDataWith != NULL)
+ {
+ continue;
+ }
if(entry->value.core.Directory)
{
if(!cc4group_writeEntries(this, &entry->value, callback))
@@ -2529,7 +2614,10 @@ static bool cc4group_saveWithWriteCallback(CC4Group* const this, CC4Group_WriteC
bool success = false;
cc4group_calculateEntrySizes(this, &this->root);
- cc4group_calculateEntryCRC(this, &this->root);
+ cc4group_calculateEntryCRC(this, &this->root, false);
+ cc4group_deduplicateEntries(this);
+ cc4group_calculateEntrySizes(this, &this->root);
+ cc4group_calculateEntryCRC(this, &this->root, true);
if(!cc4group_writeEntries(this, &this->root, &callback))
{
// error is set in cc4group_writeEntries
@@ -3020,6 +3108,7 @@ static C4GroupEntryData* cc4group_createEntry(CC4Group* const this, const char*
entry.data = NULL;
entry.path = NULL;
+ entry.shareDataWith = NULL;
entry.memoryManagement = cc4group.MemoryManagement.Reference;
C4GroupEntryCore_init(&entry.core);
@@ -3130,6 +3219,7 @@ static bool cc4group_setEntryData(CC4Group* const this, const char* const entryP
{
free((void*)entry->path);
entry->path = NULL;
+ entry->shareDataWith = NULL;
}
if(data != NULL && size != 0)