diff options
| author | Markus Mittendrein <git@maxmitti.tk> | 2019-08-31 15:16:07 +0200 |
|---|---|---|
| committer | Markus Mittendrein <git@maxmitti.tk> | 2019-08-31 15:16:07 +0200 |
| commit | 36f1c4c4e822458170099890cf586dd013e82673 (patch) | |
| tree | abc87bd974a7600b459ef7de16708182eb90f293 | |
| parent | 4ba42d7fe8eb1b10c8b10c545a33c0ec981dcf7e (diff) | |
| download | cc4group-36f1c4c4e822458170099890cf586dd013e82673.tar.gz cc4group-36f1c4c4e822458170099890cf586dd013e82673.zip | |
Store duplicate data only once if possible (Offset stays in the valid range)
| -rw-r--r-- | src/cc4group.c | 114 |
1 files changed, 102 insertions, 12 deletions
diff --git a/src/cc4group.c b/src/cc4group.c index b086110..2919224 100644 --- a/src/cc4group.c +++ b/src/cc4group.c @@ -73,6 +73,7 @@ typedef struct C4GroupEntryData_t { size_t absolutePosition; const char* path; + const struct C4GroupEntryData_t* shareDataWith; } C4GroupEntryData; LIST_AUTO(C4GroupEntryData, GroupEntryList) @@ -326,7 +327,7 @@ static bool buildChildren(CC4Group* const this, C4GroupEntryData* const entry, s return false; } - C4GroupEntryData* childEntry = &GroupEntryListAppend(entry->children, (C4GroupEntryData){.core = *core, .data = childData + core->Offset, .memoryManagement = cc4group.MemoryManagement.Reference, .children = NULL, .parent = entry, .absolutePosition = entry->absolutePosition + childDataOffset + core->Offset, .path = NULL})->value; + C4GroupEntryData* childEntry = &GroupEntryListAppend(entry->children, (C4GroupEntryData){.core = *core, .data = childData + core->Offset, .memoryManagement = cc4group.MemoryManagement.Reference, .children = NULL, .parent = entry, .absolutePosition = entry->absolutePosition + childDataOffset + core->Offset, .path = NULL, .shareDataWith = NULL})->value; if(this->readState.position < childEntry->absolutePosition) { @@ -1203,6 +1204,7 @@ static void cc4group_init(CC4Group* const this) this->root.children = NULL; this->root.core.Directory = true; this->root.path = NULL; + this->root.shareDataWith = NULL; this->cleanupJobs = CleanUpJobListNew(); @@ -1533,6 +1535,7 @@ static C4GroupEntryData* cc4group_addFileFromDisk(CC4Group* const this, const ch result->core.Modified = st.st_mtime; result->core.Size = st.st_size; result->path = filePath; + result->shareDataWith = NULL; ret: if(result == NULL || result->path == NULL) @@ -2251,7 +2254,10 @@ static size_t cc4group_calculateEntrySizes(CC4Group* const this, C4GroupEntryDat size_t sum = sizeof(C4GroupHeader) + GroupEntryListSize(children) * sizeof(C4GroupEntryCore); ForeachGroupEntry(children) { - sum += cc4group_calculateEntrySizes(this, &entry->value); + if(entry->value.shareDataWith == NULL) + { + sum += cc4group_calculateEntrySizes(this, &entry->value); + } } return entryData->core.Size = sum; @@ -2262,7 +2268,7 @@ static size_t cc4group_calculateEntrySizes(CC4Group* const this, C4GroupEntryDat } } -static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* const groupEntry) +static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* const groupEntry, bool withFileName) { if(groupEntry->core.Directory) { @@ -2275,13 +2281,13 @@ static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* c uint32_t crc = 0; ForeachGroupEntry(children) { - cc4group_calculateEntryCRC(this, &entry->value); + cc4group_calculateEntryCRC(this, &entry->value, withFileName); crc ^= entry->value.core.CRC; } - groupEntry->core.HasCRC = C4GroupEntryCore_ContentsFileNameCRC; + groupEntry->core.HasCRC = withFileName ? C4GroupEntryCore_ContentsFileNameCRC : C4GroupEntryCore_ContentsCRC; groupEntry->core.CRC = crc; } - else if(groupEntry->core.HasCRC != C4GroupEntryCore_ContentsFileNameCRC) + else { const uint8_t* data = cc4group_getOnlyEntryData(this, groupEntry); if(data == NULL) @@ -2289,13 +2295,74 @@ static void cc4group_calculateEntryCRC(CC4Group* const this, C4GroupEntryData* c return; } uint32_t crc = crc32(0, data, groupEntry->core.Size); - crc = crc32(crc, (uint8_t*)groupEntry->core.FileName, strlen(groupEntry->core.FileName)); + if(withFileName) + { + crc = crc32(crc, (uint8_t*)groupEntry->core.FileName, strlen(groupEntry->core.FileName)); + } - groupEntry->core.HasCRC = C4GroupEntryCore_ContentsFileNameCRC; + groupEntry->core.HasCRC = withFileName ? C4GroupEntryCore_ContentsFileNameCRC : C4GroupEntryCore_ContentsCRC; groupEntry->core.CRC = crc; } } +// currentOffset is the offset of parent to inEntry's original parent's header +static const C4GroupEntryData* cc4group_findDuplicateEntry(C4GroupEntryData* const inEntry, C4GroupEntryData* const parent, size_t currentOffset) +{ + uint32_t ownCRC = inEntry->core.CRC; + ForeachGroupEntry(parent->children) + { + if(&entry->value != inEntry && entry->value.shareDataWith == NULL) + { + if(entry->value.core.CRC == ownCRC) + { + return &entry->value; + } + else if(entry->value.core.Directory) + { + const C4GroupEntryData* result = cc4group_findDuplicateEntry(inEntry, &entry->value, currentOffset); + if(result != 0) + { + return result; + } + } + + if(entry->value.shareDataWith == NULL) + { + currentOffset += entry->value.core.Size; + } + } + } + + return 0; +} + +// assumes that everything is loaded already and CRCs and sizes have been calculated before +static void cc4group_deduplicateEntriesOf(C4GroupEntryData* const inEntry) +{ + ForeachGroupEntry(inEntry->children) + { + entry->value.shareDataWith = NULL; + } + + ForeachGroupEntry(inEntry->children) + { + const C4GroupEntryData* shareWith = cc4group_findDuplicateEntry(&entry->value, inEntry, 0); + if(shareWith != 0) + { + entry->value.shareDataWith = shareWith; + } + else if(entry->value.core.Directory) + { + cc4group_deduplicateEntriesOf(&entry->value); + } + } +} + +static void cc4group_deduplicateEntries(CC4Group* const this) +{ + cc4group_deduplicateEntriesOf(&this->root); +} + static bool cc4group_flushBufferedWrite(WriteCallback* const callback) { bool success = true; @@ -2430,20 +2497,38 @@ static bool cc4group_writeEntries(CC4Group* const this, C4GroupEntryData* const ForeachGroupEntry(children) { - entry->value.core.Offset = offset; + if(entry->value.shareDataWith == NULL) + { + entry->value.core.Offset = offset; + } entry->value.core.Packed = 1; + if(entry->value.shareDataWith == NULL) + { + offset += entry->value.core.Size; + } + } + + ForeachGroupEntry(children) + { + if(entry->value.shareDataWith != NULL) + { + entry->value.core.Offset = entry->value.shareDataWith->core.Offset; + } + if(!cc4group_deflateToCallback(callback, &entry->value.core, sizeof(entry->value.core), Z_NO_FLUSH, Z_OK)) { SET_MESSAGE_ERROR("Failed writing a group entry core"); return false; } - - offset += entry->value.core.Size; } ForeachGroupEntry(children) { + if(entry->value.shareDataWith != NULL) + { + continue; + } if(entry->value.core.Directory) { if(!cc4group_writeEntries(this, &entry->value, callback)) @@ -2529,7 +2614,10 @@ static bool cc4group_saveWithWriteCallback(CC4Group* const this, CC4Group_WriteC bool success = false; cc4group_calculateEntrySizes(this, &this->root); - cc4group_calculateEntryCRC(this, &this->root); + cc4group_calculateEntryCRC(this, &this->root, false); + cc4group_deduplicateEntries(this); + cc4group_calculateEntrySizes(this, &this->root); + cc4group_calculateEntryCRC(this, &this->root, true); if(!cc4group_writeEntries(this, &this->root, &callback)) { // error is set in cc4group_writeEntries @@ -3020,6 +3108,7 @@ static C4GroupEntryData* cc4group_createEntry(CC4Group* const this, const char* entry.data = NULL; entry.path = NULL; + entry.shareDataWith = NULL; entry.memoryManagement = cc4group.MemoryManagement.Reference; C4GroupEntryCore_init(&entry.core); @@ -3130,6 +3219,7 @@ static bool cc4group_setEntryData(CC4Group* const this, const char* const entryP { free((void*)entry->path); entry->path = NULL; + entry->shareDataWith = NULL; } if(data != NULL && size != 0) |
