From 789ccf3365bc03a66abf5b36bb4c0098c9c5957a Mon Sep 17 00:00:00 2001 From: Markus Mittendrein Date: Wed, 20 Mar 2019 13:55:01 +0100 Subject: Much documentation (I consider it to be quite complete) --- examples/c4cat.c | 2 + examples/c4cat_dyn.c | 2 + examples/c4info.c | 2 + src/cc4group.h | 188 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/cppc4group.hpp | 40 +++++++++++ 5 files changed, 230 insertions(+), 4 deletions(-) diff --git a/examples/c4cat.c b/examples/c4cat.c index ee0ed9b..322ff70 100644 --- a/examples/c4cat.c +++ b/examples/c4cat.c @@ -7,6 +7,7 @@ #include #include "cc4group.h" +// the internal platform-header is used for the mmap below #include "platform/platform.h" bool catNormalFile(const char* const path, const off_t size) @@ -18,6 +19,7 @@ bool catNormalFile(const char* const path, const off_t size) return false; } + // don't worry about this (and the unmap), it is just another way of reading the file more conveniently void* mappedFile = cc4group_mmap(NULL, size, PROT_READ, MAP_PRIVATE, file, 0); if(close(file) == -1) diff --git a/examples/c4cat_dyn.c b/examples/c4cat_dyn.c index 92da979..51161ef 100644 --- a/examples/c4cat_dyn.c +++ b/examples/c4cat_dyn.c @@ -17,6 +17,8 @@ CC4Group_API* cc4group_dyn; #define cc4group (*cc4group_dyn) +// if you already inspected the normal c4cat you may skip down to main, as the rest is exactly the same thanks to the #define-trick above + bool catNormalFile(const char* const path, const off_t size) { int file = open(path, O_RDONLY); diff --git a/examples/c4info.c b/examples/c4info.c index 6fa63fe..ef4ace0 100644 --- a/examples/c4info.c +++ b/examples/c4info.c @@ -29,6 +29,8 @@ int main(int argc, char* argv[]) else { CC4Group_EntryInfo info; + // yes, this should be error checked, but this is only an example. 
and actually, if the root entry info can not be retrieved, there must be some serious problem somewhere else + // anyway, the worst thing that can happen would be printing some random uninitialized data to stdout. what a nice surprise! cc4group.getEntryInfo(group, "", &info); puts(argv[1]); diff --git a/src/cc4group.h b/src/cc4group.h index d79f6fe..c3e8085 100644 --- a/src/cc4group.h +++ b/src/cc4group.h @@ -1,6 +1,64 @@ #pragma once +// this is the main include file of the cc4group API +// it contains the API struct as well as all types needed by users of the API +// all functions, types and constants available in the API are described here + +// before heading on to the descriptions of the single elements needed, first take a look at the general concepts used throughout the API: +// - the API is designed around CC4Group objects +// CC4Group is an opaque type and thus only pointers to it may be used by applications +// a CC4Group instance can be obtained by calling cc4group.new() +// just keep in mind that it may return NULL in the rare case that memory allocation fails for whatever reason +// - after creating a fresh CC4Group object it's intended use needs to be decided on, by either calling cc4group.create on it... 
+// ...to create an empty group in-memory where contents may be added later +// or call cc4group.openExisting together with the path to a physical group file on disk to load its contents into memory +// (for more sophisticated cases, one of the other open functions may be used) +// it is important that only one of these functions is called, and only on freshly created groups +// otherwise in the lucky case some assertion may trigger or other undefined things may happen +// this two-step process is needed to ensure informative error reporting when something goes wrong while opening existing groups +// - after calling cc4group.create or some cc4group.open-function, the group can be inspected and modified to your heart's content through the rest of the available API +// once all modifications are done, they can be persisted by calling cc4group.save or cc4group.saveOverwrite (but better be sure about overwriting)... +// ...to save everything to the compressed C4Group format on disk +// after saving, the group can still be modified in the same way and saved again as often as desired +// just keep in mind that changes are lost if they are not saved afterwards +// - almost all functions need a path or name as argument +// cc4group.openExisting, cc4group.save and cc4group.saveOverwrite are the only exceptions that take a real full path to a file on disk +// all other functions take only so called "entry paths", which are paths inside the group +// imagine that the contents of the group are your "/" (or C:) and the current working directory is also "/" (or C:) +// thus any entry path is the absolute path of an entry inside the group +// the directory separator used for all entry paths in cc4group is "/" _regardless of the platform_. yes, you better watch out Windows users!
+// in contrast to paths, names are only the name of the entry itself, not the whole path +// - to free all resources used by the group when it is not needed anymore, call cc4group.delete with the group pointer and discard it afterwards (i.e. don't use it anymore) +// - all functions, except cc4group.new, cc4group.delete and cc4group.setTmpMemoryStrategy (where the latter can't fail) follow the same scheme +// all of them can fail, either caused by wrong arguments or by things outside of the application's control +// they all return _true_ if everything went well and _false_ if any error occurred +// information about the error (incredibly useful for debugging or asking for help with problems) can be obtained by using one of the cc4group.getError* functions +// for starters, cc4group.getErrorMessage should contain more than enough information +// just make sure that the returned string is consumed before calling it again +// only errors in the initialization functions (create and open) are seriously critical and make it impossible to continue using the group object +// simple opening errors like file not found just leave the group object in its fresh "new" state, but existing but malformed groups may leave it in an unclean state +// thus the group object should be deleted and a new one created if one of these functions fails +// any other errors happening after successfully opening the group file are recoverable and the group object can continue to be used as usual, ... +// ...although it may be impossible to make progress if the error was caused by something severe like being out of memory +// in general you better check _every_ single API call for errors as it leads to a much more consistent user experience in rare error cases and most importantly...
+// ...it will be a great help in debugging mistakes that are made by the programmer +// for lazy writers or just some testing, the nature of the API actually makes it possible to just chain a bunch of API calls with && and... +// ...thanks to short-circuiting it will stop exactly at the error and the whole expression will return false and the error information can be retrieved as usual +// sadly, this trick didn't seem to make it into any example, as they are written to output additional, more user-friendly, information together with the error message +// - finally (and i still forgot some other general aspect used for sure), because the return value of the functions is already used for the success-bool, ... +// ...functions that should actually return some real data have one (or more) pointer-arguments where they store the returned information +// - if you are such a patient reader and you made it here, you may now look at some of the examples (in the examples folder) +// my recommendations for starters are c4info, c4ls, unc4group and c4copy as they are really simple and don't have much logic for other things +// some of the examples may also have practical use cases, for example unc4group as a much faster c4group extractor (compared to the official c4group command line tool)... +// ...or maybe c4cat which should be familiar, or maybe c4copy, which can freshly repack some really old groups (like version < 1.2 or maybe with worse compression)... +// ...without modifying the original group, and also keeping modification dates and authors, because currently cc4group doesn't update any of these automatically... 
+// ...(but it is planned to implement automatic updating of this stuff that can be turned off for such purposes) +// the examples are not really commented, so it is your exercise to actually find out what they do and how they do it exactly + #ifdef __cplusplus +// this is needed for the C++-wrapper cppc4group +// if you intend to use C++ you probably want to use cppc4group instead of cc4group +// although, still take a look at the API-struct below, as this is the main place of documentation about the API, because it is mostly the same for cppc4group and cc4group extern "C" { #define this this_ #define new new_ @@ -12,35 +70,88 @@ extern "C" { #include #include + +// this struct is used by the getEntryInfo and getEntryInfos methods +// contains all information and metadata possibly known about the group's contents +// sizes always denote the uncompressed size of the group's contents + typedef struct { + // the base name of the entry without parent folders and stuff const char* fileName; + + // a unix timestamp when the entry was modified the last time int32_t modified; + + // the author of this entry + // C4Group only stores the author in directories, thus this will be the parent directory's author for file entries const char* author; + + // the size of the entry + // for files this is simply the filesize + // for directories this is the size of the directories group header plus the size of the directories entry dictionary. 
it represents the overhead of the directory that is used in addition to the stored contents size_t size; + + // for files this is equal to size + // for directories this is the size as specified above, plus the size of all contained entries, or simply the total of the directory size_t totalSize; + + // this only has a meaning for files + // it resembles the execute permission known from unix bool executable; + + // true for directories, false for files bool directory; + + // so called "official" groups were distributed by the inventor of C4Group in former times + // in the original editor this was used to protect the original groups from being changed, although it's only meta information and not a real protection + // cc4group can modify official groups in the usual ways and can change groups' "official" status through the setOfficial method + // users of cc4group should honor and change the "official" status how they think is appropriate (or simply ignore it) bool official; } CC4Group_EntryInfo; + +// CC4Group is the opaque type of the cc4group objects +// only pointers to it can be created and they are passed to the available methods found below typedef struct CC4Group_t CC4Group; + +// callback types for tmp memory strategies + +// a custom cleanup function may be used to clean up arbitrary resources +// it receives a single custom argument, whose contents can be defined by the custom tmp memory strategy when allocating the resources typedef void(*CC4Group_CleanupFunc)(void* data); + +// this struct holds the custom cleanup function together with the argument that it will receive when it is called +// the custom tmp memory strategy must at least set this to a valid function.
data may be uninitialized if it's not used by this function typedef struct { CC4Group_CleanupFunc func; void* data; } CC4Group_CleanupJob; + +// a tmp memory strategy is a function that needs to return a pointer to read- and writable memory at least size bytes big +// in case of an error, the function must return NULL +// in case it returns not NULL, at least the func-member of cleanupJob must be set to a valid function +// additionally its data-pointer member may be set to arbitrary data that is needed for successful cleanup. it will be passed when the cleanup function is called +// the strategy also receives the group object for which the memory is needed. this is mainly passed for use internally for accurate error-reporting when using the predefined strategies typedef void* (*CC4Group_TmpMemoryStrategy)(CC4Group* const this, const size_t size, CC4Group_CleanupJob* cleanupJob); + +// callback types for openWithReadCallback + // the callback has to store a pointer to the newly read data in data, as well as the amount of read data in size // the callback must return true if the end of data is reached or any read error happens and false otherwise // the pointer passed in will be handled as specified with the corresponding MemoryManagement typedef bool (*CC4Group_ReadCallback)(const void** const data, size_t* const size, void* const arg); -// can be used as initialization before and deinitialization after all necessary calls to a read callback are made; for instance for buffer allocation and deletion +// can be used as initialization before and deinitialization after all necessary calls to a read callback are made +// for instance for buffer allocation and deletion // the callback should return true on success and false on failure +// a deinitialization failure will only trigger a warning, as it doesn't really affect the operation of the group typedef bool (*CC4Group_ReadSetupCallback)(void* const arg); + +// this is the main API struct of cc4group +// it contains all
available methods and constants typedef struct { struct { int Take; // cc4group will free the data when its not needed anymore; e.g. in the destructor or when setting the file's data again @@ -50,16 +161,32 @@ typedef struct { struct { + // all temporarily uncompressed data will be hold in-memory (RAM) + // this is the preferred strategy because it is very fast and relies only on RAM CC4Group_TmpMemoryStrategy Memory; + + // all temporarily uncompressed data will be stored in a memory-mapped temporary file that is being created in the current working directory CC4Group_TmpMemoryStrategy File; + + // if the uncompressed data is smaller than 500 MB in-memory is tried first + // if in-memory fails (e.g. because there is not enough RAM) or the data's size is greater than or equal to 500 MB, it will fall back to the file strategy + // this is the default strategy and should be appropriate for almost any case CC4Group_TmpMemoryStrategy Auto; } const TmpMemoryStrategies; + // sets the global temporary memory strategy to be used for storing the uncompressed data of a group + // NOTE: this is a static method (i.e. 
it is used without any object) + // this will affect all open*-calls issued after calling this function + // either one of the pre-defined strategies from above or any custom strategy can be specified + // a custom strategy can be used by passing an appropriate function pointer + // for details, look at the description of CC4Group_TmpMemoryStrategy void (*setTmpMemoryStrategy)(const CC4Group_TmpMemoryStrategy strategy); + // allocates and initializes a new group object, like the operator new // NULL may be returned if the memory allocation (malloc) fails; in this case errno contains additional error information CC4Group* (*new)(void); + // destructs the group object and frees all memory used by group, like the operator delete void (*delete)(CC4Group* const this); @@ -84,7 +211,9 @@ typedef struct { // the file must have been opened with read access; also be aware that the file must be opened with binary mode on windows bool (*openFilePointer)(CC4Group* const this, FILE* fd); - // opens a group and calls the callback to get the group data; initCallback is called before readCallback is called and deinitCallback is called after all read operations are done; initCallback and deinitCallback may be NULL if they should not be used + // opens a group and calls the callback to get the group data + // initCallback is called before readCallback is called and deinitCallback is called after all read operations are done + // initCallback and deinitCallback may be NULL if they should not be used bool (*openWithReadCallback)(CC4Group* const this, CC4Group_ReadCallback const readCallback, void* const callbackArg, int const memoryManagement, CC4Group_ReadSetupCallback const initCallback, CC4Group_ReadSetupCallback const deinitCallback); @@ -98,32 +227,75 @@ typedef struct { // extraction to disk + + // extracts the complete group contents recursively into a newly created directory named by targetPath + // the directory itself must not exist, but the containing directory must exist. 
otherwise an error will be generated bool (*extractAll)(CC4Group* const this, const char* const targetPath); + + // extracts only a single file or a sub directory of the group denoted by the entryPath to the targetPath + // the containing directory of the targetPath must exist, but the final targetPath must not exist. otherwise an error will be generated bool (*extractSingle)(CC4Group* const this, const char* const entryPath, const char* const targetPath); + // retrieval of metadata about the stored files and directories + // stores all metadata known about the entry denoted by path into the CC4Group_EntryInfo struct pointed to by info, similar to stat + // an empty path "" or NULL will retrieve information about the root directory bool (*getEntryInfo)(CC4Group* const this, const char* const path, CC4Group_EntryInfo* const info); + + // retrieves all metadata like getEntryInfo of all files and directories inside the directory denoted by path, similar to ls + // a pointer to a dynamically allocated array of all CC4Group_EntryInfo structs will be stored in infos + // the amount of infos (and thus the amount of files in the directory) is stored in size + // the caller must free the pointer stored in infos when it is not needed anymore + // an empty path "" or NULL will retrieve information about the root directory bool (*getEntryInfos)(CC4Group* const this, const char* const path, CC4Group_EntryInfo** const infos, size_t* const size); - // the group owns the data pointed to. the pointer is valid until the group destructor is called + // data retrieval and manipulation + + // stores a pointer to the read-only data of the file denoted by entryPath in data, and the size of the data in size + // the group owns the data pointed to. 
the pointer is valid until the group destructor is called or the data is changed through setEntryData bool (*getEntryData)(CC4Group* const this, const char* const entryPath, const void** const data, size_t* const size); + // overwrites the data of the file denoted by entryPath with data indicated by data and size // see the description of MemoryManagement to know if and when data has to be freed by the caller bool (*setEntryData)(CC4Group* const this, const char* const entryPath, const void* const data, size_t const size, int const memoryManagementMode); // group metadata handling - bool (*setMaker)(CC4Group* const this, const char* const maker, const char* const path, bool const recursive); + // the following set* functions set specific metadata of files or directories denoted by path + // in case of directories, the metadata will be set recursively to all contents as applicable if recursive is true + // the root directory can be addressed by passing NULL or "" as path + + // files and directories bool (*setCreation)(CC4Group* const this, int32_t const creation, const char* const path, bool const recursive); + + // directories only + bool (*setMaker)(CC4Group* const this, const char* const maker, const char* const path, bool const recursive); bool (*setOfficial)(CC4Group* const this, bool const official, const char* const path, bool const recursive); + + // files only bool (*setExecutable)(CC4Group* const this, bool const executable, const char* const path); // modifying the group + + // creates an empty directory inside the group at the place with the name as denoted by path + // the parent directory (if any) must exist already bool (*createDirectory)(CC4Group* const this, const char* const path); + + // creates an empty file inside the group at the place with the name as denoted by path + // the parent directory (if any) must exist already + // the file contents can afterwards be set with setEntryData bool (*createFile)(CC4Group* const this, const char* const
path); + + // renames, and possibly moves, the entry (file or directory) denoted by oldPath to newPath + // the parent directory of newPath (if any) must exist already + // if only strictly renaming is desired, oldPath and newPath must contain the same full path to the parent directory, with only the final name being different bool (*renameEntry)(CC4Group* const this, const char* const oldPath, const char* const newPath); + + // deletes the entry (file or directory) denoted by path + // directories are only deleted when recursive is true, which has the effect of recursively deleting all contents (if any) and then deleting the directory + // if recursive is false but the path ends up being a directory, an error is generated bool (*deleteEntry)(CC4Group* const this, const char* const path, bool const recursive); @@ -145,7 +317,15 @@ typedef struct { const char* (*getErrorCauser)(const CC4Group* const this); } const CC4Group_API; + +// in the case that the cc4group API shall be loaded dynamically at runtime, the same global instance of the struct can instead be accessed by loading the symbol named "cc4group" and casting it to CC4Group_API* +// it contains all necessary and available data of the API and is the only symbol that needs to be loaded +// also you have to #define CC4GROUP_DYNAMIC_LOAD before #include-ing this header file in that case +// for drop-in support for code that is written for the API linked at compile-time (or just for convenience), store the loaded pointer in a global variable, say "cc4group_dyn" and then #define cc4group as (*cc4group_dyn) +// c4cat_dyn is a working example of this concept + #ifndef CC4GROUP_DYNAMIC_LOAD +// access to all cc4group-methods and constants must be made through this global instance of the API struct extern CC4Group_API cc4group; #endif diff --git a/src/cppc4group.hpp b/src/cppc4group.hpp index 166fadf..a1bb28a 100644 --- a/src/cppc4group.hpp +++ b/src/cppc4group.hpp @@ -1,5 +1,14 @@ #pragma once +// this is the main and
only include file for cppc4group, the C++-wrapper for cc4group +// because cc4group is already object-oriented, this is just a simple wrapper and almost all methods work exactly the same, just with C++-types +// although, when dealing with raw memory (because cc4group can't know the type of the stored data) also users of this wrapper have to deal with some void* +// because most methods behave exactly the same, please look at cc4group.h for their description (they are named exactly the same with a few exceptions) +// in case there are real differences, they will be noted in this header also +// until now, all examples only use cc4group's C-API, but as a C++-programmer you should have no problems reading them anyway +// one general difference to the C-API that is not mentioned at the individual methods is that functions actually returning data return a std::optional instead of a bool... +// ...the optional will be populated normally and empty in error cases + #include #include #include @@ -7,19 +16,27 @@ #include class CppC4Group { + // all C-related stuff is hidden from this header so it doesn't land in the precious C++-only code this might be used in...
+ // ...and pollute the root-namespace with various types that are better contained in this class instead struct Private; std::unique_ptr p; public: + // this struct is used for the getEntryData-method to return them in a uniform way (instead of using reference- or pointer-arguments) struct Data { + // this will contain the information cc4group.getEntryData stores in its data argument const void* data; + // and the same for size size_t size; + // there should be no reason to construct one of these as API user, but it is needed internally Data(); Data(const void* const data, const size_t size); }; + // this struct contains the same things as the CC4Group_EntryInfo struct of the C-version + // member descriptions also apply equally here struct EntryInfo { std::string fileName; int32_t modified; @@ -31,21 +48,36 @@ public: bool official; }; + // this struct is used in custom tmp memory strategies + // instead of using additional pointer arguments to return the cleanup stuff everything is returned in a single one of this by the tmp memory strategy function struct TmpMemory { using TmpMemoryCleanupCallback = std::function; + // memory needs to hold the readily allocated memory + // it corresponds to the pointer that needs to be returned by C tmp memory strategies void* memory; + + // cleanup is a fancy std::function that will be called when the memory is not needed anymore + // it receives the memory pointer stored above and the custom argument stored right below as arguments TmpMemoryCleanupCallback cleanup; + + // this can be set to arbitrary data needed by the cleanup callback to properly cleanup all used resources void* arg; + // this is just a plain constructor setting each member accordingly TmpMemory(void* const memory, const TmpMemoryCleanupCallback& cleanup, void* const arg); }; public: + // the C++ custom tmp memory strategy only receives the needed size as argument and must return an optional containing the information described above (TmpMemory) + // or an 
empty optional on failure using TmpMemoryCallback = std::optional(*)(size_t size); + + // these two are equivalent to their C counterparts using ReadCallback = bool(*)(const void** const data, size_t* const size, void* const arg); using SetupCallback = bool(*)(void* const arg); + // these enums are just mapped to their C-counterparts internally enum TmpMemoryStrategy { Memory, File, @@ -58,11 +90,17 @@ public: Reference }; + // use this to set one of the predefined strategies static void setTmpMemoryStrategy(const TmpMemoryStrategy strategy); + + // and this for your own custom strategy static void setTmpMemoryStrategy(const TmpMemoryCallback callback); public: + // the constructor will automatically construct an internal CC4Group, so no new-equivalent method is needed CppC4Group(); + + // the destructor will automatically delete the internal CC4Group, so also no extra method needed ~CppC4Group(); void create(); @@ -71,6 +109,8 @@ public: bool openFilePointer(FILE* file); bool openMemory(const void* const data, const size_t size, const MemoryManagement management = Reference); bool openWithReadCallback(const ReadCallback callback, void* const callbackArg, const MemoryManagement management = Take, SetupCallback initCallback = nullptr, SetupCallback deinitCallback = nullptr); + + // save actually maps to both cc4group.save and cc4group.saveOverwrite, thanks to default arguments (yes, thats the reason why they are separate in the C-API) bool save(const std::string& path, const bool overwrite = false); bool extractAll(const std::string& path); -- cgit v1.2.3-54-g00ecf