I have recently been trying to get a solid grasp of how to work with binary data, both for networking and file storage, in C++. In Java, there are many utility classes that make this easy. Since these don't exist in C++, I figured that writing a limited class that does something similar would be a good way to learn how to work with binary data.
BinaryBlob.h:
#pragma once
#include <vector>
#include <string>
/// Raw storage unit used by the BinaryBlob API.
using byte = unsigned char;

/// Growable in-memory byte buffer with an append-only write side and a
/// sequential read cursor.
///
/// The write* functions append to the end of the buffer; the read* functions
/// consume bytes starting at the current read cursor and advance it. Values
/// must be read back in the same order they were written.
class BinaryBlob {
public:
    /// Appends the contents of the file at @p filePath to the buffer.
    void loadFromFile(const std::string &filePath);

    /// Writes the whole buffer to the file at @p filePath in binary mode.
    void writeToFile(const std::string &filePath) const;

    /// Appends @p b as a single byte.
    void writeBool(bool b);
    /// Consumes one byte and decodes it as a boolean.
    bool readBool();

    /// Appends @p b as a sign byte followed by a 32-bit magnitude.
    void writeInt(int b);
    /// Consumes a sign byte and a 32-bit magnitude and rebuilds the integer.
    int readInt();

    /// Appends the length of @p str (via writeInt) followed by its characters.
    void writeString(const std::string &str);
    /// Consumes a length prefix and that many characters.
    std::string readString();

    /// Appends @p length bytes starting at @p data.
    void writeBytes(byte *data, unsigned length);

    /// Copies the next @p length bytes into @p data and advances the cursor.
    ///
    /// Deliberately NOT declared `inline`: the definition lives in the .cpp
    /// file, and an inline function must be defined in every translation unit
    /// that uses it. The compiler is still free to inline calls made from
    /// within that .cpp file.
    void readBytes(byte *data, unsigned length);

private:
    std::vector<byte> m_binaryData;                ///< Every byte written or loaded so far.
    unsigned m_readIndex = 0;                      ///< Offset of the next byte to be read.
    constexpr static unsigned BUFFER_SIZE = 1024;  ///< Chunk size used when reading a file.
};
BinaryBlob.cpp:
#include "BinaryBlob.h"
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <stdexcept>
/// Appends the entire contents of the file at @p filePath to the blob.
///
/// The file is opened in binary mode and streamed in BUFFER_SIZE-byte chunks.
/// If the file cannot be opened the stream starts out in a failed state, the
/// loop body never runs, and the blob is left unchanged.
///
/// @param filePath Path of the file to read.
void BinaryBlob::loadFromFile(const std::string &filePath) {
    std::ifstream fin(filePath, std::ios::binary);

    // Size the scratch buffer from the same constant that is passed to read()
    // so the two can never drift apart and overflow the buffer.
    byte buffer[BUFFER_SIZE];

    while (fin) {
        fin.read(reinterpret_cast<char*>(buffer), BUFFER_SIZE);
        // The last read puts the stream into a failed state, but gcount()
        // still reports how many bytes of the partial chunk were stored.
        // gcount() never exceeds BUFFER_SIZE, so the narrowing is safe.
        writeBytes(buffer, static_cast<unsigned>(fin.gcount()));
    }
}
/// Writes every byte currently held by the blob to the file at @p filePath.
///
/// The file is opened in binary mode with the default std::ofstream open
/// mode. The read cursor is not consulted: the full buffer is written.
///
/// @param filePath Path of the file to write.
void BinaryBlob::writeToFile(const std::string &filePath) const {
    std::ofstream out(filePath, std::ios::binary);

    const char *rawBytes = reinterpret_cast<const char*>(m_binaryData.data());
    const auto byteCount = static_cast<std::streamsize>(m_binaryData.size());

    out.write(rawBytes, byteCount);
}
void BinaryBlob::writeBool(bool b) {
byte byteData = b ? 1 : 0l;
writeBytes(&byteData, 1);
}
bool BinaryBlob::readBool(){
byte byteData;
readBytes(&byteData, 1);
return byteData == 1;
}
/// Appends a signed integer to the blob in sign-and-magnitude form.
///
/// Layout: one bool byte (1 when @p b is strictly positive, 0 otherwise)
/// followed by the 32-bit magnitude in the host's byte order. Zero is stored
/// with a 0 sign byte and a zero magnitude.
///
/// @param b Value to store.
void BinaryBlob::writeInt(int b) {
    const bool isPositive = b > 0;

    // Negate in unsigned arithmetic rather than calling std::abs: unsigned
    // wrap-around is well defined for every input, whereas std::abs(INT_MIN)
    // is undefined behaviour because the result does not fit in an int.
    const std::uint32_t asUnsigned = static_cast<std::uint32_t>(b);
    std::uint32_t absValue = (b < 0) ? (0u - asUnsigned) : asUnsigned;

    writeBool(isPositive);
    writeBytes(reinterpret_cast<byte*>(&absValue), sizeof(std::uint32_t));
}
/// Consumes a sign byte and a 32-bit magnitude and rebuilds the integer.
///
/// The magnitude is read in the host's byte order. The sign is applied in
/// unsigned (modulo 2^32) arithmetic and the result is then converted to int.
///
/// @return The decoded value.
int BinaryBlob::readInt() {
    const bool positive = readBool();

    std::uint32_t magnitude;
    readBytes(reinterpret_cast<byte*>(&magnitude), sizeof(std::uint32_t));

    if (positive) {
        return static_cast<int>(magnitude);
    }
    // Unsigned negation: the same result as multiplying by -1 converted to
    // an unsigned 32-bit value.
    return static_cast<int>(0u - magnitude);
}
/// Appends a length-prefixed string to the blob.
///
/// The length is written with writeInt, followed by one byte per character
/// of @p str. No terminator is stored.
///
/// @param str String to store.
void BinaryBlob::writeString(const std::string &str) {
    // A signed length prefix is used only because this practice class has no
    // writer for unsigned values; an unsigned prefix would be the better fit.
    writeInt(static_cast<int>(str.length()));

    for (const char ch : str) {
        // writeBytes needs a mutable pointer, so hand it a local copy.
        char scratch = ch;
        writeBytes(reinterpret_cast<byte*>(&scratch), 1);
    }
}
/// Consumes a length-prefixed string from the blob.
///
/// Reads the length with readInt, then reads that many bytes directly into
/// the result string. No intermediate heap buffer is used, so there is
/// nothing to release by hand.
///
/// @return The decoded string (empty when the stored length is 0).
/// @throws std::length_error if the stored length prefix is negative, which
///         can only come from corrupt or mismatched data.
std::string BinaryBlob::readString() {
    const int length = readInt();
    if (length < 0) {
        throw std::length_error("BinaryBlob::readString: negative length prefix");
    }

    std::string str(static_cast<std::string::size_type>(length), '\0');

    // &str[0] is valid even for an empty string (it refers to the
    // terminator), and readBytes copies nothing when the length is 0.
    readBytes(reinterpret_cast<byte*>(&str[0]), static_cast<unsigned>(length));
    return str;
}
/// Appends @p length raw bytes starting at @p data to the end of the blob.
///
/// The bytes are only read, never modified. A single range insert lets the
/// vector grow at most once per call instead of once per byte.
///
/// @param data   Pointer to the first byte to copy.
/// @param length Number of bytes to copy; 0 appends nothing.
void BinaryBlob::writeBytes(byte *data, unsigned length) {
    m_binaryData.insert(m_binaryData.end(), data, data + length);
}
/// Copies the next @p length bytes of the blob into @p data and advances the
/// read cursor past them.
///
/// @param data   Destination buffer with room for at least @p length bytes.
/// @param length Number of bytes to consume; 0 is a no-op.
/// @throws std::out_of_range if fewer than @p length unread bytes remain. In
///         that case nothing is copied and the cursor does not move.
void BinaryBlob::readBytes(byte *data, unsigned length) {
    const auto total = m_binaryData.size();

    // Compare against the remaining byte count rather than computing
    // m_readIndex + length, which could wrap around for large lengths.
    if (m_readIndex > total || length > total - m_readIndex) {
        throw std::out_of_range("BinaryBlob::readBytes: read past end of data");
    }

    for (unsigned i = 0; i < length; ++i) {
        data[i] = m_binaryData[m_readIndex + i];
    }
    m_readIndex += length;
}
Test program 1 (designed to test if the file io functions work):
#include <string>
#include <iostream>
#include "BinaryBlob.h"
int main(int argc, char *argv[]){
if(argc != 3){
std::cerr << "Usage: " << argv[0] << " <input-file> <output-file>" << std::endl;
return -1;
}
std::string inputPath = argv[1];
std::string outputPath = argv[2];
BinaryBlob binaryBlob;
binaryBlob.loadFromFile(inputPath);
binaryBlob.writeToFile(outputPath);
return 0;
}
Test program 2 (designed to test if the type <-> binary data functions work):
#include <iostream>
#include "BinaryBlob.h"
/// Typed round-trip test for BinaryBlob.
///
/// Usage: <program> <file> <create/read>
///  - create: prompts for a boolean (0 or 1), an integer, and a
///            whitespace-free string, serialises them in that order, and
///            writes the blob to <file>.
///  - read:   loads <file> and prints the three values in the same order.
///
/// Returns 0 on success. Returns -1 (matching the file round-trip test
/// program) on a usage error, an unrecognised command, or input that could
/// not be parsed, instead of silently doing nothing and reporting success.
int main(int argc, char *argv[]) {
    if (argc != 3) {
        std::cerr << "Usage: " << argv[0] << " <file> <create/read>" << std::endl;
        return -1;
    }

    const std::string file = argv[1];
    const std::string command = argv[2];
    BinaryBlob binaryBlob;

    if (command == "create") {
        bool b = false;
        int i = 0;
        std::string str;

        std::cout << "Enter a boolean: ";
        std::cin >> b;
        std::cout << "Enter an integer: ";
        std::cin >> i;
        std::cout << "Enter a string: ";
        std::cin >> str;

        // A failed extraction leaves the stream in a failed state; refuse to
        // write a file built from values the user never actually supplied.
        if (!std::cin) {
            std::cerr << "Invalid input" << std::endl;
            return -1;
        }

        binaryBlob.writeBool(b);
        binaryBlob.writeInt(i);
        binaryBlob.writeString(str);
        binaryBlob.writeToFile(file);
    } else if (command == "read") {
        binaryBlob.loadFromFile(file);

        // Values must be read back in the order they were written.
        const bool b = binaryBlob.readBool();
        const int i = binaryBlob.readInt();
        const std::string str = binaryBlob.readString();

        std::cout << "Boolean is: " << b << std::endl;
        std::cout << "Integer is: " << i << std::endl;
        std::cout << "String is: " << str << std::endl;
    } else {
        std::cerr << "Unknown command: " << command << std::endl;
        std::cerr << "Usage: " << argv[0] << " <file> <create/read>" << std::endl;
        return -1;
    }

    return 0;
}
The specific things I am interested in feedback on are (although any other feedback of any sort is most certainly welcome, as long as it's remotely useful):
- Because I only have access to a handful of Intel and AMD CPUs, which are all very similar, I didn't test this code for portability (e.g., running on an ARM CPU) to see whether files generated on one CPU are readable on another. Ignoring endianness, is there anything I missed?
- If I am writing code designed to be run on x86 CPUs only (both Intel and AMD), but with a variety of compilers and OSes, do I need to worry about anything more than I did here (including endianness)?
- Is my use of `reinterpret_cast` correct?
- For the string writing, I am just casting the string's data to its byte representation. Is this reliable? I know the C++ standard does not guarantee a character encoding, but do these encodings differ in practice on x86 CPUs? What about on a broader range of hardware (e.g., ARM)? Does this depend on the compiler or OS? If I wanted to handle this correctly, such that it is fully portable, how would I do so?
- How far, if at all, am I jumping outside the C++ standard with this code, into undefined behavior? Unless this is "not at all," how can I accomplish the same thing fully within the standard? Is this even possible?