-
Notifications
You must be signed in to change notification settings - Fork 64
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added buffer files #101
base: main
Are you sure you want to change the base?
Added buffer files #101
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,76 @@ | ||||||
#ifndef COMPRESSOR_FRONTEND_BUFFER_HPP | ||||||
#define COMPRESSOR_FRONTEND_BUFFER_HPP | ||||||
|
||||||
// C++ libraries | ||||||
#include <cstdint> | ||||||
#include <vector> | ||||||
|
||||||
// Project Headers | ||||||
#include "Constants.hpp" | ||||||
|
||||||
/** | ||||||
* A base class for keeping track of static and dynamic buffers needed for a growing buffer. | ||||||
* The base class does not grow the buffer, the child class is responsible for doing this. | ||||||
*/ | ||||||
|
||||||
namespace compressor_frontend { | ||||||
template <typename type> | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
class Buffer { | ||||||
public: | ||||||
// Prevent copying of buffer as this will be really slow | ||||||
Buffer (Buffer&&) = delete; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||
|
||||||
Buffer& operator= (const Buffer&) = delete; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Operators should be after the constructor. |
||||||
|
||||||
Buffer () { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use the initializer list. |
||||||
m_curr_pos = 0; | ||||||
m_active_storage = m_static_storage; | ||||||
m_curr_storage_size = cStaticByteBuffSize; | ||||||
} | ||||||
|
||||||
~Buffer () { | ||||||
for (type* dynamic_storage : m_dynamic_storages) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
free(dynamic_storage); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
} | ||||||
|
||||||
type* get_active_buffer () { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
return m_active_storage; | ||||||
} | ||||||
|
||||||
[[nodiscard]] uint32_t get_curr_storage_size () const { | ||||||
return m_curr_storage_size; | ||||||
} | ||||||
|
||||||
void set_curr_pos (uint32_t curr_pos) { | ||||||
m_curr_pos = curr_pos; | ||||||
} | ||||||
|
||||||
[[nodiscard]] uint32_t get_curr_pos () const { | ||||||
return m_curr_pos; | ||||||
} | ||||||
|
||||||
/** | ||||||
* Reset a buffer to parse a new log message | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||
*/ | ||||||
virtual void reset () { | ||||||
m_curr_pos = 0; | ||||||
for (type* dynamic_storage : m_dynamic_storages) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||
free(dynamic_storage); | ||||||
} | ||||||
m_dynamic_storages.clear(); | ||||||
m_active_storage = m_static_storage; | ||||||
m_curr_storage_size = cStaticByteBuffSize; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not in this review, but I would suggest changing
|
||||||
} | ||||||
|
||||||
protected: | ||||||
uint32_t m_curr_pos; | ||||||
uint32_t m_curr_storage_size; | ||||||
// Dynamic storage performs better as c-style arrays than as vectors | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can omit this comment. It should be clear to the reader than a vector of vectors is not as appropriate as a vector of C-arrays. |
||||||
type* m_active_storage; | ||||||
std::vector<type*> m_dynamic_storages; | ||||||
type m_static_storage[cStaticByteBuffSize]; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
}; | ||||||
} | ||||||
|
||||||
#endif // COMPRESSOR_FRONTEND_BUFFER_HPP | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add empty newline at end of file. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
// C++ libraries | ||
#include <memory.h> | ||
#include <string> | ||
|
||
// spdlog | ||
#include <spdlog/spdlog.h> | ||
|
||
// Project Headers | ||
#include "InputBuffer.hpp" | ||
|
||
using std::string; | ||
using std::to_string; | ||
|
||
namespace compressor_frontend { | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unnecessary empty line. |
||
/**
 * Returns the input buffer to its initial state: clears the read-tracking fields owned by
 * InputBuffer, then performs the base-class reset.
 */
void InputBuffer::reset () {
    m_bytes_read = 0;
    m_consumed_pos = 0;
    m_last_read_first_half = false;
    m_finished_reading_file = false;
    m_at_end_of_file = false;
    Buffer::reset();
}
|
||
bool InputBuffer::read_is_safe () { | ||
if (m_finished_reading_file) { | ||
return false; | ||
} | ||
// If the next message starts at 0, the previous character is at m_curr_storage_size - 1 | ||
if (m_consumed_pos == -1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
m_consumed_pos = m_curr_storage_size - 1; | ||
} | ||
// Check that the last log message ends in the half of the buffer that was last read. | ||
// This means the other half of the buffer has already been fully used. | ||
if ((!m_last_read_first_half && m_consumed_pos > m_curr_storage_size / 2) || | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
(m_last_read_first_half && m_consumed_pos < m_curr_storage_size / 2 && | ||
m_consumed_pos > 0)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For clarity, the brace should be on the next line for multiline expressions. |
||
return true; | ||
} | ||
return false; | ||
} | ||
|
||
/**
 * Doubles the input buffer by allocating a new dynamic storage, copies the existing contents to
 * the start of the new storage in chronological order, then reads from the reader into the
 * second half of the new storage.
 * @param reader Source passed to read() to fill the newly available half
 * @param old_storage_size Receives the storage size from before the increase
 * @return true if the two halves of the old buffer had to be swapped while copying, false if the
 * old buffer was already in order
 * @throw std::runtime_error if the new storage cannot be allocated
 */
bool InputBuffer::increase_size_and_read (ReaderInterface& reader, size_t& old_storage_size) {
    const uint32_t prev_size = m_curr_storage_size;
    const uint32_t new_size = 2 * prev_size;
    old_storage_size = prev_size;

    // Handle super long line for completeness, but efficiency doesn't matter
    if (m_active_storage == m_static_storage) {
        SPDLOG_WARN("Long line detected changing to dynamic input buffer and"
                    " increasing size to {}.", new_size);
    } else {
        SPDLOG_WARN("Long line detected increasing dynamic input buffer size to {}.", new_size);
    }

    char* new_storage = static_cast<char*>(malloc(new_size * sizeof(char)));
    if (nullptr == new_storage) {
        // Report the size that was actually requested, not the old size
        SPDLOG_ERROR("Failed to allocate input buffer of size {}.", new_size);
        throw std::runtime_error("Failed to allocate input buffer");
    }
    m_dynamic_storages.emplace_back(new_storage);

    bool flipped_static_buffer = false;
    if (m_last_read_first_half) {
        // The first half holds the newer data, so the halves are swapped while copying to put
        // the older half first.
        const uint32_t half_prev_size = prev_size / 2;
        memcpy(new_storage, m_active_storage + half_prev_size, half_prev_size);
        memcpy(new_storage + half_prev_size, m_active_storage, half_prev_size);
        flipped_static_buffer = true;
    } else {
        // Buffer in correct order
        memcpy(new_storage, m_active_storage, prev_size);
    }

    m_active_storage = new_storage;
    m_curr_storage_size = new_size;
    m_bytes_read = prev_size;
    m_curr_pos = prev_size;
    // The copied data now fills the first half of the new storage. Mark the first half as the one
    // read last so that read() fills the second half instead of overwriting the copied data.
    m_last_read_first_half = true;
    read(reader);
    return flipped_static_buffer;
}
|
||
unsigned char InputBuffer::get_next_character () { | ||
if (m_finished_reading_file && m_curr_pos == m_bytes_read) { | ||
m_at_end_of_file = true; | ||
return utf8::cCharEOF; | ||
} | ||
unsigned char character = m_active_storage[m_curr_pos]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can combine this line and the next into one. |
||
m_curr_pos++; | ||
if (m_curr_pos == m_curr_storage_size) { | ||
m_curr_pos = 0; | ||
} | ||
return character; | ||
} | ||
|
||
/**
 * Reads up to half a buffer of data from the reader into the half that was not read last, then
 * updates the bookkeeping (which half was read last, whether the file is finished, and the
 * wrapped count of bytes read).
 * @param reader Source to read from
 */
void InputBuffer::read (ReaderInterface& reader) {
    const uint32_t half_size = m_curr_storage_size / 2;
    // read into the correct half of the buffer
    const uint32_t read_offset = m_last_read_first_half ? half_size : 0;
    // Initialised so the checks below never use an indeterminate value if the reader leaves the
    // out-parameter untouched.
    size_t bytes_read = 0;
    reader.read(m_active_storage + read_offset, half_size, bytes_read);
    m_last_read_first_half = !m_last_read_first_half;
    // A short read is treated as having reached the end of the file
    if (bytes_read < half_size) {
        m_finished_reading_file = true;
    }
    m_bytes_read += bytes_read;
    // Keep the count within the bounds of the (circular) storage
    if (m_bytes_read > m_curr_storage_size) {
        m_bytes_read -= m_curr_storage_size;
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#ifndef COMPRESSOR_FRONTEND_INPUT_BUFFER_HPP | ||
#define COMPRESSOR_FRONTEND_INPUT_BUFFER_HPP | ||
|
||
// Project Headers | ||
#include "../ReaderInterface.hpp" | ||
#include "Buffer.hpp" | ||
|
||
namespace compressor_frontend { | ||
/**
 * A character buffer that is filled from a ReaderInterface one half at a time, so that one half
 * can be read into while the other half is still being consumed.
 */
class InputBuffer : public Buffer<char> {
public:
    /**
     * Resets input buffer
     */
    void reset () override;

    /**
     * Checks if reading into the input buffer is safe in that unconsumed data won't be
     * overwritten (e.g., data being overwritten is already compressed in the case of compression)
     * @return true if a read is safe, false otherwise
     */
    bool read_is_safe ();

    /**
     * Reads into the half of the buffer currently available
     * @param reader
     */
    void read (ReaderInterface& reader);

    /**
     * Reads if no unconsumed data will be overwritten
     * @param reader
     */
    void try_read (ReaderInterface& reader) {
        if (read_is_safe()) {
            read(reader);
        }
    }

    /**
     * Swaps to a dynamic buffer (or doubles its size) and reads into the newly available space
     * @param reader
     * @param old_storage_size Receives the storage size from before the increase
     * @return true if the halves of the old buffer were swapped while being copied into the new
     * buffer, false otherwise
     */
    bool increase_size_and_read (ReaderInterface& reader, size_t& old_storage_size);

    /**
     * Check if at end of file, and return next char (or EOF)
     * @return The next character in the buffer, or the EOF character
     */
    unsigned char get_next_character ();

    /**
     * @return true if the current position is at the start of the half that was not read last
     * (index 0 or the midpoint of the storage), false otherwise
     */
    [[nodiscard]] bool all_data_read () const {
        if (m_last_read_first_half) {
            return (m_curr_pos == m_curr_storage_size / 2);
        } else {
            return (m_curr_pos == 0);
        }
    }

    void set_consumed_pos (uint32_t consumed_pos) {
        m_consumed_pos = consumed_pos;
    }

    void set_at_end_of_file (bool at_end_of_file) {
        m_at_end_of_file = at_end_of_file;
    }

    [[nodiscard]] bool at_end_of_file () const {
        return m_at_end_of_file;
    }

private:
    // Initial values match what reset() assigns, so a newly constructed buffer is usable before
    // the first call to reset().
    // Count of bytes read into the storage, wrapped by the storage size
    uint32_t m_bytes_read = 0;
    // Position set through set_consumed_pos() and checked by read_is_safe()
    uint32_t m_consumed_pos = 0;
    // Whether the most recent read filled the first half of the storage
    bool m_last_read_first_half = false;
    // Set once a read returns fewer bytes than requested
    bool m_finished_reading_file = false;
    bool m_at_end_of_file = false;
};
} | ||
|
||
#endif // COMPRESSOR_FRONTEND_INPUT_BUFFER_HPP |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#include "OutputBuffer.hpp" | ||
|
||
// C++ standard libraries | ||
#include <string> | ||
|
||
// spdlog | ||
#include <spdlog/spdlog.h> | ||
|
||
using std::string; | ||
|
||
namespace compressor_frontend { | ||
void OutputBuffer::increment_pos () { | ||
m_curr_pos++; | ||
if (m_curr_pos == m_curr_storage_size) { | ||
if (m_active_storage == m_static_storage) { | ||
SPDLOG_WARN( | ||
"Very long log detected: changing to a dynamic output buffer and " | ||
"increasing size to {}. Expect increased latency.", | ||
m_curr_storage_size * 2); | ||
} else { | ||
SPDLOG_WARN("Very long log detected: increasing dynamic output buffer size to {}.", | ||
m_curr_storage_size * 2); | ||
} | ||
m_dynamic_storages.emplace_back( | ||
(Token*)malloc(2 * m_curr_storage_size * sizeof(Token))); | ||
if (m_dynamic_storages.back() == nullptr) { | ||
SPDLOG_ERROR("Failed to allocate output buffer of size {}.", m_curr_storage_size); | ||
/// TODO: update exception when they're properly | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When they're properly what? |
||
/// (e.g., "failed_to_compress_log_continue_to_next") | ||
throw std::runtime_error( | ||
"Lexer failed to find a match after checking entire buffer"); | ||
} | ||
memcpy(m_dynamic_storages.back(), m_active_storage, | ||
m_curr_storage_size * sizeof(Token)); | ||
m_active_storage = m_dynamic_storages.back(); | ||
m_curr_storage_size *= 2; | ||
} | ||
} | ||
|
||
/**
 * Clears the timestamp and delimiter flags, then performs the base-class reset.
 */
void OutputBuffer::reset () {
    m_has_delimiters = false;
    m_has_timestamp = false;
    Buffer::reset();
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#ifndef COMPRESSOR_FRONTEND_OUTPUT_BUFFER_HPP | ||
#define COMPRESSOR_FRONTEND_OUTPUT_BUFFER_HPP | ||
|
||
// Project Headers | ||
#include "Buffer.hpp" | ||
#include "Token.hpp" | ||
|
||
/** | ||
* A buffer containing the tokenized output of the parser. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* The active buffer contains all the tokens from the current log message. | ||
* The first token contains the timestamp (if there is no timestamp the first token is invalid). | ||
* For performance (runtime latency) it defaults to a static buffer and when more tokens are needed | ||
* to be stored than the current capacity it switches to a dynamic buffer. | ||
* Each time the capacity is exceeded a new dynamic buffer is added to the list of dynamic buffers. | ||
*/ | ||
namespace compressor_frontend { | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unnecessary empty line. |
||
class OutputBuffer : public Buffer<Token> {
public:
    /**
     * Increment buffer pos, swaps to a dynamic buffer (or doubles its size) if needed
     */
    void increment_pos ();

    /**
     * Resets output buffer
     */
    void reset () override;

    void set_has_timestamp (bool has_timestamp) {
        m_has_timestamp = has_timestamp;
    }

    [[nodiscard]] bool has_timestamp () const {
        return m_has_timestamp;
    }

    void set_has_delimiters (bool has_delimiters) {
        m_has_delimiters = has_delimiters;
    }

    [[nodiscard]] bool has_delimiters () const {
        return m_has_delimiters;
    }

    /**
     * Copies a token into the active storage at the given index. No bounds check is performed.
     * @param pos Index in the active storage to overwrite
     * @param value Token to copy; taken by const reference because it is only read
     */
    void set_token (uint32_t pos, const Token& value) {
        m_active_storage[pos] = value;
    }

    /**
     * Copies a token into the active storage at the current position.
     * @param value Token to copy; taken by const reference because it is only read
     */
    void set_curr_token (const Token& value) {
        m_active_storage[m_curr_pos] = value;
    }

    /**
     * @return The token in the active storage at the current position
     */
    [[nodiscard]] const Token& get_curr_token () const {
        return m_active_storage[m_curr_pos];
    }

private:
    bool m_has_timestamp = false;
    bool m_has_delimiters = false;
};
} | ||
|
||
#endif // COMPRESSOR_FRONTEND_OUTPUT_BUFFER_HPP |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.