/*
 * Logserver
 * Copyright (C) 2017-2025 Joel Reardon
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __HUGE_VECTOR__H__
#define __HUGE_VECTOR__H__

#include <cassert>
#include <cstddef>
#include <functional>
#include <list>
#include <map>
#include <ostream>
#include <set>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

using namespace std;

/* index in our vector of vectors, consisting of page and offset on that page */
struct pagepos_t {
	size_t page;	// which page (inner vector)
	size_t off;	// which element on that page
};

/* This class is for a huge vector, implemented as a vector of vectors. A deque
 * is inappropriate because we will rarely insert at the front; however it is
 * technically possible, and we will occasionally insert in the middle to
 * implement line breaks. Allocating (and reallocating) huge vectors becomes
 * noticeable at very large sizes, so we have a template parameter for the page
 * size and push a new page once the last one is full. Insertions can cause one
 * page to grow beyond PAGESIZE, so we have to track where each page starts. */

// TODO: page size defaults to one million, which is the worst case for insertion
// and reallocation. We should assess if a larger or smaller value is more
// appropriate.

/* huge_vector is templated on the type it stores, T, and on PAGESIZE, the
 * number of elements after which add() starts a new page (each new page
 * reserves PAGESIZE elements up front). Note that the default is not a nice
 * power of two, to allow some number of insertions and line breaks without
 * risking a reallocation as a result.
 *
 * Several members call get() on a T, and get() / view() through operator-> of
 * a T, and compare a T against nullptr, so T must be a pointer-like type that
 * provides these.
 *
 * Invariant maintained by the mutators: only the last page may be empty, so
 * every page has a distinct start offset in _starts.
 */
template<typename T, int PAGESIZE = 1000000>
class huge_vector {
public:
	/* forward/backward read-only cursor over the elements. It stores a
	 * page position and a pointer to the huge_vector it walks. */
	class iterator {
	public:
		/* positions the iterator at page 0, offset 0 */
		iterator(const huge_vector* ref) : _pp{0, 0}, _ref(ref) {}

		/* positions the iterator at an explicit page position */
		iterator(const huge_vector* ref, pagepos_t pp)
			: _pp(pp), _ref(ref) {}

		const T& operator*() const {
			return _ref->_lines[_pp.page][_pp.off];
		}

		/* equal when both the position and the container match */
		bool operator==(const iterator& other) const {
			return _pp.off == other._pp.off &&
			    _pp.page == other._pp.page &&
			    _ref == other._ref;
		}

		bool operator!=(const iterator& other) const {
			return !(*this == other);
		}

		iterator& operator++() {
			_ref->next(&_pp);
			return *this;
		}

		iterator& operator--() {
			_ref->prev(&_pp);
			return *this;
		}

	protected:
		pagepos_t _pp;
		const huge_vector* _ref;
	};

	/* same contract as iterator, kept as a distinct type for cbegin() and
	 * cend() */
	class const_iterator {
	public:
		/* positions the iterator at page 0, offset 0 */
		const_iterator(const huge_vector* ref) : _pp{0, 0}, _ref(ref) {}

		/* positions the iterator at an explicit page position */
		const_iterator(const huge_vector* ref, pagepos_t pp)
			: _pp(pp), _ref(ref) {}

		const T& operator*() const {
			return _ref->_lines[_pp.page][_pp.off];
		}

		/* equal when both the position and the container match */
		bool operator==(const const_iterator& other) const {
			return _pp.off == other._pp.off &&
			    _pp.page == other._pp.page &&
			    _ref == other._ref;
		}

		bool operator!=(const const_iterator& other) const {
			return !(*this == other);
		}

		const_iterator& operator++() {
			_ref->next(&_pp);
			return *this;
		}

		const_iterator& operator--() {
			_ref->prev(&_pp);
			return *this;
		}

	protected:
		pagepos_t _pp;
		const huge_vector* _ref;
	};

	/* constructor: starts empty (no pages, zero length) */
	huge_vector() = default;

	/* the class has virtual functions, so destruction through a base
	 * pointer must be well defined. Declaring the destructor would
	 * suppress the implicit move operations, so all copy/move operations
	 * are explicitly defaulted to keep them exactly as before. */
	virtual ~huge_vector() = default;
	huge_vector(const huge_vector&) = default;
	huge_vector(huge_vector&&) = default;
	huge_vector& operator=(const huge_vector&) = default;
	huge_vector& operator=(huge_vector&&) = default;

	/* reset to empty */
	virtual void clear() {
		_length = 0;
		_lines.clear();
		_starts.clear();
	}

	/* increment the pagepos to the next value, minding if we jump pages.
	 * On the last page the offset simply grows, so stepping past the last
	 * element lands on the end() position. Throws runtime_error if
	 * pp->page is not an existing page. */
	virtual void next(pagepos_t* pp) const {
		check(pp->page < _lines.size(), "next(): pp less than lines");
		++(pp->off);
		// we are at the end().
		if (pp->page + 1 == _lines.size()) [[unlikely]] return;
		if (pp->off == _lines[pp->page].size()) [[unlikely]] {
			pp->off = 0;
			++(pp->page);
		}
	}

	/* decrement the pagepos to the previous value, minding if we jump
	 * pages. Stepping back from the very first position wraps both fields
	 * around, producing a position for which valid() is false. Throws
	 * runtime_error if pp->page is not an existing page. */
	virtual void prev(pagepos_t* pp) const {
		check(pp->page < _lines.size(), "prev(): pp less than lines");
		if (pp->off == 0) [[unlikely]] {
			if (pp->page) {
				// one past the last element of the previous
				// page; the decrement below lands on it
				pp->off = _lines[pp->page - 1].size();
			} else {
				pp->off = static_cast<size_t>(-1);
			}
			--(pp->page);
		}
		--(pp->off);
	}

	/* returns true if the virtual size of this huge_vector is larger than
	 * pos */
	virtual bool valid(size_t pos) const {
		return valid(getpos(pos));
	}

	/* checks if pagepos has a page and offset that exists */
	virtual bool valid(pagepos_t pos) const {
		if (pos.page < _lines.size())
			return pos.off < _lines.at(pos.page).size();
		return false;
	}

	/* pushes a new item to the back of the huge_vector, minding if we need
	 * to add a new page */
	virtual void add(T&& item) {
		assert(_lines.size() == _starts.size());
		const size_t pagesize = static_cast<size_t>(PAGESIZE);
		if (_lines.empty() ||
		    _lines.back().size() >= pagesize) [[unlikely]] {
			if (!_starts.empty()) [[likely]] {
				// the new page starts right after the last
				// element of the current last page
				const size_t newpos = _starts.rbegin()->first +
					_lines.back().size();
				_starts[newpos] = _lines.size();
			} else {
				_starts[0] = 0;
			}
			_lines.emplace_back();
			_lines.back().reserve(pagesize);
		}
		assert(item != nullptr);
		_lines.back().push_back(std::move(item));
		++_length;
	}

	/* returns the number of elements */
	virtual size_t length() const {
		return _length;
	}

	/* inserts a list of items at a position, faster than inserting
	 * individually since we can slide all elements once by the right
	 * amount. The items are moved out of the list. pos must refer to an
	 * existing element, otherwise runtime_error is thrown.
	 * returns the new length of the huge_vector */
	virtual size_t insert(std::list<T>& items, size_t pos) {
		const size_t count = items.size();
		if (count == 0) return _length;
		pagepos_t pp = getpos(pos);
		check(valid(pp), "insert() pp not valid");
		// open a gap of count slots starting at pp, then fill it
		slide(pp, count);
		auto& vec = _lines.at(pp.page);
		for (auto it = items.begin(); it != items.end(); ++it) {
			assert(it->get());
			vec[pp.off] = std::move(*it);
			++pp.off;
		}
		_length += count;
		adjust_starts();
		return _length;
	}

	/* inserts one item at a position. pos must refer to an existing
	 * element, otherwise runtime_error is thrown. returns the new length */
	virtual size_t insert(T&& item, size_t pos) {
		pagepos_t pp = getpos(pos);
		check(valid(pp), "insert() one, pp not valid");
		auto& vec = _lines.at(pp.page);
		vec.insert(vec.begin() + pp.off, std::move(item));
		++_length;
		adjust_starts();
		return _length;
	}

	/* support array index operator overload for elegance. pos is not
	 * bounds checked. */
	T& operator[](size_t pos) {
		pagepos_t pp = getpos(pos);
		return _lines[pp.page][pp.off];
	}

	/* const element access. pos is not bounds checked. */
	virtual const T& at(size_t pos) const {
		pagepos_t pp = getpos(pos);
		return _lines[pp.page][pp.off];
	}

	/* removes the element at a position and updates length. Throws
	 * runtime_error if pos does not refer to an existing element. */
	virtual void remove(size_t pos) {
		pagepos_t pp = getpos(pos);
		check(valid(pp), "remove() pp not valid");
		auto& vec = _lines[pp.page];
		vec.erase(vec.begin() + pp.off);
		// An empty page that is followed by another page would share
		// its start offset with that page: _starts would lose an entry
		// and next() would land on a nonexistent element. Drop it. An
		// empty last page is harmless and keeps its reserved capacity.
		if (vec.empty() && pp.page + 1 < _lines.size()) {
			_lines.erase(_lines.begin() + pp.page);
		}
		--_length;
		adjust_starts();
	}

	/* streams the entire sequence of lines to os, one per line, and
	 * flushes os once at the end */
	virtual void write(std::ostream& os) const {
		for (const auto& page : _lines) {
			for (const auto& item : page) {
				os << item->get() << '\n';
			}
		}
		os.flush();
	}

	/* gets the string_view representation of the element at pos. This makes
	 * an assumption on T that it is a pointer to something that has a
	 * view() function that returns a string_view, which is fine for
	 * logserver */
	virtual const std::string_view view_at(size_t pos) const {
		pagepos_t pp = getpos(pos);
		return _lines[pp.page][pp.off]->view();
	}

	/* Helper function to make matching faster. Iterates over pagepos_t
	 * directly and runs matching callback fn() on each element's view in
	 * [start, end), adding the position to the results set if fn returns
	 * true. This avoids having to convert a position to pagepos for each
	 * line. start and end are swapped if given in reverse order, and end
	 * is clamped to the length. Returns the current length.
	 */
	virtual size_t range_add_if(
			size_t start,
			size_t end,
			std::set<size_t>* results,
			const std::function<bool(const std::string_view)>& fn) const {
		assert(results);
		if (end < start) std::swap(start, end);

		if (start >= _length) return _length;
		if (end > _length) end = _length;
		pagepos_t pp = getpos(start);
		for (size_t i = start; i < end; ++i) {
			if (fn(_lines[pp.page][pp.off]->view())) {
				// positions are visited in increasing order,
				// so hint the insertion at the end
				results->insert(results->end(), i);
			}
			next(&pp);
		}
		return _length;
	}

	/* consistency checker used for unit tests: every page must have exactly
	 * one entry in _starts, keyed by the number of elements before it */
	virtual void sanity() {
		assert(_lines.size() == _starts.size());
		size_t pos = 0;
		auto it = _starts.begin();
		for (size_t i = 0; i < _lines.size(); ++i) {
			assert(it->first == pos);
			assert(it->second == i);
			pos += _lines[i].size();
			++it;
		}
	}

	/* iterator at the first element (equal to end() when empty) */
	virtual iterator begin() const {
		return iterator(this);
	}

	virtual const_iterator cbegin() const {
		return const_iterator(this);
	}

	/* iterator one past the last element: last page, offset == its size */
	virtual iterator end() const {
		if (_lines.empty()) return iterator(this);
		pagepos_t pp;
		pp.page = _lines.size() - 1;
		pp.off = _lines.back().size();
		return iterator(this, pp);
	}

	virtual const_iterator cend() const {
		if (_lines.empty()) return const_iterator(this);
		pagepos_t pp;
		pp.page = _lines.size() - 1;
		pp.off = _lines.back().size();
		return const_iterator(this, pp);
	}

protected:
	// we have inserted or deleted somewhere and need to change all start
	// keys after that point. since this is infrequent and the shuffling is
	// more costly just recompute it.
	virtual inline void adjust_starts() {
		_starts.clear();
		size_t pos = 0;
		for (size_t i = 0; i < _lines.size(); ++i) {
			_starts[pos] = i;
			pos += _lines[i].size();
		}
	}

	/* converts a virtual huge_vector index into a page/pos combination.
	 * The result is not guaranteed to exist; use valid() to test it. */
	virtual inline pagepos_t getpos(size_t pos) const {
		pagepos_t ret;
		// optimize when we don't have a huge vector, such as only a
		// single page, or the pos is on that first page.
		if (_lines.empty() || pos < _lines.front().size()) [[likely]] {
			ret.page = 0;
			ret.off = pos;
			return ret;
		}
		// find the relevant page and compute its offset on it: the
		// page we want is the last one starting at or before pos
		auto it = _starts.upper_bound(pos);
		assert(it != _starts.begin());
		--it;
		ret.page = it->second;
		assert(it->first <= pos);
		ret.off = pos - it->first;
		return ret;
	}

	/* implements the slide needed to insert new elements. pp is the
	 * page/pos to insert at, and amount is the number of new elements to
	 * support. Afterwards the amount slots starting at pp.off hold
	 * moved-from values. Throws runtime_error if pp.off is not an existing
	 * offset on the page. */
	virtual void slide(pagepos_t& pp, size_t amount) {
		if (!amount) return;
		auto& vec = _lines.at(pp.page);
		// without this the downward loop below would never meet pp.off
		check(pp.off < vec.size(), "slide() pp not valid");
		size_t pos = vec.size() - 1;
		vec.resize(vec.size() + amount);
		// move from the back so nothing is overwritten before it moved
		while (true) {
			assert(vec[pos].get());
			vec[pos + amount] = std::move(vec[pos]);
			assert(vec[pos].get() == nullptr);
			if (pos == pp.off) break;
			--pos;
		}
	}

	// runtime precondition check, active in all builds: throws
	// runtime_error carrying text when val is false, returns true
	// otherwise.
	static inline bool check(bool val, const std::string& text) {
		if (!val) [[unlikely]] {
			throw std::runtime_error(text);
		}
		return true;
	}

	// same as above for string literals; avoids building a std::string on
	// every call when the check passes.
	static inline bool check(bool val, const char* text) {
		if (!val) [[unlikely]] {
			throw std::runtime_error(text);
		}
		return true;
	}

	// the vector of vectors
	std::vector<std::vector<T>> _lines;
	// the length of the vector
	size_t _length = 0;
	// tracks first element global offset per page (offset -> page index)
	std::map<size_t, size_t> _starts;

};

#endif  // __HUGE_VECTOR__H__
