HTML Extractor

1

HTML Extractor

What is hx?

2

What is hx?

Why use Markdown document?

3

Why use Markdown document?

SWP ≠ Literate Programming

4

SWP ≠ Literate Programming

A very top-down view of hx

5

A very top-down view of hx

6
@Def(file: ../src/hx.cpp)
@put(global elements)
int main(int argc, const char **argv) {
@put(main body)
}
@End(file: ../src/hx.cpp)

Commands

7

Commands

Fragments

8

Fragments

Defined Fragments

9

Defined Fragments

Including fragments

10

Including fragments

Naming files

11

Naming files

Generating the first source code

12

Generating the first source code

13
int main(int argc, const char **argv) {
}

Steps in main

14

Steps in main

15
@def(main body)
#if ! NDEBUG
@put(perform unit-tests)
#endif
@end(main body)
16
@add(main body)
@put(process arguments)
@end(main body)
17
@add(main body)
@put(read source files)
@end(main body)
18
@add(main body)
@put(serialize fragments)
@end(main body)
19
@add(main body)
@put(write HTML file)
@end(main body)
20
@def(global elements)
@put(includes)
@end(global elements)

Next steps

21

Next steps

22
@add(global elements)
class Frag;
class Frag_Ref;

Frag *find_frag(const std::string &path, const std::string &key,
bool local, std::string *got_path = nullptr
);
Frag *find_frag(const Frag_Ref &ref, std::string *got_path = nullptr);

Frag &get_frag(const std::string &path,
const std::string &key, bool local
);
Frag &get_frag(const Frag_Ref &ref);

#include <map>
using Frag_Map = std::map<std::string, Frag>;

Frag_Map &frag_map(const std::string &in);
Frag_Map &frag_map();

void split_frag(const std::string &name, Frag *meta,
std::map<std::string, std::string> &&values
);
void clear_frags();
void eval_metas();
@end(global elements)
23
@inc(read.md)
24
@inc(blocks.md)
25
@inc(log.md)
26
@inc(frag.md)
27
@inc(input.md)

Parsing command line arguments

28

Parsing command line arguments

29
@def(process arguments)
// Walk over every command line argument; argv[0] is skipped.
for (int i { 1 }; i < argc; ++i) {
std::string arg { argv[i] };
// The handlers expanded below leave the iteration with continue once they
// have consumed the argument.
@put(process argument)
@put(process file argument)
// Only reached when none of the handlers above consumed the argument.
ASSERT_MSG(false, "unknown argument [" << argv[i] << ']');
}
@end(process arguments)
30
@add(global elements)
std::string stylesheet { "slides/slides.css" };
@end(global elements)
31
@def(process argument) {
static const std::string prefix { "--css=" };
if (arg.substr(0, prefix.length()) == prefix) {
stylesheet = arg.substr(prefix.length());
continue;
}
} @end(process argument)
32
@Def(needed by read_sources)
int blockLimit = -1;
@End(needed by read_sources)
33
@add(process argument) {
// Option whose numeric remainder is stored in blockLimit.
static const std::string prefix { "--limit=" };
if (arg.substr(0, prefix.length()) == prefix) {
// NOTE(review): std::stoi throws when the remainder is not a number and
// nothing here catches that - confirm this is the intended failure mode.
blockLimit = std::stoi(arg.substr(prefix.length()));
continue;
}
} @end(process argument)
34
@def(process file argument)
inputs.add(arg);
continue;
@end(process file argument)

Read input files

35

Read input files

36
@Add(inputs prereqs)
using SI = std::string::const_iterator;
@End(inputs prereqs)
37
@Add(needed by read_sources)
// Handles a single input character ch for the fragment frag.
// cur_path and cur_line name the input position the character came from.
// The body is supplied by the process char fragment.
void process_char(
Frag *frag, char ch, const std::string &cur_path, int cur_line
) {
@put(process char)
}
@End(needed by read_sources)
38
@Add(process line)
// Processes the characters of the current line one by one.
auto end = line.cend();
// Position of the current input; it is passed along with every added character.
std::string cur_path = inputs.cur().path();
int cur_line = inputs.cur().line();
// Starts out empty; the special command handling looks up command names in it.
std::map<std::string, std::string> cmd_values;
for (auto i = line.cbegin(); i != end; ++i) {
// While skip_spaces is set, every character up to and including the space
// character is dropped; the first other character ends the skipping.
if (skip_spaces) {
if (*i <= ' ') { continue; }
skip_spaces = false;
}
// The command handling continues the loop itself after it consumed a command.
@mul(process special chars)
process_char(frag, *i, cur_path, cur_line);
}
// Terminate the line with a newline unless spaces are still being skipped.
if (! skip_spaces) {
process_char(frag, '\n', cur_path, cur_line);
}
@End(process line)
39
@Def(additional read vars)
Frag *frag { nullptr };
@End(additional read vars)
40
@def(process char)
if (frag) {
frag->add(ch, cur_path, cur_line);
}
@end(process char)
41
@def(process special chars)
// This character may start a command of the form name(argument).
if (*i == '@') {
// nb and ne delimit the command name; ne is advanced by the cmd prefix fragment.
auto nb = i + 1;
auto ne = nb;
@put(cmd prefix)
// Requires a non-empty name that ended before the end of the line.
if (ne != end && ne != nb) {
std::string name { nb, ne };
@put(cmd argument)
}
}
@end(process special chars)
42
@def(cmd prefix)
// Advance ne over the alphabetic command name up to the opening parenthesis.
while (ne != end && *ne != '(') {
	// The cast keeps isalpha well defined for bytes with the high bit set;
	// passing a negative char value to it is undefined behaviour.
	if (! isalpha(static_cast<unsigned char>(*ne))) {
		// Not a command: report this by moving ne to the end of the line.
		ne = end;
		break;
	}
	++ne;
}
@end(cmd prefix)
43
@def(cmd argument)
// ab and ae delimit the argument that follows the opening parenthesis.
auto ab = ne + 1; auto ae = ab;
// Scan for the parenthesis that closes the command.
while (ae != end && *ae != ')') {
if (*ae == '@') {
@put(handle at in cmd arg)
}
++ae;
}
// Only an argument that is closed on this line forms a command.
if (ae != end) {
std::string arg {ab, ae};
@put(cmd found)
// The command text is consumed; skip the ordinary character handling.
continue;
}
@end(cmd argument)
44
@def(handle at in cmd arg)
// Step over the marker character; if it was the last one the scan ends.
if (++ae == end) { break; }
// A letter starts a nested command. The cast keeps isalpha well defined
// for bytes with the high bit set.
if (isalpha(static_cast<unsigned char>(*ae))) {
	@put(handle cmd in cmd arg)
}
@end(handle at in cmd arg)
45
@def(handle cmd in cmd arg)
// ae points at the first letter of a nested command name; skip that name.
// The cast keeps isalpha well defined for bytes with the high bit set.
auto ac { ae };
while (ac != end && isalpha(static_cast<unsigned char>(*ac))) {
	++ac;
}
if (ac != end && *ac == '(') {
	// Look for the parenthesis that balances the one after the nested name.
	int cnt = 1; ++ac;
	while (ac != end && cnt != 0) {
		if (*ac == '(') { ++cnt; }
		if (*ac == ')') { --cnt; }
		++ac;
	}
	if (cnt == 0) {
		// Leave ae on the nested closing parenthesis; the increment in the
		// enclosing scan then steps past it.
		ae = ac - 1;
	}
}
@end(handle cmd in cmd arg)
46
@def(cmd found)
// Move the line iterator to the closing parenthesis of the command.
i = ae;
// Remember whether the command was found outside of any fragment.
bool outside = ! frag;
// The one-pass loop lets the command handlers leave with break.
do {
// With a block limit of zero, commands outside of fragments are ignored.
if (outside && ! blockLimit) {
break;
}
@put(do special cmd)
@put(do default cmd)
} while (false);
// A command that opened a fragment from outside decrements a non-zero limit.
if (blockLimit && outside && frag) {
--blockLimit;
}
@end(cmd found)
47
@Add(needed by read_sources)
// Adds the command argument arg to the fragment f; cur_path and cur_line
// name the input position that the added text is attributed to.
// cur_path is taken by reference so that no string copy is made per call.
inline void expand_cmd_arg(
	Frag *f, const std::string &arg,
	const std::string &cur_path, int cur_line
) {
	// b and e delimit the part of arg that still has to be added.
	auto b = arg.begin();
	auto e = arg.end();
	@put(expand loop)
}
@End(needed by read_sources)
48
@def(do default cmd)
// Reached by commands that no special handler has claimed.
if (frag) {
if (frag->is_meta()) {
// Inside a meta fragment the command is added again in its textual form.
auto f { cur_path };
auto l { cur_line };
frag->add('@', f, l);
frag->add(name, f, l);
frag->add('(', f, l);
frag->add(arg, f, l);
frag->add(')', f, l);
} else {
// Otherwise only the argument is added, see expand_cmd_arg.
expand_cmd_arg(frag, arg, cur_path, cur_line);
}
}
// After a b command the following characters up to the space character are skipped.
if (name == "b") {
skip_spaces = true;
}
@end(do default cmd)
49
@add(includes)
#include <algorithm>
@end(includes)
50
@def(expand loop)
// Consume the argument piece by piece, split at each marker character.
while (b != e) {
auto x = std::find(b, e, '@');
// The expand before fragment handles the text in front of the marker.
@put(expand before)
if (x != e) {
// Step over the marker; the expand escaped fragment handles what follows.
b = x + 1;
@put(expand escaped)
} else {
// No further marker: the argument is used up.
b = e;
}
}
@end(expand loop)
51
@def(expand before)
f->add(std::string { b, x }, cur_path, cur_line);
@end(expand before)
52
@def(expand escaped)
if (b != e) {
f->add(*b, cur_path, cur_line);
++b;
}
@end(expand escaped)
53
@Add(needed by read_sources)
// Asserts that no fragment is currently open. The message reports the
// command, taken from the variables name and arg of the expansion site, and
// the name of the fragment that is still open.
#define ASSERT_NOT_FRAG() \
ASSERT_MSG(! frag, '@' << \
name << "(" << arg << \
") in frag [" << \
frag->name << ']' \
)
@End(needed by read_sources)
54
@Add(needed by read_sources)
// Warns when isPopulatedFrag reports true for the selected fragment.
// The do/while wrapper makes the macro expand to exactly one statement, so
// that it stays safe inside an unbraced if/else at the expansion site.
#define CHECK_NOT_DEFINED() \
	do { \
		if (isPopulatedFrag(frag)) { \
			WARN_MSG("frag [" << arg << \
				"] already defined" \
			); \
		} \
	} while (false)
@End(needed by read_sources)
55
@def(do special cmd)
// The def command opens a fragment; it must not appear inside another one.
if (name == "def") {
ASSERT_NOT_FRAG();
// The final argument true is the local flag of get_frag.
frag = &get_frag(cur_path, arg, true);
CHECK_NOT_DEFINED();
break;
}
@end(do special cmd)
56
@Add(needed by read_sources)
// Asserts that a fragment is currently open.
// The message must not read from frag: frag is a null pointer whenever this
// assertion fires, so only the command itself is reported.
#define ASSERT_FRAG() \
	ASSERT_MSG(frag, '@' << \
		name << "(" << arg << \
		") not in frag" \
	)
@End(needed by read_sources)
57
@add(do special cmd) {
// A command whose name is a key of cmd_values is replaced by the stored value.
auto i { cmd_values.find(name) };
if (i != cmd_values.end()) {
// NOTE(review): frag is dereferenced without a check - confirm that it
// cannot be null when a value command is encountered.
frag->add(i->second, cur_path, cur_line);
break;
}
} @end(do special cmd)
58
@add(do special cmd)
// Both spellings of the end command close the currently open fragment.
if (name == "end" || name == "End") {
ASSERT_FRAG();
if (frag->is_meta()) {
// A meta fragment is closed only by an end command whose argument
// yields the name of that fragment as its pattern.
std::string pattern;
std::map<std::string, std::string> values;
parse_args(arg, pattern, values);
if (frag->name == pattern) {
frag = nullptr;
} else {
// Any other end command is added to the meta fragment as text.
auto f { cur_path };
auto l { cur_line };
frag->add('@', f, l);
frag->add(name, f, l);
frag->add('(', f, l);
frag->add(arg, f, l);
frag->add(')', f, l);
}
} else {
@put(frag names must match)
frag = nullptr;
}
break;
}
@end(do special cmd)
59
@def(frag names must match)
ASSERT_MSG(frag->name == arg,
"closing [" << arg << "] != [" << frag->name << ']'
);
@end(frag names must match)
60
@Add(needed by read_sources)
// Warns when isPopulatedFrag reports false for the selected fragment.
// The do/while wrapper makes the macro expand to exactly one statement, so
// that it stays safe inside an unbraced if/else at the expansion site.
#define CHECK_DEFINED() \
	do { \
		if (! isPopulatedFrag(frag)) { \
			WARN_MSG("frag [" << arg << \
				"] not defined" \
			); \
		} \
	} while (false)
@End(needed by read_sources)
61
@add(do special cmd)
// The add command reopens a fragment to extend it.
if (name == "add") {
if (frag && frag->is_meta()) {
// Inside a meta fragment the command is added as text.
auto f { cur_path };
auto l { cur_line };
frag->add('@', f, l);
frag->add(name, f, l);
frag->add('(', f, l);
frag->add(arg, f, l);
frag->add(')', f, l);
} else {
// Otherwise no fragment may be open; a warning is issued when the
// selected fragment is not populated yet.
ASSERT_NOT_FRAG();
frag = &get_frag(cur_path, arg, true);
CHECK_DEFINED();
}
break;
}
@end(do special cmd)
62
@Add(needed by read_sources)
// Splits a command argument into a pattern and the values embedded in it.
//
// A nested command consists of the marker character, an alphabetic key and
// a value in balanced parentheses. For each nested command the value is
// stored as values[key] and the command is appended to pattern with an empty
// value. Every other character of arg is appended to pattern unchanged.
// pattern is only appended to; it is not cleared first.
void parse_args(const std::string &arg, std::string &pattern,
	std::map<std::string, std::string> &values
) {
	using Pos = std::string::size_type;
	const Pos size { arg.size() };
	for (Pos i { 0 }; i < size; ++i) {
		if (arg[i] == '@') {
			// j ends up behind the alphabetic key. The cast keeps isalpha
			// well defined for bytes with the high bit set.
			Pos j { i + 1 };
			while (j < size && isalpha(static_cast<unsigned char>(arg[j]))) {
				++j;
			}
			if (j > i + 1 && j < size && arg[j] == '(') {
				// k ends up one behind the balancing closing parenthesis.
				int cnt { 1 };
				Pos k { j + 1 };
				for (; k < size && cnt; ++k) {
					if (arg[k] == '(') { ++cnt; }
					if (arg[k] == ')') { --cnt; }
				}
				if (! cnt) {
					const std::string key { arg.substr(i + 1, j - i - 1) };
					values[key] = arg.substr(j + 1, k - j - 2);
					pattern += '@';
					pattern += key;
					pattern += '(';
					// Continue at the closing parenthesis, which is appended below.
					i = k - 1;
				}
			}
		}
		pattern += arg[i];
	}
}
@End(needed by read_sources)
63
@add(do special cmd)
if (name == "put") {
// Outside of a fragment, an argument that contains the marker character
// is split into a pattern and values and handed to split_frag.
if (! frag && arg.find('@') != std::string::npos) {
std::string pattern;
std::map<std::string, std::string> values;
parse_args(arg, pattern, values);
Frag *sub = &get_frag(cur_path, pattern, true);
sub->addMultiple();
split_frag(pattern, sub, std::move(values));
} else {
// Otherwise a reference to the named fragment is added to the open one.
ASSERT_MSG(frag, "@put" << "(" << arg << ") not in frag");
Frag *sub = &get_frag(cur_path, arg, true);
ASSERT(sub);
@mul(check frag ex. count)
sub->addExpand();
frag->add(Frag_Ref { cur_path, arg, true });
}
break;
}
@end(do special cmd)
64
@def(check frag ex. count)
if (sub->expands()) {
std::cerr << "multiple expands of [" << sub->name << "]\n";
}
if (sub->multiples()) {
std::cerr << "expand after mult of [" << sub->name << "]\n";
}
@end(check frag ex. count)
65
@add(do special cmd)
if (name == "inc") {
ASSERT_MSG(! frag, "include in frag [" << frag->name << ']');
if (! inputs.has(arg)) {
inputs.push(arg);
}
break;
}
@end(do special cmd)
66
@add(do special cmd)
if (name == "mul") {
ASSERT_MSG(frag, "@mul not in frag");
Frag *sub = &get_frag(cur_path, arg, true);
if (sub) {
@mul(check for prev expands)
sub->addMultiple();
frag->add(Frag_Ref { cur_path, arg, true });
}
break;
}
@end(do special cmd)
67
@def(check for prev expands)
if (sub->expands()) {
std::cerr << "multiple after expand of [" << sub->name << "]\n";
}
@end(check for prev expands)
68
@add(do special cmd)
if (name == "Def") {
@put(do Def)
break;
}
@end(do special cmd)
69
@def(do Def)
ASSERT_MSG(! frag, "@Def in frag [" << frag->name << ']');
frag = &get_frag(cur_path, arg, false);
if (isPopulatedFrag(frag)) {
std::cerr << "Frag [" << arg << "] already defined\n";
}
@end(do Def)
70
@add(do special cmd)
if (name == "Add") {
@put(do Add)
break;
}
@end(do special cmd)
71
@def(do Add)
ASSERT_MSG(! frag, "@Add in frag [" << frag->name << ']');
frag = &get_frag(cur_path, arg, false);
if (! isPopulatedFrag(frag)) {
std::cerr << "Frag [" << arg << "] not defined\n";
}
@end(do Add)
72
@add(do special cmd)
if (name == "rep") {
ASSERT_MSG(! frag, "@rep in frag [" << frag->name << ']');
frag = &get_frag(cur_path, arg, true);
@mul(clear frag)
break;
}
@end(do special cmd)
73
@add(do special cmd)
if (name == "Rep") {
ASSERT_MSG(! frag, "@Rep in frag [" << frag->name << ']');
frag = &get_frag(cur_path, arg, false);
@mul(clear frag)
break;
}
@end(do special cmd)
74
@def(clear frag)
// Report the fragment by its name, which is held in arg; the variable name
// holds the command that is being processed.
ASSERT_MSG(frag, "frag [" << arg << "] not defined");
frag->clear();
@end(clear frag)
75
@add(do special cmd)
if (name == "Put") {
@put(do Put)
break;
}
@end(do special cmd)
76
@def(do Put)
ASSERT_MSG(frag, "@Put not in frag");
Frag *sub = &get_frag(cur_path, arg, false);
if (sub) {
@mul(check frag ex. count)
sub->addExpand();
frag->add(Frag_Ref { cur_path, arg, false });
}
@end(do Put)
77
@add(do special cmd)
if (name == "Mul") {
@put(do Mul)
break;
}
@end(do special cmd)
78
@def(do Mul)
ASSERT_MSG(frag, "@Mul not in frag");
Frag *sub = &get_frag(cur_path, arg, false);
if (sub) {
@mul(check for prev expands)
sub->addMultiple();
frag->add(Frag_Ref { cur_path, arg, false });
}
@end(do Mul)
79
@add(do special cmd)
if (name == "priv") {
ASSERT_MSG(frag, "@priv not in frag");
@put(process private frag)
break;
}
@end(do special cmd)
80
@add(includes)
#include <functional>
#include <sstream>
@end(includes)
81
@def(process private frag)
std::hash<std::string> h;
auto cur { h(cur_path + ':' + arg) & 0x7fffffff };
@end(process private frag)
82
@add(process private frag)
std::ostringstream hashed;
hashed << "_private_" << cur << '_' << arg;
frag->add(hashed.str(), cur_path, cur_line);
@end(process private frag)
83
@add(do special cmd)
if (name == "magic") {
ASSERT_MSG(frag, "@magic not in frag");
@put(process magic frag)
break;
}
@end(do special cmd)
84
@def(process magic frag)
std::hash<std::string> h;
auto cur { h(cur_path + ':' + arg) & 0x7fffffff };
@end(process magic frag)
85
@add(process magic frag)
std::ostringstream value;
value << cur;
frag->add(value.str(), cur_path, cur_line);
@end(process magic frag)

Serialize Fragments

86

Serialize Fragments

87
@add(global elements)
@put(needed by files write)
// Runs the steps that are collected in the files write fragment.
void files_write() {
@put(files write)
}
@end(global elements)
88
@def(serialize fragments)
if (write_files) {
files_write();
}
@end(serialize fragments)
89
@def(files write)
for (auto &i : frag_map()) {
const Frag *frag { &i.second };
std::string cur_path { };
std::string cur_name { i.first };
@mul(serialize frag)
}
@end(files write)
90
@add(files write)
for (auto &j : inputs) {
std::string cur_path { j.first };
for (auto &i : frag_map(cur_path)) {
const std::string cur_name { i.first };
const Frag *frag { &i.second };
@mul(serialize frag)
}
}
@end(files write)
91
@def(serialize frag) {
if (frag->isFile(cur_name)) {
@put(write in file)
}
} @end(serialize frag)
92
@add(serialize frag) {
int sum { frag->expands() + frag->multiples() };
if (sum <= 0) {
std::cerr << "frag [" << frag->name << "] not called\n";
}
} @end(serialize frag)
93
@add(serialize frag)
if (! isPopulatedFrag(frag)) {
std::cerr << "frag [" << frag->name << "] not populated\n";
}
@end(serialize frag)
94
@def(needed by files write)
// Returns name without its first six characters - presumably the length of
// the prefix that marks a fragment name as a file name. Names shorter than
// that make the string constructor throw std::out_of_range.
std::string file_name(const std::string &name) {
	constexpr std::string::size_type prefix_length { 6 };
	return std::string(name, prefix_length);
}
@end(needed by files write)
95
@add(needed by files write)
// Tells whether the file belonging to the fragment name has to be rewritten.
// That is the case when check_frag rejects the current file content, or when
// the file still holds data after check_frag has read from it.
bool file_changed(const std::string &name, const Frag &f, std::string cur_path) {
	std::ifstream existing { file_name(name).c_str() };
	const bool accepted { check_frag(name, f, existing, cur_path) };
	// The stream is only probed for remaining data after a successful check.
	return ! accepted || existing.get() != EOF;
}
@end(needed by files write)
96
@def(write in file)
if (file_changed(cur_name, *frag, cur_path)) {
std::ofstream out(file_name(cur_name).c_str());
serializeFrag(cur_name, *frag, out, cur_path);
}
@end(write in file)
97
@add(global elements)
@put(needed by files process)
void files_process() {
@put(files process)
}
@end(global elements)
98
@add(serialize fragments)
if (process_files) {
files_process();
}
@end(serialize fragments)
99
@def(files process)
for (auto &i : frag_map()) {
const Frag *frag { &i.second };
const std::string cur_path;
const std::string cur_name = i.first;
@mul(serialize cmd)
}
@end(files process)
100
@add(files process)
for (auto &j : inputs) {
for (auto &i : frag_map(j.first)) {
const Frag *frag { &i.second };
const std::string cur_path = j.first;
const std::string cur_name = i.first;
@mul(serialize cmd)
}
}
@end(files process)
101
@def(needed by files process)
bool no_cmds = false;
@end(needed by files process)
102
@def(serialize cmd) {
const std::string cmd { Frag::cmd(cur_name) };
if (cmd.size()) {
@put(write cmd in file)
}
} @end(serialize cmd)
103
@def(write cmd in file)
std::ostringstream out {};
serializeFrag(cur_name, *frag, out, cur_path);
std::string o { out.str() };
if (no_cmds) {
std::cout << o;
} else {
@put(do write cmd)
}
@end(write cmd in file)
104
@def(do write cmd)
// Start the command with a pipe that is opened for writing and send it the
// serialized text.
std::FILE *f { popen(cmd.c_str(), "w") };
if (f) {
std::fwrite(o.c_str(), o.size(), 1, f);
// NOTE(review): the results of fwrite and pclose are not checked.
pclose(f);
}
@end(do write cmd)
105
@add(process argument) {
static const std::string prefix { "--no-cmds" };
if (arg == prefix) {
no_cmds = true;
continue;
}
} @end(process argument)
106
@inc(html.md)
107
@inc(view.md)
108
@inc(line.md)
109
@inc(edit.md)
110
@inc(range.md)
111
@inc(write.md)
112
@inc(add.md)
113
@inc(ncurses.md)
114
@inc(todos.md)
115
@add(global elements)
using Inputs_Frag_Map = std::map<std::string, Frag_Map>;

// One layer of fragment state. Layers form a chain through parent.
class Frag_State {
	public:
		// Owning link to the next layer in the chain; empty for the last one.
		std::unique_ptr<Frag_State> parent;
		// Fragment maps of this layer, one per input key.
		Inputs_Frag_Map state;

		// Takes over ownership of the layer passed in.
		Frag_State(std::unique_ptr<Frag_State> &&enclosing):
			parent { std::move(enclosing) }
		{ }

		// Meta fragment of this layer with its path, name and values;
		// filled in by split_frag.
		Frag *meta { nullptr };
		std::string meta_path { };
		std::string meta_name { };
		std::map<std::string, std::string> meta_values { };
};

// Head of the chain of fragment states; it owns the whole chain.
// make_unique already yields a temporary, so no std::move is needed.
std::unique_ptr<Frag_State> all_frags_ {
	std::make_unique<Frag_State>(nullptr)
};
// When set, this state is used by cur_state instead of all_frags_.
Frag_State *cur_state_ { nullptr };

// Returns the state selected through cur_state_, or the head of the chain
// when no state is selected.
Frag_State &cur_state() {
	if (cur_state_) {
		return *cur_state_;
	}
	return *all_frags_;
}

// Looks up the fragment key for the input in, starting at state.
// When only a parent state knows the fragment, a new fragment that refers to
// the parent's one is inserted into state and returned.
// Returns nullptr when no state of the chain knows the fragment.
Frag *find_frag(Frag_State &state, const std::string &in,
	const std::string &key
) {
	Frag_Map &own { state.state[in] };
	const auto hit { own.find(key) };
	if (hit != own.end()) {
		return &hit->second;
	}
	if (! state.parent) {
		return nullptr;
	}
	Frag *inherited = find_frag(*state.parent, in, key);
	if (! inherited) {
		return nullptr;
	}
	return &own.insert({
		key, { key, inherited }
	}).first->second;
}

// Looks up the fragment key for the input in, starting at the current state.
Frag *find_frag(const std::string &in, const std::string &key) {
return find_frag(cur_state(), in, key);
}

// Searches the fragment key in the input path and then along the prev links
// of the inputs. On success the path in which the fragment was found is
// stored in got_path, if that pointer is set. Returns nullptr when the end
// of the prev links is reached without a match.
Frag *find_frag_in_files(const std::string &path, const std::string &key,
	std::string *got_path
) {
	std::string current { path };
	while (true) {
		if (Frag *found = find_frag(current, key)) {
			if (got_path) { *got_path = current; }
			return found;
		}
		const Input &input { inputs[current] };
		if (input.prev.empty()) { break; }
		current = input.prev;
	}
	return nullptr;
}

// Finds the fragment key as seen from the input path.
// A local lookup only consults path itself. Any other lookup starts at the
// input that path names as prev, follows the prev links from there and
// finally consults the entries stored under the empty path.
// got_path, if set, receives the path under which the fragment was found.
Frag *find_frag(const std::string &path, const std::string &key,
	bool local, std::string *got_path
) {
	if (local) {
		if (got_path) { *got_path = path; }
		return find_frag(path, key);
	}
	Frag *result { nullptr };
	Input &input { inputs[path] };
	if (! input.prev.empty()) {
		result = find_frag_in_files(input.prev, key, got_path);
	}
	if (result) {
		return result;
	}
	result = find_frag(std::string { }, key);
	if (result && got_path) {
		*got_path = std::string { };
	}
	return result;
}

// Finds the fragment that the reference ref describes by path, name and
// local flag.
Frag *find_frag(const Frag_Ref &ref, std::string *got_path) {
return find_frag(ref.path, ref.name, ref.local, got_path);
}

// Makes sure that the fragment key exists for the input in, both in state and
// in every parent state, and returns the one stored in state. Each fragment
// is constructed with a pointer to the corresponding parent fragment.
Frag &add_frag(Frag_State &state, const std::string &in,
	const std::string &key
) {
	// Parent states are handled first, so that their fragment can be linked.
	Frag *inherited {
		state.parent ? &add_frag(*state.parent, in, key) : nullptr
	};
	return state.state[in].insert({
		key, { key, inherited }
	}).first->second;
}

// Adds the fragment key for the input in, starting at the current state.
Frag &add_frag(const std::string &in, const std::string &key) {
return add_frag(cur_state(), in, key);
}

// Returns the fragment key as seen from path, creating it when it cannot be
// found. A local fragment is created under path, any other one under the
// empty path.
Frag &get_frag(const std::string &path,
	const std::string &key, bool local
) {
	if (Frag *existing = find_frag(path, key, local)) {
		return *existing;
	}
	return add_frag(local ? path : std::string { }, key);
}

// Returns the fragment that the reference ref describes, creating it when it
// is missing.
Frag &get_frag(const Frag_Ref &ref) {
return get_frag(ref.path, ref.name, ref.local);
}

// Returns the fragment map of state for the input in. Before that, every
// fragment that is only known to the parent chain is added to this map as a
// new fragment that points at the parent's one.
Frag_Map &frag_map(Frag_State &state, const std::string &in) {
	Frag_Map &own { state.state[in] };
	if (! state.parent) {
		return own;
	}
	for (auto &entry : frag_map(*state.parent, in)) {
		const std::string &name { entry.first };
		if (own.count(name) == 0) {
			own.insert({ name, { name, &entry.second } });
		}
	}
	return own;
}
// Returns the fragment map of the current state for the input in.
Frag_Map &frag_map(const std::string &in) {
return frag_map(cur_state(), in);
}

// Returns the fragment map that is stored under the empty input path.
Frag_Map &frag_map() {
return frag_map(std::string { });
}

// Records the meta fragment meta with its name and values in the state at
// the head of the chain and then puts a fresh state in front of it.
// The contents of values are moved into the recorded state.
void split_frag(const std::string &name, Frag *meta,
	std::map<std::string, std::string> &&values
) {
	Frag_State &current = *all_frags_;
	current.meta = meta;
	current.meta_path = inputs.open_head();
	current.meta_values = std::move(values);
	current.meta_name = name;
	// The old head becomes the parent of the new one. make_unique already
	// yields a temporary, so it is not wrapped in std::move.
	auto next { std::make_unique<Frag_State>(std::move(all_frags_)) };
	all_frags_ = std::move(next);
	cur_state_ = nullptr;
}

// Replaces the whole chain of fragment states by a single empty state and
// drops the selected state. make_unique already yields a temporary, so it is
// not wrapped in std::move.
void clear_frags() {
	all_frags_ = std::make_unique<Frag_State>(nullptr);
	cur_state_ = nullptr;
}

// Applies the meta fragments of the chain that starts at fs. Parent states
// are handled before fs itself.
void eval_meta(Frag_State &fs) {
if (fs.parent) {
eval_meta(*fs.parent);
}
// States without a recorded meta fragment have nothing to apply.
if (fs.meta) {
@put(apply meta)
}
}

// Applies the meta fragments of all states, starting from the head of the chain.
void eval_metas() {
eval_meta(*all_frags_);
}
@end(global elements)
116
@def(apply meta)
// Serialize the meta fragment into text and run that text through the same
// command processing that is used for input lines.
std::ostringstream out;
serializeFrag(fs.meta_name, *fs.meta, out, fs.meta_path);
std::istringstream in { out.str() };
std::string line;
Frag *frag = nullptr;
std::string cur_path = fs.meta_path;
// NOTE(review): cur_line is not advanced in the loop below, so everything
// is attributed to line 1 - confirm that this is intended.
int cur_line { 1 };
// The values recorded for the meta fragment serve as command values.
auto &cmd_values = fs.meta_values;
// Make fs the state that cur_state returns while the text is processed.
cur_state_ = &fs;
bool skip_spaces { false };
while (std::getline(in, line)) {
auto end = line.cend();
for (auto i = line.cbegin(); i != end; ++i) {
// While skip_spaces is set, characters up to the space character are dropped.
if (skip_spaces) {
if (*i <= ' ') { continue; }
skip_spaces = false;
}
@mul(process special chars)
process_char(frag, *i, cur_path, cur_line);
}
if (! skip_spaces) {
process_char(frag, '\n', cur_path, cur_line);
}
}
// Return to the default state selection.
cur_state_ = nullptr;
@end(apply meta)