1 //\blockfile.d database blockfile tool
2 module tagion.tools.blockutil;
3 
4 import std.algorithm;
5 import std.array : join;
6 import std.conv : to;
7 import std.exception;
8 import std.format;
9 import std.getopt;
10 import std.range;
11 import std.stdio;
12 import std.traits : EnumMembers;
13 import std.typecons;
14 import tagion.dart.BlockFile;
15 import tagion.dart.DARTException : BlockFileException;
16 import tagion.dart.Recycler;
17 import tagion.hibon.Document;
18 import tagion.hibon.HiBONJSON : toPretty;
19 import tagion.tools.Basic;
20 import tagion.tools.revision;
21 import tools = tagion.tools.toolsexception;
22 
/// Mixes in the `Main` template with `_main` as the tool's entry function.
/// NOTE(review): `Main` is presumably declared in tagion.tools.Basic — confirm.
mixin Main!_main;

/// Expected `args.length` after option parsing: the program name plus exactly
/// one blockfile path. `_main` reports MISSING_BLOCKFILE for any other count.
enum HAS_BLOCK_FILE_ARG = 2;

/// Exit codes returned by `_main`.
enum ExitCode {
    NOERROR, /// Returned when the tool finished without an error
    MISSING_BLOCKFILE, /// Blockfile missing argument
    BAD_BLOCKFILE, /// Bad blockfile format
    OPEN_FILE_FAILED, /// Unable to open file
}
33 
/**
 * Inspection helper around a `BlockFile`.
 *
 * Every print/dump method writes one report about the blockfile to the
 * `File` it receives. `index_from` and `index_to` select the block-index
 * range used by the range based reports (`print`, `dumpGraph` and
 * `printUtilisation`).
 */
@safe
struct BlockFileAnalyzer {
    private BlockFile blockfile;
    uint inspect_iterations = uint.max; // Not read by any method of this struct
    uint max_block_iteration = 1000; // Not read by any method of this struct
    bool logscale; /// Passed on to the statistic and histogram dumps
    Index index_from; /// First index of the inspected block range
    Index index_to; /// End index of the inspected block range

    /// Closes the blockfile if one has been assigned.
    ~this() {
        if (blockfile) {
            blockfile.close;
        }
    }

    /**
     * Writes the block map of the selected index range.
     * Params:
     *   fout = file the report is written to
     */
    void print(File fout) {
        fout.writeln("Block map");
        blockfile.dump(from : index_from, to:
                index_to, fout:
                fout);
    }

    /**
     * Writes the recycler map.
     * Params:
     *   fout = file the report is written to
     */
    void recyclePrint(File fout) {
        fout.writeln("Recycler map");
        blockfile.recycleDump(fout);
    }

    /**
     * Follows the chain of recycle segments starting at the master block's
     * `recycle_header_index` and writes a histogram of the segment sizes.
     * Nothing is written when the start index is `Index(0)`.
     * Params:
     *   fout = file the histogram is written to
     */
    void recyclerCurrentPrint(File fout) {
        import tagion.logger.Statistic;

        Index index = blockfile.masterBlock.recycle_header_index;

        if (index == Index(0)) {
            return;
        }
        Statistic!(ulong, Yes.histogram) stat;
        // Each segment points to the next one; stop at the init index.
        while (index != Index.init) {
            auto add_segment = RecycleSegment(blockfile, index);
            stat(add_segment.size);
            index = add_segment.next;
        }
        fout.writeln(stat.histogramString(logscale));
    }

    /**
     * Writes the recycler statistic stored in the blockfile.
     * Params:
     *   fout = file the report is written to
     */
    void recycleStatisticPrint(File fout) {
        fout.writefln("Number of recycler fragments | Number of times |");
        blockfile.recycleStatisticDump(fout, logscale);
    }

    /**
     * Writes the block-size statistic stored in the blockfile.
     * Params:
     *   fout = file the report is written to
     */
    void printStatistic(File fout) {
        fout.writeln("Block size | number of times this block size has been claimed |");
        blockfile.statisticDump(fout, logscale);
    }

    /**
     * Writes the selected block range as a graphviz record inside a
     * markdown code fence.
     *
     * Every segment becomes one cell holding the first character of its
     * type (or "A" when the type is empty) repeated `seg.size` times.
     * The cells are grouped in rows of 16 segments.
     * Params:
     *   fout = file the graph is written to
     */
    void dumpGraph(File fout) {
        import std.algorithm;
        import std.range;

        auto text = [
            "```graphviz",
            "digraph {",
            `e [shape=record label="{`,
        ];

        BlockFile.BlockSegmentRange seg_range = blockfile[index_from .. index_to];
        const uint segments_per_line = 16;
        uint pos = 0;
        string[] line = ["{"];

        foreach (seg; seg_range) {
            // A full row is emitted lazily, just before the next segment is added.
            if (pos == segments_per_line) {
                line ~= "}|";
                text ~= line.join;
                line = ["{"];
                pos = 0;
            }

            string repeat_char = (seg.type.length == 0) ? "A" : seg.type[0 .. 1];

            line ~= repeat(repeat_char, seg.size).array.join;
            line ~= "|";

            pos += 1;
        }
        // Emit the pending row. Testing `pos` (instead of the total segment
        // count modulo the row length) also emits a last row which is exactly
        // full, and avoids walking the range a second time.
        if (pos != 0) {
            line ~= "}|";
            text ~= line.join;
        }

        text ~= `}"]`;
        text ~= "}";
        text ~= "```";
        text.each!((s) => fout.writeln(s));
    }

    /**
     * Loads the document stored at a block index.
     * Params:
     *   index = block index passed to `blockfile.load`
     * Returns: the document returned by `blockfile.load`
     */
    const(Document) dumpIndexDoc(const(Index) index) {
        return blockfile.load(index);
    }

    /**
     * Writes the header block.
     * Params:
     *   fout = file the block is written to
     */
    void printHeader(File fout) {
        fout.writefln("%s", blockfile.headerBlock);
    }

    /**
     * Writes the master block.
     * Params:
     *   fout = file the block is written to
     */
    void printMaster(File fout) {
        fout.writefln("%s", blockfile.masterBlock);
    }

    /**
     * Writes how the space of the selected block range is used.
     *
     * Segments whose document is a recycler record are counted as recycler
     * space, all other segments as data space. The sizes are printed in KiB,
     * or in MiB when the total exceeds `1 << 24` bytes.
     * Params:
     *   fout = file the report is written to
     */
    void printUtilisation(File fout) {
        auto seg_range = blockfile[index_from .. index_to];
        size_t recycler_size;
        size_t total;
        size_t doc_size;
        size_t data_size;
        foreach (seg; seg_range) {
            total += seg.size;
            if (RecycleSegment.isRecord(seg.doc)) {
                recycler_size += seg.size;
                continue;
            }
            doc_size += seg.doc.full_size;
            data_size += seg.size;
        }
        // Converts from number of blocks to number of bytes
        total += 1; /// 1 for header-block
        total *= blockfile.BLOCK_SIZE;
        recycler_size *= blockfile.BLOCK_SIZE;
        data_size *= blockfile.BLOCK_SIZE;
        string unit_name = "KiB";
        uint unit = 1 << 10; // KiB
        if (total > 1 << 24) {
            unit_name = "MiB";
            unit = 1 << 20;
        }
        // Without any data block the ratio would be 0/0 and print as nan.
        const double utilisation = (data_size == 0) ? 0.0 : 100.0 * doc_size / data_size;
        fout.writefln("Total     %9.3f%s", double(total) / unit, unit_name);
        fout.writefln("Data      %9.3f%s %5.2f%%", double(data_size) / unit, unit_name,
                100.0 * data_size / total);
        fout.writefln("Documents %9.3f%s %5.2f%%", double(doc_size) / unit, unit_name,
                100.0 * doc_size / total);
        fout.writefln("Recycler  %9.3f%s %5.2f%%", double(recycler_size) / unit, unit_name,
                100.0 * recycler_size / total);
        fout.writefln("Data utilisation       %5.2f%%", utilisation);
    }
}
193 
/// Module-level analyzer instance; `_main` stores the opened blockfile, the
/// `--log` flag and the parsed `--range` limits in it before running reports.
BlockFileAnalyzer analyzer;
/**
 * Entry function of the blockfile tool.
 *
 * Parses the command line, opens the blockfile given as the single
 * positional argument in read-only mode and runs every report that was
 * selected by an option. With `--dump` only the documents of the selected
 * block range are written (to stdout or to the `--output` file) and the
 * other reports are skipped.
 *
 * Params:
 *   args = command line arguments, `args[0]` is the program name
 * Returns:
 *   `ExitCode.NOERROR` on success (also for `--version` and `--help`),
 *   `ExitCode.MISSING_BLOCKFILE` if not exactly one blockfile is given,
 *   `ExitCode.BAD_BLOCKFILE` if a `BlockFileException` is thrown and
 *   `ExitCode.OPEN_FILE_FAILED` if any other `Exception` is thrown.
 */
int _main(string[] args) {
    immutable program = args[0];
    bool version_switch;
    bool print; /// prints the block map
    bool print_recycler;
    bool print_recycler_statistic;
    bool print_recycler_current;
    bool print_statistic;
    bool print_graph;
    bool dump_doc; // Bound to -d|dumpdoc; parsed but not acted upon below
    bool print_header;
    bool print_master;
    bool print_utilisation;
    ulong[] indices;
    bool dump;
    string index_range;
    string output_filename;
    enum logo = import("logo.txt");
    string filename;

    // Binary output goes to the --output file if one is given, else to stdout.
    File openOutput() {
        if (output_filename.empty) {
            return stdout;
        }
        return File(output_filename, "w");
    }

    try {

        auto main_args = getopt(args,
                std.getopt.config.caseSensitive,
                std.getopt.config.bundling,
                "version", "Display the version", &version_switch,
                "v|verbose", "Prints more debug information", &__verbose_switch,
                "R|range", "Sets range of block indices (Default is full range)", &index_range,
                "print", "Prints the entire blockfile", &print,
                "print-recycler", "Dumps the recycler", &print_recycler,
                "r|recyclerstatistic", "Dumps the recycler statistic block", &print_recycler_statistic,
                "t|recyclercurrentstat", "Dumps the current statistic in the file", &print_recycler_current,
                "s|statistic", "Dumps the statistic block", &print_statistic,
                "g|print-graph", "Dump the blockfile in graphviz format", &print_graph,
                "d|dumpdoc", "Dump the document located at a specific index", &dump_doc,
                "H|header", "Dump the header block", &print_header,
                "M|master", "Dump the master block", &print_master,
                "i|index", "the index to dump the document from", &indices,
                "o|output", "Output filename (Default stdout)", &output_filename,
                "U|utilised", "Dumps the utilisation of the blockfile", &print_utilisation,
                "dump", "Dumps the blocks as a HiBON sequency to stdout or a file", &dump,
                "log", "Sets logscale on statistics", &analyzer.logscale,
        );

        if (version_switch) {
            revision_text.writeln;
            return ExitCode.NOERROR;
        }

        if (main_args.helpWanted) {
            writeln(logo);
            defaultGetoptPrinter(
                    [
                    revision_text,
                    "Documentation: https://tagion.org/",
                    "",
                    "Usage:",
                    format("%s <file> [<option>...]", program),
                    "",
                    "Where:",
                    "",

                    "<option>:",

                    ].join("\n"),
                    main_args.options);
            return ExitCode.NOERROR;
        }

        // getopt removed the options, only the program name and the file may remain.
        if (args.length != HAS_BLOCK_FILE_ARG) {
            error("Missing blockfile");
            return ExitCode.MISSING_BLOCKFILE;
        }

        if (dump) {
            // stdout may carry the binary dump, so the text output is moved to stderr.
            vout = stderr;
        }
        filename = args[1]; /// First argument is the blockfile name
        analyzer.blockfile = BlockFile(filename, Yes.read_only);
        size_t index_from, index_to;
        if (!index_range.empty) {
            // formattedRead consumes its input by reference, so a copy is
            // parsed to keep index_range intact for the error message.
            auto range_text = index_range;
            const fields =
                range_text.formattedRead("%d:%d", index_from, index_to)
                    .ifThrown(0);
            tools.check(fields == 2,
                    format("Index range should be ex. --range 42:117 not %s", index_range));
            verbose("Index range [%d:%d]", index_from, index_to);
            analyzer.index_from = index_from;
            analyzer.index_to = index_to;
        }

        if (dump) {
            File fout = openOutput();
            scope (exit) {
                if (fout !is stdout) {
                    fout.close;
                }
            }
            foreach (block_segment; analyzer.blockfile[index_from .. index_to]) {
                fout.rawWrite(block_segment.doc.serialize);
            }
            return ExitCode.NOERROR;
        }
        if (print) {
            analyzer.print(vout);
        }

        if (print_header) {
            analyzer.printHeader(vout);
        }
        if (print_master) {
            analyzer.printMaster(vout);
        }

        if (print_recycler) {
            analyzer.recyclePrint(vout);
        }

        if (print_recycler_statistic) {
            analyzer.recycleStatisticPrint(vout);
        }

        if (print_recycler_current) {
            analyzer.recyclerCurrentPrint(vout);
        }
        if (print_statistic) {
            analyzer.printStatistic(vout);
        }

        if (print_graph) {
            analyzer.dumpGraph(vout);
        }

        if (print_utilisation) {
            analyzer.printUtilisation(vout);
        }

        if (!indices.empty) {
            File fout = openOutput();
            scope (exit) {
                if (fout !is stdout) {
                    fout.close;
                }
            }
            foreach (index; indices) {
                const doc = analyzer.dumpIndexDoc(Index(index));
                fout.rawWrite(doc.serialize);
            }
        }
    }
    catch (BlockFileException e) {
        // No hint about --inspect/--ignore here: those switches are not
        // registered with getopt above, so the advice could not be followed.
        stderr.writefln("Error: Bad blockfile format for %s", filename);
        error(e);
        return ExitCode.BAD_BLOCKFILE;
    }
    catch (Exception e) {
        // Also reached for option parsing and --range format errors; the
        // message of the exception itself is printed by error(e).
        stderr.writefln("Error: Unable to open file %s", filename);
        error(e);
        return ExitCode.OPEN_FILE_FAILED;
    }

    return ExitCode.NOERROR;
}