1 /**
2  * Perform highlighting on code section.
3  *
 * A DDOC string can contain embedded code. That code can be highlighted by
 * means of macros (keywords will be surrounded by $(DOLLAR)(D_KEYWORD),
 * comments by $(DOLLAR)(D_COMMENT), etc.).
 * This module performs the highlighting.
8  *
9  * Copyright: © 2014 Economic Modeling Specialists, Intl.
10  * Authors: Brian Schott, Mathias 'Geod24' Lang
11  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
12  */
13 module ddoc.highlight;
14 
/**
 * Scans a string and wraps its embedded code (code between lines of at least
 * 3 '-') and its inlined code in the relevant macros.
 *
 * Embedded code ends up inside `$(DOLLAR)(D_CODE ...)` and inlined code inside
 * `$(DOLLAR)(DDOC_BACKQUOTED ...)`. In both cases the code itself is run
 * through the D token highlighter. Everything else is copied verbatim.
 *
 * Params:
 * str = A string that might contain embedded code. Only code will be modified.
 *	 If the string doesn't contain any embedded code, it will be returned as is.
 *
 * Returns:
 * A (possibly new) string containing the embedded code put in the proper macros.
 */
string highlight(string str)
{
	// Note: DMD does not seem to be conformant w.r.t. ddoc.
	// The following file:
	// Ddoc
	// ----
	// int main(string[] args) { return 0;}
	// void test(int hello, string other);
	// ----
	//
	// produces the following document ($(DDOC) boilerplate excluded):
	//
	// <pre class="d_code"><font color=blue>int</font> main(string[] args) { <font color=blue>return</font> 0;}
	// <font color=blue>void</font> test(<font color=blue>int</font> hello, string other);
	// </pre>

	import ddoc.lexer;
	import ddoc.macros : tokOffset;
	import std.array : appender;

	auto lexer = Lexer(str, true);
	auto result = appender!string;
	// Offset right after the last code token that was emitted; it stays 0
	// until a code token has been seen, which doubles as the "nothing was
	// rewritten" flag below.
	size_t copiedUpTo;
	// Lexer offset recorded after the previous token. It is tracked
	// separately because there is no way to tell how many dashes precede an
	// embedded code token.
	size_t prevTokenEnd;
	for (; !lexer.empty; lexer.popFront())
	{
		auto kind = lexer.front.type;
		if (kind == Type.embedded || kind == Type.inlined)
		{
			// Flush the plain text sitting between the previous code token
			// and this one.
			if (copiedUpTo != prevTokenEnd)
				result.put(lexer.text[copiedUpTo .. prevTokenEnd]);
			result.put(kind == Type.embedded ? "$(D_CODE " : "$(DDOC_BACKQUOTED ");
			highlightCode(lexer.front.text, result);
			result.put(")");
			copiedUpTo = lexer.offset;
		}
		prevTokenEnd = lexer.offset;
	}

	// No code token was found: hand back the very same string.
	if (copiedUpTo == 0)
		return str;

	// Copy whatever follows the last code token.
	result.put(lexer.text[copiedUpTo .. prevTokenEnd]);
	return result.data;
}
79 
80 ///
unittest
{
	import ddoc.lexer;

	// Embedded code is surrounded by the D_CODE macro, and tokens get their
	// own macros (see: D_KEYWORD for example).
	{
		string source = `Here is some embedded D code I'd like to show you:
$(MY_D_CODE
------
// Entry point...
void main() {
  import std.stdio : writeln;
  writeln("Hello,", " ", "world", "!");
}
------
)
Isn't it pretty ?`;
		string expected = `Here is some embedded D code I'd like to show you:
$(MY_D_CODE
$(D_CODE $(D_COMMENT // Entry point...)
$(D_KEYWORD void) main() {
  $(D_KEYWORD import) std.stdio : writeln;
  writeln($(D_STRING "Hello,"), $(D_STRING " "), $(D_STRING "world"), $(D_STRING "!"));
})
)
Isn't it pretty ?`;
		string actual = highlight(source);
		assert(actual == expected, actual);
	}

	// A string without any code (two dashes are not enough to open a code
	// section) is returned as is: the result is the very same slice.
	{
		string plain = `This is some simple string
--
It doesn't do much
--
Hope you won't allocate`;
		string untouched = highlight(plain);
		assert(untouched is plain, untouched);
	}
}
119 
// Several consecutive embedded code sections, with delimiters of different
// lengths, are each wrapped in their own D_CODE macro.
unittest
{
	string expected = `$(D_CODE $(D_KEYWORD void) main() {})
$(D_CODE $(D_KEYWORD int) a = 42;)
$(D_CODE $(D_KEYWORD unittest) {
    $(D_KEYWORD assert)(42, $(D_STRING "Life, universe, stuff"));
})`;
	string source = `----
void main() {}
----
----
int a = 42;
----
---
unittest {
    assert(42, "Life, universe, stuff");
}
---`;
	string actual = highlight(source);
	assert(actual == expected, actual);
}
142 
// An indented embedded code section: the expected output keeps the whitespace
// in front of the opening dashes, while the code itself is emitted without the
// common leading indentation.
unittest
{
	string expected = `
        $(D_CODE $(D_KEYWORD asm) $(D_KEYWORD pure) $(D_KEYWORD nothrow) @nogc @trusted
{
    $(D_COMMENT // the compiler does not check the attributes)
    ret;
}
)
`;
	string source = `
        ---------
        asm pure nothrow @nogc @trusted
        {
            // the compiler does not check the attributes
            ret;
        }
        ---------
`;
	string actual = highlight(source);
	assert(actual == expected, actual);
}
165 
166 private:
/*
 * Lexes `code` as D source and writes it to `output`, token by token.
 *
 * String literals are wrapped in $(D_STRING ...), comments in
 * $(D_COMMENT ...), keywords and basic types in $(D_KEYWORD ...). Every other
 * token is written unchanged. The lexer is configured to include whitespace
 * tokens, so those are written out like any other token.
 *
 * Params:
 * code = The D code to highlight.
 * output = The output range receiving the highlighted text.
 */
void highlightCode(O)(string code, ref O output)
{
	import dparse.lexer;
	import std.string : representation;

	enum fName = "<embedded-code-in-documentation>";

	// Writes `opening`, then `payload`, then the closing parenthesis.
	void wrap(string opening, string payload)
	{
		output.put(opening);
		output.put(payload);
		output.put(")");
	}

	auto cache = StringCache(StringCache.defaultBucketCount);
	// The lexer is handed a mutable copy of the input bytes.
	auto tokens = code.representation.dup.byToken(LexerConfig(fName,
		StringBehavior.source, WhitespaceBehavior.include), &cache);
	for (; !tokens.empty; tokens.popFront())
	{
		auto kind = tokens.front.type;
		string text = tokens.front.text;

		if (kind.isStringLiteral)
			wrap("$(D_STRING ", text);
		else if (tokens.front == tok!"comment")
			wrap("$(D_COMMENT ", text);
		else if (kind.isKeyword || kind.isBasicType)
			wrap("$(D_KEYWORD ", kind.str);
		else
			// When a token has no text, fall back to the string form of its
			// type.
			output.put(text.length ? text : kind.str);
	}
}