Doxygen XLinks
by
V: 2511R0
Website: doxygen
Loading...
Searching...
No Matches
htmlreplacer.cpp
1//==================================================================================================
2// This implementation-file is part of DoxygenXLinks - A doxygen post-processor that allows to
3// define smarter <b>Doxygen</b>-links.
4//
5// \emoji :copyright: 2025-2026 A-Worx GmbH, Germany.
6// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8#include "jobs.hpp"
9#include "dxl.hpp"
10#include "dxlapp.hpp"
11#include "ALib.ALox.H"
12#include "ALib.App.H" // TODO(251204 09:14): we need this only for the definition of LOX_LOX.
13 // How can we avoid including the whole app?
14
15#include <iostream>
16#include <fstream>
17
18using namespace alib;
19using namespace std;
20
21namespace dxl {
22
24 Lox_SetDomain("DXL/HTML/JOB", Scope::Method )
25 Lox_Info( "Reading HTML file {!Q} of size {}", htmlFileNode.Name(),
27 dxl.Stats.HTMLFileSize.fetch_add(int(htmlFileNode->Size()));
28
29 Path path;
30 {ALIB_LOCK_SHARED_WITH(dxl.GetHTMLTreeLock())
31 htmlFileNode.AssembleRealPath(path, lang::Inclusion::Include);
32 }
33
34 // read exclamations applicable to this file once
36 dxl.Exclamations.Get(htmlFileNode.Name(), exclamations);
37
39
40 Lox_Info("Reading HTML file: {}", path )
41 MappedFile& htmlFile= poolWorker->InputFile;
43 try {
44 mfc= htmlFile.Open(path.Terminate(), htmlFileNode->Size(), false);
45 } catch (std::exception&) {
46 app.cErr->Add(app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
47 path);
48 app.machine.SetExitCode(ExitCodes::CantOpenHMLFile);
49 return true;
50 }
51
52 // Output buffer (fileSize * 3). In addition, some padding bytes are written in front, so that
53 // already written contents can be tested backwards (see wb-12 and wb-15 below) without
53 // checking the actual write-size.
54 AString& writeBuffer= poolWorker->WriteBuffer;
55 writeBuffer.EnsureRemainingCapacity(integer(htmlFileNode->Size() * 3));
56 constexpr integer writeBufferPadSize= 20;
57 writeBuffer._(Fill(0, writeBufferPadSize));
58 char* writeBufferStart= writeBuffer.VBuffer();
59 char* wb = writeBufferStart + writeBufferPadSize;
60
61 // if the application has an exit code, we stop right now
62 // todo: this periodic check makes sense but has not been tested yet.
63 // It has to be moved into the loop below and also added to the other jobs.
64 if ( app.machine.GetExitCode().Integral() )
65 return true;
66
67 // we want maximum speed and even spare the fast log calls in the loop.
68 Verbosity verbosity;
69 Lox_GetVerbosity(verbosity)
70
71 // loop over all lines of the HTML-file
72 bool fileChanged = false;
73 int cntELReplacements = 0;
74 int cntELRefReplacements= 0;
75 int lineNo = 1;
76 String512 linkString;
77 size_t lineStartRemaining = mfc.Remaining();
78 while (!mfc.IsEOF()) {
79 char c= char( mfc.Next<NC>() );
80
81 // linefeed
82 if (c == '\n') { *wb++= '\n'; lineNo++; lineStartRemaining= mfc.Remaining(); continue; }
83
84 // check for EL- and ELREF-anchors with every '"'
85 if (c == '"') {
86 bool isElAnchor = characters::Equal<char>(wb-12, "<a class=\"el" , 12);
87 bool isElRefAnchor= characters::Equal<char>(wb-15, "<a class=\"elRef", 15);
88 if ( isElAnchor || isElRefAnchor ) {
89 // sometimes Doxygen adds two spaces
90 while (mfc.Remaining() && char(*mfc.Current()) ==' ')
91 mfc.Next<NC>();
92
93 // read 'href="x"'
94 if (mfc.Remaining() < 8) { *wb++= '\"'; *wb++= ' '; continue; }
95 #if ALIB_DEBUG
96 ALIB_ASSERT( mfc()=='h'
97 && mfc()=='r'
98 && mfc()=='e'
99 && mfc()=='f'
100 && mfc()=='='
101 && mfc()=='\"', "DXL/HTML/JOB")
102 #else
103 mfc.Skip(6);
104 #endif
105
106 // read file-name and anchor
107 String256 fileName;
108 String128 anchor;
109 bool isAnchor= false;
110 while (mfc.Remaining() && (c= mfc()) != '\"') {
111 if (c=='#') {isAnchor= true; continue;}
112 if (!isAnchor) fileName._(c);
113 else anchor ._(c);
114 }
115 Styles styles;
116 dxl.GetELDecoration( styles, isElRefAnchor, htmlFileNode, fileName, anchor,
117 lineNo, int(lineStartRemaining - mfc.Remaining() - 9) );
118 ALIB_ASSERT_ERROR(styles.Size(), "DXL/HTML/JOB", "No styles given for EL-Anchor" )
119 // re-activate AString: sync its length with the raw write pointer 'wb' before appending
120 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(),
121 "DXL/HTML/JOB", "Write buffer overflow detected" )
122 writeBuffer.SetLength(wb - writeBufferStart);
123
124 for ( int i= 1; i < styles.Size(); ++i ) // starting with 1 omits el/elRef
125 writeBuffer._<NC>( ' ' )._<NC>( styles.Get(i) );
126 writeBuffer._<NC>("\" href=\"" );
127 if ( fileName.IsNotEmpty() ) writeBuffer._<NC>( fileName );
128 if ( anchor.IsNotEmpty() ) writeBuffer._<NC>( '#' )._<NC>( anchor );
129 writeBuffer._<NC>( '\"' );
130 wb= writeBuffer.VBuffer() + writeBuffer.Length();
131 fileChanged= true;
132 if (isElAnchor) ++cntELReplacements;
133 else ++cntELRefReplacements;
134 }
135 else
136 *wb++= '\"';
137 continue;
138 }
139
140 // not '#'? continue
141 if (c != '#') { *wb++= c; continue; }
142
143
144 //---- 2nd character ----
145 if (mfc.Remaining()<3) {
146 *wb++= c;
147 while (mfc.Remaining()){
148 c= char(mfc.Next<NC>());
149 if ( c == '\n')
150 ++lineNo;
151 *wb++= c;
152 }
153 break;
154 }
155
156 c= char( mfc.Next<NC>() );
157
158 // if double hash is given, remove one hash ##"..." and keep the rest.
159 // Note: This is mainly needed for the documentation of this tool ;-)
160 if (c == '#' ) { *wb++= '#'; continue;}
161
162 // not '"' ?
163 if ( c != '\"' ) {
164 *wb++= '#';
165 *wb++= c;
166 if ( c == '\n')
167 ++lineNo;
168 continue;
169 }
170
171 //---- 3rd character: not an allowed link start? ----
172 c= char(mfc.Next<NC>());
173 if ( !isalpha(c) && String(".%^_&").IndexOf(c) < 0 ) {
174 *wb++= '#';
175 *wb++= '\"';
176 *wb++= c;
177 if ( c == '\n')
178 ++lineNo;
179 continue;
180 }
181
182 int colNo= int(lineStartRemaining - mfc.Remaining() - 2);
183
184 // search for exclamations
185 { auto exclIt= exclamations.begin();
186 for (; exclIt!=exclamations.end(); ++exclIt )
187 if ( (*exclIt)->Matches(lineNo, colNo ) )
188 break;
189 if (exclIt != exclamations.end()) {
190 *wb++= '#';
191 *wb++= '\"';
192 *wb++= c;
193 continue;
194 } }
195
196 // This seems to be an XLink!
197 bool suppressedAnchor;
198 linkString.Reset(c); {
199 bool foundEnd= false;
200 while (mfc.Remaining()) {
201 c= char(mfc.Next<NC>());
202 if ( c == '\\') { linkString._<NC>(c); linkString._<NC>(char(mfc.Next())); continue; }
203 if ( c == '\"') { foundEnd= true; break;}
204 if ( c == '\n') { lineNo++; break; }
205 linkString._<NC>(c);
206 if (linkString.Length() == 511 ) {
207 Lox_Warning( "Found unterminated XLink pattern {!Q} in HTML file {}:{}:{}",
208 linkString, path, lineNo, colNo )
209 break;
210 }
211 }
212
213 suppressedAnchor= linkString.CharAtStart() == '%';
214
215 // end not found, or the "&" (which was let pass above!) does not start the entity "&lt;" ('<' symbol)
216 bool illegalHTMLEntity= ( linkString.CharAt(suppressedAnchor ? 1 : 0) == '&'
217 && ( ( suppressedAnchor && !linkString.StartsWith("%&lt;"))
218 || (!suppressedAnchor && !linkString.StartsWith( "&lt;")) ) ) ;
219 if ( !foundEnd || illegalHTMLEntity ) {
220 *wb++= '#';
221 *wb++= '\"';
222 for ( auto lsC : linkString )
223 *wb++= lsC;
224 if ( !illegalHTMLEntity) {
225 *wb++= '\n';
226 Lox_Warning( "Found unterminated XLink pattern {!Q} in HTML file {}:{}:{}",
227 linkString, path, lineNo -1, colNo )
228 }
229 continue;
230 }
231
232 if (suppressedAnchor)
233 linkString[0]= ' ';
234 linkString.Trim();
235 }
236
237 ConvertHTMLEntitiesToAscii(linkString);
238 if (verbosity >= Verbosity::Info)
239 Lox_Info( "Found XLink pattern {!Q} in HTML file {}:{}:{}",
240 linkString, path, lineNo, colNo )
241
242 XLink* link= dxl.GetXLink(linkString, htmlFileNode);
243 {ALIB_LOCK_WITH(link->Lock)
244 link->HTMLLocations.push_back({htmlFileNode, lineNo, colNo});
245 }
246
247 // re-activate AString: sync its length with the raw write pointer 'wb' before appending
248 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(), "DXL/HTML/JOB",
249 "Write buffer overflow detected" )
250 writeBuffer.SetLength(wb - writeBufferStart);
251
252 // not resolved?
253 if ( !link->IsResolved() ) {
254 // paste the original XLink to the output
255 writeBuffer._<NC>( "#")._<NC>( "\"");
256 if ( suppressedAnchor )
257 writeBuffer._<NC>( "%");
258 writeBuffer._<NC>(linkString)._<NC>( "\"");
259 } else {
260 fileChanged= true;
261
262 // write replacement
263 auto& css= link->CSSClasses;
264 ALIB_ASSERT_ERROR(css.Size(), "DXL/HTML/JOB", "No styles given for XLink {}", linkString )
265
266 // -------------- write anchor -------------
267 if (!suppressedAnchor) {
268 writeBuffer._<NC>( "<a class=\"" );
269 for ( int i= 0; i < css.Size(); ++i )
270 writeBuffer._<NC>( css.Get(i) )._<NC>( ' ' );
271 writeBuffer.DeleteEnd(1);
272 writeBuffer._<NC>( "\" href=\"" )
273 ._<NC>( link->Result().HTMLBaseURL )
274 ._<NC>( link->Result().HTMLFile );
275 if (link->Result().HTMLAnchor.IsNotEmpty())
276 writeBuffer._<NC>( '#' )._<NC>( link->Result().HTMLAnchor );
277
278 writeBuffer._<NC>( "\">" )
279 ._<NC>( link->Display )
280 ._<NC>( "</a>" );
281
282 // -------------- write non-anchor text -------------
283 } else {
284 writeBuffer._<NC>( css.IsCodeEntity() ? "<code" :"<span" );
285 writeBuffer._<NC>( " class=\"" );
286 for ( int i= 1; i < css.Size(); ++i ) // start with 1, this omits el/elRef
287 writeBuffer._<NC>( css.Get(i) )._<NC>( ' ' );
288 writeBuffer.DeleteEnd(1);
289 writeBuffer._<NC>( "\">" );
290 writeBuffer._<NC>( link->Display )
291 ._<NC>( css.IsCodeEntity() ? "</code>" :"</span>" );
292 }
293 }
294 wb= writeBuffer.VBuffer() + writeBuffer.Length();
295 } // the read-loop
296
297 // add stats
298 dxl.Stats.HTMLFileLines.fetch_add(lineNo);
299 dxl.Stats.ELReplacements .fetch_add(cntELReplacements);
300 dxl.Stats.ELREFReplacements.fetch_add(cntELRefReplacements);
301
302 //-------------------------- write file ---------------------------------
303 if ( fileChanged && app.cli.DryRun != cli::DryRunModes::Application) {
304 Lox_Verbose("Writing file: {}", path )
305
306 Path tempPath;
307 tempPath << path << ".tmp";
308 ofstream outFile(tempPath.Terminate());
309 if ( !outFile.is_open() ) {
310 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
311 tempPath);
312 return true;
313 }
314 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(), "DXL/HTML/JOB",
315 "Write buffer overflow detected" )
316 outFile.write(writeBuffer.Buffer() + writeBufferPadSize, wb - writeBufferStart - writeBufferPadSize);
317 outFile.close();
318
319 if ( outFile.fail() ) {
320 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
321 tempPath);
322 return true;
323 }
324
325 htmlFile.Close();
326 std::error_code ec;
327 std::filesystem::rename(tempPath.Terminate(), path.Terminate(), ec);
328 if ( ec.value() != 0 ) {
329 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
330 path, ec);
331 return true;
332 }
333 }
334 return true;
335}
336} //namespace [dxl]
337
#define ALIB_LOCK_SHARED_WITH(lock)
#define ALIB_ASSERT(cond, domain)
#define ALIB_ASSERT_ERROR(cond, domain,...)
#define ALIB_LOCK_WITH(lock)
#define Lox_Info(...)
#define Lox_SetDomain(...)
#define Lox_GetVerbosity(result,...)
#define Lox_Verbose(...)
#define Lox_Warning(...)
bool IsEOF() const noexcept
const std::byte * Current() const noexcept
void Skip(std::size_t n)
std::size_t Remaining() const noexcept
void Close() noexcept
Data Open(const char *path, std::size_t knownSize=std::numeric_limits< std::size_t >::max(), bool disableMMap=false)
constexpr const TChar * Terminate() const
integer Capacity() const
TAString & DeleteEnd(const TString< TChar > &deleteIfMatch)
TChar * VBuffer() const
TAString & Trim(const TCString< TChar > &trimChars=CStringConstantsTraits< TChar >::DefaultWhitespaces())
void EnsureRemainingCapacity(integer spaceNeeded)
void SetLength(integer newLength)
constexpr integer Length() const
TChar CharAtStart() const
TChar CharAt(integer idx) const
constexpr bool IsNotEmpty() const
constexpr const TChar * Buffer() const
bool StartsWith(const TString &needle) const
class DXLApp
Definition dxlapp.hpp:37
int Size() const
Definition styles.hpp:96
const alib::String & Get(int idx) const
Definition styles.hpp:113
TApp & Get()
bool Equal(const TChar *lhs, const TChar *rhs, integer cmpLength)
lox::Verbosity Verbosity
strings::TFill< character > Fill
lang::integer integer
strings::TString< character > String
system::Path Path
files::MappedFile MappedFile
LocalString< 128 > String128
LocalString< 256 > String256
strings::TAString< character, lang::HeapAllocator > AString
std::vector< T, StdMA< T > > StdVectorMA
LocalString< 512 > String512
todox
Definition doxyfile.cpp:20
void ConvertHTMLEntitiesToAscii(alib::AString &buffer)
Definition dxl.cpp:104
@ CantOpenHMLFile
An HTML file was not found or could not be accessed.
Definition dxl.hpp:96
DXLPoolWorker * poolWorker
The pool worker that executes this job.
bool Do() override
alib::files::File htmlFileNode
The HTML-file to load and search for DoxygenXLinks links.
Definition jobs.hpp:53
alib::String HTMLBaseURL
Definition index.hpp:391
alib::String HTMLAnchor
The HTML anchor hash. Set only with members.
Definition index.hpp:398