bpp-phyl3 3.0.0
DataFlow.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include <Bpp/Exceptions.h>
6#include <algorithm>
7#include <fstream> // debug
8#include <functional> // std::hash
9#include <iomanip>
10#include <iostream> // debug
11#include <ostream> // debug
12#include <regex>
13#include <stack> // invalidate/compute recursively + debug
14#include <type_traits> // DotOptions flags
15#include <typeinfo>
16#include <unordered_set> // debug
17#include <set>
18
19#include "DataFlow.h"
20#include "DataFlowNumeric.h"
21
22/* std::type_info::name() returns a "mangled" type name, not very readable.
23 * Compilers can optionally provide an ABI header cxxabi.h.
24 * This header contain the demangle function, which makes the type name readable.
25 * By testing on godbolt, all versions of gcc and clang have this header.
26 */
27#if defined(__GNUC__) || defined(__clang__)
28#include <cstdlib>
29#include <cxxabi.h>
30static std::string demangle (const char* name)
31{
32 int status{};
33 std::unique_ptr<char, void (*)(void*)> res{abi::__cxa_demangle (name, nullptr, nullptr, &status),
34 std::free};
35 return status == 0 ? res.get () : name;
36}
37#else
38static std::string demangle (const char* name) { return name; }
39#endif
40
41namespace bpp
42{
43std::string prettyTypeName (const std::type_info& ti) { return demangle (ti.name ()); }
44} // namespace bpp
45
46namespace bpp
47{
48/*****************************************************************************
49 * Error & dependency check functions.
50 */
51void failureComputeWasCalled (const std::type_info& nodeType)
52{
53 throw Exception (prettyTypeName (nodeType) + ": compute() was called");
54}
55
56void failureNodeConversion (const std::type_info& handleType, const Node_DF& node)
57{
58 throw Exception (prettyTypeName (handleType) + " cannot store: " + prettyTypeName (typeid (node)));
59}
60
61void failureDependencyNumberMismatch (const std::type_info& contextNodeType, std::size_t expectedSize,
62 std::size_t givenSize)
63{
64 throw Exception (prettyTypeName (contextNodeType) + ": expected " + std::to_string (expectedSize) +
65 " dependencies, got " + std::to_string (givenSize));
66}
67
68void failureEmptyDependency (const std::type_info& contextNodeType, std::size_t depIndex)
69{
70 throw Exception (prettyTypeName (contextNodeType) + ": " + std::to_string (depIndex) +
71 "-th dependency is empty (nullptr)");
72}
73
74void failureDependencyTypeMismatch (const std::type_info& contextNodeType, std::size_t depIndex,
75 const std::type_info& expectedType,
76 const std::type_info& givenNodeType)
77{
78 throw Exception (prettyTypeName (contextNodeType) + ": expected class derived from " +
79 prettyTypeName (expectedType) + " as " + std::to_string (depIndex) +
80 "-th dependency, got " + prettyTypeName (givenNodeType));
81}
82
83void checkDependencyVectorSize (const std::type_info& contextNodeType, const NodeRefVec& deps,
84 std::size_t expectedSize)
85{
86 auto size = deps.size ();
87 if (size != expectedSize)
88 {
89 failureDependencyNumberMismatch (contextNodeType, expectedSize, size);
90 }
91}
92
93void checkDependencyVectorMinSize (const std::type_info& contextNodeType, const NodeRefVec& deps,
94 std::size_t expectedMinSize)
95{
96 auto size = deps.size ();
97 if (size < expectedMinSize)
98 {
99 failureDependencyNumberMismatch (contextNodeType, expectedMinSize, size);
100 }
101}
102
103void checkDependenciesNotNull (const std::type_info& contextNodeType, const NodeRefVec& deps)
104{
105 for (std::size_t i = 0; i < deps.size (); ++i)
106 {
107 if (!deps[i])
108 {
109 failureEmptyDependency (contextNodeType, i);
110 }
111 }
112}
113
114void checkNthDependencyNotNull (const std::type_info& contextNodeType, const NodeRefVec& deps, std::size_t index)
115{
116 if (!deps[index])
117 failureEmptyDependency (contextNodeType, index);
118}
119
120/*****************************************************************************
121 * Node impl.
122 */
123Node_DF::Node_DF (const NodeRefVec& dependenciesArg) : dependencyNodes_ (dependenciesArg)
124{
125 // dependencyNodes_.erase(std::remove_if(dependencyNodes_.begin(),dependencyNodes_.end(),[](NodeRef nr){return nr==0;}), dependencyNodes_.end());
126 for (auto& n : dependencyNodes_)
127 {
128 if (n)
129 n->registerNode (this);
130 }
131}
132Node_DF::Node_DF (NodeRefVec&& dependenciesArg) : dependencyNodes_ (std::move (dependenciesArg))
133{
134 // dependencyNodes_.erase(std::remove_if(dependencyNodes_.begin(),dependencyNodes_.end(),[](NodeRef nr){return nr==0;}), dependencyNodes_.end());
135 for (auto& n : dependencyNodes_)
136 {
137 if (n)
138 n->registerNode (this);
139 }
140}
141
143{
144 for (auto& n : dependencyNodes_)
145 {
146 if (n)
147 n->unregisterNode (this);
148 }
149}
150
151std::string Node_DF::description() const
152{
153 std::string nodeType = prettyTypeName (typeid (*this));
154 // Shorten displayed name by removing namespaces.
155 nodeType = std::regex_replace(nodeType, std::regex("bpp::"), "");
156 nodeType = std::regex_replace(nodeType, std::regex("Eigen::"), "");
157 nodeType = std::regex_replace(nodeType, std::regex("std::"), "");
158 nodeType = std::regex_replace(nodeType, std::regex("__cxx..::"), "");
159 nodeType = std::regex_replace(nodeType, std::regex("(char_traits<[^>]*>)"), "char");
160 nodeType = std::regex_replace(nodeType, std::regex("(allocator<[^>]*>)"), "");
161 nodeType = std::regex_replace(nodeType, std::regex("(Matrix<)([^>]*>)"), "Matrix");
162 nodeType = std::regex_replace(nodeType, std::regex("(basic_string<)([^>]*>)"), "string");
163 return nodeType;
164}
165
166std::string Node_DF::debugInfo() const { return {}; }
167
169
170bool Node_DF::compareAdditionalArguments(const Node_DF&) const { return false; }
171std::size_t Node_DF::hashAdditionalArguments() const { return 0; }
172
174{
175 throw Exception ("Node does not support derivation: " + description ());
176}
177
179{
180 throw Exception ("Node does not support recreate(deps): " + description ());
181}
182
184{
185 // Compute the current node (and dependencies recursively) if needed
186 if (isValid ())
187 return;
188
189 // Discover then recompute needed nodes
190 std::stack<Node_DF*> nodesToVisit;
191 std::stack<Node_DF*> nodesToRecompute;
192 nodesToVisit.push (this);
193 while (!nodesToVisit.empty ())
194 {
195 auto* n = nodesToVisit.top();
196 nodesToVisit.pop();
197 if (!n->isValid())
198 {
199 nodesToRecompute.push(n);
200 for (auto& dep : n->dependencies())
201 {
202 if (dep)
203 nodesToVisit.push(dep.get());
204 }
205 }
206 }
207 while (!nodesToRecompute.empty())
208 {
209 auto* n = nodesToRecompute.top();
210 nodesToRecompute.pop ();
211 if (!n->isValid())
212 {
213 n->compute();
214 n->makeValid();
215 }
216 }
217}
218
220{
221 if (!isValid ())
222 return;
223 std::stack<Node_DF*> nodesToInvalidate;
224 nodesToInvalidate.push (this);
225 while (!nodesToInvalidate.empty ())
226 {
227 auto* n = nodesToInvalidate.top ();
228 nodesToInvalidate.pop ();
229 if (n->isValid ())
230 {
231 n->makeInvalid ();
232 for (auto* dependent : n->dependentNodes_)
233 {
234 nodesToInvalidate.push (dependent);
235 }
236 }
237 }
238}
239
241{
242 dependentNodes_.emplace_back (n);
243}
244
246{
247 dependentNodes_.erase (std::remove (dependentNodes_.begin (), dependentNodes_.end (), n),
248 dependentNodes_.end ());
249}
250
251/*****************************************************************************
252 * Free functions.
253 */
254bool isTransitivelyDependentOn (const Node_DF& searchedDependency, const Node_DF& node)
255{
256 std::stack<const Node_DF*> nodesToVisit;
257 nodesToVisit.push (&node);
258 while (!nodesToVisit.empty ())
259 {
260 auto* current = nodesToVisit.top ();
261 nodesToVisit.pop ();
262 if (current == &searchedDependency)
263 return true;
264 for (auto& dep : current->dependencies ())
265 {
266 nodesToVisit.push (dep.get ());
267 }
268 }
269 return false;
270}
271
273 const std::unordered_map<const Node_DF*, NodeRef>& substitutions)
274{
275 if (node == 0)
276 return node;
277 auto it = substitutions.find(node.get());
278 if (it != substitutions.end())
279 {
280 // Substitute sub tree.
281 return it->second;
282 }
283 else if (node->dependencies().empty())
284 {
285 // Leaves do no support rebuild: just copy them
286 return node;
287 }
288 else
289 {
290 // Recursion : only rebuild if dependencies have changed
291 NodeRefVec recreatedDeps(node->nbDependencies ());
292 for (std::size_t i = 0; i < recreatedDeps.size (); ++i)
293 {
294 recreatedDeps[i] = recreateWithSubstitution(c, node->dependency(i), substitutions);
295 }
296 if (recreatedDeps == node->dependencies ())
297 {
298 return node;
299 }
300 else
301 {
302 return node->recreate(c, std::move(recreatedDeps));
303 }
304 }
305}
306
307/*****************************************************************************
308 * Context.
309 */
310
311Context::Context() : nodeCache_(), zero_(ConstantZero<size_t>::create(*this, Dimension<size_t>()))
312{}
313
314
316{
317 assert (newNode != nullptr);
318 // Try inserting it, which will fail if already present and return the old one
319 auto r = nodeCache_.emplace(std::move (newNode));
320 return r.first->ref;
321}
322
324{
325 assert (newNode != nullptr);
326 // First remove this object from the set if it is already
327 // for (auto it = nodeCache_.begin(); it != nodeCache_.end();)
328 // {
329 // if (it->ref == newNode)
330 // it = nodeCache_.erase(it);
331 // else
332 // ++it;
333 // }
334
335 // Try inserting it, which will fail if already present and return the old one
336 auto r = nodeCache_.emplace (newNode);
337 return r.first->ref;
338}
339
341{
342 while (nodeCache_.size() != 0)
343 {
344 auto it = nodeCache_.begin();
345 erase(it->ref);
346 }
347
348 nodeCache_.clear();
349}
350
351
352std::vector<const Node_DF*> Context::getAllNodes() const
353{
354 std::vector<const Node_DF*> ret;
355 for (const auto& it : nodeCache_)
356 {
357 ret.push_back(it.ref.get());
358 }
359 return ret;
360}
361
362
363bool Context::erase(const NodeRef& node)
364{
365 if (!node)
366 return false;
367
368 std::set<NodeRef> sNodes;
369 sNodes.emplace(node);
370
371 bool flag = true;
372 bool ret = false;
373 while (flag)
374 {
375 flag = false;
376 for (auto n : sNodes)
377 {
378 if (n->nbDependentNodes() == 0)
379 {
380 for (auto it = nodeCache_.begin(); it != nodeCache_.end();)
381 {
382 if (it->ref == n)
383 {
384 sNodes.erase(n);
385 for (auto dep:n->dependencies())
386 {
387 if (!dep)
388 continue;
389 sNodes.emplace(dep);
390 dep->unregisterNode (n.get());
391 }
392 it = nodeCache_.erase(it);
393 flag = true;
394 ret = true;
395 }
396 else
397 ++it;
398 }
399 }
400 if (flag)
401 break;
402 }
403 }
404
405 return ret;
406}
407
408/* Compare/hash the triplet (type, deps, additionalArgs).
409 * type and deps are available directly from the Node*.
410 * additionalArgs is handled through the two virtual methods.
411 */
413{
414 const auto& lhs = *this->ref;
415 const auto& rhs = *other.ref;
416 return typeid (lhs) == typeid (rhs) && lhs.dependencies () == rhs.dependencies () &&
417 lhs.compareAdditionalArguments (rhs);
418}
419
421{
422 const auto& node = *ref.ref;
423 std::size_t seed = typeid (node).hash_code ();
424 for (const auto& dep : node.dependencies ())
425 {
426 combineHash (seed, dep);
427 }
428 combineHash (seed, node.hashAdditionalArguments ());
429 return seed;
430}
431
432/*****************************************************************************
433 * output dataflow graph to dot format.
434 */
435
436// Make enum behave as flags.
438{
439 using IntType = typename std::underlying_type<DotOptions>::type;
440 return static_cast<DotOptions>(static_cast<IntType>(a) | static_cast<IntType>(b));
441}
443{
444 using IntType = typename std::underlying_type<DotOptions>::type;
445 return static_cast<IntType>(a) & static_cast<IntType>(b);
446}
447
448// Escape text in a dot box label
449static std::string dotLabelEscape (const std::string& s)
450{
451 std::string result;
452 result.reserve (s.size ());
453 static const char toEscape[] = "<>|{} ";
454 for (const char c : s)
455 {
456 if (std::any_of (std::begin (toEscape), std::end (toEscape),
457 [c](const char c2) {
458 return c == c2;
459 }))
460 {
461 result.push_back ('\\');
462 }
463 result.push_back (c);
464 }
465 return result;
466}
467
468// Dot node id for dataflow Node: 'N' + hash as a string
469static std::string dotIdentifier (const Node_DF& node)
470{
471 return 'N' + std::to_string (std::hash<const Node_DF*>{}(&node));
472}
473
474// Write line with node representation
475static void writeDotNode (std::ostream& os, const Node_DF& node, DotOptions opt)
476{
477 os << '\t' << dotIdentifier (node);
479 {
480 os << " [style=filled, shape=Mrecord,label=\"{" << dotLabelEscape (node.description ())
481 << "| valid=" << node.isValid () << ' ' << dotLabelEscape (node.debugInfo ()) << "}\"";
482 }
483 else
484 {
485 os << " [style=filled, shape=" << node.shape() << ",label=\"" << dotLabelEscape (node.description ()) << "\"";
486 }
487 os << ",fillcolor=\"" << node.color() << "\"]";
488 os << ";\n";
489}
490
491// Write line with edge representation for n-th dependency of from
492static void writeDotEdge (std::ostream& os, const Node_DF& from, std::size_t depIndex, DotOptions opt)
493{
494 const auto& to = *from.dependency (depIndex);
495 os << '\t' << dotIdentifier (from) << " -> " << dotIdentifier (to);
496 os << " [";
498 {
499 os << "label=\"" << depIndex << "\",";
500 }
501
502 auto fc = from.color() != "white" ? from.color() : "black";
503 // auto tc=to.color()!="white"?to.color():"black";
504
505 // std::vector<double> vpat={0.05,0.1,0.15,0.2};
506
507 // std::string pat;
508 // for (size_t i=0;i<vpat.size();i++)
509 // pat += fc + ";" + std::to_string(vpat[i]) + ":" + tc + ";" + std::to_string(vpat[vpat.size()-i-1])+":";
510
511 os << "color=\"" << fc << "\"";
512 os << " ]";
513 os << ";\n";
514}
515
516// Write dot lines for graph structure, starting from the given entry points.
517static void writeGraphStructure (std::ostream& os, const std::vector<const Node_DF*>& entryPoints,
518 DotOptions opt)
519{
520 std::stack<const Node_DF*> nodesToVisit;
521 std::unordered_set<const Node_DF*> discoveredNodes;
522
523 const auto discover = [&nodesToVisit, &discoveredNodes](const Node_DF* n) {
524 if (!n)
525 return;
526 const bool discovered = discoveredNodes.find (n) != discoveredNodes.end ();
527 if (!discovered)
528 {
529 nodesToVisit.emplace (n);
530 discoveredNodes.emplace (n);
531 }
532 };
533
534 for (const auto* n : entryPoints)
535 {
536 discover (n);
537 }
538
539 while (!nodesToVisit.empty ())
540 {
541 const auto* node = nodesToVisit.top ();
542
543 nodesToVisit.pop ();
544 writeDotNode (os, *node, opt);
545
547 {
548 for (const auto* dependent : node->dependentNodes ())
549 {
550 discover (dependent);
551 }
552 }
553 for (std::size_t index = 0; index < node->nbDependencies (); ++index)
554 {
555 if (node->dependency(index))
556 {
557 writeDotEdge (os, *node, index, opt);
558 discover (node->dependency (index).get ());
559 }
560 }
561 }
562}
563
564void writeGraphToDot (std::ostream& os, const std::vector<const Node_DF*>& nodes, DotOptions opt)
565{
566 os << "digraph {\n";
567 writeGraphStructure (os, nodes, opt);
568 os << "}\n";
569}
570
571void writeGraphToDot (const std::string& filename, const std::vector<const Node_DF*>& nodes,
572 DotOptions opt)
573{
574 std::ofstream file{filename};
575 writeGraphToDot (file, nodes, opt);
576}
577} // namespace bpp
static std::string demangle(const char *name)
Definition: DataFlow.cpp:38
r = 0 for each component.
Context for dataflow node construction.
Definition: DataFlow.h:527
void clear()
Clear the context.
Definition: DataFlow.cpp:340
std::unordered_set< CachedNodeRef, CachedNodeRefHash > nodeCache_
Definition: DataFlow.h:605
NodeRef cached(NodeRef &&newNode)
For a newly created node, return its equivalent from the cache. If not already present in the cache,...
Definition: DataFlow.cpp:315
bool erase(const NodeRef &r)
Remove an element from the map, only if it has no dependent nodes, and likewise down the graph.
Definition: DataFlow.cpp:363
std::vector< const Node_DF * > getAllNodes() const
Returns the vector of all pointers to all nodes.
Definition: DataFlow.cpp:352
Base dataflow Node class.
Definition: DataFlow.h:152
Node_DF()=default
virtual std::string description() const
Node pretty name (default = type name).
Definition: DataFlow.cpp:151
NodeRefVec dependencyNodes_
Definition: DataFlow.h:312
virtual ~Node_DF()
Definition: DataFlow.cpp:142
virtual std::size_t hashAdditionalArguments() const
Return the hash of node-specific configuration.
Definition: DataFlow.cpp:171
virtual std::string debugInfo() const
Node debug info (default = ""): user defined detailed info for DF graph debug.
Definition: DataFlow.cpp:166
virtual NodeRef derive(Context &c, const Node_DF &node)
Returns a node computing d(this_node_expression)/d(node_expression).
Definition: DataFlow.cpp:173
const NodeRef & dependency(std::size_t i) const noexcept
Definition: DataFlow.h:185
virtual bool hasNumericalProperty(NumericalProperty prop) const
Test if the node has the given numerical property.
Definition: DataFlow.cpp:168
virtual NodeRef recreate(Context &c, NodeRefVec &&deps)
Recreate the node with different dependencies.
Definition: DataFlow.cpp:178
bool isValid() const noexcept
Definition: DataFlow.h:166
void registerNode(Node_DF *n)
Definition: DataFlow.cpp:240
virtual bool compareAdditionalArguments(const Node_DF &other) const
Compare node-specific configuration to another.
Definition: DataFlow.cpp:170
void unregisterNode(const Node_DF *n)
Definition: DataFlow.cpp:245
void computeRecursively()
Compute this node value, recomputing dependencies (transitively) as needed.
Definition: DataFlow.cpp:183
void invalidateRecursively() noexcept
Invalidate (transitively) dependent nodes from this one.
Definition: DataFlow.cpp:219
virtual std::string shape() const
Definition: DataFlow.h:195
std::vector< Node_DF * > dependentNodes_
Definition: DataFlow.h:313
virtual std::string color() const
Definition: DataFlow.h:190
T to(const std::string &s)
Defines the basic types of data flow nodes.
static void writeDotEdge(std::ostream &os, const Node_DF &from, std::size_t depIndex, DotOptions opt)
Definition: DataFlow.cpp:492
void checkDependenciesNotNull(const std::type_info &contextNodeType, const NodeRefVec &deps)
Checks that all dependencies are not null, throws if not.
Definition: DataFlow.cpp:103
static std::string dotLabelEscape(const std::string &s)
Definition: DataFlow.cpp:449
DotOptions operator|(DotOptions a, DotOptions b)
Definition: DataFlow.cpp:437
std::string to_string(const NoDimension &)
static std::string dotIdentifier(const Node_DF &node)
Definition: DataFlow.cpp:469
void checkNthDependencyNotNull(const std::type_info &contextNodeType, const NodeRefVec &deps, std::size_t index)
Definition: DataFlow.cpp:114
DotOptions
Definition: DataFlow.h:47
static void writeGraphStructure(std::ostream &os, const std::vector< const Node_DF * > &entryPoints, DotOptions opt)
Definition: DataFlow.cpp:517
std::vector< NodeRef > NodeRefVec
Alias for a dependency vector (of NodeRef).
Definition: DataFlow.h:81
std::string prettyTypeName(const std::type_info &ti)
Debug: return a readable name for a C++ type descriptor (from typeid operator).
Definition: DataFlow.cpp:43
static void writeDotNode(std::ostream &os, const Node_DF &node, DotOptions opt)
Definition: DataFlow.cpp:475
NumericalProperty
Numerical properties for DF Node.
Definition: DataFlow.h:88
void failureDependencyNumberMismatch(const std::type_info &contextNodeType, std::size_t expectedSize, std::size_t givenSize)
Definition: DataFlow.cpp:61
NodeRef recreateWithSubstitution(Context &c, const NodeRef &node, const std::unordered_map< const Node_DF *, NodeRef > &substitutions)
Recreate node by transitively replacing dependencies according to substitutions mapping.
Definition: DataFlow.cpp:272
void checkDependencyVectorSize(const std::type_info &contextNodeType, const NodeRefVec &deps, std::size_t expectedSize)
Definition: DataFlow.cpp:83
void failureDependencyTypeMismatch(const std::type_info &contextNodeType, std::size_t depIndex, const std::type_info &expectedType, const std::type_info &givenNodeType)
Definition: DataFlow.cpp:74
bool isTransitivelyDependentOn(const Node_DF &searchedDependency, const Node_DF &node)
Check if searchedDependency if a transitive dependency of node.
Definition: DataFlow.cpp:254
void checkDependencyVectorMinSize(const std::type_info &contextNodeType, const NodeRefVec &deps, std::size_t expectedMinSize)
Checks the minimum size of a dependency vector, throws if mismatch.
Definition: DataFlow.cpp:93
void writeGraphToDot(std::ostream &os, const std::vector< const Node_DF * > &nodes, DotOptions opt)
Write dataflow graph starting at nodes to output stream.
Definition: DataFlow.cpp:564
void failureNodeConversion(const std::type_info &handleType, const Node_DF &node)
Definition: DataFlow.cpp:56
void failureEmptyDependency(const std::type_info &contextNodeType, std::size_t depIndex)
Definition: DataFlow.cpp:68
void combineHash(std::size_t &seed, const T &t)
Combine hashable value to a hash, from Boost library.
Definition: DataFlow.h:33
void failureComputeWasCalled(const std::type_info &nodeType)
Definition: DataFlow.cpp:51
std::shared_ptr< Node_DF > NodeRef
Definition: DataFlow.h:78
bool operator&(DotOptions a, DotOptions b)
Definition: DataFlow.cpp:442
std::size_t operator()(const CachedNodeRef &ref) const
Definition: DataFlow.cpp:420
NodeRef is hashable and comparable as a pointer. CachedNodeRef is hashable and comparable,...
Definition: DataFlow.h:590
bool operator==(const CachedNodeRef &other) const
Definition: DataFlow.cpp:412
Store a dimension for type T.