bpp-phyl3  3.0.0
DataFlow.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Exceptions.h>
6 #include <algorithm>
7 #include <fstream> // debug
8 #include <functional> // std::hash
9 #include <iomanip>
10 #include <iostream> // debug
11 #include <ostream> // debug
12 #include <regex>
13 #include <stack> // invalidate/compute recursively + debug
14 #include <type_traits> // DotOptions flags
15 #include <typeinfo>
16 #include <unordered_set> // debug
17 #include <set>
18 
19 #include "DataFlow.h"
20 #include "DataFlowNumeric.h"
21 
22 /* std::type_info::name() returns a "mangled" type name, not very readable.
23  * Compilers can optionally provide an ABI header cxxabi.h.
24  * This header contain the demangle function, which makes the type name readable.
25  * By testing on godbolt, all versions of gcc and clang have this header.
26  */
27 #if defined(__GNUC__) || defined(__clang__)
28 #include <cstdlib>
29 #include <cxxabi.h>
30 static std::string demangle (const char* name)
31 {
32  int status{};
33  std::unique_ptr<char, void (*)(void*)> res{abi::__cxa_demangle (name, nullptr, nullptr, &status),
34  std::free};
35  return status == 0 ? res.get () : name;
36 }
37 #else
38 static std::string demangle (const char* name) { return name; }
39 #endif
40 
41 namespace bpp
42 {
43 std::string prettyTypeName (const std::type_info& ti) { return demangle (ti.name ()); }
44 } // namespace bpp
45 
46 namespace bpp
47 {
48 /*****************************************************************************
49  * Error & dependency check functions.
50  */
51 void failureComputeWasCalled (const std::type_info& nodeType)
52 {
53  throw Exception (prettyTypeName (nodeType) + ": compute() was called");
54 }
55 
56 void failureNodeConversion (const std::type_info& handleType, const Node_DF& node)
57 {
58  throw Exception (prettyTypeName (handleType) + " cannot store: " + prettyTypeName (typeid (node)));
59 }
60 
61 void failureDependencyNumberMismatch (const std::type_info& contextNodeType, std::size_t expectedSize,
62  std::size_t givenSize)
63 {
64  throw Exception (prettyTypeName (contextNodeType) + ": expected " + std::to_string (expectedSize) +
65  " dependencies, got " + std::to_string (givenSize));
66 }
67 
68 void failureEmptyDependency (const std::type_info& contextNodeType, std::size_t depIndex)
69 {
70  throw Exception (prettyTypeName (contextNodeType) + ": " + std::to_string (depIndex) +
71  "-th dependency is empty (nullptr)");
72 }
73 
74 void failureDependencyTypeMismatch (const std::type_info& contextNodeType, std::size_t depIndex,
75  const std::type_info& expectedType,
76  const std::type_info& givenNodeType)
77 {
78  throw Exception (prettyTypeName (contextNodeType) + ": expected class derived from " +
79  prettyTypeName (expectedType) + " as " + std::to_string (depIndex) +
80  "-th dependency, got " + prettyTypeName (givenNodeType));
81 }
82 
83 void checkDependencyVectorSize (const std::type_info& contextNodeType, const NodeRefVec& deps,
84  std::size_t expectedSize)
85 {
86  auto size = deps.size ();
87  if (size != expectedSize)
88  {
89  failureDependencyNumberMismatch (contextNodeType, expectedSize, size);
90  }
91 }
92 
93 void checkDependencyVectorMinSize (const std::type_info& contextNodeType, const NodeRefVec& deps,
94  std::size_t expectedMinSize)
95 {
96  auto size = deps.size ();
97  if (size < expectedMinSize)
98  {
99  failureDependencyNumberMismatch (contextNodeType, expectedMinSize, size);
100  }
101 }
102 
103 void checkDependenciesNotNull (const std::type_info& contextNodeType, const NodeRefVec& deps)
104 {
105  for (std::size_t i = 0; i < deps.size (); ++i)
106  {
107  if (!deps[i])
108  {
109  failureEmptyDependency (contextNodeType, i);
110  }
111  }
112 }
113 
114 void checkNthDependencyNotNull (const std::type_info& contextNodeType, const NodeRefVec& deps, std::size_t index)
115 {
116  if (!deps[index])
117  failureEmptyDependency (contextNodeType, index);
118 }
119 
120 /*****************************************************************************
121  * Node impl.
122  */
123 Node_DF::Node_DF (const NodeRefVec& dependenciesArg) : dependencyNodes_ (dependenciesArg)
124 {
125  // dependencyNodes_.erase(std::remove_if(dependencyNodes_.begin(),dependencyNodes_.end(),[](NodeRef nr){return nr==0;}), dependencyNodes_.end());
126  for (auto& n : dependencyNodes_)
127  {
128  if (n)
129  n->registerNode (this);
130  }
131 }
132 Node_DF::Node_DF (NodeRefVec&& dependenciesArg) : dependencyNodes_ (std::move (dependenciesArg))
133 {
134  // dependencyNodes_.erase(std::remove_if(dependencyNodes_.begin(),dependencyNodes_.end(),[](NodeRef nr){return nr==0;}), dependencyNodes_.end());
135  for (auto& n : dependencyNodes_)
136  {
137  if (n)
138  n->registerNode (this);
139  }
140 }
141 
143 {
144  for (auto& n : dependencyNodes_)
145  {
146  if (n)
147  n->unregisterNode (this);
148  }
149 }
150 
151 std::string Node_DF::description() const
152 {
153  std::string nodeType = prettyTypeName (typeid (*this));
154  // Shorten displayed name by removing namespaces.
155  nodeType = std::regex_replace(nodeType, std::regex("bpp::"), "");
156  nodeType = std::regex_replace(nodeType, std::regex("Eigen::"), "");
157  nodeType = std::regex_replace(nodeType, std::regex("std::"), "");
158  nodeType = std::regex_replace(nodeType, std::regex("__cxx..::"), "");
159  nodeType = std::regex_replace(nodeType, std::regex("(char_traits<[^>]*>)"), "char");
160  nodeType = std::regex_replace(nodeType, std::regex("(allocator<[^>]*>)"), "");
161  nodeType = std::regex_replace(nodeType, std::regex("(Matrix<)([^>]*>)"), "Matrix");
162  nodeType = std::regex_replace(nodeType, std::regex("(basic_string<)([^>]*>)"), "string");
163  return nodeType;
164 }
165 
166 std::string Node_DF::debugInfo() const { return {}; }
167 
169 
170 bool Node_DF::compareAdditionalArguments(const Node_DF&) const { return false; }
171 std::size_t Node_DF::hashAdditionalArguments() const { return 0; }
172 
174 {
175  throw Exception ("Node does not support derivation: " + description ());
176 }
177 
179 {
180  throw Exception ("Node does not support recreate(deps): " + description ());
181 }
182 
184 {
185  // Compute the current node (and dependencies recursively) if needed
186  if (isValid ())
187  return;
188 
189  // Discover then recompute needed nodes
190  std::stack<Node_DF*> nodesToVisit;
191  std::stack<Node_DF*> nodesToRecompute;
192  nodesToVisit.push (this);
193  while (!nodesToVisit.empty ())
194  {
195  auto* n = nodesToVisit.top();
196  nodesToVisit.pop();
197  if (!n->isValid())
198  {
199  nodesToRecompute.push(n);
200  for (auto& dep : n->dependencies())
201  {
202  if (dep)
203  nodesToVisit.push(dep.get());
204  }
205  }
206  }
207  while (!nodesToRecompute.empty())
208  {
209  auto* n = nodesToRecompute.top();
210  nodesToRecompute.pop ();
211  if (!n->isValid())
212  {
213  n->compute();
214  n->makeValid();
215  }
216  }
217 }
218 
220 {
221  if (!isValid ())
222  return;
223  std::stack<Node_DF*> nodesToInvalidate;
224  nodesToInvalidate.push (this);
225  while (!nodesToInvalidate.empty ())
226  {
227  auto* n = nodesToInvalidate.top ();
228  nodesToInvalidate.pop ();
229  if (n->isValid ())
230  {
231  n->makeInvalid ();
232  for (auto* dependent : n->dependentNodes_)
233  {
234  nodesToInvalidate.push (dependent);
235  }
236  }
237  }
238 }
239 
241 {
242  dependentNodes_.emplace_back (n);
243 }
244 
246 {
247  dependentNodes_.erase (std::remove (dependentNodes_.begin (), dependentNodes_.end (), n),
248  dependentNodes_.end ());
249 }
250 
251 /*****************************************************************************
252  * Free functions.
253  */
254 bool isTransitivelyDependentOn (const Node_DF& searchedDependency, const Node_DF& node)
255 {
256  std::stack<const Node_DF*> nodesToVisit;
257  nodesToVisit.push (&node);
258  while (!nodesToVisit.empty ())
259  {
260  auto* current = nodesToVisit.top ();
261  nodesToVisit.pop ();
262  if (current == &searchedDependency)
263  return true;
264  for (auto& dep : current->dependencies ())
265  {
266  nodesToVisit.push (dep.get ());
267  }
268  }
269  return false;
270 }
271 
273  const std::unordered_map<const Node_DF*, NodeRef>& substitutions)
274 {
275  if (node == 0)
276  return node;
277  auto it = substitutions.find(node.get());
278  if (it != substitutions.end())
279  {
280  // Substitute sub tree.
281  return it->second;
282  }
283  else if (node->dependencies().empty())
284  {
285  // Leaves do no support rebuild: just copy them
286  return node;
287  }
288  else
289  {
290  // Recursion : only rebuild if dependencies have changed
291  NodeRefVec recreatedDeps(node->nbDependencies ());
292  for (std::size_t i = 0; i < recreatedDeps.size (); ++i)
293  {
294  recreatedDeps[i] = recreateWithSubstitution(c, node->dependency(i), substitutions);
295  }
296  if (recreatedDeps == node->dependencies ())
297  {
298  return node;
299  }
300  else
301  {
302  return node->recreate(c, std::move(recreatedDeps));
303  }
304  }
305 }
306 
307 /*****************************************************************************
308  * Context.
309  */
310 
311 Context::Context() : nodeCache_(), zero_(ConstantZero<size_t>::create(*this, Dimension<size_t>()))
312 {}
313 
314 
316 {
317  assert (newNode != nullptr);
318  // Try inserting it, which will fail if already present and return the old one
319  auto r = nodeCache_.emplace(std::move (newNode));
320  return r.first->ref;
321 }
322 
324 {
325  assert (newNode != nullptr);
326  // First remove this object from the set if it is already
327  // for (auto it = nodeCache_.begin(); it != nodeCache_.end();)
328  // {
329  // if (it->ref == newNode)
330  // it = nodeCache_.erase(it);
331  // else
332  // ++it;
333  // }
334 
335  // Try inserting it, which will fail if already present and return the old one
336  auto r = nodeCache_.emplace (newNode);
337  return r.first->ref;
338 }
339 
341 {
342  while (nodeCache_.size()!=0)
343  {
344  auto it = nodeCache_.begin();
345  erase(it->ref);
346  }
347 
348  nodeCache_.clear();
349 }
350 
351 
352 std::vector<const Node_DF*> Context::getAllNodes() const
353 {
354  std::vector<const Node_DF*> ret;
355  for (const auto& it : nodeCache_)
356  {
357  ret.push_back(it.ref.get());
358  }
359  return ret;
360 }
361 
362 
363 bool Context::erase(const NodeRef& node)
364 {
365  if (!node)
366  return false;
367 
368  std::set<NodeRef> sNodes;
369  sNodes.emplace(node);
370 
371  bool flag = true;
372  bool ret = false;
373  while (flag)
374  {
375  flag = false;
376  for (auto n : sNodes)
377  {
378  if (n->nbDependentNodes() == 0)
379  {
380  for (auto it = nodeCache_.begin(); it != nodeCache_.end();)
381  {
382  if (it->ref == n)
383  {
384  sNodes.erase(n);
385  for (auto dep:n->dependencies())
386  {
387  if (!dep)
388  continue;
389  sNodes.emplace(dep);
390  dep->unregisterNode (n.get());
391  }
392  it = nodeCache_.erase(it);
393  flag = true;
394  ret = true;
395  }
396  else
397  ++it;
398  }
399  }
400  if (flag)
401  break;
402  }
403  }
404 
405  return ret;
406 }
407 
408 /* Compare/hash the triplet (type, deps, additionalArgs).
409  * type and deps are available directly from the Node*.
410  * additionalArgs is handled through the two virtual methods.
411  */
413 {
414  const auto& lhs = *this->ref;
415  const auto& rhs = *other.ref;
416  return typeid (lhs) == typeid (rhs) && lhs.dependencies () == rhs.dependencies () &&
417  lhs.compareAdditionalArguments (rhs);
418 }
419 
421 {
422  const auto& node = *ref.ref;
423  std::size_t seed = typeid (node).hash_code ();
424  for (const auto& dep : node.dependencies ())
425  {
426  combineHash (seed, dep);
427  }
428  combineHash (seed, node.hashAdditionalArguments ());
429  return seed;
430 }
431 
432 /*****************************************************************************
433  * output dataflow graph to dot format.
434  */
435 
436 // Make enum behave as flags.
438 {
439  using IntType = typename std::underlying_type<DotOptions>::type;
440  return static_cast<DotOptions>(static_cast<IntType>(a) | static_cast<IntType>(b));
441 }
443 {
444  using IntType = typename std::underlying_type<DotOptions>::type;
445  return static_cast<IntType>(a) & static_cast<IntType>(b);
446 }
447 
448 // Escape text in a dot box label
449 static std::string dotLabelEscape (const std::string& s)
450 {
451  std::string result;
452  result.reserve (s.size ());
453  static const char toEscape[] = "<>|{} ";
454  for (const char c : s)
455  {
456  if (std::any_of (std::begin (toEscape), std::end (toEscape),
457  [c](const char c2) {
458  return c == c2;
459  }))
460  {
461  result.push_back ('\\');
462  }
463  result.push_back (c);
464  }
465  return result;
466 }
467 
468 // Dot node id for dataflow Node: 'N' + hash as a string
469 static std::string dotIdentifier (const Node_DF& node)
470 {
471  return 'N' + std::to_string (std::hash<const Node_DF*>{}(&node));
472 }
473 
474 // Write line with node representation
475 static void writeDotNode (std::ostream& os, const Node_DF& node, DotOptions opt)
476 {
477  os << '\t' << dotIdentifier (node);
479  {
480  os << " [style=filled, shape=Mrecord,label=\"{" << dotLabelEscape (node.description ())
481  << "| valid=" << node.isValid () << ' ' << dotLabelEscape (node.debugInfo ()) << "}\"";
482  }
483  else
484  {
485  os << " [style=filled, shape=" << node.shape() << ",label=\"" << dotLabelEscape (node.description ()) << "\"";
486  }
487  os << ",fillcolor=\"" << node.color() << "\"]";
488  os << ";\n";
489 }
490 
491 // Write line with edge representation for n-th dependency of from
492 static void writeDotEdge (std::ostream& os, const Node_DF& from, std::size_t depIndex, DotOptions opt)
493 {
494  const auto& to = *from.dependency (depIndex);
495  os << '\t' << dotIdentifier (from) << " -> " << dotIdentifier (to);
496  os << " [";
498  {
499  os << "label=\"" << depIndex << "\",";
500  }
501 
502  auto fc = from.color() != "white" ? from.color() : "black";
503  // auto tc=to.color()!="white"?to.color():"black";
504 
505  // std::vector<double> vpat={0.05,0.1,0.15,0.2};
506 
507  // std::string pat;
508  // for (size_t i=0;i<vpat.size();i++)
509  // pat += fc + ";" + std::to_string(vpat[i]) + ":" + tc + ";" + std::to_string(vpat[vpat.size()-i-1])+":";
510 
511  os << "color=\"" << fc << "\"";
512  os << " ]";
513  os << ";\n";
514 }
515 
516 // Write dot lines for graph structure, starting from the given entry points.
517 static void writeGraphStructure (std::ostream& os, const std::vector<const Node_DF*>& entryPoints,
518  DotOptions opt)
519 {
520  std::stack<const Node_DF*> nodesToVisit;
521  std::unordered_set<const Node_DF*> discoveredNodes;
522 
523  const auto discover = [&nodesToVisit, &discoveredNodes](const Node_DF* n) {
524  if (!n)
525  return;
526  const bool discovered = discoveredNodes.find (n) != discoveredNodes.end ();
527  if (!discovered)
528  {
529  nodesToVisit.emplace (n);
530  discoveredNodes.emplace (n);
531  }
532  };
533 
534  for (const auto* n : entryPoints)
535  {
536  discover (n);
537  }
538 
539  while (!nodesToVisit.empty ())
540  {
541  const auto* node = nodesToVisit.top ();
542 
543  nodesToVisit.pop ();
544  writeDotNode (os, *node, opt);
545 
547  {
548  for (const auto* dependent : node->dependentNodes ())
549  {
550  discover (dependent);
551  }
552  }
553  for (std::size_t index = 0; index < node->nbDependencies (); ++index)
554  {
555  if (node->dependency(index))
556  {
557  writeDotEdge (os, *node, index, opt);
558  discover (node->dependency (index).get ());
559  }
560  }
561  }
562 }
563 
564 void writeGraphToDot (std::ostream& os, const std::vector<const Node_DF*>& nodes, DotOptions opt)
565 {
566  os << "digraph {\n";
567  writeGraphStructure (os, nodes, opt);
568  os << "}\n";
569 }
570 
571 void writeGraphToDot (const std::string& filename, const std::vector<const Node_DF*>& nodes,
572  DotOptions opt)
573 {
574  std::ofstream file{filename};
575  writeGraphToDot (file, nodes, opt);
576 }
577 } // namespace bpp
static std::string demangle(const char *name)
Definition: DataFlow.cpp:38
r = 0 for each component.
Context for dataflow node construction.
Definition: DataFlow.h:527
void clear()
Clear the context.
Definition: DataFlow.cpp:340
std::unordered_set< CachedNodeRef, CachedNodeRefHash > nodeCache_
Definition: DataFlow.h:605
NodeRef cached(NodeRef &&newNode)
For a newly created node, return its equivalent from the cache. If not already present in the cache,...
Definition: DataFlow.cpp:315
bool erase(const NodeRef &r)
Remove an element from the map, only if it has no dependent nodes, and likewise down the graph.
Definition: DataFlow.cpp:363
std::vector< const Node_DF * > getAllNodes() const
Returns the vector of all pointers to all nodes.
Definition: DataFlow.cpp:352
Base dataflow Node class.
Definition: DataFlow.h:152
Node_DF()=default
virtual std::string description() const
Node pretty name (default = type name).
Definition: DataFlow.cpp:151
NodeRefVec dependencyNodes_
Definition: DataFlow.h:312
virtual ~Node_DF()
Definition: DataFlow.cpp:142
virtual std::size_t hashAdditionalArguments() const
Return the hash of node-specific configuration.
Definition: DataFlow.cpp:171
const NodeRef & dependency(std::size_t i) const noexcept
Definition: DataFlow.h:185
virtual std::string debugInfo() const
Node debug info (default = ""): user defined detailed info for DF graph debug.
Definition: DataFlow.cpp:166
virtual NodeRef derive(Context &c, const Node_DF &node)
Returns a node computing d(this_node_expression)/d(node_expression).
Definition: DataFlow.cpp:173
virtual bool hasNumericalProperty(NumericalProperty prop) const
Test if the node has the given numerical property.
Definition: DataFlow.cpp:168
virtual NodeRef recreate(Context &c, NodeRefVec &&deps)
Recreate the node with different dependencies.
Definition: DataFlow.cpp:178
bool isValid() const noexcept
Definition: DataFlow.h:166
void registerNode(Node_DF *n)
Definition: DataFlow.cpp:240
virtual bool compareAdditionalArguments(const Node_DF &other) const
Compare node-specific configuration to another.
Definition: DataFlow.cpp:170
void unregisterNode(const Node_DF *n)
Definition: DataFlow.cpp:245
void computeRecursively()
Compute this node value, recomputing dependencies (transitively) as needed.
Definition: DataFlow.cpp:183
void invalidateRecursively() noexcept
Invalidate (transitively) dependent nodes from this one.
Definition: DataFlow.cpp:219
virtual std::string shape() const
Definition: DataFlow.h:195
std::vector< Node_DF * > dependentNodes_
Definition: DataFlow.h:313
virtual std::string color() const
Definition: DataFlow.h:190
T to(const std::string &s)
Defines the basic types of data flow nodes.
static void writeDotEdge(std::ostream &os, const Node_DF &from, std::size_t depIndex, DotOptions opt)
Definition: DataFlow.cpp:492
void checkDependenciesNotNull(const std::type_info &contextNodeType, const NodeRefVec &deps)
Checks that all dependencies are not null, throws if not.
Definition: DataFlow.cpp:103
static std::string dotLabelEscape(const std::string &s)
Definition: DataFlow.cpp:449
DotOptions operator|(DotOptions a, DotOptions b)
Definition: DataFlow.cpp:437
std::string to_string(const NoDimension &)
static std::string dotIdentifier(const Node_DF &node)
Definition: DataFlow.cpp:469
void checkNthDependencyNotNull(const std::type_info &contextNodeType, const NodeRefVec &deps, std::size_t index)
Definition: DataFlow.cpp:114
DotOptions
Definition: DataFlow.h:47
static void writeGraphStructure(std::ostream &os, const std::vector< const Node_DF * > &entryPoints, DotOptions opt)
Definition: DataFlow.cpp:517
std::vector< NodeRef > NodeRefVec
Alias for a dependency vector (of NodeRef).
Definition: DataFlow.h:81
std::string prettyTypeName(const std::type_info &ti)
Debug: return a readable name for a C++ type descriptor (from typeid operator).
Definition: DataFlow.cpp:43
static void writeDotNode(std::ostream &os, const Node_DF &node, DotOptions opt)
Definition: DataFlow.cpp:475
NumericalProperty
Numerical properties for DF Node.
Definition: DataFlow.h:88
void failureDependencyNumberMismatch(const std::type_info &contextNodeType, std::size_t expectedSize, std::size_t givenSize)
Definition: DataFlow.cpp:61
NodeRef recreateWithSubstitution(Context &c, const NodeRef &node, const std::unordered_map< const Node_DF *, NodeRef > &substitutions)
Recreate node by transitively replacing dependencies according to substitutions mapping.
Definition: DataFlow.cpp:272
void checkDependencyVectorSize(const std::type_info &contextNodeType, const NodeRefVec &deps, std::size_t expectedSize)
Definition: DataFlow.cpp:83
void failureDependencyTypeMismatch(const std::type_info &contextNodeType, std::size_t depIndex, const std::type_info &expectedType, const std::type_info &givenNodeType)
Definition: DataFlow.cpp:74
bool isTransitivelyDependentOn(const Node_DF &searchedDependency, const Node_DF &node)
Check if searchedDependency if a transitive dependency of node.
Definition: DataFlow.cpp:254
void checkDependencyVectorMinSize(const std::type_info &contextNodeType, const NodeRefVec &deps, std::size_t expectedMinSize)
Checks the minimum size of a dependency vector, throws if mismatch.
Definition: DataFlow.cpp:93
void writeGraphToDot(std::ostream &os, const std::vector< const Node_DF * > &nodes, DotOptions opt)
Write dataflow graph starting at nodes to output stream.
Definition: DataFlow.cpp:564
void failureNodeConversion(const std::type_info &handleType, const Node_DF &node)
Definition: DataFlow.cpp:56
void failureEmptyDependency(const std::type_info &contextNodeType, std::size_t depIndex)
Definition: DataFlow.cpp:68
void combineHash(std::size_t &seed, const T &t)
Combine hashable value to a hash, from Boost library.
Definition: DataFlow.h:33
void failureComputeWasCalled(const std::type_info &nodeType)
Definition: DataFlow.cpp:51
std::shared_ptr< Node_DF > NodeRef
Definition: DataFlow.h:78
bool operator&(DotOptions a, DotOptions b)
Definition: DataFlow.cpp:442
std::size_t operator()(const CachedNodeRef &ref) const
Definition: DataFlow.cpp:420
NodeRef is hashable and comparable as a pointer. CachedNodeRef is hashable and comparable,...
Definition: DataFlow.h:590
bool operator==(const CachedNodeRef &other) const
Definition: DataFlow.cpp:412
Store a dimension for type T.