%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Fault Tolerance --- General Issues
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Conventions used in this file:
%   - names are written "Last, First" so that particles and compound
%     surnames are parsed unambiguously;
%   - accented letters are written as BibTeX special characters
%     (e.g. {\'e}) so the file stays 7-bit clean for classic BibTeX;
%   - "keyword", "abstract" and "comment" are annotation fields that the
%     standard styles ignore; "keyword" values are comma-separated.

@TechReport{Anceaume-Cabillic-Chevochot-Puaut-97,
  author      = {Anceaume, E. and Cabillic, G. and Chevochot, P. and Puaut, I.},
  title       = {{Hades}: A Middleware Support for Distributed Safety-Critical Real-Time Applications},
  institution = {INRIA},
  year        = {1997},
  number      = {3280},
  month       = oct,
  keyword     = {REAL TIME, DISTRIBUTION, AVAILABILITY, SAFETY CRITICAL},
  abstract    = {The report is a presentation of some classic techniques concerning real-time applications and the performance (in time) evaluation for these techniques. Hades represents a complete system handling fault tolerance (hardware and software) techniques and their evaluation for real-time.},
  comment     = {[Mon Jan 25 1999] 7/10 pres. The paper is more a description of Hades and does not treat deeply the techniques and strategies used.}
}

@TechReport{Anceaume-Cabillic-Chevochot-Puaut-98,
  author      = {Anceaume, E. and Cabillic, G. and Chevochot, P. and Puaut, I.},
  title       = {A Flexible Run-time Support for Distributed Dependable Hard Real-Time Applications},
  institution = {INRIA},
  year        = {1998},
  number      = {3564},
  month       = nov,
  comment     = {TO BE READ.}
}

@Article{Anderson-Knight-83,
  author  = {Anderson, T. and Knight, J. C.},
  title   = {A framework for software fault tolerance in real-time systems},
  journal = {IEEE Transactions on Software Engineering},
  year    = {1983},
  volume  = {9},
  number  = {3},
  month   = may,
  pages   = {355--364},
  comment = {TO BE READ: Backward recovery in real-time systems}
}

@Article{Ayache-Courtiat-Diaz82,
  author  = {Ayache, J.-M. and Courtiat, J.-P. and Diaz, M.},
  title   = {{REBUS}, a fault-tolerant distributed system for industrial real-time control},
  journal = {IEEE Transactions on Computers},
  year    = {1982},
  volume  = {C-31},
  number  = {7},
  pages   = {339--349},
  comment = {TO BE OBTAINED.}
}

@InProceedings{Campbell-Horton-Belford-79,
  author       = {Campbell, R. H. and Horton, K. H. and Belford, G. G.},
  title        = {Simulations of a fault tolerant deadline mechanism},
  booktitle    = {Proceedings of the 9th Fault Tolerant Computing Symposium (Madison, WI)},
  year         = {1979},
  organization = {IEEE Computer Society},
  month        = jun,
  pages        = {95--101},
  comment      = {TO BE OBTAINED: Campbell's scheme for watchdog timers.}
}

@Article{Chung-Liu-Lin-90,
  author  = {Chung, J.-Y. and Liu, J. W. S. and Lin, K.-J.},
  title   = {Scheduling periodic jobs that allow imprecise results},
  journal = {IEEE Transactions on Computers},
  year    = {1990},
  volume  = {39},
  number  = {9},
  month   = sep,
  pages   = {1156--1174},
  comment = {TO BE OBTAINED: Improved Campbell's scheme for watchdog timers.}
}

@Book{Cristian-90,
  author    = {Cristian, F.},
  title     = {Fault-tolerant distributed computing},
  publisher = {INRIA Rocquencourt},
  year      = {1990},
  address   = {Le Chesnay},
  comment   = {TO BE OBTAINED.}
}

@Article{Cristian-91,
  author  = {Cristian, F.},
  title   = {Understanding fault-tolerant distributed systems},
  journal = {Communications of the ACM},
  year    = {1991},
  volume  = {34},
  number  = {2},
  month   = feb,
  pages   = {56--78},
  note    = {New version available at ftp://ftp.cs.ucsd.edu/pub/team/understandingftsystems.ps.Z},
  comment = {[Mon Feb 15 1999] 7/10 pres. The paper proposes basic concepts which are used to explain hardware and software architecture for fault-tolerant distributed systems. For example, the concepts of server, depends on relation, failure, failure semantics. The general issues in hardware and software architectures are presented. For software fault-tolerance, the issues related to synchronisation (close or loose) are discussed. The paper was revised in 1993, but the references are before '90. Some examples of industrial fault-tolerant architectures are given.}
}

@Article{Hugue-Stotts-91,
  author  = {Hugue, M. C. M. and Stotts, P. D.},
  title   = {Guaranteed task deadlines for fault-tolerant workloads with conditional branches},
  journal = {Real-Time Systems},
  year    = {1991},
  volume  = {3},
  number  = {3},
  month   = sep,
  pages   = {275--305},
  comment = {TO BE OBTAINED: Static scheduling and modular redundancy.}
}

@InProceedings{Jou-88,
  author    = {Jou, J.-Y.},
  title     = {Fault-Tolerant Algorithms and Architectures for Real Time Signal Processing},
  booktitle = {Proceedings of International Conference on Parallel Processing},
  volume    = {1 : Architecture},
  year      = {1988},
  publisher = {The Pennsylvania State University},
  address   = {Pennsylvania, USA},
  month     = aug,
  pages     = {359--362},
  comment   = {TO BE OBTAINED.}
}

@PhdThesis{Ghosh-96,
  author  = {Ghosh, S.},
  title   = {Guaranteeing Fault Tolerance through Scheduling in Real-Time Systems},
  school  = {University of Pittsburgh},
  year    = {1996},
  comment = {TO BE READ.}
}

@InProceedings{Ghosh-Melhem-Mosse-94,
  author    = {Ghosh, S. and Melhem, R. and Moss{\'e}, D.},
  title     = {Fault-Tolerant Scheduling on a Hard Real-Time Multiprocessor System},
  booktitle = {Proceedings of the 8th International Symposium on Parallel Processing},
  editor    = {Siegel, Howard Jay},
  year      = {1994},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  month     = apr,
  pages     = {775--783},
  comment   = {TO BE READ.}
}

% NOTE(review): the next entry has the same authors, title and year (1994)
% as Ghosh-Melhem-Mosse-94 and looks like a second record of the same
% paper, despite the "-95" key.  Both keys are kept so that existing
% \cite commands keep resolving -- confirm before merging.
@InProceedings{Ghosh-Melhem-Mosse-95,
  author    = {Ghosh, S. and Melhem, R. and Moss{\'e}, D.},
  title     = {Fault-Tolerant Scheduling on a Hard Real-Time Multiprocessor System},
  booktitle = {Proceedings of 8th International Parallel Processing Symposium, {IPPS}'94},
  year      = {1994},
  comment   = {TO BE READ. Available at ftp://ftp.cs.pitt.edu/realtime/ft-sch-ipps94.ps.gz}
}

@Article{Ghosh-Melhem-Mosse-Sansarma-98,
  author  = {Ghosh, S. and Melhem, R. and Moss{\'e}, D. and Sansarma, J.},
  title   = {Fault-Tolerant, Rate-Monotonic Scheduling},
  journal = {Real-Time Systems},
  year    = {1998},
  volume  = {15},
  number  = {2},
  comment = {TO BE READ. Available at ftp://ftp.cs.pitt.edu/realtime/ft-rms-dcca97.ps.gz}
}

@Book{Jalote-94,
  author    = {Jalote, P.},
  title     = {Fault Tolerance in Distributed Systems},
  publisher = {Prentice Hall},
  year      = {1994},
  address   = {Englewood Cliffs, New Jersey},
  keyword   = {DEPENDABILITY, DISTRIBUTED PROCESSING},
  abstract  = {Seems to be the more recent and complete book (in English) on fault-tolerance. It is based on concepts given in \cite{Laprie-96} but contains more technical details.},
  comment   = {[Mon Jan 25 1999] 9/10 tech.}
}

% NOTE(review): the first author is presumably Hermann Kopetz, i.e.
% "Kopetz, H." rather than "Kopetz, K." -- confirm against the source.
@Book{Kopetz-Ramamritham-90,
  author    = {Kopetz, K. and Ramamritham, K.},
  title     = {Distributed real time systems and fault tolerance},
  publisher = {INRIA-Rocquencourt},
  year      = {1990},
  number    = {ICDCS-10},
  address   = {Le Chesnay},
  comment   = {TO BE OBTAINED.}
}

@InProceedings{Kalyanasundaram-Pruhs-97,
  author    = {Kalyanasundaram and Pruhs},
  title     = {Fault-tolerant Real-time Scheduling},
  booktitle = {Proceedings of Annual European Symposium on Algorithms, {ESA}'97},
  year      = {1997},
  comment   = {TO BE OBTAINED.}
}

% NOTE(review): this @Book carries both "author" and "editor"; the
% standard styles print only the author and emit a "can't use both
% author and editor fields" warning.  Both fields are kept as found.
@Book{Laprie-92,
  author    = {Laprie, J.-C. and others},
  title     = {Dependability: Basic Concepts and Terminology},
  publisher = {Springer-Verlag},
  year      = {1992},
  editor    = {Laprie, J.-C.},
  series    = {Dependable Computing and Fault-Tolerant Systems},
  address   = {Wien, New York},
  keyword   = {DEPENDABILITY},
  abstract  = {The book contains five parts written respectively in English, French, German, Italian, and Japanese. Each part gives a definition (in the corresponding language) of concepts included in the dependability: fault, error, failures, etc.},
  comment   = {[Mon Feb 8 1999] Interesting for the glossary of French terms and their English translation.}
}

@Book{Laprie-96,
  author    = {Laprie, J.-C. and others},
  title     = {Guide de la s{\^u}ret{\'e} de fonctionnement},
  publisher = {C{\'e}padu{\`e}s Editions},
  year      = {1996},
  note      = {(2$^{e}$~{\'e}dition)},
  keyword   = {DEPENDABILITY},
  abstract  = {This book is a ``guide'' for the dependable systems. It gives an introduction to the dependability. The dependability is defined in the first chapter, the methods and techniques concerning dependability are presented (informally) in the second chapter, and the integration of these methods in the development process is presented in the third chapter. The book is an overview, so the presentation is not very technical (algorithms, etc.), but includes a lot of references.},
  comment   = {[Mon Feb 8 1999] 8/10 pres.}
}

@Article{Liestman-Campbell-86,
  author  = {Liestman, A. L. and Campbell, R. H.},
  title   = {A fault-tolerant scheduling problem},
  journal = {IEEE Transactions on Software Engineering},
  year    = {1986},
  volume  = {12},
  number  = {11},
  month   = nov,
  pages   = {1089--1095},
  comment = {TO BE OBTAINED: Improved Campbell's scheme for watchdog timers.}
}

@InProceedings{Lueth-Laengle-94,
  author       = {Lueth, T. C. and Laengle, Th.},
  title        = {Fault-Tolerance and Error Recovery in an Autonomous Robot with Distributed Controlled Components},
  booktitle    = {Proceedings of the 2nd IEEE International Symposium on Distributed Autonomous Robotic Systems {DARS'94} (Wako, Japan)},
  year         = {1994},
  organization = {IEEE},
  month        = jul,
  pages        = {117--119},
  note         = {Also published in H. Asama (Ed.) Distributed Robotic Systems, Springer-Verlag},
  keyword      = {DEPENDABILITY, ROBOTS},
  abstract     = {The paper presents and addresses some concepts to obtain fault-tolerant behaviour and error recovery in a distributed controlled robot system. The approach for planning and control is the use of agents. In this context it gives a framework for error detection and recovery. No real-time constraints are considered.},
  comment      = {[Thu Feb 4 1999] 5/10 pres.}
}

@TechReport{Mikulin-Melhem-Mosse-97,
  author      = {Mikulin, D. and Melhem, R. and Moss{\'e}, D.},
  title       = {Fault-Tolerant Real-Time {Mach}: A Timeline-based Approach},
  institution = {University of Pittsburgh},
  year        = {1997},
  type        = {Technical Report},
  number      = {TR 97-01},
  address     = {Pittsburgh, PA 15260},
  comment     = {TO BE READ. Available at ftp://ftp.cs.pitt.edu/realtime/res-recl.ps.gz}
}

@InProceedings{Mosse-94,
  author    = {Moss{\'e}, D.},
  title     = {Mechanisms for system-level fault tolerance in real-time systems},
  booktitle = {Proceedings of International Conference on Robotics, Vision, and Parallel Processing for Industrial Automation (Ipoh, Malaysia)},
  year      = {1994},
  month     = may,
  comment   = {TO BE READ. Available at ftp://ftp.cs.pitt.edu/}
}

@InProceedings{Mosse-Melhem-Ghosh-94,
  author    = {Moss{\'e}, D. and Melhem, R. and Ghosh, S.},
  title     = {Analysis of a Fault-Tolerant Multiprocessor Scheduling Algorithm},
  booktitle = {Proceedings of the 24th Fault Tolerant Computing Symposium {FTCS}'94},
  year      = {1994},
  month     = jun,
  comment   = {TO BE READ. Available at ftp://ftp.cs.pitt.edu/realtime/ft-sch-analysis-ftcs94.ps.gz}
}

@TechReport{Saridakis-Issarny-98,
  author      = {Saridakis, T. and Issarny, V.},
  title       = {Fault Tolerant Software Architecture},
  institution = {INRIA},
  year        = {1998},
  number      = {3350},
  keyword     = {ARCHITECTURE REFINEMENT, DEPENDABILITY, FORMAL SPECIFICATION, SOFTWARE ARCHITECTURE},
  abstract    = {The report proposes a layered framework for the analysis of the fault tolerance software properties. 6 layers are proposed and the properties of fault tolerance techniques formalized using propositional logic.},
  comment     = {[Mon Jan 25 1999] 7/10 tech. The framework is interesting and the formalization of the FT may be re-used for the definition of the algorithm and the verification issues. However, this does not seem to be very new (see references).}
}

@InProceedings{Wei-Hiraishi-Cheng-Campbell-80,
  author       = {Wei, A. Y. and Hiraishi, K. and Cheng, R. and Campbell, R. H.},
  title        = {Application of the fault-tolerant deadline mechanism to a satellite on-board computer system},
  booktitle    = {Proceedings of the 10th Fault Tolerant Computing Symposium (Kyoto, Japan)},
  year         = {1980},
  organization = {IEEE Computer Society},
  month        = jun,
  pages        = {107--109},
  comment      = {TO BE OBTAINED: Campbell's scheme for watchdog timers.}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Formal Methods for FT and/or RT
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{Rushby-92,
  author    = {Rushby, J.},
  title     = {Formal Methods for Dependable Real-Time Systems},
  booktitle = {Proceedings of International Symposium on Real-Time Embedded Processing for Space Applications, (Les Saintes-Maries-de-la-Mer, France)},
  year      = {1992},
  publisher = {C{\'e}padu{\`e}s Editions},
  address   = {Toulouse, France},
  month     = nov,
  pages     = {355--366},
  note      = {Paper available at http://www.csl.sri.com/reports/html/repsa92.html},
  keyword   = {DEPENDABILITY, REAL TIME, TEMPORAL LOGIC},
  abstract  = {This paper outlines the motivation for using formal methods to specify and reason about real-time properties, and sketches some of the approaches that have been proposed and used. It is an invited paper, so no technical details are given, but a lot of interesting references. A lot of stuff has been taken from this paper in~\cite{Rushby-94}.},
  comment   = {[Mon Feb 8 1999] 9/10 pres.}
}

@Article{Rushby-94,
  author   = {Rushby, J.},
  title    = {Critical System Properties: Survey and Taxonomy},
  journal  = {Reliability Engineering and System Safety},
  year     = {1994},
  volume   = {43},
  number   = {2},
  pages    = {189--219},
  note     = {Also Technical Report CSL-93-01 May 1993},
  keyword  = {DEPENDABILITY, REAL TIME, SAFETY, SECURE SYSTEMS},
  abstract = {It is a survey of the methods employed in four approaches used for the treatment of critical systems: dependability, safety, security, and real-time. The application of formal methods to these domains is also studied and a taxonomy of these approaches from the point of view of their properties is given.},
  comment  = {[Mon Feb 1 1999] 9/10 pres. Well written, with a lot of interesting references.}
}

@InProceedings{Rushby-96,
  author       = {Rushby, J.},
  title        = {Reconfiguration and Transient Recovery in State Machine Architectures},
  booktitle    = {Proceedings of the 26th Fault Tolerant Computing Symposium {FTCS}'96 (Sendai, Japan)},
  year         = {1996},
  organization = {IEEE Computer Society},
  month        = jun,
  pages        = {6--15},
  note         = {Paper available at http://www.csl.sri.com/reports/html/ftcs96.html},
  keyword      = {DEPENDABILITY, BYZANTINE PROTOCOLS},
  abstract     = {The paper presents an architecture based on state machine replication, and extended to provide transient recovery and reconfiguration in the presence of arbitrary faults. This architecture introduces ``probators'' channels (channels which are probably faulty). The paper treats the clock synchronization and the interactive consistency problems for this architecture through the ``Oral Messages'' algorithm.},
  comment      = {[Mon Feb 8 1999] 9/10 teo.}
}

% The second author is written in "von Last, First" form so that "von"
% is parsed as the name particle instead of being part of the first name.
@Article{Rushby-vonHenke-93,
  author   = {Rushby, J. and von Henke, F.},
  title    = {Formal Verification of Algorithms for Critical Systems},
  journal  = {IEEE Transactions on Software Engineering},
  year     = {1993},
  volume   = {19},
  number   = {1},
  month    = jan,
  pages    = {13--23},
  keyword  = {FORMAL VERIFICATION, CLOCK SYNCHRONIZATION},
  abstract = {The paper presents the core of the proof for the Interactive Convergence Algorithm for clock synchronization in N-modularly redundant systems. The verification is based on the proof system EHDM (ancestor of PVS). Then, it derives some key requirements for a formal specification and verification system adequate for this type of algorithm.},
  comment  = {[Mon Feb 10 1999] 8/10 teo. Is a good case study for verification, but nothing new for dependability.}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SynDEx
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{Lavarenne-Sorel-93,
  author    = {Lavarenne, C. and Sorel, Y.},
  title     = {Performance Optimization of Multiprocessor Real-Time Applications by Graph Transformations},
  booktitle = {Proceedings of {ParCo}'93 (Grenoble, France)},
  year      = {1993},
  month     = sep,
  keyword   = {SYNDEX, SCHEDULING},
  abstract  = {The paper describes a heuristic of algorithm scheduling in SynDEx, a version of schedule flexibility.},
  comment   = {[Wed Feb 10 1999] 8/10 teo.}
}

@Article{Lavarenne-Sorel-97,
  author   = {Lavarenne, C. and Sorel, Y.},
  title    = {Mod{\`e}le unifi{\'e} pour la conception conjointe logiciel-mat{\'e}riel},
  journal  = {Traitement du Signal},
  year     = {1997},
  volume   = {14},
  number   = {6},
  pages    = {569--578},
  keyword  = {SYNDEX, DEPENDENCE GRAPHS},
  abstract = {Presentation of the model ``factorized graph''.},
  comment  = {[Tue Dec 15 1998] 8/10 teo.}
}

@InProceedings{Sorel-94,
  author    = {Sorel, Y.},
  title     = {Massively Parallel Computing Systems with Real Time Constraints --- The ``Algorithm Architecture Adequation'' Methodology},
  booktitle = {Proceedings of Massively Parallel Computing Systems (Ischia, Italy)},
  year      = {1994},
  month     = may,
  keyword   = {SYNDEX, SCHEDULING},
  abstract  = {The paper presents the methodology A$^3$ and its vocabulary: potential and available parallelism, software graph, hardware graph, macro-RTL model for multiprocessors, execution units (operators and bus), routing, distribution, communications, and scheduling transformations, schedule-flexibility heuristic. A short presentation of the heuristics defined in~\cite{Lavarenne-Sorel-93} and an overview of SynDEx v4 tool are given.},
  comment   = {[Tue Dec 15 1998] 9/10 pres.}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Modes in synchronous languages
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{Maraninchi-Remond-97,
  author    = {Maraninchi, F. and R{\'e}mond, Y.},
  title     = {Compositionality Criteria for Defining Mixed-Styles Synchronous Languages},
  booktitle = {Proceedings of the International Symposium: Compositionality - The Significant Difference (Holstein, Germany)},
  year      = {1997},
  publisher = {Springer-Verlag},
  month     = sep,
  keyword   = {ARGOS, LUSTRE, MODES},
  abstract  = {This paper relates modes and compositional aspects of modeling and verification. The definition of modes is taken from~\cite{Maraninchi-Remond-98}. Some interesting descriptions of separate compilation and verification by observers are given.},
  comment   = {[Tue Jan 26 1999] 8/10 pres.}
}

@InProceedings{Maraninchi-Remond-98,
  author    = {Maraninchi, F. and R{\'e}mond, Y.},
  title     = {Mode-Automata: About Modes and States for Reactive Systems},
  booktitle = {Proceedings of European Symposium On Programming (Lisbon, Portugal)},
  year      = {1998},
  publisher = {Springer-Verlag},
  month     = mar,
  keyword   = {ARGOS, LUSTRE, MODES},
  abstract  = {The paper introduces the notion of modes which allows the formalization of discrete transitions between laws (equations) describing data flows. In fact, this approach allows the combination of operational descriptions (automata) and declarative descriptions (data flow equations).},
  comment   = {[Tue Jan 26 1999] 8/10 teo.}
}

@Article{Marchand-Rutten-Chaumette-97,
  author   = {Marchand, E. and Rutten, E. and Chaumette, F.},
  title    = {From Data-Flow Task to Multitasking: Applying the Synchronous Approach to Active Vision in Robotics},
  journal  = {IEEE Transactions on Control Systems Technology},
  year     = {1997},
  volume   = {5},
  number   = {2},
  month    = mar,
  pages    = {200--216},
  keyword  = {MODES, SIGNAL},
  abstract = {This paper describes an application of SIGNALGTi concepts to the specification of a robotic application. The sequencing of data flow specifications in SIGNAL is summarized. See \cite{Rutten-LeGuernic-94} for a detailed presentation of this extension of SIGNAL.},
  comment  = {[Thu Feb 4 1999] 8/10 tech.}
}

@InProceedings{Rutten-LeGuernic-94,
  author    = {Rutten, E. and Le Guernic, P.},
  title     = {Sequencing Data Flow Tasks in {SIGNAL}},
  booktitle = {Proceedings of the ACM Sigplan Workshop on Language, Compiler, and Tool Support for Real-Time Systems, (Orlando, FL, USA)},
  year      = {1994},
  month     = jun,
  keyword   = {MODES, SIGNAL},
  abstract  = {The paper introduces the sequencing of data flow computations in Signal. With a (declarative) computation is associated the interval inside which the computation is active, and this forms a task. This association may be with re-initialization or continuing. The propagation of interval constraints at the sub-processes in a hierarchical description is given.},
  comment   = {[Wed Feb 10 1999] 8/10 teo. A detailed version exists like RR-INRIA 2120, November 1993.}
}