rfc9696.original.xml | rfc9696.xml | |||
---|---|---|---|---|
<?rfc toc="yes"?> | <?xml version="1.0" encoding="UTF-8"?> | |||
<?rfc tocompact="yes"?> | ||||
<?rfc tocdepth="3"?> | ||||
<?rfc tocindent="yes"?> | ||||
<?rfc symrefs="yes"?> | ||||
<?rfc sortrefs="yes"?> | ||||
<?rfc comments="yes"?> | ||||
<?rfc inline="yes"?> | ||||
<?rfc compact="no"?> | ||||
<?rfc subcompact="no"?> | ||||
<?rfc authorship="yes"?> | ||||
<?rfc tocappendix="yes"?> | ||||
<rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="info" ipr='trust20090 | ||||
2' tocInclude="true" obsoletes="" updates="" consensus="true" submissionType="I | ||||
ETF" xml:lang="en" version="3" docName="draft-ietf-rift-applicability-17" > | ||||
<front> | ||||
<title abbrev='RIFT Applicability Statement'>RIFT Applicability and Operational | ||||
Considerations</title> | ||||
<author fullname='Yuehua Wei' initials='Y.' surname='Wei' role='editor' > | ||||
<organization>ZTE Corporation</organization> | ||||
<address> | ||||
<postal> | ||||
<street>No.50, Software Avenue</street> | ||||
<city>Nanjing</city> | ||||
<region/> | ||||
<code>210012</code> | ||||
<country>China</country> | ||||
</postal> | ||||
<email>wei.yuehua@zte.com.cn</email> | ||||
</address> | ||||
</author> | ||||
<author fullname='Zheng Zhang' initials='Z.' surname='Zhang'> | ||||
<organization>ZTE Corporation</organization> | ||||
<address> | ||||
<postal> | ||||
<street>No.50, Software Avenue</street> | ||||
<city>Nanjing</city> | ||||
<region/> | ||||
<code>210012</code> | ||||
<country>China</country> | ||||
</postal> | ||||
<email>zhang.zheng@zte.com.cn</email> | ||||
</address> | ||||
</author> | ||||
<author fullname='Dmitry Afanasiev' initials='D.' surname='Afanasiev'> | ||||
<organization>Yandex</organization> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city/> | ||||
<region/> | <!DOCTYPE rfc [ | |||
<!ENTITY nbsp "&#160;"> | ||||
<!ENTITY zwsp "&#8203;"> | ||||
<!ENTITY nbhy "&#8209;"> | ||||
<!ENTITY wj "&#8288;"> | ||||
]> | ||||
<code/> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="info" ipr='trust200902' tocInclude="true" obsoletes="" updates="" consensus="true" submissionType="IETF" xml:lang="en" version="3" docName="draft-ietf-rift-applicability-17" number="9696" symRefs="true" sortRefs="true"> | |||
<country/> | <front> | |||
</postal> | <!-- [rfced] Please note that the title of the document has been updated to | |||
expand "RIFT" per Section 3.6 of RFC 7322 ("RFC Style Guide"). Please | ||||
review. | ||||
<email>fl0w@yandex-team.ru</email> | Original: | |||
</address> | RIFT Applicability and Operational Considerations | |||
</author> | ||||
<author fullname='Pascal Thubert' initials='P.' surname='Thubert'> | Current: | |||
<organization abbrev='Cisco Systems'>Cisco Systems, Inc</organization> | Routing in Fat Trees (RIFT) Applicability and Operational Considerations | |||
<address> | --> | |||
<postal> | <title abbrev='RIFT Applicability Statement'>Routing in Fat Trees (RIFT) Applica | |||
bility and Operational Considerations</title> | ||||
<seriesInfo name="RFC" value="9696"/> | ||||
<author fullname='Yuehua Wei' initials='Y.' surname='Wei' role='editor' > | ||||
<organization>ZTE Corporation</organization> | ||||
<address> | ||||
<postal> | ||||
<street>No.50, Software Avenue</street> | ||||
<city>Nanjing</city> | ||||
<code>210012</code> | ||||
<country>China</country> | ||||
</postal> | ||||
<email>wei.yuehua@zte.com.cn</email> | ||||
</address> | ||||
</author> | ||||
<author fullname='Zheng (Sandy) Zhang' initials='Z.' surname='Zhang'> | ||||
<organization>ZTE Corporation</organization> | ||||
<address> | ||||
<postal> | ||||
<street>No.50, Software Avenue</street> | ||||
<city>Nanjing</city> | ||||
<code>210012</code> | ||||
<country>China</country> | ||||
</postal> | ||||
<email>zhang.zheng@zte.com.cn</email> | ||||
</address> | ||||
</author> | ||||
<author fullname='Dmitry Afanasiev' initials='D.' surname='Afanasiev'> | ||||
<organization>Yandex</organization> | ||||
<address> | ||||
<email>fl0w@yandex-team.ru</email> | ||||
</address> | ||||
</author> | ||||
<author fullname='Pascal Thubert' initials='P.' surname='Thubert'> | ||||
<organization abbrev='Cisco Systems'>Cisco Systems, Inc</organization> | ||||
<address> | ||||
<postal> | ||||
<street>Building D</street> | <street>Building D</street> | |||
<street>45 Allee des Ormes - BP1200 </street> | <street>45 Allee des Ormes - BP1200 </street> | |||
<city>MOUGINS - Sophia Antipolis</city> | <city>Mougins - Sophia Antipolis</city> | |||
<code>06254</code> | <code>06254</code> | |||
<country>FRANCE</country> | <country>France</country> | |||
</postal> | </postal> | |||
<phone>+33 497 23 26 34</phone> | <phone>+33 497 23 26 34</phone> | |||
<email>pthubert@cisco.com</email> | <email>pthubert@cisco.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname='Tony Przygienda' initials='T.' surname='Przygienda'> | ||||
<author fullname='Tony Przygienda' initials='T.' surname='Przygienda'> | <organization abbrev='Juniper Networks'>Juniper Networks</organization> | |||
<organization abbrev='Juniper Networks'>Juniper Networks</organization> | <address> | |||
<address> | <postal> | |||
<postal> | <street>1194 N. Mathilda Ave</street> | |||
<street>1194 N. Mathilda Ave </street> | <city>Sunnyvale</city> | |||
<city>Sunnyvale, CA</city> | <region>CA</region> | |||
<code>94089</code> | <code>94089</code> | |||
<country>US</country> | <country>United States of America</country> | |||
</postal> | </postal> | |||
<email>prz@juniper.net</email> | <email>prz@juniper.net</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<date month="December" year="2024"/> | ||||
<date/> | <area>RTG</area> | |||
<workgroup>rift</workgroup> | ||||
<!-- [rfced] Please insert any keywords (beyond those that appear in | ||||
the title) for use on https://www.rfc-editor.org/search. --> | ||||
<keyword>example</keyword> | ||||
<area>Routing</area> | ||||
<workgroup>RIFT WG</workgroup> | ||||
<keyword>RIFT</keyword> | ||||
<abstract> | <abstract> | |||
<t> | <t> | |||
This document discusses the properties, applicability and operational considerat | This document discusses the properties, applicability, and operational | |||
ions | considerations of Routing in Fat Trees (RIFT) in different network scenarios | |||
of RIFT in different | with the intention of providing a rough guide on how RIFT can be deployed | |||
network scenarios. It intends to provide a | to simplify routing operations in Clos topologies and their variations. | |||
rough guide how RIFT can be deployed to simplify routing operations in | ||||
Clos topologies and their variations. | ||||
</t> | </t> | |||
</abstract> | </abstract> | |||
</front> | </front> | |||
<!-- ***** MIDDLE MATTER ***** --> | ||||
<middle> | <middle> | |||
<section><name>Introduction</name> | <section><name>Introduction</name> | |||
<t>This document discusses the properties and applicability of | <t>This document discusses the properties and applicability of | |||
<xref target='I-D.ietf-rift-rift'>"Routing in Fat Trees"</xref> in | <xref target='RFC9692'>"RIFT: Routing in Fat Trees"</xref> in | |||
different deployment scenarios and highlights the operational simplicity of the | different deployment scenarios and highlights the operational simplicity of the | |||
technology compared to traditional routing solutions. | technology compared to traditional routing solutions. | |||
It also documents special considerations when RIFT is used with or without overlays and/or controllers, and how RIFT identifies miscablings and reroutes around node and link failures. | It also documents special considerations when RIFT is used with or without overlays and/or controllers and how RIFT identifies miscablings and reroutes around node and link failures. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Terminology</name> | <section><name>Terminology</name> | |||
<t>This document uses the terminology of <xref target='I-D.ietf-rift-rift'>RIFT< | ||||
/xref>. The most frequently used terminologies defined in RIFT are listed here. | <!-- [rfced] To avoid repetition and make the text more concise, we have | |||
These terms are consistent with definition in <xref target='I-D.ietf-rift-rift'> | updated the following sentences in Section 2. Please let us know any | |||
RIFT</xref> </t> | objections. | |||
Original: | ||||
This document uses the terminology of RIFT [RIFT]. The most | ||||
frequently used terminologies defined in RIFT are listed here. These terms | ||||
are consistent with definition in RIFT [RIFT] | ||||
Current: | ||||
This document uses the terminology defined in [RIFT]. The most | ||||
frequently used terms and their definitions from that document are listed | ||||
here. | ||||
--> | ||||
<t>This document uses the terminology defined in <xref target='RFC9692'/>. | ||||
The most frequently used terms and their definitions from that document are | ||||
listed here.</t> | ||||
<dl newline="true" spacing="normal"> | <dl newline="true" spacing="normal"> | |||
<dt>Clos/Fat Tree:</dt> | <dt>Clos / Fat Tree:</dt> | |||
<dd> | <dd> | |||
This document uses the terms Clos and Fat Tree interchangeably | This document uses the terms "Clos" and "Fat Tree" interchangeably | |||
where it always refers to a folded spine-and-leaf topology with possibly multiple Points of Delivery (PoDs) and one or multiple Top of Fabric (ToF) planes. | where it always refers to a folded spine-and-leaf topology with possibly multiple Points of Delivery (PoDs) and one or multiple Top of Fabric (ToF) planes. | |||
Several modifications such as leaf-2-leaf | Several modifications such as leaf-2-leaf | |||
shortcuts and multiple level shortcuts are possible and described further in | shortcuts and multiple level shortcuts are possible and described further in | |||
the document. | the document. | |||
</dd> | </dd> | |||
<dt>Crossbar:</dt> | <dt>Crossbar:</dt> | |||
<dd> | <dd> | |||
Physical arrangement of ports in a switching matrix without | Physical arrangement of ports in a switching matrix without | |||
implying any further scheduling or buffering discipl ines. | implying any further scheduling or buffering discipl ines. | |||
</dd> | </dd> | |||
<dt>Directed Acyclic Graph (DAG):</dt> | <dt>Directed Acyclic Graph (DAG):</dt> | |||
<dd>A finite directed graph with no directed cycles (loops). | <dd>A finite directed graph with no directed cycles (loops). | |||
<!-- [rfced] What is "vice versa" referring to in this sentence? | ||||
Original: | ||||
If links in a Clos are considered as either being all directed | ||||
towards the top or vice versa, each of such two graphs is a DAG. | ||||
Perhaps: | ||||
If links in a Clos are considered as either being all directed | ||||
towards the top or bottom, each of such two graphs is a DAG. | ||||
--> | ||||
If links in a Clos are considered as either being al l directed towards the top or vice versa, each | If links in a Clos are considered as either being al l directed towards the top or vice versa, each | |||
of such two graphs is a DAG. | of two such graphs is a DAG. | |||
</dd> | </dd> | |||
<dt>Disaggregation:</dt> | <dt>Disaggregation:</dt> | |||
<dd> | <dd> | |||
Process in which a node decides to | The process in which a node decides to | |||
advertise more specific prefixes Southwards, either | advertise more specific prefixes southwards, either | |||
positively to | positively to | |||
attract the corresponding traffic, or negatively to | attract the corresponding traffic or negatively to r | |||
repel it. | epel it. | |||
Disaggregation is performed to prevent traffic loss and suboptimal | Disaggregation is performed to prevent traffic loss and suboptimal | |||
routing to the more specific prefixes.</dd> | routing to the more specific prefixes.</dd> | |||
<dt>Leaf:</dt> | <dt>Leaf:</dt> | |||
<dd>A node without southbound adjacencies. Level 0 implies a leaf in R IFT but a leaf does not have to be level 0. | <dd>A node without southbound adjacencies. Level 0 implies a leaf in R IFT, but a leaf does not have to be level 0. | |||
</dd> | </dd> | |||
<dt>LIE:</dt> | <dt>LIE:</dt> | |||
<dd>This is an acronym for a | <dd>This is an acronym for "Link Information Element" exchanged | |||
"Link Information Element" exchanged on | on all the system's links running RIFT to form <em>ThreeWay</em> | |||
all the system's links running RIFT to form <em>Thre | adjacencies and carry information used to perform RIFT Zero Touch | |||
eWay</em> adjacencies and carry | Provisioning (ZTP) of levels. | |||
information used to perform RIFT Zero Touch Provisio | ||||
ning (ZTP) of levels. | ||||
</dd> | </dd> | |||
<dt>South Reflection:</dt> | <dt>South Reflection:</dt> | |||
<dd>Often abbreviated just as | <dd>Often abbreviated just as | |||
"reflection", it defines a mechanism where South Nod e TIEs | "reflection", South Reflection defines a mechanism w here South Node TIEs | |||
are "reflected" from the level south back up north t o allow | are "reflected" from the level south back up north t o allow | |||
nodes in the same level | nodes in the same level | |||
without E-W links to be aware of each other's node T opology | without East-West links to be aware of each other's node Topology | |||
Information Elements (TIEs).</dd> | Information Elements (TIEs).</dd> | |||
<dt>Spine:</dt> | <dt>Spine:</dt> | |||
<dd>Any nodes north of leaves and south of ToF nodes. Multiple | <dd>Any nodes north of leaves and south of ToF nodes. Multiple | |||
layers of spines in a PoD are possible. | layers of spines in a PoD are possible. | |||
</dd> | </dd> | |||
<dt>TIE:</dt> | <dt>TIE:</dt> | |||
<dd>This is an acronym for a "Topology | <dd>This is an acronym for "Topology Information Element". TIEs are | |||
Information Element". TIEs are exchanged between RIF | exchanged between RIFT nodes to describe parts of a network such as | |||
T nodes to | links and address prefixes. A TIE always has a direction and a | |||
describe parts of a network such as links and addres | type. North TIEs (sometimes abbreviated as N-TIEs) are used when | |||
s prefixes. | dealing with TIEs in the northbound representation, and South TIEs | |||
A TIE has always a direction and a type. | (sometimes abbreviated as S-TIEs) are used for the southbound | |||
North TIEs (sometimes abbreviated as N-TIEs) are use | equivalent. TIEs have different types, such as node and prefix TIEs. | |||
d when dealing with | ||||
TIEs in the | ||||
northbound representation | ||||
and South-TIEs (sometimes abbreviated as S-TIEs) | ||||
for the southbound equivalent. TIEs have different t | ||||
ypes such | ||||
as node and prefix TIEs. | ||||
</dd> | </dd> | |||
</dl> | </dl> | |||
<!--End of Terminology--> | ||||
</section> | </section> | |||
<section><name>Problem Statement of Routing in Modern IP Fabric Fat Tree Network s</name> | <section><name>Problem Statement of Routing in Modern IP Fabric Fat Tree Network s</name> | |||
<!-- [rfced] We are unable to verify if the term "homonym" is used correctly in [FATTREE]. May we rephrase the following sentence for accuracy? | ||||
<t><xref target="CLOS">Clos</xref> topologies (called commonly a fat tree/networ | Original: | |||
k in modern IP fabric considerations as homonym to the original definition of th | Clos [CLOS] topologies (called commonly a fat tree/network in modern | |||
e term <xref target="FATTREE">Fat Tree</xref>) have gained prominence in today's | IP fabric considerations as homonym to the original definition of the | |||
networking, primarily as a result of the paradigm shift towards a centralized d | term Fat Tree [FATTREE]) have gained prominence in today's | |||
ata-center based architecture that deliver a majority of computation and storage | networking, primarily as a result of the paradigm shift towards a | |||
services. | centralized data-center based architecture that deliver a majority of | |||
computation and storage services. | ||||
Perhaps: | ||||
Clos [CLOS] topologies (commonly called a Fat Tree/network in modern | ||||
IP fabric considerations as a similar term for the original definition of the | ||||
term Fat Tree [FATTREE]) have gained prominence in today's | ||||
networking, primarily as a result of the paradigm shift towards a | ||||
centralized data-center-based architecture that delivers a majority of | ||||
computation and storage services. | ||||
--> | ||||
<t><xref target="CLOS">Clos</xref> topologies (commonly called a Fat Tree/networ | ||||
k in modern IP fabric considerations as a homonym to the original definition of | ||||
the term <xref target="FATTREE">Fat Tree</xref>) have gained prominence in today | ||||
's networking, primarily as a result of the paradigm shift towards a centralized | ||||
data-center-based architecture that delivers a majority of computation and stor | ||||
age services. | ||||
</t> | </t> | |||
<t>Current routing protocols were geared towards a network with an | <t>Current routing protocols were geared towards a network with an | |||
irregular topology with isotropic properties, and low degree of connectivity. | irregular topology with isotropic properties and a low degree of connectivity. | |||
When applied to Fat Tree topologies: | When applied to Fat Tree topologies: | |||
</t> | </t> | |||
<ul> | <ul spacing="normal"> | |||
<li>They tend to need extensive configuration or provisioning during | <li>They tend to need extensive configuration or provisioning | |||
initialization | during initialization and adding or removing nodes from the | |||
and adding or removing nodes from the fabric.</li> | fabric.</li> | |||
<li>For link state routing protocols, all nodes including spine and | <li>For link-state routing protocols, all nodes including | |||
leaf nodes learn the entire network topology and | spine-and-leaf nodes learn the entire network topology and routing | |||
routing information, which is in fact, not needed on the leaf nodes | information, which is actually not needed on the leaf nodes during | |||
during normal | normal operation. They flood significant amounts of duplicate | |||
operation. They flood significant amounts of duplicate link stat | link-state information between spine-and-leaf nodes during | |||
e information between spine | topology updates and convergence events, requiring that additional | |||
and leaf nodes during topology updates and convergence events, requi | CPU and link bandwidth be consumed. This may impact the stability | |||
ring that | and scalability of the fabric, make the fabric less reactive to | |||
additional CPU and link bandwidth be consumed. | failures, and prevent the use of cheaper hardware at the lower | |||
This may impact the stability and scalability of the fabric, make th | levels (i.e., spine-and-leaf nodes). | |||
e fabric less | ||||
reactive to failures, and prevent the use of cheaper hardware at the | ||||
lower levels | ||||
(i.e. spine and leaf nodes). | ||||
</li> | </li> | |||
</ul> | </ul> | |||
</section> | </section> | |||
<section><name>Applicability of RIFT to Clos IP Fabrics</name> | <section><name>Applicability of RIFT to Clos IP Fabrics</name> | |||
<t> | <t> | |||
Further content of this document assumes that the reader is | Further content of this document assumes that the reader is familiar with the | |||
familiar with the terms and concepts used in <xref target='RFC2328'>OSPF (Open S | terms and concepts used in the <xref target='RFC2328'>Open Shortest Path First | |||
hortest Path First)</xref>, <xref target='RFC5340'>OSPF for IPv6</xref> | (OSPF)</xref>, <xref target='RFC5340'>OSPF for IPv6</xref>, and <xref | |||
and <xref target='ISO10589-Second-Edition'>IS-IS (Intermediate System to Interme | target='ISO10589-Second-Edition'>Intermediate System to Intermediate System | |||
diate System)</xref> link-state protocols. The sections of <xref target='I-D.iet | (IS-IS)</xref> link-state | |||
f-rift-rift'>RIFT</xref> outline the requirements of routing in IP fabrics and R | protocols. <xref target='RFC9692'/> outlines the | |||
IFT protocol concepts. | requirements of routing in IP fabrics and RIFT protocol concepts. | |||
</t> | </t> | |||
<section><name>Overview of RIFT</name> | <section><name>Overview of RIFT</name> | |||
<t> | <t> | |||
RIFT is a dynamic routing protocol that is tailored for use in Clos, Fat-Tree, a | RIFT is a dynamic routing protocol that is tailored for use in Clos, Fat Tree, a | |||
nd other anisotropic topologies. | nd other anisotropic topologies. | |||
A core property therefore of RIFT is that its operation is | Therefore, a core property of RIFT is that its operation is | |||
sensitive to the structure of the fabric - it is anisotropic. RIFT acts as a lin | sensitive to the structure of the fabric -- it is anisotropic. RIFT acts as a li | |||
k-state protocol when "pointing north", advertising southwards routes to northwa | nk-state protocol when "pointing north", advertising southward routes to northwa | |||
rds peers (parents) through flooding and database synchronization. When "pointin | rd peers (parents) through flooding and database synchronization. When "pointing | |||
g south", RIFT operates hop-by-hop like a distance- vector protocol, typically a | south", RIFT operates hop-by-hop like a distance-vector protocol, typically adv | |||
dvertising a fabric default route towards the Top of Fabric (ToF, aka superspine | ertising a fabric default route towards the ToF, aka superspine, to southward pe | |||
) to southwards peers (children). | ers (children). | |||
</t> | </t> | |||
<t> | <t> | |||
The fabric default is typically the default route, as described in | The fabric default is typically the default route as described in | |||
Section 6.3.8 "Southbound Default Route Origination" of | Section <xref target='RFC9692' sectionFormat='bare' section='6.3.8'> | |||
<xref target='I-D.ietf-rift-rift'>RIFT</xref>. | "Southbound Default Route Origination"</xref> of <xref target="RFC9692"/>. | |||
The ToF nodes may alternatively originate more specific prefixes (P') southbound | The ToF nodes may alternatively originate more specific prefixes (P') southbound | |||
instead of the default route. In such a scenario, all addresses carried within | instead of the default route. In such a scenario, all addresses carried within | |||
the RIFT domain must be contained within P', and it is possible for a leaf that | the RIFT domain must be contained within P', and it is possible for a leaf that | |||
acts as gateway to the Internet to advertise the default route instead. | acts as a gateway to the Internet to advertise the default route instead. | |||
</t> | </t> | |||
<t>RIFT floods flat link-state information northbound only so that each level | <t>RIFT floods flat link-state information northbound only so that each level | |||
obtains the full topology of levels south of it. That information is never flood | obtains the full topology of the levels that are south of it. That information i | |||
ed | s never flooded | |||
east-west or back south again. So a top tier node has full set of prefixes from | East-West or back south again, so a top tier node has a full set of prefixes fro | |||
m | ||||
the Shortest Path First (SPF) calculation. | the Shortest Path First (SPF) calculation. | |||
</t> | </t> | |||
<t>In the southbound direction, the protocol operates like a "fully summarizing, | <t>In the southbound direction, the protocol operates like a "fully summarizing, | |||
unidirectional" path-vector protocol or rather a distance-vector with implicit split horizon. Routing information, normally just the default route, propagates one hop south and is "re-advertised" by nodes at next lower level. | unidirectional" path-vector protocol or, rather, a distance-vector with implicit split horizon. Routing information, normally just the default route, propagates one hop south and is "re-advertised" by nodes at the next lower level. | |||
</t> | </t> | |||
<figure align='center' anchor='pic-rift'><name>RIFT overview</name> | <figure align='center' anchor='pic-rift'><name>RIFT Overview</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+---------------+ +----------------+ | +---------------+ +----------------+ | |||
| ToF | | ToF | LEVEL 2 | | ToF | | ToF | LEVEL 2 | |||
+ ++------+--+--+-+ ++-+--+----+-----+ | + ++------+--+--+-+ ++-+--+----+-----+ | |||
| | | | | | | | | ^ | | | | | | | | | | ^ | |||
+ | | | +-------------------------+ | | + | | | +-------------------------+ | | |||
Distance | +-------------------+ | | | | | | Distance- | +-------------------+ | | | | | | |||
Vector | | | | | | | | + | Vector | | | | | | | | + | |||
South | | | | +--------+ | | | Link-State | South | | | | +--------+ | | | Link-State | |||
+ | | | | | | | | Flooding | + | | | | | | | | Flooding | |||
| | | +----------------+ | | | North | | | | +----------------+ | | | North | |||
v | | | | | | | | + | v | | | | | | | | + | |||
++---+-+ +------+ +-+----+ ++----++ | | ++---+-+ +------+ +-+----+ ++----++ | | |||
|SPINE | |SPINE | | SPINE| | SPINE| | LEVEL 1 | |SPINE | |SPINE | | SPINE| | SPINE| | LEVEL 1 | |||
+ ++----++ ++---+-+ +-+--+-+ ++----++ | | + ++----++ ++---+-+ +-+--+-+ ++----++ | | |||
+ | | | | | | | | | ^ N | + | | | | | | | | | ^ N | |||
Distance | +-------+ | | +--------+ | | | E | Distance- | +-------+ | | +--------+ | | | E | |||
Vector | | | | | | | | | +------> | Vector | | | | | | | | | +------> | |||
South | +-------+ | | | +------+ | | | | | South | +-------+ | | | +------+ | | | | | |||
+ | | | | | | | | | + | + | | | | | | | | | + | |||
v ++--++ +-+-++ ++--++ ++--++ + | v ++--++ +-+-++ ++--++ ++--++ + | |||
|LEAF| |LEAF| |LEAF| |LEAF| LEVEL 0 | |LEAF| |LEAF| |LEAF| |LEAF| LEVEL 0 | |||
+----+ +----+ +----+ +----+ | +----+ +----+ +----+ +----+]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>A spine node has only information necessary for its level, which is all | <t>A spine node only has information necessary for its level, which is all | |||
destinations south of the node based on SPF calculation, default route, and | destinations south of the node based on SPF calculation, the default route, and | |||
potentially disaggregated routes. | potentially disaggregated routes. | |||
</t> | </t> | |||
<t>RIFT combines the advantage of both link-state and distance-vector: | <!-- [rfced] May we specify "link-state" and "distance-vector" for clarity in | |||
the following instances? | ||||
Original: | ||||
RIFT combines the advantage of both link-state and distance-vector... | ||||
RIFT also eliminates major disadvantages of link-state and distance-vector | ||||
with... | ||||
Perhaps: | ||||
RIFT combines the advantages of both link-state and distance-vector | ||||
protocols... | ||||
RIFT also eliminates major disadvantages of link-state and distance-vector | ||||
protocols... | ||||
--> | ||||
<t>RIFT combines the advantages of both link-state and distance-vector: | ||||
</t> | </t> | |||
<ul> | <ul spacing="normal"> | |||
<li>Fastest possible convergence</li> | <li>Fastest possible convergence</li> | |||
<li>Automatic detection of topology</li> | <li>Automatic detection of topology</li> | |||
<li>Minimal routes/information on Top-of-Rack (ToR) switches, aka le af nodes</li> | <li>Minimal routes/information on Top-of-Rack (ToR) switches, aka le af nodes</li> | |||
<li>High degree of ECMP</li> | <li>High degree of ECMP</li> | |||
<li>Fast de-commissioning of nodes</li> | <li>Fast decommissioning of nodes</li> | |||
<li>Maximum propagation speed with flexible prefixes in an update</l i> | <li>Maximum propagation speed with flexible prefixes in an update</l i> | |||
</ul> | </ul> | |||
<t>So there are two types of link-state database which are "north representation | <t>There are two types of link-state databases: "north representation" | |||
" North Topology Information Elements (N-TIEs) and "south representation" South | North Topology Information Elements (N-TIEs) and "south representation" South | |||
Topology Information Elements (S-TIEs). The N-TIEs contain a link-state topology | Topology Information Elements (S-TIEs). The N-TIEs contain a link-state | |||
description of lower levels and S-TIEs carry simply default and disaggregated ro | topology description of lower levels, and the S-TIEs simply carry default and | |||
utes for the lower | disaggregated routes for the lower levels. | |||
levels. | ||||
</t> | </t> | |||
<t>RIFT also eliminates major disadvantages of link-state and distance-vector wi th: | <t>RIFT also eliminates major disadvantages of link-state and distance-vector wi th the following: | |||
</t> | </t> | |||
<t> | <ul spacing="normal"> | |||
</t><ul> | ||||
<li>Reduced and balanced flooding</li> | <li>Reduced and balanced flooding</li> | |||
<li>Level constrained automatic neighbor discovery</li> | <li>Level-constrained automatic neighbor discovery</li> | |||
</ul><t> | </ul> | |||
</t> | ||||
<t>To achieve this, RIFT builds on the art of IGPs, not only OSPF and IS-IS but also MANET and IoT (Internet of Things), to provide unique features: | <t>To achieve this, RIFT builds on the art of IGPs, such as OSPF, IS-IS, Mobile Ad Hoc Network (MANET), and Internet of Things (IoT) to provide unique features : | |||
</t> | </t> | |||
<ul> | <ul spacing="normal"> | |||
<li>Automatic (positive or negative) route disaggregation of northwa | <li>Automatic (positive or negative) route disaggregation of northwa | |||
rds routes upon fallen leaves</li> | rd routes upon fallen leaves</li> | |||
<li>Recursive operation in the case of negative route disaggregation | <li>Recursive operation in the case of negative route | |||
</li> | disaggregation </li> | |||
<li>Anisotropic routing that extends a principle seen in <xref targe | <li>Anisotropic routing that extends a principle seen in the <xref t | |||
t='RFC6550'>RPL</xref> to wide superspines</li> | arget='RFC6550'>Routing Protocol for Low-Power and Lossy Networks (RPL)</xref> t | |||
<li>Optimal flooding reduction that derives from the concept of a "m | o wide superspines</li> | |||
ultipoint relay" (MPR) found in <xref target='RFC3626'>OLSR</xref> and | <li>Optimal flooding reduction that derives from the concept of a "m | |||
balances the flooding load over northbound links and nodes.</li> | ultipoint relay" (MPR) found in <xref target='RFC3626'>Optimized Link State Rout | |||
ing (OLSR)</xref> and | ||||
balances the flooding load over northbound links and nodes</li> | ||||
</ul> | </ul> | |||
<t>Additional advantages that are unique to RIFT are listed below, the details o f which can be found in <xref target='I-D.ietf-rift-rift'>RIFT</xref>. | <t>Additional advantages that are unique to RIFT are listed below. The details o f these advantages can be found in <xref target='RFC9692'>RIFT</xref>. | |||
</t> | </t> | |||
<ul> | <ul spacing="normal"> | |||
<li>True ZTP (Zero Touch Provisioning)</li> | <li>True ZTP</li> | |||
<li>Minimal blast radius on failures</li> | <li>Minimal blast radius on failures</li> | |||
<li>Can utilize all paths through fabric without looping</li> | <li>Can utilize all paths through fabric without looping</li> | |||
<li>Simple leaf implementation that can scale down to servers</li> | <li>Simple leaf implementation that can scale down to servers</li> | |||
<li>Key-Value store</li> | <li>Key-value store</li> | |||
<li>Horizontal links used for protection only</li> | <li>Horizontal links used for protection only</li> | |||
<!--li>Supports non-equal cost multipath and can replace multi-chass | <!-- [rfced] Some author comments are present in the XML. Please confirm that | |||
is link aggregation group (MLAG or MC-LAG)</li--> | no updates related to these comments are outstanding. Note that the | |||
comments will be deleted prior to publication. | ||||
--> | ||||
<!--li>Supports non-equal cost multipath and can replace multi-chassis link agg | ||||
regation group (MLAG or MC-LAG)</li--> | ||||
</ul> | </ul> | |||
</section> | </section> | |||
<section><name>Applicable Topologies</name> | <section><name>Applicable Topologies</name> | |||
<t> | <t> | |||
Albeit RIFT is specified primarily for "proper" Clos or Fat Tree topologies, | Albeit RIFT is specified primarily for "proper" Clos or Fat Tree topologies, | |||
the protocol natively supports Points of Delivery (PoD) concepts, which, strictly speaking, are not found in the original Clos concept. | the protocol natively supports Points of Delivery (PoD) concepts, which, strictly speaking, are not found in the original Clos concept. | |||
</t> | </t> | |||
<t>Further, the specification explains and supports operations of multi-plane | <t>Further, the specification explains and supports operations of multi-plane | |||
Clos variants where the protocol recommends the use of inter-plane rings at the | Clos variants where the protocol recommends the use of inter-plane rings at the | |||
Top-of-Fabric level to allow the reconciliation of topology view of different pl | ToF level to allow the reconciliation of topology view of different planes | |||
anes | to make the Negative Disaggregation viable in case of failures within a plane. | |||
to make the negative disaggregation viable in case of failures within a plane. | ||||
These observations hold not only in case of RIFT but also in the generic | These observations hold not only in case of RIFT but also in the generic | |||
case of dynamic routing on Clos variants with multiple planes and failures | case of dynamic routing on Clos variants with multiple planes and failures | |||
in bi-sectional bandwidth, especially on the leafs. | in bisectional bandwidth, especially on the leaves. | |||
</t> | </t> | |||
<section><name>Horizontal Links</name> | <section><name>Horizontal Links</name> | |||
<t> | <t> | |||
RIFT is not limited to pure Clos divided into PoD and multi-planes but | RIFT is not limited to pure Clos divided into PoD and multi-planes but | |||
supports horizontal (East-West) links below the top of fabric level. Those links | supports horizontal (East-West) links below the ToF level. Those links | |||
are used only for last resort northbound forwarding when a spine loses all its | are used only for last resort northbound forwarding when a spine loses all its | |||
northbound links or cannot compute a default route through them. | northbound links or cannot compute a default route through them. | |||
</t> | </t> | |||
<t> A full-mesh connectivity between nodes on the same level can be employed | <!-- [rfced] May we update the following sentence for clarity? Additionally, | |||
and that allows N-SPF to provide for any node losing | should "employed" be updated to "deployed"? We note that this is the only | |||
all its northbound adjacencies (as long as any of the other | instance of "employed" that appears in the document. | |||
nodes in the level are northbound connected) to still participate in northbound | ||||
forwarding. | Original: | |||
A full-mesh connectivity between nodes on the same level can be | ||||
employed and that allows N-SPF to provide for any node losing all its | ||||
northbound adjacencies (as long as any of the other nodes in the | ||||
level are northbound connected) to still participate in northbound | ||||
forwarding. | ||||
Perhaps: | ||||
A full-mesh connectivity between nodes on the same level can be | ||||
deployed, which allows North SPF (N-SPF) to provide for any node losing all i | ||||
ts | ||||
northbound adjacencies (as long as any of the other nodes in the | ||||
level are northbound connected) and still participate in northbound | ||||
forwarding. | ||||
--> | ||||
<t>A full-mesh connectivity between nodes on the same level can be employed | ||||
and that allows North SPF (N-SPF) to provide for any node losing all its | ||||
northbound adjacencies (as long as any of the other nodes in the level are | ||||
northbound connected) to still participate in northbound forwarding. | ||||
</t> | </t> | |||
<t>Note that a "ring" of horizontal links at any level below ToF does not provide a "ring-based protection" scheme since the SPF computation would have to deal necessarily with breaking of "loops", an application for which RIFT is not intended. | <t>Note that a "ring" of horizontal links at any level below ToF does not provide a "ring-based protection" scheme since the SPF computation would have to deal with breaking of "loops", an application for which RIFT is not intended. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Vertical Shortcuts</name> | <section><name>Vertical Shortcuts</name> | |||
<t> | <t> | |||
Through relaxations of the specified adjacency forming rules, RIFT implementations can be extended to support vertical "shortcuts". The RIFT specification | Through relaxations of the specified adjacency forming rules, RIFT implementations can be extended to support vertical "shortcuts". The RIFT specification | |||
itself does not provide the exact details since the resulting solution suffers from | itself does not provide the exact details since the resulting solution suffers from | |||
either much larger blast radius with increased flooding volumes or | either a much larger blast radius with increased flooding volumes or | |||
in case of maximum aggregation routing, bow-tie problems. | bow tie problems in the case of maximum aggregation routing. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Generalizing to any Directed Acyclic Graph</name> | <section><name>Generalizing to Any Directed Acyclic Graph</name> | |||
<t> | <t> | |||
RIFT is an anisotropic routing protocol, meaning that it has a sense of directio n (northbound, southbound, east-west) and that it operates differently depending on the direction. | RIFT is an anisotropic routing protocol, meaning that it has a sense of directio n (northbound, southbound, and East-West) and operates differently depending on the direction. | |||
</t> | </t> | |||
<t> | <t> | |||
Since a DAG provides a sense of north (the | Since a DAG provides a sense of north (the | |||
direction of the DAG) and of south (the reverse), it can be used to | direction of the DAG) and south (the reverse), it can be used to | |||
apply RIFT——a vertex in the DAG that has only incoming edges is a | apply RIFT -- a vertex in the DAG that has only incoming edges is a | |||
ToF node. | ToF node. | |||
</t><t> | </t><t> | |||
There are a number of caveats though: | There are a number of caveats though: | |||
</t> | </t> | |||
<ul> | <ul spacing="normal"> | |||
<li>The DAG structure must exist before RIFT starts, so there is a need for a companion protocol to establish the logical DAG structure. | <li>The DAG structure must exist before RIFT starts, so there is a need for a companion protocol to establish the logical DAG structure. | |||
</li> | </li> | |||
<li>A generic DAG does not have a sense of east and west. The operation specif | <li>A generic DAG does not have a sense of East and West. The operation specif | |||
ied for east-west links and the southbound reflection between nodes are not appl | ied for East-West links and the southbound reflection between nodes are not appl | |||
icable. | icable. | |||
Also ZTP will derive a sense of depth that will eliminate some links. Variatio | Also, ZTP will derive a sense of depth that will eliminate some links. Variati | |||
ns of ZTP could be derived to meet specific objectives, e.g., make it so that mo | ons of ZTP could be derived to meet specific objectives, e.g., make it so that m | |||
st routers have at least 2 parents to reach the ToF. | ost routers have at least two parents to reach the ToF. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT applies to any Destination-Oriented DAG (DODAG) where there's only one To | RIFT applies to any Destination-Oriented DAG (DODAG) where there's only one To | |||
F node and the problem of disaggregation does not exist. In that case, RIFT | F node and the problem of disaggregation does not exist. | |||
operates very much like RPL <xref target='RFC6550'/>, but using Link State for | <!-- [rfced] Should "Link State" be specified as "link-state protocols" here? | |||
southbound routes (downwards in RPL's terms). | ||||
For an arbitrary DAG with multiple destinations (ToFs) the way disaggregation | Original: | |||
happens has to be considered. | In that case, RIFT operates very much like RPL [RFC6550], but using | |||
Link State for southbound routes (downwards in RPL's terms). | ||||
Perhaps: | ||||
In that case, RIFT operates very much like RPL [RFC6550], but uses | ||||
link-state protocols for southbound routes (downwards in RPL's terms). | ||||
--> | ||||
In that case, RIFT | ||||
operates very much like RPL <xref target='RFC6550'/>, but uses Link State for | ||||
southbound routes (downwards in RPL's terms). | ||||
For an arbitrary DAG with multiple destinations (ToFs), the way disaggregation | ||||
happens has to be considered. | ||||
</li> | </li> | |||
<li>Positive disaggregation expects that most of the ToF nodes reach most of the leaves, so disaggregation is the exception as opposed to the rule. When this is no longer true, it makes sense to turn off disaggregation and route between the ToF nodes over a ring, a full mesh, transit network, or a form of area zero. There again, this operation is similar to RPL operating as a single DODAG with a virtual root. | <li>Positive Disaggregation expects that most of the ToF nodes reach most of the leaves, so disaggregation is the exception as opposed to the rule. When this is no longer true, it makes sense to turn off disaggregation and route between the ToF nodes over a ring, a full mesh, a transit network, or a form of area zero. Then again, this operation is similar to RPL operating as a single DODAG with a virtual root. | |||
</li> | </li> | |||
<li> | <li> | |||
In order to aggregate and disaggregate routes, RIFT requires that all the ToF nodes share the full knowledge of the prefixes in the fabric. This can be achieved with a ring as suggested by <xref target='I-D.ietf-rift-rift'>"RIFT"</xref>, by some preconfiguration, or using a synchronization with a common repository where all the active prefixes are registered. | In order to aggregate and disaggregate routes, RIFT requires that all the ToF nodes share the full knowledge of the prefixes in the fabric. This can be achieved with a ring as suggested by <xref target='RFC9692'>RIFT</xref>, by some preconfiguration, or by using a synchronization with a common repository where all the active prefixes are registered. | |||
</li> | </li> | |||
</ul> | </ul> | |||
</section> | </section> | |||
<section anchor="onastick"><name>Reachability of Internal Nodes in the Fabric</name> | <section anchor="onastick"><name>Reachability of Internal Nodes in the Fabric</name> | |||
<t>RIFT does not require that nodes have reachable addresses in the fabric, | <t>RIFT does not require that nodes have reachable addresses in the fabric, | |||
though it is clearly desirable for operational purposes. Under normal operating | though it is clearly desirable for operational purposes. Under normal operating | |||
conditions this can be easily achieved by injecting the node's loopback | conditions, this can be easily achieved by injecting the node's loopback | |||
address into North and South Prefix TIEs or other implementation specifi | address into North and South Prefix TIEs or other implementation-specifi | |||
c | c | |||
mechanisms. | mechanisms. | |||
</t> | </t> | |||
<t> | <t> | |||
Special considerations arise when a node loses all northbound adjacencie | Special considerations arise when a node loses all northbound adjacencie | |||
s, | s | |||
but is not at the top of the fabric. If a spine node loses all northboun | but is not at the top of the fabric. If a spine node loses all northboun | |||
d links, the spine node doesn't advertise default route. But if the level of the | d links, the spine node doesn't advertise a default route. But if the level of t | |||
spine node is auto-determined by ZTP, it will "fall down" as depicted in <xref | he spine node is auto-determined by ZTP, it will "fall down" as depicted in <xre | |||
target='Fallen-spine'/>. | f target='Fallen-spine'/>. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section><name>Use Cases</name> | <section><name>Use Cases</name> | |||
<section><name>Data Center Topologies</name> | <section><name>Data Center Topologies</name> | |||
<section><name>Data Center Fabrics</name> | <section><name>Data Center Fabrics</name> | |||
<t> | <t> | |||
<!-- [rfced] May we rephrase the following sentence for ease of the reader? | ||||
Original: | ||||
RIFT is suited for applying in data center (DC) IP fabrics underlay | ||||
routing, vast majority of which seem to be currently (and for the foreseeable | ||||
future) Clos architectures. | ||||
Perhaps: | ||||
RIFT is suited for applying underlay routing in data center (DC) IP | ||||
fabrics, with the vast majority of these IP fabrics being Clos architectures | ||||
(and will be for the foreseeable future). | ||||
--> | ||||
RIFT is suited for applying in data center (DC) IP fabrics underlay routing, vast majority of which seem to be currently (and | RIFT is suited for applying in data center (DC) IP fabrics underlay routing, vast majority of which seem to be currently (and | |||
for | for | |||
the foreseeable future) | the foreseeable future) | |||
Clos architectures. It significantly simplifies operation and deployment | Clos architectures. It significantly simplifies operation and deployment | |||
of such fabrics as described in <xref target='opex'/> for environments compared | of such fabrics as described in <xref target='opex'/> for environments compared | |||
to | to | |||
extensive proprietary provisioning and operational solutions. | extensive proprietary provisioning and operational solutions. | |||
</t> | </t> | |||
</section> | </section> | |||
skipping to change at line 469 ¶ | skipping to change at line 557 ¶ | |||
.| | | | | | .| | | | | | |||
.| | A0 | | A1 | | .| | A0 | | A1 | | |||
.| +-+--++ ++---++ | .| +-+--++ ++---++ | |||
.| | | | | | .| | | | | | |||
.| | +------------+ | | .| | +------------+ | | |||
.| | +-----------+ | | | .| | +-----------+ | | | |||
.| | | | | | .| | | | | | |||
.| +-+-+-+ +--+-++ | .| +-+-+-+ +--+-++ | |||
.+-+ | | | | .+-+ | | | | |||
. | L0 | | L1 | | . | L0 | | L1 | | |||
. +-----+ +-----+ | . +-----+ +-----+]]></artwork> | |||
]]> | ||||
</artwork> | ||||
</figure> | </figure> | |||
<t> | <t> | |||
RIFT is not strictly limited to Clos topologies. The protocol only | RIFT is not strictly limited to Clos topologies. The protocol only | |||
requires a sense of "compass rose directionality" either achieved | requires a sense of "compass rose directionality" either achieved | |||
through configuration or derivation of levels. | through configuration or derivation of levels. | |||
So, conceptually, shortcuts between levels could be included. | So conceptually, shortcuts between levels could be included. | |||
<xref target="levelshortcuts"/> depicts an example of a shortcut | <xref target="levelshortcuts"/> depicts an example of a shortcut | |||
between levels. In this example, sub-optimal routing will | between levels. In this example, suboptimal routing will | |||
occur when traffic is sent from L0 to L1 via S0's | occur when traffic is sent from L0 to L1 via S0's | |||
default route and back down through A0 or A1. | default route and back down through A0 or A1. | |||
In order to avoid that, only default routes from A0 or A1 | In order to avoid that, only default routes from A0 or A1 | |||
are used, all leaves would be required to install each other's routes. | are used. All leaves would be required to install each other's routes. | |||
</t> | </t> | |||
<t> | <t> | |||
While various technical and operational challenges may require the use of such modifications, | While various technical and operational challenges may require the use of such modifications, | |||
discussion of those topics are outside the scope of this document. | discussion of those topics is outside the scope of this document. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section><name>Metro Networks</name> | <section><name>Metro Networks</name> | |||
<t> | <t> | |||
The demand for bandwidth is increasing steadily, driven primarily by | The demand for bandwidth is increasing steadily, driven primarily by | |||
environments close to | environments close to | |||
content producers (server farms connection via DC fabrics) but in | content producers (server farms connection via DC fabrics) but in | |||
proximity to content consumers as well. | proximity to content consumers as well. | |||
Consumers are often clustered in metro areas with their own network | Consumers are often clustered in metro areas with their own network | |||
architectures that can benefit | architectures that can benefit | |||
from simplified, regular Clos structures and hence from RIFT. | from simplified, regular Clos structures. Thus, they can also benefit from RIFT. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Building Cabling</name> | <section><name>Building Cabling</name> | |||
<t> | <t> | |||
Commercial edifices are often cabled in topologies that are | Commercial edifices are often cabled in topologies that are | |||
either Clos or its isomorphic equivalents. The | either Clos or its isomorphic equivalents. The | |||
Clos can grow rather high with many levels. That presents a challenge | Clos can grow rather high with many levels. That presents a challenge | |||
for traditional routing protocols (except BGP<xref target='RFC4271'/> and by now | for traditional routing protocols (except BGP <xref target='RFC4271'/> and Priva | |||
largely | te Network-Network Interface (PNNI) <xref target='PNNI'/>, which is largely | |||
phased-out PNNI<xref target='PNNI'/>) which do not support | phased-out by now) that do not support | |||
an arbitrary number of levels which RIFT does naturally. Moreover, due to the li | an arbitrary number of levels, which RIFT does naturally. Moreover, due to the l | |||
mited sizes of forwarding tables in network elements of building cabling, the mi | imited sizes of forwarding tables in network elements of building cabling, the m | |||
nimum FIB size RIFT maintains under normal conditions is cost-effective in terms | inimum FIB size RIFT maintains under normal conditions is cost-effective in term | |||
of hardware and operational costs. | s of hardware and operational costs. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Internal Router Switching Fabrics</name> | <section><name>Internal Router Switching Fabrics</name> | |||
<t> | <t> | |||
It is common in high-speed communications switching and routing | It is common in high-speed communications switching and routing | |||
devices to use switch fabrics which are interconnection networks inside the devi | devices to use switch fabrics that are interconnection networks inside the devic | |||
ces connecting the input ports to their output ports. For example, crossbar is o | es connecting the input ports to their output ports. For example, a crossbar is | |||
ne of the switch fabric techniques while a crossbar is not feasible due to cost, | one of the switch fabric techniques, even though it is not feasible due to cost, | |||
head-of-line blocking or size trade-offs. And normally such fabrics are not sel | head-of-line blocking, or size trade-offs. Normally, such fabrics are not self- | |||
f-healing or rely on 1:1 or 1+1 protection schemes but it is conceivable to use | healing or rely on 1:1 or 1+1 protection schemes, but it is conceivable to use R | |||
RIFT to operate Clos fabrics that can deal effectively with interconnections | IFT to operate Clos fabrics that can deal effectively with interconnections | |||
or subsystem failures in such module. RIFT is not IP specific and | or subsystem failures in such a module. RIFT is not IP specific and | |||
hence any link addressing connecting internal device subnets is | hence any link addressing connecting internal device subnets is | |||
conceivable. | conceivable. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>CloudCO</name> | <section><name>CloudCO</name> | |||
<t> | <t> | |||
The Cloud Central Office (CloudCO) is a new stage of telecom Central Office. It | The Cloud Central Office (CloudCO) is a new stage of the telecom Central Office. | |||
takes advantage of Software Defined Networking (SDN) and Network Function Vi | It takes advantage of Software-Defined Networking (SDN) and Network Functio | |||
rtualization (NFV) in conjunction with general purpose hardware to optimize curr | n Virtualization (NFV) in conjunction with general purpose hardware to optimize | |||
ent networks. | current networks. | |||
The following figure illustrates this architecture at a high level. It describes | The following figure illustrates this architecture at a high level. It describes | |||
a single instance or macro-node of cloud CO that provides a number of Value Add | a single instance or macro-node of CloudCO that provides a number of value-adde | |||
ed Services (VAS), a Broadband Access Abstraction (BAA), and virtualized network | d services (VASes), a Broadband Access Abstraction (BAA), and virtualized networ | |||
services. An Access I/O module faces a Cloud CO access node, and the Customer P | k services. An Access I/O module faces a CloudCO access node and the Customer Pr | |||
remises Equipments (CPEs) behind it. A Network I/O module is facing the core net | emises Equipment (CPE) behind it. A Network I/O module is facing the core networ | |||
work. The two I/O modules are interconnected by a leaf and spine fabric <xref ta | k. | |||
rget='TR-384'/>. | <!-- [rfced] To match [TR-384], may we update "leaf and spine fabric" to | |||
"leaf-spine fabric"? | ||||
Original: | ||||
The two I/O modules are interconnected by a leaf and spine fabric [TR-384]. | ||||
Perhaps: | ||||
The two I/O modules are interconnected by a leaf-spine fabric [TR-384]. | ||||
--> | ||||
The two I/O modules are interconnected by a leaf and spine fabric <xref target=' | ||||
TR-384'/>. | ||||
</t> | </t> | |||
<figure align='center' anchor='pic-CloudCO'><name>An example of CloudCO architecture</name> | <figure align='center' anchor='pic-CloudCO'><name>CloudCO Architecture Example</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+---------------------+ +----------------------+ | +---------------------+ +----------------------+ | |||
| Spine | | Spine | | | Spine | | Spine | | |||
| Switch | | Switch | | | Switch | | Switch | | |||
+------+---+------+-+-+ +--+-+-+-+-----+-------+ | +------+---+------+-+-+ +--+-+-+-+-----+-------+ | |||
| | | | | | | | | | | | | | | | | | | | | | | | | | |||
| | | | | +-------------------------------+ | | | | | | | +-------------------------------+ | | |||
| | | | | | | | | | | | | | | | | | | | | | | | | | |||
| | | | +-------------------------+ | | | | | | | | +-------------------------+ | | | | |||
| | | | | | | | | | | | | | | | | | | | | | | | | | |||
skipping to change at line 577 ¶ | skipping to change at line 673 ¶ | |||
| |--------| |--------| |----------| |-------| | | | |--------| |--------| |----------| |-------| | | |||
| || VAS6 || || VAS3 || || v802.1x|| ||VAS2 || | | | || VAS6 || || VAS3 || || v802.1x|| ||VAS2 || | | |||
| |--------| |--------| |----------| |-------| | | | |--------| |--------| |----------| |-------| | | |||
| |--------| |--------| |----------| |-------| | | | |--------| |--------| |----------| |-------| | | |||
| || VAS7 || || VAS4 || || vIGMP || ||BAA || | | | || VAS7 || || VAS4 || || vIGMP || ||BAA || | | |||
| |--------| |--------| |----------| |-------| | | | |--------| |--------| |----------| |-------| | | |||
| +--------+ +--------+ +----------+ +-------+ | | | +--------+ +--------+ +----------+ +-------+ | | |||
| | | | | | |||
++-----------+ +---------++ | ++-----------+ +---------++ | |||
|Network I/O | |Access I/O| | |Network I/O | |Access I/O| | |||
+------------+ +----------+ | +------------+ +----------+]]></artwork> | |||
]]> | ||||
</artwork> | ||||
</figure> | </figure> | |||
------------+ <span class="insert">+----------+]] ></artwork></span> | ||||
<t> | <t> | |||
The Spine-Leaf architecture deployed inside CloudCO meets the network requiremen | The Spine-Leaf architecture deployed inside CloudCO meets the network requiremen | |||
ts of adaptable, agile, scalable and dynamic. | ts of being adaptable, agile, scalable, and dynamic.</t> | |||
</t> | ||||
</section> | </section> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor='opex'><name>Operational Considerations</name> | <section anchor='opex'><name>Operational Considerations</name> | |||
<t> | <t> | |||
RIFT presents the features for organizations building and operating | RIFT presents the features for organizations building and operating | |||
IP fabrics to simplify the operation and deployments while achieving | IP fabrics to simplify the operation and deployments while achieving | |||
many desirable | many desirable | |||
properties of a dynamic routing protocol on such a substrate: | properties of a dynamic routing protocol on such a substrate: | |||
</t> | </t> | |||
<ul> | <ul spacing="normal"> | |||
<li> | <li> | |||
RIFT only floods routing information to the devices that need it. | RIFT only floods routing information to the devices that need it. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT allows for Zero Touch Provisioning within the protocol. | RIFT allows for ZTP within the protocol. | |||
In its most extreme version, RIFT does not rely on any specific addressing | In its most extreme version, RIFT does not rely on any specific addressing | |||
and for IP fabric can operate using <xref target='RFC4861'>IPv6 ND</xref> only. | and can operate using <xref target='RFC4861'>IPv6 Neighbor Discovery (ND)</xref> only for IP fabric. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT has provisions to detect common IP fabric miscabling scenarios. | RIFT has provisions to detect common IP fabric miscabling scenarios. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT negotiates automatically BFD per link. This allows for IP and <xref target='RFC7130'>micro-BFD</xref> to replace Link Aggregation Groups (LAGs) which do hide bandwidth | RIFT automatically negotiates Bidirectional Forwarding Detection (BFD) per link. This allows for IP and <xref target='RFC7130'>micro-BFD</xref> to replace Link Aggregation Groups (LAGs) that hide bandwidth | |||
imbalances in case of constituent failures. Further automatic link validation | imbalances in case of constituent failures. Further automatic link validation | |||
techniques similar to <xref target='RFC5357'/> could be supported as well. | techniques similar to those in <xref target='RFC5357'/> could be supported as well. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT inherently solves many problems associated with the use of | RIFT inherently solves many problems associated with the use of | |||
traditional routing topologies with dense meshes and high degrees of ECMP by | traditional routing topologies with dense meshes and high degrees of ECMP by | |||
including automatic bandwidth balancing, flood reduction and automatic | including automatic bandwidth balancing, flood reduction, and automatic | |||
disaggregation on failures while providing maximum aggregation of prefixes | disaggregation on failures while providing maximum aggregation of prefixes | |||
in default scenarios. ECMP in RIFT eliminates the need for more Loop-Free Alternates procedures. | in default scenarios. ECMP in RIFT eliminates the need for more Loop-Free Alternate (LFA) procedures. | |||
</li> | </li> | |||
<li> | <li> | |||
<!-- [rfced] May we rephrase and break up the following sentence to | ||||
improve readability? | ||||
Original: | ||||
* RIFT reduces FIB size towards the bottom of the IP fabric where | ||||
most nodes reside and allows with that for cheaper hardware on the | ||||
edges and introduction of modern IP fabric architectures that | ||||
encompass e.g. server multi-homing. | ||||
Perhaps: | ||||
* RIFT reduces FIB size towards the bottom of the IP fabric where | ||||
most nodes reside. This allows for cheaper hardware on the | ||||
edges and introduction of modern IP fabric architectures that | ||||
encompass server multihoming and other mechanisms. | ||||
--> | ||||
RIFT reduces FIB size towards the bottom of the IP fabric where most nodes | RIFT reduces FIB size towards the bottom of the IP fabric where most nodes | |||
reside and allows with that for cheaper hardware on the edges and introduction | reside and allows with that for cheaper hardware on the edges and introduction | |||
of modern IP fabric architectures that encompass e.g. server multi-homing. | of modern IP fabric architectures that encompass, e.g., server multihoming. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT provides valley-free routing and with that is loop free. A valley-free path allows reversal of direction at most once from a packet heading northbound to southbound while permitting traversal of horizontal links in the northbound phase. This allows the use of any such valley-free path in bi-sectional fabric bandwidth between two destinations irrespective of their metrics which can be used to balance load on the fabric in different ways. Valley-free routing eliminates the need for any specific micro-loop avoidance procedures for RIFT. | RIFT provides valley-free routing that is loop free. A valley-free path allows for reversal of direction at most once from a packet heading northbound to southbound while permitting traversal of horizontal links in the northbound phase. This allows for the use of any such valley-free path in bisectional fabric bandwidth between two destinations irrespective of their metrics that can be used to balance load on the fabric in different ways. Valley-free routing eliminates the need for any specific micro-loop avoidance procedures for RIFT. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT includes a key-value distribution mechanism | RIFT includes a key-value distribution mechanism | |||
which allows for future applications | that allows for future applications | |||
such as automatic provisioning of basic overlay services or automatic key | such as automatic provisioning of basic overlay services or automatic key | |||
roll-overs over whole fabrics. | rollovers over whole fabrics. | |||
</li> | </li> | |||
<li> | <li> | |||
RIFT is designed for minimum delay in case of prefix mobility on the fabric. In | RIFT is designed for minimum delay in case of prefix mobility on the fabric. In | |||
conjunction with <xref target='RFC8505'/>, RIFT can differentiate anycast advertisements from mobility events and retain only the most recent advertisement in the latter case. | conjunction with <xref target='RFC8505'/>, RIFT can differentiate anycast advertisements from mobility events and retain only the most recent advertisement in the latter case. | |||
</li> | </li> | |||
<li> | <li> | |||
Many further operational and design points collected over many years of | Many further operational and design points collected over many years of | |||
routing protocol deployments have been incorporated in RIFT such as | routing protocol deployments have been incorporated in RIFT such as | |||
fast flooding rates, protection of information lifetimes and operationally | fast flooding rates, protection of information lifetimes, and operationally | |||
recognizable remote ends of links and node names. | recognizable remote ends of links and node names. | |||
</li> | </li> | |||
</ul> | </ul> | |||
<section><name>South Reflection</name> | <section><name>South Reflection</name> | |||
<t>South reflection is a mechanism that South Node TIEs are "reflected" | <t>South reflection is a mechanism where South Node TIEs are "reflected" | |||
back up north to allow nodes in same level without east-west links to "see" | back up north to allow nodes in the same level without East-West links to "s | |||
ee" | ||||
each other. | each other. | |||
</t> | </t> | |||
<t>For example, in Figure 4, Spine111\Spine112\Spine121\Spine122 reflects Node S-TIEs | <t>For example, in <xref target='pic-suboptimal'/>, Spine111\Spine112\Spine121\Spine122 reflects Node S-TIEs | |||
from ToF21 to ToF22 separately. Respectively, Spine111\Spine112\Spine121\Spine122 reflects Node | from ToF21 to ToF22 separately. Respectively, Spine111\Spine112\Spine121\Spine122 reflects Node | |||
S-TIEs from ToF22 to ToF21 separately. So ToF22 and ToF21 see each other's | S-TIEs from ToF22 to ToF21 separately, so ToF22 and ToF21 see each other's | |||
node information as level 2 nodes. | node information as level 2 nodes. | |||
</t> | </t> | |||
<t>In an equivalent fashion, as the result of the south reflection between S pine121-Leaf121-Spine122 | <t>In an equivalent fashion, as the result of the south reflection between S pine121-Leaf121-Spine122 | |||
and Spine121-Leaf122-Spine122, Spine121 and Spine 122 knows each other at | and Spine121-Leaf122-Spine122, Spine121 and Spine122 know each other at | |||
level 1. | level 1. | |||
</t> | </t> | |||
<!-- [rfced] We note that the following instances of text are repeated at | ||||
the end of Sections 5.1 and following Figure 4 in Section 5.2. Should the text | ||||
in Section 5.2 be removed to avoid repetition? | ||||
</section> | Original (Section 5.1): | |||
In an equivalent fashion, as the result of the south | ||||
reflection between Spine121-Leaf121-Spine122 and Spine121-Leaf122-Spine122, | ||||
Spine121 and Spine 122 knows each other at level 1. | ||||
Original (Section 5.2): | ||||
As shown in Figure 4, as the result of the south | ||||
reflection between Spine121-Leaf121-Spine122 and Spine121-Leaf122-Spine122, | ||||
Spine121 and Spine 122 knows each other at level 1. | ||||
--> | ||||
</section> | ||||
<section><name>Suboptimal Routing on Link Failures</name> | <section><name>Suboptimal Routing on Link Failures</name> | |||
<figure align='center' anchor='pic-suboptimal'><name>Suboptimal routing upon link failure use case</name> | <figure align='center' anchor='pic-suboptimal'><name>Suboptimal Routing Upon Link Failure Use Case</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+--------+ +--------+ | +--------+ +--------+ | |||
| ToF21 | | ToF22 | LEVEL 2 | | ToF21 | | ToF22 | LEVEL 2 | |||
++--+-+-++ ++-+--+-++ | ++--+-+-++ ++-+--+-++ | |||
| | | | | | | + | | | | | | | | + | |||
| | | | | | | linkTS8 | | | | | | | | linkTS8 | |||
+------------+ | +-+linkTS3+-+ | | | +-------------+ | +------------+ | +-+linkTS3+-+ | | | +-------------+ | |||
| | | | | | + | | | | | | | | + | | |||
| +---------------------------+ | linkTS7 | | | +---------------------------+ | linkTS7 | | |||
| | | | + + + | | | | | | + + + | | |||
skipping to change at line 698 ¶ | skipping to change at line 820 ¶ | |||
+-+---+--+ +-+----+-+ +-+---+---+ +-+----+-+ | +-+---+--+ +-+----+-+ +-+---+---+ +-+----+-+ | |||
| | | | | | | | | | | | | | | | | | |||
| +-------------+ | + ++XX+linkSL6+---+ + | | +-------------+ | + ++XX+linkSL6+---+ + | |||
| | | | linkSL5 | | linkSL8 | | | | | linkSL5 | | linkSL8 | |||
| +-----------+ | | + +---+linkSL7+-+ | + | | +-----------+ | | + +---+linkSL7+-+ | + | |||
| | | | | | | | | | | | | | | | | | |||
+-+---+-+ +--+--+-+ +-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ +-+---+-+ +--+--+-+ | |||
|Leaf111| |Leaf112| |Leaf121| |Leaf122| LEVEL 0 | |Leaf111| |Leaf112| |Leaf121| |Leaf122| LEVEL 0 | |||
+-+-----+ +-+-----+ +-----+-+ +-+-----+ | +-+-----+ +-+-----+ +-----+-+ +-+-----+ | |||
+ + + + | + + + + | |||
Prefix111 Prefix112 Prefix121 Prefix122 | Prefix111 Prefix112 Prefix121 Prefix122]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>As shown in <xref target='pic-suboptimal'/>, as the result of the south r eflection between | <t>As shown in <xref target='pic-suboptimal'/>, as the result of the south r eflection between | |||
Spine121-Leaf121-Spine122 and Spine121-Leaf122-Spine122, Spine121 and Spine | Spine121-Leaf121-Spine122 and Spine121-Leaf122-Spine122, Spine121 and Spine | |||
122 knows each other at level 1.</t> | 122 know each other at level 1.</t> | |||
<t>Without disaggregation mechanism, when linkSL6 fails, the packet from | <!-- [rfced] We have rephrased the following sentence and split it into two | |||
leaf121 to prefix122 will probably go up through linkSL5 to linkTS3 then go | for ease of the reader. Please let us know any objections. | |||
down through linkTS4 to linkSL8 to Leaf122 or go up through linkSL5 to linkT | ||||
S6 | Original: | |||
then go down through linkTS8 and linkSL8 to Leaf122 based on pure default ro | Without disaggregation mechanism, when linkSL6 fails, the packet | |||
ute. | from leaf121 to prefix122 will probably go up through linkSL5 to linkTS3 then | |||
It's the case of suboptimal routing or bow-tieing.</t> | go down through linkTS4 to linkSL8 to Leaf122 or go up through linkSL5 to | |||
<t>With disaggregation mechanism, when linkSL6 fails, Spine122 will detect t | linkTS6 then go down through linkTS8 and linkSL8 to Leaf122 based on pure | |||
he | default route. | |||
failure according to the reflected node S-TIE from Spine121. Based on the | ||||
Current: | ||||
Without disaggregation mechanisms, the packet from leaf121 to | ||||
prefix122 will probably go up through linkSL5 to linkTS3 when linkSL6 | ||||
fails. Then, the packet will go down through linkTS4 to linkSL8 to Leaf122 or | ||||
go up through linkSL5 to linkTS6, then go down through linkTS8 and linkSL8 to | ||||
Leaf122 based on the pure default route. | ||||
--> | ||||
<t>Without disaggregation mechanisms, the packet from | ||||
leaf121 to prefix122 will probably go up through linkSL5 to linkTS3 when lin | ||||
kSL6 fails. Then, the packet will go | ||||
down through linkTS4 to linkSL8 to Leaf122 or go up through linkSL5 to linkT | ||||
S6, | ||||
then go down through linkTS8 and linkSL8 to Leaf122 based on the pure defaul | ||||
t route. | ||||
This is the case of suboptimal routing or bow tying.</t> | ||||
<t>With disaggregation mechanisms, Spine122 will detect the | ||||
failure according to the reflected node S-TIE from Spine121 when linkSL6 fai | ||||
ls. Based on the | ||||
disaggregation algorithm provided by RIFT, Spine122 will explicitly advertis e | disaggregation algorithm provided by RIFT, Spine122 will explicitly advertis e | |||
prefix122 in Disaggregated Prefix S-TIE PrefixTIEElement(prefix122, cost 1). The packet | prefix122 in Disaggregated Prefix S-TIE PrefixTIEElement(prefix122, cost 1). The packet | |||
from leaf121 to prefix122 will only be sent to linkSL7 following a longest-p refix | from leaf121 to prefix122 will only be sent to linkSL7 following a longest-p refix | |||
match to prefix 122 directly then go down through linkSL8 to Leaf122 . | match to prefix 122 directly, then it will go down through linkSL8 to Leaf12 2. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Black-Holing on Link Failures</name> | <section><name>Black-Holing on Link Failures</name> | |||
<figure align='center' anchor='pic-blackhole'><name>Black-holing upon link f ailure use case</name> | <figure align='center' anchor='pic-blackhole'><name>Black-Holing Upon Link F ailure Use Case</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+--------+ +--------+ | +--------+ +--------+ | |||
| ToF 21 | | ToF 22 | LEVEL 2 | | ToF 21 | | ToF 22 | LEVEL 2 | |||
++-+--+-++ ++-+--+-++ | ++-+--+-++ ++-+--+-++ | |||
| | | | | | | + | | | | | | | | + | |||
| | | | | | | linkTS8 | | | | | | | | linkTS8 | |||
+--------------+ | +-+linkTS3+X+ | | | +--------------+ | +--------------+ | +-+linkTS3+X+ | | | +--------------+ | |||
linkTS1 | | | | | + | | linkTS1 | | | | | + | | |||
+ +-----------------------------+ | linkTS7 | | + +-----------------------------+ | linkTS7 | | |||
| | + | + + + | | | | + | + + + | | |||
skipping to change at line 747 ¶ | skipping to change at line 885 ¶ | |||
+-+---+--+ ++----+--+ +-+---+--+ +-+----+-+ | +-+---+--+ ++----+--+ +-+---+--+ +-+----+-+ | |||
| | | | | | | | | | | | | | | | | | |||
+ +---------------+ | + +---+linkSL6+---+ + | + +---------------+ | + +---+linkSL6+---+ + | |||
linkSL1 | | | linkSL5 | | linkSL8 | linkSL1 | | | linkSL5 | | linkSL8 | |||
+ +--+linkSL3+--+ | | + +---+linkSL7+-+ | + | + +--+linkSL3+--+ | | + +---+linkSL7+-+ | + | |||
| | | | | | | | | | | | | | | | | | |||
+-+---+-+ +--+--+-+ +-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ +-+---+-+ +--+--+-+ | |||
|Leaf111| |Leaf112| |Leaf121| |Leaf122| LEVEL 0 | |Leaf111| |Leaf112| |Leaf121| |Leaf122| LEVEL 0 | |||
+-+-----+ +-+-----+ +-----+-+ +-----+-+ | +-+-----+ +-+-----+ +-----+-+ +-----+-+ | |||
+ + + + | + + + + | |||
Prefix111 Prefix112 Prefix121 Prefix122 | Prefix111 Prefix112 Prefix121 Prefix122]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>This scenario illustrates a case when double link failure occurs and with that | <t>This scenario illustrates a case where a double link failure occurs and | |||
black-holing can happen.</t> | black-holing can happen.</t> | |||
<t>Without disaggregation mechanism, when linkTS3 and linkTS4 both fail, | <t>Without disaggregation mechanisms, | |||
the packet from leaf111 to prefix122 would suffer 50% black-holing based | the packet from leaf111 to prefix122 would suffer 50% black-holing based | |||
on pure default route. The packet supposed to go up through linkSL1 to | on the pure default route when linkTS3 and linkTS4 both fail. The packet that is | |||
linkTS1 then go down through linkTS3 or linkTS4 will be dropped. The | supposed to go up through linkSL1 to | |||
packet supposed to go up through linkSL3 to linkTS2 then go down through | linkTS1 and then go down through linkTS3 or linkTS4 will be dropped. The | |||
linkTS3 or linkTS4 will be dropped as well. It's the case of black-holing.</ | packet that is supposed to go up through linkSL3 to linkTS2 and then go down through | |||
t> | linkTS3 or linkTS4 will be dropped as well. This is the case of black-holing | |||
<t>With disaggregation mechanism, when linkTS3 and linkTS4 both fail, ToF22 | .</t> | |||
will | <t>With disaggregation mechanisms, ToF22 will | |||
detect the failure according to the reflected node S-TIE of ToF21 from | detect the failure according to the reflected node S-TIE of ToF21 from | |||
Spine111\Spine112. Based on the disaggregation algorithm | Spine111\Spine112 when linkTS3 and linkTS4 both fail. Based on the disaggreg ation algorithm | |||
provided by RIFT, ToF22 will explicitly originate an S-TIE with prefix 121 a nd | provided by RIFT, ToF22 will explicitly originate an S-TIE with prefix 121 a nd | |||
prefix 122, that is flooded to spines 111, 112, 121 and 122.</t> | prefix 122 that is flooded to spines 111, 112, 121, and 122.</t> | |||
<t>The packet from leaf111 to prefix122 will not be routed to linkTS1 or | <t>The packet from leaf111 to prefix122 will not be routed to linkTS1 or | |||
linkTS2. The packet from leaf111 to prefix122 will only be routed to linkTS5 | linkTS2. The packet from leaf111 to prefix122 will only be routed to linkTS5 | |||
or linkTS7 following a longest-prefix match to prefix122.</t> | or linkTS7 following a longest-prefix match to prefix122.</t> | |||
</section> | </section> | |||
<section><name>Zero Touch Provisioning (ZTP)</name> | <section><name>Zero Touch Provisioning (ZTP)</name> | |||
<t> | <t> | |||
RIFT is designed to require a very minimal configuration to simplify its operation and avoid human errors; based on that minimal information, Zero Touch Provisioning (ZTP) auto configures the key operational parameters of all the RIFT nodes, including the SystemID of the node that must be unique in the RIFT network and the level of the node in the Fat Tree, which determines which peers are northwards "parents" and which are southwards "children". | RIFT is designed to require a very minimal configuration to simplify its operation and avoid human errors; based on that minimal information, ZTP autoconfigures the key operational parameters of all the RIFT nodes, including the System ID of the node that must be unique in the RIFT network and the level of the node in the Fat Tree, which determines which peers are northward "parents" and which are southward "children". | |||
</t> | </t> | |||
<t> | <t> | |||
ZTP is always on, but its decisions can be overridden when a network administrat | ZTP is always on, but its decisions can be overridden when a network administrat | |||
or prefers to impose its own configuration. In that case, it is the responsibili | or prefers to impose its own configuration. In that case, it is the responsibili | |||
ty of the administrator to ensure that the configured parameters are correct, | ty of the administrator to ensure that the configured parameters are correct, i. | |||
in other words that the SystemID of each node is unique, and that the administra | e., ensure that the System ID of each node is unique and that the administrative | |||
tively set levels truly reflect the relative position of the nodes in the fabric | ly set levels truly reflect the relative position of the nodes in the fabric. | |||
. It is | <!-- [rfced] Is "and when not" referring to ZTP configuring the network? | |||
Original: | ||||
It is recommended to let ZTP configure the network, and when not, it | ||||
is recommended to configure the level of all the nodes to avoid an undesirabl | ||||
e | ||||
interaction between ZTP and the manual configuration. | ||||
Perhaps: | ||||
It is recommended to let ZTP configure the network, and when ZTP does | ||||
not configure the network, it is recommended to configure the level of all th | ||||
e | ||||
nodes to avoid an undesirable interaction between ZTP and the manual | ||||
configuration. | ||||
--> | ||||
It is | ||||
recommended to let ZTP configure the network, and when not, it is recommended to | recommended to let ZTP configure the network, and when not, it is recommended to | |||
configure the level of all the nodes to avoid an undesirable interaction between ZTP and the manual configuration. | configure the level of all the nodes to avoid an undesirable interaction between ZTP and the manual configuration. | |||
</t> | </t> | |||
<t>ZTP requires that the administrator points out the Top-of-Fabric (ToF) nodes | <t>ZTP requires that the administrator points out the ToF nodes to set the | |||
to set the baseline from which the fabric topology is derived. The Top-of-Fabric | baseline from which the fabric topology is derived. The ToF nodes are | |||
nodes are configured with TOP_OF_FABRIC flag which are initial 'seeds' needed f | configured with the TOP_OF_FABRIC flag, which are initial 'seeds' needed for | |||
or other ZTP nodes to derive their level in the topology. | other ZTP nodes to derive their level in the topology. ZTP computes the level | |||
ZTP computes the level of each node based on the Highest Available Level (HAL) | of each node based on the Highest Available Level (HAL) of the potential | |||
of the potential parent(s) nearest that baseline, which represents the superspin | parent closest to that baseline, which represents the superspine. In a | |||
e. | fashion, RIFT can be seen as a distance-vector protocol that computes a set of | |||
In a fashion, RIFT can be seen as a distance-vector protocol that computes a set | feasible successors towards the superspine and autoconfigures the rest of the | |||
of feasible successors towards the superspine and auto-configures the rest of t | topology. | |||
he topology. | ||||
</t> | </t> | |||
<t> | <t> | |||
The auto configuration mechanism computes a global maximum of levels by diffusi | The autoconfiguration mechanism computes a global maximum of levels by | |||
on. | diffusion. The derivation of the level of each node happens then based on | |||
The derivation of the level of each node happens then based on Link Information | LIEs received from its neighbors, whereas each node (with possible exceptions | |||
Elements (LIEs) received from its | of configured leaves) tries to attach at the highest possible point in the | |||
neighbors whereas each node (with possibly exceptions of configured leaves) trie | fabric. This guarantees that even if the diffusion front reaches a node from | |||
s to | "below" faster than from "above", it will greedily abandon already negotiated | |||
attach at the highest possible point in the fabric. This guarantees that even if | levels derived from nodes topologically below it and properly peer with nodes | |||
the diffusion front reaches a node from "below" faster | above. | |||
than from "above", it will greedily abandon already negotiated level derived fro | ||||
m nodes | ||||
topologically below it and properly peer with nodes above. | ||||
</t> | </t> | |||
<t> | <t> | |||
The achieved equilibrium can be disturbed massively by all nodes with highest l | The achieved equilibrium can be disturbed massively by all nodes with the highe | |||
evel either leaving or entering the domain (with some finer distinctions not exp | st level either leaving or entering the domain (with some finer distinctions not | |||
lained further). | explained further). | |||
It is therefore recommended that each node is multi-homed towards nodes with re | It is therefore recommended that each node is multihomed towards nodes with res | |||
spective HAL offerings. Fortunately, this is the natural state of things for the | pective HAL offerings. Fortunately, this is the natural state of things for the | |||
topology variants considered in RIFT. | topology variants considered in RIFT. | |||
</t> | </t> | |||
<t> | <t> | |||
A RIFT node may also be configured to confine it to the leaf role with the LEAF_ ONLY flag. A leaf node can also be configured to support leaf-2-leaf procedures with the LEAF_2_LEAF flag. In either case the node cannot be TOP_OF_FABRIC and i ts level cannot be configured. RIFT will fully determine the node's level after it is attached to the topology and ensure that the node is at the "bottom of the hierarchy" (southernmost). | A RIFT node may also be configured to confine it to the leaf role with the LEAF_ ONLY flag. A leaf node can also be configured to support leaf-2-leaf procedures with the LEAF_2_LEAF flag. In both cases, the node cannot be TOP_OF_FABRIC and i ts level cannot be configured. RIFT will fully determine the node's level after it is attached to the topology and ensure that the node is at the "bottom of the hierarchy" (southernmost). | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Miscabling</name> | <section><name>Miscabling</name> | |||
<section><name>Miscabling Examples</name> | <section><name>Miscabling Examples</name> | |||
<figure align='center' anchor='single-plane-mis-cabling'><name>A single plan e miscabling example</name> | <figure align='center' anchor='single-plane-mis-cabling'><name>A Single-Plan e Miscabling Example</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+----------------+ +-----------------+ | +----------------+ +-----------------+ | |||
| ToF21 | +------+ ToF22 | LEVEL 2 | | ToF21 | +------+ ToF22 | LEVEL 2 | |||
+-------+----+---+ | +----+---+--------+ | +-------+----+---+ | +----+---+--------+ | |||
| | | | | | | | | | | | | | | | | | | | |||
| | | +----------------------------+ | | | | | +----------------------------+ | | |||
| +---------------------------+ | | | | | | +---------------------------+ | | | | | |||
| | | | | | | | | | | | | | | | | | | | |||
| | | | +-----------------------+ | | | | | | | +-----------------------+ | | | |||
| | +------------------------+ | | | | | | +------------------------+ | | | | |||
skipping to change at line 826 ¶ | skipping to change at line 983 ¶ | |||
+-+---+--+ +-+---+--+ | +--+---+-+ +--+---+-+ | +-+---+--+ +-+---+--+ | +--+---+-+ +--+---+-+ | |||
|Spine111| |Spine112| | |Spine121| |Spine122| LEVEL 1 | |Spine111| |Spine112| | |Spine121| |Spine122| LEVEL 1 | |||
+-+---+--+ ++----+--+ | +--+---+-+ +-+----+-+ | +-+---+--+ ++----+--+ | +--+---+-+ +-+----+-+ | |||
| | | | | | | | | | | | | | | | | | | | |||
| +---------+ | link-M | +---------+ | | | +---------+ | link-M | +---------+ | | |||
| | | | | | | | | | | | | | | | | | | | |||
| +-------+ | | | | +-------+ | | | | +-------+ | | | | +-------+ | | | |||
| | | | | | | | | | | | | | | | | | | | |||
+-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ | |||
|Leaf111| |Leaf112+-----+ |Leaf121| |Leaf122| LEVEL 0 | |Leaf111| |Leaf112+-----+ |Leaf121| |Leaf122| LEVEL 0 | |||
+-------+ +-------+ +-------+ +-------+ | +-------+ +-------+ +-------+ +-------+]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t><xref target='single-plane-mis-cabling'/> shows a single plane miscabling example. It's a perfect Fat Tree fabric except link-M connecting Leaf112 to ToF 22. | <t><xref target='single-plane-mis-cabling'/> shows a single-plane miscabling example. It's a perfect Fat Tree fabric except for link-M connecting Leaf112 to ToF22. | |||
</t> | </t> | |||
<t>The RIFT control protocol can discover the physical links automatically a | <t>The RIFT control protocol can discover the physical links automatically a | |||
nd be able to detect cabling that violates Fat Tree topology constraints. | nd is able to detect cabling that violates Fat Tree topology constraints. | |||
It reacts accordingly to such miscabling attempts, at a minimum preventi | It reacts accordingly to such miscabling attempts, preventing adjacencie | |||
ng adjacencies between nodes from being formed and traffic from being forwarded | s between nodes from being formed and traffic from being forwarded on those misc | |||
on those miscabled links. | abled links at a minimum. | |||
Leaf112 will in such scenario use link-M to derive its level (unless it | In such a scenario, Leaf112 will use link-M to derive its level (unless it | |||
is leaf) and can report links to Spine111 and Spine112 as miscabled unless the i | is a leaf) and can report links to Spine111 and Spine112 as miscabled unless the | |||
mplementations | implementations | |||
allows horizontal links. | allow horizontal links. | |||
</t> | </t> | |||
<t><xref target='multi-plane-mis-cabling'/> shows a multiple plane miscabling example. Since Leaf112 and Spine121 belong to two different PoDs, the adjacency between Leaf112 and Spine121 can not be formed. Link-W would be detected and pr evented. | <t><xref target='multi-plane-mis-cabling'/> shows a multi-plane miscabling ex ample. Since Leaf112 and Spine121 belong to two different PoDs, the adjacency be tween Leaf112 and Spine121 cannot be formed. Link-W would be detected and preven ted. | |||
</t> | </t> | |||
<figure align='center' anchor='multi-plane-mis-cabling'><name>A multiple pla ne miscabling example</name> | <figure align='center' anchor='multi-plane-mis-cabling'><name>A Multiple Pla ne Miscabling Example</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+-------+ +-------+ +-------+ +-------+ | +-------+ +-------+ +-------+ +-------+ | |||
|ToF A1| |ToF A2| |ToF B1| |ToF B2| LEVEL 2 | |ToF A1| |ToF A2| |ToF B1| |ToF B2| LEVEL 2 | |||
+-------+ +-------+ +-------+ +-------+ | +-------+ +-------+ +-------+ +-------+ | |||
| | | | | | | | | | | | | | | | | | |||
| | | +-----------------+ | | | | | | | +-----------------+ | | | | |||
| +--------------------------+ | | | | | | +--------------------------+ | | | | | |||
| +------+ | | | +------+ | | | +------+ | | | +------+ | | |||
| | +-----------------+ | | | | | | | | +-----------------+ | | | | | | |||
| | | +--------------------------+ | | | | | | +--------------------------+ | | | |||
skipping to change at line 863 ¶ | skipping to change at line 1019 ¶ | |||
|Spine111| |Spine112| +---+Spine121| |Spine122| LEVEL 1 | |Spine111| |Spine112| +---+Spine121| |Spine122| LEVEL 1 | |||
+-+---+--+ ++----+--+ | +--+---+-+ +-+----+-+ | +-+---+--+ ++----+--+ | +--+---+-+ +-+----+-+ | |||
| | | | | | | | | | | | | | | | | | | | |||
| +---------+ | | | +---------+ | | | +---------+ | | | +---------+ | | |||
| | | | link-W | | | | | | | | | link-W | | | | | |||
| +-------+ | | | | +-------+ | | | | +-------+ | | | | +-------+ | | | |||
| | | | | | | | | | | | | | | | | | | | |||
+-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ | |||
|Leaf111| |Leaf112+------+ |Leaf121| |Leaf122| LEVEL 0 | |Leaf111| |Leaf112+------+ |Leaf121| |Leaf122| LEVEL 0 | |||
+-------+ +-------+ +-------+ +-------+ | +-------+ +-------+ +-------+ +-------+ | |||
+--------PoD#1----------+ +---------PoD#2---------+ | +--------PoD#1----------+ +---------PoD#2---------+]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>RIFT provides an optional level determination procedure in its Zero Touch | <t>RIFT provides an optional level determination procedure in its ZTP mode. | |||
Provisioning mode. Nodes in the fabric without | Nodes in the fabric without | |||
their level configured determine it automatically. This can have possibl | their level configured determine it automatically. However, this can hav | |||
y counter-intuitive consequences however. | e possible counter-intuitive consequences. | |||
One extreme failure scenario is depicted in <xref target='Fallen-spine'/ | One extreme failure scenario is depicted in <xref target='Fallen-spine'/ | |||
> and it shows that if all northbound links of spine11 fail at the same time, | >, and it shows that if all northbound links of Spine11 fail at the same time, | |||
spine11 negotiates a lower level than Leaf11 and Leaf12. | Spine11 negotiates a lower level than Leaf11 and Leaf12. | |||
</t> | </t> | |||
<t>To prevent such scenario where leafs are expected to act as switches, LEA | <t>To prevent such a scenario where leaves are expected to act as switches, th | |||
F_ONLY flag can be set for Leaf111 and Leaf112. | e LEAF_ONLY flag can be set for Leaf111 and Leaf112. | |||
Since level -1 is invalid, Spine11 would not derive a valid level from t | Since level -1 is invalid, Spine11 would not derive a valid level from t | |||
he topology in <xref target='Fallen-spine'/>. It will be isolated from the whole | he topology in <xref target='Fallen-spine'/>. It will be isolated from the whole | |||
fabric | fabric, | |||
and it would be up to the leafs to declare the links towards such spine | and it would be up to the leaves to declare the links towards such spine | |||
as miscabled. | as miscabled. | |||
</t> | </t> | |||
<figure align='center' anchor='Fallen-spine'><name>Fallen spine</name> | <!-- [rfced] We note that Figures 8 and 9 have the same title of "Fallen | |||
Spine". Is this intentional? If not, please let us know how we should | ||||
update to make these figures more distinct. | ||||
--> | ||||
<figure align='center' anchor='Fallen-spine'><name>Fallen Spine</name> | ||||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+-------+ +-------+ +-------+ +-------+ | +-------+ +-------+ +-------+ +-------+ | |||
|ToF A1| |ToF A2| |ToF A1| |ToF A2| | |ToF A1| |ToF A2| |ToF A1| |ToF A2| | |||
+-------+ +-------+ +-------+ +-------+ | +-------+ +-------+ +-------+ +-------+ | |||
| | | | | | | | | | | | | | |||
| +-------+ | | | | | +-------+ | | | | |||
+ + | | ====> | | | + + | | ====> | | | |||
X X +------+ | +------+ | | X X +------+ | +------+ | | |||
+ + | | | | | + + | | | | | |||
+----+--+ +-+-----+ +-+-----+ | +----+--+ +-+-----+ +-+-----+ | |||
skipping to change at line 901 ¶ | skipping to change at line 1060 ¶ | |||
| +-------+ | | +-------+ | | | +-------+ | | +-------+ | | |||
| | | | | | | | | | | | | | |||
+-+---+-+ +--+--+-+ +-----+-+ +-----+-+ | +-+---+-+ +--+--+-+ +-----+-+ +-----+-+ | |||
|Leaf111| |Leaf112| |Leaf111| |Leaf112| | |Leaf111| |Leaf112| |Leaf111| |Leaf112| | |||
+-------+ +-------+ +-+-----+ +-+-----+ | +-------+ +-------+ +-+-----+ +-+-----+ | |||
| | | | | | |||
| +--------+ | | +--------+ | |||
| | | | | | |||
+-+---+-+ | +-+---+-+ | |||
|Spine11| | |Spine11| | |||
+-------+ | +-------+]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
</section> | </section> | |||
<section><name>Miscabling considerations</name> | <section><name>Miscabling Considerations</name> | |||
<t>There are scenarios where operators may want to leverage ZTP and implement ad ditional cabling constraints that go beyond the previously described topology vi olations. Enforcing cabling down to specific level, node, and port combinations might make it simpler for onsite staff to perform troubleshooting activities or replace optical transceivers and/or cabling as the physical layout will be consi stent across the fabric. This is especially true for densely connected fabrics w here it is difficult to physically manipulate those components. It is also easy to imagine other models, such as one where the strict port requirement is relaxe d. | <t>There are scenarios where operators may want to leverage ZTP and implement ad ditional cabling constraints that go beyond the previously described topology vi olations. Enforcing cabling down to specific level, node, and port combinations might make it simpler for onsite staff to perform troubleshooting activities or replace optical transceivers and/or cabling as the physical layout will be consi stent across the fabric. This is especially true for densely connected fabrics w here it is difficult to physically manipulate those components. It is also easy to imagine other models, such as one where the strict port requirement is relaxe d. | |||
</t> | </t> | |||
<t><xref target='miscalbe-cons'/> illustrates an example where the first port on Leaf1 must connect to the first port on Spine1, the second port on Leaf1 must c onnect to the first port on Spine2, and so on. Consider a case where (Leaf1, Por t1) and (Leaf1, Port2) were reversed. RIFT would not consider this to be miscabl ed by default, however, an operator might want to. | <t><xref target='miscalbe-cons'/> illustrates an example where the first port on Leaf1 must connect to the first port on Spine1, the second port on Leaf1 must c onnect to the first port on Spine2, and so on. Consider a case where (Leaf1, Por t1) and (Leaf1, Port2) were reversed. RIFT would not consider this to be miscabl ed by default; however, an operator might want to. | |||
</t> | </t> | |||
<figure align='center' anchor='miscalbe-cons'><name>Fallen Spine</name> | ||||
<figure align='center' anchor='miscalbe-cons'><name>Fallen spine</name> | ||||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+--------+ +--------+ +--------+ +--------+ | +--------+ +--------+ +--------+ +--------+ | |||
| Spine1 | | Spine2 | | Spine3 | | Spine4 | | | Spine1 | | Spine2 | | Spine3 | | Spine4 | | |||
+-1------+ +-1------+ +-1------+ +-1------+ | +-1------+ +-1------+ +-1------+ +-1------+ | |||
+ + + + | + + + + | |||
| +----------+ | | | | +----------+ | | | |||
| | | | | | | | | | |||
| | +---------------------+ | | | | +---------------------+ | | |||
| | | | | | | | | | |||
| | | +--------------------------------+ | | | | +--------------------------------+ | |||
| | | | | | | | | | |||
| | | | | | | | | | |||
| | | | | | | | | | |||
| | | | | | | | | | |||
+ + + + | + + + + | |||
+-1--2--3--4--+ | +-1--2--3--4--+ | |||
| Leaf1 | ...... | | Leaf1 | ...... | |||
+-------------+ | +-------------+ | |||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t>RIFT allows implementations to provide programmable plugins that can adjust Z | <t>RIFT allows implementations to provide programmable plug-ins that can adjust | |||
TP operation or capture information during computation. While defining this is o | ZTP operation or capture information during computation. While defining this | |||
utside the scope of this document, such a mechanism could be used to extend misc | is outside the scope of this document, such a mechanism could be used to | |||
abling functionality. | extend the miscabling functionality. | |||
</t> | </t> | |||
<t>For other protocols to achieve this, it would require additional operational | <t>For other protocols to achieve this, it would require additional | |||
overhead. Consider a fabric that is using unnumbered OSPF links, it is still ver | operational overhead. Consider a fabric that is using unnumbered OSPF links; | |||
y likely that a miscabled link will form an adjacency. Each attempts to move cab | it is still very likely that a miscabled link will form an adjacency. Each | |||
les to the correct port may result in the need for additional troubleshooting as | attempt to move cables to the correct port may result in the need for | |||
other links will become miscabled in the process. Without automation to explici | additional troubleshooting as other links will become miscabled in the | |||
tly tell the operator which ports need to be moved where, the process becomes ma | process. Without automation to explicitly tell the operator which ports need | |||
nually intensive and error-prone very quickly. Or if the problem goes unnoticed, | to be moved where, the process becomes manually intensive and error-prone very | |||
result in suboptimal performance in the fabric.</t> | quickly. If the problem goes unnoticed, it will result in suboptimal | |||
performance in the fabric.</t> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section><name>Multicast and Broadcast Implementations</name> | <section><name>Multicast and Broadcast Implementations</name> | |||
<t>RIFT supports both multicast and broadcast implementations. While a multicast | <t>RIFT supports both multicast and broadcast implementations. While a | |||
 implementation is preferred, there might be cases where a broadcast implementation | multicast implementation is preferred, there might be cases where a broadcast | |||
is optimal or even required. For example, operating systems on IoT devices and | implementation is optimal or even required. For example, operating systems on | |||
embedded devices may not have the required multicast support. Another example is | IoT devices and embedded devices may not have the required multicast | |||
containers, which in some cases do support multicast, but tend to be very CPU-i | support. Another example is containers, which do support multicast in some | |||
nefficient and difficult to tune.</t> | cases but tend to be very CPU-inefficient and difficult to tune.</t> | |||
</section> | </section> | |||
<section><name>Positive vs. Negative Disaggregation</name> | <section><name>Positive vs. Negative Disaggregation</name> | |||
<t> | <t> | |||
Disaggregation is the procedure whereby <xref target='I-D.ietf-rift-rift'>RI | Disaggregation is the procedure whereby <xref | |||
FT</xref> | target='RFC9692'>RIFT</xref> advertises a more specific route | |||
advertises a more specific route southwards as an exception to the | southwards as an exception to the aggregated fabric-default | |||
aggregated fabric-default north. Disaggregation is useful when a prefix | north. Disaggregation is useful when a prefix within the aggregation is | |||
within the aggregation is reachable via some of the parents but not the | reachable via some of the parents but not the others at the same level of | |||
others at the same level of the fabric. | the fabric. It is mandatory when the level is the ToF since a ToF node | |||
It is mandatory when the level is the ToF since a ToF node that cannot reach | that cannot reach a prefix becomes a black hole for that prefix. The hard | |||
a prefix becomes a black hole for that prefix. | problem is to know which prefixes are reachable by whom. | |||
The hard problem is to know which prefixes are reachable by whom. | ||||
</t> | </t> | |||
<t> | <t> | |||
In the general case, <xref target='I-D.ietf-rift-rift'>RIFT</xref> solves th | In the general case, <xref target='RFC9692'>RIFT</xref> solves | |||
at | that problem by interconnecting the ToF nodes so that the ToF nodes can | |||
problem by interconnecting the ToF nodes. So the ToF nodes can exchange the | exchange the full list of prefixes that exist in the fabric and figure out | |||
full list | when a ToF node lacks reachability to some prefixes. This requires | |||
of prefixes that exist in the fabric and figure out when a ToF node lacks | additional ports at the ToF, typically two ports per ToF node to form a | |||
reachability to some prefixes. This requires additional ports at the | ToF-spanning ring. <xref target='RFC9692'>RIFT</xref> also | |||
ToF, typically 2 ports per ToF node to form a ToF-spanning ring. | defines the southbound reflection procedure that enables a parent to | |||
<xref target='I-D.ietf-rift-rift'>RIFT</xref> also defines the southbound re | explore the direct connectivity of its peers, meaning their own parents | |||
flection | and children; based on the advertisements received from the shared parents | |||
procedure that enables a parent to explore the direct connectivity of its | and children, it may enable the parent to infer the prefixes its peers can | |||
peers, meaning their own parents and children; based on the advertisements | reach. | |||
received from the shared parents and children, it may enable the parent to | ||||
infer the prefixes its peers can reach. | ||||
</t> | </t> | |||
<t> | <t> | |||
When a parent lacks reachability to a prefix, it may disaggregate the prefix | When a parent lacks reachability to a prefix, it may disaggregate the | |||
negatively, i.e., advertise that this parent can be used to reach any prefix | prefix negatively, i.e., advertise that this parent can be used to reach | |||
in the aggregation except that one. The Negative Disaggregation signaling is | any prefix in the aggregation except that one. The Negative Disaggregation | |||
simple and functions transitively from ToF to top-of-pod (ToP) and then from | signaling is simple and functions transitively from ToF to Top-of-Pod | |||
ToP to Leaf. | (ToP) and then from ToP to Leaf. However, it is hard for a parent to | |||
But it is hard for a parent to figure which prefix it needs to disaggregate, | figure out which prefix it needs to disaggregate because it does not know | |||
 because it does not know what it does not know; as a result, the use of a | what it does not know; as a result, the use of a spanning ring at the | |||
spanning ring at the ToF is required to operate the Negative Disaggregation. | ToF is required to operate the Negative Disaggregation. Also, though it | |||
Also, though it is only an implementation problem, the programming of the | is only an implementation problem, the programming of the FIB is complex | |||
FIB is complex compared to normal routes, and may incur recursions. | compared to normal routes and may incur recursions. | |||
</t> | </t> | |||
<t> | <t> | |||
The more classical alternative is, for the parents that can reach a prefix | The more classical alternative is, for the parents that can reach a prefix | |||
that peers at the same level cannot, to advertise a more specific route to | that peers at the same level cannot, to advertise a more specific route to | |||
that prefix. This leverages the normal longest prefix match in the FIB, and | that prefix. This leverages the normal longest prefix match in the FIB | |||
does not require a special implementation. But as opposed to the Negative | and does not require a special implementation. As opposed to the | |||
Disaggregation, the Positive Disaggregation is difficult and inefficient to | Negative Disaggregation, the Positive Disaggregation is difficult and | |||
operate transitively. | inefficient to operate transitively. | |||
</t> | </t> | |||
<t> | <t> | |||
Transitivity is not needed to a grandchild if all its parents received the | Transitivity is not needed by a grandchild if all its parents received the | |||
Positive Disaggregation, meaning that they shall all avoid the black hole; | Positive Disaggregation, meaning that they shall all avoid the black hole; | |||
when that is the case, they collectively build a ceiling that protects the | when that is the case, they collectively build a ceiling that protects the | |||
grandchild. But until then, a parent that received a Positive Disaggregation | grandchild. Until then, a parent that received the Positive | |||
may believe that some peers are lacking the reachability and readvertise too | Disaggregation may believe that some peers are lacking the reachability | |||
early, or defer and maintain a black hole situation longer than necessary. | and re-advertise too early or defer and maintain a black hole situation | |||
longer than necessary. | ||||
</t> | </t> | |||
<t> | <t> | |||
In a non-partitioned fabric, all the ToF nodes see one another through the | In a non-partitioned fabric, all the ToF nodes see one another through the | |||
reflection and can figure if one is missing a child. In that case it is | reflection and can figure out if one is missing a child. In that case, it is | |||
possible to compute the prefixes that the peer cannot reach and disaggregate | possible to compute the prefixes that the peer cannot reach and | |||
positively without a ToF-spanning ring. The ToF nodes can also ascertain | disaggregate positively without a ToF-spanning ring. The ToF nodes can | |||
that the ToP nodes are connected each to at least a ToF node that can still | also ascertain that the ToP nodes are each connected to at least a ToF | |||
reach the prefix, meaning that the transitive operation is not required. | node that can still reach the prefix, meaning that the transitive | |||
operation is not required. | ||||
</t> | </t> | |||
<t> | <t> | |||
The bottom line is that in a fabric that is partitioned | The bottom line is that in a fabric that is partitioned (e.g., using | |||
(e.g., using multiple planes) and/or where the ToP nodes are not guaranteed | multiple planes) and/or where the ToP nodes are not guaranteed to always | |||
to always form a ceiling for their children, it is | form a ceiling for their children, it is mandatory to use Negative | |||
mandatory to use the Negative Disaggregation. | Disaggregation. On the other hand, in a highly symmetrical and fully | |||
On the other hand, in a highly symmetrical and fully connected fabric, | connected fabric (e.g., a canonical Clos Network), the Positive | |||
(e.g., a canonical Clos Network), the Positive Disaggregation methods allows | Disaggregation methods save the complexity and cost associated | |||
 to save the complexity and cost associated with the ToF-spanning ring. | with the ToF-spanning ring. | |||
</t> | </t> | |||
<t> | <t> | |||
Note that in the case of Positive Disaggregation, the first ToF node(s) that | Note that in the case of Positive Disaggregation, the first ToF nodes | |||
announces a more-specific route attracts all the traffic for that route and | that announce a more-specific route attract all the traffic for that | |||
may suffer from a transient incast. A ToP node that defers injecting the | route and may suffer from a transient incast. A ToP node that defers | |||
longer prefix in the FIB, in order to receive more advertisements and spread | injecting the longer prefix in the FIB, in order to receive more | |||
the packets better, also keeps on sending a portion of the traffic to the | advertisements and spread the packets better, also keeps on sending a | |||
black hole in the meantime. In the case of Negative Disaggregation, the last | portion of the traffic to the black hole in the meantime. In the case of | |||
ToF node(s) that injects the route may also incur an incast issue; this | Negative Disaggregation, the last ToF nodes that inject the route may | |||
problem would occur if a prefix that becomes totally unreachable is | also incur an incast issue; this problem would occur if a prefix that | |||
disaggregated. | becomes totally unreachable is disaggregated. | |||
</t> | </t> | |||
</section> <!-- Positive vs. Negative Disaggregation --> | </section> <!-- Positive vs. Negative Disaggregation --> | |||
<section><name>Mobile Edge and Anycast</name> | <section><name>Mobile Edge and Anycast</name> | |||
<t> | <t> | |||
When a physical or a virtual node changes its point of attachment in the | When a physical or a virtual node changes its point of attachment in the | |||
fabric from a previous-leaf to a next-leaf, new routes must be installed | fabric from a previous-leaf to a next-leaf, new routes must be installed | |||
that supersede the old ones. Since the flooding flows northwards, the nodes | that supersede the old ones. Since the flooding flows northwards, the | |||
(if any) between the previous-leaf and the common parent are not immediately | nodes (if any) between the previous-leaf and the common parent are not | |||
aware that the path via previous-leaf is obsolete, and a stale route may | immediately aware that the path via the previous-leaf is obsolete and a stal | |||
exist for a while. The common parent needs to select the freshest route | e | |||
advertisement in order to install the correct route via the next-leaf. This | route may exist for a while. The common parent needs to select the | |||
requires that the fabric determines the sequence of the movements of the | freshest route advertisement in order to install the correct route via the | |||
mobile node. | next-leaf. This requires that the fabric determines the sequence of the | |||
movements of the mobile node. | ||||
</t> | </t> | |||
<t> | <t> | |||
On the one hand, a classical sequence counter provides a total order for a | On the one hand, a classical sequence counter provides a total order for a | |||
while but it will eventually wrap. On the other hand, a timestamp provides a | while, but it will eventually wrap. On the other hand, a timestamp provides | |||
permanent order but it may miss a movement that happens too quickly vs. | a permanent order, but it may miss a movement that happens too quickly vs. t | |||
the granularity of the timing information. | he granularity of the timing information. | |||
It is not envisioned that an average fabric supports <xref target='IEEEstd158 | It is not envisioned that an average fabric supports the <xref | |||
8'>Precision Time Protocol</xref> in the short term, and the precision avai | target='IEEEstd1588'>Precision Time Protocol</xref> in the short term, and | |||
lable with the <xref target='RFC5905'>Network Time Protocol</xref> (in the order | the precision available with the <xref target='RFC5905'>Network Time | |||
 of 100 to 200ms) may not necessarily be enough to cover, e.g., the fast mobilit | Protocol</xref> (in the order of 100 to 200 ms) may not necessarily be | |||
y of a Virtual Machine. | enough to cover, e.g., the fast mobility of a Virtual Machine (VM). | |||
</t> | </t> | |||
<t> | ||||
Section 6.8.4 "Mobility" of <xref target='I-D.ietf-rift-rift'>RIFT</xref> | <t>Section <xref target='RFC9692' sectionFormat='bare' section='6.8.4'>"Mobi | |||
lity"</xref> of <xref target='RFC9692'/> | ||||
specifies a hybrid method that combines a sequence counter from the mobile | specifies a hybrid method that combines a sequence counter from the mobile | |||
node and a timestamp from the network taken at the leaf when the route is | node and a timestamp from the network taken at the leaf when the route is | |||
injected. If the timestamps of the concurrent advertisements are comparable | injected. If the timestamps of the concurrent advertisements are | |||
(i.e., more distant than the precision of the timing protocol), then the | comparable (i.e., more distant than the precision of the timing protocol), | |||
timestamp alone is used to determine the relative freshness of the routes. | then the timestamp alone is used to determine the relative freshness of | |||
Otherwise, the sequence counter from the mobile node, if available, is used. | the routes. Otherwise, the sequence counter from the mobile node is used if | |||
One caveat is that the sequence counter must not wrap within the precision | it is available. One caveat is that the sequence counter must not wrap | |||
of the timing protocol. Another is that the mobile node may not even provide | within the precision of the timing protocol. Another is that the mobile | |||
a sequence counter, in which case the mobility itself must be slower than | node may not even provide a sequence counter; in which case, the mobility | |||
the precision of the timing. | itself must be slower than the precision of the timing. | |||
</t> | </t> | |||
<t> | <t> | |||
Mobility must not be confused with anycast. In both cases, a same address is | Mobility must not be confused with anycast. In both cases, the same | |||
injected in RIFT at different leaves. In the case of mobility, only the | address is injected in RIFT at different leaves. In the case of mobility, | |||
freshest route must be conserved, since mobile node changed its point of | only the freshest route must be conserved since the mobile node changes its | |||
 attachment from one leaf to the next. In the case of anycast, the node may be | point of attachment from one leaf to the next. In the case of anycast, the | |||
either multihomed (attached to multiple leaves in parallel) or reachable | node may either be multihomed (attached to multiple leaves in parallel) or | |||
beyond the fabric via multiple routes that are redistributed to different | reachable beyond the fabric via multiple routes that are redistributed to | |||
leaves; either way, in the case of anycast, the multiple routes are equally | different leaves. Either way, the multiple routes are equally valid and | |||
valid and should be conserved. Without further information from the | should be conserved in the case of anycast. Without further information | |||
redistributed routing protocol, it is impossible to sort out a movement from | from the redistributed routing protocol, it is impossible to sort out a | |||
a redistribution that happens asynchronously on different leaves. | movement from a redistribution that happens asynchronously on different | |||
<xref target='I-D.ietf-rift-rift'>RIFT</xref> expects that anycast addresses | leaves. <xref target='RFC9692'>RIFT</xref> expects that anycast addresses | |||
are | are advertised within the timing precision, which is typically the case | |||
advertised within the timing precision, which is typically the case with a | with a low-precision timing and a multihomed node. Beyond that time | |||
low-precision timing and a multihomed node. Beyond that time interval, RIFT | interval, RIFT interprets the lag as a mobility and only the freshest | |||
interprets the lag as a mobility and only the freshest route is retained. | route is retained. | |||
</t> | </t> | |||
<!--[rfced] To clarify the content found in RFC 8505, may we rephrase the | ||||
text around its citations as follows? | ||||
Original: | ||||
When using IPv6 [RFC8200], RIFT suggests to leverage [RFC8505] as the | ||||
IPv6 ND interaction between the mobile node and the leaf. | ||||
... | ||||
When using [RFC8505], the parallel registration of an | ||||
anycast address to multiple leaves is done with the same sequence | ||||
counter, whereas the sequence counter is incremented when the point | ||||
of attachment changes. | ||||
Perhaps: | ||||
When using IPv6 [RFC8200], RIFT suggests leveraging 6LoWPAN ND [RFC8505] | ||||
as the IPv6 ND interaction between the mobile node and the leaf. | ||||
... | ||||
When using 6LoWPAN ND [RFC8505], the parallel registration of an | ||||
anycast address to multiple leaves is done with the same sequence | ||||
counter, whereas the sequence counter is incremented when the point | ||||
of attachment changes. | ||||
--> | ||||
<t> | <t> | |||
 When using <xref target='RFC8200'>IPv6</xref>, RIFT suggests leveraging | When using <xref target='RFC8200'>IPv6</xref>, RIFT suggests leveraging | |||
<xref target='RFC8505'/> as the IPv6 ND interaction between the mobile node | <xref target='RFC8505'/> as the IPv6 ND interaction between the mobile | |||
and the leaf. This | node and the leaf. This not only provides a sequence counter but also a | |||
provides not only a sequence counter but also a lifetime and a security | lifetime and a security token that may be used to protect the ownership of | |||
token that may be used to protect the ownership of an address <xref target=' | an address <xref target='RFC8928'/>. When using <xref target='RFC8505'/>, | |||
RFC8928'/>. | the parallel registration of an anycast address to multiple leaves is done | |||
When using <xref target='RFC8505'/>, the parallel registration of an anycast | with the same sequence counter, whereas the sequence counter is | |||
address to multiple leaves is done with the same sequence counter, whereas | incremented when the point of attachment changes. This way, it is | |||
the sequence counter is incremented when the point of attachment changes. | possible to differentiate a mobile node from a multihomed node, even when | |||
This way, it is possible to differentiate a mobile node from a multihomed | the mobility happens within the timing precision. It is also possible for | |||
node, even when the mobility happens within the timing precision. It is also | a mobile node to be multihomed as well, e.g., to change only one of its | |||
possible for a mobile node to be multihomed as well, e.g., to change only | points of attachment. | |||
one of its points of attachment. | ||||
</t> | </t> | |||
</section> <!-- Mobile Edge and Anycast --> | </section> <!-- Mobile Edge and Anycast --> | |||
<section anchor='v4ov6'><name>IPv4 over IPv6</name> | <section anchor='v4ov6'><name>IPv4 over IPv6</name> | |||
<t>RIFT allows advertising IPv4 prefixes over IPv6 RIFT network. IPv6 Addres | <t>RIFT allows advertising IPv4 prefixes over an IPv6 RIFT network. An | |||
s Family (AF) is configured via the usual Neighbor Discovery (ND) | IPv6 Address Family (AF) is configured via the usual ND mechanisms and then | |||
mechanisms and then V4 can use V6 next-hops analogous to <xref target='R | V4 can use V6 next-hops analogous to <xref target='RFC8950'/>. It is | |||
FC8950'/>. It is expected that the whole fabric | expected that the whole fabric supports the same type of forwarding of | |||
supports the same type of forwarding of address families on all the link | AFs on all the links. RIFT provides an indication whether a | |||
s. RIFT provides an indication | node is capable of V4-forwarding and implementations are possible where | |||
whether a node is v4 forwarding capable and implementations are possible | different routing tables are computed per AF as long as the | |||
where different routing tables | computation remains loop-free. | |||
are computed per address family as long as the computation remains loop- | ||||
free. | ||||
</t> | </t> | |||
<figure align='center' anchor='IPV4-o-IPV6'><name>IPv4 over IPv6</name> | <figure align='center' anchor='IPV4-o-IPV6'><name>IPv4 over IPv6</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+-----+ +-----+ | ||||
+---+---+ | ToF | | ToF | | +-----+ +-----+ | |||
^ +--+--+ +-----+ | +---+---+ | ToF | | ToF | | |||
| | | | | | ^ +--+--+ +-----+ | |||
| | +-------------+ | | | | | | | | |||
| | +--------+ | | | | | +-------------+ | | |||
+ | | | | | | | +--------+ | | | |||
V6 +-----+ +-+---+ | + | | | | | |||
Forwarding |Spine| |Spine| | V6 +-----+ +-+---+ | |||
+ +--+--+ +-----+ | Forwarding |Spine| |Spine| | |||
| | | | | | + +--+--+ +-----+ | |||
| | +-------------+ | | | | | | | | |||
| | +--------+ | | | | | +-------------+ | | |||
| | | | | | | | +--------+ | | | |||
v +-----+ +-+---+ | | | | | | | |||
+---+---+ |Leaf | | Leaf| | v +-----+ +-+---+ | |||
+--+--+ +--+--+ | +---+---+ |Leaf | | Leaf| | |||
| | | +--+--+ +--+--+ | |||
IPv4 prefixes| |IPv4 prefixes | | | | |||
| | | IPv4 prefixes| |IPv4 prefixes | |||
+---+----+ +---+----+ | | | | |||
| V4 | | V4 | | +---+----+ +---+----+ | |||
| subnet | | subnet | | | V4 | | V4 | | |||
+--------+ +--------+ | | subnet | | subnet | | |||
+--------+ +--------+ | ||||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
</section> | </section> | |||
<section><name>In-Band Reachability of Nodes</name> | <section><name>In-Band Reachability of Nodes</name> | |||
<t>RIFT doesn't precondition that nodes of the fabric have reachable addr | <t>RIFT doesn't precondition that nodes of the fabric have reachable | |||
esses. But the operational reasons to reach the internal nodes may exist. <xref | addresses, but the operational reasons to reach the internal nodes may | |||
target='In-band-reach'/> shows an example in which the network management station (N | exist. <xref target='in-band-reach'/> shows an example in which the | |||
MS) attaches to leaf1. | network management station (NMS) attaches to Leaf1. | |||
</t> | </t> | |||
<figure align='center' anchor='In-band-reach'><name>In-Band reachability of | ||||
node</name> | <figure align='center' anchor='in-band-reach'><name>In-Band Reachability of | |||
Nodes</name> | ||||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+-------+ +-------+ | +-------+ +-------+ | |||
| ToF1 | | ToF2 | | | ToF1 | | ToF2 | | |||
++-----++ ++-----++ | ++-----++ ++-----++ | |||
| | | | | | | | | | |||
| +----------+ | | | +----------+ | | |||
| +--------+ | | | | +--------+ | | | |||
| | | | | | | | | | |||
++-----++ +--+---++ | ++-----++ +--+---++ | |||
|Spine1 | |Spine2 | | |Spine1 | |Spine2 | | |||
++-----++ ++-----++ | ++-----++ ++-----++ | |||
| | | | | | | | | | |||
| +----------+ | | | +----------+ | | |||
| +--------+ | | | | +--------+ | | | |||
| | | | | | | | | | |||
++-----++ +--+---++ | ++-----++ +--+---++ | |||
| Leaf1 | | Leaf2 | | | Leaf1 | | Leaf2 | | |||
+---+---+ +-------+ | +---+---+ +-------+ | |||
| | | | |||
|NMS | |NMS]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>If NMS wants to access Leaf2, it simply works. Because loopback address o f Leaf2 is flooded in its Prefix North TIE. | <t>If the NMS wants to access Leaf2, it simply works because the loopback ad dress of Leaf2 is flooded in its Prefix North TIE. | |||
</t> | </t> | |||
<t>If NMS wants to access Spine2, it simply works too. Because spine node always advertises its loopback address in the Prefix North TIE. NMS may reach S pine2 from Leaf1-Spine2 or Leaf1-Spine1-ToF1/ToF2-Spine2. | <t>If the NMS wants to access Spine2, it also works because a spine node always advertises its loopback address in the Prefix North TIE. The NMS may reac h Spine2 from Leaf1-Spine2 or Leaf1-Spine1-ToF1/ToF2-Spine2. | |||
</t> | </t> | |||
 <t>If NMS wants to access ToF2, ToF2's loopback address needs to be injec ted into its Prefix South TIE. This TIE must be seen by all nodes at the level b elow - the spine nodes in <xref target='In-band-reach'/> – that must form a ceil ing for all the traffic coming from below (south). Otherwise, the traffic from N MS may follow the default route to the wrong ToF Node, e.g., ToF1. | <t>If the NMS wants to access ToF2, ToF2's loopback address needs to be i njected into its Prefix South TIE. This TIE must be seen by all nodes at the lev el below -- the spine nodes in <xref target='in-band-reach'/> -- that must form a ceiling for all the traffic coming from below (south). Otherwise, the traffic from the NMS may follow the default route to the wrong ToF Node, e.g., ToF1. | |||
</t> | </t> | |||
<t>In case of failure between ToF2 and spine nodes, ToF2's loopback addre ss must be disaggregated recursively all the way to the leaves. In a partitioned ToF, even with recursive disaggregation a ToF node is only reachable within its plane. | <t>In the case of failure between ToF2 and spine nodes, ToF2's loopback a ddress must be disaggregated recursively all the way to the leaves. In a partiti oned ToF, even with recursive disaggregation, a ToF node is only reachable withi n its plane. | |||
</t> | </t> | |||
<t>A possible alternative to recursive disaggregation is to use a ring th at interconnects the ToF nodes to transmit packets between them for their loopba ck addresses only. The idea is that this is mostly control traffic and should no t alter the load balancing properties of the fabric. | <t>A possible alternative to recursive disaggregation is to use a ring th at interconnects the ToF nodes to transmit packets between them for their loopba ck addresses only. The idea is that this is mostly control traffic and should no t alter the load-balancing properties of the fabric. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Dual Homing Servers</name> | <section><name>Dual-Homing Servers</name> | |||
<!-- [rfced] We are unable to parse the following sentence (specifically, we | ||||
are unable to determine what "or the must" means). May we rephrase as | ||||
follows for clarity and specify "Top-of-Fabric"? | ||||
<t>Each RIFT node may operate in Zero Touch Provisioning (ZTP) mode. It has | Original: | |||
no | It has no configuration (unless it is a Top-of-Fabric at the top of | |||
configuration (unless it is a Top-of-Fabric at the top of the topology or th | the topology or the must operate in the topology as leaf and/or support | |||
e must | leaf-2-leaf procedures) and it will fully configure itself after being | |||
operate in the topology as leaf and/or support leaf-2-leaf procedures) and i | attached to the topology. | |||
t will | ||||
Perhaps: | ||||
It has no configuration (unless it is a ToF node at the top of the | ||||
topology or if it must operate in the topology as a leaf and/or support | ||||
leaf-2-leaf procedures), and it will fully configure itself after being | ||||
attached to the topology. | ||||
--> | ||||
<t>Each RIFT node may operate in ZTP mode. It has no configuration (unless | ||||
it is a ToF node at the top of the topology or if it must operate in | ||||
the topology as a leaf and/or support leaf-2-leaf procedures), and it will | ||||
fully configure itself after being attached to the topology. | fully configure itself after being attached to the topology. | |||
</t> | </t> | |||
<figure align='center' anchor='dualhoming-servers'><name>Dual-homing servers </name> | <figure align='center' anchor='dualhoming-servers'><name>Dual-Homing Servers </name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+---+ +---+ +---+ | +---+ +---+ +---+ | |||
|ToF| |ToF| |ToF| ToF | |ToF| |ToF| |ToF| ToF | |||
+---+ +---+ +---+ | +---+ +---+ +---+ | |||
| | | | | | | | | | | | | | |||
| +----------------+ | | | | +----------------+ | | | |||
| +----------------+ | | | +----------------+ | | |||
| | | | | | | | | | | | | | |||
+----------+--+ +--+----------+ | +----------+--+ +--+----------+ | |||
| ToR1 | | ToR2 | Spine | | ToR1 | | ToR2 | Spine | |||
+--+------+---+ +--+-------+--+ | +--+------+---+ +--+-------+--+ | |||
+---+ | | | | | | +---+ | +---+ | | | | | | +---+ | |||
| +-----------------+ | | | | | +-----------------+ | | | | |||
| | | +-------------+ | | | | | | +-------------+ | | | |||
| | | | | +-----------------+ | | | | | | | +-----------------+ | | |||
| | | | +--------------+ | | | | | | | | +--------------+ | | | | |||
| | | | | | | | | | | | | | | | | | |||
+---+ +---+ +---+ +---+ | +---+ +---+ +---+ +---+ | |||
| | | | | | | | | | | | | | | | | | |||
+---+ +---+ ............. +---+ +---+ | +---+ +---+ ............. +---+ +---+ | |||
SV(1) SV(2) SV(n-1) SV(n) Leaf | SV(1) SV(2) SV(n-1) SV(n) Leaf]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>Sometimes, people may prefer to disaggregate from ToR to servers from sta | <!-- [rfced] May we rephrase the sentence below as follows (i.e., specify | |||
rt on, i.e. the servers have couple tens of routes in FIB from start on beside d | "ToR" and update "start on" to "startup")? | |||
efault routes to avoid breakages at rack level. Full disaggregation of the fabri | ||||
c could be achieved by configuration supported by RIFT. | Original: | |||
Sometimes, people may prefer to disaggregate from ToR to servers | ||||
from start on, i.e. the servers have couple tens of routes in FIB from start | ||||
on beside default routes to avoid breakages at rack level. | ||||
Perhaps: | ||||
Sometimes people may prefer to disaggregate from ToR nodes to | ||||
servers from startup, i.e., the servers have multiple routes in the FIB from | ||||
startup other than default routes to avoid breakages at the rack level. | ||||
--> | ||||
<t>Sometimes people may prefer to disaggregate from ToR to servers from start on | ||||
, i.e. the servers have couple tens of routes in FIB from start on beside defaul | ||||
t routes to avoid breakages at rack level. Full disaggregation of the fabric cou | ||||
ld be achieved by configuration supported by RIFT. | ||||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Fabric with A Controller</name> | <section><name>Fabric with a Controller</name> | |||
<t>There are many different ways to deploy the controller. One possibility i s attaching a controller to the RIFT domain from ToF and another possibility is attaching a controller from the leaf. | <t>There are many different ways to deploy the controller. One possibility i s attaching a controller to the RIFT domain from ToF and another possibility is attaching a controller from the leaf. | |||
</t> | </t> | |||
<figure align='center' anchor='Fabric-controller'><name>Fabric with a contro ller</name> | <figure align='center' anchor='Fabric-controller'><name>Fabric with a Contro ller</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+------------+ | +------------+ | |||
| Controller | | | Controller | | |||
++----------++ | ++----------++ | |||
| | | | | | |||
| | | | | | |||
+----++ ++----+ | +----++ ++----+ | |||
------- | ToF | | ToF | | ------- | ToF | | ToF | | |||
| +--+--+ +-----+ | | +--+--+ +-----+ | |||
| | | | | | | | | | | | |||
skipping to change at line 1232 ¶ | skipping to change at line 1466 ¶ | |||
| | | | | | | | | | | | |||
+-----+ +-+---+ | +-----+ +-+---+ | |||
RIFT domain |Spine| |Spine| | RIFT domain |Spine| |Spine| | |||
+--+--+ +-----+ | +--+--+ +-----+ | |||
| | | | | | | | | | | | |||
| | +-------------+ | | | | +-------------+ | | |||
| | +--------+ | | | | | +--------+ | | | |||
| | | | | | | | | | | | |||
| +-----+ +-+---+ | | +-----+ +-+---+ | |||
------- |Leaf | | Leaf| | ------- |Leaf | | Leaf| | |||
+-----+ +-----+ | +-----+ +-----+]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<section><name>Controller Attached to ToFs</name> | <section><name>Controller Attached to ToFs</name> | |||
<t>If a controller is attaching to the RIFT domain from ToF, it usually uses dual-homing connections. The loopback prefix of the controller should be ad vertised down by the ToF and spine to leaves. If the controller loses link to To F, make sure the ToF withdraw the prefix of the controller.</t> | <t>If a controller is attaching to the RIFT domain from ToF, it usually uses dual-homing connections. The loopback prefix of the controller should be ad vertised down by the ToF and spine to the leaves. If the controller loses the li nk to ToF, make sure the ToF withdraws the prefix of the controller.</t> | |||
</section> | </section> | |||
<section><name>Controller Attached to Leaf</name> | <section><name>Controller Attached to Leaf</name> | |||
<t>If the controller is attaching from a leaf to the fabric, no special provisions are needed. | <t>If the controller is attaching from a leaf to the fabric, no special provisions are needed. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section><name>Internet Connectivity Within Underlay</name> | <section><name>Internet Connectivity Within Underlay</name> | |||
<t>If global addressing is running without overlay, an external default r oute needs to be advertised through RIFT fabric to achieve internet connectivity . For the purpose of forwarding of the entire RIFT fabric, an internal fabric pr efix needs to be advertised in the South Prefix TIE by ToF and spine nodes.</t> | <t>If global addressing is running without overlay, an external default r oute needs to be advertised through the RIFT fabric to achieve internet connecti vity. For the purpose of forwarding of the entire RIFT fabric, an internal fabri c prefix needs to be advertised in the South Prefix TIE by ToF and spine nodes.< /t> | |||
<section><name>Internet Default on the Leaf</name> | <section><name>Internet Default on the Leaf</name> | |||
<t>In case that the internet gateway is a leaf, the leaf node as the internet gateway needs to advertise a default route in its Prefix North TIE.</t > | <t>In the case that the internet gateway is a leaf, the leaf node as the internet gateway needs to advertise a default route in its Prefix North TIE .</t> | |||
</section> | </section> | |||
<section><name>Internet Default on the ToFs</name> | <section><name>Internet Default on the ToFs</name> | |||
<t>In case that the internet gateway is a ToF, the ToF and spine nod es need to advertise a default route in the Prefix South TIE.</t> | <t>In the case that the internet gateway is a ToF, the ToF and spine nodes need to advertise a default route in the Prefix South TIE.</t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section><name>Subnet Mismatch and Address Families</name> | <section><name>Subnet Mismatch and Address Families</name> | |||
<figure align='center' anchor='subnet-mismatch'><name>subnet mismatch</name> | <figure align='center' anchor='subnet-mismatch'><name>Subnet Mismatch</name> | |||
<artwork align='center'> | <artwork align='center'><![CDATA[ | |||
<![CDATA[ | ||||
+--------+ +--------+ | +--------+ +--------+ | |||
| | LIE LIE | | | | | LIE LIE | | | |||
| A | +----> <----+ | B | | | A | +----> <----+ | B | | |||
| +---------------------+ | | | +---------------------+ | | |||
+--------+ +--------+ | +--------+ +--------+ | |||
X/24 Y/24 | X/24 Y/24]]></artwork> | |||
]]></artwork> | ||||
</figure><t keepWithPrevious='true'></t> | </figure><t keepWithPrevious='true'></t> | |||
<t>LIEs are exchanged over all links running RIFT to perform Link (Neighbor) | ||||
<t>LIEs are exchanged over all links running RIFT to perform Link (Neighbor) | Discovery. A node must NOT originate LIEs on an AF if it does not process recei | |||
Discovery. A node must NOT originate LIEs on an address family if it does not p | ved LIEs on that family. | |||
rocess received LIEs on that family. | LIEs on the same link are considered part of the same negotiation indepe | |||
LIEs on same link are considered part of the same negotiation independen | ndent from the AF they arrive on. | |||
t on the address family they arrive on. | An implementation must be ready to accept TIEs on all addresses it used | |||
An implementation must be ready to accept TIEs on all addresses it used | as the source of LIE frames. | |||
as source of LIE frames. | ||||
</t> | </t> | |||
<t>As shown in the above figure, without further checks | <t>As shown in <xref target='subnet-mismatch'/>, an adjacency of nodes A | |||
adjacency of node A and B may form, but the forwarding between node A an | and B may form without further checks, but the forwarding between nodes A an | |||
d node B may fail because subnet X mismatches with subnet Y. | d B may fail | |||
because subnet X mismatches with subnet Y. | ||||
</t> | </t> | |||
<t>To prevent this a RIFT implementation should check for subnet mismatch ju | <t>To prevent this, a RIFT implementation should check for subnet mismatch i | |||
st like e.g. IS-IS does. This can lead to scenarios where an adjacency, despite | n a way that is similar to how IS-IS does. This can lead to scenarios where an a | |||
exchange of LIEs in both | djacency, despite the exchange of LIEs in both | |||
address families may end up having an adjacency in a single AF only. Thi | AFs, may end up having an adjacency in a single AF only. This is especia | |||
s is a consideration especially in <xref target='v4ov6'/> scenarios. | lly a consideration in scenarios relating to <xref target='v4ov6'/>. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>Anycast Considerations</name> | <section><name>Anycast Considerations</name> | |||
<figure align='center' anchor='AnycastTL'><name>Anycast</name> | <figure align='center' anchor='AnycastTL'><name>Anycast</name> | |||
<artwork align='center'><![CDATA[ | <artwork align='center'><![CDATA[ | |||
+ traffic | + traffic | |||
| | | | |||
v | v | |||
+------+------+ | +------+------+ | |||
skipping to change at line 1309 ¶ | skipping to change at line 1540 ¶ | |||
| +---------+ | | +---------+ | | | +---------+ | | +---------+ | | |||
| +-------+ | | | +-------+ | | | | +-------+ | | | +-------+ | | | |||
| | | | | | | | | | | | | | | | | | |||
+-+---+-+ +--+--+-+ +-+---+-+ +--+--+-+ | +-+---+-+ +--+--+-+ +-+---+-+ +--+--+-+ | |||
| | | | | | | | | | | | | | | | | | |||
|Leaf111| |Leaf112| |Leaf121| |Leaf122| LEVEL 0 | |Leaf111| |Leaf112| |Leaf121| |Leaf122| LEVEL 0 | |||
+-+-----+ ++------+ +-----+-+ +-----+-+ | +-+-----+ ++------+ +-----+-+ +-----+-+ | |||
+ + + ^ + | + + + ^ + | |||
PrefixA PrefixB PrefixA | PrefixC | PrefixA PrefixB PrefixA | PrefixC | |||
| | | | |||
+ traffic | + traffic]]></artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t>If the traffic comes from ToF to Leaf111 or Leaf121 which has anycast pre fix PrefixA, RIFT can deal with this case well. But if the traffic comes from Le af122, it arrives Spine21 or Spine22 at level 1. But Spine21 or Spine22 doesn't know another PrefixA attaching Leaf111. So it will always get to Leaf121 and nev er get to Leaf111. If the intension is that the traffic should be offloaded to L eaf111, then use policy guided prefixes defined in <xref target='I-D.ietf-rift-r ift'>RIFT</xref>. | <t>If the traffic comes from ToF to Leaf111 or Leaf121, which has anycast prefix PrefixA, RIFT can deal with this case well. However, if the traffic come s from Leaf122, it arrives at Spine21 or Spine22 at LEVEL 1. Additionally, Spine 21 or Spine22 doesn't know about another PrefixA attached to Leaf111, so the traffic will always get to Leaf121 and never Leaf111. If the intention is that the traffic should be offloaded to Leaf111, then use the policy-guided prefixes defined in <xref targ et='RFC9692'>RIFT</xref>. | |||
</t> | </t> | |||
</section> | </section> | |||
<section><name>IoT Applicability</name> | <section><name>IoT Applicability</name> | |||
<t>The design of RIFT inherits from RPL <xref target='RFC6550'/> the anisotropic design of a default route upwards (northwards); it also inherits the capability to inject external host routes at the Leaf level using Wireless ND (WiND) <xref target='RFC8505'/><xref target='RFC8928'/> between a RIFT-agnostic host and a R IFT router. Both the RPL and the RIFT protocols are meant for large scale, and W iND enables device mobility at the edge the same way in both cases.</t> | <t>The design of RIFT inherits the anisotropic design of a default route upwards (northwards) from RPL <xref target='RFC6550'/>. It also inherits the capability to inject external host routes at the Leaf level using Wireless ND (WiND) <xref target='RFC8505'/> <xref target='RFC8928'/> between a RIFT-agnostic host and a RIFT router. Both the RPL and the RIFT protocols are meant for a large scale, an d WiND enables device mobility at the edge the same way in both cases.</t> | |||
<t>The main difference between RIFT and RPL is that with RPL, there’s a single R oot, whereas RIFT has many ToF nodes. This adds huge capabilities for leaf-2-lea f ECMP paths, but additional complexity with the need to disaggregate. Also RIFT uses Link State flooding northwards, and is not designed for low-power operatio n.</t> | <t>The main difference between RIFT and RPL is that there's a single root with R PL, whereas RIFT has many ToF nodes. This adds huge capabilities for leaf-2-leaf ECMP paths but additional complexity with the need to disaggregate. Also, RIFT uses link-state flooding northwards and is not designed for low-power operation. </t> | |||
<t>Still nothing prevents that the IP devices connected at the Leaf are IoT devi ces, which typically expose their address using WiND – which is an upgrade from 6LoWPAN ND <xref target='RFC6775'/>.</t> | <t>Still, nothing prevents the IP devices connected at the Leaf from being IoT devices, which typically expose their address using WiND -- this is an upgrade from 6LoWPAN ND <xref target='RFC6775'/>.</t> | |||
<t>A network that serves high speed/ high power IoT devices should typically pro vide deterministic capabilities for applications such as high speed control loop s or movement detection. The Fat Tree is highly reliable, and in normal conditio n provides an equivalent multipath operation; but the ECMP doesn’t provide hard guarantees for either delivery or latency. As long as the fabric is non-blocking the result is the same; but there can be load unbalances resulting in incast an d possibly congestion loss that will prevent the delivery within bounded latency .</t> | <t>A network that serves high speed / high power IoT devices should typically pr ovide deterministic capabilities for applications such as high speed control loo ps or movement detection. The Fat Tree is highly reliable and, in normal conditi ons, provides an equivalent multipath operation; however, the ECMP doesn't provi de hard guarantees for either delivery or latency. As long as the fabric is non- blocking, the result is the same, but there can be load imbalances resulting in incast and possibly congestion loss that will prevent the delivery within bounde d latency.</t> | |||
<t>This could be alleviated with Packet Replication, Elimination and Reordering | <t>This could be alleviated with Packet Replication, Elimination, and | |||
(PREOF) <xref target='RFC8655'/> leaf-2-leaf but PREOF is hard to provide at the | Ordering Functions (PREOF) <xref target='RFC8655'/> leaf-2-leaf, but PREOF | |||
scale of all flows, and the replication may increase the probability of the ov | is hard to provide at the scale of all flows and the replication may increase t | |||
erload that it attempts to solve.</t> | he probability of the overload that it attempts to solve.</t> | |||
<t>Note that the load balancing is not RIFT’s problem, but it is key to serve Io T adequately.</t> | <t>Note that the load balancing is not RIFT's problem, but it is key to serve Io T adequately.</t> | |||
</section> | </section> | |||
<section anchor='keys'><name>Key Management</name> | <section anchor='keys'><name>Key Management</name> | |||
<t> | <t> | |||
As outlined in Section 9 "Security Considerations" of <xref target='I-D. ietf-rift-rift'>RIFT</xref>, either a private shared key or a public/private key pair is used to authenticate the adjacency. | As outlined in Section <xref target='RFC9692' sectionFormat='bare' secti on='9'>"Security Considerations"</xref> of <xref target="RFC9692"/>, either a pr ivate shared key or a public/private key pair is used to authenticate the adjace ncy. | |||
Both the key distribution and key synchronization methods are out of | Both the key distribution and key synchronization methods are out of | |||
scope for this document. Both nodes in the adjacency must share the | scope for this document. Both nodes in the adjacency must share the | |||
same keys, key type, and algorithm for a given key ID. Mismatched | same keys, key type, and algorithm for a given key ID. Mismatched | |||
keys will not inter-operate as their security envelopes will be unverifi able. | keys will not interoperate as their security envelopes will be unverifia ble. | |||
</t> | </t> | |||
<t> | <t> | |||
Key roll-over while the adjacency is active may be supported. The | Key rollover while the adjacency is active may be supported. The | |||
specific mechanism is well documented in <xref target="RFC6518"/>. As ou | specific mechanism is well documented in <xref target="RFC6518"/>. | |||
tlined in Section 9.9 "Host Implementations" of <xref target='I-D.ietf-rift-rif | As outlined in <xref target='RFC9692' sectionFormat='bare' section='9.9'>"Host I | |||
t'>RIFT</xref>, hosts as well as VMs act as RIFT devices are possible. KMP such | mplementations"</xref> of <xref target="RFC9692"/>, hosts as well as VMs acting | |||
as KV for key roll-over in the fabric using a symmetric key that can be changed | as RIFT devices are possible. Key Management Protocols (KMPs), such as Key Value | |||
easily when compromised. Wherein symmetric key of a host is more likely to be c | (KV) for key rollover in the fabric, use a symmetric key that can be changed ea | |||
ompromised than of a in-fabric networking node. | sily when compromised; in which case, the symmetric key of a host is more likely | |||
to be compromised than that of an in-fabric networking node. | ||||
</t> | </t> | |||
</section> | </section> | |||
<section anchor='TTL-HopLimit'><name>TTL/HopLimit of 1 vs. 255 on LIEs/TIEs</nam e> | <section anchor='TTL-HopLimit'><name>TTL/Hop Limit of 1 vs. 255 on LIEs/TIEs</na me> | |||
<t> | <t> | |||
The use of a packet's Time to Live (TTL) (IPv4) or Hop Limit (IPv6) to v | The use of a packet's Time to Live (TTL) (IPv4) or Hop Limit (IPv6) to v | |||
erify whether the packet was originated by an adjacent node on a connected link | erify whether the packet was originated by an adjacent node on a connected link | |||
has been used in RIFT.RIFT explicitly requires the use of a TTL/HL value of 1 *o | has been used in RIFT. | |||
r* 255 when sending/receiving LIEs and TIEs so that implementers have a choice b | RIFT explicitly requires the use of a TTL/HL value of 1 or 255 when sending/rece | |||
etween the two. | iving LIEs and TIEs so that implementers have a choice between the two. | |||
</t> | </t> | |||
<t> | <t> | |||
TTL=1 or HL=1 protects against the information disseminating more than 1 hop in the fabric and should be the default unless configured otherwise. TTL=255 or HL=255 can lead RIFT TIE packet propagation to more than one hop (m ulticast address is already local subnetwork range) in case of implementation pr oblems but does protect against a remote attack as well, and the receiving remot e router will ignore such TIE packet unless the remote router is exactly 254 hop s away and accepts only TTL=1 or HL=1. <xref target="RFC5082"/> defines a Genera lized TTL Security Mechanism (GTSM). The GTSM is applicable to LIEs/TIEs impleme ntations that use a TTL or HL of 255. It provides a defense from infrastructure attacks based on forged protocol packets from outside the fabric. | TTL=1 or HL=1 protects against the information disseminating more than 1 hop in the fabric and should be the default unless configured otherwise. TTL=255 or HL=255 can lead to RIFT TIE packets propagating more than one hop (th e multicast address is already in local subnetwork range) in case of implementat ion problems but does protect against a remote attack as well, and the receiving remote router will ignore such a TIE packet unless the remote router is exactly 2 54 hops away and accepts only TTL=1 or HL=1. <xref target="RFC5082"/> defines a Generalized TTL Security Mechanism (GTSM). The GTSM is applicable to LIE/TIE imp lementations that use a TTL or HL of 255. It provides a defense from infrastruct ure attacks based on forged protocol packets from outside the fabric. | |||
</t> | </t> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor='Security'><name>Security Considerations</name> | <section anchor='Security'><name>Security Considerations</name> | |||
<t>This document presents applicability of RIFT. As such, it does not | <t>This document presents applicability of RIFT. As such, it does not | |||
introduce any security considerations. However, there are a number | introduce any security considerations. However, there are a number | |||
of security concerns at <xref target='I-D.ietf-rift-rift'>RIFT</xref>.</t> | of security concerns in <xref target='RFC9692'></xref>.</t> | |||
</section> | </section> | |||
<section anchor="iana-tlv-class-reg-sec" title="IANA Considerations"> | <section anchor="iana-tlv-class-reg-sec" title="IANA Considerations"> | |||
<t>This document has no IANA actions.</t> | <t>This document has no IANA actions.</t> | |||
</section> | </section> | |||
</middle> | ||||
<section title="Acknowledgments"> | <back> | |||
<t> | ||||
The authors would like to thank Jaroslaw Kowalczyk, Alvaro Retana, Jim Gu | ||||
ichard and Jeffrey Zhang for providing invaluable concepts and content for this | ||||
document. | ||||
</t> | ||||
</section> | ||||
<section anchor='Contributors'><name>Contributors</name> | <references><name>References</name> | |||
<t>The following people (listed in alphabetical order) contributed significa | <references><name>Normative References</name> | |||
ntly to the content of this document and should be considered co-authors:</t> | ||||
<t>Jordan Head</t> | ||||
<t>Juniper Networks</t> | ||||
<t>Email: jhead@juniper.net</t> | ||||
<t>Tom Verhaeg</t> | <!-- [rfced] References | |||
<t>Juniper Networks</t> | ||||
<t>Email: tverhaeg@juniper.net</t> | ||||
</section> | ||||
</middle> | a) Please review the following reference. We have added the following URL: | |||
https://www.iso.org/standard/30932.html and updated the title to reflect the | ||||
accurate title of this ISO/IEC standard. Please let us know if you have any | ||||
objections. | ||||
<back> | Original: | |||
[ISO10589-Second-Edition] | ||||
International Organization for Standardization, | ||||
"Intermediate system to Intermediate system intra-domain | ||||
routing information exchange protocol for use in | ||||
conjunction with the protocol for providing the | ||||
connectionless-mode Network Service (ISO 8473)", November | ||||
2002. | ||||
<displayreference target="I-D.ietf-rift-rift" to="RIFT"/> | Current: | |||
[ISO10589-Second-Edition] | ||||
ISO/IEC, "Information technology - Telecommunications and | ||||
information exchange between systems - Intermediate System | ||||
to Intermediate System intra-domain routeing information | ||||
exchange protocol for use in conjunction with the protocol | ||||
for providing the connectionless-mode network service (ISO | ||||
8473)", ISO/IEC 10589:2002, November 2002, | ||||
<https://www.iso.org/standard/30932.html>. | ||||
<references><name>Normative References</name> | b) Please review the following reference. We found a URL | |||
<reference anchor='ISO10589-Second-Edition'> | (https://www.broadband-forum.org/pdfs/tr-384-1-0-0.pdf) that matches the | |||
information provided in this reference and updated the reference as | ||||
follows. Please let us know any objections. | ||||
<front> | Original: | |||
<title>Intermediate system to Intermediate system intra-domain | [TR-384] Broadband Forum Technical Report, "TR-384 Cloud Central | |||
routing information exchange protocol for use in | Office Reference Architectural Framework", January 2018. | |||
conjunction with the protocol for providing the | ||||
connectionless-mode Network Service (ISO 8473)</title> | Current: | |||
[TR-384] Broadband Forum Technical Report, "TR-384: Cloud Central | ||||
Office Reference Architectural Framework", TR-384, Issue | ||||
1, January 2018, | ||||
<https://www.broadband-forum.org/pdfs/tr-384-1-0-0.pdf>. | ||||
c) Please review the following reference. We found the following URL: | ||||
https://ieeexplore.ieee.org/document/6012836. We have added this URL to this | ||||
reference. Please let us know if you have any objections. | ||||
Original: | ||||
[CLOS] Yuan, X., "On Nonblocking Folded-Clos Networks in Computer | ||||
Communication Environments", IEEE International Parallel & | ||||
Distributed Processing Symposium, 2011. | ||||
Current: | ||||
[CLOS] Yuan, X., "On Nonblocking Folded-Clos Networks in Computer | ||||
Communication Environments", 2011 IEEE International | ||||
Parallel & Distributed Processing Symposium, | ||||
DOI 10.1109/IPDPS.2011.27, May 2011, | ||||
<https://ieeexplore.ieee.org/document/6012836>. | ||||
d) Please review. We found the following URL for this reference: | ||||
https://ieeexplore.ieee.org/document/6312192. We have added this URL to this | ||||
reference. Please let us know if you have any objections | ||||
Original: | ||||
[FATTREE] Leiserson, C. E., "Fat-Trees: Universal Networks for | ||||
Hardware-Efficient Supercomputing", 1985. | ||||
Current: | ||||
[FATTREE] Leiserson, C. E., "Fat-Trees: Universal Networks for | ||||
Hardware-Efficient Supercomputing", IEEE Transactions on | ||||
Computers, vol. C-34, no. 10, pp. 892-901, | ||||
DOI 10.1109/TC.1985.6312192, October 1985, | ||||
<https://ieeexplore.ieee.org/document/6312192>. | ||||
e) Please review. We found the following URL for this reference: | ||||
https://www.broadband-forum.org/download/af-pnni-0055.001.pdf. We have added | ||||
this URL to this reference. Additionally, please note that the original date | ||||
for this reference was 2003. We were unable to find a version of this | ||||
reference with that date. The version we found at the URL has a date of April | ||||
2002, so we updated the reference as follows for consistency. Please let us know | ||||
if you have any objections. | ||||
Original: | ||||
[PNNI] ATM Forum Technical Committee, "Private Network-Network | ||||
Interface Specification, Version 1.1 (PNNI 1.1), af-pnni- | ||||
0055.002", 2003. | ||||
Current: | ||||
[PNNI] The ATM Forum Technical Committee, "Private Network- | ||||
Network Interface - Specification Version 1.1 - (PNNI | ||||
1.1)", af-pnni-0055.001, April 2002, | ||||
<https://www.broadband-forum.org/download/af-pnni- | ||||
0055.001.pdf>. | ||||
--> | ||||
<reference anchor='ISO10589-Second-Edition' target="https://www.iso.org/standard | ||||
/30932.html"> | ||||
<front> | ||||
<title>Information technology - Telecommunications and information | ||||
exchange between systems - Intermediate System to Intermediate System | ||||
intra-domain routeing information exchange protocol for use in conjunction | ||||
with the protocol for providing the connectionless-mode network service (ISO | ||||
8473)</title> | ||||
<author> | <author> | |||
<organization>International Organization for Standardization</organizati on> | <organization>ISO/IEC</organization> | |||
</author> | </author> | |||
<date month='Nov' year='2002'/> | <date month='November' year='2002'/> | |||
</front> | </front> | |||
<seriesInfo name="ISO/IEC" value="10589:2002"/> | ||||
</reference> | </reference> | |||
<reference anchor='TR-384'> | <reference anchor='TR-384' target="https://www.broadband-forum.org/pdfs/tr-384-1 -0-0.pdf"> | |||
<front> | <front> | |||
<title>TR-384 Cloud Central Office Reference Architectural Framework</ti tle> | <title>TR-384: Cloud Central Office Reference Architectural Framework</t itle> | |||
<author> | <author> | |||
<organization>Broadband Forum Technical Report</organization> | <organization>Broadband Forum Technical Report</organization> | |||
</author> | </author> | |||
<date month='Jan' year='2018'/> | <date month='January' year='2018'/> | |||
</front> | </front> | |||
<refcontent>TR-384, Issue 1</refcontent> | ||||
</reference> | </reference> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2328.xml' | |||
.2328.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4861.xml' | |||
.4861.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5082.xml' | |||
.5082.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5340.xml' | |||
.5340.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5357.xml' | |||
.5357.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6518.xml' | |||
.6518.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6550.xml' | |||
.6550.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6775.xml' | |||
.6775.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7130.xml' | |||
.7130.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8655.xml' | |||
.8655.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8950.xml' | |||
.8950.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I- | ||||
D.ietf-rift-rift.xml'/> | ||||
<!--xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference | ||||
.I-D.white-distoptflood.xml'/--> | ||||
</references> | ||||
<references><name>Informative References</name> | ||||
<reference anchor="IEEEstd1588" target="https://standards.ieee.org/standard/1588 | ||||
-2019.html" quoteTitle="true" derivedAnchor="IEEEstd1588" > | ||||
<front> | ||||
<title> IEEE Standard for a Precision Clock Synchronization Protocol for N etworked Measurement and Control Systems | <!-- [I-D.ietf-rift-rift] IESG state: RFC Ed queue as of 09/24/24; companion doc ument RFC 9692--> | |||
</title> | <reference anchor="RFC9692" target="https://www.rfc-editor.org/info/rfc9692"> | |||
<front> | ||||
<title>RIFT: Routing in Fat Trees</title> | ||||
<author fullname="Tony Przygienda" initials="T." surname="Przygienda" role="edit | ||||
or"> | ||||
<organization>Juniper Networks</organization> | ||||
</author> | ||||
<author fullname="Jordan Head" initials="J." surname="Head" role="editor"> | ||||
<organization>Juniper Networks</organization> | ||||
</author> | ||||
<author fullname="Alankar Sharma" initials="A." surname="Sharma"> | ||||
<organization>Hudson River Trading</organization> | ||||
</author> | ||||
<author fullname="Pascal Thubert" initials="P." surname="Thubert"> | ||||
<organization>Individual</organization> | ||||
</author> | ||||
<author fullname="Bruno Rijsman" initials="B." surname="Rijsman"> | ||||
<organization>Individual</organization> | ||||
</author> | ||||
<author fullname="Dmitry Afanasiev" initials="D." surname="Afanasiev"> | ||||
<organization>Yandex</organization> | ||||
</author> | ||||
<date month="December" year="2024"/> | ||||
</front> | ||||
<seriesInfo name="RFC" value="9692"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC9692"/> | ||||
</reference> | ||||
<author> | <!--xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference .I-D.white-distoptflood.xml'/--> | |||
<organization> IEEE standard for Information Technolog y | </references> | |||
</organization> | <references><name>Informative References</name> | |||
<reference anchor="IEEEstd1588" target="https://ieeexplore.ieee.org/document/912 | ||||
0376"> | ||||
<front> | ||||
<title>IEEE Standard for a Precision Clock Synchronization Protocol for Networ | ||||
ked Measurement and Control Systems</title> | ||||
<author> | ||||
<organization>IEEE</organization> | ||||
</author> | </author> | |||
<date month="June" year="2020"/> | ||||
<date/> | ||||
</front> | </front> | |||
<seriesInfo name="IEEE Std" value="1588-2019"/> | ||||
<seriesInfo name="DOI" value="10.1109/IEEESTD.2020.9120376"/> | ||||
</reference> | </reference> | |||
<reference anchor="CLOS"> | <reference anchor="CLOS" target="https://ieeexplore.ieee.org/document/6012836"> | |||
<front> | <front> | |||
<title>On Nonblocking Folded-Clos Networks in | <title>On Nonblocking Folded-Clos Networks in Computer Communication Env | |||
Computer Communication Environments</title> | ironments</title> | |||
<author initials="X." surname="Yuan"> | <author initials="X." surname="Yuan"/> | |||
<organization>IEEE International Parallel & | <date month="May" year="2011"/> | |||
Distributed Processing Symposium</organization> | ||||
</author> | ||||
<date year="2011"/> | ||||
</front> | </front> | |||
<seriesInfo name="IEEE" value="International Parallel & | <refcontent>2011 IEEE International Parallel & Distributed Processing Sy | |||
Distributed Processing Symposium"/> | mposium</refcontent> | |||
<seriesInfo name="DOI" value="10.1109/IPDPS.2011.27"/> | ||||
</reference> | </reference> | |||
<reference anchor="FATTREE"> | <reference anchor="FATTREE" target="https://ieeexplore.ieee.org/document/6312192 "> | |||
<front> | <front> | |||
<title>Fat-Trees: Universal Networks for Hardware-Efficient | <title>Fat-Trees: Universal Networks for Hardware-Efficient Supercomputi | |||
Supercomputing</title> | ng</title> | |||
<author initials="C. E." surname="Leiserson"> | <author initials="C. E." surname="Leiserson"> | |||
<organization>IEEE Transactions on Computers</organization> | ||||
</author> | </author> | |||
<date year="1985"/> | <date month="October" year="1985"/> | |||
</front> | </front> | |||
<refcontent>IEEE Transactions on Computers, vol. C-34, no. 10, pp. 892-901</ | ||||
refcontent> | ||||
<seriesInfo name="DOI" value="10.1109/TC.1985.6312192"/> | ||||
</reference> | </reference> | |||
<reference anchor="PNNI"> | <reference anchor="PNNI" target="https://www.broadband-forum.org/download/af-pnni-0055.001.pdf"> | |||
<front> | <front> | |||
<title>Private Network-Network Interface Specification, Version 1.1 (PNNI 1.1), af-pnni-0055.002</title> | <title>Private Network-Network Interface - Specification Version 1.1 - (PNNI 1.1)</title> | |||
<author> | <author> | |||
<organization>ATM Forum Technical Committee</organization> | <organization>The ATM Forum Technical Committee</organization> | |||
</author> | </author> | |||
<date year="2003"/> | <date month="April" year="2002"/> | |||
</front> | </front> | |||
<refcontent>af-pnni-0055.001</refcontent> | ||||
</reference> | </reference> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3626.xml' | |||
.3626.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4271.xml' | |||
.4271.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5905.xml' | |||
.5905.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8200.xml' | |||
.8200.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8505.xml' | |||
.8505.xml'/> | /> | |||
<xi:include href='https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC | <xi:include href='https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8928.xml' | |||
.8928.xml'/> | /> | |||
</references> | ||||
</references> | </references> | |||
<section title="Acknowledgments" numbered="false"> | ||||
<t> | ||||
The authors would like to thank <contact fullname="Jaroslaw Kowalczyk"/> | ||||
, <contact fullname="Alvaro Retana"/>, <contact | ||||
fullname="Jim Guichard"/>, and <contact fullname="Jeffrey Zhang"/> for providing | ||||
invaluable concepts and content for this document. | ||||
</t> | ||||
</section> | ||||
<section anchor='Contributors' numbered='false'><name>Contributors</name> | ||||
<t> | ||||
The following people contributed substantially to the content of this | ||||
document and should be considered coauthors:</t> | ||||
<contact fullname="Jordan Head"> | ||||
<organization>Juniper Networks</organization> | ||||
<address> | ||||
<email>jhead@juniper.net</email> | ||||
</address> | ||||
</contact> | ||||
<contact fullname="Tom Verhaeg"> | ||||
<organization>Juniper Networks</organization> | ||||
<address> | ||||
<email>tverhaeg@juniper.net</email> | ||||
</address> | ||||
</contact> | ||||
</section> | ||||
<!-- [rfced] The following terminology appears to be used inconsistently. | ||||
Please let us know how we should update for consistency. | ||||
North Prefix TIE vs. Prefix North TIE | ||||
South Prefix TIE vs. South North TIE --> | ||||
<!-- [rfced] Please review the "Inclusive Language" portion of the online | ||||
Style Guide <https://www.rfc-editor.org/styleguide/part2/#inclusive_language> | ||||
and let us know if any changes are needed. Updates of this nature typically | ||||
result in more precise language, which is helpful for readers. | ||||
For example, please consider whether the terms "black" and "natively" should be updated. | ||||
In addition, please consider whether "traditional" should be updated for clarity. | ||||
While the NIST website | ||||
<https://www.nist.gov/nist-research-library/nist-technical-series-publications-a | ||||
uthor-instructions#table1> | ||||
indicates that this term is potentially biased, it is also ambiguous. | ||||
"Tradition" is a subjective term, as it is not the same for everyone. --> | ||||
<!-- [rfced] FYI - We have added expansions for the following abbreviations | ||||
per Section 3.6 of RFC 7322 ("RFC Style Guide"). Please review each | ||||
expansion in the document carefully to ensure correctness. | ||||
Bidirectional Forwarding Detection (BFD) | ||||
Key Management Protocol (KMP) | ||||
Mobile Ad Hoc Network (MANET) | ||||
Optimized Link State Routing (OLSR) | ||||
Private Network-Network Interface (PNNI) | ||||
Routing Protocol for Low-Power and Lossy Networks (RPL) | ||||
--> | ||||
</back> | </back> | |||
</rfc> | </rfc> | |||
End of changes. 245 change blocks. | ||||
829 lines changed or deleted | 1152 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. |