Skip to content

Commit ec3aec1

Browse files
fix: remove stale "contains" docs on rfc-index sync (#9049)
* fix: clean out stale "contains" docs correctly
* style: ruff ruff + copyright
* refactor: safer "contains" relation removal
* test: replacement of subseries doc in sync
1 parent e8252aa commit ec3aec1

File tree

2 files changed

+152
-17
lines changed

2 files changed

+152
-17
lines changed

ietf/sync/rfceditor.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright The IETF Trust 2012-2020, All Rights Reserved
1+
# Copyright The IETF Trust 2012-2025, All Rights Reserved
22
# -*- coding: utf-8 -*-
33

44

@@ -12,6 +12,7 @@
1212
from xml.dom import pulldom, Node
1313

1414
from django.conf import settings
15+
from django.db import transaction
1516
from django.db.models import Subquery, OuterRef, F, Q
1617
from django.utils import timezone
1718
from django.utils.encoding import smart_bytes, force_str
@@ -30,9 +31,9 @@
3031
from ietf.utils.mail import send_mail_text
3132
from ietf.utils.timezone import datetime_from_date, RPC_TZINFO
3233

33-
#QUEUE_URL = "https://www.rfc-editor.org/queue2.xml"
34-
#INDEX_URL = "https://www.rfc-editor.org/rfc/rfc-index.xml"
35-
#POST_APPROVED_DRAFT_URL = "https://www.rfc-editor.org/sdev/jsonexp/jsonparser.php"
34+
# QUEUE_URL = "https://www.rfc-editor.org/queue2.xml"
35+
# INDEX_URL = "https://www.rfc-editor.org/rfc/rfc-index.xml"
36+
# POST_APPROVED_DRAFT_URL = "https://www.rfc-editor.org/sdev/jsonexp/jsonparser.php"
3637

3738
MIN_ERRATA_RESULTS = 5000
3839
MIN_INDEX_RESULTS = 8000
@@ -427,7 +428,7 @@ def update_docs_from_rfc_index(
427428
pass
428429
# Logging below warning turns out to be unhelpful - there are many references
429430
# to such things in the index:
430-
# * all april-1 RFCs have an internal name that looks like a draft name, but there
431+
# * all april-1 RFCs have an internal name that looks like a draft name, but there
431432
# was never such a draft. More of these will exist in the future
432433
# * Several documents were created with out-of-band input to the RFC-editor, for a
433434
# variety of reasons.
@@ -436,7 +437,7 @@ def update_docs_from_rfc_index(
436437
# If there is no draft to point to, don't point to one, even if there was an RPC
437438
# internal name in use (and in the RPC database). This will be a requirement on the
438439
# reimplementation of the creation of the rfc-index.
439-
#
440+
#
440441
# log(f"Warning: RFC index for {rfc_number} referred to unknown draft {draft_name}")
441442

442443
# Find or create the RFC document
@@ -466,7 +467,7 @@ def update_docs_from_rfc_index(
466467
if draft:
467468
doc.formal_languages.set(draft.formal_languages.all())
468469
for author in draft.documentauthor_set.all():
469-
# Copy the author but point at the new doc.
470+
# Copy the author but point at the new doc.
470471
# See https://docs.djangoproject.com/en/4.2/topics/db/queries/#copying-model-instances
471472
author.pk = None
472473
author.id = None
@@ -707,12 +708,27 @@ def parse_relation_list(l):
707708
subseries_doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Added {doc.name} to {subseries_doc.name}")
708709
rfc_events.append(doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Added {doc.name} to {subseries_doc.name}"))
709710

710-
for subdoc in doc.related_that("contains"):
711-
if subdoc.name not in also:
712-
assert(not first_sync_creating_subseries)
713-
subseries_doc.relateddocument_set.filter(target=subdoc).delete()
714-
rfc_events.append(doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Removed {doc.name} from {subseries_doc.name}"))
715-
subseries_doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Removed {doc.name} from {subseries_doc.name}")
711+
# Delete subseries relations that are no longer current. Use a transaction
712+
# so we are sure we iterate over the same relations that we delete!
713+
with transaction.atomic():
714+
stale_subseries_relations = doc.relations_that("contains").exclude(
715+
source__name__in=also
716+
)
717+
for stale_relation in stale_subseries_relations:
718+
stale_subseries_doc = stale_relation.source
719+
rfc_events.append(
720+
doc.docevent_set.create(
721+
type="sync_from_rfc_editor",
722+
by=system,
723+
desc=f"Removed {doc.name} from {stale_subseries_doc.name}",
724+
)
725+
)
726+
stale_subseries_doc.docevent_set.create(
727+
type="sync_from_rfc_editor",
728+
by=system,
729+
desc=f"Removed {doc.name} from {stale_subseries_doc.name}",
730+
)
731+
stale_subseries_relations.delete()
716732

717733
doc_errata = errata.get(f"RFC{rfc_number}", [])
718734
all_rejected = doc_errata and all(
@@ -754,9 +770,9 @@ def parse_relation_list(l):
754770
)
755771
doc.save_with_history(rfc_events)
756772
yield rfc_number, rfc_changes, doc, rfc_published # yield changes to the RFC
757-
773+
758774
if first_sync_creating_subseries:
759-
# First - create the known subseries documents that have ghosted.
775+
# First - create the known subseries documents that have ghosted.
760776
# The RFC editor (as of 31 Oct 2023) claims these subseries docs do not exist.
761777
# The datatracker, on the other hand, will say that the series doc currently contains no RFCs.
762778
for name in ["fyi17", "std1", "bcp12", "bcp113", "bcp66"]:
@@ -769,7 +785,6 @@ def parse_relation_list(l):
769785
subseries_slug = name[:3]
770786
subseries_doc.docevent_set.create(type=f"{subseries_slug}_history_marker", by=system, desc=f"No history of this {subseries_slug.upper()} document is currently available in the datatracker before this point")
771787

772-
773788
RelatedDocument.objects.filter(
774789
Q(originaltargetaliasname__startswith="bcp") |
775790
Q(originaltargetaliasname__startswith="std") |

ietf/sync/tests.py

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,13 @@
2020
import debug # pyflakes:ignore
2121

2222
from ietf.api.views import EmailIngestionError
23-
from ietf.doc.factories import WgDraftFactory, RfcFactory, DocumentAuthorFactory, DocEventFactory
23+
from ietf.doc.factories import (
24+
WgDraftFactory,
25+
RfcFactory,
26+
DocumentAuthorFactory,
27+
DocEventFactory,
28+
BcpFactory,
29+
)
2430
from ietf.doc.models import Document, DocEvent, DeletedEvent, DocTagName, RelatedDocument, State, StateDocEvent
2531
from ietf.doc.utils import add_state_change_event
2632
from ietf.group.factories import GroupFactory
@@ -508,6 +514,120 @@ def test_rfc_index(self):
508514
changed = list(rfceditor.update_docs_from_rfc_index(data, errata, today - datetime.timedelta(days=30)))
509515
self.assertEqual(len(changed), 0)
510516

517+
def test_rfc_index_subseries_replacement(self):
518+
today = date_today()
519+
author = PersonFactory(name="Some Bozo")
520+
521+
# Start with two BCPs, each containing an rfc
522+
rfc1, rfc2, rfc3 = RfcFactory.create_batch(3, authors=[author])
523+
bcp1 = BcpFactory(contains=[rfc1])
524+
bcp2 = BcpFactory(contains=[rfc2])
525+
526+
def _nameify(doc):
527+
"""Convert a name like 'rfc1' to 'RFC0001'"""
528+
return f"{doc.name[:3].upper()}{int(doc.name[3:]):04d}"
529+
530+
# RFC index that replaces rfc2 with rfc3 in bcp2
531+
index_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
532+
<rfc-index xmlns="http://www.rfc-editor.org/rfc-index"
533+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
534+
xsi:schemaLocation="http://www.rfc-editor.org/rfc-index
535+
http://www.rfc-editor.org/rfc-index.xsd">
536+
<bcp-entry>
537+
<doc-id>{_nameify(bcp1)}</doc-id>
538+
<is-also>
539+
<doc-id>{_nameify(rfc1)}</doc-id>
540+
</is-also>
541+
</bcp-entry>
542+
<bcp-entry>
543+
<doc-id>{_nameify(bcp2)}</doc-id>
544+
<is-also>
545+
<doc-id>{_nameify(rfc3)}</doc-id>
546+
</is-also>
547+
</bcp-entry>
548+
<rfc-entry>
549+
<doc-id>{_nameify(rfc1)}</doc-id>
550+
<title>{rfc1.title}</title>
551+
<author>
552+
<name>Some Bozo</name>
553+
</author>
554+
<date>
555+
<month>{today.strftime('%B')}</month>
556+
<year>{today.strftime('%Y')}</year>
557+
</date>
558+
<format>
559+
<file-format>ASCII</file-format>
560+
</format>
561+
<page-count>42</page-count>
562+
<keywords>
563+
<kw>test</kw>
564+
</keywords>
565+
<abstract><p>This is some interesting text.</p></abstract>
566+
<is-also>
567+
<doc-id>{_nameify(bcp1)}</doc-id>
568+
</is-also>
569+
<current-status>PROPOSED STANDARD</current-status>
570+
<publication-status>PROPOSED STANDARD</publication-status>
571+
<stream>IETF</stream>
572+
</rfc-entry>
573+
<rfc-entry>
574+
<doc-id>{_nameify(rfc2)}</doc-id>
575+
<title>{rfc2.title}</title>
576+
<author>
577+
<name>Some Bozo</name>
578+
</author>
579+
<date>
580+
<month>{today.strftime('%B')}</month>
581+
<year>{today.strftime('%Y')}</year>
582+
</date>
583+
<format>
584+
<file-format>ASCII</file-format>
585+
</format>
586+
<page-count>42</page-count>
587+
<keywords>
588+
<kw>test</kw>
589+
</keywords>
590+
<abstract><p>This is some interesting text.</p></abstract>
591+
<current-status>PROPOSED STANDARD</current-status>
592+
<publication-status>PROPOSED STANDARD</publication-status>
593+
<stream>IETF</stream>
594+
</rfc-entry>
595+
<rfc-entry>
596+
<doc-id>{_nameify(rfc3)}</doc-id>
597+
<title>{rfc3.title}</title>
598+
<author>
599+
<name>Some Bozo</name>
600+
</author>
601+
<date>
602+
<month>{today.strftime('%B')}</month>
603+
<year>{today.strftime('%Y')}</year>
604+
</date>
605+
<format>
606+
<file-format>ASCII</file-format>
607+
</format>
608+
<page-count>42</page-count>
609+
<keywords>
610+
<kw>test</kw>
611+
</keywords>
612+
<abstract><p>This is some interesting text.</p></abstract>
613+
<is-also>
614+
<doc-id>{_nameify(bcp2)}</doc-id>
615+
</is-also>
616+
<current-status>PROPOSED STANDARD</current-status>
617+
<publication-status>PROPOSED STANDARD</publication-status>
618+
<stream>IETF</stream>
619+
</rfc-entry>
620+
</rfc-index>"""
621+
data = rfceditor.parse_index(io.StringIO(index_xml)) # parse index
622+
self.assertEqual(len(data), 3) # check that we parsed 3 RFCs
623+
# Process the data by consuming the generator
624+
for _ in rfceditor.update_docs_from_rfc_index(data, []):
625+
pass
626+
# Confirm that the expected changes were made
627+
self.assertCountEqual(rfc1.related_that("contains"), [bcp1])
628+
self.assertCountEqual(rfc2.related_that("contains"), [])
629+
self.assertCountEqual(rfc3.related_that("contains"), [bcp2])
630+
511631
def _generate_rfc_queue_xml(self, draft, state, auth48_url=None):
512632
"""Generate an RFC queue xml string for a draft"""
513633
t = '''<rfc-editor-queue xmlns="http://www.rfc-editor.org/rfc-editor-queue">

0 commit comments

Comments
 (0)