Skip to content

Commit 80a9e3c

Browse files
authored
Merge pull request #11981 from swiftlang/eng/blangmuir/stable-validate-action-cache-cas-refs
🍒[llvm][cas] Improve UnifiedOnDiskActionCache validation to check cas refs
2 parents 45f2435 + 9cdc71d commit 80a9e3c

File tree

4 files changed

+57
-21
lines changed

4 files changed

+57
-21
lines changed

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -278,16 +278,23 @@ class OnDiskGraphDB {
278278
///
279279
/// Returns \p nullopt if the object is not stored in this CAS.
280280
LLVM_ABI_FOR_TEST std::optional<ObjectID>
281-
getExistingReference(ArrayRef<uint8_t> Digest);
281+
getExistingReference(ArrayRef<uint8_t> Digest, bool CheckUpstream = true);
282282

283283
/// Check whether the object associated with \p Ref is stored in the CAS.
284284
/// Note that this function will fault-in according to the policy.
285285
Expected<bool> isMaterialized(ObjectID Ref);
286286

287287
/// Check whether the object associated with \p Ref is stored in the CAS.
288288
/// Note that this function does not fault-in.
289-
bool containsObject(ObjectID Ref) const {
290-
return containsObject(Ref, /*CheckUpstream=*/true);
289+
bool containsObject(ObjectID Ref, bool CheckUpstream = true) const {
290+
switch (getObjectPresence(Ref, CheckUpstream)) {
291+
case ObjectPresence::Missing:
292+
return false;
293+
case ObjectPresence::InPrimaryDB:
294+
return true;
295+
case ObjectPresence::OnlyInUpstreamDB:
296+
return true;
297+
}
291298
}
292299

293300
/// \returns the data part of the provided object handle.
@@ -360,17 +367,6 @@ class OnDiskGraphDB {
360367
LLVM_ABI_FOR_TEST ObjectPresence
361368
getObjectPresence(ObjectID Ref, bool CheckUpstream) const;
362369

363-
bool containsObject(ObjectID Ref, bool CheckUpstream) const {
364-
switch (getObjectPresence(Ref, CheckUpstream)) {
365-
case ObjectPresence::Missing:
366-
return false;
367-
case ObjectPresence::InPrimaryDB:
368-
return true;
369-
case ObjectPresence::OnlyInUpstreamDB:
370-
return true;
371-
}
372-
}
373-
374370
/// When \p load is called for a node that doesn't exist, this function tries
375371
/// to load it from the upstream store and copy it to the primary one.
376372
Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);
@@ -404,6 +400,10 @@ class OnDiskGraphDB {
404400

405401
IndexProxy getIndexProxyFromRef(InternalRef Ref) const;
406402

403+
// FIXME: on newer branches we have refactored getIndexProxyFromRef to return
404+
// Expected<IndexProxy>. As a stop gap, provide a checked API.
405+
Expected<IndexProxy> getIndexProxyFromRefChecked(InternalRef Ref) const;
406+
407407
static InternalRef makeInternalRef(FileOffset IndexOffset);
408408

409409
IndexProxy

llvm/lib/CAS/OnDiskGraphDB.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,10 +1102,11 @@ ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
11021102
}
11031103

11041104
std::optional<ObjectID>
1105-
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) {
1105+
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest,
1106+
bool CheckUpstream) {
11061107
auto tryUpstream =
11071108
[&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1108-
if (!UpstreamDB)
1109+
if (!CheckUpstream || !UpstreamDB)
11091110
return std::nullopt;
11101111
std::optional<ObjectID> UpstreamID =
11111112
UpstreamDB->getExistingReference(Digest);
@@ -1138,6 +1139,15 @@ OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
11381139
return getIndexProxyFromPointer(P);
11391140
}
11401141

1142+
Expected<OnDiskGraphDB::IndexProxy>
1143+
OnDiskGraphDB::getIndexProxyFromRefChecked(InternalRef Ref) const {
1144+
OnDiskHashMappedTrie::const_pointer P =
1145+
Index.recoverFromFileOffset(Ref.getFileOffset());
1146+
if (LLVM_UNLIKELY(!P))
1147+
return createStringError(make_error_code(std::errc::protocol_error), "corrupt internal reference");
1148+
return getIndexProxyFromPointer(P);
1149+
}
1150+
11411151
ArrayRef<uint8_t> OnDiskGraphDB::getDigest(InternalRef Ref) const {
11421152
IndexProxy I = getIndexProxyFromRef(Ref);
11431153
return I.Hash;
@@ -1232,14 +1242,19 @@ OnDiskGraphDB::ObjectPresence
12321242
OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
12331243
bool CheckUpstream) const {
12341244
InternalRef Ref = getInternalRef(ExternalRef);
1235-
IndexProxy I = getIndexProxyFromRef(Ref);
1236-
TrieRecord::Data Object = I.Ref.load();
1245+
Expected<IndexProxy> I = getIndexProxyFromRefChecked(Ref);
1246+
if (!I) {
1247+
// FIXME: this decision should be migrated to callers.
1248+
consumeError(I.takeError());
1249+
return ObjectPresence::Missing;
1250+
}
1251+
TrieRecord::Data Object = I->Ref.load();
12371252
if (Object.SK != TrieRecord::StorageKind::Unknown)
12381253
return ObjectPresence::InPrimaryDB;
12391254
if (!CheckUpstream || !UpstreamDB)
12401255
return ObjectPresence::Missing;
12411256
std::optional<ObjectID> UpstreamID =
1242-
UpstreamDB->getExistingReference(getDigest(I));
1257+
UpstreamDB->getExistingReference(getDigest(*I));
12431258
return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
12441259
: ObjectPresence::Missing;
12451260
}

llvm/lib/CAS/UnifiedOnDiskCache.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,24 @@ Error UnifiedOnDiskCache::validateActionCache() {
165165
return createStringError(
166166
llvm::errc::illegal_byte_sequence,
167167
"bad record at 0x" +
168-
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
169-
Msg.str());
168+
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) +
169+
" ref=0x" + utohexstr(ID.getOpaqueData(), /*LowerCase=*/true) +
170+
": " + Msg.str());
170171
};
171172
if (ID.getOpaqueData() == 0)
172173
return formatError("zero is not a valid ref");
174+
// Check containsObject first, because other API assumes a valid ObjectID.
175+
if (!getGraphDB().containsObject(ID, /*CheckUpstream=*/false))
176+
return formatError("ref is not in cas index");
177+
auto Hash = getGraphDB().getDigest(ID);
178+
auto Ref = getGraphDB().getExistingReference(Hash, /*CheckUpstream=*/false);
179+
assert(Ref && "missing object passed containsObject check?");
180+
if (!Ref)
181+
return formatError("ref is not in cas index after contains");
182+
if (*Ref != ID)
183+
return formatError("ref does not match indexed offset " +
184+
utohexstr(Ref->getOpaqueData(), /*LowerCase=*/true) +
185+
" for hash " + toHex(Hash));
173186
return Error::success();
174187
};
175188
if (Error E = PrimaryKVDB->validate(ValidateRef))

llvm/test/tools/llvm-cas/validation.test

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ RUN: --data - >%t/abc.casid
2626

2727
RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid
2828
RUN: llvm-cas --cas %t/ac --validate
29+
30+
# Check that validation fails if the objects referenced are missing.
31+
RUN: mv %t/ac/v1.1/v9.index %t/tmp.v9.index
32+
RUN: not llvm-cas --cas %t/ac --validate
33+
34+
RUN: mv %t/tmp.v9.index %t/ac/v1.1/v9.index
35+
RUN: llvm-cas --cas %t/ac --validate
36+
2937
# Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last
3038
# allocated record, leaving it invalid.
3139
RUN: truncate -s -40 %t/ac/v1.1/v4.actions

0 commit comments

Comments
 (0)