Skip to content

Commit 9cdc71d

Browse files
committed
[llvm][cas] Improve UnifiedOnDiskActionCache validation to check cas refs (llvm#171732)
Check that action cache references point to valid CAS objects by ensuring they are contained within the corresponding CAS and also that the offsets match. This prevents accidentally referencing "dead" index records that were not properly flushed to disk, which can lead to the action cache pointing to the wrong data or to garbage data. rdar://126642956 (cherry picked from commit a451ff0)
1 parent cff3418 commit 9cdc71d

File tree

4 files changed

+57
-21
lines changed

4 files changed

+57
-21
lines changed

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -278,16 +278,23 @@ class OnDiskGraphDB {
278278
///
279279
/// Returns \p nullopt if the object is not stored in this CAS.
280280
LLVM_ABI_FOR_TEST std::optional<ObjectID>
281-
getExistingReference(ArrayRef<uint8_t> Digest);
281+
getExistingReference(ArrayRef<uint8_t> Digest, bool CheckUpstream = true);
282282

283283
/// Check whether the object associated with \p Ref is stored in the CAS.
284284
/// Note that this function will fault-in according to the policy.
285285
Expected<bool> isMaterialized(ObjectID Ref);
286286

287287
/// Check whether the object associated with \p Ref is stored in the CAS.
288288
/// Note that this function does not fault-in.
289-
bool containsObject(ObjectID Ref) const {
290-
return containsObject(Ref, /*CheckUpstream=*/true);
289+
bool containsObject(ObjectID Ref, bool CheckUpstream = true) const {
290+
switch (getObjectPresence(Ref, CheckUpstream)) {
291+
case ObjectPresence::Missing:
292+
return false;
293+
case ObjectPresence::InPrimaryDB:
294+
return true;
295+
case ObjectPresence::OnlyInUpstreamDB:
296+
return true;
297+
}
291298
}
292299

293300
/// \returns the data part of the provided object handle.
@@ -360,17 +367,6 @@ class OnDiskGraphDB {
360367
LLVM_ABI_FOR_TEST ObjectPresence
361368
getObjectPresence(ObjectID Ref, bool CheckUpstream) const;
362369

363-
bool containsObject(ObjectID Ref, bool CheckUpstream) const {
364-
switch (getObjectPresence(Ref, CheckUpstream)) {
365-
case ObjectPresence::Missing:
366-
return false;
367-
case ObjectPresence::InPrimaryDB:
368-
return true;
369-
case ObjectPresence::OnlyInUpstreamDB:
370-
return true;
371-
}
372-
}
373-
374370
/// When \p load is called for a node that doesn't exist, this function tries
375371
/// to load it from the upstream store and copy it to the primary one.
376372
Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);
@@ -404,6 +400,10 @@ class OnDiskGraphDB {
404400

405401
IndexProxy getIndexProxyFromRef(InternalRef Ref) const;
406402

403+
// FIXME: on newer branches we have refactored getIndexProxyFromRef to return
404+
// Expected<IndexProxy>. As a stop gap, provide a checked API.
405+
Expected<IndexProxy> getIndexProxyFromRefChecked(InternalRef Ref) const;
406+
407407
static InternalRef makeInternalRef(FileOffset IndexOffset);
408408

409409
IndexProxy

llvm/lib/CAS/OnDiskGraphDB.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,10 +1102,11 @@ ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
11021102
}
11031103

11041104
std::optional<ObjectID>
1105-
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) {
1105+
OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest,
1106+
bool CheckUpstream) {
11061107
auto tryUpstream =
11071108
[&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1108-
if (!UpstreamDB)
1109+
if (!CheckUpstream || !UpstreamDB)
11091110
return std::nullopt;
11101111
std::optional<ObjectID> UpstreamID =
11111112
UpstreamDB->getExistingReference(Digest);
@@ -1138,6 +1139,15 @@ OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
11381139
return getIndexProxyFromPointer(P);
11391140
}
11401141

1142+
Expected<OnDiskGraphDB::IndexProxy>
1143+
OnDiskGraphDB::getIndexProxyFromRefChecked(InternalRef Ref) const {
1144+
OnDiskHashMappedTrie::const_pointer P =
1145+
Index.recoverFromFileOffset(Ref.getFileOffset());
1146+
if (LLVM_UNLIKELY(!P))
1147+
return createStringError(make_error_code(std::errc::protocol_error), "corrupt internal reference");
1148+
return getIndexProxyFromPointer(P);
1149+
}
1150+
11411151
ArrayRef<uint8_t> OnDiskGraphDB::getDigest(InternalRef Ref) const {
11421152
IndexProxy I = getIndexProxyFromRef(Ref);
11431153
return I.Hash;
@@ -1232,14 +1242,19 @@ OnDiskGraphDB::ObjectPresence
12321242
OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
12331243
bool CheckUpstream) const {
12341244
InternalRef Ref = getInternalRef(ExternalRef);
1235-
IndexProxy I = getIndexProxyFromRef(Ref);
1236-
TrieRecord::Data Object = I.Ref.load();
1245+
Expected<IndexProxy> I = getIndexProxyFromRefChecked(Ref);
1246+
if (!I) {
1247+
// FIXME: this decision should be migrated to callers.
1248+
consumeError(I.takeError());
1249+
return ObjectPresence::Missing;
1250+
}
1251+
TrieRecord::Data Object = I->Ref.load();
12371252
if (Object.SK != TrieRecord::StorageKind::Unknown)
12381253
return ObjectPresence::InPrimaryDB;
12391254
if (!CheckUpstream || !UpstreamDB)
12401255
return ObjectPresence::Missing;
12411256
std::optional<ObjectID> UpstreamID =
1242-
UpstreamDB->getExistingReference(getDigest(I));
1257+
UpstreamDB->getExistingReference(getDigest(*I));
12431258
return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
12441259
: ObjectPresence::Missing;
12451260
}

llvm/lib/CAS/UnifiedOnDiskCache.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,24 @@ Error UnifiedOnDiskCache::validateActionCache() {
165165
return createStringError(
166166
llvm::errc::illegal_byte_sequence,
167167
"bad record at 0x" +
168-
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
169-
Msg.str());
168+
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) +
169+
" ref=0x" + utohexstr(ID.getOpaqueData(), /*LowerCase=*/true) +
170+
": " + Msg.str());
170171
};
171172
if (ID.getOpaqueData() == 0)
172173
return formatError("zero is not a valid ref");
174+
// Check containsObject first, because other API assumes a valid ObjectID.
175+
if (!getGraphDB().containsObject(ID, /*CheckUpstream=*/false))
176+
return formatError("ref is not in cas index");
177+
auto Hash = getGraphDB().getDigest(ID);
178+
auto Ref = getGraphDB().getExistingReference(Hash, /*CheckUpstream=*/false);
179+
assert(Ref && "missing object passed containsObject check?");
180+
if (!Ref)
181+
return formatError("ref is not in cas index after contains");
182+
if (*Ref != ID)
183+
return formatError("ref does not match indexed offset " +
184+
utohexstr(Ref->getOpaqueData(), /*LowerCase=*/true) +
185+
" for hash " + toHex(Hash));
173186
return Error::success();
174187
};
175188
if (Error E = PrimaryKVDB->validate(ValidateRef))

llvm/test/tools/llvm-cas/validation.test

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ RUN: --data - >%t/abc.casid
2626

2727
RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid
2828
RUN: llvm-cas --cas %t/ac --validate
29+
30+
# Check that validation fails if the objects referenced are missing.
31+
RUN: mv %t/ac/v1.1/v9.index %t/tmp.v9.index
32+
RUN: not llvm-cas --cas %t/ac --validate
33+
34+
RUN: mv %t/tmp.v9.index %t/ac/v1.1/v9.index
35+
RUN: llvm-cas --cas %t/ac --validate
36+
2937
# Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last
3038
# allocated record, leaving it invalid.
3139
RUN: truncate -s -40 %t/ac/v1.1/v4.actions

0 commit comments

Comments
 (0)