Skip to content

Commit c569ace

Browse files
author
Pavel Marek
committed
Implement LLVM support for PCRE2
(cherry picked from commit 01420e5)
1 parent 0868359 commit c569ace

File tree

5 files changed

+31
-15
lines changed

5 files changed

+31
-15
lines changed

com.oracle.truffle.r.native/fficall/src/truffle_common/pcre2_rffi.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ typedef void (*match_cb_t)(size_t start_idx, size_t end_idx);
4646
typedef void (*capture_cb_t)(size_t capture_idx, size_t start_idx, size_t end_idx);
4747
typedef void (*set_capture_name_cb_t)(const char *name, int index);
4848

49-
pcre2_code *call_pcre2_compile(char *pattern, uint32_t options, int *error_code, int *error_offset);
49+
pcre2_code *call_pcre2_compile(uint8_t *pattern, size_t pattern_len, uint32_t options, int *error_code, size_t *error_offset);
5050
uint32_t call_pcre2_capture_count(pcre2_code *re);
5151
/**
5252
* Returns the count of all the named captures. A named capture is also a capture, so the number
@@ -77,6 +77,7 @@ int call_pcre2_match(
7777
capture_cb_t capture_cb,
7878
pcre2_code *re,
7979
uint8_t *subject,
80+
size_t subject_len,
8081
uint32_t options,
8182
int stop_after_first_match
8283
);
@@ -90,13 +91,10 @@ static void report_captures(capture_cb_t capture_cb, uint32_t capture_count, con
9091
static size_t advance_offset(size_t offset, int utf8, const uint8_t *subject, size_t subject_len);
9192

9293

93-
// TODO: error_offset should be `size_t`.
94-
pcre2_code *call_pcre2_compile(char *pattern_str, uint32_t options, int *error_code, int *error_offset)
94+
pcre2_code *call_pcre2_compile(uint8_t *pattern, size_t pattern_len, uint32_t options, int *error_code, size_t *error_offset)
9595
{
96-
uint8_t *pattern = (uint8_t *) pattern_str;
97-
size_t pattern_len = strlen(pattern);
9896
#ifdef FASTR_PCRE2_DEBUG
99-
printf("call_pcre2_compile: pattern_str='%s', pattern_len=%u, pattern=[", pattern, pattern_len);
97+
printf("call_pcre2_compile: pattern_str='%s', pattern_len=%lu, pattern=[", pattern, pattern_len);
10098
for (int i = 0; i < pattern_len; i++) {
10199
printf("%u, ", pattern[i]);
102100
}
@@ -147,7 +145,7 @@ int call_pcre2_get_capture_names(void (*set_capture_name_cb)(const char *name, i
147145
// ovector begins with match, capture groups are after that.
148146
int capture_idx = ovector_idx - 1;
149147
uint8_t *name = tabptr + 2;
150-
set_capture_name_cb((const char *)name, capture_idx);
148+
set_capture_name_cb((const char *)ensure_string(name), capture_idx);
151149
tabptr += name_entry_size;
152150
}
153151
return names_count;
@@ -158,11 +156,11 @@ int call_pcre2_match(
158156
capture_cb_t capture_cb,
159157
pcre2_code *re,
160158
uint8_t *subject,
159+
size_t subject_len,
161160
uint32_t first_match_options,
162161
int stop_after_first_match
163162
)
164163
{
165-
size_t subject_len = strlen((char *) subject);
166164
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
167165
uint32_t capture_count = call_pcre2_capture_count(re);
168166

@@ -178,6 +176,7 @@ int call_pcre2_match(
178176
// rc corresponds to the count of captured groups plus one, or to an error code if rc is negative.
179177
int rc = pcre2_match(re, subject, subject_len, 0, first_match_options, match_data, NULL);
180178
if (rc == PCRE2_ERROR_NOMATCH) {
179+
pcre2_match_data_free(match_data);
181180
return 0;
182181
} else if (rc < 0) {
183182
pcre2_match_data_free(match_data);

com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/NativeFunction.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ public enum NativeFunction {
4747
umask("(sint32): sint32", "call_base_"),
4848
cpolyroot("([double], [double], sint32, [double], [double]): sint32", "call_base_", baseLibrary(), true),
4949
// PCRE2 (in pcre2_rffi.c)
50-
compile("(string, uint32, [sint32], [sint32]): pointer", "call_pcre2_"),
51-
match("((uint32, uint32): void, (uint32, uint32, uint32): void, pointer, [uint8], uint32, sint32): sint32", "call_pcre2_"),
50+
compile("([uint8], uint32, uint32, [sint32], [uint32]): pointer", "call_pcre2_"),
51+
match("((uint32, uint32): void, (uint32, uint32, uint32): void, pointer, [uint8], uint32, uint32, sint32): sint32", "call_pcre2_"),
5252
capture_count("(pointer): uint32", "call_pcre2_"),
53-
names_count("(point): uint32", "call_pcre2_"),
53+
names_count("(pointer): uint32", "call_pcre2_"),
5454
get_capture_names("((string, sint32): void, pointer): sint32", "call_pcre2_"),
5555
match_count("(pointer, string, sint32, sint32, uint32): sint32", "call_pcre2_"),
5656
pattern_free("(pointer): void", "call_pcre2_"),

com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/PCRE2RFFI.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,9 @@ public CompileResult execute(String pattern, int options) {
420420
int[] errorOffSet = new int[]{-1};
421421
// We want to enable UTF-based matching by default.
422422
options |= Option.UTF.value;
423-
Object pcreCode = call(NativeFunction.compile, pattern, options, errorCode, errorOffSet);
423+
byte[] patternBytes = pattern.getBytes(StandardCharsets.UTF_8);
424+
NativeCharArray patternCharArray = new NativeCharArray(patternBytes);
425+
Object pcreCode = call(NativeFunction.compile, patternCharArray, patternBytes.length, options, errorCode, errorOffSet);
424426
String errorMessage = null;
425427
if (interop.isNull(pcreCode)) {
426428
assert errorOffSet[0] >= 0;
@@ -478,6 +480,8 @@ public String[] execute(Object pcrePattern, int captureCount) {
478480
}
479481

480482
public static final class MatchNode extends NativeCallNode {
483+
@Child private GetErrorStringNode getErrorStringNode = RFFIFactory.getPCRE2RFFI().createGetErrorStringNode();
484+
481485
public MatchNode(DownCallNodeFactory downCallNodeFactory) {
482486
super(downCallNodeFactory.createDownCallNode());
483487
}
@@ -499,16 +503,20 @@ public MatchData execute(Object pcreCompiledPattern, String subject, int options
499503
MatchCallback matchCallback = new MatchCallback(matchData);
500504
CaptureCallback captureCallback = new CaptureCallback(matchData);
501505
byte[] subjectBytes = subject.getBytes(StandardCharsets.UTF_8);
502-
NativeCharArray nativeCharArray = new NativeCharArray(subjectBytes);
506+
NativeCharArray subjectCharArray = new NativeCharArray(subjectBytes);
503507
Object matchCount = call(NativeFunction.match, matchCallback, captureCallback,
504-
pcreCompiledPattern, nativeCharArray, options, stopAfterFirstMatch ? 1 : 0);
508+
pcreCompiledPattern, subjectCharArray, subjectBytes.length, options, stopAfterFirstMatch ? 1 : 0);
505509
assert InteropLibrary.getUncached().isNumber(matchCount);
506510
int matchCountInt;
507511
try {
508512
matchCountInt = InteropLibrary.getUncached().asInt(matchCount);
509513
} catch (UnsupportedMessageException e) {
510514
throw RInternalError.shouldNotReachHere(e);
511515
}
516+
if (matchCountInt < 0) {
517+
String errMessage = getErrorStringNode.execute(matchCountInt);
518+
throw RInternalError.shouldNotReachHere("PCRE2Rffi$MatchNode: match failed with " + errMessage);
519+
}
512520
assert matchCountInt == matchData.getMatchCount();
513521
matchData = convertIndexes(matchData, subject, subjectBytes, captureCount);
514522
matchData.padCapturesWithEmptyMatches();

com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/interop/NativeCharArray.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,15 @@
3333
/**
3434
* A {@link TruffleObject} that represents an array of {@code unsigned char} values, that is
3535
* {@code NULL} terminated in the C domain.
36+
*
37+
* Note that {@link #getArrayLength()} returns the effective length, which means that the length
38+
* also includes the terminating null.
39+
*
40+
* Beware of using {@code strlen} on instances of this
41+
* class, as it will return different results on LLVM and on NFI (on LLVM, the return value
42+
* will include the terminating null). If you plan to use an instance of this class as string
43+
in native code, pass along the length of the string to the native code; do not compute it in
44+
* the native code via {@code strlen}.
3645
*/
3746
@ExportLibrary(InteropLibrary.class)
3847
public final class NativeCharArray extends NativeUInt8Array {

com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/ffi/interop/NativeUInt8Array.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
/**
3131
* Parent class of {@link NativeRawArray} and {@link NativeCharArray}, that holds the common logic
32-
* for a C type {@code uint8*}, that may or may not be {@code NULL} terminated (in the C domain).
32+
* for a C type {@code uint8_t*}, that may or may not be {@code NULL} terminated (in the C domain).
3333
*
3434
* The null termination is faked for Java arrays. If this object escapes to native code and we
3535
* allocate native memory for it, then the native memory will be null terminated (and one byte

0 commit comments

Comments
 (0)