Skip to content

Commit aee4355

Browse files
committed
Resolving makefile issues
Also fixing corrupted CSV file
1 parent 9f3fe09 commit aee4355

File tree

7 files changed

+70
-118
lines changed

7 files changed

+70
-118
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@ build/tests/executeEngine-serial-test
1010
build/tests/serial-SELECT-test
1111
tokenizer/src/tokenizer.o
1212
QPEOMP
13+
/.vscode
14+
/.vscode
15+
/.vscode

QPEMPI

190 KB
Binary file not shown.

data-generation/commands_50k.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:e4239d07eb776e56fda0d3fa7757e702ee9b5c7f16d952ade9ee6b080c66f8ea
3-
size 267
2+
oid sha256:b1a736a51b0cdfca2914157135c4b10067b73baa0fd6c8030fe0b54bb307f4e9
3+
size 5952942

docs/engine.md

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -178,18 +178,3 @@ freeResultSet(res);
178178
// and rewrites the CSV file.
179179
struct resultSetS *res = executeQueryDeleteSerial(engine, "commands", &whereClause);
180180
```
181-
182-
Developer suggestions
183-
- Add a bulk-load routine for B+ trees when building indexes from large CSVs.
184-
- Consider switching CSV persistence strategy to append-only with periodic compaction to avoid expensive full rewrites on DELETE.
185-
- Add thread-safety primitives if you plan to expose concurrent write access or to allow multiple processes to concurrently update the same datafile.
186-
187-
---
188-
189-
If you'd like, I can now:
190-
1. Add short diagrams that visualize the B+ tree node layout and leaf chaining.
191-
2. Produce a small runnable example program that demonstrates index build + select + delete with a tiny CSV and prints the B+ tree state before and after.
192-
3. Expand any of the sections above (e.g., a step-by-step walkthrough of `deleteEntry` internals) — tell me which area you want deeper.
193-
194-
End of engine documentation.
195-

engine/mpi/executeEngine-mpi.c

Lines changed: 50 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -357,54 +357,30 @@ struct resultSetS *executeQuerySelectSerial(
357357

358358
// Start the timer
359359
clock_t start = clock(); // Start a timer
360-
// Replaced while-loop with parallelized MPI version of code
360+
361361
// Get all indexed attributes in the WHERE clause, using the B+ tree indexes where possible
362-
/* --- Parallel WHERE clause index processing --- */
363-
364-
int rank, size;
365-
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
366-
MPI_Comm_size(MPI_COMM_WORLD, &size);
367-
368-
// Convert linked list into array for easier slicing
369-
int wc_count = 0;
370-
struct whereClauseS *tmp = whereClause;
371-
while (tmp) {
372-
wc_count++;
373-
tmp = tmp->next;
374-
}
375-
376-
struct whereClauseS **wc_array = malloc(wc_count * sizeof(struct whereClauseS *));
377-
tmp = whereClause;
378-
for (int i = 0; i < wc_count; i++) {
379-
wc_array[i] = tmp;
380-
tmp = tmp->next;
381-
}
382-
383-
// Determine each process’s work range
384-
int chunk = (wc_count + size - 1) / size;
385-
int st = rank * chunk;
386-
int ed = (start + chunk < wc_count) ? start + chunk : wc_count;
387-
388-
// Local result buffer
389-
record **local_matches = malloc(engine->num_records * sizeof(record *));
390-
int local_match_count = 0;
391-
392-
// Each rank processes its assigned WHERE clauses
393-
for (int idx = start; idx < ed; idx++) {
394-
struct whereClauseS *wc = wc_array[idx];
362+
struct whereClauseS *wc = whereClause;
363+
while (wc != NULL) {
395364
for (int i = 0; i < engine->num_indexes; i++) {
396365
if (strcmp(wc->attribute, engine->indexed_attributes[i]) == 0) {
397-
node *cur_root = engine->bplus_tree_roots[i];
366+
anyIndexExists = true;
367+
indexExists[i] = true;
368+
369+
// Use B+ tree index for this attribute
370+
node *cur_root = engine->bplus_tree_roots[i]; // B+ tree root for this indexed attribute
398371
FieldType type = engine->attribute_types[i];
399-
372+
400373
KEY_T key_start, key_end;
401374
bool typeSupported = true;
402-
375+
403376
if (type == FIELD_UINT64) {
404377
unsigned long long val = strtoull(wc->value, NULL, 10);
405-
key_start.type = key_end.type = KEY_UINT64;
378+
key_start.type = KEY_UINT64;
379+
key_end.type = KEY_UINT64;
380+
406381
if (strcmp(wc->operator, "=") == 0) {
407-
key_start.v.u64 = key_end.v.u64 = val;
382+
key_start.v.u64 = val;
383+
key_end.v.u64 = val;
408384
} else if (strcmp(wc->operator, ">") == 0) {
409385
key_start.v.u64 = val + 1;
410386
key_end.v.u64 = UINT64_MAX;
@@ -423,9 +399,12 @@ struct resultSetS *executeQuerySelectSerial(
423399
}
424400
} else if (type == FIELD_INT) {
425401
int val = atoi(wc->value);
426-
key_start.type = key_end.type = KEY_INT;
402+
key_start.type = KEY_INT;
403+
key_end.type = KEY_INT;
404+
427405
if (strcmp(wc->operator, "=") == 0) {
428-
key_start.v.i32 = key_end.v.i32 = val;
406+
key_start.v.i32 = val;
407+
key_end.v.i32 = val;
429408
} else if (strcmp(wc->operator, ">") == 0) {
430409
key_start.v.i32 = val + 1;
431410
key_end.v.i32 = INT_MAX;
@@ -443,61 +422,38 @@ struct resultSetS *executeQuerySelectSerial(
443422
key_end.v.i32 = INT_MAX;
444423
}
445424
} else {
425+
// Fallback for unsupported types in index search
446426
typeSupported = false;
427+
indexExists[i] = false;
447428
}
448-
449-
if (!typeSupported) continue;
450-
429+
430+
if (!typeSupported) {
431+
continue;
432+
}
433+
434+
// Allocating for keys, using num_records as upper bound.
451435
KEY_T *returned_keys = malloc(engine->num_records * sizeof(KEY_T));
452436
ROW_PTR *returned_pointers = malloc(engine->num_records * sizeof(ROW_PTR));
437+
453438
int num_found = findRange(cur_root, key_start, key_end, false, returned_keys, returned_pointers);
454-
455-
for (int k = 0; k < num_found; k++) {
456-
local_matches[local_match_count++] = (record *)returned_pointers[k];
439+
440+
// Add found records to matchingRecords
441+
if (num_found > 0) {
442+
// No reallocation is performed here; matchingRecords is assumed to have been allocated at max size initially
443+
for (int k = 0; k < num_found; k++) {
444+
matchingRecords[matchCount++] = (record *)returned_pointers[k];
445+
}
457446
}
458-
447+
459448
free(returned_keys);
460449
free(returned_pointers);
461450
}
451+
else {
452+
indexExists[i] = false;
453+
}
462454
}
455+
wc = wc->next;
463456
}
464-
465-
// Gather results from all ranks
466-
int *recvcounts = NULL, *displs = NULL;
467-
int local_count = local_match_count;
468-
if (rank == 0) {
469-
recvcounts = malloc(size * sizeof(int));
470-
}
471-
MPI_Gather(&local_count, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
472-
473-
record **global_matches = NULL;
474-
int total = 0;
475-
if (rank == 0) {
476-
displs = malloc(size * sizeof(int));
477-
displs[0] = 0;
478-
for (int i = 0; i < size; i++) {
479-
total += recvcounts[i];
480-
if (i > 0) displs[i] = displs[i - 1] + recvcounts[i - 1];
481-
}
482-
global_matches = malloc(total * sizeof(record *));
483-
}
484-
485-
MPI_Gatherv(local_matches, local_count, MPI_AINT, global_matches, recvcounts, displs, MPI_AINT, 0, MPI_COMM_WORLD);
486-
487-
if (rank == 0) {
488-
// merge deduplicate
489-
matchingRecords = global_matches;
490-
matchCount = 0;
491-
for (int i = 0; i < total; i++) {
492-
matchingRecords[matchCount++] = global_matches[i];
493-
}
494-
}
495-
496-
free(local_matches);
497-
if (rank == 0) { free(recvcounts); free(displs); }
498-
free(wc_array);
499-
500-
/* --- END PARALLEL SECTION --- */
501457

502458

503459
// Perform linear search if on all non-indexed attributes or no indexes matched
@@ -856,6 +812,15 @@ struct resultSetS *executeQueryDeleteMPI(
856812
return result;
857813
}
858814

815+
/* Wrapper for Serial API compatibility */
816+
struct resultSetS *executeQueryDeleteSerial(
817+
struct engineS *engine,
818+
const char *tableName,
819+
struct whereClauseS *whereClause
820+
) {
821+
return executeQueryDeleteMPI(engine, tableName, whereClause, MPI_COMM_WORLD);
822+
}
823+
859824
/* Initialize the engine, allocating space for default values, loading indexes, and loading the data
860825
* Parameters:
861826
* num_indexes - number of indexes to create

makefile

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,17 @@ TEST_SRCS := $(wildcard tests/*.c)
2626
TEST_BIN_DIR := build/tests
2727
TEST_BINS := $(patsubst tests/%.c,$(TEST_BIN_DIR)/%,$(TEST_SRCS))
2828

29-
# Serial engine sources required for linking (only the modern B+ tree for now)
30-
ENGINE_COMMON_SRCS := engine/bplus.c engine/recordSchema.c engine/printHelper.c engine/serial/buildEngine-serial.c
31-
ENGINE_SERIAL_SRCS := $(ENGINE_COMMON_SRCS) engine/serial/executeEngine-serial.c
29+
# engine sources required for linking (only the modern B+ tree for now)
30+
ENGINE_COMMON_SRCS := engine/bplus.c engine/recordSchema.c engine/printHelper.c
31+
ENGINE_SERIAL_SRCS := $(ENGINE_COMMON_SRCS) engine/serial/buildEngine-serial.c engine/serial/executeEngine-serial.c
3232
ENGINE_SERIAL_OBJS := $(ENGINE_SERIAL_SRCS:.c=.o)
3333

3434
# OMP engine sources
35-
ENGINE_OMP_SRCS := $(ENGINE_COMMON_SRCS) engine/omp/executeEngine-omp.c
35+
ENGINE_OMP_SRCS := $(ENGINE_COMMON_SRCS) engine/omp/executeEngine-omp.c engine/omp/buildEngine-omp.c
3636
ENGINE_OMP_OBJS := $(ENGINE_OMP_SRCS:.c=.o)
3737

3838
# MPI engine sources
39-
ENGINE_MPI_SRCS := $(ENGINE_COMMON_SRCS) engine/mpi/executeEngine-mpi.c
39+
ENGINE_MPI_SRCS := $(ENGINE_COMMON_SRCS) engine/mpi/executeEngine-mpi.c engine/mpi/buildEngine-mpi.c
4040
ENGINE_MPI_OBJS := $(ENGINE_MPI_SRCS:.c=.o)
4141

4242
# Tokenizer sources
@@ -48,7 +48,6 @@ TOKENIZER_OBJS := $(TOKENIZER_SRCS:.c=.o)
4848
all: $(ENGINE_SERIAL_OBJS) $(ENGINE_OMP_OBJS) $(ENGINE_MPI_OBJS) $(QPE_OBJS) $(QPE_EXES) $(TEST_BINS)
4949

5050
# Ensure engine object built before parallel links
51-
.NOTPARALLEL:
5251

5352
# Object build rule for all QPE sources (compile only if no main yet)
5453
%.o: %.c
@@ -63,16 +62,16 @@ QPEMPI.o: QPEMPI.c
6362
mpicc $(CFLAGS) -c $< -o $@
6463

6564
# Link rule for QPESeq (has a main)
66-
QPESeq: QPESeq.o $(ENGINE_SERIAL_OBJS) tokenizer/src/tokenizer.o connectEngine.o
67-
$(CC) $(CFLAGS) QPESeq.o $(ENGINE_SERIAL_OBJS) tokenizer/src/tokenizer.o connectEngine.o $(LDFLAGS) $(LDLIBS) -o $@
65+
QPESeq: QPESeq.o $(ENGINE_SERIAL_OBJS) $(TOKENIZER_OBJS) connectEngine.o
66+
$(CC) $(CFLAGS) QPESeq.o $(ENGINE_SERIAL_OBJS) $(TOKENIZER_OBJS) connectEngine.o $(LDFLAGS) $(LDLIBS) -o $@
6867

6968
# Link rule for QPEOMP (has a main, needs OpenMP)
70-
QPEOMP: QPEOMP.o $(ENGINE_OMP_OBJS) tokenizer/src/tokenizer.o connectEngine.o
71-
$(CC) $(CFLAGS) -fopenmp -pthread QPEOMP.o $(ENGINE_OMP_OBJS) tokenizer/src/tokenizer.o connectEngine.o $(LDFLAGS) $(LDLIBS) -o $@
69+
QPEOMP: QPEOMP.o $(ENGINE_OMP_OBJS) $(TOKENIZER_OBJS) connectEngine.o
70+
$(CC) $(CFLAGS) -fopenmp -pthread QPEOMP.o $(ENGINE_OMP_OBJS) $(TOKENIZER_OBJS) connectEngine.o $(LDFLAGS) $(LDLIBS) -o $@
7271

7372
# Link rule for QPEMPI (has a main, needs MPI)
74-
QPEMPI: QPEMPI.o $(ENGINE_MPI_OBJS) tokenizer/src/tokenizer.o connectEngine.o
75-
mpicc $(CFLAGS) QPEMPI.o $(ENGINE_MPI_OBJS) tokenizer/src/tokenizer.o connectEngine.o $(LDFLAGS) $(LDLIBS) -o $@
73+
QPEMPI: QPEMPI.o $(ENGINE_MPI_OBJS) $(TOKENIZER_OBJS) connectEngine.o
74+
mpicc $(CFLAGS) QPEMPI.o $(ENGINE_MPI_OBJS) $(TOKENIZER_OBJS) connectEngine.o $(LDFLAGS) $(LDLIBS) -o $@
7675

7776
# Pattern rule for test executables (placed under build/tests)
7877
$(TEST_BIN_DIR)/%: tests/%.c $(ENGINE_SERIAL_OBJS) $(TOKENIZER_OBJS)
@@ -85,16 +84,16 @@ $(TEST_BIN_DIR)/test_tokenizer_new: tests/test_tokenizer_new.c $(ENGINE_SERIAL_O
8584
$(CC) $(CFLAGS) $< $(ENGINE_SERIAL_OBJS) $(TOKENIZER_OBJS) $(LDFLAGS) $(LDLIBS) -o $@
8685

8786
# Engine object build rule
88-
engine/serial/%.o: engine/serial/%.c include/bplus.h
87+
engine/serial/%.o: engine/serial/%.c include/*.h
8988
$(CC) $(CFLAGS) -c $< -o $@
9089

91-
engine/omp/%.o: engine/omp/%.c include/bplus.h
90+
engine/omp/%.o: engine/omp/%.c include/*.h
9291
$(CC) $(CFLAGS) -fopenmp -c $< -o $@
9392

94-
engine/mpi/%.o: engine/mpi/%.c include/bplus.h
93+
engine/mpi/%.o: engine/mpi/%.c include/*.h
9594
mpicc $(CFLAGS) -c $< -o $@
9695

97-
engine/%.o: engine/%.c include/bplus.h
96+
engine/%.o: engine/%.c include/*.h
9897
$(CC) $(CFLAGS) -c $< -o $@
9998

10099
# Tokenizer object build rule

test_tokenizer_new

-24.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)