Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions src/core/bonded.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@
* =============================================
*/

double calc_angle_forces(int start, int end) {
float calc_angle_forces(int start, int end) {
int aii, aji, aki;

coord_t ai, aj, ak;
coord_t rji, rjk;
coord_t di, dk;

double bji2inv, bjk2inv, bjiinv, bjkinv;
float bji2inv, bjk2inv, bjiinv, bjkinv;
cangle_t cangle;
double cos_th, th, dth, dv, f1;
double ener;
double angle = 0;
float cos_th, th, dth, dv, f1;
float ener;
float angle = 0;

for (int i = start; i < end; i++) {
aii = angles[i].ai - 1;
Expand Down Expand Up @@ -102,12 +102,12 @@ double calc_angle_forces(int start, int end) {
return angle;
}

double calc_bond_forces(int start, int end) {
float calc_bond_forces(int start, int end) {
int aii, aji;
coord_t ai, aj, dx;
cbond_t cbond;
double dx2, dx1, ddx, ener, ampl;
double bond = 0;
float dx2, dx1, ddx, ener, ampl;
float bond = 0;

for (int i = start; i < end; i++) {
aii = bonds[i].ai-1;
Expand Down Expand Up @@ -147,18 +147,18 @@ double calc_bond_forces(int start, int end) {
return bond;
}

double calc_torsion_forces(int start, int end) {
float calc_torsion_forces(int start, int end) {
int aii, aji, aki, ali;

coord_t ai, aj, ak, al;
coord_t rji, rjk, rkl, rnj, rnk, rki, rlj;
coord_t di, dl, dpi, dpj, dpk, dpl;

double bj2inv, bk2inv, bjinv, bkinv;
double cos_phi, phi;
double arg, dv, f1;
double ener;
double torsion = 0;
float bj2inv, bk2inv, bjinv, bkinv;
float cos_phi, phi;
float arg, dv, f1;
float ener;
float torsion = 0;

torsion_t t;
ctorsion_t ctors;
Expand Down Expand Up @@ -281,18 +281,18 @@ double calc_torsion_forces(int start, int end) {
return torsion;
}

double calc_improper2_forces(int start, int end) {
float calc_improper2_forces(int start, int end) {
int aii, aji, aki, ali;

coord_t ai, aj, ak, al;
coord_t rji, rjk, rkl, rnj, rnk, rki, rlj;
double bj2inv, bk2inv, bjinv, bkinv;
double cos_phi, phi, arg, ener, dv, f1;
float bj2inv, bk2inv, bjinv, bkinv;
float cos_phi, phi, arg, ener, dv, f1;
coord_t di, dl, dpi, dpj, dpk, dpl;

improper_t imp;
cimproper_t cimp;
double improper = 0;
float improper = 0;

for (int i = start; i < end; i++) {
imp = impropers[i];
Expand Down
8 changes: 4 additions & 4 deletions src/core/bonded.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ifndef __BONDED_H__
#define __BONDED_H__

double calc_angle_forces(int start, int end);
double calc_bond_forces(int start, int end);
double calc_torsion_forces(int start, int end);
double calc_improper2_forces(int start, int end);
float calc_angle_forces(int start, int end);
float calc_bond_forces(int start, int end);
float calc_torsion_forces(int start, int end);
float calc_improper2_forces(int start, int end);

#endif /* __BONDED_H__ */
58 changes: 29 additions & 29 deletions src/core/nonbonded.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ dvel_t *DV_X;
dvel_t *PP_MAT, *h_PP_MAT;
bool pp_gpu_set = false;

double *D_PP_Evdw, *D_PP_Ecoul, *h_PP_Evdw, *h_PP_Ecoul;
double *D_PP_evdw_TOT, *D_PP_ecoul_TOT, PP_evdw_TOT, PP_ecoul_TOT;
float *D_PP_Evdw, *D_PP_Ecoul, *h_PP_Evdw, *h_PP_Ecoul;
float *D_PP_evdw_TOT, *D_PP_ecoul_TOT, PP_evdw_TOT, PP_ecoul_TOT;

// Constants pointers
ccharge_t *D_ccharges;
Expand All @@ -27,13 +27,13 @@ bool *D_excluded;

void calc_nonbonded_pp_forces() {
bool bond14, bond23;
double scaling;
float scaling;
coord_t da;
double r2a, ra, r6a;
double Vela, V_a, V_b;
double dva;
double crg_i, crg_j;
double ai_aii, aj_aii, ai_bii, aj_bii;
float r2a, ra, r6a;
float Vela, V_a, V_b;
float dva;
float crg_i, crg_j;
float ai_aii, aj_aii, ai_bii, aj_bii;
int i, j;
catype_t ai_type, aj_type;

Expand Down Expand Up @@ -93,8 +93,8 @@ void calc_nonbonded_pp_forces_host() {

int mem_size_PP_MAT = n_patoms * n_patoms * sizeof(dvel_t);
int n_blocks = (n_patoms + BLOCK_SIZE - 1) / BLOCK_SIZE;
int mem_size_PP_Evdw = n_blocks * n_blocks * sizeof(double);
int mem_size_PP_Ecoul = n_blocks * n_blocks * sizeof(double);
int mem_size_PP_Evdw = n_blocks * n_blocks * sizeof(float);
int mem_size_PP_Ecoul = n_blocks * n_blocks * sizeof(float);

if (!pp_gpu_set) {
pp_gpu_set = true;
Expand All @@ -112,12 +112,12 @@ void calc_nonbonded_pp_forces_host() {
#endif
check_cudaMalloc((void**) &D_PP_Ecoul, mem_size_PP_Ecoul);

check_cudaMalloc((void**) &D_PP_evdw_TOT, sizeof(double));
check_cudaMalloc((void**) &D_PP_ecoul_TOT, sizeof(double));
check_cudaMalloc((void**) &D_PP_evdw_TOT, sizeof(float));
check_cudaMalloc((void**) &D_PP_ecoul_TOT, sizeof(float));

h_PP_MAT = (dvel_t*) malloc(mem_size_PP_MAT);
h_PP_Evdw = (double*) malloc(mem_size_PP_Evdw);
h_PP_Ecoul = (double*) malloc(mem_size_PP_Ecoul);
h_PP_Evdw = (float*) malloc(mem_size_PP_Evdw);
h_PP_Ecoul = (float*) malloc(mem_size_PP_Ecoul);
}

cudaMemcpy(X, coords, mem_size_X, cudaMemcpyHostToDevice);
Expand Down Expand Up @@ -148,8 +148,8 @@ void calc_nonbonded_pp_forces_host() {

calc_energy_sum<<<1, threads>>>(n_blocks, n_blocks, D_PP_evdw_TOT, D_PP_ecoul_TOT, D_PP_Evdw, D_PP_Ecoul, true);

cudaMemcpy(&PP_evdw_TOT, D_PP_evdw_TOT, sizeof(double), cudaMemcpyDeviceToHost);
cudaMemcpy(&PP_ecoul_TOT, D_PP_ecoul_TOT, sizeof(double), cudaMemcpyDeviceToHost);
cudaMemcpy(&PP_evdw_TOT, D_PP_evdw_TOT, sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(&PP_ecoul_TOT, D_PP_ecoul_TOT, sizeof(float), cudaMemcpyDeviceToHost);

E_nonbond_pp.Uvdw += PP_evdw_TOT;
E_nonbond_pp.Ucoul += PP_ecoul_TOT;
Expand All @@ -161,17 +161,17 @@ void calc_nonbonded_pp_forces_host() {
*/

__device__ void calc_pp_dvel_matrix_incr(int row, int pi, int column, int pj,
coord_t *Xs, coord_t *Ys, int *LJs, bool *excluded_s, double *Evdw, double *Ecoul, dvel_t *patom_a, dvel_t *patom_b,
coord_t *Xs, coord_t *Ys, int *LJs, bool *excluded_s, float *Evdw, float *Ecoul, dvel_t *patom_a, dvel_t *patom_b,
ccharge_t *D_ccharges, charge_t *D_charges, catype_t *D_catypes, atype_t *D_atypes, p_atom_t *D_patoms, topo_t D_topo) {

bool bond14, bond23;
double scaling;
float scaling;
coord_t da;
double r2a, ra, r6a;
double Vela, V_a, V_b;
double dva;
double crg_i, crg_j;
double ai_aii, aj_aii, ai_bii, aj_bii;
float r2a, ra, r6a;
float Vela, V_a, V_b;
float dva;
float crg_i, crg_j;
float ai_aii, aj_aii, ai_bii, aj_bii;
catype_t ai_type, aj_type;

bond23 = LJs[row * BLOCK_SIZE + column] == 3;
Expand Down Expand Up @@ -219,7 +219,7 @@ __device__ void calc_pp_dvel_matrix_incr(int row, int pi, int column, int pj,
}

__global__ void calc_pp_dvel_matrix(int n_patoms, int n_atoms_solute,
coord_t *X, double *Evdw, double *Ecoul, dvel_t *PP_MAT,
coord_t *X, float *Evdw, float *Ecoul, dvel_t *PP_MAT,
ccharge_t *D_ccharges, charge_t *D_charges, catype_t *D_catypes, atype_t *D_atypes, p_atom_t *D_patoms, int *D_LJ_matrix, bool *D_excluded, topo_t D_topo) {
// Block index
int bx = blockIdx.x;
Expand All @@ -232,8 +232,8 @@ __global__ void calc_pp_dvel_matrix(int n_patoms, int n_atoms_solute,
int aStart = BLOCK_SIZE * by;
int bStart = BLOCK_SIZE * bx;

__shared__ double Ecoul_S[BLOCK_SIZE][BLOCK_SIZE];
__shared__ double Evdw_S[BLOCK_SIZE][BLOCK_SIZE];
__shared__ float Ecoul_S[BLOCK_SIZE][BLOCK_SIZE];
__shared__ float Evdw_S[BLOCK_SIZE][BLOCK_SIZE];

Ecoul_S[ty][tx] = 0;
Evdw_S[ty][tx] = 0;
Expand Down Expand Up @@ -272,7 +272,7 @@ __global__ void calc_pp_dvel_matrix(int n_patoms, int n_atoms_solute,
int column = bx * BLOCK_SIZE + tx;

if (bx != by || tx != ty) {
double evdw = 0, ecoul = 0;
float evdw = 0, ecoul = 0;
calc_pp_dvel_matrix_incr(ty, pi, tx, pj, Xs, Ys, LJs, excluded_s, &evdw, &ecoul, &patom_a,
&patom_b, D_ccharges, D_charges, D_catypes, D_atypes, D_patoms, D_topo);
Evdw_S[ty][tx] = evdw;
Expand All @@ -287,8 +287,8 @@ __global__ void calc_pp_dvel_matrix(int n_patoms, int n_atoms_solute,
int rowlen = (n_patoms + BLOCK_SIZE - 1) / BLOCK_SIZE;

if (tx == 0 && ty == 0) {
double tot_Evdw = 0;
double tot_Ecoul = 0;
float tot_Evdw = 0;
float tot_Ecoul = 0;
for (int i = 0; i < BLOCK_SIZE; i++) {
for (int j = 0; j < BLOCK_SIZE; j++) {
tot_Evdw += Evdw_S[i][j];
Expand Down
6 changes: 3 additions & 3 deletions src/core/nonbonded.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@ extern bool *D_excluded;

// P-P interactions
__device__ void calc_pp_dvel_matrix_incr(int row, int pi, int column, int pj,
coord_t *Xs, coord_t *Ys, int *LJs, bool *excluded_s, double *Evdw, double *Ecoul, dvel_t *patom_a, dvel_t *patom_b,
coord_t *Xs, coord_t *Ys, int *LJs, bool *excluded_s, float *Evdw, float *Ecoul, dvel_t *patom_a, dvel_t *patom_b,
ccharge_t *D_ccharges, charge_t *D_charges, catype_t *D_catypes, atype_t *D_atypes, p_atom_t *D_patoms, topo_t topo);

__global__ void calc_pp_dvel_matrix(int n_patoms, int n_atoms_solute,
coord_t *X, double *Evdw, double *Ecoul, dvel_t *PP_MAT,
coord_t *X, float *Evdw, float *Ecoul, dvel_t *PP_MAT,
ccharge_t *D_ccharges, charge_t *D_charges, catype_t *D_catypes, atype_t *D_atypes, p_atom_t *D_patoms, int *D_LJ_matrix, bool *D_excluded, topo_t topo);

__global__ void calc_pp_dvel_vector(int n_patoms, dvel_t *DV_X, dvel_t *PP_MAT, p_atom_t *D_patoms);


void clean_d_patoms();

__global__ void calc_energy_sum(int rows, int columns, double *Evdw_TOT, double *Ecoul_TOT, double *Evdw, double *Ecoul, bool upper_diagonal);
__global__ void calc_energy_sum(int rows, int columns, float *Evdw_TOT, float *Ecoul_TOT, float *Evdw, float *Ecoul, bool upper_diagonal);

#endif /* __NONBONDED_H__ */
32 changes: 20 additions & 12 deletions src/core/parse.cu
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ void init_md(const char *filename) {
#ifdef VERBOSE
printf("reading in %d lambdas (%s in file)\n", n_lambdas, file.buffer[k][1]);
#endif
lambdas = (double*) malloc(n_lambdas * sizeof(double));
lambdas = (float*) malloc(n_lambdas * sizeof(float));
k++;
for (int i = 0; i < n_lambdas; i++) {
lambdas[i] = strtod(file.buffer[k][0], &eptr);
Expand Down Expand Up @@ -521,20 +521,28 @@ void init_excluded(const char *filename) {
if (fp == NULL)
exit(EXIT_FAILURE);

char line[8192];

n_excluded = 0;
// --- dynamically read a full line ---
char *line = NULL;
size_t len = 0;
ssize_t read_len = getline(&line, &len, fp);
if (read_len == -1) {
fprintf(stderr, "Error: failed to read line from %s\n", path);
free(line);
fclose(fp);
exit(EXIT_FAILURE);
}

if (fgets(line, 8192, fp)) {
for (int i = 0; i < n_atoms; i++) {
bool excl = (line[i] == '1');
excluded[i] = excl;
if (excl) {
n_excluded++;
}
}
// --- parse ---
n_excluded = 0;
for (int i = 0; i < n_atoms && i < read_len; i++) {
bool excl = (line[i] == '1');
excluded[i] = excl;
if (excl) n_excluded++;
}

printf("Number of excluded atoms: %d\n", n_excluded);
Comment on lines 537 to +543
Copy link

Copilot AI Nov 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This printf statement appears to be debug output that should be removed before merging, or wrapped in a conditional compilation flag (e.g., #ifdef VERBOSE or #ifdef DEBUG).

Suggested change
printf("Number of excluded atoms: %d\n", n_excluded);
#ifdef DEBUG
printf("Number of excluded atoms: %d\n", n_excluded);
#endif

Copilot uses AI. Check for mistakes.

free(line);
fclose(fp);
}
Comment on lines +524 to 547
Copy link

Copilot AI Nov 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The init_excluded function has been significantly refactored to use dynamic line reading with getline() instead of a fixed buffer. While this is a good improvement for handling files with many atoms, this change goes beyond the scope of the float conversion PR. Consider:

  1. The loop condition i < read_len may be incorrect - it should likely be i < n_atoms only, since read_len includes the newline character
  2. This functional change should ideally be in a separate PR focused on buffer safety improvements
  3. Added printf on line 543 for debugging should be removed or made conditional

Copilot uses AI. Check for mistakes.

Expand Down
Loading