code

=C++ and Python library=

Just a collection of codes I (TPP) have written performing various tasks. Admittedly, some will be better than others. I usually just consult the gurus at StackOverflow and adapt the suggestions to suit my needs. Not a lot of finesse, but they work.

code format="python"
 * 1) !/usr/bin/python


 * 1) Fits a Gaussian function to single-column data... here it is applied to electrostatics data for
 * 2) the local contribution to the free energy. Output is a plot and the parameters needed to determine
 * 3) the overlap point of curves 1 and 2 and 2 and 3.


 * 1) NOTE: I assemble the plots in ln(x) form at the end, this will cause some data points to bark about
 * 2) being very close to zero (ln 0 = -inf) -- it is SAFE to redirect these messages; 2>/dev/null

import numpy, sys from scipy.optimize import curve_fit import matplotlib.pyplot as plt

def _gaussian(x, *p): A, mu, sigma = p;   return A*numpy.exp(-(x-mu)**2/(2.*sigma**2));

x = []; with open(sys.argv[1]) as inf: for line in inf: x.append(float(line)); hist1, bin_edges = numpy.histogram(x, bins=300, density=True) bin_centres1 = (bin_edges[:-1] + bin_edges[1:])/2 a    = numpy.max(x); sigma = numpy.std(x); mu   = numpy.mean(x); p0 = [a, mu, sigma]; coeff, var_matrix = curve_fit(_gaussian, bin_centres1, hist1, p0) hist_fit1 = _gaussian(bin_centres1, *coeff) print 'Fitted amplitude (Full) = ', numpy.log(coeff[0]) print 'Fitted mean (Full) = ', coeff[1] print 'Fitted standard deviation (Full) = ', coeff[2]
 * 1) FULL

x = []; with open(sys.argv[2]) as inf: for line in inf: x.append(float(line)); hist2, bin_edges = numpy.histogram(x, bins=300, density=True) bin_centres2 = (bin_edges[:-1] + bin_edges[1:])/2 a    = numpy.max(x); sigma = numpy.std(x); mu   = numpy.mean(x); p0 = [a, mu, sigma]; coeff, var_matrix = curve_fit(_gaussian, bin_centres2, hist2, p0) hist_fit2 = _gaussian(bin_centres2, *coeff) print 'Fitted amplitude (Full) = ', numpy.log(coeff[0]) print 'Fitted mean (Full) = ', coeff[1] print 'Fitted standard deviation (Full) = ', coeff[2]
 * 1) HALF

x = []; with open(sys.argv[3]) as inf: for line in inf: x.append(float(line)); hist3, bin_edges = numpy.histogram(x, bins=300, density=True) bin_centres3 = (bin_edges[:-1] + bin_edges[1:])/2 a    = numpy.max(x); sigma = numpy.std(x); mu   = numpy.mean(x); p0 = [a, mu, sigma]; coeff, var_matrix = curve_fit(_gaussian, bin_centres3, hist3, p0) hist_fit3 = _gaussian(bin_centres3, *coeff) print 'Fitted amplitude (Full) = ', numpy.log(coeff[0]) print 'Fitted mean (Full) = ', coeff[1] print 'Fitted standard deviation (Full) = ', coeff[2]
 * 1) HALF-FAR

plt.plot(bin_centres1, numpy.log(hist1), linewidth=1, marker="o", c="black", label='Full') plt.plot(bin_centres1, numpy.log(hist_fit1), linewidth=2, linestyle="-", c="black", label='Full (fitted)') plt.plot(bin_centres2, numpy.log(hist2), linewidth=1, marker="o", c="red", label='Half') plt.plot(bin_centres2, numpy.log(hist_fit2), linewidth=2, linestyle="-", c="red", label='Half (fitted)') plt.plot(bin_centres3, numpy.log(hist3), linewidth=1, marker="o", c="blue", label='Far') plt.plot(bin_centres3, numpy.log(hist_fit3), linewidth=2, linestyle="-", c="blue", label='Far (fitted)')

plt.xlabel('$\\varepsilon$    kcal/mol', fontsize=16) plt.ylabel('ln P($\\varepsilon$)', fontsize=16) plt.xticks(fontsize=16) plt.yticks(fontsize=16)
 * 1) plt.legend(loc='upper left')

plt.savefig(sys.argv[4]) code code format="python" import subprocess, os, sys
 * 1) plt.show
 * 1) !/usr/bin/python


 * 1) this script creates a template PBS script and submits it with info included from the command-line

dir = os.getcwd fil = sys.argv[1] root,ext = os.path.splitext(fil)
 * 1) get file name; will work with and also without file extensions now

if ext in ['.gz', '.bz2']: is_tar = os.path.splitext(root)[1] if is_tar in ['.tar']: ext = is_tar + ext root = os.path.splitext(root)[0]
 * 1) generalize for tar.gz and other common compressed file types

if len(sys.argv[1:]) < 2: job_rsrc = "medium" else: job_rsrc = sys.argv[2]
 * 1) describe some info about the job

if job_rsrc == 'small': walltime = "24:00:00" processors = "nodes=1:ppn=1" cpus = 1 elif job_rsrc == 'medium': walltime = "168:00:00" processors = "nodes=1:ppn=12" cpus = 12 elif job_rsrc == 'large': walltime = "168:00:00" processors = "nodes=1:ppn=12,mem=192GB" cpus = 12 elif job_rsrc == 'custom': walltime = sys.argv[3] # format HH:MM:SS (H=hour, M=minute, S=second) processors = sys.argv[4] # nodes=N:ppn=M,mem=XGB (N=1+, M=1-12, X=1-192) Include memory units (MB, GB) tmp = processors.split('=') cpus = int(filter(str.isdigit, tmp[2])) else: print "ERROR: Formatting/spelling error in one or more of your input arguments."

job_name = "psi4:%s" % root
 * 1) name the job and create input

job_string = """#!/bin/bash trap "cd $PBS_O_WORKDIR ;mkdir $PBS_JOBID;cp -R $TMPDIR/* $PBS_JOBID" TERM
 * 1) PBS -A XXXXXXXXX
 * 2) PBS -N %s
 * 3) PBS -l walltime=%s,%s
 * 4) PBS -S /bin/bash
 * 5) PBS -j oe

cd $PBS_O_WORKDIR; cp %s%s $TMPDIR; cd $TMPDIR psi4_sym -i %s%s -o %s.log -n %d mv %s.log $PBS_O_WORKDIR

""" % (job_name, walltime, processors, root, ext, root, ext, root, cpus, root)

infile = open('fil.pbs','w') infile.write(job_string) infile.close

subprocess.call(['qsub','fil.pbs']) subprocess.call(['rm','fil.pbs']) code code format="cpp"
 * 1) include
 * 2) include
 * 3) include
 * 4) include
 * 5) include
 * 6) include
 * 7) include
 * 8) include
 * 9) include 

using namespace std;

// This code reads Gromacs .gro files and computes the electrostatic potential at a coordinate defined by last // atom or manually. Real and k-space Ewald. This code can be used to calculate Madelung and self-energy constants // in cubes or rectangular prisms. The code is thoroughly commented and includes dimensional analysis.

double aunm = 0.0529177; double nmau = 1./aunm; double aukcal = 627.5094688; double pi = 3.14159265359; double facel = 332.063709; // kcal-Ang/mol-q^2 double one_4pi_eps0 = facel*0.1*nmau; // 0.1 is Ang to nm, nmau is nm to au -- q^2-mol/kcal-au

// Charge library double qcl = -0.2; double qc = -0.006; double qh = 0.103; double qow = -0.8476; double qhw = 0.4238; double qxx = 0.0;

double real_space(double xL, double yL, double zL, double x, double y, double z, double xN, double yN, double zN, double qi); double k_space(double xL, double yL, double zL, double x, double y, double z, double xN, double yN, double zN, double qi);

int main(int argc, char *argv[]) {

int i, n;   double j, k, l, m;    double xL, yL, zL, vol, xN, yN, zN, eta, eta2; double dipx, dipy, dipz; std::vector name; std::vector q, x, y, z;   std::ifstream input(argv[1]);

if(input.is_open) { std::string line; std::getline (input, line); // title, garbage std::getline (input, line); // num atoms std::stringstream parse(line); parse >> i;

n = i; // save original value for after loop

while (i-- > 0) { // do not read final atom yet std::getline (input, line); std::string junk1 = string(line, 0, 5);    // resid std::string junk2 = string(line, 5, 5);    // resname std::string junk3 = string(line, 10, 5);   // atom name std::string junk4 = string(line, 15, 5);   // atom id            std::string junk5 = string(line, 20, 8);    // x            std::string junk6 = string(line, 28, 8);    // y            std::string junk7 = string(line, 36, 8);    // z (no velocity data is kept) name.push_back(junk3); std::istringstream(junk5) >> j;           x.push_back(j); std::istringstream(junk6) >> k;           y.push_back(k); std::istringstream(junk7) >> l;           z.push_back(l); }

while (!input.eof) { getline (input, line); std::stringstream parse(line); parse >> xL; parse >> yL; parse >> zL; }

// remove whitespace from name array for (i=0; i<n; i++) { name[i].erase(remove_if(name[i].begin, name[i].end, ::isspace), name[i].end); }

// This loop assigns the charges for (i=0; i<n; i++) { if ( name[i].compare("CL") == 0 ) { q.push_back(qcl); } else if ( name[i].compare("C") == 0 ) { q.push_back(qc); } else if ( name[i].compare("H") == 0 ) { q.push_back(qh); } else if ( name[i].compare("OW") == 0 ) { q.push_back(qow); } else if ( name[i].compare("HW") == 0 ) { q.push_back(qhw); } else if ( name[i].compare("XX") == 0 ) { q.push_back(qxx); } else { cout << "Cannot match an atom to a charge! Ending now." << endl; break; } }

// convert lengths and coordinates to atomic units for (i=0; i<n; i++) { x[i] = x[i]*nmau; y[i] = y[i]*nmau; z[i] = z[i]*nmau; } xL = xL*nmau; yL = yL*nmau; zL = zL*nmau; vol = xL*yL*zL; eta = 5.6/xL; eta2 = eta*eta;

// dipole correction for slab geometry (note: dielectric dependence goes away in rectangular grid) [WORKS!!!!] double dipcorr = 0.; if (zL != xL) { for (i=0; i<n; i++) { dipx += q[i]*(x[i]-x[n-1]); dipy += q[i]*(y[i]-y[n-1]); dipz += q[i]*(z[i]-z[n-1]); }           dipcorr = 2.*q[n-1]*pi*dipz*dipz/vol; }

// net charge correction double enercorr = q[n-1]*pi/(vol*eta2);

// self-energy correction double k_spc_latt = k_space(xL/xL, yL/xL, zL/xL, 0., 0., 0., 0., 0., 0., 1.); double xL_latt = xL/xL; double yL_latt = yL/xL; double zL_latt = zL/xL; double vol_latt = xL_latt*yL_latt*zL_latt; double eta_latt = 5.6/xL_latt; double eta2_latt = eta_latt*eta_latt; double zeta = (k_spc_latt-(pi/vol_latt/eta2_latt)-(2.*eta_latt/sqrt(pi)))*xL_latt; double self = q[n-1]*zeta/xL;

// real/k space double sumrs = 0.; double sumks = 0.;

for (i=0; i<n-1; i++) { double r_spc = real_space(xL, yL, zL, x[i], y[i], z[i], x[n-1], y[n-1], z[n-1], q[i]); sumrs += r_spc; double k_spc = k_space(xL, yL, zL, x[i], y[i], z[i], x[n-1], y[n-1], z[n-1], q[i]); sumks += k_spc; }

double potl = sumrs+sumks-self-enercorr+dipcorr;

cout.precision(8); cout << sumrs*one_4pi_eps0 << "  " << sumks*one_4pi_eps0 << "   " << one_4pi_eps0*potl << "   " << one_4pi_eps0*self << "   " << one_4pi_eps0*enercorr << "   " << one_4pi_eps0*dipcorr << endl;

}

return 1;

}

// real space double real_space(double xL, double yL, double zL, double x, double y, double z, double xN, double yN, double zN, double qi) {

int imaxr = 3; int imaxrz = ceil(zL*imaxr/xL); double xr, yr, zr, r, rsqr, t1, t2; double eta = 5.6/xL; double eta2 = eta*eta;

t1 = t2 = 0.;

for (int ix = -imaxr; ix <= imaxr; ix++) { for (int iy = -imaxr; iy <= imaxr; iy++) { for (int iz = -imaxrz; iz <= imaxrz; iz++) { xr = xN-x+ix*xL; yr = yN-y+iy*yL; zr = zN-z+iz*zL; rsqr = xr*xr + yr*yr + zr*zr; r = sqrt(rsqr); t1 = erfc(eta*r)/r; t2 += qi*t1; // 1/(q^2-mol/kcal-au)*q*(1/au) = kcal/mol-e }       }    }

return t2;

}

// k-space double k_space(double xL, double yL, double zL, double x, double y, double z, double xN, double yN, double zN, double qi) { int imaxk = 5; int imaxkz = ceil(zL*imaxk/xL); double xr, yr, zr, r, rsqr, t1, t2, t3, t4, t5, t6, vol; double eta = 5.6/xL; double eta2 = eta*eta;

vol = xL*yL*zL; t1 = t2 = t3 = t4 = t5 = t6 = 0.;

for (int ix = -imaxk; ix <= imaxk; ix++) { for (int iy = -imaxk; iy <= imaxk; iy++) { for (int iz = -imaxkz; iz <= imaxkz; iz++) { if (ix*ix+iy*iy+iz*iz != 0.) { xr = xN-x; yr = yN-y; zr = zN-z; double kvec[3]; kvec[0] = 2*pi*ix/xL; kvec[1] = 2*pi*iy/yL; kvec[2] = 2*pi*iz/zL; t1 = pow(2*pi/xL,2)*pow(ix,2)+pow(2*pi/yL,2)*pow(iy,2)+pow(2*pi/zL,2)*pow(iz,2); // net units are 1/bohr/bohr t2 = t1/4./eta2; // net units are NONE (expression is pi*pi/L/L/eta2 * (ix*ix + iy*iy + iz*iz)) double kmat = exp(-t2)/t1; // this is the final step to build the k-space matrix (recompute the element each loop) exp(-t2)*L*L/2/pi/2/pi t3 = kvec[0]*xr + kvec[1]*yr + kvec[2]*zr; // unitless!!! t4 = cos(t3); // again unitless t5 += kmat*t4; // net units are bohr*bohr }               else { t5 += 0.; }           }        }    }

t6 = 4*pi*qi*t5/vol; // final units are kcal/mol-e (bohr*bohr*e*kcal-bohr)/(bohr*bohr*bohr*e*e-mol)

return t6;

} code code format="cpp"
 * 1) include
 * 2) include
 * 3) include
 * 4) include
 * 5) include
 * 6) include
 * 7) include
 * 8) include
 * 9) include 

//This is a basic C++ code that reads an existing XYZ file from the command line and an integer to output an XYZ file with //only a fraction of the initial waters remaining (defined by that integer). The ion is always assumed to be the last atom. // //Example XYZ file //4 //Water     Cubic      Box      (18.643 //O      0.910401      3.451308      0.814824 //H      1.007141      4.120235      1.513075 //H      1.427721      2.740412      1.229442 //Br      2.923151      3.902952      -0.114451

using namespace std;

int main(int argc, char *argv[]) {

int a, i, n, cluster; double j, k, l, m, dist; double xL, yL, zL, vol, xN, yN, zN, eta, eta2; double dipx, dipy, dipz; std::vector name; std::vector x, y, z, r, sorted_list; std::ifstream input(argv[1]);

cluster = atoi(argv[2]);

if(input.is_open) { std::string line; std::getline (input, line); // num atoms std::stringstream parse(line); parse >> i;       std::getline (input, line); // title, garbage

n = i; // save original value for after loop

while (i-- > 0) { std::getline (input, line); std::string junk1 = string(line, 0, 2);    // atom name std::string junk5 = string(line, 2, 16);   // x            std::string junk6 = string(line, 18, 14);   // y            std::string junk7 = string(line, 32, 18);   // z            name.push_back(junk1); std::istringstream(junk5) >> j;           x.push_back(j); std::istringstream(junk6) >> k;           y.push_back(k); std::istringstream(junk7) >> l;           z.push_back(l); }

// remove whitespace from name array for (i=0; i<n; i++) { name[i].erase(remove_if(name[i].begin, name[i].end, ::isspace), name[i].end); }

// This loop gets distances and sorts them, then runs back through to identify the indices for (i=0; i<n; i++) { if ( name[i].compare("O") == 0 ) { r.push_back(sqrt(pow(x[i]-x[n-1],2)+pow(y[i]-y[n-1],2)+pow(z[i]-z[n-1],2))); }       }

std::sort(r.begin,r.end);

// testing to see it sorted /*       for (i=0; i<r.size; i++) { cout << r[i] << endl; } */

// write stuff cout << 1+3*cluster << endl; cout << "Generic title" << endl;

cout.precision(6);

// simple but slow sorting for (i=0; i<cluster; i++) { for (j=0; j<n; j++) { if ( name[j].compare("O") == 0 ) { dist = sqrt(pow(x[j]-x[n-1],2)+pow(y[j]-y[n-1],2)+pow(z[j]-z[n-1],2)); if ( fabs(dist-r[i]) <= 0.000008 ) { cout << name[j] << "   " << x[j] << "    " << y[j] << "    " << z[j] << endl; cout << name[j+1] << "   " << x[j+1] << "    " << y[j+1] << "    " << z[j+1] << endl; cout << name[j+2] << "   " << x[j+2] << "    " << y[j+2] << "    " << z[j+2] << endl; }               }            }        }

cout << name[n-1] << "   " << x[n-1] << "    " << y[n-1] << "    " << z[n-1] << endl;

}

} code code format="python"
 * 1) !/usr/bin/python

import string,sys,re,math,glob,os

eng = []; err = [];

dirname = os.getcwd for filename in os.listdir(dirname): root, ext = os.path.splitext(filename) sum = 0.000 std = 0.000 n  = 0 if ext =='.dat': f1 = open(filename,'r') for line in f1: sum = float(line) + sum std = float(line)*float(line) + std n  = 1 + n  mean = sum/n var = std/n - mean*mean eng.append(mean) err.append(math.sqrt(var/n))

k   = 627.509469 tmp1 = eng[2] - eng[0] tmp2 = eng[1] - eng[3] tmp3 = math.sqrt(err[0]*err[0] + err[2]*err[2]) tmp4 = math.sqrt(err[1]*err[1] + err[3]*err[3])

tmp5 = tmp2-tmp1 tmp6 = math.sqrt(tmp3*tmp3 + tmp4*tmp4)

print k*tmp5, k*tmp6 code code format="python"
 * 1) !/usr/bin/python

import string,sys,re,math,glob,os

for filename in glob.iglob(os.path.join('*.dat')): with open(filename) as f: print f

code The loop logic is as follows... read file, print file name, iterate to next file. Replace the print f line with whatever commands you wish to pass. Works really well and will read only in the directory the code is fed. To read recursively into every subdirectory, I found this... code format="python" import os import fnmatch

for dirpath, dirs, files in os.walk('Test'): for filename in fnmatch.filter(files, '*.txt'): with open(os.path.join(dirpath, filename)):

code And now we remove the path dependence of the script. So long as the script is in your $PATH, you can read data in from any directory using a scheme like this... code format="python"
 * 1) !/usr/bin/python

import string,sys,re,math,glob,os

dirname = os.getcwd for filename in os.listdir(dirname): root, ext = os.path.splitext(filename) if ext =='.sum': f1 = open(filename,'r')

code