Making some of my models public.

This commit is contained in:
2025-02-04 21:49:19 -05:00
commit eb2c910e29
39 changed files with 3564 additions and 0 deletions

Binary file not shown.

View File

@ -0,0 +1,138 @@
#!/usr/bin/python3
import sys
sys.path.append('/home/aschinde/workspace/projects_python/library')
import os,sys,math
import numpy as np
import cv2;
import gzip #May need to use gzip.open instead of open
import struct
#struct unpack allows some interpretation of python binary data
#Example
##import struct
##
##data = open("from_fortran.bin", "rb").read()
##
##(eight, N) = struct.unpack("@II", data)
##
##This unpacks the first two fields, assuming they start at the very
##beginning of the file (no padding or extraneous data), and also assuming
##native byte-order (the @ symbol). The Is in the formatting string mean
##"unsigned integer, 32 bits".
#for integers
#a = int
#a.from_bytes(b'\xaf\xc2R',byteorder='little')
#a.to_bytes(nbytes,byteorder='big')
#an analogous operation doesn't seem to exist for floats
#what about numpy?
#https://www.devdungeon.com/content/working-binary-data-python
#print("{:02d}".format(2))
#b = b.fromhex('010203040506')
#b.hex()
#c = b.decode(encoding='utf-8' or 'latin-1' or 'ascii'...)
#print(c)
#numpy arrays have tobytes
#numpy arrays have frombuffer (converts to dtypes)
#
#q = np.array([15],dtype=np.uint8);
#q.tobytes();
#q.tobytes(order='C') (options are 'C' and 'F')
#q2 = np.frombuffer(q.tobytes(),dtype=np.uint8)
#np.frombuffer(buffer,dtype=float,count=-1,offset=0)
##You could also use the < and > endianess format codes in the struct
##module to achieve the same result:
##
##>>> struct.pack('<2h', *struct.unpack('>2h', original))
##'\xde\xad\xc0\xde'
def bytereverse(bts):
    """Return the elements of ``bts`` in reverse order.

    Reversing the bytes of a single fixed-width field flips its endianness
    (e.g. a big-endian 4-byte integer becomes little-endian). The whole
    buffer is reversed as one unit; it is not split into fields first.

    Args:
        bts: A ``bytes`` or ``bytearray`` object (any sliceable sequence).

    Returns:
        A new sequence of the same type as ``bts`` with its elements in
        reverse order. Empty and single-element inputs come back unchanged.
    """
    # The previous body handed the input back untouched, so the function
    # never did what its name promises. A reversed slice does the actual
    # reversal and preserves the input type (bytes stays bytes).
    return bts[::-1]
#Read Labels
def read_MNIST_label_file(fname):
    """Read a gzip-compressed MNIST label file (``*-labels-idx1-ubyte.gz``).

    The file begins with two big-endian unsigned 32-bit integers: a magic
    number (read but not validated) and the number of labels. One unsigned
    byte per label follows.

    Args:
        fname: Path to the gzip-compressed label file.

    Returns:
        A 1-D ``np.uint8`` array with one entry per label. The array is a
        view on the bytes read from the file and is therefore read-only.
        If the file holds fewer label bytes than the header announces, only
        the bytes actually present are returned.

    Raises:
        struct.error: If the 8-byte header is incomplete.
    """
    # The context manager closes the file even when header parsing fails.
    with gzip.open(fname, 'rb') as fp:
        header = fp.read(8)
        # '>' selects big-endian: the integers are stored in a non-native
        # byte order, so a plain np.frombuffer(..., dtype=np.int32) would
        # misread them on a little-endian machine.
        _magic, nitems = struct.unpack('>II', header)
        bts = fp.read(nitems)
    return np.frombuffer(bts, dtype=np.uint8, count=len(bts))
def read_MNIST_image_file(fname):
    """Read a gzip-compressed MNIST image file (``*-images-idx3-ubyte.gz``).

    The file begins with four big-endian unsigned 32-bit integers: a magic
    number (read but not validated), the number of images, the number of
    rows and the number of columns. The pixel data follows as one unsigned
    byte per pixel, image after image, each image stored row by row.

    Args:
        fname: Path to the gzip-compressed image file.

    Returns:
        A writable ``np.uint8`` array of shape ``(nitems, nrows, ncols)``.

    Raises:
        struct.error: If the 16-byte header is incomplete.
        ValueError: If the file holds fewer pixel bytes than the header
            announces.
    """
    # The context manager closes the file even when parsing fails.
    with gzip.open(fname, 'rb') as fp:
        # '>' selects big-endian, the byte order used by the header fields.
        _magic, nitems, nrows, ncols = struct.unpack('>IIII', fp.read(16))
        npixels = nitems * nrows * ncols
        # One bulk read replaces a Python-level loop over every image.
        bts = fp.read(npixels)
    flat = np.frombuffer(bts, dtype=np.uint8, count=npixels)
    # frombuffer yields a read-only view on ``bts``; copy so that callers
    # receive an array they can modify, as before.
    return flat.reshape((nitems, nrows, ncols)).copy()
def read_training_data(rootdir='/home/aschinde/workspace/machinelearning/datasets/MNIST'):
    """Load the MNIST training labels and images.

    Args:
        rootdir: Directory containing ``train-labels-idx1-ubyte.gz`` and
            ``train-images-idx3-ubyte.gz``. Defaults to the location that
            used to be hard-coded, so existing callers are unaffected.

    Returns:
        A two-element list ``[labels, images]`` holding the results of
        ``read_MNIST_label_file`` and ``read_MNIST_image_file``.
    """
    label_path = os.path.join(rootdir, 'train-labels-idx1-ubyte.gz')
    image_path = os.path.join(rootdir, 'train-images-idx3-ubyte.gz')
    labels = read_MNIST_label_file(label_path)
    images = read_MNIST_image_file(image_path)
    return [labels, images]
def read_test_data(rootdir='/home/aschinde/workspace/machinelearning/datasets/MNIST'):
    """Load the MNIST test labels and images.

    Args:
        rootdir: Directory containing ``t10k-labels-idx1-ubyte.gz`` and
            ``t10k-images-idx3-ubyte.gz``. Defaults to the location that
            used to be hard-coded, so existing callers are unaffected.

    Returns:
        A two-element list ``[labels, images]`` holding the results of
        ``read_MNIST_label_file`` and ``read_MNIST_image_file``.
    """
    label_path = os.path.join(rootdir, 't10k-labels-idx1-ubyte.gz')
    image_path = os.path.join(rootdir, 't10k-images-idx3-ubyte.gz')
    labels = read_MNIST_label_file(label_path)
    images = read_MNIST_image_file(image_path)
    return [labels, images]
def show_MNIST_image(img):
    """Display ``img`` in a new matplotlib figure.

    The image is drawn as ``255 - img`` with the ``gray`` colormap, and
    ``show()`` is called before the function returns.

    Args:
        img: Image data; it must support ``255 - img`` and be accepted by
            ``matplotlib.pyplot.imshow``.

    Returns:
        None.
    """
    # Imported here so matplotlib is only needed when an image is displayed.
    from matplotlib import pyplot

    pyplot.figure()
    inverted = 255 - img
    pyplot.imshow(inverted, cmap='gray')
    pyplot.show()

View File

@ -0,0 +1,92 @@
#!/usr/bin/python3
"""
mnist_loader
~~~~~~~~~~~~
A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""
##sigh: If you want it to run today, write it in Python.
##If you want it to run tomorrow, write it in ANYTHING ELSE
#### Libraries
# Standard library
##import cPickle
import pickle as cPickle
import gzip
# Third-party libraries
import numpy as np
def load_data(path='../data/mnist.pkl.gz'):
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.

    Args:
        path: Path to a gzip-compressed pickle holding the 3-tuple
            ``(training_data, validation_data, test_data)``.

    Returns:
        The tuple ``(training_data, validation_data, test_data)`` exactly
        as stored in the pickle.
    """
    # The previous body opened './t10k-images-idx3-ubyte.gz', which is a raw
    # IDX image file rather than a pickle, so unpickling it could only fail.
    # The default now points at the pickle path that had been commented out.
    #
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # from a trusted source.
    with gzip.open(path, 'rb') as f:
        # encoding='latin1' lets Python 3 read a pickle written by Python 2
        # (presumably the case here, given the old cPickle import -- confirm
        # against the data file); it has no effect on Python 3 pickles.
        training_data, validation_data, test_data = cPickle.load(
            f, encoding='latin1')
    return (training_data, validation_data, test_data)
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
    containing the input image.  ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``.  In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit values (integers)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    # In Python 3 ``zip`` returns a one-shot iterator: it has no ``len()``,
    # cannot be indexed or shuffled, and is empty after one pass. Wrapping
    # it in ``list`` yields the lists the docstring promises.
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)
def vectorized_result(j):
    """Build the one-hot column vector for digit ``j``.

    The result is a ``(10, 1)`` float array that is zero everywhere except
    for a 1.0 at row ``j``. It is used to turn a digit (0...9) into the
    desired output of the neural network.
    """
    one_hot = np.zeros(shape=(10, 1))
    one_hot[j] = 1.0
    return one_hot

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.