Making some of my models public.
BIN
training_data/MNISTDataset.docx
Normal file
Binary file not shown.
138
training_data/ams_MNIST_load.py
Normal file
@@ -0,0 +1,138 @@
#!/usr/bin/python3

import sys
sys.path.append('/home/aschinde/workspace/projects_python/library')

import os
import math
import numpy as np
import cv2

import gzip    # the MNIST files are gzipped, so use gzip.open instead of open

import struct
# struct.unpack allows some interpretation of Python binary data.
# Example:
#
#   import struct
#
#   data = open("from_fortran.bin", "rb").read()
#
#   (eight, N) = struct.unpack("@II", data)
#
# This unpacks the first two fields, assuming they start at the very
# beginning of the file (no padding or extraneous data), and also assuming
# native byte order (the @ symbol). The I's in the format string mean
# "unsigned integer, 32 bits".
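# Added runnable version of the note above (the Fortran file itself isn't in
# this repo, so the demo packs its own two-field header first):
def _struct_demo():
    blob = struct.pack("@II", 8, 5)          # two native uint32 fields
    (eight, N) = struct.unpack("@II", blob)  # -> (8, 5)
    return eight, N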
# For integers:
#   a = int.from_bytes(b'\xaf\xc2R', byteorder='little')
#   a.to_bytes(nbytes, byteorder='big')
# An analogous operation doesn't seem to exist for plain floats --
# but what about numpy?
# https://www.devdungeon.com/content/working-binary-data-python
#
#   print("{:02d}".format(2))
#   b = bytes.fromhex('010203040506')   # fromhex is a classmethod on bytes
#   b.hex()
#   c = b.decode(encoding='utf-8')      # or 'latin-1', 'ascii', ...
#   print(c)
# numpy arrays have tobytes(), and np.frombuffer() converts bytes back to a
# given dtype:
#
#   q = np.array([15], dtype=np.uint8)
#   q.tobytes()
#   q.tobytes(order='C')   # options are 'C' and 'F'
#   q2 = np.frombuffer(q.tobytes(), dtype=np.uint8)
#   np.frombuffer(buffer, dtype=float, count=-1, offset=0)
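# Added runnable version of the numpy notes above: round-trip a uint8 array
# through raw bytes and back.
def _numpy_bytes_demo():
    q = np.array([15], dtype=np.uint8)
    return np.frombuffer(q.tobytes(order='C'), dtype=np.uint8)  # array([15], dtype=uint8)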
# You could also use the < and > endianness format codes in the struct
# module to achieve the same result:
#
#   >>> struct.pack('<2h', *struct.unpack('>2h', original))
#   '\xde\xad\xc0\xde'
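# Added demo (not in the original notes): why '>' matters when reading the
# idx headers below. 2049 is the magic number of the MNIST label files.
def _endianness_demo():
    raw = (2049).to_bytes(4, byteorder='big')  # the four header bytes 00 00 08 01
    big = struct.unpack('>I', raw)[0]          # 2049: correct, headers are big-endian
    native = struct.unpack('<I', raw)[0]       # 17301504 on little-endian hosts
    return big, native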
def bytereverse(bts):
    """Reverse the byte order of a bytes object.

    Kept from endianness debugging; no longer needed now that the header
    fields are decoded with struct.unpack('>I', ...) directly.
    """
    return bts[::-1]
# Read labels
def read_MNIST_label_file(fname):
    """Read an MNIST idx1-ubyte label file, e.g. train-labels-idx1-ubyte.gz."""
    fp = gzip.open(fname, 'rb')
    magic = fp.read(4)  # magic number; not checked here
    bts = fp.read(4)
    # A plain np.frombuffer(bts, dtype=np.int32) misreads this field: the idx
    # headers use a non-native (big-endian) integer encoding, so decode with
    # struct's explicit endianness codes ('>' big, '<' little, '@' native).
    nitems = np.int32(struct.unpack('>I', bts)[0])

    bts = fp.read(nitems)
    labels = np.frombuffer(bts, dtype=np.uint8, count=len(bts))
    fp.close()
    return labels
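# Added sketch: the idx magic numbers are 2049 (0x00000801, idx1 label files)
# and 2051 (0x00000803, idx3 image files); a caller could verify the header:
def _check_idx_magic(magic, expected):
    return struct.unpack('>I', magic)[0] == expected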
def read_MNIST_image_file(fname):
    """Read an MNIST idx3-ubyte image file into an (nitems, nrows, ncols) uint8 array."""
    fp = gzip.open(fname, 'rb')
    magic = fp.read(4)  # magic number; not checked here
    nitems = np.int32(struct.unpack('>I', fp.read(4))[0])
    nrows = np.int32(struct.unpack('>I', fp.read(4))[0])
    ncols = np.int32(struct.unpack('>I', fp.read(4))[0])

    images = np.zeros((nitems, nrows, ncols), dtype=np.uint8)
    for i in range(nitems):
        bts = fp.read(nrows * ncols)
        img = np.frombuffer(bts, dtype=np.uint8, count=nrows * ncols)
        images[i, :, :] = img.reshape((nrows, ncols))

    fp.close()
    return images
def read_training_data():
    rootdir = '/home/aschinde/workspace/machinelearning/datasets/MNIST'
    fname1 = 'train-labels-idx1-ubyte.gz'
    fname2 = 'train-images-idx3-ubyte.gz'

    labels = read_MNIST_label_file(os.path.join(rootdir, fname1))
    images = read_MNIST_image_file(os.path.join(rootdir, fname2))

    return [labels, images]
def read_test_data():
    rootdir = '/home/aschinde/workspace/machinelearning/datasets/MNIST'
    fname1 = 't10k-labels-idx1-ubyte.gz'
    fname2 = 't10k-images-idx3-ubyte.gz'

    labels = read_MNIST_label_file(os.path.join(rootdir, fname1))
    images = read_MNIST_image_file(os.path.join(rootdir, fname2))

    return [labels, images]
def show_MNIST_image(img):
    import matplotlib.pyplot as plt
    plt.figure()
    plt.imshow(255 - img, cmap='gray')  # invert so digits render dark on white
    plt.show()
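# Added usage sketch (assumes the dataset paths above exist): load the
# training set and display the first digit.
if __name__ == '__main__':
    labels, images = read_training_data()
    print(labels.shape, images.shape)  # expected: (60000,) and (60000, 28, 28)
    show_MNIST_image(images[0])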
92
training_data/an_mnist_loader.py
Normal file
@@ -0,0 +1,92 @@
#!/usr/bin/python3

"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

## sigh: If you want it to run today, write it in Python.
## If you want it to run tomorrow, write it in ANYTHING ELSE.

#### Libraries
# Standard library
import pickle as cPickle  # Python 3: the old cPickle module is just pickle now
import gzip

# Third-party libraries
import numpy as np
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images. This is a
    numpy ndarray with 50,000 entries. Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries. Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    # This loader expects the pickled dataset, not the raw idx files handled
    # by ams_MNIST_load.py. The pickle was written by Python 2, so Python 3
    # needs encoding='latin1' to read it.
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = cPickle.load(f, encoding='latin1')
    f.close()
    return (training_data, validation_data, test_data)
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
    containing the input image. ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit values (integers)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data. These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    # list(...) because Python 3's zip is a one-shot iterator, and training
    # code iterates over the data more than once.
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)
def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere. This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
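# Added usage sketch (assumes ../data/mnist.pkl.gz exists): inspect one
# (x, y) training pair produced by the wrapper.
if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_wrapper()
    x, y = training_data[0]
    print(x.shape, y.shape)  # expected: (784, 1) and (10, 1)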
BIN
training_data/t10k-images-idx3-ubyte.gz
Normal file
Binary file not shown.
BIN
training_data/t10k-labels-idx1-ubyte.gz
Normal file
Binary file not shown.
BIN
training_data/train-images-idx3-ubyte.gz
Normal file
Binary file not shown.
BIN
training_data/train-labels-idx1-ubyte.gz
Normal file
Binary file not shown.