Making some of my models public.
BIN
training_data/MNISTDataset.docx
Normal file
Binary file not shown.
138
training_data/ams_MNIST_load.py
Normal file
@@ -0,0 +1,138 @@
#!/usr/bin/python3

import sys
sys.path.append('/home/aschinde/workspace/projects_python/library')

import os
import math
import numpy as np
import cv2

import gzip    # the MNIST files are gzipped, so use gzip.open instead of open

import struct
# struct.unpack allows some interpretation of Python binary data.
# Example:
#
#   import struct
#
#   data = open("from_fortran.bin", "rb").read()
#
#   (eight, N) = struct.unpack("@II", data)
#
# This unpacks the first two fields, assuming they start at the very
# beginning of the file (no padding or extraneous data), and also assuming
# native byte order (the @ symbol). The I's in the format string mean
# "unsigned integer, 32 bits".
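# Added runnable version of the note above (the Fortran file itself isn't in
# this repo, so the demo packs its own two-field header first):
def _struct_demo():
    blob = struct.pack("@II", 8, 5)          # two native uint32 fields
    (eight, N) = struct.unpack("@II", blob)  # -> (8, 5)
    return eight, N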
# For integers:
#   a = int.from_bytes(b'\xaf\xc2R', byteorder='little')
#   a.to_bytes(nbytes, byteorder='big')
# An analogous operation doesn't seem to exist for plain floats --
# but what about numpy?
# https://www.devdungeon.com/content/working-binary-data-python
#
#   print("{:02d}".format(2))
#   b = bytes.fromhex('010203040506')   # fromhex is a classmethod on bytes
#   b.hex()
#   c = b.decode(encoding='utf-8')      # or 'latin-1', 'ascii', ...
#   print(c)
# numpy arrays have tobytes(), and np.frombuffer() converts bytes back to a
# given dtype:
#
#   q = np.array([15], dtype=np.uint8)
#   q.tobytes()
#   q.tobytes(order='C')   # options are 'C' and 'F'
#   q2 = np.frombuffer(q.tobytes(), dtype=np.uint8)
#   np.frombuffer(buffer, dtype=float, count=-1, offset=0)
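# Added runnable version of the numpy notes above: round-trip a uint8 array
# through raw bytes and back.
def _numpy_bytes_demo():
    q = np.array([15], dtype=np.uint8)
    return np.frombuffer(q.tobytes(order='C'), dtype=np.uint8)  # array([15], dtype=uint8)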
# You could also use the < and > endianness format codes in the struct
# module to achieve the same result:
#
#   >>> struct.pack('<2h', *struct.unpack('>2h', original))
#   '\xde\xad\xc0\xde'
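# Added demo (not in the original notes): why '>' matters when reading the
# idx headers below. 2049 is the magic number of the MNIST label files.
def _endianness_demo():
    raw = (2049).to_bytes(4, byteorder='big')  # the four header bytes 00 00 08 01
    big = struct.unpack('>I', raw)[0]          # 2049: correct, headers are big-endian
    native = struct.unpack('<I', raw)[0]       # 17301504 on little-endian hosts
    return big, native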
def bytereverse(bts):
    """Reverse the byte order of a bytes object.

    Kept from endianness debugging; no longer needed now that the header
    fields are decoded with struct.unpack('>I', ...) directly.
    """
    return bts[::-1]
# Read labels
def read_MNIST_label_file(fname):
    """Read an MNIST idx1-ubyte label file, e.g. train-labels-idx1-ubyte.gz."""
    fp = gzip.open(fname, 'rb')
    magic = fp.read(4)  # magic number; not checked here
    bts = fp.read(4)
    # A plain np.frombuffer(bts, dtype=np.int32) misreads this field: the idx
    # headers use a non-native (big-endian) integer encoding, so decode with
    # struct's explicit endianness codes ('>' big, '<' little, '@' native).
    nitems = np.int32(struct.unpack('>I', bts)[0])

    bts = fp.read(nitems)
    labels = np.frombuffer(bts, dtype=np.uint8, count=len(bts))
    fp.close()
    return labels
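# Added sketch: the idx magic numbers are 2049 (0x00000801, idx1 label files)
# and 2051 (0x00000803, idx3 image files); a caller could verify the header:
def _check_idx_magic(magic, expected):
    return struct.unpack('>I', magic)[0] == expected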
def read_MNIST_image_file(fname):
    """Read an MNIST idx3-ubyte image file into an (nitems, nrows, ncols) uint8 array."""
    fp = gzip.open(fname, 'rb')
    magic = fp.read(4)  # magic number; not checked here
    nitems = np.int32(struct.unpack('>I', fp.read(4))[0])
    nrows = np.int32(struct.unpack('>I', fp.read(4))[0])
    ncols = np.int32(struct.unpack('>I', fp.read(4))[0])

    images = np.zeros((nitems, nrows, ncols), dtype=np.uint8)
    for i in range(nitems):
        bts = fp.read(nrows * ncols)
        img = np.frombuffer(bts, dtype=np.uint8, count=nrows * ncols)
        images[i, :, :] = img.reshape((nrows, ncols))

    fp.close()
    return images
def read_training_data():
    rootdir = '/home/aschinde/workspace/machinelearning/datasets/MNIST'
    fname1 = 'train-labels-idx1-ubyte.gz'
    fname2 = 'train-images-idx3-ubyte.gz'

    labels = read_MNIST_label_file(os.path.join(rootdir, fname1))
    images = read_MNIST_image_file(os.path.join(rootdir, fname2))

    return [labels, images]
def read_test_data():
    rootdir = '/home/aschinde/workspace/machinelearning/datasets/MNIST'
    fname1 = 't10k-labels-idx1-ubyte.gz'
    fname2 = 't10k-images-idx3-ubyte.gz'

    labels = read_MNIST_label_file(os.path.join(rootdir, fname1))
    images = read_MNIST_image_file(os.path.join(rootdir, fname2))

    return [labels, images]
def show_MNIST_image(img):
    import matplotlib.pyplot as plt
    plt.figure()
    plt.imshow(255 - img, cmap='gray')  # invert so digits render dark on white
    plt.show()
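# Added usage sketch (assumes the dataset paths above exist): load the
# training set and display the first digit.
if __name__ == '__main__':
    labels, images = read_training_data()
    print(labels.shape, images.shape)  # expected: (60000,) and (60000, 28, 28)
    show_MNIST_image(images[0])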
92
training_data/an_mnist_loader.py
Normal file
@@ -0,0 +1,92 @@
#!/usr/bin/python3

"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

## sigh: If you want it to run today, write it in Python.
## If you want it to run tomorrow, write it in ANYTHING ELSE.

#### Libraries
# Standard library
import pickle as cPickle  # Python 3: the old cPickle module is just pickle now
import gzip

# Third-party libraries
import numpy as np
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images. This is a
    numpy ndarray with 50,000 entries. Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries. Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    # This loader expects the pickled dataset, not the raw idx files handled
    # by ams_MNIST_load.py. The pickle was written by Python 2, so Python 3
    # needs encoding='latin1' to read it.
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = cPickle.load(f, encoding='latin1')
    f.close()
    return (training_data, validation_data, test_data)
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
    containing the input image. ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit values (integers)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data. These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    # list(...) because Python 3's zip is a one-shot iterator, and training
    # code iterates over the data more than once.
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)
def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere. This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
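# Added usage sketch (assumes ../data/mnist.pkl.gz exists): inspect one
# (x, y) training pair produced by the wrapper.
if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_wrapper()
    x, y = training_data[0]
    print(x.shape, y.shape)  # expected: (784, 1) and (10, 1)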
BIN
training_data/t10k-images-idx3-ubyte.gz
Normal file
Binary file not shown.
BIN
training_data/t10k-labels-idx1-ubyte.gz
Normal file
Binary file not shown.
BIN
training_data/train-images-idx3-ubyte.gz
Normal file
Binary file not shown.
BIN
training_data/train-labels-idx1-ubyte.gz
Normal file
Binary file not shown.