You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
139 lines
3.9 KiB
Python
139 lines
3.9 KiB
Python
3 weeks ago
|
#!/usr/bin/python3
|
||
|
|
||
|
import sys
|
||
|
sys.path.append('/home/aschinde/workspace/projects_python/library')
|
||
|
|
||
|
import os,sys,math
|
||
|
import numpy as np
|
||
|
import cv2;
|
||
|
|
||
|
import gzip #May need to use gzip.open instead of open
|
||
|
|
||
|
import struct
|
||
|
#struct unpack allows some interpretation of python binary data
|
||
|
#Example
|
||
|
##import struct
|
||
|
##
|
||
|
##data = open("from_fortran.bin", "rb").read()
|
||
|
##
|
||
|
##(eight, N) = struct.unpack("@II", data)
|
||
|
##
|
||
|
##This unpacks the first two fields, assuming they start at the very
|
||
|
##beginning of the file (no padding or extraneous data), and also assuming
|
||
|
##native byte-order (the @ symbol). The Is in the formatting string mean
|
||
|
##"unsigned integer, 32 bits".
|
||
|
|
||
|
#for integers
|
||
|
#a = int
|
||
|
#a.from_bytes(b'\xaf\xc2R',byteorder='little')
|
||
|
#a.to_bytes(nbytes,byteorder='big')
|
||
|
#analogous operation doesn't seem to exist for floats
|
||
|
#what about numpy?
|
||
|
|
||
|
|
||
|
#https://www.devdungeon.com/content/working-binary-data-python
|
||
|
|
||
|
#print("{:02d}".format(2))
|
||
|
#b = b.fromhex('010203040506')
|
||
|
#b.hex()
|
||
|
#c = b.decode(encoding='utf-8' or 'latin-1' or 'ascii'...)
|
||
|
#print(c)
|
||
|
|
||
|
#numpy arrays have tobytes
|
||
|
#numpy arrays have frombuffer (converts to dtypes)
|
||
|
#
|
||
|
#q = np.array([15],dtype=np.uint8);
|
||
|
#q.tobytes();
|
||
|
#q.tobytes(order='C') (options are 'C' and 'F')
|
||
|
#q2 = np.frombuffer(q.tobytes(),dtype=np.uint8)
|
||
|
#np.frombuffer(buffer,dtype=float,count=-1,offset=0)
|
||
|
|
||
|
##You could also use the < and > endianness format codes in the struct
|
||
|
##module to achieve the same result:
|
||
|
##
|
||
|
##>>> struct.pack('<2h', *struct.unpack('>2h', original))
|
||
|
##'\xde\xad\xc0\xde'
|
||
|
|
||
|
def bytereverse(bts):
    """Return ``bts`` with its bytes in reverse order.

    The earlier body returned its argument untouched, so the function did
    not do what its name says. Reversing the whole sequence converts a
    single fixed-width integer field between big- and little-endian layout.

    Args:
        bts: A ``bytes``-like sequence that supports slicing (``bytes`` or
            ``bytearray``).

    Returns:
        A new object of the same type as ``bts`` holding the same bytes in
        reverse order. An empty input yields an empty result.
    """
    # Slicing with a negative step builds the reversed copy in one step and
    # keeps the input type (bytes stays bytes, bytearray stays bytearray).
    return bts[::-1]
|
||
|
|
||
|
#Read Labels
|
||
|
def read_MNIST_label_file(fname):
    """Read a gzip-compressed MNIST label file into a uint8 array.

    Layout consumed by this reader: 4 bytes that are skipped (the file's
    magic field), a 4-byte big-endian unsigned item count, then one
    unsigned byte per label.

    Args:
        fname: Path of the gzip file to read.

    Returns:
        A 1-D ``np.uint8`` array with one entry per label byte actually
        read. It is built with ``np.frombuffer`` over the bytes read, so it
        is a view of that buffer rather than a fresh copy.

    Raises:
        struct.error: If the file ends before the 4-byte count is complete.
        OSError: If the file cannot be opened or is not valid gzip data.
    """
    # The context manager closes the file even when a read or unpack fails.
    with gzip.open(fname, 'rb') as fp:
        fp.read(4)  # magic field: read to advance the stream, not checked
        # '>I' = big-endian unsigned 32-bit; the count is not stored in the
        # machine's native byte order, hence the explicit '>'.
        (nitems,) = struct.unpack('>I', fp.read(4))
        raw = fp.read(nitems)

    # Size the array by what was actually read so a short file yields a
    # shorter array instead of an error, as before.
    return np.frombuffer(raw, dtype=np.uint8, count=len(raw))
|
||
|
|
||
|
def read_MNIST_image_file(fname):
    """Read a gzip-compressed MNIST image file into a uint8 array.

    Layout consumed by this reader: a 16-byte header of four big-endian
    unsigned 32-bit fields (magic, item count, rows, columns; the magic is
    not checked), followed by ``count * rows * cols`` unsigned pixel bytes
    stored image after image.

    Args:
        fname: Path of the gzip file to read.

    Returns:
        A writable ``np.uint8`` array of shape ``(nitems, nrows, ncols)``.

    Raises:
        struct.error: If the file ends before the 16-byte header is complete.
        ValueError: If fewer pixel bytes are present than the header announces.
        OSError: If the file cannot be opened or is not valid gzip data.
    """
    # The context manager closes the file even when a read or unpack fails.
    with gzip.open(fname, 'rb') as fp:
        # Plain Python ints from struct avoid 32-bit wraparound in the
        # size arithmetic below.
        _magic, nitems, nrows, ncols = struct.unpack('>IIII', fp.read(16))
        nbytes = nitems * nrows * ncols
        # One bulk read replaces the per-image read loop.
        pixels = fp.read(nbytes)

    if len(pixels) != nbytes:
        raise ValueError(
            'image file is truncated: expected {} pixel bytes, got {}'.format(
                nbytes, len(pixels)))

    # Allocate an owned, writable array and copy the pixels in, so callers
    # can still modify the result.
    images = np.zeros((nitems, nrows, ncols), dtype=np.uint8)
    if nbytes:
        images[...] = np.frombuffer(pixels, dtype=np.uint8).reshape(images.shape)
    return images
|
||
|
|
||
|
def read_training_data(rootdir='/home/aschinde/workspace/machinelearning/datasets/MNIST'):
    """Load the MNIST training labels and images from ``rootdir``.

    Args:
        rootdir: Directory containing ``train-labels-idx1-ubyte.gz`` and
            ``train-images-idx3-ubyte.gz``. Defaults to the location that
            was previously hard-coded, so existing zero-argument calls
            behave as before.

    Returns:
        A two-element list ``[labels, images]`` holding the results of
        ``read_MNIST_label_file`` and ``read_MNIST_image_file``.
    """
    label_path = os.path.join(rootdir, 'train-labels-idx1-ubyte.gz')
    image_path = os.path.join(rootdir, 'train-images-idx3-ubyte.gz')

    labels = read_MNIST_label_file(label_path)
    images = read_MNIST_image_file(image_path)

    return [labels, images]
|
||
|
|
||
|
def read_test_data(rootdir='/home/aschinde/workspace/machinelearning/datasets/MNIST'):
    """Load the MNIST test (t10k) labels and images from ``rootdir``.

    Args:
        rootdir: Directory containing ``t10k-labels-idx1-ubyte.gz`` and
            ``t10k-images-idx3-ubyte.gz``. Defaults to the location that
            was previously hard-coded, so existing zero-argument calls
            behave as before.

    Returns:
        A two-element list ``[labels, images]`` holding the results of
        ``read_MNIST_label_file`` and ``read_MNIST_image_file``.
    """
    label_path = os.path.join(rootdir, 't10k-labels-idx1-ubyte.gz')
    image_path = os.path.join(rootdir, 't10k-images-idx3-ubyte.gz')

    labels = read_MNIST_label_file(label_path)
    images = read_MNIST_image_file(image_path)

    return [labels, images]
|
||
|
|
||
|
def show_MNIST_image(img):
    """Display one image in a new matplotlib figure using a gray colormap.

    The pixel values are subtracted from 255 before plotting, which inverts
    the intensities. The call blocks in ``plt.show()`` as usual for
    matplotlib.

    Args:
        img: Image data that supports ``255 - img`` and is accepted by
            ``plt.imshow`` (presumably one 2-D uint8 slice of the array
            returned by ``read_MNIST_image_file`` - confirm with callers).

    Returns:
        None.
    """
    # Imported here so the rest of the module works without matplotlib.
    import matplotlib.pyplot as plt

    inverted = 255 - img
    plt.figure()
    plt.imshow(inverted, cmap='gray')
    plt.show()
|
||
|
|