author    Alexander M Pickering <amp215@pitt.edu>  2025-02-01 02:24:13 -0600
committer Alexander M Pickering <amp215@pitt.edu>  2025-02-01 02:24:13 -0600
commit    61bdb4fef88c1e83787dbb023b51d8d200844e3a (patch)
tree      6d905b6f61a0e932b1ace9771c714a80e0388af0 /task_o.py
Initial commit (HEAD, master)
Diffstat (limited to 'task_o.py')
-rw-r--r--  task_o.py  258
1 file changed, 258 insertions(+), 0 deletions(-)
diff --git a/task_o.py b/task_o.py
new file mode 100644
index 0000000..108f3d8
--- /dev/null
+++ b/task_o.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python
+# Copyright 2016 Google Inc. All Rights Reserved.
+# Modifications by dkoes.
+# More modifications by Alex P.
+
+"""This is based on:
+
+https://github.com/GoogleCloudPlatform/cloudml-samples/blob/master/mnist/deployable/trainer/task.py
+It includes support for training and prediction on the Google Cloud ML service.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import os.path
+import subprocess
+import tempfile
+import time
+import sys
+import csv
+from google.cloud import bigquery as bq
+from sklearn import preprocessing
+from scipy import misc
+
+import numpy as np
+
+from six.moves import xrange # pylint: disable=redefined-builtin
+import tensorflow as tf
+import tensorflow.contrib.slim as slim
+from tensorflow.python.lib.io import file_io
+
+query_client = bq.Client()
+
+# Basic model parameters as external flags.
+
+flags = tf.app.flags
+FLAGS = flags.FLAGS
+flags.DEFINE_integer('max_steps', 1000, 'Number of steps to run trainer.')
+flags.DEFINE_integer('batch_size', 20, 'Batch size.')
+flags.DEFINE_string('train_data_db', '[mscbiofin:eventdata.datamore4]', 'BigQuery table containing the training event data.')
+flags.DEFINE_string('start_date', '19741210', 'Start date for training (YYYYMMDD).')
+flags.DEFINE_string('end_date', '20161210', 'End date for training (YYYYMMDD).')
+
+flags.DEFINE_integer('hidden1', 1024, 'Number of units in hidden layer 1.')
+flags.DEFINE_integer('hidden2', 1024, 'Number of units in hidden layer 2.')
+flags.DEFINE_integer('hidden3', 1024, 'Number of units in hidden layer 3.')
+flags.DEFINE_integer('hidden4', 1024, 'Number of units in hidden layer 4.')
+flags.DEFINE_integer('hidden5', 1024, 'Number of units in hidden layer 5.')
+flags.DEFINE_integer('hidden6', 1024, 'Number of units in hidden layer 6.')
+flags.DEFINE_integer('hidden7', 1024, 'Number of units in hidden layer 7.')
+flags.DEFINE_integer('hidden8', 1024, 'Number of units in hidden layer 8.')
+flags.DEFINE_integer('hidden9', 1024, 'Number of units in hidden layer 9.')
+flags.DEFINE_integer('hidden10', 1024, 'Number of units in hidden layer 10.')
+
+flags.DEFINE_string('train_output_dir', 'data', 'Directory to put the training data.')
+flags.DEFINE_string('model_dir', 'model', 'Directory to put the model into.')
+
+# Feel free to add additional flags to assist in setting hyperparameters; an illustrative example follows.
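+# Illustrative example only (an assumption, not part of the original file): an
+# extra hyperparameter flag can be declared the same way as the flags above.
+# It is unused unless wired into the AdamOptimizer call in run_training().
+flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate for Adam.')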
+
+# Get labels by running sql queries.
+
+# Open the financial data and hold it in memory.
+
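+# A minimal sketch of the label query described above (an assumption, not the
+# assignment's required implementation). It uses the standard
+# google.cloud.bigquery client API; the table reference and the SQLDATE column
+# used for date filtering are placeholders.
+def fetch_label_rows(table='mscbiofin.eventdata.datamore4'):
+    sql = ('SELECT * FROM `{}` WHERE SQLDATE BETWEEN {} AND {}'
+           .format(table, FLAGS.start_date, FLAGS.end_date))
+    return list(query_client.query(sql).result())
+
+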
+def read_training_list():
+ """
+    Read <train_data_dir>/TRAIN, which contains paths and labels in
+    the format: label, channel1 file, channel2 file, channel3 file
+    Returns:
+    A list of (one-hot label, filenames) pairs and the number of classes
+ """
+    image_list_file = FLAGS.train_data_dir + '/TRAIN'  # NOTE: no train_data_dir flag is defined above (only train_data_db)
+ f = file_io.FileIO(image_list_file, 'r') #this can read files from the cloud
+ filenames = []
+ labels = []
+    n_classes = len(labelmap)  # NOTE: labelmap (label -> index dict) is not defined in this file
+ for line in f:
+ label, c1, c2, c3 = line.rstrip().split(' ')
+ #convert labels into onehot encoding
+ onehot = np.zeros(n_classes)
+ onehot[labelmap[label]] = 1.0
+ labels.append(onehot)
+ #create absolute paths for image files
+ filenames.append([ FLAGS.train_data_dir + '/' + c for c in (c1,c2,c3)])
+
+    return list(zip(labels, filenames)), n_classes  # list() so np.random.shuffle works on the result
+
+
+class Fetcher:
+ '''Provides batches of images'''
+    #TODO - you probably want to modify this to implement data augmentation (see the augment_batch sketch after this class)
+    def __init__(self, examples, stockfile=None):
+        self.current = 0
+        self.cache = {}
+        self.examples = examples
+        self.stocks = {}
+        if stockfile is not None:
+            for row in csv.reader(stockfile, delimiter=','):
+                date = row[0]
+                date = int(date.replace("-",""))
+                diff = float(row[4]) - float(row[1])
+                self.stocks[date] = diff
+
+    def load_batch(self, batchsize):
+        print("I want to get stocks[" + str(self.current) + "]")
+ #Implement a cache for mysql
+ events = []
+ stockchange = 0
+ x_batch = []
+ y_batch = []
+ for i in xrange(batchsize):
+ label, files = self.examples[(self.current+i) % len(self.examples)]
+ label = label.flatten()
+ # If you are getting an error reading the image, you probably have
+ # the legacy PIL library installed instead of Pillow
+ # You need Pillow
+ channels = [ misc.imread(file_io.FileIO(f,'r')) for f in files]
+ x_batch.append(np.dstack(channels))
+ y_batch.append(label)
+
+ self.current = (self.current + batchsize) % len(self.examples)
+ return np.array(x_batch), np.array(y_batch)
+
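+# A minimal data-augmentation sketch for the TODO noted in Fetcher above (an
+# assumption, not a required implementation): randomly flip each stacked image
+# left/right; it could be applied to x_batch before load_batch() returns it.
+def augment_batch(x_batch):
+    out = []
+    for img in x_batch:
+        if np.random.rand() < 0.5:
+            img = np.fliplr(img)
+        out.append(img)
+    return np.array(out)
+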
+
+def network(inputs):
+ '''Define the network'''
+ with slim.arg_scope([slim.conv2d, slim.fully_connected],
+ activation_fn=tf.nn.relu,
+ weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
+ weights_regularizer=slim.l2_regularizer(0.0005)):
+ net = tf.reshape(inputs,[-1, 512,512,3])
+ net = slim.conv2d(net, 32, [3,3], scope='conv1')
+    net = slim.max_pool2d(net, [4,4], scope = 'pool1')
+ net = slim.conv2d(net,64,[3,3], scope = 'conv2')
+ net = slim.max_pool2d(net,[4,4], scope = 'pool2')
+ net = slim.flatten(net)
+ net = slim.fully_connected(net,64, scope = 'fc')
+ net = slim.fully_connected(net, 13, activation_fn = None, scope = 'output')
+ return net
+
+def run_training():
+
+ #Read the training data
+ examples, n_classes = read_training_list() #TODO:Replace this
+ np.random.seed(42) #shuffle the same way each time for consistency
+ np.random.shuffle(examples)
+
+  fetcher = Fetcher(examples)
+
+ # Tell TensorFlow that the model will be built into the default Graph.
+ with tf.Graph().as_default():
+ # Generate placeholders for the images and labels and mark as input.
+
+ x = tf.placeholder(tf.float32, shape=(None, 512,512,3))
+ y_ = tf.placeholder(tf.float32, shape=(None, n_classes))
+
+ # See "Using instance keys": https://cloud.google.com/ml/docs/how-tos/preparing-models
+ # for why we have keys_placeholder
+ keys_placeholder = tf.placeholder(tf.int64, shape=(None,))
+
+ # IMPORTANT: Do not change the input map
+ inputs = {'key': keys_placeholder.name, 'image': x.name}
+ tf.add_to_collection('inputs', json.dumps(inputs))
+
+    # Build the network
+ net = network(x)
+
+ # Add to the Graph the Ops for loss calculation.
+ loss = slim.losses.softmax_cross_entropy(net, y_)
+ tf.scalar_summary(loss.op.name, loss) # keep track of value for TensorBoard
+
+ # To be able to extract the id, we need to add the identity function.
+ keys = tf.identity(keys_placeholder)
+
+ # The prediction will be the index in logits with the highest score.
+ # We also use a softmax operation to produce a probability distribution
+ # over all possible digits.
+ # DO NOT REMOVE OR CHANGE VARIABLE NAMES - used when predicting with a model
+ prediction = tf.argmax(net, 1)
+ scores = tf.nn.softmax(net)
+
+ # Mark the outputs.
+ outputs = {'key': keys.name,
+ 'prediction': prediction.name,
+ 'scores': scores.name}
+ tf.add_to_collection('outputs', json.dumps(outputs))
+
+ # Add to the Graph the Ops that calculate and apply gradients.
+ train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
+
+
+ # Build the summary operation based on the TF collection of Summaries.
+ summary_op = tf.merge_all_summaries()
+
+ # Add the variable initializer Op.
+ init = tf.initialize_all_variables()
+
+ # Create a saver for writing training checkpoints.
+ saver = tf.train.Saver()
+
+ # Create a session for running Ops on the Graph.
+ sess = tf.Session()
+
+ # Instantiate a SummaryWriter to output summaries and the Graph.
+ summary_writer = tf.train.SummaryWriter(FLAGS.train_output_dir, sess.graph)
+
+ # And then after everything is built:
+
+ # Run the Op to initialize the variables.
+ sess.run(init)
+
+ # Start the training loop.
+ for step in xrange(FLAGS.max_steps):
+ start_time = time.time()
+
+ # Fill a feed dictionary with the actual set of images and labels
+ # for this particular training step.
+ images, labels = fetcher.load_batch(FLAGS.batch_size)
+ feed_dict = {x: images, y_: labels}
+
+ # Run one step of the model. The return values are the activations
+ # from the `train_op` (which is discarded) and the `loss` Op. To
+ # inspect the values of your Ops or variables, you may include them
+ # in the list passed to sess.run() and the value tensors will be
+ # returned in the tuple from the call.
+ _, loss_value = sess.run([train_op, loss],
+ feed_dict=feed_dict)
+
+ duration = time.time() - start_time
+
+ # Write the summaries and print an overview fairly often.
+ if step % 1 == 0:
+ # Print status to stdout.
+ print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
+ sys.stdout.flush()
+ # Update the events file.
+ summary_str = sess.run(summary_op, feed_dict=feed_dict)
+ summary_writer.add_summary(summary_str, step)
+ summary_writer.flush()
+
+
+ # Export the model so that it can be loaded and used later for predictions.
+ file_io.create_dir(FLAGS.model_dir)
+ saver.save(sess, os.path.join(FLAGS.model_dir, 'export'))
+
+ #make world readable for submission to evaluation server
+ if FLAGS.model_dir.startswith('gs://'):
+ subprocess.call(['gsutil', 'acl','ch','-u','AllUsers:R', FLAGS.model_dir])
+
+ #You probably want to implement some sort of model evaluation here
+ #TODO TODO TODO
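+    # A minimal evaluation sketch for the TODO above (an assumption, not the
+    # assignment's required evaluation): argmax accuracy on one more batch drawn
+    # from the training fetcher, since no separate held-out set exists here.
+    eval_images, eval_labels = fetcher.load_batch(FLAGS.batch_size)
+    predicted = sess.run(prediction, feed_dict={x: eval_images})
+    accuracy = np.mean(predicted == np.argmax(eval_labels, axis=1))
+    print('Accuracy on one training batch: %.3f' % accuracy)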
+
+def main(_):
+ run_training()
+
+
+if __name__ == '__main__':
+ tf.app.run()