# Sentiment Analysis with Deep Learning

## Imports

!export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/anaconda/envs/aiking/lib/
# import numpy as np  # regular ol' numpy
import trax
from trax import layers as tl  # core building block
from trax import shapes  # data signatures: dimensionality and type
from trax import fastmath  # uses jax, offers numpy on steroids
from trax.fastmath import numpy as np
from trax.supervised import training
import pandas as pd
from sklearn.model_selection import train_test_split
from aiking.data.external import *
from fastcore.all import *  # provides compose, used in the cleaning pipeline below
from nltk.corpus import stopwords          # module for stop words that come with NLTK
from nltk.stem import PorterStemmer        # module for stemming
from nltk.tokenize import TweetTokenizer   # module for tokenizing strings
import string
import itertools
import random
import shutil
import os   # used when expanding the model output directory below
import re   # used by the regex-based cleaning helpers below
!pip list|grep jax
jax                                        0.3.15
jaxlib                                     0.3.15
jupyter-server-mathjax                     0.2.3
relu = tl.Relu()
relu
Serial[
  Relu
]
relu.name, relu.n_in, relu.n_out
('Serial', 1, 1)
x = np.array([-2,-1,0,1,2]); x
DeviceArray([-2, -1,  0,  1,  2], dtype=int32)
!nvidia-smi
Wed Jul 27 09:03:07 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.129.06   Driver Version: 470.129.06   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla K80           Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   49C    P8    29W / 149W |      3MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
relu(x)
DeviceArray([0, 0, 0, 1, 2], dtype=int32)
help(tl.Concatenate)
Help on class Concatenate in module trax.layers.combinators:

class Concatenate(trax.layers.base.Layer)
 |  Concatenate(n_items=2, axis=-1)
 |  
 |  Concatenates a number of tensors into a single tensor.
 |  
 |  For example::
 |  
 |      x = np.array([1, 2])
 |      y = np.array([3, 4])
 |      z = np.array([5, 6])
 |      concat3 = tl.Concatenate(n_items=3)
 |      z = concat3((x, y, z))  # z = [1, 2, 3, 4, 5, 6]
 |  
 |  Use the `axis` argument to specify on which axis to concatenate the tensors.
 |  By default it's the last axis, `axis=-1`, and `n_items=2`.
 |  
 |  Method resolution order:
 |      Concatenate
 |      trax.layers.base.Layer
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, n_items=2, axis=-1)
 |      Creates a partially initialized, unconnected layer instance.
 |      
 |      Args:
 |        n_in: Number of inputs expected by this layer.
 |        n_out: Number of outputs promised by this layer.
 |        name: Class-like name for this layer; for use when printing this layer.
 |        sublayers_to_print: Sublayers to display when printing out this layer;
 |          if None (the default), display all sublayers.
 |  
 |  forward(self, xs)
 |      Executes this layer as part of a forward pass through the model.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from trax.layers.base.Layer:
 |  
 |  __call__(self, x, weights=None, state=None, rng=None)
 |      Makes layers callable; for use in tests or interactive settings.
 |      
 |      This convenience method helps library users play with, test, or otherwise
 |      probe the behavior of layers outside of a full training environment. It
 |      presents the layer as callable function from inputs to outputs, with the
 |      option of manually specifying weights and non-parameter state per individual
 |      call. For convenience, weights and non-parameter state are cached per layer
 |      instance, starting from default values of `EMPTY_WEIGHTS` and `EMPTY_STATE`,
 |      and acquiring non-empty values either by initialization or from values
 |      explicitly provided via the weights and state keyword arguments, in which
 |      case the old weights will be preserved, and the state will be updated.
 |      
 |      Args:
 |        x: Zero or more input tensors, packaged as described in the `Layer` class
 |            docstring.
 |        weights: Weights or `None`; if `None`, use self's cached weights value.
 |        state: State or `None`; if `None`, use self's cached state value.
 |        rng: Single-use random number generator (JAX PRNG key), or `None`;
 |            if `None`, use a default computed from an integer 0 seed.
 |      
 |      Returns:
 |        Zero or more output tensors, packaged as described in the `Layer` class
 |        docstring.
 |  
 |  __repr__(self)
 |      Renders this layer as a medium-detailed string, to help in debugging.
 |      
 |      Subclasses should aim for high-signal/low-noise when overriding this
 |      method.
 |      
 |      Returns:
 |        A high signal-to-noise string representing this layer.
 |  
 |  __setattr__(self, attr, value)
 |      Sets class attributes and protects from typos.
 |      
 |      In Trax layers, we only allow to set the following public attributes::
 |      
 |        - weights
 |        - state
 |        - rng
 |      
 |      This function prevents from setting other public attributes to avoid typos,
 |      for example, this is not possible and would be without this function::
 |      
 |        [typo]   layer.weighs = some_tensor
 |      
 |      If you need to set other public attributes in a derived class (which we
 |      do not recommend as in almost all cases it suffices to use a private
 |      attribute), override self._settable_attrs to include the attribute name.
 |      
 |      Args:
 |        attr: Name of the attribute to be set.
 |        value: Value to be assigned to the attribute.
 |  
 |  backward(self, inputs, output, grad, weights, state, new_state, rng)
 |      Custom backward pass to propagate gradients in a custom way.
 |      
 |      Args:
 |        inputs: Input tensors; can be a (possibly nested) tuple.
 |        output: The result of running this layer on inputs.
 |        grad: Gradient signal computed based on subsequent layers; its structure
 |            and shape must match output.
 |        weights: This layer's weights.
 |        state: This layer's state prior to the current forward pass.
 |        new_state: This layer's state after the current forward pass.
 |        rng: Single-use random number generator (JAX PRNG key).
 |      
 |      Returns:
 |        The custom gradient signal for the input. Note that we need to return
 |        a gradient for each argument of forward, so it will usually be a tuple
 |        of signals: the gradient for inputs and weights.
 |  
 |  init(self, input_signature, rng=None, use_cache=False)
 |      Initializes weights/state of this layer and its sublayers recursively.
 |      
 |      Initialization creates layer weights and state, for layers that use them.
 |      It derives the necessary array shapes and data types from the layer's input
 |      signature, which is itself just shape and data type information.
 |      
 |      For layers without weights or state, this method safely does nothing.
 |      
 |      This method is designed to create weights/state only once for each layer
 |      instance, even if the same layer instance occurs in multiple places in the
 |      network. This enables weight sharing to be implemented as layer sharing.
 |      
 |      Args:
 |        input_signature: `ShapeDtype` instance (if this layer takes one input)
 |            or list/tuple of `ShapeDtype` instances.
 |        rng: Single-use random number generator (JAX PRNG key), or `None`;
 |            if `None`, use a default computed from an integer 0 seed.
 |        use_cache: If `True`, and if this layer instance has already been
 |            initialized elsewhere in the network, then return special marker
 |            values -- tuple `(GET_WEIGHTS_FROM_CACHE, GET_STATE_FROM_CACHE)`.
 |            Else return this layer's newly initialized weights and state.
 |      
 |      Returns:
 |        A `(weights, state)` tuple.
 |  
 |  init_from_file(self, file_name, weights_only=False, input_signature=None)
 |      Initializes this layer and its sublayers from a pickled checkpoint.
 |      
 |      In the common case (`weights_only=False`), the file must be a gziped pickled
 |      dictionary containing items with keys `'flat_weights', `'flat_state'` and
 |      `'input_signature'`, which are used to initialize this layer.
 |      If `input_signature` is specified, it's used instead of the one in the file.
 |      If `weights_only` is `True`, the dictionary does not need to have the
 |      `'flat_state'` item and the state it not restored either.
 |      
 |      Args:
 |        file_name: Name/path of the pickled weights/state file.
 |        weights_only: If `True`, initialize only the layer's weights. Else
 |            initialize both weights and state.
 |        input_signature: Input signature to be used instead of the one from file.
 |      
 |      Returns:
 |        A `(weights, state)` tuple.
 |  
 |  init_weights_and_state(self, input_signature)
 |      Initializes weights and state, to handle input with the given signature.
 |      
 |      A layer subclass must override this method if the layer uses weights or
 |      state. To initialize weights, set `self.weights` to desired (typically
 |      random) values. To initialize state (uncommon), set `self.state` to desired
 |      starting values.
 |      
 |      Args:
 |        input_signature: A `ShapeDtype` instance (if this layer takes one input)
 |            or a list/tuple of `ShapeDtype` instances.
 |  
 |  output_signature(self, input_signature)
 |      Returns output signature this layer would give for `input_signature`.
 |  
 |  pure_fn(self, x, weights, state, rng, use_cache=False)
 |      Applies this layer as a pure function with no optional args.
 |      
 |      This method exposes the layer's computation as a pure function. This is
 |      especially useful for JIT compilation. Do not override, use `forward`
 |      instead.
 |      
 |      Args:
 |        x: Zero or more input tensors, packaged as described in the `Layer` class
 |            docstring.
 |        weights: A tuple or list of trainable weights, with one element for this
 |            layer if this layer has no sublayers, or one for each sublayer if
 |            this layer has sublayers. If a layer (or sublayer) has no trainable
 |            weights, the corresponding weights element is an empty tuple.
 |        state: Layer-specific non-parameter state that can update between batches.
 |        rng: Single-use random number generator (JAX PRNG key).
 |        use_cache: if `True`, cache weights and state in the layer object; used
 |          to implement layer sharing in combinators.
 |      
 |      Returns:
 |        A tuple of `(tensors, state)`. The tensors match the number (`n_out`)
 |        promised by this layer, and are packaged as described in the `Layer`
 |        class docstring.
 |  
 |  save_to_file(self, file_name, weights_only=False, input_signature=None)
 |      Saves this layer and its sublayers to a pickled checkpoint.
 |      
 |      Args:
 |        file_name: Name/path of the pickled weights/state file.
 |        weights_only: If `True`, save only the layer's weights. Else
 |            save both weights and state.
 |        input_signature: Input signature to be used.
 |  
 |  weights_and_state_signature(self, input_signature, unsafe=False)
 |      Return a pair containing the signatures of weights and state.
 |  
 |  ----------------------------------------------------------------------
 |  Readonly properties inherited from trax.layers.base.Layer:
 |  
 |  has_backward
 |      Returns `True` if this layer provides its own custom backward pass code.
 |      
 |      A layer subclass that provides custom backward pass code (for custom
 |      gradients) must override this method to return `True`.
 |  
 |  n_in
 |      Returns how many tensors this layer expects as input.
 |  
 |  n_out
 |      Returns how many tensors this layer promises as output.
 |  
 |  name
 |      Returns the name of this layer.
 |  
 |  sublayers
 |      Returns a tuple containing this layer's sublayers; may be empty.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from trax.layers.base.Layer:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  rng
 |      Returns this layer's current single-use random number generator.
 |      
 |      Code that wants to base random samples on this generator must explicitly
 |      split off new generators from it. (See, for example, the `rng` setter code
 |      below.)
 |  
 |  state
 |      Returns a tuple containing this layer's state; may be empty.
 |      
 |      If the layer has sublayers, the state by convention will be
 |      a tuple of length `len(sublayers)` containing sublayer states.
 |      Note that in this case self._state only marks which ones are shared.
 |  
 |  weights
 |      Returns this layer's weights.
 |      
 |      Depending on the layer, the weights can be in the form of:
 |      
 |        - an empty tuple
 |        - a tensor (ndarray)
 |        - a nested structure of tuples and tensors
 |      
 |      If the layer has sublayers, the weights by convention will be
 |      a tuple of length `len(sublayers)` containing the weights of sublayers.
 |      Note that in this case self._weights only marks which ones are shared.
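The help text is easier to absorb with a concrete call; a minimal sketch using the defaults (n_items=2, axis=-1):

concat = tl.Concatenate()
concat((np.array([1, 2]), np.array([3, 4])))  # DeviceArray([1, 2, 3, 4], dtype=int32)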
# help(tl.LayerNorm)
help(shapes.signature)
Help on function signature in module trax.shapes:

signature(obj)
    Returns a `ShapeDtype` signature for the given `obj`.
    
    A signature is either a `ShapeDtype` instance or a tuple of `ShapeDtype`
    instances. Note that this function is permissive with respect to its inputs
    (accepts lists or tuples or dicts, and underlying objects can be any type
    as long as they have shape and dtype attributes) and returns the corresponding
    nested structure of `ShapeDtype`.
    
    Args:
      obj: An object that has `shape` and `dtype` attributes, or a list/tuple/dict
          of such objects.
    
    Returns:
      A corresponding nested structure of `ShapeDtype` instances.
norm = tl.LayerNorm()
x = np.array([0,1,2,3], dtype='float');x
/tmp/ipykernel_389065/4190421137.py:2: UserWarning: Explicitly requested dtype float requested in array is not available, and will be truncated to dtype float32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/google/jax#current-gotchas for more.
  x = np.array([0,1,2,3], dtype='float');x
DeviceArray([0., 1., 2., 3.], dtype=float32)
norm.init(shapes.signature(x))
((DeviceArray([1., 1., 1., 1.], dtype=float32),
  DeviceArray([0., 0., 0., 0.], dtype=float32)),
 ())
norm(x)
DeviceArray([-1.3416404 , -0.44721344,  0.44721344,  1.3416404 ], dtype=float32)
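As a sanity check, the normalized vector should have (approximately) zero mean and unit standard deviation:

y = norm(x)
np.mean(y), np.std(y)  # ~0.0 and ~1.0, up to the layer's epsilon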
# Define a custom Layer

def Power():
    layer_name = "Power"
    
    def func(x):
        return x**2
    
    return tl.Fn(layer_name, func)
power = Power()
power.name, power.n_in, power.n_out
('Power', 1, 1)
power(x)
DeviceArray([0., 1., 4., 9.], dtype=float32)
serial = tl.Serial(
    tl.LayerNorm(), 
    tl.Relu(),
    Power()
)

x = np.array([-2,-1,0,1,2])
serial.init(shapes.signature(x))
(((DeviceArray([1, 1, 1, 1, 1], dtype=int32),
   DeviceArray([0, 0, 0, 0, 0], dtype=int32)),
  ((), (), ()),
  ()),
 ((), ((), (), ()), ()))
serial(x)
DeviceArray([0.        , 0.        , 0.        , 0.49999973, 1.9999989 ],            dtype=float32)
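The combinator exposes the same introspection attributes as any single layer (see the inherited Layer properties in the help output above):

serial.name, serial.n_in, serial.n_out  # ('Serial', 1, 1)
serial.sublayers                        # the LayerNorm, Relu and Power layers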
class My_Class:
    def __init__(self, y):
        self.x = y
    def __call__(self, z):
        self.x += z
        print(self.x)
       
instance_c = My_Class(10); instance_c(3); instance_c.x
13
13
def f(x): return 3.0*x**2+x
grad_f  = trax.fastmath.grad(f)
f(2.0), grad_f(2.0)
(14.0, DeviceArray(13., dtype=float32, weak_type=True))
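Since grad_f is itself an ordinary traceable function, it can be differentiated again; for f(x) = 3x**2 + x the second derivative is the constant 6:

grad2_f = trax.fastmath.grad(grad_f)
grad2_f(2.0)  # DeviceArray(6., ...)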
a = [1,2,3,4]
b = [0]*10
a, b
([1, 2, 3, 4], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# lines_index = [*range(len(a))]; lines_index
# def f(ind, a_size): return ind%a_size
b = [a[ind%len(a)] for ind in range(len(b))]
b
[1, 2, 3, 4, 1, 2, 3, 4, 1, 2]

## Financial Sentiment Analysis

### Read Dataset

df = pd.read_csv("financial_sentiment.csv"); df.head()
|   | Sentence | Sentiment |
|---|----------|-----------|
| 0 | The GeoSolutions technology will leverage Benefon 's GPS solutions by providing Location Based Search Technology , a Communities Platform , location relevant multimedia content and a new and powerful commercial model . | positive |
| 1 | $ESI on lows, down $1.50 to $2.50 BK a real possibility | negative |
| 2 | For the last quarter of 2010 , Componenta 's net sales doubled to EUR131m from EUR76m for the same period a year earlier , while it moved to a zero pre-tax profit from a pre-tax loss of EUR7m . | positive |
| 3 | According to the Finnish-Russian Chamber of Commerce , all the major construction companies of Finland are operating in Russia . | neutral |
| 4 | The Swedish buyout firm has sold its remaining 22.4 percent stake , almost eighteen months after taking the company public in Finland . | neutral |
df.describe()
|        | Sentence | Sentiment |
|--------|----------|-----------|
| count  | 5842 | 5842 |
| unique | 5322 | 3 |
| top    | Managing Director 's comments : `` Net sales for the first quarter were notably lower than a year before , especially in Finland , Russia and the Baltic countries . | neutral |
| freq   | 2 | 3130 |

### Split Train Test and Validation Dataset

df_train, df_test = train_test_split(df, stratify=df['Sentiment'])              # stratify keeps class proportions in each split
df_train, df_valid = train_test_split(df_train, stratify=df_train['Sentiment'])
df_train.shape, df_valid.shape, df_test.shape
((3285, 2), (1096, 2), (1461, 2))

### Data Processing and Cleaning

def remove_old_style(tweet): return re.sub(r'^RT[\s]+', '', tweet)
def remove_url(tweet): return re.sub(r'https?://[^\s\n\r]+', '', tweet)
def remove_hash(tweet): return re.sub(r'#', "", tweet)
def remove_numbers(tweet): return re.sub(r'\d*\.?\d+', "", tweet)
tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
skip_words = stopwords.words('english')+list(string.punctuation)
stemmer = PorterStemmer() 
def filter_stem_tokens(tweet_tokens, skip_words=skip_words, stemmer=stemmer): 
    return [ stemmer.stem(token) for token in tweet_tokens if token not in skip_words]

process_sentence = compose(remove_old_style, remove_url, remove_hash, remove_numbers, tokenizer.tokenize, filter_stem_tokens)  # applied left-to-right; see the sketch after the token list below
process_sentence(df_train.loc[df_train.index[0],'Sentence'])
# df_train
['cdp',
 'establish',
 'initi',
 'institut',
 'investor',
 'howev',
 'annual',
 'publish',
 'result',
 'also',
 'interest',
 'increas',
 'number',
 'custom',
 'interest',
 'group',
 'report',
 'compani']
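Note that fastcore's compose applies the functions left to right: the regex cleaners run first, then tokenization, then stop-word filtering and stemming. A quick check with toy functions (not part of the pipeline):

f = lambda s: s + 'a'
g = lambda s: s + 'b'
compose(f, g)('x')  # 'xab': f is applied first, then g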
# inverse_vocab = dict(enumerate(['__PAD__', '__</e>__', '__UNK__'] + list(set(df_train.Sentence.apply(process_sentence).sum()))))
# vocab  = pd.Dataframe({v:k for k,v in inverse_vocab.items()})
# vocab

df_vocab = pd.DataFrame(['__PAD__', '__</e>__', '__UNK__'] + list(set(df_train.Sentence.apply(process_sentence).sum()))).reset_index()
df_vocab = df_vocab.set_index(0)  # index: token; column 'index': integer id
sentence = process_sentence(df_train.loc[df_train.index[0],'Sentence']); sentence
['cdp',
 'establish',
 'initi',
 'institut',
 'investor',
 'howev',
 'annual',
 'publish',
 'result',
 'also',
 'interest',
 'increas',
 'number',
 'custom',
 'interest',
 'group',
 'report',
 'compani']
df_vocab.loc[sentence]['index'].tolist()
[668,
 4016,
 4646,
 3481,
 1716,
 2861,
 3277,
 2761,
 4386,
 1228,
 423,
 5176,
 3239,
 3189,
 423,
 575,
 2018,
 3359]
msg = "My blog name is Soliloquium"

def process_with_vocab(msg,df_vocab=df_vocab, unknown_token='__UNK__'):
    tokens = process_sentence(msg)
    return df_vocab.loc[[token if token in df_vocab.index else unknown_token 
                         for token in tokens]][df_vocab.columns[0]].tolist()

process_with_vocab(msg)
[2508, 4496, 2]
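The trailing 2 is the id of '__UNK__': 'soliloquium' does not occur in the training vocabulary. The special-token ids sit at the start of df_vocab:

df_vocab.loc[['__PAD__', '__</e>__', '__UNK__'], 'index'].tolist()  # [0, 1, 2]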
df_train.Sentiment.value_counts()
neutral     1760
positive    1041
negative     484
Name: Sentiment, dtype: int64

### Data Batching

def data_generator(df, batch_sz, df_vocab, stop=False, shuffle=True, loop=True,
             unknown_token='__UNK__', pad_token='__PAD__', x_col='Sentence', y_col='Sentiment',
             process=process_with_vocab, class_dict={'neutral':0, 'positive':1, 'negative':2}):
    # Note: class ids must be non-negative; a -1 label cannot be represented by
    # the one-hot targets used by trax's cross-entropy and accuracy layers.
    while not stop:
        index = 0
        print("Restarting Loop")
        if shuffle: df = df.sample(frac=1)
        itr = itertools.cycle(df.iterrows())
        pad_id = df_vocab.loc[pad_token, df_vocab.columns[0]]
        while index <= len(df):
            batch = [next(itr) for i in range(batch_sz)]
            X, y = zip(*[(process(i[1][x_col], df_vocab=df_vocab, unknown_token=unknown_token), i[1][y_col]) for i in batch])
            inputs = np.array(pd.DataFrame(X).fillna(pad_id), dtype='int32')  # pad ragged rows with pad_id
            targets = np.array([class_dict[i] for i in y])
            index += batch_sz
            example_weights = np.array([1.0]*len(targets))  # uniform example weights
            yield inputs, targets, example_weights
        if loop: continue
        else: break
            
count = 0
g = data_generator(df_train[:10].copy(), 3, df_vocab)
# while count < 20:
#     batch = next(g)
#     count +=1
next(g)
Restarting Loop
(DeviceArray([[4869, 2664, 2478, 4233, 3262, 3262, 1336, 2877,  414, 4386,
               2744, 3359, 5280,  750,    0,    0,    0,    0,    0,    0,
                  0,    0,    0],
              [ 668, 4016, 4646, 3481, 1716, 2861, 3277, 2761, 4386, 1228,
                423, 5176, 3239, 3189,  423,  575, 2018, 3359,    0,    0,
                  0,    0,    0],
              [ 665, 1388, 5432,  601, 1870,  914,  116, 4680,  601, 5924,
               3968,  638, 5938,  990, 2468,  568, 2054, 5560, 5660, 4278,
               5924, 5938, 3283]], dtype=int32),
 DeviceArray([0, 0, 0], dtype=int32),
 DeviceArray([1., 1., 1.], dtype=float32))
df_train.iloc[0]['Sentence']
'CDP was established on the initiative of institutional investors ; however , the annually published results also interest an increasing number of customers and other interest groups of the reporting companies .'
df_vocab.shape
(5992, 1)

### Model Definition

def classifier(vocab_sz=5920, emb_dims=256, output_dims=3, mode='train'):
    model = tl.Serial(
        tl.Embedding(vocab_size=vocab_sz, d_feature=emb_dims),  # token ids -> embedding vectors
        tl.Mean(axis=1),                                        # average embeddings over the sequence axis
        tl.Dense(n_units=output_dims),                          # one unit per sentiment class
        tl.LogSoftmax()                                         # log-probabilities over the classes
    )
    return model
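Instantiating the classifier with the real vocabulary size and echoing it shows the layer stack; a minimal sketch (the repr in the comment is indicative):

tmp_model = classifier(vocab_sz=len(df_vocab))
tmp_model  # Serial[ Embedding_5992_256, Mean, Dense_3, LogSoftmax ]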

inputs, targets, weights = next(data_generator(df_train[:10].copy(), 4, df_vocab))
# model = classifier()
# model(inputs)
inputs.shape, inputs
Restarting Loop
((4, 18),
 DeviceArray([[ 668, 4016, 4646, 3481, 1716, 2861, 3277, 2761, 4386, 1228,
                423, 5176, 3239, 3189,  423,  575, 2018, 3359],
              [5083, 1819, 1815,  256, 3504,  234,  895, 2100, 5673,    0,
                  0,    0,    0,    0,    0,    0,    0,    0],
              [1934, 4707,   40, 3698, 3921,  566,    0,    0,    0,    0,
                  0,    0,    0,    0,    0,    0,    0,    0],
              [4869, 2664, 2478, 4233, 3262, 3262, 1336, 2877,  414, 4386,
               2744, 3359, 5280,  750,    0,    0,    0,    0]],            dtype=int32))
vocab_sz = 5920  # demo value; the vocabulary built above actually has 5992 entries, so the trained model below uses len(df_vocab)
emb_dims = 256
embed_layer = tl.Embedding(vocab_size=vocab_sz, d_feature=emb_dims)
# embed_layer.init(trax.shapes.signature(inputs))
# embed_layer(inputs)
trax.shapes.signature(inputs)
ShapeDtype{shape:(4, 18), dtype:int32}
el = embed_layer.init(trax.shapes.signature(inputs)); 

el[0].shape
(5920, 256)
# An example of an embedding layer
# rnd.seed(31)
tmp_embed = tl.Embedding(d_feature=256, vocab_size=5920)

# tmp_in_arr = np.array([[0.0, 1,2],
#                     [3,2,0]
#                    ])

# random_key = trax.fastmath.random.get_prng(seed=0)
# tmp_in_arr = trax.fastmath.random.normal(key = random_key, shape = (4, 18))
tmp_in_arr = inputs
tmp_embed.init(trax.shapes.signature(tmp_in_arr))

# Embedding layer will return an array of shape (batch size, sequence length, d_feature)
tmp_embedded_arr = tmp_embed(tmp_in_arr)
print(f"Shape of returned array is {tmp_embedded_arr.shape}")
# display(tmp_embedded_arr)
# display(tmp_embed)
# display(inputs)
Shape of returned array is (4, 18, 256)
tmp_mean = tl.Mean(axis=1)
tmp_mean(tmp_embedded_arr).shape
(4, 256)
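The remaining two layers of the classifier turn the 256-dimensional sentence embedding into log-probabilities over the three classes. A minimal sketch continuing from the temporary arrays above:

tmp_dense = tl.Dense(n_units=3)
tmp_dense.init(trax.shapes.signature(tmp_mean(tmp_embedded_arr)))
tmp_log_probs = tl.LogSoftmax()(tmp_dense(tmp_mean(tmp_embedded_arr)))
tmp_log_probs.shape  # (4, 3): one row of class log-probabilities per sentence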

### Train Eval Task Definition

def get_train_eval_task(df_train, df_valid,
                        df_vocab, loop, batch_sz=16):
    random.seed(271)
    train_task = training.TrainTask(
            labeled_data=data_generator(df_train, batch_sz, df_vocab, stop=False, shuffle=True, loop=loop,
                     unknown_token='__UNK__', pad_token='__PAD__', x_col='Sentence', y_col='Sentiment',
                     process=process_with_vocab, class_dict={'neutral':0, 'positive':1, 'negative':2}),
            loss_layer=tl.WeightedCategoryCrossEntropy(),
            optimizer=trax.optimizers.Adam(0.01),
            n_steps_per_checkpoint=10)
    eval_task = training.EvalTask(
            labeled_data=data_generator(df_valid, batch_sz, df_vocab, stop=False, shuffle=True, loop=loop,
                 unknown_token='__UNK__', pad_token='__PAD__', x_col='Sentence', y_col='Sentiment',
                 process=process_with_vocab, class_dict={'neutral':0, 'positive':1, 'negative':2}),
            metrics=[tl.WeightedCategoryCrossEntropy(), tl.WeightedCategoryAccuracy()]
    )
    return train_task, eval_task

    
get_train_eval_task(df_train, df_valid, df_vocab, loop=True)
Restarting Loop
Restarting Loop
(<trax.supervised.training.TrainTask at 0x101ae78eabb0>,
 <trax.supervised.training.EvalTask at 0x101ae791a3d0>)
output_dir = './model/'

# Remove any previous checkpoints so training starts from scratch
try:
    shutil.rmtree(output_dir)
except OSError:
    pass

output_dir_expand = os.path.expanduser(output_dir)
print(output_dir_expand)
./model/

### Model Training

def train_model(classifier, train_task, eval_task, n_steps, output_dir):
    random.seed(1234)
    training_loop = training.Loop(
                        classifier, 
                        train_task,
                        eval_tasks=eval_task,
                        output_dir=output_dir, 
                        random_seed=31)
    training_loop.run(n_steps = n_steps)
    return training_loop
    
    
train_task, eval_task = get_train_eval_task(df_train, df_valid, df_vocab, loop=True)
model = classifier(vocab_sz=len(df_vocab), emb_dims=256, output_dims=3, mode='train')
training_loop = train_model(model, train_task, eval_task, n_steps=100, output_dir=output_dir)
Restarting Loop
Restarting Loop
/opt/anaconda/envs/aiking/lib/python3.9/site-packages/jax/_src/lib/xla_bridge.py:528: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
  warnings.warn(
/opt/anaconda/envs/aiking/lib/python3.9/site-packages/trax/layers/base.py:851: FutureWarning: GzipFile was opened for writing, but this will change in future Python releases.  Specify the mode argument for opening it for writing.
  with gzip.GzipFile(fileobj=f, compresslevel=compresslevel) as gzipf:
Step      1: Total number of trainable weights: 1534723
Step      1: Ran 1 train steps in 1.16 secs
Step      1: train WeightedCategoryCrossEntropy |  1.02272236
/opt/anaconda/envs/aiking/lib/python3.9/site-packages/trax/supervised/training.py:1249: FutureWarning: GzipFile was opened for writing, but this will change in future Python releases.  Specify the mode argument for opening it for writing.
  with gzip_lib.GzipFile(fileobj=f, compresslevel=2) as gzipf:
Step      1: eval  WeightedCategoryCrossEntropy |  0.70422697
Step      1: eval      WeightedCategoryAccuracy |  0.37500000

Step     10: Ran 9 train steps in 5.69 secs
Step     10: train WeightedCategoryCrossEntropy |  0.73040473
Step     10: eval  WeightedCategoryCrossEntropy |  0.69985682
Step     10: eval      WeightedCategoryAccuracy |  0.62500000

Step     20: Ran 10 train steps in 4.34 secs
Step     20: train WeightedCategoryCrossEntropy |  0.56657821
Step     20: eval  WeightedCategoryCrossEntropy |  0.60906959
Step     20: eval      WeightedCategoryAccuracy |  0.68750000

Step     30: Ran 10 train steps in 1.63 secs
Step     30: train WeightedCategoryCrossEntropy |  0.54628116
Step     30: eval  WeightedCategoryCrossEntropy |  0.41477004
Step     30: eval      WeightedCategoryAccuracy |  0.62500000

Step     40: Ran 10 train steps in 2.34 secs
Step     40: train WeightedCategoryCrossEntropy |  0.50867021
Step     40: eval  WeightedCategoryCrossEntropy |  0.58183014
Step     40: eval      WeightedCategoryAccuracy |  0.43750000

Step     50: Ran 10 train steps in 1.68 secs
Step     50: train WeightedCategoryCrossEntropy |  0.50223523
Step     50: eval  WeightedCategoryCrossEntropy |  0.40479019
Step     50: eval      WeightedCategoryAccuracy |  0.68750000

Step     60: Ran 10 train steps in 0.97 secs
Step     60: train WeightedCategoryCrossEntropy |  0.54113311
Step     60: eval  WeightedCategoryCrossEntropy |  0.38476127
Step     60: eval      WeightedCategoryAccuracy |  0.43750000

Step     70: Ran 10 train steps in 2.34 secs
Step     70: train WeightedCategoryCrossEntropy |  0.50714481
Step     70: eval  WeightedCategoryCrossEntropy |  0.31111774
Step     70: eval      WeightedCategoryAccuracy |  0.56250000

Step     80: Ran 10 train steps in 1.04 secs
Step     80: train WeightedCategoryCrossEntropy |  0.48107988
Step     80: eval  WeightedCategoryCrossEntropy |  0.53283501
Step     80: eval      WeightedCategoryAccuracy |  0.68750000

Step     90: Ran 10 train steps in 1.04 secs
Step     90: train WeightedCategoryCrossEntropy |  0.48959431
Step     90: eval  WeightedCategoryCrossEntropy |  0.52207285
Step     90: eval      WeightedCategoryAccuracy |  0.62500000

Step    100: Ran 10 train steps in 1.08 secs
Step    100: train WeightedCategoryCrossEntropy |  0.49267441
Step    100: eval  WeightedCategoryCrossEntropy |  0.34768826
Step    100: eval      WeightedCategoryAccuracy |  0.81250000
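With training done, the model instance holds the learned weights and can be applied to new text directly. A minimal inference sketch (the example sentence and the inverse class mapping are illustrative additions, matching the generator's class_dict above):

inv_class_dict = {0: 'neutral', 1: 'positive', 2: 'negative'}

def predict(sentence, model=model, df_vocab=df_vocab):
    token_ids = np.array([process_with_vocab(sentence, df_vocab=df_vocab)])  # batch of one
    log_probs = model(token_ids)
    return inv_class_dict[int(np.argmax(log_probs, axis=1)[0])]

predict("Net sales doubled compared to the same quarter a year earlier .")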