# Request: provide the detailed training code; the pro forma (skeleton) code is below.
def train_classifier(args, train, dev):
    """Train a Transformer classifier on ``train`` and return it in eval mode.

    Performs ``num_epochs`` passes over the training examples, taking one
    Adam step per example against a negative log-likelihood loss. The example
    order is reshuffled every epoch using the epoch number as the seed, so
    repeated runs visit the examples in the same order.

    NOTE(review): the original skeleton left the model constructor and the
    forward/loss call as ``...`` placeholders. What is filled in below rests
    on assumptions about the project's ``Transformer`` class and example
    objects (each marked inline) and must be confirmed against their
    definitions before relying on this function.

    Args:
        args: Run configuration. Not read by this function.
        train: Indexable, sized collection of training examples.
        dev: Held-out examples. Not read by this function.

    Returns:
        The trained model, switched to evaluation mode.
    """
    # NOTE(review): assumed constructor keywords and task sizes -- confirm
    # against the Transformer definition and the dataset before use. Keyword
    # arguments are used so a signature mismatch fails loudly.
    model = Transformer(
        vocab_size=27,
        num_positions=20,
        d_model=64,
        d_internal=32,
        num_classes=3,
        num_layers=1,
    )
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # The loss module is stateless, so build it once rather than per epoch.
    loss_fcn = nn.NLLLoss()

    num_epochs = 10
    for epoch in range(num_epochs):
        loss_this_epoch = 0.0
        # Seeding with the epoch number keeps each epoch's shuffle reproducible.
        random.seed(epoch)
        ex_idxs = list(range(len(train)))
        random.shuffle(ex_idxs)

        for ex_idx in ex_idxs:
            example = train[ex_idx]
            # NOTE(review): assumes the model returns (log_probs, extras) and
            # that each example exposes `input_tensor` / `output_tensor` with
            # class-index targets, as NLLLoss expects -- TODO confirm.
            log_probs, _ = model(example.input_tensor)
            loss = loss_fcn(log_probs, example.output_tensor)

            # Clear stale gradients, backpropagate, then update the weights.
            model.zero_grad()
            loss.backward()
            optimizer.step()
            loss_this_epoch += loss.item()

        print(f"epoch {epoch}: total training loss {loss_this_epoch:.4f}")

    model.eval()
    return model