Provide the detailed training code.
The pro forma (skeleton) code is below:
def train_classifier(args, train, dev):
    """Train a Transformer classifier one example at a time and return it.

    Runs 10 epochs of Adam (lr=1e-4) over ``train`` with a negative
    log-likelihood loss. The visiting order is reshuffled every epoch after
    seeding the global ``random`` module with the epoch number, so the order
    is repeatable from run to run.

    Args:
        args: Run configuration supplied by the caller; not read here.
        train: Indexable, sized collection of training examples.
        dev: Held-out examples (may be None); only used to size the model.

    Returns:
        The trained model, switched to evaluation mode.

    Raises:
        ValueError: If ``train`` is empty.
    """
    if len(train) == 0:
        raise ValueError("train must contain at least one example")

    # NOTE(review): neither the example type nor Transformer.__init__ is
    # visible from this function. The code below assumes each example exposes
    # `input_tensor` (1-D tensor of token indices) and `output_tensor`
    # (tensor of integer class labels), and that Transformer accepts the
    # keyword arguments used here -- confirm against their definitions.
    examples = list(train) + (list(dev) if dev is not None else [])
    # Sizes are inferred from the data seen here; replace them with the true
    # vocabulary / sequence / label sizes if those are known elsewhere.
    vocab_size = 1 + max(int(ex.input_tensor.max()) for ex in examples)
    num_positions = max(len(ex.input_tensor) for ex in examples)
    num_classes = 1 + max(int(ex.output_tensor.max()) for ex in examples)

    model = Transformer(
        vocab_size=vocab_size,
        num_positions=num_positions,
        d_model=64,  # modest starting values; tune as needed
        d_internal=32,
        num_classes=num_classes,
        num_layers=1,
    )
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # The loss module holds no per-epoch state, so build it once.
    loss_fcn = nn.NLLLoss()

    num_epochs = 10
    for t in range(num_epochs):
        loss_this_epoch = 0.0
        # Seeding with the epoch number makes each epoch's order repeatable.
        random.seed(t)
        ex_idxs = list(range(len(train)))
        random.shuffle(ex_idxs)
        for ex_idx in ex_idxs:
            example = train[ex_idx]
            output = model(example.input_tensor)
            # NOTE(review): if the model returns a tuple such as
            # (log_probs, extras), the first element is taken -- confirm.
            log_probs = output[0] if isinstance(output, tuple) else output
            # NLLLoss expects log-probabilities; this assumes the model
            # already applies log-softmax -- confirm.
            loss = loss_fcn(log_probs, example.output_tensor)
            # Clear stale gradients, backpropagate, then take one Adam step.
            model.zero_grad()
            loss.backward()
            optimizer.step()
            loss_this_epoch += loss.item()
        print(f"epoch {t}: total training loss {loss_this_epoch:.4f}")
    model.eval()
    return model