# ``from __future__`` imports must be the very first statement in a module;
# the original placed it after the other imports, which is a SyntaxError.
from __future__ import print_function  # Py2/Py3-compatible print

# Standard library
import math
import os

# Third-party
import numpy as np

# NOTE(review): download() below uses the third-party ``requests`` package,
# which is never imported anywhere visible — confirm and add the import.
# import cntk as C
# import cntk.tests.test_utils
# cntk.tests.test_utils.set_device_from_pytest_env() # (only needed for our build system)
# C.cntk_py.set_fixed_random_seed(1) # fix a random seed for CNTK components
def download(url, filename):
    """Download ``url`` and save the response body to ``filename``.

    The response is streamed to disk in chunks so large files do not
    have to fit in memory.

    NOTE(review): relies on the third-party ``requests`` package being in
    scope at module level — it is not imported in the visible source.
    """
    response = requests.get(url, stream=True)
    # Fail loudly on HTTP errors instead of silently saving an error page.
    response.raise_for_status()
    with open(filename, "wb") as handle:
        # Default iter_content() yields byte-sized chunks; 8 KiB chunks
        # avoid a Python-level loop iteration per byte.
        for data in response.iter_content(chunk_size=8192):
            handle.write(data)
# Ensure the data directory exists, then resolve each data file's local path.
# makedirs(..., exist_ok=True) replaces the race-prone exists()+mkdir() pair.
# NOTE(review): ``location`` and ``data`` are defined elsewhere in the file;
# this fragment only computes ``path`` — the code consuming it is presumably
# in the unseen remainder of the original cell.
os.makedirs(location, exist_ok=True)
for item in data.values():
    path = os.path.normpath(os.path.join(location, item['file']))
# Containers for the input feature (query) and the label (answer).
# Each side gets its own unique dynamic axis so CNTK does not assume
# the query and answer sequences share a length.
axis_qry = C.Axis.new_unique_dynamic_axis('axis_qry')
qry = C.sequence.input_variable(QRY_SIZE, sequence_axis=axis_qry)

axis_ans = C.Axis.new_unique_dynamic_axis('axis_ans')
ans = C.sequence.input_variable(ANS_SIZE, sequence_axis=axis_ans)
# Package the two embedding outputs into one composite function so a single
# forward pass evaluates both the query and the answer vectors.
model = C.combine(network['query_vector'], network['answer_vector'])
# The query and answer vectors live on different dynamic axes; reconcile
# them so the loss can compare the two sequences element-wise.
query_reconciled = C.reconcile_dynamic_axes(network['query_vector'],
                                            network['answer_vector'])

network['loss'] = create_loss(query_reconciled, network['answer_vector'])
# No separate classification-error metric is tracked for this loss.
network['error'] = None
# Plain momentum SGD — no L2 regularization term is supplied.
print('Using momentum sgd with no l2')
dssm_learner = C.learners.momentum_sgd(model.parameters,
                                       lr_schedule,
                                       mm_schedule)
# Drive training for MAX_EPOCHS epochs of EPOCH_SIZE samples each.
t = 0  # running count of samples consumed so far
for epoch in range(MAX_EPOCHS):
    epoch_end = (epoch + 1) * EPOCH_SIZE
    # Consume minibatches until this epoch's sample budget is spent.
    while t < epoch_end:
        data = train_source.next_minibatch(MINIBATCH_SIZE, input_map=input_map)
        trainer.train_minibatch(data)  # one parameter update
        t += MINIBATCH_SIZE
    # Per-epoch progress line (matches the "Finished Epoch[...]" output below).
    # NOTE(review): original indentation was lost; placement inside the epoch
    # loop is inferred from the per-epoch log lines — confirm.
    trainer.summarize_training_progress()
# 1   <- stray notebook cell-output marker, kept as a comment
# Kick off training with the network, trainer and reader built above.
do_train(network, trainer, train_source)
# Recorded notebook output of the training run (commented so the file parses):
# Learning rate per 1 samples: 0.0015625
# Momentum per 1 samples: 0.0
# Finished Epoch[1 of 5]: [Training] loss = 0.343046 * 1522, metric = 0.00% * 1522 5.720s (266.1 samples/s);
# Finished Epoch[2 of 5]: [Training] loss = 0.102804 * 1530, metric = 0.00% * 1530 3.464s (441.7 samples/s);
# Finished Epoch[3 of 5]: [Training] loss = 0.066461 * 1525, metric = 0.00% * 1525 3.402s (448.3 samples/s);
# Finished Epoch[4 of 5]: [Training] loss = 0.048511 * 1534, metric = 0.00% * 1534 3.390s (452.5 samples/s);
# Finished Epoch[5 of 5]: [Training] loss = 0.035384 * 1510, metric = 0.00% * 1510 3.383s (446.3 samples/s);
# Load the query/answer vocabularies and build word -> index lookups.
# Context managers close the word-list files promptly (the original left
# both file handles open), and enumerate() replaces the range(len(...))
# index dance.
with open(data['query']['file']) as f:
    query_wl = [line.rstrip('\n') for line in f]
with open(data['answer']['file']) as f:
    answers_wl = [line.rstrip('\n') for line in f]

query_dict = {word: i for i, word in enumerate(query_wl)}
answers_dict = {word: i for i, word in enumerate(answers_wl)}
# A hand-picked example: one query, one correct answer, and one deliberately
# unrelated ("poor") answer, each written as space-separated tokens wrapped
# in BOS/EOS sentinels.
qry = 'BOS what contribution did e1 made to science in 1665 EOS'
ans = 'BOS book author book_editions_published EOS'
ans_poor = 'BOS language human_language main_country EOS'
def _to_indices(sentence, vocab):
    """Map each space-separated token of *sentence* to its vocab index."""
    # NOTE(review): vocabulary entries appear to carry a trailing space,
    # hence the ``token + ' '`` lookup — confirm against the word-list files.
    return [vocab[token + ' '] for token in sentence.split()]

qry_idx = _to_indices(qry, query_dict)
print('Query Indices:', qry_idx)

ans_idx = _to_indices(ans, answers_dict)
print('Answer Indices:', ans_idx)

ans_poor_idx = _to_indices(ans_poor, answers_dict)
print('Poor Answer Indices:', ans_poor_idx)
def _one_hot(indices, vocab_size):
    """Return a (len(indices), vocab_size) float32 one-hot matrix.

    Row t has a single 1.0 in column indices[t]. NumPy fancy indexing
    replaces the original per-timestep Python loop (which also shadowed
    the training sample counter ``t``).
    """
    onehot = np.zeros((len(indices), vocab_size), dtype=np.float32)
    onehot[np.arange(len(indices)), indices] = 1
    return onehot

# One-hot representations of the query, the good answer and the poor answer.
qry_onehot = _one_hot(qry_idx, len(query_dict))
ans_onehot = _one_hot(ans_idx, len(answers_dict))
ans_poor_onehot = _one_hot(ans_poor_idx, len(answers_dict))