sPyNNaker neural_modelling  7.4.2
local_only_conv_impl.c
1 /*
2  * Copyright (c) 2021 The University of Manchester
3  * based on work Copyright (c) The University of Sussex,
4  * Garibaldi Pineda Garcia, James Turner, James Knight and Thomas Nowotny
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * https://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
19 
20 #include "local_only_impl.h"
21 #include "local_only_2d_common.h"
22 #include <stdlib.h>
23 #include <debug.h>
24 #include <stdfix-full-iso.h>
25 #include "../population_table/population_table.h"
26 #include "../neuron.h"
27 
28 // One per connector
29 typedef struct {
39  uint16_t delay_stage;
41  uint16_t delay;
43  uint16_t kernel_index;
45  uint16_t _PAD;
54 } connector;
55 
56 typedef struct {
60  uint32_t source_height_per_core: 16;
62  uint32_t source_width_per_core: 16;
66  uint32_t source_width_last_core: 16;
70  uint32_t cores_per_source_width: 16;
77 } source_info;
78 
79 typedef struct {
80  lc_coord_t post_start;
81  lc_coord_t post_end;
82  lc_shape_t post_shape;
83  uint32_t n_sources;
84  uint32_t n_connectors_total;
85  uint32_t n_weights_total;
86  source_info sources[];
87  // In SDRAM, after sources[n_sources] is the following:
88  // connector connectors[n_connectors_total];
89  // lc_weight_t[n_weights_total] weights;
90 } conv_config;
91 
92 // The main configuration data
93 static conv_config *config;
94 
95 static connector *connectors;
96 
97 static lc_weight_t *weights;
98 
99 static inline void log_div_const(const char *name, div_const d) {
100  log_debug(" %s=(m: %u, sh1: %u, sh2: %u)", name, d.m, d.sh1, d.sh2);
101 }
102 
104 bool local_only_impl_initialise(void *address){
105  log_info("+++++++++++++++++ CONV init ++++++++++++++++++++");
106  conv_config* sdram_config = address;
107  uint32_t n_bytes = sizeof(conv_config) +
108  (sizeof(source_info) * sdram_config->n_sources);
109  config = spin1_malloc(n_bytes);
110  if (config == NULL) {
111  log_error("Can't allocate memory for config!");
112  return false;
113  }
114  spin1_memcpy(config, sdram_config, n_bytes);
115 
116  log_info("post_start = %u, %u, post_end = %u, %u, post_shape = %u, %u",
117  config->post_start.col, config->post_start.row,
118  config->post_end.col, config->post_end.row,
119  config->post_shape.width, config->post_shape.height);
120  log_info("num sources = %u", config->n_sources);
121 
122  if (config->n_sources == 0) {
123  log_error("No sources!");
124  return false;
125  }
126 
127  // The connectors come after the sources in SDRAM
128  connector *sdram_connectors =
129  (connector *) &(sdram_config->sources[config->n_sources]);
130  uint32_t n_connector_bytes = sizeof(connector) * config->n_connectors_total;
131  connectors = spin1_malloc(n_connector_bytes);
132  if (connectors == NULL) {
133  log_error("Can't allocate %u bytes of memory for %u connectors!",
134  n_connector_bytes, config->n_connectors_total);
135  return false;
136  }
137  spin1_memcpy(connectors, sdram_connectors, n_connector_bytes);
138 
139  // The weights come after the connectors in SDRAM
140  lc_weight_t *kernel_weights =
141  (lc_weight_t *) &(sdram_connectors[config->n_connectors_total]);
142  uint32_t n_weight_bytes = sizeof(lc_weight_t) * config->n_weights_total;
143  weights = spin1_malloc(n_weight_bytes);
144  if (weights == NULL) {
145  log_error("Can't allocate %u bytes of memory for %u weights!",
146  n_weight_bytes, config->n_weights_total);
147  return false;
148  }
149  spin1_memcpy(weights, kernel_weights, n_weight_bytes);
150 
151  // Print what we have
152  for (uint32_t i = 0; i < config->n_sources; i++) {
153  source_info *s_info = &(config->sources[i]);
154  log_debug("Source %u: key=0x%08x, mask=0x%08x, start=%u, count=%u",
155  i, s_info->key_info.key, s_info->key_info.mask,
156  s_info->key_info.start, s_info->key_info.count);
157  log_debug(" core_mask=0x%08x, mask_shift=0x%08x",
158  s_info->key_info.core_mask, s_info->key_info.mask_shift);
159  log_debug(" height_per_core=%u, width_per_core=%u",
161  log_debug(" height_last_core=%u, width_last_core=%u",
163  log_debug(" cores_per_height=%u, cores_per_width=%u",
165  log_div_const("source_width_div", s_info->source_width_div);
166  log_div_const("source_width_last_div", s_info->source_width_last_div);
167  log_div_const("cores_per_width_div", s_info->cores_per_width_div);
168  }
169 
170  for (uint32_t i = 0; i < config->n_connectors_total; i++) {
171  connector *conn = &(connectors[i]);
172  log_debug("Connector %u: kernel size=%u, %u", i, conn->kernel.width,
173  conn->kernel.height);
174  log_debug(" delay=%u, delay_stage=%u", conn->delay, conn->delay_stage);
175  }
176 
177  return true;
178 }
179 
184 static inline lc_coord_t map_pre_to_post(connector *connector, lc_coord_t pre,
185  int16_t half_kh, int16_t half_kw) {
186  lc_coord_t post = pre;
187  post.row = div_by_const(post.row, connector->pool_stride_height_div);
188  post.col = div_by_const(post.col, connector->pool_stride_width_div);
189  post.row = post.row - half_kh + connector->padding.height;
190  post.col = post.col - half_kw + connector->padding.width;
191  post.row = div_by_const(post.row, connector->stride_height_div);
192  post.col = div_by_const(post.col, connector->stride_width_div);
193  return post;
194 }
195 
196 
200 static inline void do_convolution_operation(
201  uint32_t time, lc_coord_t pre_coord, connector *connector,
202  uint16_t *ring_buffers) {
203  int32_t half_kh = connector->kernel.height / 2;
204  int32_t half_kw = connector->kernel.width / 2;
205  lc_coord_t post_coord = map_pre_to_post(connector, pre_coord, half_kh, half_kw);
206  log_debug("pre row %d, col %d AS post row %d, col %d",
207  pre_coord.row, pre_coord.col, post_coord.row, post_coord.col);
208  lc_weight_t *connector_weights = &weights[connector->kernel_index];
209  uint32_t sat_flag = 0xFFFF0000;
210  uint32_t sat_value = 0xFFFF;
211 
212  int32_t kw = connector->kernel.width;
213  for (int32_t r = -half_kh, kr = 0; r <= half_kh; r++, kr++) {
214  int32_t tmp_row = post_coord.row + r;
215  if ((tmp_row < config->post_start.row) || (tmp_row > config->post_end.row)) {
216  continue;
217  }
218  for (int32_t c = -half_kw, kc = 0; c <= half_kw; c++, kc++) {
219  int32_t tmp_col = post_coord.col + c;
220  if ((tmp_col < config->post_start.col) || (tmp_col > config->post_end.col)) {
221  continue;
222  }
223 
224  // This the neuron id relative to the neurons on this core
225  uint32_t post_index =
226  ((tmp_row - config->post_start.row) * config->post_shape.width)
227  + (tmp_col - config->post_start.col);
228  uint32_t k = (kr * kw) + kc;
229  lc_weight_t weight = connector_weights[k];
230  if (weight == 0) {
231  continue;
232  }
233  uint32_t rb_index = 0;
234  if (weight > 0) {
236  connector->positive_synapse_type, post_index,
239  } else {
241  connector->negative_synapse_type, post_index,
244  weight = -weight;
245  }
246  log_debug("Updating ring_buffers[%u] for post neuron %u = %u, %u, with weight %u",
247  rb_index, post_index, tmp_col, tmp_row, weight);
248 
249  // Add weight to current ring buffer value, avoiding saturation
250  uint32_t accumulation = ring_buffers[rb_index] + weight;
251  uint32_t sat_test = accumulation & sat_flag;
252  if (sat_test) {
253  accumulation = sat_value;
254  }
255  ring_buffers[rb_index] = accumulation;
256  }
257  }
258 }
259 
260 static inline uint32_t get_core_row(uint32_t core_id, source_info *s_info) {
261  return div_by_const(core_id, s_info->cores_per_width_div);
262 }
263 
264 static inline uint32_t get_core_col(uint32_t core_id, uint32_t core_row,
265  source_info *s_info) {
266  return core_id - (core_row * s_info->cores_per_source_width);
267 }
268 
269 static inline bool is_last_core_on_row(uint32_t core_col, source_info *s_info) {
270  return core_col == (uint32_t) (s_info->cores_per_source_width - 1);
271 }
272 
273 static inline bool is_last_core_in_col(uint32_t core_row, source_info *s_info) {
274  return core_row == (uint32_t) (s_info->cores_per_source_height - 1);
275 }
276 
277 static inline bool key_to_index_lookup(uint32_t spike, source_info **rs_info) {
278  for (uint32_t i = 0; i < config->n_sources; i++) {
279  source_info *s_info = &(config->sources[i]);
280  // We have a match on key
281  if ((spike & s_info->key_info.mask) == s_info->key_info.key) {
282  *rs_info = s_info;
283  return true;
284  }
285  }
286  return false;
287 }
288 
295 void local_only_impl_process_spike(
296  uint32_t time, uint32_t spike, uint16_t* ring_buffers) {
297 
298  // Lookup the spike, and if found, get the appropriate parts
299  source_info *s_info;
300  if (!key_to_index_lookup(spike, &s_info)) {
301  log_debug("Spike %x didn't match any connectors!", spike);
302  return;
303  }
304 
305  uint32_t core_id = get_core_id(spike, s_info->key_info);
306  uint32_t core_row = get_core_row(core_id, s_info);
307  uint32_t core_col = get_core_col(core_id, core_row, s_info);
308  bool last_core_on_row = is_last_core_on_row(core_col, s_info);
309  bool last_core_in_col = is_last_core_in_col(core_row, s_info);
310  uint32_t source_height = 0;
311  uint32_t source_width = 0;
312  div_const source_width_d;
313  if (last_core_on_row) {
314  source_width = s_info->source_width_last_core;
315  source_width_d = s_info->source_width_last_div;
316  } else {
317  source_width = s_info->source_width_per_core;
318  source_width_d = s_info->source_width_div;
319  }
320  if (last_core_in_col) {
321  source_height = s_info->source_height_last_core;
322  } else {
323  source_height = s_info->source_height_per_core;
324  }
325  uint32_t local_id = get_local_id(spike, s_info->key_info);
326  uint32_t neurons_per_core = source_width * source_height;
327 
328  log_debug("Spike %x, on core %u (%u, %u), is last (%u, %u), local %u",
329  spike, core_id, core_col, core_row, last_core_on_row, last_core_in_col,
330  local_id);
331 
332  // compute the population-based coordinates
333  uint32_t end = s_info->key_info.start + s_info->key_info.count;
334  for (uint32_t i = s_info->key_info.start; i < end; i++) {
335  connector *connector = &(connectors[i]);
336 
337  // Ignore the neuron if the delay does not match
338  uint32_t first_neuron = neurons_per_core * connector->delay_stage;
339  uint32_t last_neuron = first_neuron + neurons_per_core;
340  log_debug("Connector %u, delay stage = %u, first = %u, last = %u",
341  i, connector->delay_stage, first_neuron, last_neuron);
342  if (local_id < first_neuron || local_id >= last_neuron) {
343  continue;
344  }
345 
346  uint32_t local_neuron_id = local_id - first_neuron;
347  uint32_t local_row = div_by_const(local_neuron_id, source_width_d);
348  uint32_t local_col = local_neuron_id - (local_row * source_width);
349 
350  lc_coord_t pre_coord = {
351  // The x-coordinate is the remainder of the "division"
352  .col = (core_col * s_info->source_width_per_core) + local_col,
353  // The y-coordinate is the integer part of the "division".
354  .row = (core_row * s_info->source_height_per_core) + local_row
355  };
356 
357  log_debug("Local coord = %u, %u, Pre coord = %u, %u",
358  local_col, local_row, pre_coord.col, pre_coord.col);
359 
360  // Compute the convolution
361  do_convolution_operation(time, pre_coord, connector, ring_buffers);
362  }
363 }
static weight_t * ring_buffers
The ring buffers to be used in the simulation.
Definition: c_main.c:118
static uint32_t time
Simulation time.
uint32_t synapse_delay_mask
The mask to get the synaptic delay from a "synapse".
Definition: local_only.c:71
uint32_t synapse_type_index_bits
The number of bits used by the synapse type and post-neuron index.
Definition: local_only.c:74
uint32_t synapse_index_bits
The number of bits used by just the post-neuron index.
Definition: local_only.c:77
static struct local_only_config config
A local copy of the configuration.
Definition: local_only.c:43
div_const source_width_last_div
Division by last core width.
key_info key_info
Information about the key.
uint32_t source_height_last_core
The source population height on the last core in a column.
uint32_t cores_per_source_width
Number of cores in a width of the source.
uint32_t source_height_per_core
The source population height per core.
uint32_t cores_per_source_height
The number cores in a height of the source.
div_const cores_per_width_div
Division by cores per source width.
uint32_t source_width_per_core
The source population width per core.
div_const source_width_div
Used to calculate division by the source width per core efficiently.
uint32_t source_width_last_core
The source population width on the last core on a row.
A region of SDRAM used to transfer synapses.
Collection of rates to apply over time to a particular spike source.
div_const pool_stride_width_div
1 / pooling stride width
div_const stride_width_div
1 / stride width;
uint16_t kernel_index
The index of the weights for the kernel.
lc_shape_t padding
The shape of the padding.
div_const stride_height_div
1 / stride height
uint16_t delay
The delay in time steps.
uint16_t positive_synapse_type
The index of the synapse for positive weights.
lc_shape_t kernel
The shape of the kernel.
uint16_t negative_synapse_type
The index of the synapse for negative weights.
uint16_t delay_stage
The delay stage.
div_const pool_stride_height_div
1 / pooling stride height
Structure for constants for precise constant integer division (see div_by_const)
uint32_t start
The index into ::connectors for this entry.
uint32_t count
The number of entries in ::connectors for this entry.
uint32_t key
The key to match against the incoming message.
uint32_t mask_shift
The shift to apply to the key to get the core part.
uint32_t mask
The mask to select the relevant bits of key for matching.
uint32_t core_mask
The mask to apply to the key once shifted to get the core index.
A coordinate in terms of rows and columns (y and x)
A shape in terms of height and width.
static index_t synapse_row_get_ring_buffer_index(uint32_t simulation_timestep, uint32_t synapse_type_index, uint32_t neuron_index, uint32_t synapse_type_index_bits, uint32_t synapse_index_bits, uint32_t synapse_delay_mask)
Get the index of the ring buffer for a given timestep, synapse type and neuron index.
Definition: synapse_row.h:259