sPyNNaker neural_modelling  development
local_only_pool_dense_impl.c
1 /*
2  * Copyright (c) 2021 The University of Manchester
3  * based on work Copyright (c) The University of Sussex,
4  * Garibaldi Pineda Garcia, James Turner, James Knight and Thomas Nowotny
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * https://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
19 
20 #include "local_only_impl.h"
21 #include "local_only_2d_common.h"
22 #include <stdlib.h>
23 #include <debug.h>
24 #include "../population_table/population_table.h"
25 #include "../neuron.h"
26 
// Description of one dimension of a source population as split over cores.
// NOTE(review): code later in this file also reads the members cum_cores_div,
// cum_size_per_core, cum_size_per_core_div, cum_size_last_core and
// cum_size_last_core_div through this type, but they are not declared in the
// lines visible here.  The listing appears to have lost lines; restore the
// full declaration (in its original field order) from the real source.
27 typedef struct {
// The size in this dimension on every core except the last one
29  uint32_t size_per_core;
// The number of cores in this dimension; the last core has coordinate cores - 1
35  uint32_t cores;
// Multiplied by the core coordinate of this dimension and subtracted from the
// remaining core id when decoding the per-dimension core coordinates
37  uint32_t cum_cores;
// The size in this dimension on the last core
41  uint32_t size_last_core;
47 } source_dim;
48 
// Information about one source of spikes.
// NOTE(review): the rest of this file accesses "key_info" (with key, mask,
// start and count) and a trailing "source_dim[]" array through this type, but
// neither member is declared in the lines visible here.  The listing appears
// to have lost lines; restore them from the real source.
49 typedef struct {
// The number of source_dim entries that belong to this source
51  uint32_t n_dims;
53 } source_info;
54 
55 // One per connector
// Variable-sized description of a single connector: a fixed header, then
// n_dims pooling divisors, then the weights (see get_weights()).
56 typedef struct {
// The number of entries in pool_stride_div
57  uint16_t n_dims;
// The number of weights stored after pool_stride_div
58  uint16_t n_weights;
// The synapse type used when a weight is greater than zero
59  uint16_t positive_synapse_type;
// The synapse type used when a weight is less than zero
60  uint16_t negative_synapse_type;
// Selects which block of neurons_per_core source ids this connector handles
61  uint16_t delay_stage;
// The delay of the connector (logged at start-up as "delay")
62  uint16_t delay;
// One divisor per dimension, applied to a coordinate to get its pooled position
63  div_const pool_stride_div[];
64  // Also follows:
65  // lc_weight_t weights[];
66 } connector;
67 
// Fixed-size header of the configuration region read at initialisation.
68 typedef struct {
// The number of consecutive weights read per weight row (one per post index)
69  uint32_t n_post;
// The number of source_info entries that follow this header
70  uint32_t n_sources;
// The number of connector entries that follow the sources
71  uint32_t n_connectors;
72  // In SDRAM, below here is the following (each variable size):
73  // source_info sources[];
74  // connector connectors[n_connectors]
75 } conv_config;
76 
77 // The main configuration data
// Local copy allocated and filled in by local_only_impl_initialise()
78 static conv_config *config;
79 
80 // The source information
// Array of config->n_sources pointers, each to a separately allocated copy
81 static source_info **source_infos;
82 
83 // The per-connection data
// Array of config->n_connectors pointers, each to a separately allocated copy
84 static connector** connectors;
85 
86 static inline lc_weight_t *get_weights(connector *conn) {
87  return (lc_weight_t *) &(conn->pool_stride_div[conn->n_dims]);
88 }
89 
91 bool local_only_impl_initialise(void *address){
92  log_info("+++++++++++++++++ CONV init ++++++++++++++++++++");
93  conv_config* sdram_config = address;
94  uint32_t config_size = sizeof(conv_config) +
95  (sizeof(source_info) * sdram_config->n_sources);
96  config = spin1_malloc(config_size);
97  if (config == NULL) {
98  log_error("Can't allocate %u bytes of memory for config with %u sources",
99  config_size, sdram_config->n_sources);
100  return false;
101  }
102  spin1_memcpy(config, sdram_config, config_size);
103 
104  log_info("num connectors = %u", config->n_connectors);
105  if (config->n_connectors == 0) {
106  return false;
107  }
108 
109  log_info("num post = %u", config->n_post);
110 
111  // Allocate space for source information
112  source_infos = spin1_malloc(config->n_sources * sizeof(source_infos[0]));
113  if (source_infos == NULL) {
114  log_error("Can't allocate memory for source infos");
115  }
116 
117  // Allocate space for connector information
118  connectors = spin1_malloc(config->n_connectors * sizeof(connectors[0]));
119  if (connectors == NULL) {
120  log_error("Can't allocate memory for connectors");
121  return false;
122  }
123 
124  // The first source comes after the configuration in SDRAM
125  source_info *s_info = (source_info *) &sdram_config[1];
126  for (uint32_t i = 0; i < config->n_sources; i++) {
127  uint32_t n_bytes = sizeof(*s_info) + (s_info->n_dims * sizeof(source_dim));
128  source_infos[i] = spin1_malloc(n_bytes);
129  if (source_infos[i] == NULL) {
130  log_error("Can't allocate %u bytes for source_infos[%u]", n_bytes, i);
131  }
132  spin1_memcpy(source_infos[i], s_info, n_bytes);
133 
134  // Move to the next source, after the last dimension
135  s_info = (source_info *) &s_info->source_dim[source_infos[i]->n_dims];
136  }
137 
138  // The first connector comes after the sources in SDRAM
139  connector *conn = (connector *) s_info;
140  for (uint32_t i = 0; i < config->n_connectors; i++) {
141 
142  uint32_t n_bytes = sizeof(*conn) + (conn->n_weights * sizeof(lc_weight_t)) +
143  (conn->n_dims * sizeof(div_const));
144 
145  // Copy the data from SDRAM
146  connectors[i] = spin1_malloc(n_bytes);
147  if (connectors[i] == NULL) {
148  log_error("Can't allocate %u bytes for connectors[%u]", n_bytes, i);
149  return false;
150  }
151  spin1_memcpy(connectors[i], conn, n_bytes);
152 
153  // Move to the next connector; because it is dynamically sized,
154  // this comes after the last weight in the last connector, which comes
155  // after the last dimension!
156  lc_weight_t* weights = get_weights(conn);
157  uint32_t n_weights = connectors[i]->n_weights;
158 
159  if (n_weights & 0x1) {
160  n_weights += 1;
161  }
162  conn = (connector *) &weights[n_weights];
163  }
164 
165  for (uint s = 0; s < config->n_sources; s++) {
166  uint32_t start = source_infos[s]->key_info.start;
167  uint32_t end = source_infos[s]->key_info.count + start;
168  log_info("Source %u: Key = 0x%08x, Mask = 0x%08x, %u Dimensions",
169  s, source_infos[s]->key_info.key, source_infos[s]->key_info.mask,
170  source_infos[s]->n_dims);
171  for (uint32_t d = 0; d < source_infos[s]->n_dims; d++) {
172  log_info(" Dim %u, core size=%u, cores per size=%u, last core=%u",
173  d, source_infos[s]->source_dim[d].size_per_core,
174  source_infos[s]->source_dim[d].cores,
175  source_infos[s]->source_dim[d].size_last_core);
176  }
177  for (uint32_t c = start; c < end; c++) {
178  log_info(" Connector %u, %u dims, %u weights, +synapse %u, -synapse %u,"
179  " delay_stage %u, delay %u",
180  c, connectors[c]->n_dims, connectors[c]->n_weights,
181  connectors[c]->positive_synapse_type, connectors[c]->negative_synapse_type,
182  connectors[c]->delay_stage, connectors[c]->delay);
183  }
184  }
185 
186  return true;
187 }
188 
189 static inline bool key_to_index_lookup(uint32_t spike, source_info **rs_info) {
190  for (uint32_t i = 0; i < config->n_sources; i++) {
191  source_info *s_info = source_infos[i];
192  if ((spike & s_info->key_info.mask) == s_info->key_info.key) {
193  *rs_info = s_info;
194  return true;
195  }
196  }
197  return false;
198 }
199 
200 static bool get_conn_weights(connector *c, source_info *s_info, uint32_t local_id,
201  uint32_t *sizes, uint32_t *core_coords, div_const *divs,
202  uint32_t neurons_per_core, lc_weight_t **weights) {
203 
204  // Stop if the delay means it is out of range
205  uint32_t first_neuron = c->delay_stage * neurons_per_core;
206  uint32_t last_neuron = first_neuron + neurons_per_core;
207  if (local_id < first_neuron || local_id >= last_neuron) {
208  return false;
209  }
210  local_id -= first_neuron;
211 
212  // Now work out the index into the weights from the coordinates
213  uint32_t last_extent = 1;
214  uint32_t index = 0;
215  uint32_t remainder = local_id;
216  for (uint32_t j = 0; j < s_info->n_dims; j++) {
217  div_const stride_div = c->pool_stride_div[j];
218  source_dim *s_dim = &s_info->source_dim[j];
219 
220  uint32_t coord = div_by_const(remainder, divs[j]);
221  remainder -= coord * sizes[j];
222  coord += core_coords[j] * s_dim->size_per_core;
223 
224  // Work out the position after pooling
225  coord = div_by_const(coord, stride_div);
226 
227  // Add into the final index
228  index += (coord * last_extent);
229 
230  // Remember the full stride size from this dimension to pass to the next
231  last_extent = ((s_dim->cores - 1) * s_dim->size_per_core) + s_dim->size_last_core;
232  last_extent = div_by_const(last_extent, stride_div);
233  }
234  lc_weight_t *all_weights = get_weights(c);
235  *weights = &all_weights[index * config->n_post];
236  return true;
237 }
238 
// Process a received spike by adding connector weights into the ring buffers.
//
// The source matching the spike key is looked up (nothing happens if there is
// none), the coordinates of the sending core are decoded per dimension, and
// then every connector of that source is asked for the weight row of the
// spiking neuron.  Each non-zero weight in that row has its magnitude added
// to one ring buffer entry, limited to 0xFFFF.
//
// time:         not referenced in the lines visible here; presumably used by
//               the ring buffer index computation that is missing from this
//               listing - TODO confirm
// spike:        the key of the received spike
// ring_buffers: the ring buffers to add the weights to
245 void local_only_impl_process_spike(
246  uint32_t time, uint32_t spike, uint16_t* ring_buffers) {
247 
248  // Lookup the spike, and if found, get the appropriate parts
249  source_info *s_info;
250  if (!key_to_index_lookup(spike, &s_info)) {
251  return;
252  }
253 
254  // Work out the local coordinate for this source
255  uint32_t core_id = get_core_id(spike, s_info->key_info);
256  uint32_t local_id = get_local_id(spike, s_info->key_info);
257  uint32_t n_dims = s_info->n_dims;
// Per-dimension working arrays, sized at run time by the source's n_dims
258  uint32_t sizes[n_dims];
259  uint32_t core_coords[n_dims];
260  div_const divs[n_dims];
// Product of the sizes of the sending core over all dimensions
261  uint32_t neurons_per_core = 1;
262  uint32_t core_remainder = core_id;
263  for (uint32_t j = 0; j < n_dims; j++) {
264  source_dim *s_dim = &s_info->source_dim[j];
265  // Get the core coordinates for this dimension in the global space
266  core_coords[j] = div_by_const(core_remainder, s_dim->cum_cores_div);
267  bool is_last_core = core_coords[j] == (s_dim->cores - 1);
268  core_remainder -= core_coords[j] * s_dim->cum_cores;
// The last core in a dimension has its own size and divisor
269  if (is_last_core) {
270  neurons_per_core *= s_dim->size_last_core;
271  sizes[j] = s_dim->cum_size_last_core;
272  divs[j] = s_dim->cum_size_last_core_div;
273  } else {
274  neurons_per_core *= s_dim->size_per_core;
275  sizes[j] = s_dim->cum_size_per_core;
276  divs[j] = s_dim->cum_size_per_core_div;
277  }
278  }
279 
280  // Go through the weights and process them into the ring buffers
281  uint32_t end = s_info->key_info.start + s_info->key_info.count;
282  for (uint32_t i = s_info->key_info.start; i < end; i++) {
283  connector *connector = connectors[i];
284  lc_weight_t *weights;
// Skip connectors whose delay stage does not cover this neuron
285  if (!get_conn_weights(connector, s_info, local_id, sizes, core_coords,
286  divs, neurons_per_core, &weights)) {
287  continue;
288  }
289 
290  for (uint32_t post_index = 0; post_index < config->n_post; post_index++) {
291 
292  lc_weight_t weight = weights[post_index];
293  if (weight == 0) {
294  continue;
295  }
296  uint32_t rb_index = 0;
// NOTE(review): the two branches below are incomplete in this listing; each
// contains only the middle of an argument list.  The statement that assigns
// rb_index (presumably a call to synapse_row_get_ring_buffer_index) and the
// remaining arguments appear to have been lost - restore from the real source.
297  if (weight > 0) {
299  connector->positive_synapse_type, post_index,
302  } else {
304  connector->negative_synapse_type, post_index,
// Negative weights are accumulated as a magnitude
307  weight = -weight;
308  }
309  log_debug("Updating ring_buffers[%u] for post neuron %u with weight %u",
310  rb_index, post_index, weight);
311 
312  // Add weight to current ring buffer value, avoiding saturation
313  uint32_t accumulation = ring_buffers[rb_index] + weight;
// Bit 16 set means the sum no longer fits in 16 bits; store 0xFFFF instead
314  uint32_t sat_test = accumulation & 0x10000;
315  if (sat_test) {
316  accumulation = sat_test - 1;
317  }
318  ring_buffers[rb_index] = accumulation;
319  }
320  }
321 }
static weight_t * ring_buffers
The ring buffers to be used in the simulation.
Definition: c_main.c:118
static uint32_t time
Simulation time.
uint32_t synapse_delay_mask
The mask to get the synaptic delay from a "synapse".
Definition: local_only.c:71
uint32_t synapse_type_index_bits
The number of bits used by the synapse type and post-neuron index.
Definition: local_only.c:74
uint32_t synapse_index_bits
The number of bits used by just the post-neuron index.
Definition: local_only.c:77
static struct local_only_config config
A local copy of the configuration.
Definition: local_only.c:43
key_info key_info
Information about the key.
A region of SDRAM used to transfer synapses.
Collection of rates to apply over time to a particular spike source.
uint16_t delay
The delay in time steps.
uint16_t positive_synapse_type
The index of the synapse for positive weights.
uint16_t negative_synapse_type
The index of the synapse for negative weights.
uint16_t delay_stage
The delay stage.
Structure for constants for precise constant integer division (see div_by_const)
uint32_t start
The index into ::connectors for this entry.
uint32_t count
The number of entries in ::connectors for this entry.
uint32_t key
The key to match against the incoming message.
uint32_t mask
The mask to select the relevant bits of key for matching.
div_const cum_size_per_core_div
The values used to divide to get the dimension value from a scalar.
div_const cum_size_last_core_div
The division by the dimension on the last core.
uint32_t cum_cores
The cumulative cores to divide by to get this dimension.
uint32_t cum_size_per_core
The cumulative size per core to be divided by to get this dimension.
uint32_t size_per_core
The size of the source in the dimension.
uint32_t size_last_core
The size of the last core in the dimension.
uint32_t cores
The number of cores in the full population in this dimension.
uint32_t cum_size_last_core
The cumulative size on the last core to divide by to get this dimension.
div_const cum_cores_div
Division by cores per dim.
static index_t synapse_row_get_ring_buffer_index(uint32_t simulation_timestep, uint32_t synapse_type_index, uint32_t neuron_index, uint32_t synapse_type_index_bits, uint32_t synapse_index_bits, uint32_t synapse_delay_mask)
Get the index of the ring buffer for a given timestep, synapse type and neuron index.
Definition: synapse_row.h:259