Home:ALL Converter>how can i improve multithreading efficiency?

how can i improve multithreading efficiency?

Ask Time:2020-07-18T18:13:27         Author:GiovanniSan

Json Formatter

Brief recap: I have a C++ multithreading sudoku solver, for which I want to improve the efficiency, i need your help.

I'm implementing a brute force sudoku solver in multithreading in C++. The main struct is a tree and the all logic is this: I start reading the initial matrix in input and this will be my root node, then I find the first empty cell, I find all the possible numbers that could fit that position, and for each possibility I create a sub-node of the parent node, so that each node will have a child-node for each possible number. The tree continues to grow in this way until a solution is met, that is a node has no more free cells(so it is full), and the node grid satisfy all the rules of the sudoku.

I tried to implement this algorithm in multithreading like this: I follow the exactly same logic of above sequentially but making one step each time, storing all the children I have met until that moment (each child will be a path, and so a tree) in a vector. If the children are less than the threads chosen by the user, then I solve them sequentially and I make one more step(children will grow). When I have more children than threads, then I split the children for each thread, and I start the threads each one with his part(that is a tree).

Now, taking into account that the "brute force" approch and that "only std lib" requirements are mandatory, so I can't do in a different way, but I can change of course the logic on how to implement this.

The question is: how can I improve the efficiency of this program ? All the suggestions that uses only std lib are welcome.

#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)

using namespace std;

atomic<bool> solutionFound{false};

//Each node has a sudokuMatrix and some sub-trees
struct Node {
    vector<vector<int>> grid;
    vector<Node *> child;
};


Node *newNode(const vector<vector<int>> &newGrid) {
    Node *temp = new Node;
    temp->grid = newGrid;
    return temp;
}

//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
               const vector<vector<int>> &grid) {
    // Check column
    for (int row = 0; row < N; row++) {
        if (grid[row][col_] == val) return false;
    }
    // Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_][col] == val) return false;
    }
    // check box
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row / 3 == row_ / 3 &&
                col / 3 == col_ / 3) {  // they are in the same square 3x3
                if ((grid[row][col] == val)) return false;
            }
        }
    }
    return true;
}

//Check if the sudoku is solved
bool isSafe(const vector<vector<int>> &grid) 
{
    // Hashmap for row column and boxes
    unordered_map<int, int> row_[9], column_[9], box[3][3];
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            // mark the element in row column and box
            row_[row][grid[row][col]] += 1;
            column_[col][grid[row][col]] += 1;
            box[row / 3][col / 3][grid[row][col]] += 1;

            // if an element is already
            // present in the hashmap
            if (box[row / 3][col / 3][grid[row][col]] > 1 ||
                column_[col][grid[row][col]] > 1 ||
                row_[row][grid[row][col]] > 1)
                return false;
        }
    }
    return true;
}
//Find the first empty cell
pair<int, int> findCell(const vector<vector<int>> &grid) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (grid[i][j] == UNASSIGNED) {
                return make_pair(i, j);
            }
        }
    }
    return ERROR_PAIR;
}

//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<vector<vector<int>>> getChoices(const int &row, const int &col,
                                     const vector<vector<int>> &grid) {
    list<vector<vector<int>>> choices;
    for (int i = 1; i < 10; i++) {
        if (canInsert(i, row, col, grid)) {
            // cout << "posso inserire =" << i << endl;
            vector<vector<int>> tmpGrid = grid;
            tmpGrid[row][col] = i;
            choices.push_back(tmpGrid);
        }
    }
    return choices;
}

//Update the childreen of a node.
void addChoices(list<vector<vector<int>>> &choices, Node &node) {
    while (!choices.empty()) {
        node.child.push_back(newNode(choices.front()));
        choices.pop_front();
    }
    return;
}

//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            for (Node *&n : n->child) {
                tasks.push_back(n);
            }
            continue;
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}

//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user. 
vector<Node *> splitChunks(Node *root, const int &nw) {
    vector<Node *> tasks;
    vector<Node *> nodes;
    nodes.push_back(root);

    while ((int)tasks.size() < nw && !solutionFound) {
        tasks.clear();
        solveOneStep(nodes, nw, tasks);
        nodes = tasks;
    }
    return tasks;
}

//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw,) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            solveSubTree(n->child, nw);
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                std::cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}


int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage is: nw " << endl;
        return (-1);
    }
//A test matrix.
    vector<vector<int>> grid = 
                            { { 0, 1, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 8, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 } };
    
    Node *root = newNode(grid);
    vector<thread> tids;
    const int nw = atoi(argv[1]); //Number of worker
    vector<vector<Node *>> works(nw, vector<Node *>()); 
    vector<Node *> tasks = splitChunks(root, nw);

//Split the tasks for each thread, the main thread takes the last part of the work.
    for (int i = 0; i < nw; i++) {
        int limit = 0;
        i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
        for (int j = 0; j < limit; j++) {
            works[i].push_back(tasks.back());
            tasks.pop_back();
        }
    }

//Start each thread, and then the main thread start his computation.
    for (int i = 0; i < nw - 1; i++) {
        tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
    }
    solveSubTree(works[nw - 1], nw, t1);  // Main thread do last part of the work

    for (thread &t : tids) {
        t.join();
    }

    std::cout << "end" << endl;
    return (0);
}

Author:GiovanniSan,eproduced under the CC 4.0 BY-SA copyright license with a link to the original source and this disclaimer.
Link to original article:https://stackoverflow.com/questions/62967304/how-can-i-improve-multithreading-efficiency
Jérôme Richard :

Here are several points to improve the efficiency of the reference implementation:\n\nusing vector<vector<int>> for 2D array is far from being efficient: it is not contiguous in memory and cause many slow allocations. A big flatten array should be preferred.\nunordered_map<int, int> for set-like operations are not needed since the integers in the sets are in a small (contiguous) range. Using a simple array is much faster.\nsome copies are not needed and can be removed with std::move.\nas integers in the grid are small, char can be used over int (to reduce the memory footprint, to keep data in CPU caches and to possibly make allocations faster).\nI see one new but no delete in the code...\nThe work between threads seems clearly unbalanced in many case resulting in a slower parallel execution, a load balancing should be performed to scale better. One way is to do that is to use task scheduling.\nOne can use heuristics to drastically speed up the exploration. To begin with, I advise you to look constraint satisfaction problem (CSP) because CSP solvers known to be very good at solving it. More general and theoretical information can be found in the book Artificial Intelligence: a modern approach.\n\nHere is a code applying the first remarks resulting in a 5 times faster execution on my machine (note that the grid has been modified in the main) :\n#define UNASSIGNED 0\n#define N 9\n#define ERROR_PAIR std::make_pair(-1, -1)\n\nusing namespace std;\n\nvoid printGrid(const array<char, N*N>& grid)\n{\n for (int row = 0; row < N; row++)\n {\n for (int col = 0; col < N; col++)\n {\n cout << (int)grid[row*N+col] << " ";\n }\n cout << endl;\n }\n}\n\natomic<bool> solutionFound{false};\n\n//Each node has a sudokuMatrix and some sub-trees\nstruct Node {\n array<char, N*N> grid;\n vector<Node *> child;\n};\n\n\nNode *newNode(const array<char, N*N> &newGrid) {\n Node *temp = new Node;\n temp->grid = newGrid;\n return temp;\n}\n\n//Check if a number can be inserted in a given position\nbool canInsert(const int &val, const int &row_, const int &col_,\n const array<char, N*N> &grid) {\n // Check column\n for (int row = 0; row < N; row++) {\n if (grid[row*N+col_] == val) return false;\n }\n // Check row\n for (int col = 0; col < N; col++) {\n if (grid[row_*N+col] == val) return false;\n }\n // check box\n for (int row = 0; row < N; row++) {\n for (int col = 0; col < N; col++) {\n if (row / 3 == row_ / 3 &&\n col / 3 == col_ / 3) { // they are in the same square 3x3\n if ((grid[row*N+col] == val)) return false;\n }\n }\n }\n return true;\n}\n\n//Check if the sudoku is solved\nbool isSafe(const array<char, N*N> &grid) \n{\n // No need for a hashmap for row column and boxes, \n // just an array since associated values are small integer\n char row_[9][N+1] = {0};\n char column_[9][N+1] = {0};\n char box[3][3][N+1] = {0};\n\n for (int row = 0; row < N; row++) {\n for (int col = 0; col < N; col++) {\n // mark the element in row column and box\n row_[row][grid[row*N+col]] += 1;\n column_[col][grid[row*N+col]] += 1;\n box[row / 3][col / 3][grid[row*N+col]] += 1;\n\n // if an element is already\n // present in the hashmap\n if (box[row / 3][col / 3][grid[row*N+col]] > 1 ||\n column_[col][grid[row*N+col]] > 1 ||\n row_[row][grid[row*N+col]] > 1)\n return false;\n }\n }\n return true;\n}\n//Find the first empty cell\npair<int, int> findCell(const array<char, N*N> &grid) {\n for (int i = 0; i < N; i++) {\n for (int j = 0; j < N; j++) {\n if (grid[i*N+j] == UNASSIGNED) {\n return make_pair(i, j);\n }\n }\n }\n return ERROR_PAIR;\n}\n\n//Find all the numbers i can insert in a given position, and update the matrix with that number. Return\n//the set of all the matrixes(one for each possibility).\nlist<array<char, N*N>> getChoices(const int &row, const int &col,\n const array<char, N*N> &grid) {\n list<array<char, N*N>> choices;\n for (int i = 1; i < 10; i++) {\n if (canInsert(i, row, col, grid)) {\n // cout << "posso inserire =" << i << endl;\n array<char, N*N> tmpGrid = grid;\n tmpGrid[row*N+col] = i;\n choices.push_back(std::move(tmpGrid));\n }\n }\n return choices;\n}\n\n//Update the childreen of a node.\nvoid addChoices(list<array<char, N*N>> &choices, Node &node) {\n while (!choices.empty()) {\n node.child.push_back(newNode(choices.front()));\n choices.pop_front();\n }\n return;\n}\n\n//Compute one step of computation for each node in input, and put all the childreen in the task vector.\nvoid solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {\n if (solutionFound) return;\n for (Node *&n : nodes) {\n if (findCell(n->grid) != ERROR_PAIR) {\n pair<int, int> freeCell = findCell(n->grid);\n list<array<char, N*N>> choices =\n getChoices(freeCell.first, freeCell.second, n->grid);\n if (choices.empty()) {\n continue;\n }\n addChoices(choices, *n);\n for (Node *&n : n->child) {\n tasks.push_back(n);\n }\n continue;\n } else if (isSafe(n->grid)) {\n if (!solutionFound.load()) {\n solutionFound.store(true);\n printGrid(n->grid);\n cout << "That's the first solution found !" << endl;\n }\n return;\n }\n }\n}\n\n//Compute step by step the computation until you reach a level of the entire tree of nodes where\n//the nodes of that level are more than the number of worker(NW) choose by the user. \nvector<Node *> splitChunks(Node *root, const int &nw) {\n vector<Node *> tasks;\n vector<Node *> nodes;\n nodes.push_back(root);\n\n while ((int)tasks.size() < nw && !solutionFound) {\n tasks.clear();\n solveOneStep(nodes, nw, tasks);\n nodes = tasks;\n }\n return tasks;\n}\n\n//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no\n//solution exist.\nvoid solveSubTree(vector<Node *> &nodes, const int &nw) {\n if (solutionFound) return;\n for (Node *&n : nodes) {\n if (findCell(n->grid) != ERROR_PAIR) {\n pair<int, int> freeCell = findCell(n->grid);\n list<array<char, N*N>> choices =\n getChoices(freeCell.first, freeCell.second, n->grid);\n if (choices.empty()) {\n continue;\n }\n addChoices(choices, *n);\n solveSubTree(n->child, nw);\n } else if (isSafe(n->grid)) {\n if (!solutionFound.load()) {\n solutionFound.store(true);\n printGrid(n->grid);\n std::cout << "That's the first solution found !" << endl;\n }\n return;\n }\n }\n}\n\n\nint main(int argc, char *argv[]) {\n if (argc != 2) {\n cout << "Usage is: nw " << endl;\n return (-1);\n }\n//A test matrix.\n array<char, N*N> grid = \n { 0, 0, 0, 0, 0, 0, 2, 0, 0,\n 0, 8, 0, 0, 0, 7, 0, 9, 0,\n 6, 0, 2, 0, 0, 0, 5, 0, 0,\n 0, 7, 0, 0, 6, 0, 0, 0, 0,\n 0, 0, 0, 9, 0, 1, 0, 0, 0,\n 0, 0, 0, 0, 2, 0, 0, 4, 0,\n 0, 0, 5, 0, 0, 0, 6, 0, 3,\n 0, 9, 0, 4, 0, 0, 0, 7, 0,\n 0, 0, 6, 0, 0, 0, 0, 0, 0 };\n \n Node *root = newNode(grid);\n vector<thread> tids;\n const int nw = atoi(argv[1]); //Number of worker\n vector<vector<Node *>> works(nw, vector<Node *>()); \n vector<Node *> tasks = splitChunks(root, nw);\n\n//Split the tasks for each thread, the main thread takes the last part of the work.\n for (int i = 0; i < nw; i++) {\n int limit = 0;\n i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;\n for (int j = 0; j < limit; j++) {\n works[i].push_back(tasks.back());\n tasks.pop_back();\n }\n }\n\n//Start each thread, and then the main thread start his computation.\n for (int i = 0; i < nw - 1; i++) {\n tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));\n }\n solveSubTree(works[nw - 1], nw); // Main thread do last part of the work\n\n for (thread &t : tids) {\n t.join();\n }\n\n std::cout << "end" << endl;\n return (0);\n}\n",
2020-07-18T12:15:15
yy