/* ============================================================================
     place and route a standard CMOS circuit schematic.
         input:  Berkeley Logic Interchange Format (BLIF)
         output: black-white schematic image (PNG)

     compiling options: g++ cirschem.cpp -std=c++26 -O3
     requires c++26 (g++15 or above)

     Copyright (c) 2025 zhaoyongwei<zhaoyongwei@ict.ac.cn>
     IPRC, ICT CAS. Visit https://yongwei.site/pnr-circuit-schematic

     Permission is hereby granted, free of charge, to any person obtaining a copy
     of this software and associated documentation files (the "Software"), to deal
     in the Software without restriction, including without limitation the rights
     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     copies of the Software, and to permit persons to whom the Software is
     furnished to do so, subject to the following conditions:
     
     The above copyright notice and this permission notice shall be included in all
     copies or substantial portions of the Software.
     
     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     SOFTWARE.
 
 * ============================================================================ */

#include <iostream>
#include <format>
#include <fstream>
#include <string>
#include <vector>
#include <deque>
#include <array>
#include <tuple>
#include <set>
#include <map>
#include <unordered_map>
#include <utility>
#include <algorithm>
#include <ranges>
#include <random>
#include <cmath>
#include <cstdint>
#include <chrono>
using namespace std::chrono_literals;
using namespace std::literals;

// simulated annealing placement parameters
constexpr double initial_temperature = std::exp(5);
constexpr double cooling_rate        = 0.99999;
constexpr double stop_temperature    = 0.1;
constexpr auto step_timeout          = 5ms;
// placement optimizing target
constexpr double initial_area_margins = std::exp(1);
constexpr int area_cost = 5;
constexpr int cross_track_penalty = 50;
// output file name
constexpr auto schematic_filename = "schematic.png";

enum {
  NOT = 0,
  NAND = 1,
};

std::tuple<std::set<int>,
           std::set<int>,
           std::unordered_map<int, std::pair<int, std::vector<int>>>,
           std::vector<std::string>>
parse(int argc, char* argv[]) {

    std::set<int> inputs;
    std::set<int> outputs;
    std::unordered_map<int, std::pair<int, std::vector<int>>> gates;
    std::vector<std::string> names {"NOT"s, "NAND"s};
    std::map<std::string, int> name_ids {{"NOT"s, 0}, {"NAND"s, 1}}; // compromised alt for trie.
    auto parse_name = [&](std::string str) {
        if (!name_ids.contains(str)) {
            name_ids.emplace(str, names.size());
            names.push_back(str);
        }
        return name_ids.at(str);
    };

    if (argc < 2) {
        std::cerr << "Usage: " << argv[0] << " <blif_file>\n";
        return {inputs, outputs, gates, names};
    }

    std::ifstream blif(argv[1]);
    if (!blif.is_open()) {
        std::cerr << "Error opening file: " << argv[1] << '\n';
        return {inputs, outputs, gates, names};
    }

    std::string line;
    while (std::getline(blif, line)) {
        if (line.starts_with(".inputs")) {
            size_t start = 7; // length of ".inputs"
            while (start < line.size()) {
                size_t end = line.find(' ', start);
                if (end == std::string::npos) end = line.size();
                std::string token = line.substr(start, end - start);
                if (!token.empty()) {
                    inputs.insert(parse_name(token));
                }
                start = end + 1;
            }
        } else if (line.starts_with(".outputs")) {
            size_t start = 8; // length of ".outputs"
            while (start < line.size()) {
                size_t end = line.find(' ', start);
                if (end == std::string::npos) end = line.size();
                std::string token = line.substr(start, end - start);
                if (!token.empty()) {
                    outputs.insert(parse_name(token));
                }
                start = end + 1;
            }
        } else if (line.starts_with(".subckt")) {
            std::vector<int> ports;
            size_t start = 8; // length of ".subckt"
            while (start < line.size()) {
                size_t end = line.find(' ', start);
                if (end == std::string::npos) end = line.size();
                std::string token = line.substr(start, end - start);
                if (!token.empty()) {
                    size_t start = token.find('=', 0);
                    if (start != std::string::npos) {
                        token = token.substr(start+1, token.size());
                    }
                    ports.push_back(parse_name(token));
                }
                start = end + 1;
            }
            int gate = ports.front();
            int name = ports.back();
            ports.erase(ports.begin());
            ports.pop_back();
            gates[name] = { gate, ports };
        }
    }

    return {inputs, outputs, gates, names};
}

template<typename T, T... Ints>
constexpr auto make_crc_table(std::integer_sequence<T, Ints...> is) {
    return std::array<T, sizeof...(Ints)>{ [](T n){
               for (int k : std::views::iota(0,8)) n = (n>>1)^(n&1?0xedb88320:0); return n;
           }(Ints)...};
}
constexpr auto crc_table{ make_crc_table(std::make_integer_sequence<uint32_t, 256>{}) };

template<typename T>
constexpr uint32_t crc(const T& buf) {
    return ~std::ranges::fold_left(buf, 0xffffffff, [](uint32_t c, uint8_t d){
                return crc_table[(c ^ d) & 0xff] ^ (c>>8);
            });
}

template<typename T>
constexpr uint32_t adler32(const T& buf) {
    return std::ranges::fold_left(buf, 1, [](uint32_t ab, uint8_t d){
               auto a = ((ab & 0xffff) + d) % 65521; return a | ((((ab >> 16) + a) % 65521) << 16);
           });
}

template<typename Iter, typename T>
void insert_be(Iter it, T value) {
    auto value_be = std::endian::native == std::endian::little ? std::byteswap(value) : value;
    auto arr = std::bit_cast<std::array<uint8_t,sizeof(T)>>(value_be);
    std::ranges::copy(arr, it);
}
template<typename Iter, typename T>
void insert_le(Iter it, T value) {
    auto value_le = std::endian::native == std::endian::big ? std::byteswap(value) : value;
    auto arr = std::bit_cast<std::array<uint8_t,sizeof(T)>>(value_le);
    std::ranges::copy(arr, it);
}

std::vector<uint8_t> deflate(const std::vector<uint8_t>& data) {
    std::vector<uint8_t> compressed_data;
    auto it = std::back_inserter(compressed_data);
    for (auto chunk : data | std::views::chunk(0xfffb)) {
        *it = 0;
        insert_le(it, static_cast<uint16_t>( chunk.size()));
        insert_le(it, static_cast<uint16_t>(~chunk.size()));
        std::ranges::copy(chunk | std::views::transform(std::bit_not<uint8_t>{}), it);
    }
    compressed_data[(compressed_data.size()-1) & ~0xffffULL] = 0x1;
    return compressed_data;
}

constexpr std::array<std::tuple<int,std::array<uint16_t,8>>,96> font {{
{ 0, {0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000}}, //
{ 0, {0x0000,0x0000,0x0380,0x6FC0,0x6FC0,0x0380,0x0000,0x0000}}, // !
{ 6, {0x0000,0x3800,0x7800,0x0000,0x0000,0x7800,0x3800,0x0000}}, // "
{ 0, {0x1100,0x7FC0,0x7FC0,0x1100,0x7FC0,0x7FC0,0x1100,0x0000}}, // #
{-2, {0x0CE0,0x19F0,0x1110,0x711C,0x711C,0x1F30,0x0E60,0x0000}}, // $
{ 0, {0x4300,0x6300,0x3000,0x1800,0x0C00,0x6600,0x6300,0x0000}}, // %
{ 0, {0x3800,0x7D80,0x47C0,0x4E40,0x3BC0,0x7D80,0x4400,0x0000}}, // &
{ 6, {0x0000,0x4000,0x7800,0x3800,0x0000,0x0000,0x0000,0x0000}}, // '
{ 0, {0x0000,0x0000,0x1F00,0x3F80,0x60C0,0x4040,0x0000,0x0000}}, // (
{ 0, {0x0000,0x0000,0x4040,0x60C0,0x3F80,0x1F00,0x0000,0x0000}}, // )
{ 2, {0x1000,0x5400,0x7C00,0x3800,0x3800,0x7C00,0x5400,0x1000}}, // *
{ 2, {0x0000,0x1000,0x1000,0x7C00,0x7C00,0x1000,0x1000,0x0000}}, // +
{-1, {0x0000,0x0000,0x4000,0x7800,0x3800,0x0000,0x0000,0x0000}}, // ,
{ 4, {0x4000,0x4000,0x4000,0x4000,0x4000,0x4000,0x4000,0x0000}}, // -
{ 0, {0x0000,0x0000,0x0000,0x6000,0x6000,0x0000,0x0000,0x0000}}, // .
{ 1, {0x6000,0x3000,0x1800,0x0C00,0x0600,0x0300,0x0180,0x0000}}, // /
{ 0, {0x3F80,0x7FC0,0x4C40,0x4640,0x4340,0x7FC0,0x3F80,0x0000}}, // 0
{ 0, {0x0000,0x4100,0x4180,0x7FC0,0x7FC0,0x4000,0x4000,0x0000}}, // 1
{ 0, {0x6080,0x70C0,0x5840,0x4C40,0x4640,0x63C0,0x6180,0x0000}}, // 2
{ 0, {0x2080,0x60C0,0x4440,0x4440,0x4440,0x7FC0,0x3B80,0x0000}}, // 3
{ 0, {0x0C00,0x0E00,0x0B00,0x4980,0x7FC0,0x7FC0,0x4800,0x0000}}, // 4
{ 0, {0x27C0,0x67C0,0x4440,0x4440,0x4440,0x7C40,0x3840,0x0000}}, // 5
{ 0, {0x3F00,0x7F80,0x44C0,0x4440,0x4440,0x7C00,0x3800,0x0000}}, // 6
{ 0, {0x00C0,0x00C0,0x7840,0x7C40,0x0640,0x03C0,0x01C0,0x0000}}, // 7
{ 0, {0x3B80,0x7FC0,0x4440,0x4440,0x4440,0x7FC0,0x3B80,0x0000}}, // 8
{ 0, {0x0380,0x47C0,0x4440,0x4440,0x6440,0x3FC0,0x1F80,0x0000}}, // 9
{ 1, {0x0000,0x0000,0x0000,0x6300,0x6300,0x0000,0x0000,0x0000}}, // :
{ 0, {0x0000,0x0000,0x4000,0x7180,0x3180,0x0000,0x0000,0x0000}}, // ;
{ 0, {0x0000,0x0400,0x0E00,0x1B00,0x3180,0x60C0,0x4040,0x0000}}, // <
{ 2, {0x0000,0x4800,0x4800,0x4800,0x4800,0x4800,0x4800,0x0000}}, // =
{ 0, {0x0000,0x4040,0x60C0,0x3180,0x1B00,0x0E00,0x0400,0x0000}}, // >
{ 0, {0x0180,0x01C0,0x0040,0x6C40,0x6E40,0x03C0,0x0180,0x0000}}, // ?
{ 0, {0x3F80,0x7FC0,0x4040,0x5E40,0x5E40,0x5FC0,0x0F80,0x0000}}, // @
{ 0, {0x7E00,0x7F00,0x0980,0x08C0,0x0980,0x7F00,0x7E00,0x0000}}, // A
{ 0, {0x4040,0x7FC0,0x7FC0,0x4440,0x4440,0x7FC0,0x3B80,0x0000}}, // B
{ 0, {0x1F00,0x3F80,0x60C0,0x4040,0x4040,0x60C0,0x3180,0x0000}}, // C
{ 0, {0x4040,0x7FC0,0x7FC0,0x4040,0x60C0,0x3F80,0x1F00,0x0000}}, // D
{ 0, {0x4040,0x7FC0,0x7FC0,0x4440,0x4E40,0x60C0,0x71C0,0x0000}}, // E
{ 0, {0x4040,0x7FC0,0x7FC0,0x4440,0x0E40,0x00C0,0x01C0,0x0000}}, // F
{ 0, {0x1F00,0x3F80,0x60C0,0x4840,0x4840,0x38C0,0x7980,0x0000}}, // G
{ 0, {0x7FC0,0x7FC0,0x0400,0x0400,0x0400,0x7FC0,0x7FC0,0x0000}}, // H
{ 0, {0x0000,0x0000,0x4040,0x7FC0,0x7FC0,0x4040,0x0000,0x0000}}, // I
{ 0, {0x3000,0x7000,0x4000,0x4040,0x7FC0,0x3FC0,0x0040,0x0000}}, // J
{ 0, {0x4040,0x7FC0,0x7FC0,0x0400,0x1F00,0x7BC0,0x60C0,0x0000}}, // K
{ 0, {0x4040,0x7FC0,0x7FC0,0x4040,0x4000,0x6000,0x7000,0x0000}}, // L
{ 0, {0x7FC0,0x7FC0,0x0380,0x0700,0x0380,0x7FC0,0x7FC0,0x0000}}, // M
{ 0, {0x7FC0,0x7FC0,0x0380,0x0700,0x0E00,0x7FC0,0x7FC0,0x0000}}, // N
{ 0, {0x1F00,0x3F80,0x60C0,0x4040,0x60C0,0x3F80,0x1F00,0x0000}}, // O
{ 0, {0x4040,0x7FC0,0x7FC0,0x4440,0x0440,0x07C0,0x0380,0x0000}}, // P
{-1, {0x0FC0,0x1FE0,0x1020,0x1C20,0x7820,0x7FE0,0x4FC0,0x0000}}, // Q
{ 0, {0x4040,0x7FC0,0x7FC0,0x0440,0x0C40,0x7FC0,0x7380,0x0000}}, // R
{ 0, {0x3180,0x73C0,0x4640,0x4440,0x4C40,0x79C0,0x3180,0x0000}}, // S
{ 0, {0x0000,0x01C0,0x40C0,0x7FC0,0x7FC0,0x40C0,0x01C0,0x0000}}, // T
{ 0, {0x3FC0,0x7FC0,0x4000,0x4000,0x4000,0x7FC0,0x3FC0,0x0000}}, // U
{ 0, {0x0FC0,0x1FC0,0x3000,0x6000,0x3000,0x1FC0,0x0FC0,0x0000}}, // V
{ 0, {0x1FC0,0x7FC0,0x7000,0x3C00,0x7000,0x7FC0,0x1FC0,0x0000}}, // W
{ 0, {0x60C0,0x71C0,0x1F00,0x0E00,0x1F00,0x71C0,0x60C0,0x0000}}, // X
{ 0, {0x0000,0x03C0,0x47C0,0x7C00,0x7C00,0x47C0,0x03C0,0x0000}}, // Y
{ 0, {0x71C0,0x78C0,0x4C40,0x4640,0x4340,0x61C0,0x70C0,0x0000}}, // Z
{ 0, {0x0000,0x0000,0x7FC0,0x7FC0,0x4040,0x4040,0x0000,0x0000}}, // [
{ 0, {0x01C0,0x0380,0x0700,0x0E00,0x1C00,0x3800,0x7000,0x0000}}, //'\'
{ 0, {0x0000,0x0000,0x4040,0x4040,0x7FC0,0x7FC0,0x0000,0x0000}}, // ]
{ 7, {0x4000,0x6000,0x3000,0x1800,0x3000,0x6000,0x4000,0x0000}}, // ^
{-2, {0x4000,0x4000,0x4000,0x4000,0x4000,0x4000,0x4000,0x4000}}, // _
{ 8, {0x0000,0x0000,0x3000,0x7000,0x4000,0x0000,0x0000,0x0000}}, // `
{ 0, {0x3000,0x7A00,0x4A00,0x4A00,0x3E00,0x7C00,0x4000,0x0000}}, // a
{ 0, {0x0040,0x7FC0,0x7FC0,0x4200,0x4600,0x7C00,0x3800,0x0000}}, // b
{ 0, {0x3C00,0x7E00,0x4200,0x4200,0x4200,0x6600,0x2400,0x0000}}, // c
{ 0, {0x3800,0x7C00,0x4600,0x4240,0x3FC0,0x7FC0,0x4000,0x0000}}, // d
{ 0, {0x3C00,0x7E00,0x4A00,0x4A00,0x4A00,0x6E00,0x2C00,0x0000}}, // e
{ 0, {0x4400,0x7F80,0x7FC0,0x4440,0x00C0,0x0180,0x0000,0x0000}}, // f
{-2, {0x2700,0x6F80,0x4880,0x4880,0x7F00,0x3F80,0x0080,0x0000}}, // g
{ 0, {0x4040,0x7FC0,0x7FC0,0x0400,0x0200,0x7E00,0x7C00,0x0000}}, // h
{ 0, {0x0000,0x0000,0x4200,0x7EC0,0x7EC0,0x4000,0x0000,0x0000}}, // i
{-2, {0x0000,0x3000,0x7000,0x4000,0x4080,0x7FB0,0x3FB0,0x0000}}, // j
{ 0, {0x4040,0x7FC0,0x7FC0,0x0800,0x1C00,0x7600,0x6200,0x0000}}, // k
{ 0, {0x0000,0x0000,0x4040,0x7FC0,0x7FC0,0x4000,0x0000,0x0000}}, // l
{ 0, {0x7E00,0x7E00,0x0600,0x3C00,0x0600,0x7E00,0x7C00,0x0000}}, // m
{ 0, {0x0200,0x7E00,0x7C00,0x0200,0x0200,0x7E00,0x7C00,0x0000}}, // n
{ 0, {0x3C00,0x7E00,0x4200,0x4200,0x4200,0x7E00,0x3C00,0x0000}}, // o
{-2, {0x4080,0x7F80,0x7F00,0x4880,0x0880,0x0F80,0x0700,0x0000}}, // p
{-2, {0x0700,0x0F80,0x0880,0x4880,0x7F00,0x7F80,0x4080,0x0000}}, // q
{ 0, {0x4200,0x7E00,0x7C00,0x4600,0x0200,0x0E00,0x0C00,0x0000}}, // r
{ 0, {0x2400,0x6E00,0x4A00,0x5A00,0x5200,0x7600,0x2400,0x0000}}, // s
{ 0, {0x0200,0x0200,0x3F80,0x7FC0,0x4200,0x6200,0x2000,0x0000}}, // t
{ 0, {0x3E00,0x7E00,0x4000,0x4000,0x3E00,0x7E00,0x4000,0x0000}}, // u
{ 0, {0x0000,0x1E00,0x3E00,0x6000,0x6000,0x3E00,0x1E00,0x0000}}, // v
{ 0, {0x3E00,0x7E00,0x6000,0x3800,0x6000,0x7E00,0x3E00,0x0000}}, // w
{ 0, {0x4200,0x6600,0x3C00,0x1800,0x3C00,0x6600,0x4200,0x0000}}, // x
{-2, {0x4780,0x4F80,0x4800,0x4800,0x6800,0x3F80,0x1F80,0x0000}}, // y
{ 0, {0x4600,0x6600,0x7200,0x5A00,0x4E00,0x6600,0x6200,0x0000}}, // z
{ 0, {0x0000,0x0400,0x0400,0x3F80,0x7BC0,0x4040,0x4040,0x0000}}, // {
{ 0, {0x0000,0x0000,0x0000,0x7BC0,0x7BC0,0x0000,0x0000,0x0000}}, // |
{ 0, {0x0000,0x4040,0x4040,0x7BC0,0x3F80,0x0400,0x0400,0x0000}}, // }
{ 7, {0x4000,0x6000,0x2000,0x6000,0x4000,0x6000,0x2000,0x0000}}, // ~
{ 1, {0x7000,0x7800,0x4C00,0x4600,0x4C00,0x7800,0x7000,0x0000}}  //
}};

struct draw_t {
    size_t w, row_bytes;
    size_t h;
    std::vector<uint8_t> pixels;
    draw_t(size_t w, size_t h) : w(w), row_bytes(1 + (w+7)/8), h(h), pixels(h * row_bytes) {
        for (auto & x: pixels | std::views::stride(row_bytes))
            x = 0xff;
    }

    void set_pixel(int x, int y, bool value) {
        uint8_t m = 0x80 >> (x & 7);
        size_t addr = y * row_bytes + 1 + x/8;
        if (value) {
            pixels[addr] |= m;
        } else {
            pixels[addr] &= ~m;
        }
    }
    void set_hline(int x1, int x2, int y, bool value) {
        uint8_t m1 =  ((0x100 >> (x1 & 7)) - 1);
        uint8_t m2 = ~((0x080 >> (x2 & 7)) - 1);
        size_t base_addr = y * row_bytes + 1;
        if (x1/8 == x2/8) {
            if (value) {
                pixels[base_addr + x1/8] |= m1 & m2;
            } else {
                pixels[base_addr + x1/8] &= ~(m1 & m2);
            }
        } else {
            if (value) {
                pixels[base_addr + x1/8] |= m1;
                pixels[base_addr + x2/8] |= m2;
            } else {
                pixels[base_addr + x1/8] &= ~m1;
                pixels[base_addr + x2/8] &= ~m2;
            }
            for (int addr : std::views::iota(base_addr + x1/8 + 1, base_addr + x2/8))
                pixels[addr] = -!!value;
        }
    }
    void set_slope(int x1, int x2, int y1, int y2, float thick, bool value) {
        float slope = 1.0 * (x2 - x1) / std::abs(y2 - y1);
        float tan = 0.5 / std::sqrt(1. + slope*slope);
        int x_low  = std::min<int>(std::max<int>(0, x1 - tan*thick), w-1);
        int x_high = std::min<int>(std::max<int>(0, x2 + tan*thick), w-1);
        for (int d : std::views::iota(0, std::abs(y2-y1)+1)) {
            int l = (d + 0.5) * slope - tan * thick;
            int r = (d + 1.5) * slope + tan * thick;
            int y = y2 > y1 ? y1 + d : y1 - d;
            l = std::max(x_low, std::min(l + x1, x_high));
            r = std::max(l,     std::min(r - 1 + x1, x_high));
            set_hline(l, r, y, value);
        }
    }

    void write_png(const std::string& filename) { 
        std::ofstream out(filename, std::ios::binary);
        uint32_t width = w;
        uint32_t height = h;
        std::vector<uint8_t> png_data {137, 80, 78, 71, 13, 10, 26, 10};
        auto png = std::back_inserter(png_data);
        std::vector<uint8_t> ihdr_data {'I', 'H', 'D', 'R'};
        auto ihdr = std::back_inserter(ihdr_data);
        insert_be(ihdr, width);
        insert_be(ihdr, height);
        std::ranges::copy(std::array<uint8_t,5>{1,0,0,0,0}, ihdr);
        insert_be(png, uint32_t{13});
        std::ranges::copy(ihdr_data, png);
        insert_be(png, crc(ihdr_data));
        std::vector<uint8_t> compressed_data { deflate(pixels) };
        insert_be(std::back_inserter(compressed_data), adler32(pixels));
        constexpr std::array<uint8_t,6> idat_type {'I', 'D', 'A', 'T', 0x78, 0x01};
        insert_be(png, static_cast<uint32_t>(compressed_data.size() + 2));
        std::ranges::copy(idat_type, png);
        std::ranges::copy(compressed_data, png);
        insert_be(png, crc(std::views::concat(idat_type, compressed_data)));
        constexpr std::array<uint8_t, 4> iend_type {'I', 'E', 'N', 'D'};
        insert_be(png, uint32_t{0});
        std::ranges::copy(iend_type, png);
        insert_be(png, crc(iend_type));
        out.write(reinterpret_cast<const char*>(png_data.data()), png_data.size());
    }

    struct kwargs_t {int fill = 0; int outline=0; int width = 1; int m = 0; int os = 0;};
    void line(std::tuple<int,int,int,int> coord, kwargs_t kwargs) {
        auto [x1,y1,x2,y2] = coord;
        x1 = std::min<int>(w-1, std::max<int>(x1, 0));
        x2 = std::min<int>(w-1, std::max<int>(x2, 0));
        y1 = std::min<int>(h-1, std::max<int>(y1, 0));
        y2 = std::min<int>(h-1, std::max<int>(y2, 0));
        if (y1 == y2)
            for (int i : std::views::iota(0,kwargs.width))
                set_hline(std::min(x1, x2), std::max(x1, x2), y1 + (i+1)/2 * ((i&1)*2-1), !kwargs.fill);
        else {
            if (x1 > x2) {
                std::tie(x1, x2) = std::tuple{x2, x1};
                std::tie(y1, y2) = std::tuple{y2, y1};
            }
            set_slope(x1, x2, y1, y2, kwargs.width, !kwargs.fill);
        }
    }
    void ellipse(std::tuple<int,int,int,int> coord, kwargs_t kwargs) {
        auto [x1,y1,x2,y2] = coord;
        std::tie(x1, x2) = std::tuple{std::min(x1, x2), std::max(x1, x2)};
        std::tie(y1, y2) = std::tuple{std::min(y1, y2), std::max(y1, y2)};
        int dx = (x2 - x1) + kwargs.width;
        float ox = (x1 + x2) * 0.5;
        float oy = (y1 + y2) * 0.5;
        float ro = dx * 0.5, ri = dx * 0.5 - kwargs.width;
        for (int y : std::views::iota(std::max(0,y1-kwargs.width), std::min<int>(h-1,y2+kwargs.width+1))) {
            for (int x : std::views::iota(std::max(0,x1-kwargs.width), std::min<int>(w-1,x2+kwargs.width+1))) {
                float rsq = (x-ox)*(x-ox) + (y-oy)*(y-oy);
                if (rsq <= ri*ri)
                    set_pixel(x, y, !kwargs.fill);
                else if (rsq <= ro*ro)
                    set_pixel(x, y, !kwargs.outline);
            }
        }
    }
    void text(std::tuple<int,int> coord, std::string text, std::string anchor) {
        auto [l,t] = coord;
        int length = text.size() * 8;
        auto [r,b] = std::tuple{l + length, t + 15};
        if (anchor[0] == 'r') {
            l -= length; r -= length;
        }
        for (int x : std::views::iota(std::max(l,0),std::min<int>(r,w))) {
            size_t nchr = (x - l) / 8;
            size_t ncol = (x - l) & 7;
            int chr = text[nchr];
            if (chr > 0x7f or chr < 0x20) chr = '?';
            const auto& [offset, dots] = font[chr - 0x20];
            
            for (int y : std::views::iota(std::max(t-offset,0), std::min<int>(b-offset,h))) {
                if ((dots[ncol] >> (y - t + offset)) & 1)
                    set_pixel(x, y, 1);
            }
        }
        
    }
    void mos(int x, int y) {
        line({x+15,y+25,x+20,y+25},{.fill=1,.width=2});
        line({x+15,y+10,x+15,y+40},{.fill=0,.width=2});
        line({x+20,y+10,x+20,y+40},{.fill=0,.width=2});
        line({x+25,y   ,x+25,y+10},{.fill=0,.width=2});
        line({x+25,y+40,x+25,y+50},{.fill=0,.width=2});
        line({x+20,y+10,x+25,y+10},{.fill=0,.width=2});
        line({x+20,y+40,x+25,y+40},{.fill=0,.width=2});
    }
    void nmos(std::tuple<int,int> coord) {
        auto [x,y] = coord;
        line({x,y+25,x+15,y+25},{.fill=0,.width=2});
        mos(x, y);
    }
    void pmos(std::tuple<int,int> coord) {
        auto [x,y] = coord;
        line({x,y+25,x+11,y+25},{.fill=0,.width=2});
        ellipse({x+11,y+22,x+15,y+27},{.fill=1,.outline=0,.width=2});
        mos(x, y);
    }
    void inv(std::tuple<int,int> coord) {
        auto [x,y] = coord;
        ellipse({x+35-2,y-2,x+35+2,y+2},{.fill=0,.outline=0,.width=2});
        line({x+35,y,    x+35,y+10}, {.fill=0,.width=2});
        pmos({x+10,y+10});
        line({x+35,y+60, x+35,y+80}, {.fill=0,.width=2});
        line({x+35,y+70, x+60,y+70}, {.fill=0,.width=2});
        nmos({x+10,y+80});
        line({x+35,y+130,x+35,y+140},{.fill=0,.width=2});
        line({x+30,y+140,x+40,y+140},{.fill=0,.width=1});
        line({x+30,y+140,x+35,y+145},{.fill=0,.width=1});
        line({x+40,y+140,x+35,y+145},{.fill=0,.width=1});
        line({x,   y+35, x,   y+105},{.fill=0,.width=2});
        line({x,   y+35, x+10,y+35}, {.fill=0,.width=2});
        line({x,   y+105,x+10,y+105},{.fill=0,.width=2});
    }
    void nand(std::tuple<int,int> coord) {
        auto [x,y] = coord;
        ellipse({x+35-2, y-2,x+35+2, y+2},{.fill=0,.outline=0,.width=2});
        ellipse({x+115-2,y-2,x+115+2,y+2},{.fill=0,.outline=0,.width=2});
        line({x+35, y,    x+35, y+10}, {.fill=0,.width=2});
        pmos({x+10,y+10});
        line({x+115,y,    x+115,y+10}, {.fill=0,.width=2});
        pmos({x+90,y+10});
        line({x+35, y+60, x+35, y+80}, {.fill=0,.width=2});
        line({x+115,y+60, x+115,y+70}, {.fill=0,.width=2});
        line({x+35, y+70, x+140,y+70}, {.fill=0,.width=2});
        nmos({x+10,y+80});
        nmos({x+90,y+80});
        line({x+35, y+130,x+115,y+130},{.fill=0,.width=2});
        line({x+115,y+80, x+130,y+80}, {.fill=0,.width=2});
        line({x+130,y+80, x+130,y+140},{.fill=0,.width=2});
        line({x+125,y+140,x+135,y+140},{.fill=0,.width=1});
        line({x+125,y+140,x+130,y+145},{.fill=0,.width=1});
        line({x+135,y+140,x+130,y+145},{.fill=0,.width=1});
        line({x,    y+35, x,    y+105},{.fill=0,.width=2});
        line({x,    y+35, x+10, y+35}, {.fill=0,.width=2});
        line({x,    y+105,x+10, y+105},{.fill=0,.width=2});
        line({x+80, y+35, x+80, y+105},{.fill=0,.width=2});
        line({x+80, y+35, x+90, y+35}, {.fill=0,.width=2});
        line({x+80, y+105,x+90, y+105},{.fill=0,.width=2});
    }
};

const std::unordered_map<int, std::tuple<int, int>> CELL_SIZE = {
    {NOT,  {12, 28}},
    {NAND, {28, 28}}
};
const std::unordered_map<int, std::vector<std::tuple<int, int>>> PORT_OFFSET = {
    {NOT,  {{0,14},{12,14}}},
    {NAND, {{0,14},{16,14},{28,14}}}
};

constexpr auto operator+(const std::tuple<int,int>& lhs, const std::tuple<int,int>& rhs) {
    return std::tuple{std::get<0>(lhs)+std::get<0>(rhs), std::get<1>(lhs)+std::get<1>(rhs)};
}

std::tuple<long,
           std::vector<std::vector<std::tuple<int,int>>>,
           std::vector<std::tuple<int,int>>>
route(const std::set<int>& inputs,
      const std::set<int>& outputs,
      const std::unordered_map<int, std::pair<int, std::vector<int>>>& gates,
      const std::unordered_map<int, std::tuple<int,int>>& placement,
                        size_t canvas_w, size_t canvas_h) {
    std::vector<std::tuple<int,int,int,int,int,int>> todo;
    std::vector<std::vector<std::tuple<int,int>>> paths;
    std::vector<std::tuple<int,int>> solders;
    std::vector<int> drive(canvas_w*canvas_h);
    std::vector<int> mask(canvas_w*canvas_h);
    auto at = [canvas_h](size_t x, size_t y){ return x*canvas_h + y; };
    long cost = 0;
    for (const auto& [sink, values] : gates) {
        const auto& [sinkgatetype, inports] = values;
        for (const auto& [i, source] : std::views::enumerate(inports)) {
            if (inputs.contains(source) or outputs.contains(source)) continue;
            auto sourcegatetype = gates.at(source).first;
            auto [x1, y1] = placement.at(source) + PORT_OFFSET.at(sourcegatetype).back();
            auto [x2, y2] = placement.at(sink)   + PORT_OFFSET.at(sinkgatetype)[i];
            auto semipm = std::abs(x1 - (int)canvas_w/2) + std::abs(y1 - (int)canvas_h/2);

            todo.emplace_back(semipm, x1, y1, x2, y2, source);
        }
        static const std::unordered_map<int, void(*)(int,int,size_t,decltype(mask)&)> mask_func {
            {NOT,  +[](int X, int Y, size_t canvas_h, decltype(mask)& mask){
                       for (size_t x = X+1; x <= X+11; x++) for (size_t y = Y-2; y <= Y+30; y++) {
                           mask[x*canvas_h + y] = 3;
                       }
                   } },
            {NAND, +[](int X, int Y, size_t canvas_h, decltype(mask)& mask){
                       for (size_t x = X+1; x <= X+15; x++) for (size_t y = Y-2; y <= Y+30; y++) {
                           mask[x*canvas_h + y] = 3;
                       }
                       for (size_t x = X+15; x <= X+17; x++) for (size_t y = Y+8; y <= Y+20; y++) {
                           mask[x*canvas_h + y] = 3;
                       }
                       for (size_t x = X+17; x <= X+27; x++) for (size_t y = Y-2; y <= Y+30; y++) {
                           mask[x*canvas_h + y] = 3;
                       }
                   } },
        };
        auto [X, Y] = placement.at(sink);
        (*mask_func.at(sinkgatetype))(X, Y, canvas_h, mask);
    }
    for (size_t x = 0; x < canvas_w; x++) for (size_t y = 5; y < canvas_h; y+=40) {
        mask[at(x,y)] = 1;
    }
    std::sort(todo.begin(), todo.end());
    int done = 0;
    for (auto [_, x1, y1, x2, y2, driver_id] : todo) {
        std::vector<int> mark(canvas_w*canvas_h);
        bool marked = true, reached = false;
        int i = 0;
        size_t x, y;
        for (y = y2-7; y <= y2+7; y++)
            mark[at(x2,y)] = 1;
        do { i++;
            [&](){
                marked = false;
                for (x = 0; x < canvas_w; x++) for (y = 0; y < canvas_h; y++) {
                    if (mark[at(x,y)] == i) {
                        marked = true;
                        if (drive[at(x,y)] == driver_id or (x == x1 and y == y1)) {
                            reached = true; return;
                        }
                        if (x+1<canvas_w and (mask[at(x,y)]   & 1) == 0 and !mark[at(x+1,y)])
                            mark[at(x+1,y)] = i+1;
                        if (x>=1         and (mask[at(x-1,y)] & 1) == 0 and !mark[at(x-1,y)])
                            mark[at(x-1,y)] = i+1;
                        if (y+1<canvas_h and (mask[at(x,y)]   & 2) == 0 and !mark[at(x,y+1)])
                            mark[at(x,y+1)] = i+1;
                        if (y>=1         and (mask[at(x,y-1)] & 2) == 0 and !mark[at(x,y-1)])
                            mark[at(x,y-1)] = i+1;
                    }
                }
            }();
        } while (marked and !reached);
        if (!marked) {
            std::cout << "\nerror: routing failed" << std::endl;
            return { -1, paths, solders };
        }
        if (x != x1 or y != y1) solders.emplace_back(x,y);
        decltype(paths)::value_type path;
        int k = 1;
        while (mark[at(x,y)] > 1) {
            path.emplace_back(x,y);
            drive[at(x,y)] = driver_id;
            cost += 1;
            if        (k == 0 and x>=1           and mark[at(x-1,y)] > 0 and mark[at(x-1,y)] < mark[at(x,y)]) {
                mask[at(x-1,y)] |= 1;
                x--;
            } else if (k == 1 and x+1 < canvas_w and mark[at(x+1,y)] > 0 and mark[at(x+1,y)] < mark[at(x,y)]) {
                mask[at(x,y)]   |= 1;
                x++;
            } else if (k == 2 and y>=1           and mark[at(x,y-1)] > 0 and mark[at(x,y-1)] < mark[at(x,y)]) {
                mask[at(x,y-1)] |= 2;
                y--;
            } else if (k == 3 and y+1 < canvas_h and mark[at(x,y+1)] > 0 and mark[at(x,y+1)] < mark[at(x,y)]) {
                mask[at(x,y)]   |= 2;
                y++;
            } else if (x>=1           and mark[at(x-1,y)] > 0 and mark[at(x-1,y)] < mark[at(x,y)]) {
                mask[at(x-1,y)] |= 1;
                mask[at(x,y)]   |= 2;
                x--; k = 0;
            } else if (x+1 < canvas_w and mark[at(x+1,y)] > 0 and mark[at(x+1,y)] < mark[at(x,y)]) {
                mask[at(x,y)]   |= 3;
                x++; k = 1;
            } else if (y>=1           and mark[at(x,y-1)] > 0 and mark[at(x,y-1)] < mark[at(x,y)]) {
                mask[at(x,y)]   |= 1;
                mask[at(x,y-1)] |= 2;
                y--; k = 2;
            } else if (y+1 < canvas_h and mark[at(x,y+1)] > 0 and mark[at(x,y+1)] < mark[at(x,y)]) {
                mask[at(x,y)]   |= 3;
                y++; k = 3;
            } else {
                std::cout << "\nerror: routing failed" << std::endl;
                return { -1, paths, solders };
            }
        }
        path.emplace_back(x,y);
        drive[at(x,y)] = driver_id;
        paths.push_back(std::move(path));
        solders.emplace_back(x,y);
        for (y = y2-7; y <= y2+7; y++)
            drive[at(x2,y)] = driver_id;
        done++;
        std::cout << "[" << std::string(done * 70 / todo.size(), '=')
                  << '>' << std::string(70 - done * 70 / todo.size(), '.')
                  << std::format("] {:3d}%\r", done * 100 / todo.size());
    }
    std::cout << std::endl;
    return { cost, paths, solders };
}

std::tuple<size_t,size_t> snap_to_microgrid(
        std::unordered_map<int, std::tuple<int,int>>& placement,
        const std::unordered_map<int, std::pair<int, std::vector<int>>>& gates,
        double slackness) {
    int min_x = std::numeric_limits<int>::max();
    int min_y = std::numeric_limits<int>::max();
    int max_x = 0;
    int max_y = 0;
    for (auto& [gate, coord] : placement) {
        auto& [x, y] = coord;
        x = std::ceil(x * 2 * slackness); y = y * 40;
        min_x = std::min(x, min_x);
        min_y = std::min(y, min_y);
    }
    for (auto& [gate, coord] : placement) {
        auto& [x, y] = coord;
        x = x - min_x + 10; y = y - min_y + 5;
        auto [gate_size_x, gate_size_y] = CELL_SIZE.at(std::get<0>(gates.at(gate)));
        max_x = std::max(max_x, x + gate_size_x);
        max_y = std::max(max_y, y + gate_size_y);
    }
    return std::tuple<size_t,size_t>{ max_x + 10, max_y + 5 };
}

double fast_route_estimate(
        const std::unordered_map<int, std::tuple<int,int>>& placement,
        const std::set<int>& inputs, const std::set<int>& outputs,
        const std::unordered_map<int, std::pair<int, std::vector<int>>>& gates) {
    std::vector<std::tuple<int,int,int>> connections;
    auto [l, t] = placement.cbegin()->second;
    auto [r, b] = placement.cbegin()->second;
    for (const auto& [sink, values] : gates) {
        const auto& [sinkgatetype, inports] = values;
        auto [x, y] = placement.at(sink);
        l = std::min(l, x); t = std::min(t, y);
        r = std::max(r, x); b = std::max(b, y);
        for (const auto& [i, source] : std::views::enumerate(inports)) {
            if (inputs.contains(source) or outputs.contains(source)) continue;
            connections.emplace_back(source, sink, i);
        }
    }
    double cost = (r-l) * (b-t) * area_cost;
    for (const auto& [source, sink, port_id] : connections) {
        auto [sx, sy] = placement.at(source);
        auto [dx, dy] = placement.at(sink);
        auto [source_xoff, source_yoff] = PORT_OFFSET.at(std::get<0>(gates.at(source))).back();
        auto [sink_xoff, sink_yoff]     = PORT_OFFSET.at(std::get<0>(gates.at(sink  )))[port_id];
        cost += std::abs((sx+source_xoff)-(dx+sink_xoff)) + std::abs(((sy+source_yoff)-(dy+sink_yoff))*40);
        cost += (sy == dy) ? 0 : cross_track_penalty;
    }
    return cost;
}

std::unordered_map<int, std::tuple<int,int>> place(
        std::set<int> inputs, std::set<int> outputs,
        const std::unordered_map<int, std::pair<int, std::vector<int>>>& gates) {
    double temperature = initial_temperature;
    std::unordered_map<int, std::tuple<int,int>> placement;
    auto is_overlapping = [&](const decltype(placement)& placement) {
        std::deque<std::map<int,int>> track;
        int min_track = 0;
        for (const auto& [gate, coord] : placement) {
            const auto& [x, y] = coord;
            const auto& gatetype = std::get<0>(gates.at(gate));
            auto [width, _] = CELL_SIZE.at(gatetype);
            int l = x, r = x + width/2; 
            
            if (y - min_track >= (int)track.size()) {
                track.append_range(std::views::repeat(std::map<int,int>{}, y - min_track - track.size() + 1));
                track.back().emplace(l, r);
            } else if (y - min_track < 0) {
                track.prepend_range(std::views::repeat(std::map<int,int>{}, min_track - y));
                min_track = y;
                track.front().emplace(l, r);
            } else {
                auto& occupacy = track[y - min_track];
                auto il = std::ranges::upper_bound(std::views::values(occupacy), l).base();
                auto ir = occupacy.lower_bound(r);
                if (il == ir) occupacy.emplace_hint(ir, l, r);
                else return true;
            }
        }
        return false;
    };
    int estimated_tracks = std::round(std::sqrt(gates.size() * initial_area_margins));
    std::mt19937 rng(std::random_device{}());
    for (const auto& [gate, values] : gates) {
        const auto& gatetype = std::get<0>(values);
        do {
            int new_x = std::uniform_int_distribution<int>(0, estimated_tracks * 20)(rng);
            int new_y = std::uniform_int_distribution<int>(0, estimated_tracks     )(rng);
            placement[gate] = {new_x, new_y};
        } while (is_overlapping(placement));
    }
    double current_energy = fast_route_estimate(placement, inputs, outputs, gates);
    auto best_placement{ placement };
    auto best_energy{ current_energy };
    auto choice = std::uniform_int_distribution<int>(0, placement.size()-1);
    auto gauss = std::normal_distribution<double>(0, 1);
    auto dice = std::uniform_real_distribution<double>(0, 1);
    auto tick = std::chrono::system_clock::now();
    auto tock = tick;
    for (int iteration = 0; temperature > stop_temperature; iteration++) {
        auto new_placement{ placement };
        do {
            std::array<int,2> ridx { choice(rng), choice(rng) };
            std::tie(ridx[0], ridx[1]) = std::tuple<int,int>{ std::min(ridx[0], ridx[1]), std::abs(ridx[0]-ridx[1]) };
            auto iter = new_placement.begin();
            for (auto i : ridx) {
                std::advance(iter, i);
                std::get<0>(iter->second) += int(gauss(rng)*std::log(temperature)*20);
                std::get<1>(iter->second) += int(gauss(rng)*std::log(temperature));
            }
        } while (is_overlapping(new_placement));
        double new_energy = fast_route_estimate(new_placement, inputs, outputs, gates);
        if (new_energy < current_energy or dice(rng) < std::exp((current_energy-new_energy)/temperature)) {
            placement = std::move(new_placement);
            current_energy = new_energy;
            if (current_energy < best_energy) {
                best_placement = placement;
                best_energy = current_energy;
            }
        }
        temperature *= cooling_rate;
        auto now = std::chrono::system_clock::now();
        if (now - tick > 50ms) {
            std::cout << std::format("Temperature {:3.3f}, Energy {}  \r", temperature, current_energy) << std::flush;
            tick = now;
        }
        if (now - tock > step_timeout) break;
        tock = now;
    }
    std::cout << std::format("Temperature {:3.3f}, Energy {}  \n", temperature, best_energy);
    return best_placement;
}

void draw_schematic(
        const std::set<int>& inputs, const std::set<int>& outputs,
        const std::unordered_map<int, std::pair<int, std::vector<int>>>& gates,
        const std::vector<std::string> names,
        const std::vector<std::vector<std::tuple<int,int>>>& wires,
        const std::vector<std::tuple<int,int>>& solders,
        const std::unordered_map<int, std::tuple<int,int>>& placement,
        size_t canvas_w, size_t canvas_h) {
    draw_t draw(canvas_w*5, canvas_h*5);
    static const std::unordered_map<int, void(draw_t::*)(std::tuple<int,int>)> DRAWFN {
        {NOT, &draw_t::inv}, {NAND, &draw_t::nand}
    };
    // draw stdcells
    for (const auto& [name, values] : gates) {
        const auto& [gatetype, _] = values;
        const auto& [x, y] = placement.at(name);
        (draw.*DRAWFN.at(gatetype))({x*5,y*5});
    }
    // draw powergrid
    for (size_t y = 25; y < canvas_h*5; y+=200)
        draw.line({0,y,canvas_w*5,y},{.width=2});
    // draw wires
    for (const auto& wire : wires) {
        auto [x1, y1] = wire.front();
        for (const auto& [x2, y2] : wire | std::views::drop(1)) {
            draw.line({x1*5, y1*5, x2*5, y2*5},{.fill=0,.width=2});
            std::tie(x1, y1) = std::tuple{x2, y2};
        }
    }
    // draw solder joints
    for (const auto& [x, y] : solders) {
        draw.ellipse({x*5-2, y*5-2, x*5+2, y*5+2},{.fill=0,.outline=0,.width=1});
    }
    // draw port labels
    for (const auto& [sink, values] : gates) {
        const auto& [sinkgatetype, inports] = values;
        for (const auto& [i, source] : std::views::enumerate(inports)) {
            if (inputs.contains(source)) {
                auto [x, y] = placement.at(sink) + PORT_OFFSET.at(sinkgatetype)[i];
                y = y + 2;
                draw.text({x*5-12, y*5-8}, names[source], "rt");
                draw.line({x*5-10,y*5-2,x*5-6,y*5-2},   {.fill=0,.width=1});
                draw.line({x*5-10,y*5-1,x*5-4,y*5-1},   {.fill=0,.width=1});
                draw.line({x*5-10,y*5-0,x*5-2,y*5-0},   {.fill=0,.width=1});
                draw.line({x*5-10,y*5+1,x*5-4,y*5+1},   {.fill=0,.width=1});
                draw.line({x*5-10,y*5+2,x*5-6,y*5+2},   {.fill=0,.width=1});
            }
        }
    }
    for (const auto& output : outputs) {
        const auto& gatetype = gates.at(output).first;
        auto [x, y] = placement.at(output) + PORT_OFFSET.at(gatetype).back();
        draw.text({x*5+10, y*5-8}, names[output], "lt");
        draw.line({x*5+8,y*5-2,x*5+4,y*5-2},   {.fill=0,.width=1});
        draw.line({x*5+8,y*5-1,x*5+2,y*5-1},   {.fill=0,.width=1});
        draw.line({x*5+8,y*5-0,x*5+0,y*5-0},   {.fill=0,.width=1});
        draw.line({x*5+8,y*5+1,x*5+2,y*5+1},   {.fill=0,.width=1});
        draw.line({x*5+8,y*5+2,x*5+4,y*5+2},   {.fill=0,.width=1});
    }
    draw.write_png(schematic_filename);
}

int main(int argc, char *argv[]) {
    auto [inputs, outputs, gates, names] = parse(argc, argv);
    if (gates.empty()) return 0;
    auto placement = place(inputs, outputs, gates);
    for (double slackness = 1.0; slackness < 10.; slackness *= 1.05) {
        std::cout << "slackness: " << slackness << std::endl;
        decltype(placement) micro_placement(placement);
        auto [canvas_w, canvas_h] = snap_to_microgrid(micro_placement, gates);
        auto [cost, wires, solders] = route(inputs, outputs, gates, micro_placement, canvas_w, canvas_h);
        if (cost < 0) continue;
        std::cout << "total wire length: " << cost << std::endl;
        draw_schematic(inputs, outputs, gates, names, wires, solders, micro_placement, canvas_w, canvas_h);
        std::cout << "schematic generated." << std::endl;
        return 0;
    }
    std::cout << "error: all routing attempts failed, no schematic generated." << std::endl;
    return 0;
}
