Home > Enterprise >  R: Recreating the Travelling Salesman Problem
R: Recreating the Travelling Salesman Problem

Time:01-19

I am working with the R programming language.

I am trying to recreate the travelling salesman problem. The travelling salesman problem is a problem where a salesman has to visit "n" number of cities exactly once, in such a way that his total distance is minimized.

For this problem, I first created a dataset made of n = 6 cities (longitude, latitude):

set.seed(123)

data_1 = data.frame(id = c(1,2,3), long = rnorm(3, -74, 1 ), lat = rnorm(3, 40, 1 ))

data_2 = data.frame(id = c(4,5,6), long = rnorm(3, -78, 1 ), lat = rnorm(3, 42, 1 ))

final_data = rbind(data_1, data_2) 

 final_data
  id      long      lat
1  1 -74.56048 40.07051
2  2 -74.23018 40.12929
3  3 -72.44129 41.71506
4  4 -77.53908 41.55434
5  5 -79.26506 43.22408
6  6 -78.68685 42.35981

For a given order of cities (e.g. 1,2,3,4,5,6), I created a function which determines the distance (based on the Euclidean Distance) between each successive pair of cities, and then calculates the total distance travelled:

distance <- function( long1, lat1, long2, lat2, long3, lat3, long4, lat4, long5, lat5, long6, lat6) {

d1_2 = sqrt( (long1 - lat1)^2   (long2 - lat2)^2 ) 

d2_3 = sqrt( (long2 - lat2)^2   (lat3 - long3)^2 ) 

d3_4 = sqrt( (long3 - lat3)^2   (long4 - lat4)^2 ) 

d4_5 = sqrt( (long4 - lat4)^2   (long5 - lat5)^2 ) 

d5_6 = sqrt( (long5 - lat5)^2   (long6 - lat6)^2 ) 

return( d1_2   d2_3   d3_4   d4_5   d5_6 )
}

distance(final_data[1,2], final_data[1,3], final_data[2,2], final_data[2,3], final_data[3,2], final_data[3,3], final_data[4,2], final_data[4,3], final_data[5,2], final_data[5,3], final_data[6,2], final_data[6,3]) 

Then, I can randomize the order of the rows to obtain different routes and calculate the distance for each route:

#first route 
rows <- sample(nrow(final_data))
route_1 <- final_data[rows, ]

> route_1
  id      long      lat
1  1 -74.56048 40.07051
3  3 -72.44129 41.71506
4  4 -77.53908 41.55434
2  2 -74.23018 40.12929
6  6 -78.68685 42.35981
5  5 -79.26506 43.22408
    
distance(route_1[1,2], route_1[1,3], route_1[2,2], route_1[2,3], route_1[3,2], route_1[3,3], route_1[4,2], route_1[4,3], route_1[5,2], route_1[5,3], route_1[6,2], route_1[6,3]) 

[1] 830.5902

Next Route:

#second route

rows <- sample(nrow(final_data))
route_2 <- final_data[rows, ]

> route_2
  id      long      lat
5  5 -79.26506 43.22408
4  4 -77.53908 41.55434
3  3 -72.44129 41.71506
2  2 -74.23018 40.12929
1  1 -74.56048 40.07051
6  6 -78.68685 42.35981

distance(route_2[1,2], route_2[1,3], route_2[2,2], route_2[2,3], route_2[3,2], route_2[3,3], route_2[4,2], route_2[4,3], route_2[5,2], route_2[5,3], route_2[6,2], route_2[6,3]) 

[1] 826.028
#etc

My Question: In the spirit of the Travelling Salesman Problem, I am trying to (ironically) show that what I am doing is extremely inefficient and will not work for more than 10 cities (i.e. take too long to run). In the case of 6 cities, can someone please show me how to calculate the distance for every possible route (6! = 720 routes) and calculate the time required to compute all these distances?

Here is what I know how to do so far:

Part 1: Generate All Possible Routes

library(combinat)
 all_routes = permn(c(1,2,3,4,5,6))

> head(all_routes)
[[1]]
[1] 1 2 3 4 5 6

[[2]]
[1] 1 2 3 4 6 5

[[3]]
[1] 1 2 3 6 4 5

[[4]]
[1] 1 2 6 3 4 5

[[5]]
[1] 1 6 2 3 4 5

[[6]]
[1] 6 1 2 3 4 5

Part 2: Record the Time Required to Calculate a Single Route

start.time <- Sys.time()
distance(route_1[1,2], route_1[1,3], route_1[2,2], route_1[2,3], route_1[3,2], route_1[3,3], route_1[4,2], route_1[4,3], route_1[5,2], route_1[5,3], route_1[6,2], route_1[6,3]) 
end.time <- Sys.time()
time.taken <- end.time - start.time

time.taken

Time difference of 0.003665924 secs

Can someone please show me how to put this all together?

Thanks!

CodePudding user response:

To calculate the cumulative distance for all 6! routes for the given final_data could be done like this:

set.seed(123)
data_1 = data.frame(id = c(1,2,3), long = rnorm(3, -74, 1 ), lat = rnorm(3, 40, 1 ))
data_2 = data.frame(id = c(4,5,6), long = rnorm(3, -78, 1 ), lat = rnorm(3, 42, 1 ))
final_data = rbind(data_1, data_2)
N <- nrow(final_data) # just for repeated convenience
final_data
#   id      long      lat
# 1  1 -74.56048 40.07051
# 2  2 -74.23018 40.12929
# 3  3 -72.44129 41.71506
# 4  4 -77.53908 41.55434
# 5  5 -79.26506 43.22408
# 6  6 -78.68685 42.35981

Calculate the distances between each city, pair-wise. I'm using distHaversine because you listed lat/lon, and part of me cringes seeing cartesian distance calcs applied to that :-)

dists <- outer(seq_len(N), seq_len(N), function(a,b) {
  geosphere::distHaversine(final_data[a,2:3], final_data[b,2:3]) # Notes 1, 2
})
dists
#           [,1]      [,2]     [,3]     [,4]     [,5]     [,6]
# [1,]      0.00  28876.24 255554.4 300408.5 525566.9 429264.3
# [2,]  28876.24      0.00 231942.7 320616.0 541980.9 448013.6
# [3,] 255554.43 231942.67      0.0 424449.9 584761.5 521210.7
# [4,] 300408.47 320616.03 424449.9      0.0 233840.9 130640.9
# [5,] 525566.87 541980.93 584761.5 233840.9      0.0 107178.2
# [6,] 429264.34 448013.57 521210.7 130640.9 107178.2      0.0

(Units are in meters.)

Calculate the cumulative distance along each of the routes:

perms <- gtools::permutations(N, N)
nrow(perms)
# [1] 720
perms[c(1:4, 719:720),]
#      [,1] [,2] [,3] [,4] [,5] [,6]
# [1,]    1    2    3    4    5    6
# [2,]    1    2    3    4    6    5
# [3,]    1    2    3    5    4    6
# [4,]    1    2    3    5    6    4
# [5,]    6    5    4    3    1    2
# [6,]    6    5    4    3    2    1

allroutes5 <- t(apply(perms, 1, function(route) {
  dists[cbind(route[-N], route[-1])]
}))
head(allroutes5)
#          [,1]     [,2]     [,3]     [,4]     [,5]
# [1,] 28876.24 231942.7 424449.9 233840.9 107178.2
# [2,] 28876.24 231942.7 424449.9 130640.9 107178.2
# [3,] 28876.24 231942.7 584761.5 233840.9 130640.9
# [4,] 28876.24 231942.7 584761.5 107178.2 130640.9
# [5,] 28876.24 231942.7 521210.7 130640.9 233840.9
# [6,] 28876.24 231942.7 521210.7 107178.2 233840.9

allroutes_total <- rowSums(allroutes5)
head(allroutes_total)
# [1] 1026287.9  923087.9 1210062.2 1083399.4 1146511.4 1123048.7

As confirmation of this, the first row of allroutes5 is the sequence of cities 1, 2, 3, 4, 5, and 6. Recalling dists above, from 1-2 is 28876; 2-3 is 231942; 3-4 is 424449; etc. Sum these up, and we have the total distance traveled over all cities in that route. allroutes_total holds the distances for all 720 possible routings (permutations).

min(allroutes_total)
# [1] 799046.4
which.min(allroutes_total)
# [1] 266
perms[which.min(allroutes_total),]
# [1] 3 2 1 4 6 5

Notes:

  1. Using your formula, I was able to duplicate your distances:

    dists <- outer(seq_len(N), seq_len(N), function(a,b) {
      sqrt((final_data[a,"long"] - final_data[a,"lat"])^2   (final_data[b,"long"] - final_data[b,"lat"])^2)
    })
    dists
    #          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]
    # [1,] 162.1127 161.9208 161.7774 165.2982 167.7613 166.7110
    # [2,] 161.9208 161.7287 161.5852 165.1101 167.5759 166.5244
    # [3,] 161.7774 161.5852 161.4415 164.9694 167.4373 166.3850
    # [4,] 165.2982 165.1101 164.9694 168.4235 170.8415 169.8103
    # [5,] 167.7613 167.5759 167.4373 170.8415 173.2258 172.2088
    # [6,] 166.7110 166.5244 166.3850 169.8103 172.2088 171.1858
    
    ### first route
    which(apply(perms, 1, identical, c(1L, 3L, 4L, 2L, 6L, 5L)))
    # [1] 32
    allroutes_total[32]
    # [1] 830.5902
    
    ### second route
    which(apply(perms, 1, identical, c(5L, 4L, 3L, 2L, 1L, 6L)))
    # [1] 567
    allroutes_total[567]
    # [1] 826.028
    

    And if you're curious, your second route was tied for fifth-shortest:

    min(allroutes_total)
    # [1] 826.0252
    which.min(allroutes_total)
    # [1] 561
    perms[which.min(allroutes_total),]
    # [1] 5 4 2 3 1 6
    rank(allroutes_total)[567]
    # [1] 5.5
    
  2. I'm not sure that's the right distance calculation, though. I'd think the euclidean distance should be:

    dists <- outer(seq_len(N), seq_len(N), function(a,b) {
      sqrt((final_data[a,"long"] - final_data[b,"long"])^2   (final_data[a,"lat"] - final_data[b,"lat"])^2)
    })
    dists
    #           [,1]      [,2]     [,3]     [,4]     [,5]     [,6]
    # [1,] 0.0000000 0.3354875 2.682444 3.327741 5.663758 4.718888
    # [2,] 0.3354875 0.0000000 2.390565 3.602725 5.909975 4.983694
    # [3,] 2.6824442 2.3905652 0.000000 5.100325 6.988631 6.278753
    # [4,] 3.3277405 3.6027253 5.100325 0.000000 2.401467 1.402200
    # [5,] 5.6637577 5.9099750 6.988631 2.401467 0.000000 1.039848
    # [6,] 4.7188885 4.9836936 6.278753 1.402200 1.039848 0.000000
    
  •  Tags:  
  • Related