错误:二元运算符的非数字参数

当我将帖子 https://stackoverflow.com/questions/47722530/count-match-ratio-between-two-dataset 的答案应用于真实数据时,它有一些错误。

由于真实数据非常大,这里只给出数据集的结构:

> str(sesmic_events)
'data.frame':   13 obs. of  3 variables:
 $ TIME_    : POSIXct, format: "2013-10-15 00:12:32" "2013-10-25 17:10:19" "2014-04-11 07:07:23" "2014-04-12 20:14:39" ...
 $ LATITUDE : num  9.88 37.16 -6.59 -11.27 -6.75 ...
 $ LONGITUDE: num  124 145 155 162 155 ...
> str(events_data)
'data.frame':   53646 obs. of  17 variables:
 $ LONGITUDE   : num  118 118 118 118 118 ...
 $ LATITUDE    : num  -11.5 -11.5 -11.5 -11.5 -11.5 -11.5 -11.5 -11.5 -11.5 -11.5 ...
 $ DATE_START  : POSIXct, format: "2014-12-30" "2014-12-31" "2015-01-01" "2015-01-02" ...
 $ DATE_END    : POSIXct, format: "2015-01-06" "2015-01-07" "2015-01-08" "2015-01-09" ...
 $ FLAG        : num  2 2 2 2 2 2 2 2 2 2 ...
 $ SURFSKINTEMP: int  13 1 16 16 7 13 9 9 9 9 ...
 $ SURFAIRTEMP : int  6 6 6 6 6 6 6 4 5 2 ...
 $ TOTH2OVAP   : int  5 17 17 17 17 17 17 2 2 12 ...
 $ TOTO3       : int  16 16 16 10 7 7 7 14 14 14 ...
 $ TOTCO       : int  12 12 8 4 12 12 10 10 10 10 ...
 $ TOTCH4      : int  13 14 6 6 11 7 14 14 14 14 ...
 $ OLR_ARIS    : int  10 4 4 7 5 10 6 4 4 4 ...
 $ CLROLR_ARIS : int  10 4 4 7 5 10 6 4 4 4 ...
 $ OLR_NOAA    : int  10 10 10 10 7 9 9 9 9 6 ...
 $ MODIS_LST   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ MATCH_COUNT : num  2 16 14 9 8 9 12 11 16 11 ...
 $ MATCH_RATIO : num  0.00627 0.05016 0.04389 0.02821 0.02508 ...

数据集的前几行(head):

> dput(head(sesmic_events))
structure(list(TIME_ = structure(c(1381795952.05, 1382721019.71, 
1397200043.13, 1397333679.3, 1397914080.81, 1416018701.72), class = c("POSIXct", 
"POSIXt")), LATITUDE = c(9.88, 37.156, -6.586, -11.27, -6.755, 
1.893), LONGITUDE = c(124.1167, 144.6611, 155.0485, 162.1481, 
155.0241, 126.5217)), .Names = c("TIME_", "LATITUDE", "LONGITUDE"
), row.names = c(NA, 6L), class = "data.frame")
> dput(head(events_data))
structure(list(LONGITUDE = c(118.5, 118.5, 118.5, 118.5, 118.5, 
118.5), LATITUDE = c(-11.5, -11.5, -11.5, -11.5, -11.5, -11.5
), DATE_START = structure(c(1419897600, 1419984000, 1420070400, 
1420156800, 1420243200, 1420329600), class = c("POSIXct", "POSIXt"
)), DATE_END = structure(c(1420502400, 1420588800, 1420675200, 
1420761600, 1420848000, 1420934400), class = c("POSIXct", "POSIXt"
)), FLAG = c(2, 2, 2, 2, 2, 2), SURFSKINTEMP = c(13L, 1L, 16L, 
16L, 7L, 13L), SURFAIRTEMP = c(6L, 6L, 6L, 6L, 6L, 6L), TOTH2OVAP = c(5L, 
17L, 17L, 17L, 17L, 17L), TOTO3 = c(16L, 16L, 16L, 10L, 7L, 7L
), TOTCO = c(12L, 12L, 8L, 4L, 12L, 12L), TOTCH4 = c(13L, 14L, 
6L, 6L, 11L, 7L), OLR_ARIS = c(10L, 4L, 4L, 7L, 5L, 10L), CLROLR_ARIS = c(10L, 
4L, 4L, 7L, 5L, 10L), OLR_NOAA = c(10L, 10L, 10L, 10L, 7L, 9L
), MODIS_LST = c(1L, 1L, 1L, 1L, 1L, 1L), MATCH_COUNT = c(2, 
16, 14, 9, 8, 9), MATCH_RATIO = c(0.00626959247648903, 0.0501567398119122, 
0.0438871473354232, 0.0282131661442006, 0.0250783699059561, 0.0282131661442006
)), .Names = c("LONGITUDE", "LATITUDE", "DATE_START", "DATE_END", 
"FLAG", "SURFSKINTEMP", "SURFAIRTEMP", "TOTH2OVAP", "TOTO3", 
"TOTCO", "TOTCH4", "OLR_ARIS", "CLROLR_ARIS", "OLR_NOAA", "MODIS_LST", 
"MATCH_COUNT", "MATCH_RATIO"), row.names = c(NA, 6L), class = "data.frame")

当我使用以下语句时

# Pull the columns of events_data that every row of sesmic_events is compared against.
long <- events_data$LONGITUDE
lat <- events_data$LATITUDE
date_s <- events_data$DATE_START
date_e <- events_data$DATE_END
myVals <- events_data$MATCH_RATIO

# For each row of sesmic_events, compute the share of selected events_data rows
# whose MATCH_RATIO exceeds 0.01.
# NOTE: apply() converts the data frame to a matrix first. Because TIME_ is
# POSIXct, that matrix is character, so each row `x` arrives as a character
# vector and the arithmetic below fails with
# "non-numeric argument to binary operator".
sesmic_events$MatchRatio <- apply(sesmic_events, 1, function(x) {
  # x[3] is LONGITUDE: keep events strictly within +/- 5 of it.
  v <- which(long > (x[3] - 5) & long < (x[3] + 5))
  # x[2] is LATITUDE: keep events strictly within +/- 5 of it.
  z <- which(lat > (x[2] - 5) & lat < (x[2] + 5))
  # x[1] is TIME_: keep events that start no earlier than TIME_ - 60 ...
  t_s <- which(date_s >= x[1] - 60)
  # ... and end no later than TIME_.
  t_e <- which(date_e <= x[1])
  # Row indices that satisfy all four conditions at once.
  ind <- Reduce(intersect, list(z,v,t_s,t_e))
  # Fraction of the selected rows with MATCH_RATIO > 0.01 (0/0 gives NaN when none match).
  sum(myVals[ind] > 0.01)/length(ind)
})

它有错误:

Error in x[3] - 5 : non-numeric argument to binary operator 
stack overflow Error in : non-numeric argument to binary operator
原文答案

答案:

作者头像

问题在于 apply() 会先把数据框转换为矩阵;由于 TIME_ 列是 POSIXct 类型,矩阵变成了字符型,因此每一行 x 都被强制转换为 character。请看:

# Diagnostic run of the failing code: print the class of each row vector that
# apply() hands to the function, to show why the arithmetic breaks.
sesmic_events$MatchRatio <- apply(sesmic_events, 1, function(x) {

    ## Print the class of the row vector; it shows "character" because
    ## apply() converted the data frame to a character matrix.
    print(class(x))

    # x[3] is LONGITUDE; subtracting from a character value raises the error.
    v <- which(long > (x[3] - 5) & long < (x[3] + 5))
    # x[2] is LATITUDE.
    z <- which(lat > (x[2] - 5) & lat < (x[2] + 5))
    # x[1] is TIME_.
    t_s <- which(date_s >= x[1] - 60)
    t_e <- which(date_e <= x[1])
    # Row indices that satisfy all four conditions at once.
    ind <- Reduce(intersect, list(z,v,t_s,t_e))
    # Fraction of the selected rows with MATCH_RATIO > 0.01.
    sum(myVals[ind] > 0.01)/length(ind)
})

[1] "character"
Error in x[3] - 5 : non-numeric argument to binary operator 

只需将日期字段转换为整数即可解决问题。观察:

# Columns of events_data that every seismic event is compared against.
long <- events_data$LONGITUDE
lat <- events_data$LATITUDE
# Compare times as integer epoch seconds so the arithmetic below is numeric.
date_s <- as.integer(events_data$DATE_START)
date_e <- as.integer(events_data$DATE_END)
myVals <- events_data$MATCH_RATIO

# Converted copy of the event times; sesmic_events$TIME_ itself stays POSIXct.
event_time <- as.integer(sesmic_events$TIME_)

# Look-back window, in the same unit as the times above (seconds).
# NOTE(review): this is a 60-second window. In the sample shown, DATE_END is
# 7 days after DATE_START, so no such row can fit inside it. If 60 days was
# intended, use 60 * 24 * 60 * 60 -- confirm with the author.
window <- 60

# For each seismic event, the share of matching events_data rows whose
# MATCH_RATIO exceeds 0.01. A row matches when it lies strictly within
# +/- 5 of the event's longitude and latitude, starts no earlier than
# `window` before the event, and ends no later than the event.
# Rows are visited by index and columns read by name, so no data-frame-to-matrix
# coercion happens and extra (non-numeric) columns cannot break the arithmetic.
sesmic_events$MatchRatio <- vapply(
  seq_len(nrow(sesmic_events)),
  function(i) {
    ev_lon <- sesmic_events$LONGITUDE[i]
    ev_lat <- sesmic_events$LATITUDE[i]
    ev_time <- event_time[i]
    # which() drops NA comparisons, as the per-condition which() calls did.
    ind <- which(
      long > (ev_lon - 5) & long < (ev_lon + 5) &
        lat > (ev_lat - 5) & lat < (ev_lat + 5) &
        date_s >= ev_time - window &
        date_e <= ev_time
    )
    # 0 / 0 yields NaN when no row matches.
    sum(myVals[ind] > 0.01) / length(ind)
  },
  numeric(1)
)

唯一的问题是:对于示例数据,MatchRatio 字段的每个值都是 NaN,因为每一行的 length(ind) 都等于 0(0/0 的结果为 NaN)。

无论如何,OP 应该认真考虑改用 data.table 或 dplyr 等更健壮的解决方案。