最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

Decode output from GET request in R - Stack Overflow

programmeradmin3浏览0评论

I'm trying to pull some data using httr::GET but the problem I am facing now is that the response seems to be encoded and I am not sure how I can solve this.

Here is the url of historical data on football for argentina

I had managed to locate the corresponding GET request that pulls the data so I was doing the following:

# Request URL.
# NOTE(review): the value here is only "//" — the real URL appears to be
# missing from this copy; confirm before running.
ul1<- "//"

# Send the GET request with browser-like headers and an Accept header for JSON.
# NOTE(review): add_headers() and accept_json() are called unqualified —
# presumably httr is attached via library(httr); confirm.
t<-httr::GET(ul1, add_headers('User-Agent' = 'Mozilla/5.0 ', 'sec-ch-ua-platform' = "Windows", 'x-requested-with' = "XMLHttpRequest"), accept_json())

# Read the response body as text.
get2json<- content(t, as = "text")

However the response looks like this (sample):

> get2json
"ZDRCVTcxaklMRU5DRHRyeXdPRzdUbnhsWm44MFpZTkVWaWI1am"

From what I've read this might be some base64 encoding so I tried converting it doing this:

rawToChar(base64enc::base64decode(base64url_dec(get2json)))

It seems like it's doing some converting since I now have some / in the output but still it is a bunch of alphanumeric characters.

Any hints appreciated

I'm trying to pull some data using httr::GET but the problem I am facing now is that the response seems to be encoded and I am not sure how I can solve this.

Here is the url of historical data on football for argentina

I had managed to locate the corresponding GET request that pulls the data so I was doing the following:

# Request URL of the tournament-archive endpoint.
# NOTE(review): the host has no top-level domain — it looks truncated; confirm
# the full URL before running.
ul1<- "https://www.oddsportal/ajax-sport-country-tournament-archive_/1/vZTffojG/X218480680X24584X0X0X136839680X0X0X0X0X0X0X0X0X134283269X512X1048578X1048576X0X1024X18464X131072X256X0X0X0X0X131072X0X128/0/1/page/1//"

# Send the GET request with browser-like headers and an Accept header for JSON.
# NOTE(review): add_headers() and accept_json() are called unqualified —
# presumably httr is attached via library(httr); confirm.
t<-httr::GET(ul1, add_headers('User-Agent' = 'Mozilla/5.0 ', 'sec-ch-ua-platform' = "Windows", 'x-requested-with' = "XMLHttpRequest"), accept_json())

# Read the response body as text.
get2json<- content(t, as = "text")

However the response looks like this (sample):

> get2json
"ZDRCVTcxaklMRU5DRHRyeXdPRzdUbnhsWm44MFpZTkVWaWI1am"

From what I've read this might be some base64 encoding so I tried converting it doing this:

rawToChar(base64enc::base64decode(base64url_dec(get2json)))

It seems like it's doing some converting since I now have some / in the output but still it is a bunch of alphanumeric characters.

Any hints appreciated

Share Improve this question asked Mar 17 at 15:27 M.OM.O 5092 silver badges11 bronze badges 0
Add a comment  | 

1 Answer 1

Reset to default 4

Using this great answer so CREDIT to @Juiced - the guy is insane for figuring this one out.

"GuTDQp+LlsfHwf3/a9DJnZn0ba3U+3i7V6PeguPzc72QKl8luFqUyJTx7FTdDCgZIlKDX9Tw9/fjI6tOXi0SB5e4tTR6746yNMfd4FJtZIYxFYg9QLfDD0fRHilIaYxqNh3BG6rRI4crRDZmdZ9rvPjod5X5ZKrBgDjvhH0XZNsbANKx2XpKn3l9KqI97gN4QuSIjxWvb+RL5jIGhYQb5Q==:c0e60d0675dd38a69c8f2e937a6497df"

decodes to

{'s': 1, 'd': {'nullResultText': 'Unfortunately, no matches can be displayed because there are no odds available from your selected bookmakers.'}, 'refresh': 20}

To decode, you need the salt and password obtained from app.js, plus an initialization vector (IV). The IV is given in your decoded text after the colon (`:`). I tried to recreate this in R but could not find any function that replaces PBKDF2HMAC, so I used the aes_key obtained from Python.

Code

library(httr)
library(jsonlite)
library(base64enc)

# Endpoint that returns the encrypted tournament-archive payload.
# NOTE(review): the host has no top-level domain — it looks truncated; confirm
# the full URL before running.
ul1 <- "https://www.oddsportal/ajax-sport-country-tournament-archive_/1/vZTffojG/X218480680X24584X0X0X136839680X0X0X0X0X0X134283269X512X1048578X1048576X0X1024X18464X131072X256X0X0X0X0X131072X0X128/0/1/page/1//"

t <- httr::GET(
  ul1,
  httr::add_headers(
    "User-Agent" = "Mozilla/5.0 ",
    "sec-ch-ua-platform" = "Windows",
    "x-requested-with" = "XMLHttpRequest"
  ),
  httr::accept_json()
)
# Fail early on an HTTP error instead of trying to decode an error page.
httr::stop_for_status(t)

# Decode base64 content; the decoded text has the form "<ciphertext>:<iv>".
decoded_base64 <- base64enc::base64decode(rawToChar(t$content))
result <- rawToChar(decoded_base64)
split_data <- strsplit(result, ":", fixed = TRUE)[[1]]
if (length(split_data) != 2L) {
  stop("Unexpected payload: expected '<ciphertext>:<iv>'", call. = FALSE)
}
encrypted <- split_data[1]
key_hex <- split_data[2]

# The ciphertext part is itself base64-encoded.
encrypted_bytes <- base64enc::base64decode(encrypted)

# hex to raw format (used as the IV below); reject anything that is not an
# even-length hex string so bad input cannot silently become 00 bytes.
if (!grepl("^([0-9A-Fa-f]{2})+$", key_hex)) {
  stop("IV is not a valid hex string", call. = FALSE)
}
hex_starts <- seq(1L, nchar(key_hex), by = 2L)
key_bytes <- as.raw(
  strtoi(substring(key_hex, hex_starts, hex_starts + 1L), base = 16L)
)

# aes_key obtained from python script
aes_key <- as.raw(c(
  0x8f, 0x3b, 0x48, 0x9f, 0x3a, 0x7d, 0xa5, 0xc2,
  0x1e, 0x51, 0xcc, 0x15, 0xe5, 0xbe, 0xa6, 0x6f,
  0x67, 0xe6, 0x05, 0x49, 0x47, 0x37, 0xa7, 0xc2,
  0x12, 0xe3, 0xc0, 0xea, 0x31, 0x95, 0xeb, 0xbd
))

# Decrypt using AES-CBC
decrypted_bytes <- openssl::aes_cbc_decrypt(
  encrypted_bytes,
  key = aes_key,
  iv = key_bytes
)

# Parse the decrypted text as JSON.
decoded <- jsonlite::fromJSON(rawToChar(decrypted_bytes))

giving

> decoded 

$s
[1] 1

$d
$d$nullResultText
[1] "Unfortunately, no matches can be displayed because there are no odds available from your selected bookmakers."


$refresh
[1] 20

Add

So I found this implementation of PBKDF2HMAC and translated it to R. Using this I could obtain the aes_key.

How to obtain Password and Salt

Using the browser's developer tools, go to Sources -> www.oddsportal/res/public/js/build/app.js?v=250312132723 and search for `jt(r.data`. The value on the left is the password; the value on the right is the salt.

Full Code

library(httr)
library(jsonlite)
library(base64enc)

# Endpoint that returns the encrypted tournament-archive payload.
# NOTE(review): the host has no top-level domain — it looks truncated; confirm
# the full URL before running.
ul1 <- "https://www.oddsportal/ajax-sport-country-tournament-archive_/1/vZTffojG/X218480680X24584X0X0X136839680X0X0X0X0X0X134283269X512X1048578X1048576X0X1024X18464X131072X256X0X0X0X0X131072X0X128/0/1/page/1//"

t <- httr::GET(
  ul1,
  httr::add_headers(
    "User-Agent" = "Mozilla/5.0 ",
    "sec-ch-ua-platform" = "Windows",
    "x-requested-with" = "XMLHttpRequest"
  ),
  httr::accept_json()
)
# Stop on an HTTP error rather than feeding an error body to the decoder.
httr::stop_for_status(t)

# Decode base64 content; the decoded text has the form "<ciphertext>:<iv>".
decoded_base64 <- base64enc::base64decode(rawToChar(t$content))
result <- rawToChar(decoded_base64)
split_data <- strsplit(result, ":", fixed = TRUE)[[1]]
if (length(split_data) != 2L) {
  stop("Unexpected payload: expected '<ciphertext>:<iv>'", call. = FALSE)
}
encrypted <- split_data[1]
key_hex <- split_data[2]

# Decode the encrypted data (the ciphertext part is itself base64-encoded)
encrypted_bytes <- base64enc::base64decode(encrypted)

# Convert key from hex to raw format (used as the IV for decryption). Reject
# anything that is not an even-length hex string so malformed input cannot
# silently turn into 00 bytes.
if (!grepl("^([0-9A-Fa-f]{2})+$", key_hex)) {
  stop("IV is not a valid hex string", call. = FALSE)
}
hex_starts <- seq(1L, nchar(key_hex), by = 2L)
key_bytes <- as.raw(
  strtoi(substring(key_hex, hex_starts, hex_starts + 1L), base = 16L)
)

# Obtaining aes_key 

# Pseudo-random function used as the default `prf` of pbkdf2(): a keyed HMAC
# that returns raw bytes.
# NOTE: despite the name, the hash algorithm is SHA-256 (algo = "sha256");
# the name is kept so existing callers keep working.
#   key  - HMAC key
#   data - message to authenticate
# Returns the MAC as a raw vector (raw = TRUE).
hmac_sha1 <- function(key, data) {
  # Call through the namespace rather than require(): require() attaches the
  # package as a side effect and merely returns FALSE when it is missing.
  digest::hmac(key, data, algo = "sha256", raw = TRUE)
}

# Encode a positive integer as four bytes, most significant byte first.
#   i - positive number to encode (enforced by stopifnot)
# Returns a raw vector of length 4.
INT <- function(i) {
  stopifnot(i > 0)
  # 256^3, 256^2, 256^1, 256^0: each divisor isolates one byte of i.
  divisors <- 256^(3:0)
  as.raw((i %/% divisors) %% 256)
}

# XOR two raw vectors of equal length, byte by byte.
#   A, B - vectors of bytes; must have the same length
# Returns a raw vector of that length (raw(0) for empty input).
xor_raw <- function(A, B) {
  stopifnot(length(A) == length(B))
  # bitwXor() is vectorised, so no index loop is needed. This also fixes the
  # empty case: a `1:length(A)` loop runs over c(1, 0) when length(A) == 0 and
  # produced a spurious single 00 byte.
  as.raw(bitwXor(as.integer(A), as.integer(B)))
}

# Main PBKDF2 function (block function F as per RFC 2898).
#   P     - password, passed to `prf` as the key
#   S     - salt; the 4-byte block index from INT() is appended to it
#   c     - iteration count, must be >= 1
#   dkLen - number of elements (bytes) wanted in the derived key, must be >= 1
#   prf   - function(key, data) returning the pseudo-random output;
#           defaults to hmac_sha1
# Returns the derived key: the concatenated blocks trimmed to dkLen elements.
# Stops with "derived key too long" when dkLen exceeds (2^32 - 1) * hLen.
pbkdf2 <- function(P, S, c, dkLen, prf = hmac_sha1) {
  # Validate the counts up front: `1:n` loops run over c(1, 0) when n is 0,
  # which would silently do the wrong amount of work.
  stopifnot(
    length(c) == 1L, is.finite(c), c >= 1,
    length(dkLen) == 1L, is.finite(dkLen), dkLen >= 1
  )

  # Probe the PRF once to learn its output length.
  hLen <- length(prf(P, S))

  # Validate parameters
  if (dkLen > (2^32 - 1) * hLen) {
    stop("derived key too long")
  }

  # Calculate required number of blocks
  n_blocks <- ceiling(dkLen / hLen)

  # One output block: U_1 = prf(P, S || INT(i)), U_j = prf(P, U_{j-1}),
  # block = U_1 xor U_2 xor ... xor U_c.
  derive_block <- function(i) {
    # base::c is spelled out because the parameter `c` (the iteration count)
    # shares its name with the concatenation function.
    u <- prf(P, base::c(S, INT(i)))
    block <- u
    for (j in seq_len(c - 1)) {
      u <- prf(P, u)
      block <- xor_raw(block, u)
    }
    block
  }

  # Calculate all blocks, concatenate them and trim to the required length.
  blocks <- lapply(seq_len(n_blocks), derive_block)
  DK <- unlist(blocks)
  DK[seq_len(dkLen)]
}

# Salt and password as raw bytes, the inputs to the key derivation.
salt <- charToRaw("orieC_jQQWRmhkPvR6u2kzXeTube6aYupiOddsPortal")
password <- charToRaw("%RtR8AB&nWsh=AQC+v!=pgAe@dSQG3kQ")

# 1000 iterations, 32 elements of key material.
derived_key <- pbkdf2(P = password, S = salt, c = 1000, dkLen = 32)

# AES-CBC decryption with the derived key; key_bytes is passed as the IV.
decrypted_bytes <- openssl::aes_cbc_decrypt(
  encrypted_bytes,
  key = derived_key,
  iv = key_bytes
)

# Parse the decrypted text as JSON and display it.
decoded <- jsonlite::fromJSON(rawToChar(decrypted_bytes))
decoded
发布评论

评论列表(0)

  1. 暂无评论