最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

C: Linux Socket Programming, TCP, a simple HTTP client

运维笔记admin57浏览0评论

C: Linux Socket Programming, TCP, a simple HTTP client

C: Linux Socket Programming, TCP, a simple HTTP client

C: Linux Socket Programming, TCP, a simple HTTP client - 安東尼隨手記 - Yahoo!奇摩部落格

C: Linux Socket Programming, TCP, a simple HTTP client - 安東尼隨手記 - Yahoo!奇摩部落格








 

檢舉

C: Linux Socket Programming, TCP, a simple HTTP client

分類: Linux程式設計 2009/04/16 10:46

分享


Facebook
Plurk
YAHOO!

 

  • 分享在我的 Facebook
  • 分享在我的 Plurk
  • 分享在我的即時通

Copy from


  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/socket.h>
  #include <arpa/inet.h>
  #include <netdb.h>
  /* Compile-time defaults. HOST is not referenced by the code visible here. */
  #define HOST "coding.debuntu.org"
  #define PAGE "/"
  #define PORT 80
  #define USERAGENT "HTMLGET 1.0"

  /* Forward declarations of the helpers that are defined after main(). */
  void usage ( );
  int create_tcp_socket ( );
  char *get_ip ( char *host );
  char *build_get_query ( char *host, char *page );
  15. int main ( int argc, char **argv )
  16. {
  17. struct sockaddr_in *remote;
  18. int sock;
  19. int tmpres;
  20. char *ip;
  21. char *get;
  22. char buf [BUFSIZ +1 ];
  23. char *host;
  24. char *page;
  25. if (argc == 1 ) {
  26. usage ( );
  27. exit ( 2 );
  28. }  
  29. host = argv [ 1 ];
  30. if (argc > 2 ) {
  31. page = argv [ 2 ];
  32. } else {
  33. page = PAGE;
  34. }
  35. sock = create_tcp_socket ( );
  36. ip = get_ip (host );
  37. fprintf (stderr, "IP is %s\n", ip );
  38. remote = ( struct sockaddr_in * )malloc ( sizeof ( struct sockaddr_in * ) );
  39. remote ->sin_family = AF_INET;
  40. tmpres = inet_pton (AF_INET, ip, ( void * ) ( & (remote ->sin_addr. s_addr ) ) );
  41. if ( tmpres < 0 )  
  42. {
  43. perror ( "Can't set remote->sin_addr.s_addr" );
  44. exit ( 1 );
  45. } else if (tmpres == 0 )
  46. {
  47. fprintf (stderr, "%s is not a valid IP address\n", ip );
  48. exit ( 1 );
  49. }
  50. remote ->sin_port = htons (PORT );
  51. if (connect (sock, ( struct sockaddr * )remote, sizeof ( struct sockaddr ) ) < 0 ) {
  52. perror ( "Could not connect" );
  53. exit ( 1 );
  54. }
  55. get = build_get_query (host, page );
  56. fprintf (stderr, "Query is:\n<<START>>\n%s<<END>>\n", get );
  57. //Send the query to the server
  58. int sent = 0;
  59. while (sent < strlen (get ) )
  60. {
  61. tmpres = send (sock, get +sent, strlen (get ) -sent, 0 );
  62. if (tmpres == -1 ) {
  63. perror ( "Can't send query" );
  64. exit ( 1 );
  65. }
  66. sent += tmpres;
  67. }
  68. //now it is time to receive the page
  69. memset (buf, 0, sizeof (buf ) );
  70. int htmlstart = 0;
  71. char * htmlcontent;
  72. while ( (tmpres = recv (sock, buf, BUFSIZ, 0 ) ) > 0 ) {
  73. if (htmlstart == 0 )
  74. {
  75. /* Under certain conditions this will not work.
  76.       * If the \r\n\r\n part is splitted into two messages
  77.       * it will fail to detect the beginning of HTML content
  78.       */
  79. htmlcontent = strstr (buf, "\r\n\r\n" );
  80. if (htmlcontent != NULL ) {
  81. htmlstart = 1;
  82. htmlcontent += 4;
  83. }
  84. } else {
  85. htmlcontent = buf;
  86. }
  87. if (htmlstart ) {
  88. fprintf (stdout, htmlcontent );
  89. }
  90. memset (buf, 0, tmpres );
  91. }
  92. if (tmpres < 0 )
  93. {
  94. perror ( "Error receiving data" );
  95. }
  96. free (get );
  97. free (remote );
  98. free (ip );
  99. close (sock );
  100. return 0;
  101. }
  /* Print the command-line synopsis to stderr. */
  void usage ( )
  {
      static const char help_text[] =
          "USAGE: htmlget host [page]\n"
          "\thost: the website hostname. ex: coding.debuntu.org\n"
          "\tpage: the page to retrieve. ex: index.html, default: /\n";

      fputs(help_text, stderr);
  }
  /*
   * Open an IPv4 TCP stream socket.
   *
   * Returns the socket descriptor. If socket() fails, the error is reported
   * with perror() and the process exits with status 1.
   */
  int create_tcp_socket ( )
  {
      int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

      if (fd >= 0) {
          return fd;
      }

      perror("Can't create TCP socket");
      exit(1);
  }
  /*
   * Resolve `host` to its first IPv4 address in dotted-quad text form.
   *
   * Returns a heap-allocated, NUL-terminated string that the caller must
   * free(). On any failure (lookup error, no IPv4 address, allocation or
   * conversion failure) a message is written to stderr and the process
   * exits with status 1.
   */
  char *get_ip ( char *host )
  {
      struct hostent *hent;
      char *ip;

      hent = gethostbyname(host);
      if (hent == NULL) {
          herror("Can't get IP");
          exit(1);
      }
      if (hent->h_addrtype != AF_INET || hent->h_addr_list[0] == NULL) {
          fprintf(stderr, "Can't resolve host: no IPv4 address for %s\n", host);
          exit(1);
      }

      /*
       * INET_ADDRSTRLEN (16) covers "XXX.XXX.XXX.XXX" plus the terminating
       * NUL. inet_ntop() must be told the full buffer size; a smaller value
       * makes it fail with ENOSPC on 15-character addresses.
       */
      ip = calloc(1, INET_ADDRSTRLEN);
      if (ip == NULL) {
          perror("Can't allocate memory for IP");
          exit(1);
      }
      if (inet_ntop(AF_INET, hent->h_addr_list[0], ip, INET_ADDRSTRLEN) == NULL) {
          perror("Can't resolve host");
          exit(1);
      }
      return ip;
  }
  135. char *build_get_query ( char *host, char *page )
  136. {
  137. char *query;
  138. char *getpage = page;
  139. char *tpl = "GET /%s HTTP/1.0\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n";
  140. if (getpage [ 0 ] == '/' ) {
  141. getpage = getpage + 1;
  142. fprintf (stderr, "Removing leading \"/\", converting %s to %s\n", page, getpage );
  143. }
  144. // -5 is to consider the %s %s %s in tpl and the ending \0
  145. query = ( char * )malloc (strlen (host ) +strlen (getpage ) +strlen (USERAGENT ) +strlen (tpl ) -5 );
  146. sprintf (query, tpl, getpage, host, USERAGENT );
  147. return query;
  148. }

To compile it, run:

$ gcc -o htmlget htmlget.c
$ ./htmlget 
USAGE: htmlget host [page]
	host: the website hostname. ex: coding.debuntu.org
	page: the page to retrieve. ex: index.html, default: /

Informative messages and errors are printed to stderr. The content of the page is printed to stdout. Thus, to save the HTML content of a page to a file, you will need to run:

$ ./htmlget coding.debuntu.org category > /tmp/page.html
posted on 2012-07-17 09:11  lexus 阅读( ...) 评论( ...) 编辑 收藏

转载于:.html

发布评论

评论列表(0)

  1. 暂无评论