C: Linux Socket Programming, TCP, a simple HTTP client
C: Linux Socket Programming, TCP, a simple HTTP client - 安東尼隨手記 - Yahoo!奇摩部落格
posted on 2012-07-17 09:11 lexus 阅读( ...) 评论( ...) 编辑 收藏
檢舉
C: Linux Socket Programming, TCP, a simple HTTP client
分類: Linux程式設計 2009/04/16 10:46分享
Plurk
YAHOO!
Copy from
- 分享在我的 Facebook
- 分享在我的 Plurk
- 分享在我的即時通
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>
/* Forward declarations for the helpers that are defined after main(). */
int create_tcp_socket();
char *get_ip(char *host);
char *build_get_query(char *host, char *page);
void usage();

/* Example host name (the same one quoted in the usage text). */
#define HOST "coding.debuntu.org"
/* Page requested by main() when no second argument is given. */
#define PAGE "/"
/* TCP port main() connects to. */
#define PORT 80
/* Value build_get_query() sends in the User-Agent header. */
#define USERAGENT "HTMLGET 1.0"
- int main ( int argc, char **argv )
- {
- struct sockaddr_in *remote;
- int sock;
- int tmpres;
- char *ip;
- char *get;
- char buf [BUFSIZ +1 ];
- char *host;
- char *page;
- if (argc == 1 ) {
- usage ( );
- exit ( 2 );
- }
- host = argv [ 1 ];
- if (argc > 2 ) {
- page = argv [ 2 ];
- } else {
- page = PAGE;
- }
- sock = create_tcp_socket ( );
- ip = get_ip (host );
- fprintf (stderr, "IP is %s\n", ip );
- remote = ( struct sockaddr_in * )malloc ( sizeof ( struct sockaddr_in * ) );
- remote ->sin_family = AF_INET;
- tmpres = inet_pton (AF_INET, ip, ( void * ) ( & (remote ->sin_addr. s_addr ) ) );
- if ( tmpres < 0 )
- {
- perror ( "Can't set remote->sin_addr.s_addr" );
- exit ( 1 );
- } else if (tmpres == 0 )
- {
- fprintf (stderr, "%s is not a valid IP address\n", ip );
- exit ( 1 );
- }
- remote ->sin_port = htons (PORT );
- if (connect (sock, ( struct sockaddr * )remote, sizeof ( struct sockaddr ) ) < 0 ) {
- perror ( "Could not connect" );
- exit ( 1 );
- }
- get = build_get_query (host, page );
- fprintf (stderr, "Query is:\n<<START>>\n%s<<END>>\n", get );
- //Send the query to the server
- int sent = 0;
- while (sent < strlen (get ) )
- {
- tmpres = send (sock, get +sent, strlen (get ) -sent, 0 );
- if (tmpres == -1 ) {
- perror ( "Can't send query" );
- exit ( 1 );
- }
- sent += tmpres;
- }
- //now it is time to receive the page
- memset (buf, 0, sizeof (buf ) );
- int htmlstart = 0;
- char * htmlcontent;
- while ( (tmpres = recv (sock, buf, BUFSIZ, 0 ) ) > 0 ) {
- if (htmlstart == 0 )
- {
- /* Under certain conditions this will not work.
- * If the \r\n\r\n part is splitted into two messages
- * it will fail to detect the beginning of HTML content
- */
- htmlcontent = strstr (buf, "\r\n\r\n" );
- if (htmlcontent != NULL ) {
- htmlstart = 1;
- htmlcontent += 4;
- }
- } else {
- htmlcontent = buf;
- }
- if (htmlstart ) {
- fprintf (stdout, htmlcontent );
- }
- memset (buf, 0, tmpres );
- }
- if (tmpres < 0 )
- {
- perror ( "Error receiving data" );
- }
- free (get );
- free (remote );
- free (ip );
- close (sock );
- return 0;
- }
/* Prints the command-line help text to stderr. */
void usage()
{
    static const char help_text[] =
        "USAGE: htmlget host [page]\n"
        "\thost: the website hostname. ex: coding.debuntu.org\n"
        "\tpage: the page to retrieve. ex: index.html, default: /\n";

    fputs(help_text, stderr);
}
/*
 * Opens an IPv4 TCP socket and returns its descriptor.
 * On failure, reports the error with perror() and terminates with status 1.
 */
int create_tcp_socket()
{
    const int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

    if (fd >= 0) {
        return fd;
    }

    perror("Can't create TCP socket");
    exit(1);
}
/*
 * Resolves host with gethostbyname() and returns its first address as a
 * dotted-quad string.
 *
 * The returned buffer is heap-allocated; the caller must free() it.
 * On any failure an error is printed to stderr and the process exits with
 * status 1.
 */
char *get_ip(char *host)
{
    struct hostent *hent;
    /* INET_ADDRSTRLEN covers "XXX.XXX.XXX.XXX" plus the terminating NUL. */
    char *ip = calloc(1, INET_ADDRSTRLEN);

    if (ip == NULL) {
        perror("Can't allocate memory for IP");
        exit(1);
    }

    hent = gethostbyname(host);
    if (hent == NULL) {
        herror("Can't get IP");
        exit(1);
    }

    /* inet_ntop() below is called with AF_INET, so require an IPv4 entry. */
    if (hent->h_addrtype != AF_INET || hent->h_addr_list[0] == NULL) {
        fprintf(stderr, "No IPv4 address found for %s\n", host);
        exit(1);
    }

    /* Pass the full buffer size so a 15-character address still fits. */
    if (inet_ntop(AF_INET, hent->h_addr_list[0], ip, INET_ADDRSTRLEN) == NULL) {
        perror("Can't resolve host");
        exit(1);
    }
    return ip;
}
- char *build_get_query ( char *host, char *page )
- {
- char *query;
- char *getpage = page;
- char *tpl = "GET /%s HTTP/1.0\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n";
- if (getpage [ 0 ] == '/' ) {
- getpage = getpage + 1;
- fprintf (stderr, "Removing leading \"/\", converting %s to %s\n", page, getpage );
- }
- // -5 is to consider the %s %s %s in tpl and the ending \0
- query = ( char * )malloc (strlen (host ) +strlen (getpage ) +strlen (USERAGENT ) +strlen (tpl ) -5 );
- sprintf (query, tpl, getpage, host, USERAGENT );
- return query;
- }
To compile it, run:
$ gcc -o htmlget htmlget.c
$ ./htmlget
USAGE: htmlget host [page]
	host: the website hostname. ex: coding.debuntu.org
	page: the page to retrieve. ex: index.html, default: /
Informative messages and errors are printed to stderr. The content of the page is printed to stdout. Thus, to save the HTML content of a page to a file, you will need to run:
$ ./htmlget coding.debuntu.org category > /tmp/page.html
转载于:.html