Gentics Portal.Node PHP API
 All Classes Namespaces Functions Variables Pages
parserXML.php
1 <?php
/**
 * Read-only accessor around the parsed content of an APA XML file.
 *
 * The class wraps a SimpleXMLElement and exposes the parts of the document
 * that the getters below read:
 *  - attributes of HEAD/META (DOCID, ID, DOCTYP, TIMEDATE),
 *  - HEAD/TITLE and HEAD/KEYWORD,
 *  - BODY/PLACE, BODY/SOURCE and BODY/TEXT (LEAD, P and LINK children),
 *  - INFO/AUTHOR,
 *  - the "apaid" attribute of the delitem element.
 *
 * Getters that return strings or arrays tolerate missing HEAD/BODY/TEXT
 * nodes and yield an empty string / empty array instead of raising PHP
 * warnings.
 *
 * @author Andrew Voloshin <andrew.voloshin@oberig.com>
 *
 */
class parserXML
{
    /**
     * @var SimpleXMLElement parsed document handed to the constructor
     */
    public $xml;

    /**
     * UTF-8 character => ASCII replacement table used by unaccent().
     *
     * @var array
     */
    private static $accentMap = array(
        'Š' => 'S', 'Œ' => 'O', 'Ž' => 'Z', 'š' => 's', 'œ' => 'o', 'ž' => 'z',
        'Ÿ' => 'Y', '¥' => 'Y', 'µ' => 'u',
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'A',
        'Ç' => 'C',
        'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'Ð' => 'D', 'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
        'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
        'Ý' => 'Y', 'ß' => 's',
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'a',
        'ç' => 'c',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ð' => 'o', 'ñ' => 'n',
        'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
        'ý' => 'y', 'ÿ' => 'y',
    );

    /**
     * Constructor
     *
     * @param SimpleXMLElement $xml parsed document; stored as-is, not validated
     */
    public function __construct($xml)
    {
        $this->xml = $xml;
    }

    /**
     * Returns an attribute of the HEAD/META element, or the element itself.
     *
     * @param string $field name of the META attribute to read; omit to get the META element
     *
     * @return string|SimpleXMLElement|null attribute value ('' when the attribute or
     *         the META element is missing) if $field is given, otherwise whatever
     *         HEAD->META resolves to
     */
    public function getMeta($field = null)
    {
        $meta = $this->xml->HEAD->META;
        if ($field === null) {
            return $meta;
        }
        // Without a HEAD element the lookup above yields null; indexing null
        // would only raise a warning, so answer with the empty string directly.
        if (!$meta instanceof SimpleXMLElement) {
            return '';
        }

        return (string)$meta[$field];
    }

    /**
     * Returns the child element $field of BODY, or BODY itself.
     *
     * @param string $field name of the child tag to return; omit to get BODY
     *
     * @return SimpleXMLElement|null requested node (null when BODY itself is missing
     *         and a child was requested)
     */
    public function getBody($field = null)
    {
        return $this->section('BODY', $field);
    }

    /**
     * Returns the child element $field of HEAD, or HEAD itself.
     *
     * @param string $field name of the child tag to return; omit to get HEAD
     *
     * @return SimpleXMLElement|null requested node (null when HEAD itself is missing
     *         and a child was requested)
     */
    public function getHead($field = null)
    {
        return $this->section('HEAD', $field);
    }

    /**
     * Returns the child element $field of INFO, or INFO itself.
     *
     * @param string $field name of the child tag to return; omit to get INFO
     *
     * @return SimpleXMLElement|null requested node (null when INFO itself is missing
     *         and a child was requested)
     */
    public function getInfo($field = null)
    {
        return $this->section('INFO', $field);
    }

    /**
     * Function returns DOCID of current xml file
     *
     * @return string - value of the DOCID attribute of HEAD/META
     */
    public function getDocId()
    {
        return (string)$this->getMeta('DOCID');
    }

    /**
     * Function returns ID of current xml file
     *
     * @return string - value of the ID attribute of HEAD/META
     */
    public function getApaId()
    {
        return (string)$this->getMeta('ID');
    }

    /**
     * Replaces the accented characters listed in the replacement table with
     * plain ASCII letters.
     *
     * The replacement works directly on the UTF-8 string, so characters that
     * are not in the table (including '?') are returned unchanged and the
     * result stays valid UTF-8.
     *
     * @param string $string UTF-8 string in which accents should be removed
     *
     * @return string - string without accents
     */
    public static function unaccent($string)
    {
        // The array form of strtr() matches whole (multi-byte) keys, which
        // avoids the lossy conversion to ISO-8859-1 that utf8_decode() does.
        return strtr((string)$string, self::$accentMap);
    }

    /**
     * Function returns title of current xml file
     *
     * @return string - content of HEAD/TITLE
     */
    public function getTitle()
    {
        return (string)$this->getHead('TITLE');
    }

    /**
     * Function returns date of current xml file
     *
     * The TIMEDATE attribute of HEAD/META is parsed with strtotime().
     *
     * @return string - unix timestamp of the document, '0' when TIMEDATE is
     *         missing or cannot be parsed
     */
    public function getDate()
    {
        $timestamp = strtotime((string)$this->getMeta('TIMEDATE'));

        // strtotime() already yields the unix timestamp; formatting it to local
        // time and parsing it again is ambiguous in the repeated DST hour.
        return $timestamp === false ? '0' : (string)$timestamp;
    }

    /**
     * Function returns DOCTYP of current xml file
     *
     * @return string - value of the DOCTYP attribute of HEAD/META
     */
    public function getDocType()
    {
        return (string)$this->getMeta('DOCTYP');
    }

    /**
     * Function returns array of child files
     *
     * @return array - HREF attribute of every BODY/TEXT/LINK element
     */
    public function getRelations()
    {
        $relations = array();
        foreach ($this->textChildren('LINK') as $link) {
            $relations[] = (string)$link['HREF'];
        }

        return $relations;
    }

    /**
     * Function returns array of child files parameters
     *
     * @return array - one array per BODY/TEXT/LINK element with the keys
     *         href, size, width, height and format (all strings)
     */
    public function getFiles()
    {
        $files = array();
        foreach ($this->textChildren('LINK') as $link) {
            $files[] = array(
                'href' => (string)$link['HREF'],
                'size' => (string)$link['SIZE'],
                'width' => (string)$link['WIDTH'],
                'height' => (string)$link['HEIGHT'],
                'format' => (string)$link['FORMAT'],
            );
        }

        return $files;
    }

    /**
     * Function returns place
     *
     * @return string - content of BODY/PLACE
     */
    public function getPlace()
    {
        return (string)$this->getBody('PLACE');
    }

    /**
     * Function returns source
     *
     * @return string - content of BODY/SOURCE
     */
    public function getSource()
    {
        return (string)$this->getBody('SOURCE');
    }

    /**
     * Function returns author
     *
     * @return string - content of INFO/AUTHOR
     */
    public function getAuthor()
    {
        return (string)$this->getInfo('AUTHOR');
    }

    /**
     * Function returns the teaser of the article
     *
     * @return string - content of BODY/TEXT/LEAD, '' when it is missing
     */
    public function getTeaser()
    {
        $text = $this->getBody('TEXT');
        if (!$text instanceof SimpleXMLElement) {
            return '';
        }

        return (string)$text->LEAD;
    }

    /**
     * Function returns title of document without accents and shortened to 64 characters
     *
     * Spaces are replaced by underscores and double quotes are dropped.
     *
     * @return string - document title usable as a file name base
     */
    public function getFilename()
    {
        $filename = self::unaccent((string)$this->getHead('TITLE'));
        $filename = strtr($filename, array(' ' => '_', '"' => ''));

        return self::truncate($filename, 64);
    }

    /**
     * Function returns keywords of document
     *
     * @return string - contents of all HEAD/KEYWORD elements joined with ','
     */
    public function getKeywords()
    {
        $keywords = array();
        $nodes = $this->getHead('KEYWORD');
        if ($nodes instanceof SimpleXMLElement) {
            foreach ($nodes as $node) {
                $keywords[] = (string)$node;
            }
        }

        return implode(',', $keywords);
    }

    /**
     * Function returns text of article
     *
     * Every BODY/TEXT/P element becomes one <p>...</p> paragraph; a document
     * without paragraphs yields '<p></p>'.
     *
     * NOTE(review): the paragraph text is inserted without HTML escaping -
     * confirm that callers expect that.
     *
     * @return string - text
     */
    public function getText()
    {
        $paragraphs = array();
        foreach ($this->textChildren('P') as $paragraph) {
            $paragraphs[] = (string)$paragraph;
        }

        return '<p>' . implode('</p><p>', $paragraphs) . '</p>';
    }

    /**
     * Function returns docid of news line which should be removed
     *
     * @return string - value of the apaid attribute of the delitem element
     */
    public function getDeleteId()
    {
        return (string)$this->xml->delitem['apaid'];
    }

    /**
     * Shared lookup behind getBody(), getHead() and getInfo().
     *
     * @param string      $section name of the top-level tag
     * @param string|null $field   name of the child tag, or null for the section itself
     *
     * @return SimpleXMLElement|null
     */
    private function section($section, $field)
    {
        $node = $this->xml->$section;

        return $field === null ? $node : $node->$field;
    }

    /**
     * Returns the children named $name of BODY/TEXT in a form that is always
     * safe to iterate.
     *
     * @param string $name child tag name
     *
     * @return SimpleXMLElement|array matching nodes, or an empty array when
     *         BODY or TEXT does not exist
     */
    private function textChildren($name)
    {
        $text = $this->getBody('TEXT');
        if (!$text instanceof SimpleXMLElement) {
            return array();
        }
        $children = $text->$name;

        return $children instanceof SimpleXMLElement ? $children : array();
    }

    /**
     * Cuts a UTF-8 string down to at most $length characters without
     * splitting a multi-byte character.
     *
     * @param string $text   UTF-8 text
     * @param int    $length maximum number of characters to keep
     *
     * @return string
     */
    private static function truncate($text, $length)
    {
        if (function_exists('mb_substr')) {
            return mb_substr($text, 0, $length, 'UTF-8');
        }
        // mbstring is optional; PCRE in UTF-8 mode counts characters as well.
        if (preg_match('/^.{0,' . (int)$length . '}/us', $text, $match) === 1) {
            return $match[0];
        }

        // Not valid UTF-8: a byte-wise cut is the best that can be done.
        return (string)substr($text, 0, $length);
    }
}